Merge pull request #2724 from swelljoe/test-web-lib-funcs-strings

Add ip, paths, string tests for web-lib-funcs
2026-06-05 04:40:24 +01:00 · 2026-05-20 15:50:13 -07:00
parent cbc9595649 53c3bef94c
commit 16c16f4fd4
6 changed files with 686 additions and 50 deletions
--- a/miniserv.pl
+++ b/miniserv.pl
@@ -7027,27 +7027,50 @@ return $_[0] =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)$/ &&
 }

 # Check if some IPv6 address is properly formatted, and returns 1 if so.
+# Kept in sync with web-lib-funcs.pl's copy.
 sub check_ip6address
 {
-  my @blocks = split(/:/, $_[0]);
-  return 0 if (@blocks == 0 || @blocks > 8);
-  my $ib = $#blocks;
-  my $where = index($blocks[$ib],"/");
-  my $m = 0;
-  if ($where != -1) {
-    my $b = substr($blocks[$ib],0,$where);
-    $m = substr($blocks[$ib],$where+1,length($blocks[$ib])-($where+1));
-    $blocks[$ib]=$b;
-  }
-  return 0 if ($m <0 || $m >128); 
-  my $b;
-  my $empty = 0;
-  foreach $b (@blocks) {
-	  return 0 if ($b ne "" && $b !~ /^[0-9a-f]{1,4}$/i);
-	  $empty++ if ($b eq "");
-	  }
-  return 0 if ($empty > 1 && !($_[0] =~ /^::/ && $empty == 2));
-  return 1;
+my $addr = $_[0];
+my $m = 0;
+
+# Strip an optional /N netmask before splitting. Doing this on the
+# raw string (rather than from the last split element) keeps split()'s
+# trailing-empty accounting intact for inputs like "2001:db8::/32",
+# where the netmask would otherwise hide the trailing "::" shorthand.
+if ($addr =~ s{/(\d+)\z}{}) {
+	$m = $1;
+	}
+return 0 if ($m < 0 || $m > 128);
+
+# Special case for unspecified address (analogous to 0.0.0.0 in IPv4),
+# both bare and with a netmask.
+return 1 if ($addr eq "::");
+
+my @blocks = split(/:/, $addr);
+return 0 if (@blocks == 0);
+
+# Accept the IPv4-in-IPv6 forms (RFC 4291 §2.5.5: "::ffff:N.N.N.N"
+# IPv4-mapped, and the more general "X:X:X:X:X:X:N.N.N.N"). If the
+# last block is a dotted-quad, validate the octets and count it as two
+# 16-bit groups for the overall 8-group ceiling. The leading ":" guard
+# distinguishes IPv4-tailed IPv6 from a bare IPv4 address — callers
+# like ip_match() rely on this sub returning false for "10.0.0.1".
+my $count = scalar(@blocks);
+if ($addr =~ /:/ &&
+    $blocks[-1] =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)\z/) {
+	return 0 if ($1 > 255 || $2 > 255 || $3 > 255 || $4 > 255);
+	$count++;
+	pop(@blocks);
+	}
+return 0 if ($count > 8);
+
+my $empty = 0;
+foreach my $b (@blocks) {
+	return 0 if ($b ne "" && $b !~ /^[0-9a-f]{1,4}$/i);
+	$empty++ if ($b eq "");
+	}
+return 0 if ($empty > 1 && !($addr =~ /^::/ && $empty == 2));
+return 1;
 }

 # network_to_address(binary)
--- a/t/miniserv.t
+++ b/t/miniserv.t
@@ -264,13 +264,51 @@ subtest 'check_ipaddress' => sub {
 	ok(!miniserv::check_ipaddress('not an ip'),       'garbage rejected');
 };

+# Kept in lockstep with t/web-lib-funcs-ip.t's matching subtest, since the
+# two copies of check_ip6address must accept and reject the same inputs.
 subtest 'check_ip6address' => sub {
+	ok( miniserv::check_ip6address('::'),                   'unspecified accepted');
 	ok( miniserv::check_ip6address('::1'),                  'loopback accepted');
 	ok( miniserv::check_ip6address('2001:db8::1'),          'compressed form accepted');
-	ok( miniserv::check_ip6address('1:2:3:4:5:6:7:8'),      'full form accepted');
-	ok(!miniserv::check_ip6address('not an addr'),          'garbage rejected');
-	ok(!miniserv::check_ip6address('1:2:3:4:5:6:7:8:9'),    'too many groups rejected');
+	ok( miniserv::check_ip6address('1:2:3:4:5:6:7:8'),      'full eight-block form accepted');
+	ok( miniserv::check_ip6address('2001:db8::'),           'trailing :: accepted (no netmask)');
+
+	# Netmask suffix — both with leading and trailing :: shorthand.
+	ok( miniserv::check_ip6address('::1/64'),               'address/netmask accepted with leading ::');
+	ok( miniserv::check_ip6address('2001:db8::/32'),        'address/netmask accepted with trailing ::');
+	ok( miniserv::check_ip6address('::/0'),                 '::/0 default route accepted');
+	ok( miniserv::check_ip6address('fe80::/10'),            'fe80::/10 link-local prefix accepted');
+	ok(!miniserv::check_ip6address('::1/200'),              'netmask > 128 rejected');
+
+	# IPv4-in-IPv6 tails.
+	ok( miniserv::check_ip6address('::ffff:10.0.0.1'),      'IPv4-mapped (::ffff:N.N.N.N) accepted');
+	ok( miniserv::check_ip6address('::ffff:0.0.0.0'),       'IPv4-mapped all-zero accepted');
+	ok( miniserv::check_ip6address('::1.2.3.4'),            'IPv4-compatible (::N.N.N.N) accepted');
+	ok( miniserv::check_ip6address('0:0:0:0:0:ffff:1.2.3.4'),
+	    'fully-expanded IPv4-mapped accepted');
+	ok(!miniserv::check_ip6address('::ffff:256.0.0.1'),     'IPv4-mapped with octet > 255 rejected');
+	ok(!miniserv::check_ip6address('::ffff:1.2.3'),         'IPv4-mapped with too-few octets rejected');
+
+	# Bare IPv4 must be rejected — callers (e.g. ip_match) use this sub
+	# as a type discriminator and a true result re-routes IPv4 input
+	# through the IPv6 codepath.
+	ok(!miniserv::check_ip6address('10.0.0.1'),             'bare IPv4 rejected (type-discriminator contract)');
+	ok(!miniserv::check_ip6address('1.2.3.4'),              'bare IPv4 rejected (type-discriminator contract)');
+
+	# Degenerate netmask shapes — stripping "/N" from the input must not
+	# let an address that's otherwise just a stray colon (or empty) pass.
+	# perl's split() trims trailing empties hard, so e.g. split(":") is
+	# () not (""), and our @blocks==0 guard catches it.
+	ok(!miniserv::check_ip6address(':/64'),                 'bare colon with netmask rejected');
+	ok(!miniserv::check_ip6address('/64'),                  'netmask without address rejected');
+	ok(!miniserv::check_ip6address(':'),                    'bare colon rejected');
+	ok(!miniserv::check_ip6address('::/'),                  'trailing slash with no digits rejected');
+	ok(!miniserv::check_ip6address('//64'),                 'leading slash with netmask rejected');
+
 	ok(!miniserv::check_ip6address('gggg::1'),              'non-hex rejected');
+	ok(!miniserv::check_ip6address('1:2:3:4:5:6:7:8:9'),    'too many groups rejected');
+	ok(!miniserv::check_ip6address('::1::2'),               'multiple :: rejected');
+	ok(!miniserv::check_ip6address('not an addr'),          'garbage rejected');
 };

 # canonicalize_ip6 / expand_ipv6_bytes
--- a/t/web-lib-funcs-ip.t
+++ b/t/web-lib-funcs-ip.t
@@ -0,0 +1,142 @@
+#!/usr/bin/perl
+# Unit tests for IP-address validators / classifiers in web-lib-funcs.pl.
+#
+# Pure subs, no globals to set up. miniserv.pl carries its own copies of
+# check_ipaddress / check_ip6address; those have their own tests in
+# t/miniserv.t. This file exclusively exercises the web-lib-funcs versions,
+# which have a slightly different IPv6 contract — the web-lib-funcs version
+# accepts an /N netmask suffix.
+
+use strict;
+use warnings;
+use Test::More;
+use File::Basename qw(dirname);
+use File::Spec;
+
+my $script = File::Spec->rel2abs(
+	File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
+require $script;
+
+# check_ipaddress — strict dotted-quad IPv4.
+subtest 'check_ipaddress' => sub {
+	ok( main::check_ipaddress('1.2.3.4'),         'plain IPv4 accepted');
+	ok( main::check_ipaddress('0.0.0.0'),         'all-zero accepted');
+	ok( main::check_ipaddress('255.255.255.255'), 'all-ones accepted');
+
+	ok(!main::check_ipaddress('256.0.0.1'),       'octet > 255 rejected');
+	ok(!main::check_ipaddress('1.2.3'),           'too-few octets rejected');
+	ok(!main::check_ipaddress('1.2.3.4.5'),       'too-many octets rejected');
+	ok(!main::check_ipaddress('1.2.3.x'),         'non-numeric octet rejected');
+	ok(!main::check_ipaddress(''),                'empty rejected');
+	ok(!main::check_ipaddress('not an ip'),       'garbage rejected');
+	ok(!main::check_ipaddress(' 1.2.3.4'),        'leading whitespace rejected');
+	ok(!main::check_ipaddress('1.2.3.4 '),        'trailing whitespace rejected');
+};
+
+# check_ip6address — IPv6, optionally with /N netmask suffix.
+#
+# Accepts the standard text forms, the "::" shorthand at any position, an
+# optional /N netmask, and the IPv4-in-IPv6 dotted-quad tail (RFC 4291
+# §2.5.5: "::ffff:N.N.N.N" mapped and "X:X:X:X:X:X:N.N.N.N" compatible).
+subtest 'check_ip6address' => sub {
+	ok( main::check_ip6address('::'),              'unspecified accepted');
+	ok( main::check_ip6address('::1'),             'loopback accepted');
+	ok( main::check_ip6address('2001:db8::1'),     'compressed form accepted');
+	ok( main::check_ip6address('1:2:3:4:5:6:7:8'), 'full eight-block form accepted');
+	ok( main::check_ip6address('2001:db8::'),      'trailing :: accepted (no netmask)');
+
+	# Netmask suffix — both with leading and trailing :: shorthand.
+	ok( main::check_ip6address('::1/64'),       'address/netmask accepted with leading ::');
+	ok( main::check_ip6address('2001:db8::/32'), 'address/netmask accepted with trailing ::');
+	ok( main::check_ip6address('::/0'),         '::/0 default route accepted');
+	ok( main::check_ip6address('fe80::/10'),    'fe80::/10 link-local prefix accepted');
+	ok(!main::check_ip6address('::1/200'),      'netmask > 128 rejected');
+
+	# IPv4-in-IPv6 tails.
+	ok( main::check_ip6address('::ffff:10.0.0.1'),      'IPv4-mapped (::ffff:N.N.N.N) accepted');
+	ok( main::check_ip6address('::ffff:0.0.0.0'),       'IPv4-mapped all-zero accepted');
+	ok( main::check_ip6address('::1.2.3.4'),            'IPv4-compatible (::N.N.N.N) accepted');
+	ok( main::check_ip6address('0:0:0:0:0:ffff:1.2.3.4'),
+	    'fully-expanded IPv4-mapped accepted');
+	ok(!main::check_ip6address('::ffff:256.0.0.1'),     'IPv4-mapped with octet > 255 rejected');
+	ok(!main::check_ip6address('::ffff:1.2.3'),         'IPv4-mapped with too-few octets rejected');
+
+	# Bare IPv4 must be rejected — callers (e.g. ip_match) use this sub
+	# as a type discriminator and a true result re-routes IPv4 input
+	# through the IPv6 codepath.
+	ok(!main::check_ip6address('10.0.0.1'),          'bare IPv4 rejected (type-discriminator contract)');
+	ok(!main::check_ip6address('1.2.3.4'),           'bare IPv4 rejected (type-discriminator contract)');
+
+	# Degenerate netmask shapes — stripping "/N" from the input must not
+	# let an address that's otherwise just a stray colon (or empty) pass.
+	# perl's split() trims trailing empties hard, so e.g. split(":") is
+	# () not (""), and our @blocks==0 guard catches it.
+	ok(!main::check_ip6address(':/64'),  'bare colon with netmask rejected');
+	ok(!main::check_ip6address('/64'),   'netmask without address rejected');
+	ok(!main::check_ip6address(':'),     'bare colon rejected');
+	ok(!main::check_ip6address('::/'),   'trailing slash with no digits rejected');
+	ok(!main::check_ip6address('//64'),  'leading slash with netmask rejected');
+
+	ok(!main::check_ip6address('gggg::1'),           'non-hex rejected');
+	ok(!main::check_ip6address('1:2:3:4:5:6:7:8:9'), 'too many groups rejected');
+	ok(!main::check_ip6address('::1::2'),            'multiple :: rejected');
+	ok(!main::check_ip6address('not an addr'),       'garbage rejected');
+};
+
+# is_non_public_ipaddress — RFC1918 + reserved-range classifier.
+#
+# Returns 1 for: 0.x, 10.x, 127.x, 169.254/16, 172.16/12, 192.168/16,
+# 100.64/10 (CGNAT), 224+/4 (multicast/reserved); IPv6 loopback, link-local
+# (fe80–febf), ULA (fc00/fd00), and ::ffff:N.N.N.N when the wrapped IPv4
+# is itself non-public.
+subtest 'is_non_public_ipaddress (IPv4)' => sub {
+	# Private / reserved.
+	ok( main::is_non_public_ipaddress('10.0.0.1'),     '10/8 private');
+	ok( main::is_non_public_ipaddress('172.16.0.1'),   '172.16/12 low bound');
+	ok( main::is_non_public_ipaddress('172.31.255.255'), '172.16/12 high bound');
+	ok( main::is_non_public_ipaddress('192.168.1.1'),  '192.168/16 private');
+	ok( main::is_non_public_ipaddress('127.0.0.1'),    '127/8 loopback');
+	ok( main::is_non_public_ipaddress('169.254.1.1'),  '169.254/16 link-local');
+	ok( main::is_non_public_ipaddress('0.1.2.3'),      '0/8 reserved');
+	ok( main::is_non_public_ipaddress('100.64.0.1'),   'CGNAT 100.64/10 low');
+	ok( main::is_non_public_ipaddress('100.127.255.255'), 'CGNAT 100.64/10 high');
+	ok( main::is_non_public_ipaddress('224.0.0.1'),    '224+ multicast / reserved');
+	ok( main::is_non_public_ipaddress('255.255.255.255'), '255+ reserved');
+
+	# Just-outside boundaries.
+	ok(!main::is_non_public_ipaddress('11.0.0.1'),     '11/8 is public');
+	ok(!main::is_non_public_ipaddress('172.15.255.255'), '172.15 below private block');
+	ok(!main::is_non_public_ipaddress('172.32.0.0'),   '172.32 above private block');
+	ok(!main::is_non_public_ipaddress('192.167.0.0'),  '192.167 is public');
+	ok(!main::is_non_public_ipaddress('169.253.0.0'),  '169.253 is public');
+	ok(!main::is_non_public_ipaddress('100.63.255.255'), 'just below CGNAT');
+	ok(!main::is_non_public_ipaddress('100.128.0.0'),  'just above CGNAT');
+
+	# Plainly public.
+	ok(!main::is_non_public_ipaddress('8.8.8.8'),      'public DNS resolver');
+	ok(!main::is_non_public_ipaddress('1.1.1.1'),      'public DNS resolver');
+};
+
+subtest 'is_non_public_ipaddress (IPv6)' => sub {
+	ok( main::is_non_public_ipaddress('::1'),     'loopback');
+	ok( main::is_non_public_ipaddress('::'),      'unspecified');
+	ok( main::is_non_public_ipaddress('fe80::1'), 'link-local (fe80)');
+	ok( main::is_non_public_ipaddress('feb0::1'), 'link-local (feb0)');
+	ok( main::is_non_public_ipaddress('fc00::1'), 'ULA (fc00)');
+	ok( main::is_non_public_ipaddress('fd12::1'), 'ULA (fd12)');
+
+	# IPv4-mapped (::ffff:N.N.N.N) recurses on the embedded IPv4.
+	ok( main::is_non_public_ipaddress('::ffff:10.0.0.1'),
+	   '::ffff:<private> recurses → non-public');
+	ok( main::is_non_public_ipaddress('::ffff:192.168.1.1'),
+	   '::ffff:<rfc1918> recurses → non-public');
+	ok(!main::is_non_public_ipaddress('::ffff:8.8.8.8'),
+	   '::ffff:<public> reported as public');
+
+	# Plainly public IPv6.
+	ok(!main::is_non_public_ipaddress('2001:db8::1'), '2001:db8 is public per classifier');
+	ok(!main::is_non_public_ipaddress('2606:4700::1111'),
+	   'global unicast address is public');
+};
+
+done_testing();
--- a/t/web-lib-funcs-paths.t
+++ b/t/web-lib-funcs-paths.t
@@ -0,0 +1,189 @@
+#!/usr/bin/perl
+# Unit tests for path / URL / shell-quote helpers in web-lib-funcs.pl.
+#
+# Bare `require` loads the library; subs covered here either touch no
+# globals or use ones we set up locally ($gconfig{os_type} for quote_path,
+# %month_to_number_map / %number_to_month_map for the month helpers).
+# In production those maps are populated by web-lib.pl; we set them by hand
+# to keep tests independent of that initialiser.
+
+use strict;
+use warnings;
+use Test::More;
+use File::Basename qw(dirname);
+use File::Spec;
+
+my $script = File::Spec->rel2abs(
+	File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
+require $script;
+
+# simplify_path — strip ./ and resolve ../, refusing to escape root.
+#
+# Contract:
+#   - returns an absolute path (always leading "/")
+#   - returns undef when .. would pop above the root
+#   - "." and "" inputs both return "/"
+#   - a relative input ("foo") is promoted to "/foo"
+#
+# This is the web-lib-funcs version; miniserv.pl ships an independent
+# implementation (`miniserv::simplify_path`) with a different signature
+# (bogus-flag via aliased arg). Both are exercised by their own tests.
+subtest 'simplify_path' => sub {
+	is(main::simplify_path('/foo/bar'),     '/foo/bar', 'plain path');
+	is(main::simplify_path('/foo/./bar'),   '/foo/bar', '. collapses');
+	is(main::simplify_path('/foo/../bar'),  '/bar',     '.. pops');
+	is(main::simplify_path('/a/b/c/../../d'), '/a/d',
+	   'multiple .. pop the right number of segments');
+	is(main::simplify_path('//foo///bar//'), '/foo/bar', 'repeated slashes collapse');
+	is(main::simplify_path('/foo/'),         '/foo',    'trailing slash dropped');
+
+	# Adversarial: escaping root must fail closed.
+	is(main::simplify_path('/../etc/passwd'), undef, '.. above root → undef');
+	is(main::simplify_path('/foo/../../bar'), undef, 'overshoot → undef');
+
+	# Surprising-but-current behaviour: empty / root / relative inputs.
+	is(main::simplify_path(''),    '/',    'empty input → /');
+	is(main::simplify_path('/'),   '/',    'root passes through');
+	is(main::simplify_path('foo'), '/foo', 'relative input is promoted to absolute');
+};
+
+# parse_http_url — absolute and base-relative URL parsing.
+#
+# Contract on success: returns (host, port, page, ssl, [user], [pass]).
+# SSL mode 0=http, 1=https, 2=ftp.
+subtest 'parse_http_url' => sub {
+	my @abs = main::parse_http_url('http://example.com/foo');
+	is_deeply([@abs[0..3]], ['example.com', 80, '/foo', 0],
+		  'plain http URL');
+
+	my @https = main::parse_http_url('https://example.com:8443/bar');
+	is_deeply([@https[0..3]], ['example.com', 8443, '/bar', 1],
+		  'https with explicit port and ssl=1');
+
+	my @ftp = main::parse_http_url('ftp://host/x');
+	is_deeply([@ftp[0..3]], ['host', 21, '/x', 2],
+		  'ftp scheme → port 21 and ssl=2');
+
+	# Userinfo is captured as elements 4 and 5.
+	my @auth = main::parse_http_url('http://user:pass@example.com:81/foo');
+	is_deeply(\@auth, ['example.com', 81, '/foo', 0, 'user', 'pass'],
+		  'user:pass extracted from authority');
+
+	# Bracketed IPv6 host.
+	my @v6 = main::parse_http_url('http://[2001:db8::1]:8080/foo');
+	is_deeply([@v6[0..3]], ['2001:db8::1', 8080, '/foo', 0],
+		  'bracketed IPv6 host parsed, brackets stripped');
+
+	# Missing path defaults to "/".
+	my @noslash = main::parse_http_url('http://example.com');
+	is($noslash[2], '/', 'missing path defaults to /');
+
+	# no_default_port suppresses 80/443/21 substitution.
+	my @nd = main::parse_http_url('http://example.com/x', undef, undef,
+				     undef, undef, undef, undef, 1);
+	is($nd[1], undef, 'no_default_port leaves port undef');
+
+	# Relative URL with a base.
+	my @rs = main::parse_http_url('/page', 'host', 80, '/old/', 0);
+	is_deeply([@rs[0..3]], ['host', 80, '/page', 0],
+		  'server-absolute relative URL uses base host/port');
+
+	my @rd = main::parse_http_url('rel.html', 'host', 80, '/base/cur.html', 0);
+	is_deeply([@rd[0..3]], ['host', 80, '/base/rel.html', 0],
+		  'directory-relative URL resolves against base page directory');
+
+	# Unparseable input with no base → undef.
+	is(main::parse_http_url('not a url'), undef,
+	   'garbage with no base → undef');
+};
+
+# split_quoted_string — shell-ish tokenizer.
+#
+# Each iteration matches one of:
+#   "..." | '...' | \S+
+# followed by trailing whitespace, then loops on the remainder.
+subtest 'split_quoted_string' => sub {
+	is_deeply([main::split_quoted_string('one two three')],
+		  ['one', 'two', 'three'],
+		  'bare words');
+
+	is_deeply([main::split_quoted_string('"hello world" foo "bar baz" qux')],
+		  ['hello world', 'foo', 'bar baz', 'qux'],
+		  'double-quoted segments preserve internal spaces');
+
+	is_deeply([main::split_quoted_string(q{'a b' c 'd e'})],
+		  ['a b', 'c', 'd e'],
+		  'single-quoted segments work the same way');
+
+	is_deeply([main::split_quoted_string('')], [],
+		  'empty input → empty list');
+
+	# Unbalanced quote: the implementation falls through to the \S+
+	# branch and emits the leftover with the quote attached. Pin this.
+	is_deeply([main::split_quoted_string('unbalanced "quote')],
+		  ['unbalanced', '"quote'],
+		  'unterminated quote is taken as a bare token');
+
+	# Leading and trailing whitespace tolerated around tokens.
+	is_deeply([main::split_quoted_string('   spaces   between   ')],
+		  ['spaces', 'between'],
+		  'leading whitespace tolerated, interior whitespace splits tokens');
+	is_deeply([main::split_quoted_string("\tfoo\n")], ['foo'],
+		  'tabs and newlines treated as whitespace too');
+	is_deeply([main::split_quoted_string('   ')], [],
+		  'pure-whitespace input → empty list');
+};
+
+# quote_path — OS-dependent shell quoting.
+#
+# On Windows or for Windows-style absolute paths, wraps in double quotes.
+# Everywhere else, uses quotemeta (which escapes every non-word char).
+subtest 'quote_path' => sub {
+	no warnings 'once';
+	local %main::gconfig = ('os_type' => 'linux');
+
+	# quotemeta escapes /, space, etc.
+	is(main::quote_path('/a b/c'), '\\/a\\ b\\/c',
+	   'unix path uses quotemeta');
+	is(main::quote_path('plain'),  'plain',
+	   'all-word characters need no escaping');
+
+	# Windows-style path → "" wrapping, even when os_type isn't windows.
+	is(main::quote_path('c:/Users/x'), '"c:/Users/x"',
+	   'drive-letter prefix wraps in double quotes');
+
+	# os_type=windows forces double-quote wrap.
+	local %main::gconfig = ('os_type' => 'windows');
+	is(main::quote_path('/etc/passwd'), '"/etc/passwd"',
+	   'os_type=windows wraps in double quotes regardless of path shape');
+};
+
+# month_to_number / number_to_month — three-letter month name <-> 0-based index.
+#
+# In production these maps are populated by web-lib.pl; in tests we set
+# them up locally so the suite doesn't depend on web-lib.pl loading.
+subtest 'month_to_number / number_to_month' => sub {
+	no warnings 'once';
+	local %main::month_to_number_map = (
+		'jan' => 0,  'feb' => 1,  'mar' => 2,  'apr' => 3,
+		'may' => 4,  'jun' => 5,  'jul' => 6,  'aug' => 7,
+		'sep' => 8,  'oct' => 9,  'nov' => 10, 'dec' => 11,
+		);
+	local %main::number_to_month_map = reverse %main::month_to_number_map;
+
+	is(main::month_to_number('Jan'),     0,  'Jan → 0');
+	is(main::month_to_number('December'), 11, 'first three chars taken');
+	is(main::month_to_number('FEB'),     1,  'case-insensitive');
+	is(main::month_to_number('xyz'),     undef, 'unknown returns undef');
+
+	is(main::number_to_month(0),  'Jan', '0 → Jan (ucfirst applied)');
+	is(main::number_to_month(11), 'Dec', '11 → Dec');
+
+	# Round-trip every month.
+	for my $m (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
+		is(main::number_to_month(main::month_to_number($m)), $m,
+		   "round-trip $m");
+		}
+};
+
+done_testing();
--- a/t/web-lib-funcs-strings.t
+++ b/t/web-lib-funcs-strings.t
@@ -0,0 +1,228 @@
+#!/usr/bin/perl
+# Unit tests for the pure string helpers in web-lib-funcs.pl.
+#
+# web-lib-funcs.pl is a pure library (no `unless(caller)` guard), so a bare
+# `require` loads it without side effects. Helpers covered here touch no
+# globals beyond their args, so no stubbing or %gconfig setup is needed.
+#
+# Assertions pin the current contract — encoded byte shape, round-trip
+# identity, structural invariants — not the prettiest possible output.
+
+use strict;
+use warnings;
+use Test::More;
+use File::Basename qw(dirname);
+use File::Spec;
+
+my $script = File::Spec->rel2abs(
+	File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
+require $script;
+
+# html_escape
+#
+# Always escapes &, <, >, ", ', =. The optional $nodblamp flag uses a
+# lookahead so existing entity references survive untouched; without it,
+# every & becomes &amp;, including already-escaped input.
+subtest 'html_escape' => sub {
+	is(main::html_escape('<b>&"\'='),
+	   '&lt;b&gt;&amp;&quot;&#39;&#61;',
+	   'all six dangerous characters are escaped');
+
+	is(main::html_escape(undef), '', 'undef input → empty string');
+	is(main::html_escape(''),    '', 'empty input → empty string');
+	is(main::html_escape('plain text 123'), 'plain text 123',
+	   'whitespace and alphanumerics pass through');
+
+	# Default mode double-escapes existing entities — this is the current
+	# contract; the nodblamp flag opts into the smarter behaviour.
+	is(main::html_escape('&amp;'),    '&amp;amp;', 'default mode double-escapes &amp;');
+	is(main::html_escape('&amp;', 1), '&amp;',     'nodblamp preserves existing &amp;');
+	is(main::html_escape('&#65;', 1), '&#65;',     'nodblamp preserves numeric entity');
+	# Note: nodblamp's lookahead matches any &<letters>; as an entity, so
+	# made-up names like &x; are treated as entities and not re-escaped.
+	is(main::html_escape('&x;',   1), '&x;',       'nodblamp preserves arbitrary &word; shape');
+	is(main::html_escape('& ',    1), '&amp; ',   'nodblamp escapes lone &');
+
+	# Adversarial XSS payload — none of the dangerous chars survive raw.
+	my $xss = main::html_escape(q{<script>alert("x")</script>'=&});
+	unlike($xss, qr/[<>"'=]/, 'no raw HTML-significant characters remain');
+	unlike($xss, qr/&(?!(amp|lt|gt|quot|#39|#61);)/,
+	       'every & starts a known entity');
+};
+
+# html_unescape — inverse of html_escape for the canonical entity set.
+subtest 'html_unescape' => sub {
+	is(main::html_unescape('&lt;b&gt;&amp;&quot;&#39;&#61;'),
+	   '<b>&"\'=', 'canonical entity set round-trips');
+	is(main::html_unescape('a&nbsp;b'), 'a b',
+	   '&nbsp; decodes to a regular space');
+	is(main::html_unescape(undef), '', 'undef → empty');
+	is(main::html_unescape(''),    '', 'empty → empty');
+	is(main::html_unescape('no entities here'), 'no entities here',
+	   'plain text passes through unchanged');
+};
+
+# html_strip — remove tags, optionally replacing with a sentinel.
+subtest 'html_strip' => sub {
+	is(main::html_strip('<b>hello</b>'),         'hello',
+	   'simple tags removed');
+	is(main::html_strip('<a href="x">y</a>'),    'y',
+	   'attribute-bearing tag removed');
+	# Quoted attribute that contains a >, which would otherwise break a
+	# naive regexp — the implementation accounts for this.
+	is(main::html_strip('<a href=">">y</a>'),    'y',
+	   'quoted > inside attribute does not end tag early');
+	is(main::html_strip('plain text'),           'plain text',
+	   'plain text untouched');
+	is(main::html_strip('<b>x</b>', '|'),        '|x|',
+	   'replacement string substituted for each tag');
+};
+
+# quote_escape — only ' and " (and lone &) are escaped; existing entities
+# (&xxx; or &#NN;) are preserved.
+subtest 'quote_escape' => sub {
+	is(main::quote_escape(undef), '', 'undef → empty');
+
+	is(main::quote_escape('a&b'),      'a&amp;b', 'lone & escaped');
+	is(main::quote_escape('a&'),       'a&amp;',  'trailing & escaped');
+	is(main::quote_escape('a&amp;b'),  'a&amp;b', 'existing &amp; preserved');
+	is(main::quote_escape('a&#39;b'),  'a&#39;b', 'numeric entity preserved');
+
+	is(main::quote_escape(q{a"b'c}),        'a&quot;b&#39;c',
+	   'both quote styles escaped by default');
+	is(main::quote_escape(q{a"b'c}, q{"}),  q{a&quot;b'c},
+	   'only-quote="\"" escapes only double quotes');
+	is(main::quote_escape(q{a"b'c}, q{'}),  q{a"b&#39;c},
+	   'only-quote="\'" escapes only single quotes');
+};
+
+# quote_literal_escape — escape for inclusion in a Perl string literal.
+subtest 'quote_literal_escape' => sub {
+	is(main::quote_literal_escape(undef), '', 'undef → empty');
+	is(main::quote_literal_escape(''),    '', 'empty → empty');
+
+	# Default (single-quoted target): only \ and ' need escaping.
+	is(main::quote_literal_escape(q{it's}),   q{it\'s},      'single quote escaped');
+	is(main::quote_literal_escape(q{a\\b}),   q{a\\\\b},     'backslash doubled');
+	is(main::quote_literal_escape(q{a"b$c@d}), q{a"b$c@d},
+	   'double-quote / sigils NOT escaped in single-quoted target');
+
+	# Double-quoted target: also escape ", $, @ (because they interpolate).
+	is(main::quote_literal_escape(q{a"b$c@d}, q{"}),
+	   q{a\"b\$c\@d},
+	   '" $ @ all escaped in double-quoted target');
+	is(main::quote_literal_escape(q{a\\b}, q{"}), q{a\\\\b},
+	   'backslash doubled in double-quoted target too');
+};
+
+# quote_javascript — hex-escape the unsafe characters for a JS string literal.
+subtest 'quote_javascript' => sub {
+	is(main::quote_javascript(q{a"b}),  'a\x22b',  'double quote → \x22');
+	is(main::quote_javascript(q{a'b}),  'a\x27b',  'single quote → \x27');
+	is(main::quote_javascript('a<b>c'), 'a\x3cb\x3ec', '< and > escaped');
+	is(main::quote_javascript('a&b'),   'a\x26b',  '& escaped');
+	is(main::quote_javascript('a\\b'),  'a\x5cb',  'backslash escaped');
+	is(main::quote_javascript('plain text 123'), 'plain text 123',
+	   'safe characters pass through');
+};
+
+# urlize / un_urlize — percent-encoding round-trip.
+subtest 'urlize / un_urlize' => sub {
+	# urlize encodes anything that is not [A-Za-z0-9].
+	is(main::urlize('abc123'),  'abc123',  'alphanumerics pass through');
+	is(main::urlize(' '),       '%20',     'space encoded');
+	is(main::urlize('/'),       '%2F',     'slash encoded');
+	is(main::urlize("\n"),      '%0A',     'newline encoded');
+	is(main::urlize(chr(0xff)), '%FF',     'high-bit byte encoded');
+	is(main::urlize('a b/c'),   'a%20b%2Fc', 'mixed input');
+
+	# un_urlize: by default, '+' becomes ' ' (form-encoded). Pass the
+	# second arg true to preserve '+' literally.
+	is(main::un_urlize('a+b'),    'a b', '+ decoded as space by default');
+	is(main::un_urlize('a+b', 1), 'a+b', '+ preserved with plus-literal flag');
+	is(main::un_urlize('%20'),    ' ',   '%20 decoded');
+	is(main::un_urlize('%c3%a9'), "\xc3\xa9",
+	   'lowercase hex decoded (UTF-8 bytes for é)');
+
+	# Round-trip through a binary string.
+	for my $s ('plain', 'a b/c', "binary\x00\x01\xff",
+		   '<script>alert(1)</script>') {
+		# urlize never emits +, so the no-plus mode is safe here.
+		is(main::un_urlize(main::urlize($s), 1), $s,
+		   "round-trip preserves ".length($s)." bytes");
+		}
+};
+
+# trim — symmetric or asymmetric whitespace stripping.
+#
+# Second arg controls which end:
+#   undef/0 → both
+#       -1  → right only
+#        1  → left only
+subtest 'trim' => sub {
+	is(main::trim('  hi  '),     'hi',     'both ends by default');
+	is(main::trim('  hi  ', -1), '  hi',   '-1 strips trailing only');
+	is(main::trim('  hi  ',  1), 'hi  ',   '1 strips leading only');
+	is(main::trim('nochange'),   'nochange', 'no-op on tidy input');
+	is(main::trim(''),           '',       'empty stays empty');
+	is(main::trim("\t\nhi\r\n"), 'hi',     'tabs and newlines counted as whitespace');
+};
+
+# trunc — truncate to a "whole word" within a max length.
+#
+# Contract: if the cut lands inside a word, back up to the previous
+# whitespace; if no whitespace precedes (the first word is itself longer
+# than maxlen), return the partial first word rather than empty.
+subtest 'trunc' => sub {
+	# Early-exit when input already fits.
+	is(main::trunc('short',  99), 'short', 'no-op when input shorter than max');
+	is(main::trunc('exact5',  6), 'exact5', 'no-op when input equals max');
+
+	# Cut landed at a word boundary — keep the substring intact.
+	is(main::trunc('hello world foo', 11), 'hello world',
+	   'cut at word boundary keeps last whole word');
+	is(main::trunc('hello world',      5), 'hello',
+	   'cut at word boundary returns first whole word');
+
+	# Cut landed mid-word — back up to the previous whitespace.
+	is(main::trunc('foo bar baz', 8), 'foo bar',
+	   'mid-word cut backs up to previous whitespace');
+	is(main::trunc('a b c',  4),     'a b',
+	   'mid-word cut backs up past a one-char word');
+
+	# First word longer than maxlen and no preceding whitespace — fall
+	# back to the partial word rather than empty.
+	is(main::trunc('hellothere', 5), 'hello',
+	   'long first word with no boundary returns partial');
+	is(main::trunc('abc', 1),        'a',
+	   'maxlen=1 returns the first char when no boundary exists');
+};
+
+# indexof — first-index lookup with `eq`.
+subtest 'indexof' => sub {
+	is(main::indexof('b', 'a', 'b', 'c'),  1, 'returns 0-based index');
+	is(main::indexof('a', 'a', 'b', 'c'),  0, 'first element');
+	is(main::indexof('z', 'a', 'b', 'c'), -1, 'missing → -1');
+	is(main::indexof('a'),                -1, 'empty haystack → -1');
+	is(main::indexof('b', 'a', 'b', 'b'),  1, 'duplicates: first hit wins');
+	# Numeric needle compared stringwise (eq).
+	is(main::indexof(1, '0', '1', '2'),    1, 'numeric needle matches stringwise');
+};
+
+# indexoflc — case-insensitive variant.
+subtest 'indexoflc' => sub {
+	is(main::indexoflc('B', 'a', 'b', 'c'),  1, 'uppercase needle, lowercase haystack');
+	is(main::indexoflc('a', 'A', 'B', 'C'),  0, 'lowercase needle, uppercase haystack');
+	is(main::indexoflc('z', 'a', 'b', 'c'), -1, 'missing → -1');
+};
+
+# uniquelc — dedupe by lowercase comparison, preserving first-seen case.
+subtest 'uniquelc' => sub {
+	is_deeply([main::uniquelc('Foo', 'foo', 'FOO', 'Bar')],
+		  ['Foo', 'Bar'],
+		  'first-seen case preserved, later case-variants dropped');
+	is_deeply([main::uniquelc()], [], 'empty input → empty list');
+	is_deeply([main::uniquelc('x')], ['x'], 'single element passes through');
+};
+
+done_testing();
--- a/web-lib-funcs.pl
+++ b/web-lib-funcs.pl
@@ -624,11 +624,14 @@ sub trunc
 if (length($_[0]) <= $_[1]) {
 	return $_[0];
 	}
-my $str = substr($_[0],0,$_[1]);
-my $c;
-do {
-	$c = chop($str);
-	} while($c !~ /\S/);
+my $str = substr($_[0], 0, $_[1]);
+# If the cut landed inside a word (next char in the original is
+# non-whitespace), back the partial word out — but only when there's
+# a word boundary inside $str to back up to. If the first word is
+# longer than maxlen, return that partial word rather than empty.
+if (substr($_[0], $_[1], 1) =~ /\S/ && $str =~ /\s/) {
+	$str =~ s/\S+$//;
+	}
 $str =~ s/\s+$//;
 return $str;
 }
@@ -693,35 +696,48 @@ Check if some IPv6 address is properly formatted, and returns 1 if so.
 =cut
 sub check_ip6address
 {
-# Special case for unspecified address (analogous to 0.0.0.0 in IPv4)
-return 1 if ($_[0] eq "::");
-my @blocks = split(/:/, $_[0]);
-return 0 if (@blocks == 0 || @blocks > 8);
-
-# The address/netmask format is accepted. So we're looking for a "/" to isolate a possible netmask.
-# After that, we delete the netmask to control the address only format, but we verify whether the netmask
-# value is in [0;128].
-my $ib = $#blocks;
-my $where = index($blocks[$ib],"/");
+my $addr = $_[0];
 my $m = 0;
-if ($where != -1) {
-my $b = substr($blocks[$ib],0,$where);
-$m = substr($blocks[$ib],$where+1,length($blocks[$ib])-($where+1));
-$blocks[$ib]=$b;
-}

-# The netmask must take its value in [0;128]
-return 0 if ($m <0 || $m >128);
+# Strip an optional /N netmask before splitting. Doing this on the
+# raw string (rather than from the last split element) keeps split()'s
+# trailing-empty accounting intact for inputs like "2001:db8::/32",
+# where the netmask would otherwise hide the trailing "::" shorthand.
+if ($addr =~ s{/(\d+)\z}{}) {
+	$m = $1;
+	}
+return 0 if ($m < 0 || $m > 128);
+
+# Special case for unspecified address (analogous to 0.0.0.0 in IPv4),
+# both bare and with a netmask.
+return 1 if ($addr eq "::");
+
+my @blocks = split(/:/, $addr);
+return 0 if (@blocks == 0);
+
+# Accept the IPv4-in-IPv6 forms (RFC 4291 §2.5.5: "::ffff:N.N.N.N"
+# IPv4-mapped, and the more general "X:X:X:X:X:X:N.N.N.N"). If the
+# last block is a dotted-quad, validate the octets and count it as two
+# 16-bit groups for the overall 8-group ceiling. The leading ":" guard
+# distinguishes IPv4-tailed IPv6 from a bare IPv4 address — callers
+# like ip_match() rely on this sub returning false for "10.0.0.1".
+my $count = scalar(@blocks);
+if ($addr =~ /:/ &&
+    $blocks[-1] =~ /^(\d+)\.(\d+)\.(\d+)\.(\d+)\z/) {
+	return 0 if ($1 > 255 || $2 > 255 || $3 > 255 || $4 > 255);
+	$count++;
+	pop(@blocks);
+	}
+return 0 if ($count > 8);

 # Check the different blocks of the address : 16 bits block in hexa notation.
 # Possibility of 1 empty block or 2 if the address begins with "::".
-my $b;
 my $empty = 0;
-foreach $b (@blocks) {
+foreach my $b (@blocks) {
 	return 0 if ($b ne "" && $b !~ /^[0-9a-f]{1,4}$/i);
 	$empty++ if ($b eq "");
 	}
-return 0 if ($empty > 1 && !($_[0] =~ /^::/ && $empty == 2));
+return 0 if ($empty > 1 && !($addr =~ /^::/ && $empty == 2));
 return 1;
 }

@@ -12471,9 +12487,9 @@ sub split_quoted_string
 {
 my ($str) = @_;
 my @rv;
-while($str =~ /^"([^"]*)"\s*([\000-\377]*)$/ ||
-      $str =~ /^'([^']*)'\s*([\000-\377]*)$/ ||
-      $str =~ /^(\S+)\s*([\000-\377]*)$/) {
+while($str =~ /^\s*"([^"]*)"\s*([\000-\377]*)$/ ||
+      $str =~ /^\s*'([^']*)'\s*([\000-\377]*)$/ ||
+      $str =~ /^\s*(\S+)\s*([\000-\377]*)$/) {
 	push(@rv, $1);
 	$str = $2;
 	}