diff --git a/t/web-lib-funcs-ip.t b/t/web-lib-funcs-ip.t new file mode 100644 index 000000000..7d1da8658 --- /dev/null +++ b/t/web-lib-funcs-ip.t @@ -0,0 +1,129 @@ +#!/usr/bin/perl +# Unit tests for IP-address validators / classifiers in web-lib-funcs.pl. +# +# Pure subs, no globals to set up. miniserv.pl carries its own copies of +# check_ipaddress / check_ip6address; those have their own tests in +# t/miniserv.t. This file exclusively exercises the web-lib-funcs versions, +# which have a slightly different IPv6 contract — the web-lib-funcs version +# accepts an /N netmask suffix. + +use strict; +use warnings; +use Test::More; +use File::Basename qw(dirname); +use File::Spec; + +my $script = File::Spec->rel2abs( + File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl')); +require $script; + +# check_ipaddress — strict dotted-quad IPv4. +subtest 'check_ipaddress' => sub { + ok( main::check_ipaddress('1.2.3.4'), 'plain IPv4 accepted'); + ok( main::check_ipaddress('0.0.0.0'), 'all-zero accepted'); + ok( main::check_ipaddress('255.255.255.255'), 'all-ones accepted'); + + ok(!main::check_ipaddress('256.0.0.1'), 'octet > 255 rejected'); + ok(!main::check_ipaddress('1.2.3'), 'too-few octets rejected'); + ok(!main::check_ipaddress('1.2.3.4.5'), 'too-many octets rejected'); + ok(!main::check_ipaddress('1.2.3.x'), 'non-numeric octet rejected'); + ok(!main::check_ipaddress(''), 'empty rejected'); + ok(!main::check_ipaddress('not an ip'), 'garbage rejected'); + ok(!main::check_ipaddress(' 1.2.3.4'), 'leading whitespace rejected'); + ok(!main::check_ipaddress('1.2.3.4 '), 'trailing whitespace rejected'); +}; + +# check_ip6address — IPv6, optionally with /N netmask suffix. +# +# Unlike the docstring, this sub also accepts an address/netmask form, but +# only when the `::` shorthand is at the *start* of the address — see the +# bug notes below. Pin current behaviour so a future fix shows up loudly. +subtest 'check_ip6address' => sub { + ok( main::check_ip6address('::'), 'unspecified accepted'); + ok( main::check_ip6address('::1'), 'loopback accepted'); + ok( main::check_ip6address('2001:db8::1'), 'compressed form accepted'); + ok( main::check_ip6address('1:2:3:4:5:6:7:8'), 'full eight-block form accepted'); + ok( main::check_ip6address('2001:db8::'), 'trailing :: accepted (no netmask)'); + + # Netmask suffix. + ok( main::check_ip6address('::1/64'), 'address/netmask accepted when :: is at start'); + ok(!main::check_ip6address('::1/200'), 'netmask > 128 rejected'); + + # BUG: a netmask suffix combined with a trailing `::` shorthand + # fails. The validator's empty-block accounting is thrown off because + # split() no longer trims trailing empties when the final element is + # the netmask. Real-world example: "2001:db8::/32" — a perfectly + # valid CIDR — is rejected. + ok(!main::check_ip6address('2001:db8::/32'), + 'BUG: valid CIDR with trailing :: rejected by validator'); + + # BUG: IPv4-mapped IPv6 (RFC 4291 §2.5.5.2) is rejected because the + # per-block regex requires hex digits. Notably, is_non_public_ipaddress + # has an unreachable ::ffff:N.N.N.N branch downstream of this check. + ok(!main::check_ip6address('::ffff:10.0.0.1'), + 'BUG: IPv4-mapped IPv6 rejected by validator'); + + ok(!main::check_ip6address('gggg::1'), 'non-hex rejected'); + ok(!main::check_ip6address('1:2:3:4:5:6:7:8:9'), 'too many groups rejected'); + ok(!main::check_ip6address('::1::2'), 'multiple :: rejected'); + ok(!main::check_ip6address('not an addr'), 'garbage rejected'); +}; + +# is_non_public_ipaddress — RFC1918 + reserved-range classifier. +# +# Returns 1 for: 0.x, 10.x, 127.x, 169.254/16, 172.16/12, 192.168/16, +# 100.64/10 (CGNAT), 224+/4 (multicast/reserved); IPv6 loopback, link-local +# (fe80–febf), ULA (fc00/fd00), and ::ffff:N.N.N.N when the wrapped IPv4 +# is itself non-public. +subtest 'is_non_public_ipaddress (IPv4)' => sub { + # Private / reserved. + ok( main::is_non_public_ipaddress('10.0.0.1'), '10/8 private'); + ok( main::is_non_public_ipaddress('172.16.0.1'), '172.16/12 low bound'); + ok( main::is_non_public_ipaddress('172.31.255.255'), '172.16/12 high bound'); + ok( main::is_non_public_ipaddress('192.168.1.1'), '192.168/16 private'); + ok( main::is_non_public_ipaddress('127.0.0.1'), '127/8 loopback'); + ok( main::is_non_public_ipaddress('169.254.1.1'), '169.254/16 link-local'); + ok( main::is_non_public_ipaddress('0.1.2.3'), '0/8 reserved'); + ok( main::is_non_public_ipaddress('100.64.0.1'), 'CGNAT 100.64/10 low'); + ok( main::is_non_public_ipaddress('100.127.255.255'), 'CGNAT 100.64/10 high'); + ok( main::is_non_public_ipaddress('224.0.0.1'), '224+ multicast / reserved'); + ok( main::is_non_public_ipaddress('255.255.255.255'), '255+ reserved'); + + # Just-outside boundaries. + ok(!main::is_non_public_ipaddress('11.0.0.1'), '11/8 is public'); + ok(!main::is_non_public_ipaddress('172.15.255.255'), '172.15 below private block'); + ok(!main::is_non_public_ipaddress('172.32.0.0'), '172.32 above private block'); + ok(!main::is_non_public_ipaddress('192.167.0.0'), '192.167 is public'); + ok(!main::is_non_public_ipaddress('169.253.0.0'), '169.253 is public'); + ok(!main::is_non_public_ipaddress('100.63.255.255'), 'just below CGNAT'); + ok(!main::is_non_public_ipaddress('100.128.0.0'), 'just above CGNAT'); + + # Plainly public. + ok(!main::is_non_public_ipaddress('8.8.8.8'), 'public DNS resolver'); + ok(!main::is_non_public_ipaddress('1.1.1.1'), 'public DNS resolver'); +}; + +subtest 'is_non_public_ipaddress (IPv6)' => sub { + ok( main::is_non_public_ipaddress('::1'), 'loopback'); + ok( main::is_non_public_ipaddress('::'), 'unspecified'); + ok( main::is_non_public_ipaddress('fe80::1'), 'link-local (fe80)'); + ok( main::is_non_public_ipaddress('feb0::1'), 'link-local (feb0)'); + ok( main::is_non_public_ipaddress('fc00::1'), 'ULA (fc00)'); + ok( main::is_non_public_ipaddress('fd12::1'), 'ULA (fd12)'); + + # IPv4-mapped (::ffff:N.N.N.N) is meant to recurse on the embedded + # IPv4, but the branch is unreachable: check_ip6address rejects all + # ::ffff:N.N.N.N inputs (see BUG note in check_ip6address subtest). + # Both calls below currently return 0 — pin that. + ok(!main::is_non_public_ipaddress('::ffff:10.0.0.1'), + 'BUG: ::ffff: falsely reported as public (validator rejects input)'); + ok(!main::is_non_public_ipaddress('::ffff:8.8.8.8'), + '::ffff: reported as public'); + + # Plainly public IPv6. + ok(!main::is_non_public_ipaddress('2001:db8::1'), '2001:db8 is public per classifier'); + ok(!main::is_non_public_ipaddress('2606:4700::1111'), + 'global unicast address is public'); +}; + +done_testing(); diff --git a/t/web-lib-funcs-paths.t b/t/web-lib-funcs-paths.t new file mode 100644 index 000000000..d1d542bd4 --- /dev/null +++ b/t/web-lib-funcs-paths.t @@ -0,0 +1,187 @@ +#!/usr/bin/perl +# Unit tests for path / URL / shell-quote helpers in web-lib-funcs.pl. +# +# Bare `require` loads the library; subs covered here either touch no +# globals or use ones we set up locally ($gconfig{os_type} for quote_path, +# %month_to_number_map / %number_to_month_map for the month helpers). +# In production those maps are populated by web-lib.pl; we set them by hand +# to keep tests independent of that initialiser. + +use strict; +use warnings; +use Test::More; +use File::Basename qw(dirname); +use File::Spec; + +my $script = File::Spec->rel2abs( + File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl')); +require $script; + +# simplify_path — strip ./ and resolve ../, refusing to escape root. +# +# Contract: +# - returns an absolute path (always leading "/") +# - returns undef when .. would pop above the root +# - "." and "" inputs both return "/" +# - a relative input ("foo") is promoted to "/foo" +# +# This is the web-lib-funcs version; miniserv.pl ships an independent +# implementation (`miniserv::simplify_path`) with a different signature +# (bogus-flag via aliased arg). Both are exercised by their own tests. +subtest 'simplify_path' => sub { + is(main::simplify_path('/foo/bar'), '/foo/bar', 'plain path'); + is(main::simplify_path('/foo/./bar'), '/foo/bar', '. collapses'); + is(main::simplify_path('/foo/../bar'), '/bar', '.. pops'); + is(main::simplify_path('/a/b/c/../../d'), '/a/d', + 'multiple .. pop the right number of segments'); + is(main::simplify_path('//foo///bar//'), '/foo/bar', 'repeated slashes collapse'); + is(main::simplify_path('/foo/'), '/foo', 'trailing slash dropped'); + + # Adversarial: escaping root must fail closed. + is(main::simplify_path('/../etc/passwd'), undef, '.. above root → undef'); + is(main::simplify_path('/foo/../../bar'), undef, 'overshoot → undef'); + + # Surprising-but-current behaviour: empty / root / relative inputs. + is(main::simplify_path(''), '/', 'empty input → /'); + is(main::simplify_path('/'), '/', 'root passes through'); + is(main::simplify_path('foo'), '/foo', 'relative input is promoted to absolute'); +}; + +# parse_http_url — absolute and base-relative URL parsing. +# +# Contract on success: returns (host, port, page, ssl, [user], [pass]). +# SSL mode 0=http, 1=https, 2=ftp. +subtest 'parse_http_url' => sub { + my @abs = main::parse_http_url('http://example.com/foo'); + is_deeply([@abs[0..3]], ['example.com', 80, '/foo', 0], + 'plain http URL'); + + my @https = main::parse_http_url('https://example.com:8443/bar'); + is_deeply([@https[0..3]], ['example.com', 8443, '/bar', 1], + 'https with explicit port and ssl=1'); + + my @ftp = main::parse_http_url('ftp://host/x'); + is_deeply([@ftp[0..3]], ['host', 21, '/x', 2], + 'ftp scheme → port 21 and ssl=2'); + + # Userinfo is captured as elements 4 and 5. + my @auth = main::parse_http_url('http://user:pass@example.com:81/foo'); + is_deeply(\@auth, ['example.com', 81, '/foo', 0, 'user', 'pass'], + 'user:pass extracted from authority'); + + # Bracketed IPv6 host. + my @v6 = main::parse_http_url('http://[2001:db8::1]:8080/foo'); + is_deeply([@v6[0..3]], ['2001:db8::1', 8080, '/foo', 0], + 'bracketed IPv6 host parsed, brackets stripped'); + + # Missing path defaults to "/". + my @noslash = main::parse_http_url('http://example.com'); + is($noslash[2], '/', 'missing path defaults to /'); + + # no_default_port suppresses 80/443/21 substitution. + my @nd = main::parse_http_url('http://example.com/x', undef, undef, + undef, undef, undef, undef, 1); + is($nd[1], undef, 'no_default_port leaves port undef'); + + # Relative URL with a base. + my @rs = main::parse_http_url('/page', 'host', 80, '/old/', 0); + is_deeply([@rs[0..3]], ['host', 80, '/page', 0], + 'server-absolute relative URL uses base host/port'); + + my @rd = main::parse_http_url('rel.html', 'host', 80, '/base/cur.html', 0); + is_deeply([@rd[0..3]], ['host', 80, '/base/rel.html', 0], + 'directory-relative URL resolves against base page directory'); + + # Unparseable input with no base → undef. + is(main::parse_http_url('not a url'), undef, + 'garbage with no base → undef'); +}; + +# split_quoted_string — shell-ish tokenizer. +# +# Each iteration matches one of: +# "..." | '...' | \S+ +# followed by trailing whitespace, then loops on the remainder. +subtest 'split_quoted_string' => sub { + is_deeply([main::split_quoted_string('one two three')], + ['one', 'two', 'three'], + 'bare words'); + + is_deeply([main::split_quoted_string('"hello world" foo "bar baz" qux')], + ['hello world', 'foo', 'bar baz', 'qux'], + 'double-quoted segments preserve internal spaces'); + + is_deeply([main::split_quoted_string(q{'a b' c 'd e'})], + ['a b', 'c', 'd e'], + 'single-quoted segments work the same way'); + + is_deeply([main::split_quoted_string('')], [], + 'empty input → empty list'); + + # Unbalanced quote: the implementation falls through to the \S+ + # branch and emits the leftover with the quote attached. Pin this. + is_deeply([main::split_quoted_string('unbalanced "quote')], + ['unbalanced', '"quote'], + 'unterminated quote is taken as a bare token'); + + # Pure-whitespace input drops everything because no branch tolerates + # a leading-whitespace prefix. Surface this as current behaviour — + # arguably a bug, but documenting it here protects us from a silent + # behaviour change. + is_deeply([main::split_quoted_string(' spaces between ')], [], + 'leading whitespace short-circuits the tokenizer (current behaviour)'); +}; + +# quote_path — OS-dependent shell quoting. +# +# On Windows or for Windows-style absolute paths, wraps in double quotes. +# Everywhere else, uses quotemeta (which escapes every non-word char). +subtest 'quote_path' => sub { + no warnings 'once'; + local %main::gconfig = ('os_type' => 'linux'); + + # quotemeta escapes /, space, etc. + is(main::quote_path('/a b/c'), '\\/a\\ b\\/c', + 'unix path uses quotemeta'); + is(main::quote_path('plain'), 'plain', + 'all-word characters need no escaping'); + + # Windows-style path → "" wrapping, even when os_type isn't windows. + is(main::quote_path('c:/Users/x'), '"c:/Users/x"', + 'drive-letter prefix wraps in double quotes'); + + # os_type=windows forces double-quote wrap. + local %main::gconfig = ('os_type' => 'windows'); + is(main::quote_path('/etc/passwd'), '"/etc/passwd"', + 'os_type=windows wraps in double quotes regardless of path shape'); +}; + +# month_to_number / number_to_month — three-letter month name <-> 0-based index. +# +# In production these maps are populated by web-lib.pl; in tests we set +# them up locally so the suite doesn't depend on web-lib.pl loading. +subtest 'month_to_number / number_to_month' => sub { + no warnings 'once'; + local %main::month_to_number_map = ( + 'jan' => 0, 'feb' => 1, 'mar' => 2, 'apr' => 3, + 'may' => 4, 'jun' => 5, 'jul' => 6, 'aug' => 7, + 'sep' => 8, 'oct' => 9, 'nov' => 10, 'dec' => 11, + ); + local %main::number_to_month_map = reverse %main::month_to_number_map; + + is(main::month_to_number('Jan'), 0, 'Jan → 0'); + is(main::month_to_number('December'), 11, 'first three chars taken'); + is(main::month_to_number('FEB'), 1, 'case-insensitive'); + is(main::month_to_number('xyz'), undef, 'unknown returns undef'); + + is(main::number_to_month(0), 'Jan', '0 → Jan (ucfirst applied)'); + is(main::number_to_month(11), 'Dec', '11 → Dec'); + + # Round-trip every month. + for my $m (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) { + is(main::number_to_month(main::month_to_number($m)), $m, + "round-trip $m"); + } +}; + +done_testing(); diff --git a/t/web-lib-funcs-strings.t b/t/web-lib-funcs-strings.t new file mode 100644 index 000000000..01639045b --- /dev/null +++ b/t/web-lib-funcs-strings.t @@ -0,0 +1,235 @@ +#!/usr/bin/perl +# Unit tests for the pure string helpers in web-lib-funcs.pl. +# +# web-lib-funcs.pl is a pure library (no `unless(caller)` guard), so a bare +# `require` loads it without side effects. Helpers covered here touch no +# globals beyond their args, so no stubbing or %gconfig setup is needed. +# +# Assertions pin the current contract — encoded byte shape, round-trip +# identity, structural invariants — not the prettiest possible output. + +use strict; +use warnings; +use Test::More; +use File::Basename qw(dirname); +use File::Spec; + +my $script = File::Spec->rel2abs( + File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl')); +require $script; + +# html_escape +# +# Always escapes &, <, >, ", ', =. The optional $nodblamp flag uses a +# lookahead so existing entity references survive untouched; without it, +# every & becomes &, including already-escaped input. +subtest 'html_escape' => sub { + is(main::html_escape('&"\'='), + '<b>&"'=', + 'all six dangerous characters are escaped'); + + is(main::html_escape(undef), '', 'undef input → empty string'); + is(main::html_escape(''), '', 'empty input → empty string'); + is(main::html_escape('plain text 123'), 'plain text 123', + 'whitespace and alphanumerics pass through'); + + # Default mode double-escapes existing entities — this is the current + # contract; the nodblamp flag opts into the smarter behaviour. + is(main::html_escape('&'), '&amp;', 'default mode double-escapes &'); + is(main::html_escape('&', 1), '&', 'nodblamp preserves existing &'); + is(main::html_escape('A', 1), 'A', 'nodblamp preserves numeric entity'); + # Note: nodblamp's lookahead matches any &; as an entity, so + # made-up names like &x; are treated as entities and not re-escaped. + is(main::html_escape('&x;', 1), '&x;', 'nodblamp preserves arbitrary &word; shape'); + is(main::html_escape('& ', 1), '& ', 'nodblamp escapes lone &'); + + # Adversarial XSS payload — none of the dangerous chars survive raw. + my $xss = main::html_escape(q{'=&}); + unlike($xss, qr/[<>"'=]/, 'no raw HTML-significant characters remain'); + unlike($xss, qr/&(?!(amp|lt|gt|quot|#39|#61);)/, + 'every & starts a known entity'); +}; + +# html_unescape — inverse of html_escape for the canonical entity set. +subtest 'html_unescape' => sub { + is(main::html_unescape('<b>&"'='), + '&"\'=', 'canonical entity set round-trips'); + is(main::html_unescape('a b'), 'a b', + '  decodes to a regular space'); + is(main::html_unescape(undef), '', 'undef → empty'); + is(main::html_unescape(''), '', 'empty → empty'); + is(main::html_unescape('no entities here'), 'no entities here', + 'plain text passes through unchanged'); +}; + +# html_strip — remove tags, optionally replacing with a sentinel. +subtest 'html_strip' => sub { + is(main::html_strip('hello'), 'hello', + 'simple tags removed'); + is(main::html_strip('y'), 'y', + 'attribute-bearing tag removed'); + # Quoted attribute that contains a >, which would otherwise break a + # naive regexp — the implementation accounts for this. + is(main::html_strip('y'), 'y', + 'quoted > inside attribute does not end tag early'); + is(main::html_strip('plain text'), 'plain text', + 'plain text untouched'); + is(main::html_strip('x', '|'), '|x|', + 'replacement string substituted for each tag'); +}; + +# quote_escape — only ' and " (and lone &) are escaped; existing entities +# (&xxx; or &#NN;) are preserved. +subtest 'quote_escape' => sub { + is(main::quote_escape(undef), '', 'undef → empty'); + + is(main::quote_escape('a&b'), 'a&b', 'lone & escaped'); + is(main::quote_escape('a&'), 'a&', 'trailing & escaped'); + is(main::quote_escape('a&b'), 'a&b', 'existing & preserved'); + is(main::quote_escape('a'b'), 'a'b', 'numeric entity preserved'); + + is(main::quote_escape(q{a"b'c}), 'a"b'c', + 'both quote styles escaped by default'); + is(main::quote_escape(q{a"b'c}, q{"}), q{a"b'c}, + 'only-quote="\"" escapes only double quotes'); + is(main::quote_escape(q{a"b'c}, q{'}), q{a"b'c}, + 'only-quote="\'" escapes only single quotes'); +}; + +# quote_literal_escape — escape for inclusion in a Perl string literal. +subtest 'quote_literal_escape' => sub { + is(main::quote_literal_escape(undef), '', 'undef → empty'); + is(main::quote_literal_escape(''), '', 'empty → empty'); + + # Default (single-quoted target): only \ and ' need escaping. + is(main::quote_literal_escape(q{it's}), q{it\'s}, 'single quote escaped'); + is(main::quote_literal_escape(q{a\\b}), q{a\\\\b}, 'backslash doubled'); + is(main::quote_literal_escape(q{a"b$c@d}), q{a"b$c@d}, + 'double-quote / sigils NOT escaped in single-quoted target'); + + # Double-quoted target: also escape ", $, @ (because they interpolate). + is(main::quote_literal_escape(q{a"b$c@d}, q{"}), + q{a\"b\$c\@d}, + '" $ @ all escaped in double-quoted target'); + is(main::quote_literal_escape(q{a\\b}, q{"}), q{a\\\\b}, + 'backslash doubled in double-quoted target too'); +}; + +# quote_javascript — hex-escape the unsafe characters for a JS string literal. +subtest 'quote_javascript' => sub { + is(main::quote_javascript(q{a"b}), 'a\x22b', 'double quote → \x22'); + is(main::quote_javascript(q{a'b}), 'a\x27b', 'single quote → \x27'); + is(main::quote_javascript('ac'), 'a\x3cb\x3ec', '< and > escaped'); + is(main::quote_javascript('a&b'), 'a\x26b', '& escaped'); + is(main::quote_javascript('a\\b'), 'a\x5cb', 'backslash escaped'); + is(main::quote_javascript('plain text 123'), 'plain text 123', + 'safe characters pass through'); +}; + +# urlize / un_urlize — percent-encoding round-trip. +subtest 'urlize / un_urlize' => sub { + # urlize encodes anything that is not [A-Za-z0-9]. + is(main::urlize('abc123'), 'abc123', 'alphanumerics pass through'); + is(main::urlize(' '), '%20', 'space encoded'); + is(main::urlize('/'), '%2F', 'slash encoded'); + is(main::urlize("\n"), '%0A', 'newline encoded'); + is(main::urlize(chr(0xff)), '%FF', 'high-bit byte encoded'); + is(main::urlize('a b/c'), 'a%20b%2Fc', 'mixed input'); + + # un_urlize: by default, '+' becomes ' ' (form-encoded). Pass the + # second arg true to preserve '+' literally. + is(main::un_urlize('a+b'), 'a b', '+ decoded as space by default'); + is(main::un_urlize('a+b', 1), 'a+b', '+ preserved with plus-literal flag'); + is(main::un_urlize('%20'), ' ', '%20 decoded'); + is(main::un_urlize('%c3%a9'), "\xc3\xa9", + 'lowercase hex decoded (UTF-8 bytes for é)'); + + # Round-trip through a binary string. + for my $s ('plain', 'a b/c', "binary\x00\x01\xff", + '') { + # urlize never emits +, so the no-plus mode is safe here. + is(main::un_urlize(main::urlize($s), 1), $s, + "round-trip preserves ".length($s)." bytes"); + } +}; + +# trim — symmetric or asymmetric whitespace stripping. +# +# Second arg controls which end: +# undef/0 → both +# -1 → right only +# 1 → left only +subtest 'trim' => sub { + is(main::trim(' hi '), 'hi', 'both ends by default'); + is(main::trim(' hi ', -1), ' hi', '-1 strips trailing only'); + is(main::trim(' hi ', 1), 'hi ', '1 strips leading only'); + is(main::trim('nochange'), 'nochange', 'no-op on tidy input'); + is(main::trim(''), '', 'empty stays empty'); + is(main::trim("\t\nhi\r\n"), 'hi', 'tabs and newlines counted as whitespace'); +}; + +# trunc — truncate to a "whole word" within a max length. +# +# The implementation cuts at maxlen, then pops one char unconditionally +# and continues popping only while the popped char is whitespace; trailing +# whitespace is then trimmed. This pins current behaviour, which has two +# notable edge cases worth flagging: +# +# * `trunc("hello world foo", 11)` returns "hello worl", losing the +# final 'd' even though substr(0, 11) cleanly ends on a word boundary. +# * `trunc("hello world", 5)` returns "hell" rather than "hello". +# +# These pass today; a future fix to trunc will break these and prompt +# re-review. +subtest 'trunc' => sub { + # Early-exit when input already fits. + is(main::trunc('short', 99), 'short', 'no-op when input shorter than max'); + is(main::trunc('exact5', 6), 'exact5', 'no-op when input equals max'); + + # Truncation lands at a partial word — pops the partial word back to + # whitespace, then trims trailing whitespace. + is(main::trunc('a b c', 4), 'a', 'cuts back through partial word'); + # substr(0,8) = "foo bar ", pop one (always), pop "r" — non-ws so stop. + # Result: "foo ba" (last word "baz" partial → chopped one char short). + is(main::trunc('foo bar baz', 8), 'foo ba', + 'partial word loses one extra char (current behaviour)'); + + # Edge case: substr cleanly ends on a word boundary. Current behaviour + # still pops one char; pin it. + is(main::trunc('hello world foo', 11), 'hello worl', + 'always pops at least one char even at word boundary (current behaviour)'); + is(main::trunc('hello world', 5), 'hell', + 'always pops at least one char (current behaviour)'); + + # Truncating to 1 leaves nothing after the mandatory pop. + is(main::trunc('abc', 1), '', 'maxlen=1 returns empty'); +}; + +# indexof — first-index lookup with `eq`. +subtest 'indexof' => sub { + is(main::indexof('b', 'a', 'b', 'c'), 1, 'returns 0-based index'); + is(main::indexof('a', 'a', 'b', 'c'), 0, 'first element'); + is(main::indexof('z', 'a', 'b', 'c'), -1, 'missing → -1'); + is(main::indexof('a'), -1, 'empty haystack → -1'); + is(main::indexof('b', 'a', 'b', 'b'), 1, 'duplicates: first hit wins'); + # Numeric needle compared stringwise (eq). + is(main::indexof(1, '0', '1', '2'), 1, 'numeric needle matches stringwise'); +}; + +# indexoflc — case-insensitive variant. +subtest 'indexoflc' => sub { + is(main::indexoflc('B', 'a', 'b', 'c'), 1, 'uppercase needle, lowercase haystack'); + is(main::indexoflc('a', 'A', 'B', 'C'), 0, 'lowercase needle, uppercase haystack'); + is(main::indexoflc('z', 'a', 'b', 'c'), -1, 'missing → -1'); +}; + +# uniquelc — dedupe by lowercase comparison, preserving first-seen case. +subtest 'uniquelc' => sub { + is_deeply([main::uniquelc('Foo', 'foo', 'FOO', 'Bar')], + ['Foo', 'Bar'], + 'first-seen case preserved, later case-variants dropped'); + is_deeply([main::uniquelc()], [], 'empty input → empty list'); + is_deeply([main::uniquelc('x')], ['x'], 'single element passes through'); +}; + +done_testing();