mirror of
https://github.com/webmin/webmin.git
synced 2026-06-05 04:40:24 +01:00
Add ip, paths, string tests for web-lib-funcs
This commit is contained in:
129
t/web-lib-funcs-ip.t
Normal file
129
t/web-lib-funcs-ip.t
Normal file
@@ -0,0 +1,129 @@
|
||||
#!/usr/bin/perl
|
||||
# Unit tests for IP-address validators / classifiers in web-lib-funcs.pl.
|
||||
#
|
||||
# Pure subs, no globals to set up. miniserv.pl carries its own copies of
|
||||
# check_ipaddress / check_ip6address; those have their own tests in
|
||||
# t/miniserv.t. This file exclusively exercises the web-lib-funcs versions,
|
||||
# which have a slightly different IPv6 contract — the web-lib-funcs version
|
||||
# accepts an /N netmask suffix.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Test::More;
|
||||
use File::Basename qw(dirname);
|
||||
use File::Spec;
|
||||
|
||||
my $script = File::Spec->rel2abs(
|
||||
File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
|
||||
require $script;
|
||||
|
||||
# check_ipaddress — strict dotted-quad IPv4.
|
||||
subtest 'check_ipaddress' => sub {
|
||||
ok( main::check_ipaddress('1.2.3.4'), 'plain IPv4 accepted');
|
||||
ok( main::check_ipaddress('0.0.0.0'), 'all-zero accepted');
|
||||
ok( main::check_ipaddress('255.255.255.255'), 'all-ones accepted');
|
||||
|
||||
ok(!main::check_ipaddress('256.0.0.1'), 'octet > 255 rejected');
|
||||
ok(!main::check_ipaddress('1.2.3'), 'too-few octets rejected');
|
||||
ok(!main::check_ipaddress('1.2.3.4.5'), 'too-many octets rejected');
|
||||
ok(!main::check_ipaddress('1.2.3.x'), 'non-numeric octet rejected');
|
||||
ok(!main::check_ipaddress(''), 'empty rejected');
|
||||
ok(!main::check_ipaddress('not an ip'), 'garbage rejected');
|
||||
ok(!main::check_ipaddress(' 1.2.3.4'), 'leading whitespace rejected');
|
||||
ok(!main::check_ipaddress('1.2.3.4 '), 'trailing whitespace rejected');
|
||||
};
|
||||
|
||||
# check_ip6address — IPv6, optionally with /N netmask suffix.
|
||||
#
|
||||
# Unlike the docstring, this sub also accepts an address/netmask form, but
|
||||
# only when the `::` shorthand is at the *start* of the address — see the
|
||||
# bug notes below. Pin current behaviour so a future fix shows up loudly.
|
||||
subtest 'check_ip6address' => sub {
|
||||
ok( main::check_ip6address('::'), 'unspecified accepted');
|
||||
ok( main::check_ip6address('::1'), 'loopback accepted');
|
||||
ok( main::check_ip6address('2001:db8::1'), 'compressed form accepted');
|
||||
ok( main::check_ip6address('1:2:3:4:5:6:7:8'), 'full eight-block form accepted');
|
||||
ok( main::check_ip6address('2001:db8::'), 'trailing :: accepted (no netmask)');
|
||||
|
||||
# Netmask suffix.
|
||||
ok( main::check_ip6address('::1/64'), 'address/netmask accepted when :: is at start');
|
||||
ok(!main::check_ip6address('::1/200'), 'netmask > 128 rejected');
|
||||
|
||||
# BUG: a netmask suffix combined with a trailing `::` shorthand
|
||||
# fails. The validator's empty-block accounting is thrown off because
|
||||
# split() no longer trims trailing empties when the final element is
|
||||
# the netmask. Real-world example: "2001:db8::/32" — a perfectly
|
||||
# valid CIDR — is rejected.
|
||||
ok(!main::check_ip6address('2001:db8::/32'),
|
||||
'BUG: valid CIDR with trailing :: rejected by validator');
|
||||
|
||||
# BUG: IPv4-mapped IPv6 (RFC 4291 §2.5.5.2) is rejected because the
|
||||
# per-block regex requires hex digits. Notably, is_non_public_ipaddress
|
||||
# has an unreachable ::ffff:N.N.N.N branch downstream of this check.
|
||||
ok(!main::check_ip6address('::ffff:10.0.0.1'),
|
||||
'BUG: IPv4-mapped IPv6 rejected by validator');
|
||||
|
||||
ok(!main::check_ip6address('gggg::1'), 'non-hex rejected');
|
||||
ok(!main::check_ip6address('1:2:3:4:5:6:7:8:9'), 'too many groups rejected');
|
||||
ok(!main::check_ip6address('::1::2'), 'multiple :: rejected');
|
||||
ok(!main::check_ip6address('not an addr'), 'garbage rejected');
|
||||
};
|
||||
|
||||
# is_non_public_ipaddress — RFC1918 + reserved-range classifier.
|
||||
#
|
||||
# Returns 1 for: 0.x, 10.x, 127.x, 169.254/16, 172.16/12, 192.168/16,
|
||||
# 100.64/10 (CGNAT), 224+/4 (multicast/reserved); IPv6 loopback, link-local
|
||||
# (fe80–febf), ULA (fc00/fd00), and ::ffff:N.N.N.N when the wrapped IPv4
|
||||
# is itself non-public.
|
||||
subtest 'is_non_public_ipaddress (IPv4)' => sub {
|
||||
# Private / reserved.
|
||||
ok( main::is_non_public_ipaddress('10.0.0.1'), '10/8 private');
|
||||
ok( main::is_non_public_ipaddress('172.16.0.1'), '172.16/12 low bound');
|
||||
ok( main::is_non_public_ipaddress('172.31.255.255'), '172.16/12 high bound');
|
||||
ok( main::is_non_public_ipaddress('192.168.1.1'), '192.168/16 private');
|
||||
ok( main::is_non_public_ipaddress('127.0.0.1'), '127/8 loopback');
|
||||
ok( main::is_non_public_ipaddress('169.254.1.1'), '169.254/16 link-local');
|
||||
ok( main::is_non_public_ipaddress('0.1.2.3'), '0/8 reserved');
|
||||
ok( main::is_non_public_ipaddress('100.64.0.1'), 'CGNAT 100.64/10 low');
|
||||
ok( main::is_non_public_ipaddress('100.127.255.255'), 'CGNAT 100.64/10 high');
|
||||
ok( main::is_non_public_ipaddress('224.0.0.1'), '224+ multicast / reserved');
|
||||
ok( main::is_non_public_ipaddress('255.255.255.255'), '255+ reserved');
|
||||
|
||||
# Just-outside boundaries.
|
||||
ok(!main::is_non_public_ipaddress('11.0.0.1'), '11/8 is public');
|
||||
ok(!main::is_non_public_ipaddress('172.15.255.255'), '172.15 below private block');
|
||||
ok(!main::is_non_public_ipaddress('172.32.0.0'), '172.32 above private block');
|
||||
ok(!main::is_non_public_ipaddress('192.167.0.0'), '192.167 is public');
|
||||
ok(!main::is_non_public_ipaddress('169.253.0.0'), '169.253 is public');
|
||||
ok(!main::is_non_public_ipaddress('100.63.255.255'), 'just below CGNAT');
|
||||
ok(!main::is_non_public_ipaddress('100.128.0.0'), 'just above CGNAT');
|
||||
|
||||
# Plainly public.
|
||||
ok(!main::is_non_public_ipaddress('8.8.8.8'), 'public DNS resolver');
|
||||
ok(!main::is_non_public_ipaddress('1.1.1.1'), 'public DNS resolver');
|
||||
};
|
||||
|
||||
subtest 'is_non_public_ipaddress (IPv6)' => sub {
|
||||
ok( main::is_non_public_ipaddress('::1'), 'loopback');
|
||||
ok( main::is_non_public_ipaddress('::'), 'unspecified');
|
||||
ok( main::is_non_public_ipaddress('fe80::1'), 'link-local (fe80)');
|
||||
ok( main::is_non_public_ipaddress('feb0::1'), 'link-local (feb0)');
|
||||
ok( main::is_non_public_ipaddress('fc00::1'), 'ULA (fc00)');
|
||||
ok( main::is_non_public_ipaddress('fd12::1'), 'ULA (fd12)');
|
||||
|
||||
# IPv4-mapped (::ffff:N.N.N.N) is meant to recurse on the embedded
|
||||
# IPv4, but the branch is unreachable: check_ip6address rejects all
|
||||
# ::ffff:N.N.N.N inputs (see BUG note in check_ip6address subtest).
|
||||
# Both calls below currently return 0 — pin that.
|
||||
ok(!main::is_non_public_ipaddress('::ffff:10.0.0.1'),
|
||||
'BUG: ::ffff:<private> falsely reported as public (validator rejects input)');
|
||||
ok(!main::is_non_public_ipaddress('::ffff:8.8.8.8'),
|
||||
'::ffff:<public> reported as public');
|
||||
|
||||
# Plainly public IPv6.
|
||||
ok(!main::is_non_public_ipaddress('2001:db8::1'), '2001:db8 is public per classifier');
|
||||
ok(!main::is_non_public_ipaddress('2606:4700::1111'),
|
||||
'global unicast address is public');
|
||||
};
|
||||
|
||||
done_testing();
|
||||
187
t/web-lib-funcs-paths.t
Normal file
187
t/web-lib-funcs-paths.t
Normal file
@@ -0,0 +1,187 @@
|
||||
#!/usr/bin/perl
|
||||
# Unit tests for path / URL / shell-quote helpers in web-lib-funcs.pl.
|
||||
#
|
||||
# Bare `require` loads the library; subs covered here either touch no
|
||||
# globals or use ones we set up locally ($gconfig{os_type} for quote_path,
|
||||
# %month_to_number_map / %number_to_month_map for the month helpers).
|
||||
# In production those maps are populated by web-lib.pl; we set them by hand
|
||||
# to keep tests independent of that initialiser.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Test::More;
|
||||
use File::Basename qw(dirname);
|
||||
use File::Spec;
|
||||
|
||||
my $script = File::Spec->rel2abs(
|
||||
File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
|
||||
require $script;
|
||||
|
||||
# simplify_path — strip ./ and resolve ../, refusing to escape root.
|
||||
#
|
||||
# Contract:
|
||||
# - returns an absolute path (always leading "/")
|
||||
# - returns undef when .. would pop above the root
|
||||
# - "." and "" inputs both return "/"
|
||||
# - a relative input ("foo") is promoted to "/foo"
|
||||
#
|
||||
# This is the web-lib-funcs version; miniserv.pl ships an independent
|
||||
# implementation (`miniserv::simplify_path`) with a different signature
|
||||
# (bogus-flag via aliased arg). Both are exercised by their own tests.
|
||||
subtest 'simplify_path' => sub {
|
||||
is(main::simplify_path('/foo/bar'), '/foo/bar', 'plain path');
|
||||
is(main::simplify_path('/foo/./bar'), '/foo/bar', '. collapses');
|
||||
is(main::simplify_path('/foo/../bar'), '/bar', '.. pops');
|
||||
is(main::simplify_path('/a/b/c/../../d'), '/a/d',
|
||||
'multiple .. pop the right number of segments');
|
||||
is(main::simplify_path('//foo///bar//'), '/foo/bar', 'repeated slashes collapse');
|
||||
is(main::simplify_path('/foo/'), '/foo', 'trailing slash dropped');
|
||||
|
||||
# Adversarial: escaping root must fail closed.
|
||||
is(main::simplify_path('/../etc/passwd'), undef, '.. above root → undef');
|
||||
is(main::simplify_path('/foo/../../bar'), undef, 'overshoot → undef');
|
||||
|
||||
# Surprising-but-current behaviour: empty / root / relative inputs.
|
||||
is(main::simplify_path(''), '/', 'empty input → /');
|
||||
is(main::simplify_path('/'), '/', 'root passes through');
|
||||
is(main::simplify_path('foo'), '/foo', 'relative input is promoted to absolute');
|
||||
};
|
||||
|
||||
# parse_http_url — absolute and base-relative URL parsing.
|
||||
#
|
||||
# Contract on success: returns (host, port, page, ssl, [user], [pass]).
|
||||
# SSL mode 0=http, 1=https, 2=ftp.
|
||||
subtest 'parse_http_url' => sub {
|
||||
my @abs = main::parse_http_url('http://example.com/foo');
|
||||
is_deeply([@abs[0..3]], ['example.com', 80, '/foo', 0],
|
||||
'plain http URL');
|
||||
|
||||
my @https = main::parse_http_url('https://example.com:8443/bar');
|
||||
is_deeply([@https[0..3]], ['example.com', 8443, '/bar', 1],
|
||||
'https with explicit port and ssl=1');
|
||||
|
||||
my @ftp = main::parse_http_url('ftp://host/x');
|
||||
is_deeply([@ftp[0..3]], ['host', 21, '/x', 2],
|
||||
'ftp scheme → port 21 and ssl=2');
|
||||
|
||||
# Userinfo is captured as elements 4 and 5.
|
||||
my @auth = main::parse_http_url('http://user:pass@example.com:81/foo');
|
||||
is_deeply(\@auth, ['example.com', 81, '/foo', 0, 'user', 'pass'],
|
||||
'user:pass extracted from authority');
|
||||
|
||||
# Bracketed IPv6 host.
|
||||
my @v6 = main::parse_http_url('http://[2001:db8::1]:8080/foo');
|
||||
is_deeply([@v6[0..3]], ['2001:db8::1', 8080, '/foo', 0],
|
||||
'bracketed IPv6 host parsed, brackets stripped');
|
||||
|
||||
# Missing path defaults to "/".
|
||||
my @noslash = main::parse_http_url('http://example.com');
|
||||
is($noslash[2], '/', 'missing path defaults to /');
|
||||
|
||||
# no_default_port suppresses 80/443/21 substitution.
|
||||
my @nd = main::parse_http_url('http://example.com/x', undef, undef,
|
||||
undef, undef, undef, undef, 1);
|
||||
is($nd[1], undef, 'no_default_port leaves port undef');
|
||||
|
||||
# Relative URL with a base.
|
||||
my @rs = main::parse_http_url('/page', 'host', 80, '/old/', 0);
|
||||
is_deeply([@rs[0..3]], ['host', 80, '/page', 0],
|
||||
'server-absolute relative URL uses base host/port');
|
||||
|
||||
my @rd = main::parse_http_url('rel.html', 'host', 80, '/base/cur.html', 0);
|
||||
is_deeply([@rd[0..3]], ['host', 80, '/base/rel.html', 0],
|
||||
'directory-relative URL resolves against base page directory');
|
||||
|
||||
# Unparseable input with no base → undef.
|
||||
is(main::parse_http_url('not a url'), undef,
|
||||
'garbage with no base → undef');
|
||||
};
|
||||
|
||||
# split_quoted_string — shell-ish tokenizer.
|
||||
#
|
||||
# Each iteration matches one of:
|
||||
# "..." | '...' | \S+
|
||||
# followed by trailing whitespace, then loops on the remainder.
|
||||
subtest 'split_quoted_string' => sub {
|
||||
is_deeply([main::split_quoted_string('one two three')],
|
||||
['one', 'two', 'three'],
|
||||
'bare words');
|
||||
|
||||
is_deeply([main::split_quoted_string('"hello world" foo "bar baz" qux')],
|
||||
['hello world', 'foo', 'bar baz', 'qux'],
|
||||
'double-quoted segments preserve internal spaces');
|
||||
|
||||
is_deeply([main::split_quoted_string(q{'a b' c 'd e'})],
|
||||
['a b', 'c', 'd e'],
|
||||
'single-quoted segments work the same way');
|
||||
|
||||
is_deeply([main::split_quoted_string('')], [],
|
||||
'empty input → empty list');
|
||||
|
||||
# Unbalanced quote: the implementation falls through to the \S+
|
||||
# branch and emits the leftover with the quote attached. Pin this.
|
||||
is_deeply([main::split_quoted_string('unbalanced "quote')],
|
||||
['unbalanced', '"quote'],
|
||||
'unterminated quote is taken as a bare token');
|
||||
|
||||
# Pure-whitespace input drops everything because no branch tolerates
|
||||
# a leading-whitespace prefix. Surface this as current behaviour —
|
||||
# arguably a bug, but documenting it here protects us from a silent
|
||||
# behaviour change.
|
||||
is_deeply([main::split_quoted_string(' spaces between ')], [],
|
||||
'leading whitespace short-circuits the tokenizer (current behaviour)');
|
||||
};
|
||||
|
||||
# quote_path — OS-dependent shell quoting.
|
||||
#
|
||||
# On Windows or for Windows-style absolute paths, wraps in double quotes.
|
||||
# Everywhere else, uses quotemeta (which escapes every non-word char).
|
||||
subtest 'quote_path' => sub {
|
||||
no warnings 'once';
|
||||
local %main::gconfig = ('os_type' => 'linux');
|
||||
|
||||
# quotemeta escapes /, space, etc.
|
||||
is(main::quote_path('/a b/c'), '\\/a\\ b\\/c',
|
||||
'unix path uses quotemeta');
|
||||
is(main::quote_path('plain'), 'plain',
|
||||
'all-word characters need no escaping');
|
||||
|
||||
# Windows-style path → "" wrapping, even when os_type isn't windows.
|
||||
is(main::quote_path('c:/Users/x'), '"c:/Users/x"',
|
||||
'drive-letter prefix wraps in double quotes');
|
||||
|
||||
# os_type=windows forces double-quote wrap.
|
||||
local %main::gconfig = ('os_type' => 'windows');
|
||||
is(main::quote_path('/etc/passwd'), '"/etc/passwd"',
|
||||
'os_type=windows wraps in double quotes regardless of path shape');
|
||||
};
|
||||
|
||||
# month_to_number / number_to_month — three-letter month name <-> 0-based index.
|
||||
#
|
||||
# In production these maps are populated by web-lib.pl; in tests we set
|
||||
# them up locally so the suite doesn't depend on web-lib.pl loading.
|
||||
subtest 'month_to_number / number_to_month' => sub {
|
||||
no warnings 'once';
|
||||
local %main::month_to_number_map = (
|
||||
'jan' => 0, 'feb' => 1, 'mar' => 2, 'apr' => 3,
|
||||
'may' => 4, 'jun' => 5, 'jul' => 6, 'aug' => 7,
|
||||
'sep' => 8, 'oct' => 9, 'nov' => 10, 'dec' => 11,
|
||||
);
|
||||
local %main::number_to_month_map = reverse %main::month_to_number_map;
|
||||
|
||||
is(main::month_to_number('Jan'), 0, 'Jan → 0');
|
||||
is(main::month_to_number('December'), 11, 'first three chars taken');
|
||||
is(main::month_to_number('FEB'), 1, 'case-insensitive');
|
||||
is(main::month_to_number('xyz'), undef, 'unknown returns undef');
|
||||
|
||||
is(main::number_to_month(0), 'Jan', '0 → Jan (ucfirst applied)');
|
||||
is(main::number_to_month(11), 'Dec', '11 → Dec');
|
||||
|
||||
# Round-trip every month.
|
||||
for my $m (qw(Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec)) {
|
||||
is(main::number_to_month(main::month_to_number($m)), $m,
|
||||
"round-trip $m");
|
||||
}
|
||||
};
|
||||
|
||||
done_testing();
|
||||
235
t/web-lib-funcs-strings.t
Normal file
235
t/web-lib-funcs-strings.t
Normal file
@@ -0,0 +1,235 @@
|
||||
#!/usr/bin/perl
|
||||
# Unit tests for the pure string helpers in web-lib-funcs.pl.
|
||||
#
|
||||
# web-lib-funcs.pl is a pure library (no `unless(caller)` guard), so a bare
|
||||
# `require` loads it without side effects. Helpers covered here touch no
|
||||
# globals beyond their args, so no stubbing or %gconfig setup is needed.
|
||||
#
|
||||
# Assertions pin the current contract — encoded byte shape, round-trip
|
||||
# identity, structural invariants — not the prettiest possible output.
|
||||
|
||||
use strict;
|
||||
use warnings;
|
||||
use Test::More;
|
||||
use File::Basename qw(dirname);
|
||||
use File::Spec;
|
||||
|
||||
my $script = File::Spec->rel2abs(
|
||||
File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
|
||||
require $script;
|
||||
|
||||
# html_escape
|
||||
#
|
||||
# Always escapes &, <, >, ", ', =. The optional $nodblamp flag uses a
|
||||
# lookahead so existing entity references survive untouched; without it,
|
||||
# every & becomes &, including already-escaped input.
|
||||
subtest 'html_escape' => sub {
|
||||
is(main::html_escape('<b>&"\'='),
|
||||
'<b>&"'=',
|
||||
'all six dangerous characters are escaped');
|
||||
|
||||
is(main::html_escape(undef), '', 'undef input → empty string');
|
||||
is(main::html_escape(''), '', 'empty input → empty string');
|
||||
is(main::html_escape('plain text 123'), 'plain text 123',
|
||||
'whitespace and alphanumerics pass through');
|
||||
|
||||
# Default mode double-escapes existing entities — this is the current
|
||||
# contract; the nodblamp flag opts into the smarter behaviour.
|
||||
is(main::html_escape('&'), '&amp;', 'default mode double-escapes &');
|
||||
is(main::html_escape('&', 1), '&', 'nodblamp preserves existing &');
|
||||
is(main::html_escape('A', 1), 'A', 'nodblamp preserves numeric entity');
|
||||
# Note: nodblamp's lookahead matches any &<letters>; as an entity, so
|
||||
# made-up names like &x; are treated as entities and not re-escaped.
|
||||
is(main::html_escape('&x;', 1), '&x;', 'nodblamp preserves arbitrary &word; shape');
|
||||
is(main::html_escape('& ', 1), '& ', 'nodblamp escapes lone &');
|
||||
|
||||
# Adversarial XSS payload — none of the dangerous chars survive raw.
|
||||
my $xss = main::html_escape(q{<script>alert("x")</script>'=&});
|
||||
unlike($xss, qr/[<>"'=]/, 'no raw HTML-significant characters remain');
|
||||
unlike($xss, qr/&(?!(amp|lt|gt|quot|#39|#61);)/,
|
||||
'every & starts a known entity');
|
||||
};
|
||||
|
||||
# html_unescape — inverse of html_escape for the canonical entity set.
|
||||
subtest 'html_unescape' => sub {
|
||||
is(main::html_unescape('<b>&"'='),
|
||||
'<b>&"\'=', 'canonical entity set round-trips');
|
||||
is(main::html_unescape('a b'), 'a b',
|
||||
' decodes to a regular space');
|
||||
is(main::html_unescape(undef), '', 'undef → empty');
|
||||
is(main::html_unescape(''), '', 'empty → empty');
|
||||
is(main::html_unescape('no entities here'), 'no entities here',
|
||||
'plain text passes through unchanged');
|
||||
};
|
||||
|
||||
# html_strip — remove tags, optionally replacing with a sentinel.
|
||||
subtest 'html_strip' => sub {
|
||||
is(main::html_strip('<b>hello</b>'), 'hello',
|
||||
'simple tags removed');
|
||||
is(main::html_strip('<a href="x">y</a>'), 'y',
|
||||
'attribute-bearing tag removed');
|
||||
# Quoted attribute that contains a >, which would otherwise break a
|
||||
# naive regexp — the implementation accounts for this.
|
||||
is(main::html_strip('<a href=">">y</a>'), 'y',
|
||||
'quoted > inside attribute does not end tag early');
|
||||
is(main::html_strip('plain text'), 'plain text',
|
||||
'plain text untouched');
|
||||
is(main::html_strip('<b>x</b>', '|'), '|x|',
|
||||
'replacement string substituted for each tag');
|
||||
};
|
||||
|
||||
# quote_escape — only ' and " (and lone &) are escaped; existing entities
|
||||
# (&xxx; or &#NN;) are preserved.
|
||||
subtest 'quote_escape' => sub {
|
||||
is(main::quote_escape(undef), '', 'undef → empty');
|
||||
|
||||
is(main::quote_escape('a&b'), 'a&b', 'lone & escaped');
|
||||
is(main::quote_escape('a&'), 'a&', 'trailing & escaped');
|
||||
is(main::quote_escape('a&b'), 'a&b', 'existing & preserved');
|
||||
is(main::quote_escape('a'b'), 'a'b', 'numeric entity preserved');
|
||||
|
||||
is(main::quote_escape(q{a"b'c}), 'a"b'c',
|
||||
'both quote styles escaped by default');
|
||||
is(main::quote_escape(q{a"b'c}, q{"}), q{a"b'c},
|
||||
'only-quote="\"" escapes only double quotes');
|
||||
is(main::quote_escape(q{a"b'c}, q{'}), q{a"b'c},
|
||||
'only-quote="\'" escapes only single quotes');
|
||||
};
|
||||
|
||||
# quote_literal_escape — escape for inclusion in a Perl string literal.
|
||||
subtest 'quote_literal_escape' => sub {
|
||||
is(main::quote_literal_escape(undef), '', 'undef → empty');
|
||||
is(main::quote_literal_escape(''), '', 'empty → empty');
|
||||
|
||||
# Default (single-quoted target): only \ and ' need escaping.
|
||||
is(main::quote_literal_escape(q{it's}), q{it\'s}, 'single quote escaped');
|
||||
is(main::quote_literal_escape(q{a\\b}), q{a\\\\b}, 'backslash doubled');
|
||||
is(main::quote_literal_escape(q{a"b$c@d}), q{a"b$c@d},
|
||||
'double-quote / sigils NOT escaped in single-quoted target');
|
||||
|
||||
# Double-quoted target: also escape ", $, @ (because they interpolate).
|
||||
is(main::quote_literal_escape(q{a"b$c@d}, q{"}),
|
||||
q{a\"b\$c\@d},
|
||||
'" $ @ all escaped in double-quoted target');
|
||||
is(main::quote_literal_escape(q{a\\b}, q{"}), q{a\\\\b},
|
||||
'backslash doubled in double-quoted target too');
|
||||
};
|
||||
|
||||
# quote_javascript — hex-escape the unsafe characters for a JS string literal.
|
||||
subtest 'quote_javascript' => sub {
|
||||
is(main::quote_javascript(q{a"b}), 'a\x22b', 'double quote → \x22');
|
||||
is(main::quote_javascript(q{a'b}), 'a\x27b', 'single quote → \x27');
|
||||
is(main::quote_javascript('a<b>c'), 'a\x3cb\x3ec', '< and > escaped');
|
||||
is(main::quote_javascript('a&b'), 'a\x26b', '& escaped');
|
||||
is(main::quote_javascript('a\\b'), 'a\x5cb', 'backslash escaped');
|
||||
is(main::quote_javascript('plain text 123'), 'plain text 123',
|
||||
'safe characters pass through');
|
||||
};
|
||||
|
||||
# urlize / un_urlize — percent-encoding round-trip.
|
||||
subtest 'urlize / un_urlize' => sub {
|
||||
# urlize encodes anything that is not [A-Za-z0-9].
|
||||
is(main::urlize('abc123'), 'abc123', 'alphanumerics pass through');
|
||||
is(main::urlize(' '), '%20', 'space encoded');
|
||||
is(main::urlize('/'), '%2F', 'slash encoded');
|
||||
is(main::urlize("\n"), '%0A', 'newline encoded');
|
||||
is(main::urlize(chr(0xff)), '%FF', 'high-bit byte encoded');
|
||||
is(main::urlize('a b/c'), 'a%20b%2Fc', 'mixed input');
|
||||
|
||||
# un_urlize: by default, '+' becomes ' ' (form-encoded). Pass the
|
||||
# second arg true to preserve '+' literally.
|
||||
is(main::un_urlize('a+b'), 'a b', '+ decoded as space by default');
|
||||
is(main::un_urlize('a+b', 1), 'a+b', '+ preserved with plus-literal flag');
|
||||
is(main::un_urlize('%20'), ' ', '%20 decoded');
|
||||
is(main::un_urlize('%c3%a9'), "\xc3\xa9",
|
||||
'lowercase hex decoded (UTF-8 bytes for é)');
|
||||
|
||||
# Round-trip through a binary string.
|
||||
for my $s ('plain', 'a b/c', "binary\x00\x01\xff",
|
||||
'<script>alert(1)</script>') {
|
||||
# urlize never emits +, so the no-plus mode is safe here.
|
||||
is(main::un_urlize(main::urlize($s), 1), $s,
|
||||
"round-trip preserves ".length($s)." bytes");
|
||||
}
|
||||
};
|
||||
|
||||
# trim — symmetric or asymmetric whitespace stripping.
|
||||
#
|
||||
# Second arg controls which end:
|
||||
# undef/0 → both
|
||||
# -1 → right only
|
||||
# 1 → left only
|
||||
subtest 'trim' => sub {
|
||||
is(main::trim(' hi '), 'hi', 'both ends by default');
|
||||
is(main::trim(' hi ', -1), ' hi', '-1 strips trailing only');
|
||||
is(main::trim(' hi ', 1), 'hi ', '1 strips leading only');
|
||||
is(main::trim('nochange'), 'nochange', 'no-op on tidy input');
|
||||
is(main::trim(''), '', 'empty stays empty');
|
||||
is(main::trim("\t\nhi\r\n"), 'hi', 'tabs and newlines counted as whitespace');
|
||||
};
|
||||
|
||||
# trunc — truncate to a "whole word" within a max length.
|
||||
#
|
||||
# The implementation cuts at maxlen, then pops one char unconditionally
|
||||
# and continues popping only while the popped char is whitespace; trailing
|
||||
# whitespace is then trimmed. This pins current behaviour, which has two
|
||||
# notable edge cases worth flagging:
|
||||
#
|
||||
# * `trunc("hello world foo", 11)` returns "hello worl", losing the
|
||||
# final 'd' even though substr(0, 11) cleanly ends on a word boundary.
|
||||
# * `trunc("hello world", 5)` returns "hell" rather than "hello".
|
||||
#
|
||||
# These pass today; a future fix to trunc will break these and prompt
|
||||
# re-review.
|
||||
subtest 'trunc' => sub {
|
||||
# Early-exit when input already fits.
|
||||
is(main::trunc('short', 99), 'short', 'no-op when input shorter than max');
|
||||
is(main::trunc('exact5', 6), 'exact5', 'no-op when input equals max');
|
||||
|
||||
# Truncation lands at a partial word — pops the partial word back to
|
||||
# whitespace, then trims trailing whitespace.
|
||||
is(main::trunc('a b c', 4), 'a', 'cuts back through partial word');
|
||||
# substr(0,8) = "foo bar ", pop one (always), pop "r" — non-ws so stop.
|
||||
# Result: "foo ba" (last word "baz" partial → chopped one char short).
|
||||
is(main::trunc('foo bar baz', 8), 'foo ba',
|
||||
'partial word loses one extra char (current behaviour)');
|
||||
|
||||
# Edge case: substr cleanly ends on a word boundary. Current behaviour
|
||||
# still pops one char; pin it.
|
||||
is(main::trunc('hello world foo', 11), 'hello worl',
|
||||
'always pops at least one char even at word boundary (current behaviour)');
|
||||
is(main::trunc('hello world', 5), 'hell',
|
||||
'always pops at least one char (current behaviour)');
|
||||
|
||||
# Truncating to 1 leaves nothing after the mandatory pop.
|
||||
is(main::trunc('abc', 1), '', 'maxlen=1 returns empty');
|
||||
};
|
||||
|
||||
# indexof — first-index lookup with `eq`.
|
||||
subtest 'indexof' => sub {
|
||||
is(main::indexof('b', 'a', 'b', 'c'), 1, 'returns 0-based index');
|
||||
is(main::indexof('a', 'a', 'b', 'c'), 0, 'first element');
|
||||
is(main::indexof('z', 'a', 'b', 'c'), -1, 'missing → -1');
|
||||
is(main::indexof('a'), -1, 'empty haystack → -1');
|
||||
is(main::indexof('b', 'a', 'b', 'b'), 1, 'duplicates: first hit wins');
|
||||
# Numeric needle compared stringwise (eq).
|
||||
is(main::indexof(1, '0', '1', '2'), 1, 'numeric needle matches stringwise');
|
||||
};
|
||||
|
||||
# indexoflc — case-insensitive variant.
|
||||
subtest 'indexoflc' => sub {
|
||||
is(main::indexoflc('B', 'a', 'b', 'c'), 1, 'uppercase needle, lowercase haystack');
|
||||
is(main::indexoflc('a', 'A', 'B', 'C'), 0, 'lowercase needle, uppercase haystack');
|
||||
is(main::indexoflc('z', 'a', 'b', 'c'), -1, 'missing → -1');
|
||||
};
|
||||
|
||||
# uniquelc — dedupe by lowercase comparison, preserving first-seen case.
|
||||
subtest 'uniquelc' => sub {
|
||||
is_deeply([main::uniquelc('Foo', 'foo', 'FOO', 'Bar')],
|
||||
['Foo', 'Bar'],
|
||||
'first-seen case preserved, later case-variants dropped');
|
||||
is_deeply([main::uniquelc()], [], 'empty input → empty list');
|
||||
is_deeply([main::uniquelc('x')], ['x'], 'single element passes through');
|
||||
};
|
||||
|
||||
done_testing();
|
||||
Reference in New Issue
Block a user