Merge pull request #2729 from swelljoe/web-lib-funcs-test-data-transforms

Add web-lib-funcs data transform tests
2026-06-04 20:30:22 +01:00 · 2026-05-23 19:25:00 -07:00
parent 3780c1a9b5 2c4467a82e
commit 73821b72b0
3 changed files with 360 additions and 2 deletions
--- a/t/web-lib-funcs-encoding.t
+++ b/t/web-lib-funcs-encoding.t
@@ -0,0 +1,188 @@
+#!/usr/bin/perl
+# Unit tests for the encoding / serialization helpers in web-lib-funcs.pl:
+# base64, base32, serialise_variable / unserialise_variable, JSON wrappers.
+#
+# Pure transforms — no globals beyond MIME::Base64 / JSON::* probes done by
+# the subs themselves. A bare `require` is enough.
+
+use strict;
+use warnings;
+use Test::More;
+use File::Basename qw(dirname);
+use File::Spec;
+
+my $script = File::Spec->rel2abs(
+	File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
+require $script;
+
+# encode_base64 / decode_base64
+#
+# Two implementations live behind these wrappers — MIME::Base64 if it loads,
+# otherwise a pack/unpack pure-Perl fallback. We test the contract both
+# share: RFC 4648 vectors, round-trip identity, and the optional "noeol"
+# flag suppressing trailing newlines.
+subtest 'encode_base64 / decode_base64' => sub {
+	# RFC 4648 §10 test vectors.
+	my %vec = (
+		''       => '',
+		'f'      => 'Zg==',
+		'fo'     => 'Zm8=',
+		'foo'    => 'Zm9v',
+		'foob'   => 'Zm9vYg==',
+		'fooba'  => 'Zm9vYmE=',
+		'foobar' => 'Zm9vYmFy',
+		);
+	for my $in (sort keys %vec) {
+		is(main::encode_base64($in, 'noeol'), $vec{$in},
+		   "RFC vector: '$in'");
+		is(main::decode_base64($vec{$in}), $in,
+		   "RFC vector decode: '$vec{$in}'");
+		}
+
+	# Default mode appends a newline; noeol suppresses it.
+	like(main::encode_base64('hello'),         qr/\n\z/, 'default mode ends in newline');
+	unlike(main::encode_base64('hello', 'noeol'), qr/\n/, 'noeol omits newline');
+
+	# Round-trip a wide byte-range, including embedded NULs.
+	my $bin = join('', map { chr } 0..255);
+	is(main::decode_base64(main::encode_base64($bin, 'noeol')), $bin,
+	   'round-trips all 256 byte values');
+
+	# Decoder tolerates embedded whitespace in the encoded form (MIME::Base64
+	# behaviour; the fallback uses uudecode under the hood and is similarly
+	# tolerant after the tr/cd strip).
+	is(main::decode_base64("aGVs\nbG8="), 'hello',
+	   'embedded newline in encoded input tolerated');
+};
+
+# encode_base32 / decode_base32
+#
+# Pure-Perl implementation. RFC 4648 §10 specifies "=" padding on
+# unaligned outputs; this encoder omits padding. Pinning that as the
+# current contract — decoder accepts both forms so round-trips are safe.
+subtest 'encode_base32 / decode_base32' => sub {
+	# Round-trip the RFC 4648 §10 vectors.
+	for my $in ('', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar') {
+		is(main::decode_base32(main::encode_base32($in)), $in,
+		   "round-trip '$in'");
+		}
+
+	# Encoder emits the RFC alphabet (uppercase A-Z and digits 2-7).
+	# Output should never contain "=" (padding is dropped).
+	like(main::encode_base32('foobar'), qr/\A[A-Z2-7]*\z/,
+	     'encoded output uses only the RFC 4648 alphabet');
+	unlike(main::encode_base32('f'), qr/=/,
+	       'encoder omits "=" padding (note: deviation from RFC 4648)');
+
+	# Decoder also accepts canonical padded input (RFC 4648 mandates "="
+	# padding on unaligned outputs). The encoder still omits padding, so
+	# this matters mainly for externally-produced base32 strings.
+	is(main::decode_base32('MY======'), 'f',    'padded "MY======" decodes');
+	is(main::decode_base32('MZXQ===='), 'fo',   'padded "MZXQ====" decodes');
+	is(main::decode_base32('MZXW6==='), 'foo',  'padded "MZXW6===" decodes');
+	is(main::decode_base32('MZXW6YQ='), 'foob', 'padded "MZXW6YQ=" decodes');
+
+	# Case-insensitive decode — Webmin's TOTP path accepts secrets
+	# case-insensitively (twofactor-funcs-lib.pl validates with /i),
+	# so lowercase input from third-party authenticators must decode
+	# identically to uppercase.
+	is(main::decode_base32('mzxw6ytboi'),       'foobar',
+	   'lowercase decodes identically to uppercase');
+	is(main::decode_base32('MzXw6YtBoI'),       'foobar',
+	   'mixed-case decodes identically');
+	is(main::decode_base32('mzxw6yq='),         'foob',
+	   'lowercase with padding decodes correctly');
+
+	# Empty input → empty output, both directions.
+	is(main::encode_base32(''), '', 'empty encode → empty');
+	is(main::decode_base32(''), '', 'empty decode → empty');
+};
+
+# serialise_variable / unserialise_variable
+#
+# Webmin's own serialization (used by remote_eval and friends). Format:
+# TYPE,urlized-payload where nested collections re-encode through urlize
+# at each level — so nested structures gain layers of %25 escaping.
+subtest 'serialise_variable / unserialise_variable' => sub {
+	# Scalars round-trip byte-for-byte.
+	for my $s ('hello', '', 'a,b,c', 'a=b&c', "\x00\xff", "spaces here") {
+		is(main::unserialise_variable(main::serialise_variable($s)), $s,
+		   "scalar round-trip: '$s'");
+		}
+
+	# undef has a dedicated marker.
+	is(main::serialise_variable(undef), 'UNDEF', 'undef serializes to "UNDEF"');
+	is(main::unserialise_variable('UNDEF'), undef, '"UNDEF" deserializes to undef');
+
+	# Refs.
+	my $scalar_ref = \'inner';
+	is_deeply(main::unserialise_variable(main::serialise_variable($scalar_ref)),
+		  $scalar_ref, 'scalar ref round-trips');
+
+	# Arrays — note numeric values come back as strings (Perl scalar stringification).
+	is_deeply(main::unserialise_variable(main::serialise_variable([1,2,3])),
+		  ['1','2','3'], 'array of numbers round-trips (as strings)');
+	is_deeply(main::unserialise_variable(main::serialise_variable(['a','b','c'])),
+		  ['a','b','c'], 'array of strings round-trips');
+	is_deeply(main::unserialise_variable(main::serialise_variable([])),
+		  [], 'empty array round-trips');
+
+	# Hashes.
+	is_deeply(main::unserialise_variable(main::serialise_variable({a=>'x', b=>'y'})),
+		  {a=>'x', b=>'y'}, 'flat hash round-trips');
+	is_deeply(main::unserialise_variable(main::serialise_variable({})),
+		  {}, 'empty hash round-trips');
+
+	# Nested — array-of-arrays and hash-of-hashes survive the recursive
+	# urlize wrapping (each level adds %25 to existing %s).
+	is_deeply(main::unserialise_variable(main::serialise_variable([[1,2],[3,4]])),
+		  [['1','2'],['3','4']], 'nested array round-trips');
+	is_deeply(main::unserialise_variable(
+		main::serialise_variable({outer=>{inner=>['x','y']}})),
+		{outer=>{inner=>['x','y']}}, 'nested hash round-trips');
+
+	# Wire-format spot checks — pin the documented format so callers that
+	# rely on it (remote_eval) don't silently change shape.
+	is(main::serialise_variable('hi'),   'VAL,hi',  'scalar wire format');
+	is(main::serialise_variable('a,b'),  'VAL,a%2Cb', 'comma in scalar urlized');
+	is(main::serialise_variable([1,2]),  'ARRAY,VAL%2C1,VAL%2C2',
+	   'array wire format (one level of urlize wrapping)');
+
+	# Data::Dumper path — opt-in via the second arg.
+	my $d = main::serialise_variable({k=>'v'}, 1);
+	like($d, qr/^\$VAR1\s*=/, 'dumper mode emits Data::Dumper format');
+	is_deeply(main::unserialise_variable($d), {k=>'v'},
+	   'dumper-format round-trips through the $VAR1 detector');
+};
+
+# convert_to_json / convert_from_json
+#
+# Thin wrappers over JSON::XS or JSON::PP. We test the wrapper contract —
+# the defaults, the pretty flag, the raw-utf8 flag, the undef-defaulting,
+# and the relaxed parser — not JSON conformance, which is the library's job.
+subtest 'convert_to_json / convert_from_json' => sub {
+	# Plain round-trip preserves structure (not key order).
+	my $in = {name=>'x', items=>[1,2,3], nested=>{k=>'v'}};
+	is_deeply(main::convert_from_json(main::convert_to_json($in)), $in,
+		  'round-trips a mixed structure');
+
+	# Pretty output is human-formatted (multi-line, indented).
+	my $pretty = main::convert_to_json({a=>1,b=>2}, 1);
+	like($pretty, qr/\n/, 'pretty mode produces multi-line output');
+	# And still round-trips.
+	is_deeply(main::convert_from_json($pretty), {a=>1,b=>2},
+		  'pretty output still parses');
+
+	# Current contract: undef input becomes {} (the `||= {}` default).
+	is(main::convert_to_json(undef), '{}', 'undef input → "{}"');
+
+	# Arrays at the top level work too.
+	is(main::convert_to_json([1,2,3]), '[1,2,3]', 'top-level array encodes');
+	is_deeply(main::convert_from_json('[1,2,3]'), [1,2,3], 'top-level array decodes');
+
+	# Relaxed mode accepts comments and trailing commas (JSON::PP feature).
+	my $rx = main::convert_from_json('{"a":1, /* note */ "b":2,}', 0, 1);
+	is_deeply($rx, {a=>1,b=>2}, 'relaxed parser accepts /* comments */ and trailing comma');
+};
+
+done_testing();
--- a/t/web-lib-funcs-numeric.t
+++ b/t/web-lib-funcs-numeric.t
@@ -0,0 +1,164 @@
+#!/usr/bin/perl
+# Unit tests for numeric / version-comparison helpers in web-lib-funcs.pl.
+# Pure subs — bare require is enough.
+
+use strict;
+use warnings;
+use Test::More;
+use File::Basename qw(dirname);
+use File::Spec;
+
+my $script = File::Spec->rel2abs(
+	File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
+require $script;
+
+# is_int — strict signed decimal integer.
+#
+# Regex is /^([-]?\d+)$/: allows a single leading "-", no leading "+",
+# no whitespace, no scientific notation, no hex.
+subtest 'is_int' => sub {
+	ok( main::is_int('0'),    '"0" is int');
+	ok( main::is_int('42'),   '"42" is int');
+	ok( main::is_int('-5'),   'negative int');
+	ok( main::is_int('01'),   'leading zero accepted');
+
+	ok(!main::is_int('+5'),   'leading "+" rejected (no signed-positive form)');
+	ok(!main::is_int(' 5'),   'leading whitespace rejected');
+	ok(!main::is_int('5 '),   'trailing whitespace rejected');
+	ok(!main::is_int('1.0'),  'decimals rejected');
+	ok(!main::is_int('1e3'),  'scientific notation rejected');
+	ok(!main::is_int('0x10'), 'hex rejected');
+	ok(!main::is_int(''),     'empty rejected');
+	ok(!main::is_int('abc'),  'non-numeric rejected');
+	# undef returns false but also triggers a "uninitialized value" warning
+	# inside the regex (web-lib-funcs.pl is not warnings-enabled today, but
+	# `prove -w` or a future `use warnings` would surface it). Silence the
+	# warning at the call site so the suite stays clean either way.
+	{ local $SIG{__WARN__} = sub {};
+	  ok(!main::is_int(undef), 'undef rejected'); }
+};
+
+# is_float — strict decimal with a dot.
+#
+# Regex is /^[-]?(\.\d+|\d+\.\d+)$/. The decimal point is required, so
+# integers don't qualify. Trailing dot ("5.") is rejected. Scientific
+# notation rejected.
+subtest 'is_float' => sub {
+	ok( main::is_float('1.5'),   'plain float');
+	ok( main::is_float('-1.5'),  'negative float');
+	ok( main::is_float('.5'),    'leading-dot form accepted');
+	ok( main::is_float('-.5'),   'negative leading-dot form accepted');
+	ok( main::is_float('0.0'),   'zero with decimal accepted');
+
+	ok(!main::is_float('5'),     'plain integer rejected (decimal point required)');
+	ok(!main::is_float('5.'),    'trailing-dot form rejected');
+	ok(!main::is_float('1e3'),   'scientific notation rejected');
+	ok(!main::is_float('+1.5'),  'leading "+" rejected');
+	ok(!main::is_float(' 1.5'),  'leading whitespace rejected');
+	ok(!main::is_float(''),      'empty rejected');
+	ok(!main::is_float('abc'),   'non-numeric rejected');
+	# See is_int's undef comment.
+	{ local $SIG{__WARN__} = sub {};
+	  ok(!main::is_float(undef), 'undef rejected'); }
+};
+
+# float — parse-and-format helper.
+#
+# Returns sprintf('%.2f', $n) if that's non-zero, otherwise the literal 0.
+# So unparseable input collapses to plain 0 (no decimals), but a valid
+# zero number also returns plain 0 — the two are indistinguishable from
+# the output side. Leading "+" is silently accepted here even though
+# is_int / is_float reject it — asymmetric with the validators.
+subtest 'float' => sub {
+	is(main::float('42'),    '42.00',  'integer string → 2-decimal form');
+	is(main::float('1.5'),   '1.50',   'float string → 2-decimal form');
+	is(main::float('-1.5'),  '-1.50',  'negative float');
+	is(main::float('1e3'),   '1000.00', 'scientific notation parsed');
+	is(main::float('+5'),    '5.00',   'leading "+" silently accepted (asymmetric with is_int/is_float)');
+
+	# All these collapse to plain 0 — non-parseable, empty, undef, true
+	# zero. Non-numeric / undef inputs warn inside sprintf under -w
+	# (uninitialized / isn't numeric); silence per call site so the suite
+	# stays warning-free regardless of how prove is invoked.
+	{ local $SIG{__WARN__} = sub {};
+	  is(main::float('abc'),  0, 'non-numeric → 0');
+	  is(main::float(''),     0, 'empty → 0');
+	  is(main::float(undef),  0, 'undef → 0'); }
+	is(main::float('0'),   0, 'zero collapses to plain 0 (not "0.00")');
+	is(main::float('0.0'), 0, 'zero with decimal also collapses to plain 0');
+};
+
+# compare_version_numbers — Debian-ish version comparator.
+#
+# Two calling shapes:
+#   compare_version_numbers($a, $b)            → -1 / 0 / 1
+#   compare_version_numbers($a, $op, $b)       → boolean
+#
+# Splits each version on /[.\-+~_]/, then walks segment-by-segment with
+# a handful of special cases (pure numeric, numeric+string, "ubuntu"
+# prefix strip, "rcN" < final).
+subtest 'compare_version_numbers (numeric form)' => sub {
+	# Equal.
+	is(main::compare_version_numbers('1.0', '1.0'),     0, 'equal');
+	is(main::compare_version_numbers('1.2.3', '1.2.3'), 0, 'equal three-part');
+
+	# Numeric ordering — NOT lexical, so "1.10" > "1.9".
+	is(main::compare_version_numbers('1.10', '1.9'),    1, '1.10 > 1.9 (numeric, not lexical)');
+	is(main::compare_version_numbers('1.0', '1.1'),    -1, 'simple less-than');
+	is(main::compare_version_numbers('2', '1.9'),       1, 'shorter higher major wins');
+
+	# Different separators are interchangeable.
+	is(main::compare_version_numbers('1-2', '1.2'),     0, 'dot and dash interchangeable');
+	is(main::compare_version_numbers('1_2~3', '1.2.3'), 0, 'underscore and tilde interchangeable');
+
+	# Numeric segment with a string tail — string compared after number.
+	is(main::compare_version_numbers('1ubuntu5', '1ubuntu10'), -1, 'ubuntu5 < ubuntu10 (numeric tail)');
+	is(main::compare_version_numbers('6redhat', '8redhat'),    -1, 'leading number wins over string tail');
+
+	# Pure-string-prefix + number variant ("centos7" vs "centos8").
+	is(main::compare_version_numbers('centos7', 'centos8'), -1, 'centos7 < centos8');
+
+	# "ubuntu" prefix is silently stripped per-segment.
+	is(main::compare_version_numbers('ubuntu5', '5'), 0,
+	   '"ubuntu" prefix is stripped (ubuntu5 == 5)');
+
+	# rcN is always older than the final release of the same number.
+	is(main::compare_version_numbers('1rc1', '1'),   -1, 'rc1 < release');
+	is(main::compare_version_numbers('1', '1rc1'),    1, 'release > rc1');
+	is(main::compare_version_numbers('1RC1', '1'),   -1, 'rc match is case-insensitive');
+	is(main::compare_version_numbers('1rc2', '1rc1'), 1, 'rc2 > rc1');
+
+	# Other string tails (alpha, beta) are NOT special-cased like rc, so
+	# they compare lexically after the leading number — and lose to a
+	# bare number on the same prefix because "" sorts before "alpha".
+	is(main::compare_version_numbers('1alpha', '1'),     1, '"alpha" tail > bare (lexical, no special-case)');
+	is(main::compare_version_numbers('1beta', '1alpha'), 1, 'lexical compare of string tails');
+
+	# Trailing-zero / segment-count asymmetry: 1.0 < 1.0.0 (the trailing
+	# missing segment compares as "less than" 0). This is a quirk to be
+	# aware of when normalizing version strings before compare.
+	is(main::compare_version_numbers('1.0', '1.0.0'),   -1, 'shorter < longer when prefix matches');
+	is(main::compare_version_numbers('1.0.0', '1.0'),    1, 'longer > shorter when prefix matches');
+
+	# Empty / undef inputs degrade quietly to a numeric answer rather
+	# than crashing. (The sub defaults undef to '' internally, so undef
+	# args don't warn under -w.)
+	is(main::compare_version_numbers('', '1.0'),   -1, 'empty < non-empty');
+	is(main::compare_version_numbers(undef, undef), 0, 'two undefs compare equal');
+};
+
+subtest 'compare_version_numbers (operator form)' => sub {
+	ok( main::compare_version_numbers('1.0', '<',  '2.0'), '1.0 <  2.0');
+	ok( main::compare_version_numbers('1.0', '<=', '1.0'), '1.0 <= 1.0');
+	ok( main::compare_version_numbers('1.0', '==', '1.0'), '1.0 == 1.0');
+	ok( main::compare_version_numbers('2.0', '>',  '1.0'), '2.0 >  1.0');
+	ok( main::compare_version_numbers('2.0', '>=', '2.0'), '2.0 >= 2.0');
+
+	ok(!main::compare_version_numbers('1.0', '>',  '2.0'), '1.0 not >  2.0');
+	ok(!main::compare_version_numbers('1.0', '==', '2.0'), '1.0 not == 2.0');
+
+	# Numeric-not-lexical also holds through the operator form.
+	ok( main::compare_version_numbers('1.10', '>', '1.9'), '1.10 > 1.9 via op');
+};
+
+done_testing();
--- a/web-lib-funcs.pl
+++ b/web-lib-funcs.pl
@@ -6242,6 +6242,8 @@ sub decode_base32
 {
 $_ = shift;
 my ($l);
+s/=+$//;
+$_ = uc($_);
 tr|A-Z2-7|\0-\37|;
 $_ = unpack('B*', $_);
 s/000(.....)/$1/g;
@@ -13235,12 +13237,16 @@ if ($cmp) {
 	return &compare_version_numbers($ver1, $ver2) < 0  if ($cmp eq '<');
 	}

+# Default undef inputs to '' so undef args don't warn in split and
+# shorter-vs-longer comparisons don't warn on the missing-segment side.
+$ver1 = '' if (!defined($ver1));
+$ver2 = '' if (!defined($ver2));
 my @sp1 = split(/[\.\-\+\~\_]/, $ver1);
 my @sp2 = split(/[\.\-\+\~\_]/, $ver2);
 my $tmp;
 for(my $i=0; $i<@sp1 || $i<@sp2; $i++) {
-	my $v1 = $sp1[$i];
-	my $v2 = $sp2[$i];
+	my $v1 = defined($sp1[$i]) ? $sp1[$i] : '';
+	my $v2 = defined($sp2[$i]) ? $sp2[$i] : '';
 	my $comp;
 	$v1 =~ s/^ubuntu//g;
 	$v2 =~ s/^ubuntu//g;