Merge pull request #2729 from swelljoe/web-lib-funcs-test-data-transforms
Some checks failed
Tests / prove (push) Has been cancelled
Build / build (push) Has been cancelled
Close inactive / close-inactive (push) Has been cancelled

Add web-lib-funcs data transform tests
This commit is contained in:
Jamie Cameron
2026-05-23 19:25:00 -07:00
committed by GitHub
3 changed files with 360 additions and 2 deletions

188
t/web-lib-funcs-encoding.t Normal file
View File

@@ -0,0 +1,188 @@
#!/usr/bin/perl
# Unit tests for the encoding / serialization helpers in web-lib-funcs.pl:
# base64, base32, serialise_variable / unserialise_variable, JSON wrappers.
#
# Pure transforms — no globals beyond MIME::Base64 / JSON::* probes done by
# the subs themselves. A bare `require` is enough.
use strict;
use warnings;
use Test::More;
use File::Basename qw(dirname);
use File::Spec;
my $script = File::Spec->rel2abs(
File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
require $script;
# encode_base64 / decode_base64
#
# Two implementations live behind these wrappers — MIME::Base64 if it loads,
# otherwise a pack/unpack pure-Perl fallback. We test the contract both
# share: RFC 4648 vectors, round-trip identity, and the optional "noeol"
# flag suppressing trailing newlines.
subtest 'encode_base64 / decode_base64' => sub {
# RFC 4648 §10 test vectors.
my %vec = (
'' => '',
'f' => 'Zg==',
'fo' => 'Zm8=',
'foo' => 'Zm9v',
'foob' => 'Zm9vYg==',
'fooba' => 'Zm9vYmE=',
'foobar' => 'Zm9vYmFy',
);
for my $in (sort keys %vec) {
is(main::encode_base64($in, 'noeol'), $vec{$in},
"RFC vector: '$in'");
is(main::decode_base64($vec{$in}), $in,
"RFC vector decode: '$vec{$in}'");
}
# Default mode appends a newline; noeol suppresses it.
like(main::encode_base64('hello'), qr/\n\z/, 'default mode ends in newline');
unlike(main::encode_base64('hello', 'noeol'), qr/\n/, 'noeol omits newline');
# Round-trip a wide byte-range, including embedded NULs.
my $bin = join('', map { chr } 0..255);
is(main::decode_base64(main::encode_base64($bin, 'noeol')), $bin,
'round-trips all 256 byte values');
# Decoder tolerates embedded whitespace in the encoded form (MIME::Base64
# behaviour; the fallback uses uudecode under the hood and is similarly
# tolerant after the tr/cd strip).
is(main::decode_base64("aGVs\nbG8="), 'hello',
'embedded newline in encoded input tolerated');
};
# encode_base32 / decode_base32
#
# Pure-Perl implementation. RFC 4648 §10 specifies "=" padding on
# unaligned outputs; this encoder omits padding. Pinning that as the
# current contract — decoder accepts both forms so round-trips are safe.
subtest 'encode_base32 / decode_base32' => sub {
# Round-trip the RFC 4648 §10 vectors.
for my $in ('', 'f', 'fo', 'foo', 'foob', 'fooba', 'foobar') {
is(main::decode_base32(main::encode_base32($in)), $in,
"round-trip '$in'");
}
# Encoder emits the RFC alphabet (uppercase A-Z and digits 2-7).
# Output should never contain "=" (padding is dropped).
like(main::encode_base32('foobar'), qr/\A[A-Z2-7]*\z/,
'encoded output uses only the RFC 4648 alphabet');
unlike(main::encode_base32('f'), qr/=/,
'encoder omits "=" padding (note: deviation from RFC 4648)');
# Decoder also accepts canonical padded input (RFC 4648 mandates "="
# padding on unaligned outputs). The encoder still omits padding, so
# this matters mainly for externally-produced base32 strings.
is(main::decode_base32('MY======'), 'f', 'padded "MY======" decodes');
is(main::decode_base32('MZXQ===='), 'fo', 'padded "MZXQ====" decodes');
is(main::decode_base32('MZXW6==='), 'foo', 'padded "MZXW6===" decodes');
is(main::decode_base32('MZXW6YQ='), 'foob', 'padded "MZXW6YQ=" decodes');
# Case-insensitive decode — Webmin's TOTP path accepts secrets
# case-insensitively (twofactor-funcs-lib.pl validates with /i),
# so lowercase input from third-party authenticators must decode
# identically to uppercase.
is(main::decode_base32('mzxw6ytboi'), 'foobar',
'lowercase decodes identically to uppercase');
is(main::decode_base32('MzXw6YtBoI'), 'foobar',
'mixed-case decodes identically');
is(main::decode_base32('mzxw6yq='), 'foob',
'lowercase with padding decodes correctly');
# Empty input → empty output, both directions.
is(main::encode_base32(''), '', 'empty encode → empty');
is(main::decode_base32(''), '', 'empty decode → empty');
};
# serialise_variable / unserialise_variable
#
# Webmin's own serialization (used by remote_eval and friends). Format:
# TYPE,urlized-payload where nested collections re-encode through urlize
# at each level — so nested structures gain layers of %25 escaping.
subtest 'serialise_variable / unserialise_variable' => sub {
# Scalars round-trip byte-for-byte.
for my $s ('hello', '', 'a,b,c', 'a=b&c', "\x00\xff", "spaces here") {
is(main::unserialise_variable(main::serialise_variable($s)), $s,
"scalar round-trip: '$s'");
}
# undef has a dedicated marker.
is(main::serialise_variable(undef), 'UNDEF', 'undef serializes to "UNDEF"');
is(main::unserialise_variable('UNDEF'), undef, '"UNDEF" deserializes to undef');
# Refs.
my $scalar_ref = \'inner';
is_deeply(main::unserialise_variable(main::serialise_variable($scalar_ref)),
$scalar_ref, 'scalar ref round-trips');
# Arrays — note numeric values come back as strings (Perl scalar stringification).
is_deeply(main::unserialise_variable(main::serialise_variable([1,2,3])),
['1','2','3'], 'array of numbers round-trips (as strings)');
is_deeply(main::unserialise_variable(main::serialise_variable(['a','b','c'])),
['a','b','c'], 'array of strings round-trips');
is_deeply(main::unserialise_variable(main::serialise_variable([])),
[], 'empty array round-trips');
# Hashes.
is_deeply(main::unserialise_variable(main::serialise_variable({a=>'x', b=>'y'})),
{a=>'x', b=>'y'}, 'flat hash round-trips');
is_deeply(main::unserialise_variable(main::serialise_variable({})),
{}, 'empty hash round-trips');
# Nested — array-of-arrays and hash-of-hashes survive the recursive
# urlize wrapping (each level adds %25 to existing %s).
is_deeply(main::unserialise_variable(main::serialise_variable([[1,2],[3,4]])),
[['1','2'],['3','4']], 'nested array round-trips');
is_deeply(main::unserialise_variable(
main::serialise_variable({outer=>{inner=>['x','y']}})),
{outer=>{inner=>['x','y']}}, 'nested hash round-trips');
# Wire-format spot checks — pin the documented format so callers that
# rely on it (remote_eval) don't silently change shape.
is(main::serialise_variable('hi'), 'VAL,hi', 'scalar wire format');
is(main::serialise_variable('a,b'), 'VAL,a%2Cb', 'comma in scalar urlized');
is(main::serialise_variable([1,2]), 'ARRAY,VAL%2C1,VAL%2C2',
'array wire format (one level of urlize wrapping)');
# Data::Dumper path — opt-in via the second arg.
my $d = main::serialise_variable({k=>'v'}, 1);
like($d, qr/^\$VAR1\s*=/, 'dumper mode emits Data::Dumper format');
is_deeply(main::unserialise_variable($d), {k=>'v'},
'dumper-format round-trips through the $VAR1 detector');
};
# convert_to_json / convert_from_json
#
# Thin wrappers over JSON::XS or JSON::PP. We test the wrapper contract —
# the defaults, the pretty flag, the raw-utf8 flag, the undef-defaulting,
# and the relaxed parser — not JSON conformance, which is the library's job.
subtest 'convert_to_json / convert_from_json' => sub {
# Plain round-trip preserves structure (not key order).
my $in = {name=>'x', items=>[1,2,3], nested=>{k=>'v'}};
is_deeply(main::convert_from_json(main::convert_to_json($in)), $in,
'round-trips a mixed structure');
# Pretty output is human-formatted (multi-line, indented).
my $pretty = main::convert_to_json({a=>1,b=>2}, 1);
like($pretty, qr/\n/, 'pretty mode produces multi-line output');
# And still round-trips.
is_deeply(main::convert_from_json($pretty), {a=>1,b=>2},
'pretty output still parses');
# Current contract: undef input becomes {} (the `||= {}` default).
is(main::convert_to_json(undef), '{}', 'undef input → "{}"');
# Arrays at the top level work too.
is(main::convert_to_json([1,2,3]), '[1,2,3]', 'top-level array encodes');
is_deeply(main::convert_from_json('[1,2,3]'), [1,2,3], 'top-level array decodes');
# Relaxed mode accepts comments and trailing commas (JSON::PP feature).
my $rx = main::convert_from_json('{"a":1, /* note */ "b":2,}', 0, 1);
is_deeply($rx, {a=>1,b=>2}, 'relaxed parser accepts /* comments */ and trailing comma');
};
done_testing();

164
t/web-lib-funcs-numeric.t Normal file
View File

@@ -0,0 +1,164 @@
#!/usr/bin/perl
# Unit tests for numeric / version-comparison helpers in web-lib-funcs.pl.
# Pure subs — bare require is enough.
use strict;
use warnings;
use Test::More;
use File::Basename qw(dirname);
use File::Spec;
my $script = File::Spec->rel2abs(
File::Spec->catfile(dirname(__FILE__), '..', 'web-lib-funcs.pl'));
require $script;
# is_int — strict signed decimal integer.
#
# Regex is /^([-]?\d+)$/: allows a single leading "-", no leading "+",
# no whitespace, no scientific notation, no hex.
subtest 'is_int' => sub {
ok( main::is_int('0'), '"0" is int');
ok( main::is_int('42'), '"42" is int');
ok( main::is_int('-5'), 'negative int');
ok( main::is_int('01'), 'leading zero accepted');
ok(!main::is_int('+5'), 'leading "+" rejected (no signed-positive form)');
ok(!main::is_int(' 5'), 'leading whitespace rejected');
ok(!main::is_int('5 '), 'trailing whitespace rejected');
ok(!main::is_int('1.0'), 'decimals rejected');
ok(!main::is_int('1e3'), 'scientific notation rejected');
ok(!main::is_int('0x10'), 'hex rejected');
ok(!main::is_int(''), 'empty rejected');
ok(!main::is_int('abc'), 'non-numeric rejected');
# undef returns false but also triggers a "uninitialized value" warning
# inside the regex (web-lib-funcs.pl is not warnings-enabled today, but
# `prove -w` or a future `use warnings` would surface it). Silence the
# warning at the call site so the suite stays clean either way.
{ local $SIG{__WARN__} = sub {};
ok(!main::is_int(undef), 'undef rejected'); }
};
# is_float — strict decimal with a dot.
#
# Regex is /^[-]?(\.\d+|\d+\.\d+)$/. The decimal point is required, so
# integers don't qualify. Trailing dot ("5.") is rejected. Scientific
# notation rejected.
subtest 'is_float' => sub {
ok( main::is_float('1.5'), 'plain float');
ok( main::is_float('-1.5'), 'negative float');
ok( main::is_float('.5'), 'leading-dot form accepted');
ok( main::is_float('-.5'), 'negative leading-dot form accepted');
ok( main::is_float('0.0'), 'zero with decimal accepted');
ok(!main::is_float('5'), 'plain integer rejected (decimal point required)');
ok(!main::is_float('5.'), 'trailing-dot form rejected');
ok(!main::is_float('1e3'), 'scientific notation rejected');
ok(!main::is_float('+1.5'), 'leading "+" rejected');
ok(!main::is_float(' 1.5'), 'leading whitespace rejected');
ok(!main::is_float(''), 'empty rejected');
ok(!main::is_float('abc'), 'non-numeric rejected');
# See is_int's undef comment.
{ local $SIG{__WARN__} = sub {};
ok(!main::is_float(undef), 'undef rejected'); }
};
# float — parse-and-format helper.
#
# Returns sprintf('%.2f', $n) if that's non-zero, otherwise the literal 0.
# So unparseable input collapses to plain 0 (no decimals), but a valid
# zero number also returns plain 0 — the two are indistinguishable from
# the output side. Leading "+" is silently accepted here even though
# is_int / is_float reject it — asymmetric with the validators.
subtest 'float' => sub {
is(main::float('42'), '42.00', 'integer string → 2-decimal form');
is(main::float('1.5'), '1.50', 'float string → 2-decimal form');
is(main::float('-1.5'), '-1.50', 'negative float');
is(main::float('1e3'), '1000.00', 'scientific notation parsed');
is(main::float('+5'), '5.00', 'leading "+" silently accepted (asymmetric with is_int/is_float)');
# All these collapse to plain 0 — non-parseable, empty, undef, true
# zero. Non-numeric / undef inputs warn inside sprintf under -w
# (uninitialized / isn't numeric); silence per call site so the suite
# stays warning-free regardless of how prove is invoked.
{ local $SIG{__WARN__} = sub {};
is(main::float('abc'), 0, 'non-numeric → 0');
is(main::float(''), 0, 'empty → 0');
is(main::float(undef), 0, 'undef → 0'); }
is(main::float('0'), 0, 'zero collapses to plain 0 (not "0.00")');
is(main::float('0.0'), 0, 'zero with decimal also collapses to plain 0');
};
# compare_version_numbers — Debian-ish version comparator.
#
# Two calling shapes:
# compare_version_numbers($a, $b) → -1 / 0 / 1
# compare_version_numbers($a, $op, $b) → boolean
#
# Splits each version on /[.\-+~_]/, then walks segment-by-segment with
# a handful of special cases (pure numeric, numeric+string, "ubuntu"
# prefix strip, "rcN" < final).
subtest 'compare_version_numbers (numeric form)' => sub {
# Equal.
is(main::compare_version_numbers('1.0', '1.0'), 0, 'equal');
is(main::compare_version_numbers('1.2.3', '1.2.3'), 0, 'equal three-part');
# Numeric ordering — NOT lexical, so "1.10" > "1.9".
is(main::compare_version_numbers('1.10', '1.9'), 1, '1.10 > 1.9 (numeric, not lexical)');
is(main::compare_version_numbers('1.0', '1.1'), -1, 'simple less-than');
is(main::compare_version_numbers('2', '1.9'), 1, 'shorter higher major wins');
# Different separators are interchangeable.
is(main::compare_version_numbers('1-2', '1.2'), 0, 'dot and dash interchangeable');
is(main::compare_version_numbers('1_2~3', '1.2.3'), 0, 'underscore and tilde interchangeable');
# Numeric segment with a string tail — string compared after number.
is(main::compare_version_numbers('1ubuntu5', '1ubuntu10'), -1, 'ubuntu5 < ubuntu10 (numeric tail)');
is(main::compare_version_numbers('6redhat', '8redhat'), -1, 'leading number wins over string tail');
# Pure-string-prefix + number variant ("centos7" vs "centos8").
is(main::compare_version_numbers('centos7', 'centos8'), -1, 'centos7 < centos8');
# "ubuntu" prefix is silently stripped per-segment.
is(main::compare_version_numbers('ubuntu5', '5'), 0,
'"ubuntu" prefix is stripped (ubuntu5 == 5)');
# rcN is always older than the final release of the same number.
is(main::compare_version_numbers('1rc1', '1'), -1, 'rc1 < release');
is(main::compare_version_numbers('1', '1rc1'), 1, 'release > rc1');
is(main::compare_version_numbers('1RC1', '1'), -1, 'rc match is case-insensitive');
is(main::compare_version_numbers('1rc2', '1rc1'), 1, 'rc2 > rc1');
# Other string tails (alpha, beta) are NOT special-cased like rc, so
# they compare lexically after the leading number — and lose to a
# bare number on the same prefix because "" sorts before "alpha".
is(main::compare_version_numbers('1alpha', '1'), 1, '"alpha" tail > bare (lexical, no special-case)');
is(main::compare_version_numbers('1beta', '1alpha'), 1, 'lexical compare of string tails');
# Trailing-zero / segment-count asymmetry: 1.0 < 1.0.0 (the trailing
# missing segment compares as "less than" 0). This is a quirk to be
# aware of when normalizing version strings before compare.
is(main::compare_version_numbers('1.0', '1.0.0'), -1, 'shorter < longer when prefix matches');
is(main::compare_version_numbers('1.0.0', '1.0'), 1, 'longer > shorter when prefix matches');
# Empty / undef inputs degrade quietly to a numeric answer rather
# than crashing. (The sub defaults undef to '' internally, so undef
# args don't warn under -w.)
is(main::compare_version_numbers('', '1.0'), -1, 'empty < non-empty');
is(main::compare_version_numbers(undef, undef), 0, 'two undefs compare equal');
};
subtest 'compare_version_numbers (operator form)' => sub {
ok( main::compare_version_numbers('1.0', '<', '2.0'), '1.0 < 2.0');
ok( main::compare_version_numbers('1.0', '<=', '1.0'), '1.0 <= 1.0');
ok( main::compare_version_numbers('1.0', '==', '1.0'), '1.0 == 1.0');
ok( main::compare_version_numbers('2.0', '>', '1.0'), '2.0 > 1.0');
ok( main::compare_version_numbers('2.0', '>=', '2.0'), '2.0 >= 2.0');
ok(!main::compare_version_numbers('1.0', '>', '2.0'), '1.0 not > 2.0');
ok(!main::compare_version_numbers('1.0', '==', '2.0'), '1.0 not == 2.0');
# Numeric-not-lexical also holds through the operator form.
ok( main::compare_version_numbers('1.10', '>', '1.9'), '1.10 > 1.9 via op');
};
done_testing();

View File

@@ -6242,6 +6242,8 @@ sub decode_base32
{
$_ = shift;
my ($l);
s/=+$//;
$_ = uc($_);
tr|A-Z2-7|\0-\37|;
$_ = unpack('B*', $_);
s/000(.....)/$1/g;
@@ -13235,12 +13237,16 @@ if ($cmp) {
return &compare_version_numbers($ver1, $ver2) < 0 if ($cmp eq '<');
}
# Default undef inputs to '' so undef args don't warn in split and
# shorter-vs-longer comparisons don't warn on the missing-segment side.
$ver1 = '' if (!defined($ver1));
$ver2 = '' if (!defined($ver2));
my @sp1 = split(/[\.\-\+\~\_]/, $ver1);
my @sp2 = split(/[\.\-\+\~\_]/, $ver2);
my $tmp;
for(my $i=0; $i<@sp1 || $i<@sp2; $i++) {
my $v1 = $sp1[$i];
my $v2 = $sp2[$i];
my $v1 = defined($sp1[$i]) ? $sp1[$i] : '';
my $v2 = defined($sp2[$i]) ? $sp2[$i] : '';
my $comp;
$v1 =~ s/^ubuntu//g;
$v2 =~ s/^ubuntu//g;