mirror of
https://github.com/webmin/webmin.git
synced 2026-02-07 07:52:13 +00:00
1177 lines
42 KiB
Perl
Executable File
1177 lines
42 KiB
Perl
Executable File
#!/usr/bin/env perl
|
||
# language-manager - Automatic translation and/or transcoding language files for all or specific modules
|
||
|
||
use strict;
|
||
use warnings;
|
||
use 5.010;
|
||
|
||
use File::Spec;
|
||
use File::Basename;
|
||
use File::Find;
|
||
use JSON::PP;
|
||
use HTTP::Tiny;
|
||
use HTML::Entities;
|
||
use List::MoreUtils qw(any);
|
||
use Cwd qw(cwd);
|
||
use Encode qw/encode decode/;
|
||
use Encode::Detect::Detector;
|
||
use Term::ANSIColor qw(:constants);
|
||
use Getopt::Long qw(:config permute pass_through);
|
||
use Pod::Usage;
|
||
|
||
sub main
|
||
{
|
||
my %data;
|
||
|
||
# Register start time
|
||
$data{'start_time'} = time();
|
||
|
||
# Prevent accidental script termination
|
||
my $sigkill = sub {
|
||
if ($data{'sigkill'}++ > 5) {
|
||
say RED, "\nTerminating: Ctrl-C has been pressed more than 5 times ..", RESET;
|
||
exit;
|
||
}
|
||
};
|
||
$SIG{INT} = \&$sigkill;
|
||
|
||
# Get user options from the command line
|
||
my %opt;
|
||
GetOptions('help|h' => \$opt{'help'},
|
||
'type|w:s' => \$opt{'type'},
|
||
'modules|m:s' => \$opt{'modules'},
|
||
'modules-exclude|me:s' => \$opt{'modules-exclude'},
|
||
'language-target|t:s' => \$opt{'language-target'},
|
||
'language-target-exclude|te:s' => \$opt{'language-target-exclude'},
|
||
'language-source|s:s' => \$opt{'language-source'},
|
||
'language-source-exclude|se:s' => \$opt{'language-source-exclude'},
|
||
'language-source-encoding|e:s' => \$opt{'language-source-encoding'},
|
||
# 'action|a:s' => \$opt{'action'}, #################################
|
||
# 'key|k:s' => \$opt{'key'}, ######### coming soon! ##########
|
||
# 'value|d:s' => \$opt{'value'}, #################################
|
||
'only-diff|od!' => \$opt{'only-diff'},
|
||
'only-transcode|ot!' => \$opt{'only-transcode'},
|
||
'keys-exclude|ke:s' => \$opt{'keys-exclude'},
|
||
'keys-test|kt:s' => \$opt{'keys-test'},
|
||
'git-commit|gc!' => \$opt{'git-commit'},
|
||
'log|l:s' => \$opt{'log'},
|
||
'verbose|v:i' => \$opt{'verbose'});
|
||
|
||
# Print help and exit
|
||
pod2usage(0) if ($opt{'help'});
|
||
|
||
# Enforce verbose output by default
|
||
if (!defined($opt{'verbose'})) {
|
||
$opt{'verbose'} = 1;
|
||
}
|
||
|
||
# Get current path and make workaround to run from sub dir
|
||
my $path = cwd;
|
||
if ($path =~ /\/bin$/) {
|
||
$path = Cwd::realpath('..');
|
||
}
|
||
my $type = -d "$path/ulang" ? 'ulang' : 'lang';
|
||
|
||
# Load Webmin lib
|
||
require("$path/web-lib-funcs.pl");
|
||
|
||
# Enforce user specific target: language directory, config.info or module.info file {lang|ulang|config|module}
|
||
$opt{'type'} && ($type = $opt{'type'});
|
||
|
||
# Store path and type
|
||
$data{'path'} = $path;
|
||
$data{'type'} = $type;
|
||
|
||
# Set default format to text
|
||
$opt{'translate-format'} = 'text';
|
||
|
||
# Force HTML format for "module" type translates
|
||
if (defined($opt{'type'}) && $opt{'type'} eq 'module') {
|
||
$opt{'translate-format'} = 'html';
|
||
}
|
||
|
||
# Check if we can get Google Translate API token
|
||
$data{'token'} = get_google_translate_token();
|
||
if (!$data{'token'}) {
|
||
$opt{'only-diff'} = 1;
|
||
$opt{'only-diff-auto'} = 1;
|
||
say YELLOW .
|
||
"Translation will not be done as Google Translate API key is missing. Falling back to `only-diff` mode.." . RESET;
|
||
}
|
||
|
||
# Get list of languages from `lang_list.txt` file
|
||
$data{'languages_source_list'} = list_languages_local(\%data);
|
||
$data{'languages_source_list_codes'} = [map {$_->{'lang'}} @{ $data{'languages_source_list'} }];
|
||
|
||
# Define base language. Default is set English
|
||
$opt{'language-source'} ||= 'en';
|
||
my $language_source = $opt{'language-source'};
|
||
|
||
# Check, if source language exists
|
||
if (!any {$_ =~ /^$language_source/} @{ $data{'languages_source_list_codes'} }) {
|
||
errors('language-source', $language_source);
|
||
exit;
|
||
}
|
||
|
||
# What language encoding do we expect on human translated files
|
||
$opt{'language-source-encoding'} ||= 'utf-8';
|
||
|
||
# Always exclude source language
|
||
my $language_target_exclude = $opt{'language-target-exclude'};
|
||
if ($language_target_exclude) {
|
||
$language_target_exclude .= ",$language_source";
|
||
} else {
|
||
$language_target_exclude = $language_source;
|
||
}
|
||
|
||
# Don't put on the list user excluded languages
|
||
if ($language_target_exclude) {
|
||
my @languages_excluded = split(',', $language_target_exclude);
|
||
my @languages_allowed;
|
||
for my $language (@{ $data{'languages_source_list_codes'} }) {
|
||
if (!any {$_ =~ /^$language$/} @languages_excluded) {
|
||
push(@languages_allowed, $language);
|
||
}
|
||
}
|
||
if (@languages_allowed) {
|
||
$data{'languages_source_list_codes'} = [@languages_allowed];
|
||
}
|
||
}
|
||
|
||
# Find out which modules to update, if exist
|
||
my $modules = $opt{'modules'};
|
||
my @modules;
|
||
if ($modules) {
|
||
my @ml = split(',', $modules);
|
||
foreach my $module (@ml) {
|
||
my ($exists) = source_data(\%data, \%opt, $module);
|
||
if ($exists) {
|
||
push(@modules, $module);
|
||
}
|
||
}
|
||
@modules = sort @modules;
|
||
} else {
|
||
my $found;
|
||
my @modules_exclude = split(',', $opt{'modules-exclude'});
|
||
find(
|
||
{
|
||
wanted => sub {
|
||
$found = $File::Find::name;
|
||
if (!-l $found && ($found =~ /$type\/$language_source$/ || $found =~ /$type\.info$/)) {
|
||
$found =~ s/^$path\///g;
|
||
$found =~ s/\/$type\/$language_source//g;
|
||
$found =~ s/\/$type\.info//g;
|
||
$found =~ s/\/$language_source//g;
|
||
$found =~ s/\/$language_source//g;
|
||
if (!any {$_ =~ /^$found$/} @modules_exclude) {
|
||
push(@modules, $found);
|
||
}
|
||
}
|
||
},
|
||
},
|
||
$path);
|
||
@modules = sort @modules;
|
||
}
|
||
$data{'modules'} = \@modules;
|
||
|
||
# Define target language(s) or translate all
|
||
$opt{'language-target'} = [$opt{'language-target'} ? split(',', $opt{'language-target'}) : ()];
|
||
if (@{ $opt{'language-target'} }) {
|
||
my @bad_languages;
|
||
for my $language (@{ $opt{'language-target'} }) {
|
||
push(@bad_languages, $language) if !any {$_ =~ /^$language$/} @{ $data{'languages_source_list_codes'} };
|
||
}
|
||
if (@bad_languages) {
|
||
errors('language-target', join(',', @bad_languages));
|
||
exit;
|
||
}
|
||
}
|
||
|
||
# Test only given keys in bulk translation. Translated strings are not saved
|
||
$opt{'keys-test'} = [$opt{'keys-test'} ? split(',', $opt{'keys-test'}) : ()];
|
||
|
||
# Exclude listed keys from bulk translation
|
||
$opt{'keys-exclude'} = [$opt{'keys-exclude'} ? split(',', $opt{'keys-exclude'}) : ()];
|
||
|
||
# Just run tests, and exit, without writing anything
|
||
if (@{ $opt{'keys-test'} }) {
|
||
say CYAN, "Translation testing for selected keys is about to start ..", RESET;
|
||
if (prompt('next')) {
|
||
go(\%opt, \%data);
|
||
}
|
||
} else {
|
||
|
||
# User interactions
|
||
talk('affected', \%opt, \%data);
|
||
|
||
# Log the output. Start
|
||
$opt{'log'} ||= "/tmp/language-manager-@{[time()]}.log";
|
||
open $data{'out'}, ">", "$opt{'log'}" or die RED, "Error creating log: $!\n", RESET, "\n";
|
||
|
||
# Execute force transcode/translate
|
||
talk('overwrite-pre', \%opt, \%data);
|
||
if (prompt('next')) {
|
||
go(\%opt, \%data);
|
||
}
|
||
|
||
# Log the output. End
|
||
close $data{'out'};
|
||
}
|
||
|
||
return 0;
|
||
}
|
||
main();
|
||
|
||
# The following is used to get correct language files based on old language table.
|
||
# This function, should not be really needed, unless compiling languages with
|
||
# Webmin version prior to 1.950 (first time), as Webmin 1.950+ is going to
|
||
# use new langauge map and have all related language files in UTF-8 already
|
||
sub language_map
|
||
{
|
||
my ($code) = @_;
|
||
my %language_map = (
|
||
|
||
# Use plain language codes
|
||
'ja' => 'ja_JP.euc',
|
||
'ko' => 'ko_KR.euc',
|
||
'ms' => 'ms_MY',
|
||
'ru' => 'ru_RU',
|
||
'uk' => 'uk_UA',
|
||
'zh' => 'zh_CN',
|
||
'zh_TW' => 'zh_TW.Big5',
|
||
|
||
# Czech is `cs` not `cz`, as there is no `cz` at all
|
||
'cs' => 'cz',
|
||
|
||
# Slovenian is `sl` not `si`, as `si` is Sinhala
|
||
'sl' => 'si',
|
||
|
||
# Greek is `el` not `gr`
|
||
'el' => 'gr',);
|
||
|
||
if ($language_map{$code}) {
|
||
$code = $language_map{$code};
|
||
}
|
||
return $code;
|
||
}
|
||
|
||
# Always check, if strings that are about to be translated haven't been
|
||
# translated by human already, and if so, extract them with correct
|
||
# encoding, to be further converted to UTF-8 and stored on destination file
|
||
sub language_source_file
|
||
{
|
||
my ($opt, $data, %data) = @_;
|
||
my $language_code = $data{'language-code'};
|
||
my %language_source_file;
|
||
my $language_source_file;
|
||
my $language_source_file_encoding = 'utf-8';
|
||
|
||
# Ignore user defined source languages
|
||
my $language_source_exclude = $opt->{'language-source-exclude'};
|
||
if ($language_source_exclude) {
|
||
my @languages_excluded = split(',', $language_source_exclude);
|
||
if (any {$_ =~ /^$language_code$/} @languages_excluded) {
|
||
return (undef, undef);
|
||
}
|
||
}
|
||
|
||
# Correct language code according to the old language map
|
||
if ($opt->{'language-source-encoding'} eq 'map') {
|
||
$language_code = language_map($language_code);
|
||
}
|
||
|
||
# Set prefix in case of processing `config.info` or `module.info`
|
||
my $language_source_file_target_ = "/";
|
||
if (defined($opt->{'type'}) && $opt->{'type'} =~ /config|module/) {
|
||
$language_source_file_target_ = "/$opt->{'type'}.info.";
|
||
}
|
||
|
||
# Get already translated language strings from current directory
|
||
my $language_source_file_target = "$data{'module-path'}$language_source_file_target_$language_code";
|
||
if (-r $language_source_file_target) {
|
||
$language_source_file = $language_source_file_target;
|
||
}
|
||
|
||
# Supply proper encoding for existing language files
|
||
if ($language_source_file) {
|
||
talk_log(("" . GREEN . " .. Found human translated file for $data{'language-name'} ($language_code)" . RESET . ""),
|
||
$data, 1);
|
||
read_file("$language_source_file", \%language_source_file);
|
||
|
||
# Force encoding based on map
|
||
if ($opt->{'language-source-encoding'} eq 'map') {
|
||
my $map_auto;
|
||
my $code = $data{'language-code'};
|
||
|
||
if ($code eq 'ja') {
|
||
$language_source_file_encoding = "euc-jp";
|
||
} elsif ($code eq 'ko') {
|
||
$language_source_file_encoding = "euc-kr";
|
||
} elsif ($code eq 'ru' || $code eq 'bg' || $code eq 'uk') {
|
||
$language_source_file_encoding = "cp1251";
|
||
} elsif ($code eq 'ca' ||
|
||
$code eq 'fr' ||
|
||
$code eq 'hr' ||
|
||
$code eq 'lt' ||
|
||
$code eq 'no')
|
||
{
|
||
$language_source_file_encoding = "cp1252";
|
||
} elsif ($code eq 'cs' ||
|
||
$code eq 'sk' ||
|
||
$code eq 'pl' ||
|
||
$code eq 'sl' ||
|
||
$code eq 'hu')
|
||
{
|
||
$language_source_file_encoding = "iso-8859-2";
|
||
} elsif ($code eq 'tr') {
|
||
$language_source_file_encoding = "iso-8859-9";
|
||
} elsif ($code eq 'he') {
|
||
$language_source_file_encoding = "cp1255";
|
||
} elsif ($code eq 'th') {
|
||
$language_source_file_encoding = "tis-620";
|
||
} elsif ($code eq 'zh') {
|
||
$language_source_file_encoding = "gb2312";
|
||
} elsif ($code eq 'zh_TW') {
|
||
$language_source_file_encoding = "big5";
|
||
} else {
|
||
my $language_source_file_data = read_file_contents($language_source_file_target);
|
||
my $detected_encoding = Encode::Detect::Detector::detect($language_source_file_data);
|
||
if ($detected_encoding) {
|
||
$language_source_file_encoding = $detected_encoding;
|
||
$map_auto = " (auto)";
|
||
} else {
|
||
$language_source_file_encoding = 'utf-8';
|
||
$map_auto = " (auto enforced)";
|
||
}
|
||
}
|
||
|
||
talk_log(
|
||
("" . CYAN . " .. Force file encoding to \`$language_source_file_encoding\`" .
|
||
($map_auto // '') . " as derived from language map" . RESET . ""
|
||
),
|
||
$data,
|
||
1);
|
||
|
||
}
|
||
|
||
# Figure out encoding automatically
|
||
elsif ($opt->{'language-source-encoding'} eq 'auto') {
|
||
my $language_source_file_data = read_file_contents($language_source_file_target);
|
||
$language_source_file_encoding = Encode::Detect::Detector::detect($language_source_file_data);
|
||
if (!$language_source_file_encoding) {
|
||
$language_source_file_encoding =
|
||
ask(' ' . BRIGHT_RED . '.. Cannot detect encoding, enter it manually' . RESET . '');
|
||
talk_log(
|
||
("" . BRIGHT_RED .
|
||
" .. Manually set file encoding to \`$language_source_file_encoding\`" . RESET . ""),
|
||
$data,
|
||
1);
|
||
} else {
|
||
talk_log(
|
||
("" . MAGENTA .
|
||
" .. Automatically detected file encoding is \`$language_source_file_encoding\`" . RESET . ""
|
||
),
|
||
$data,
|
||
1);
|
||
}
|
||
|
||
} else {
|
||
$language_source_file_encoding = $opt->{'language-source-encoding'};
|
||
talk_log(("" . MAGENTA . " .. Set file encoding to default \`$language_source_file_encoding\`" . RESET . ""),
|
||
$data, 1);
|
||
}
|
||
|
||
} else {
|
||
say YELLOW, " .. No human translated file has be found for $data{'language-name'} ($language_code)", RESET;
|
||
}
|
||
|
||
return (\%language_source_file, $language_source_file_encoding);
|
||
}
|
||
|
||
sub language_transcode
|
||
{
|
||
my ($string, $encoding) = @_;
|
||
|
||
eval {$string = decode($encoding, $string)};
|
||
if ($@) {
|
||
say "Error found: $@";
|
||
if (!prompt('next')) {
|
||
exit;
|
||
}
|
||
}
|
||
decode_entities($string);
|
||
$string = encode('utf-8', $string);
|
||
return $string;
|
||
}
|
||
|
||
sub source_data
|
||
{
|
||
my ($data, $opt, $module) = @_;
|
||
my ($language_source, $type, $target, $source_file, $source_file_, $exists);
|
||
|
||
$language_source = $opt->{'language-source'};
|
||
$type = $data->{'type'};
|
||
if ($type eq 'config' || $type eq 'module') {
|
||
$language_source = $language_source ne 'en' ? "$type.info.$language_source" : "$type.info";
|
||
}
|
||
|
||
$target = "$data->{'path'}/$module";
|
||
$source_file = "$target/$type/$language_source";
|
||
$source_file_ = "$target/$language_source";
|
||
$exists =
|
||
(-d $target && !-l $target && ((-r $source_file && !-l $source_file) || (-r $source_file_ && !-l $source_file_))) || 0;
|
||
|
||
if (-r $source_file && !-l $source_file) {
|
||
$target .= "/$type";
|
||
} elsif (-r $source_file_ && !-l $source_file_) {
|
||
$source_file = $source_file_;
|
||
}
|
||
|
||
return ($exists, $source_file, $target);
|
||
}
|
||
|
||
# Returns an array of supported languages,
|
||
# taken from Webmin's lang_list.txt file.
|
||
sub list_languages_local
|
||
{
|
||
my ($data) = @_;
|
||
my ($key, $value, $options, $language, @languages);
|
||
|
||
open(my $LANG, "$data->{'path'}/lang_list.txt");
|
||
while (<$LANG>) {
|
||
if (/^(.*=\w+)\s+(.*)/) {
|
||
$language = { 'desc' => $2 };
|
||
foreach $options (split(/,/, $1)) {
|
||
if ($options =~ /^([^=]+)=(.*)$/) {
|
||
$key = $1;
|
||
$value = $2;
|
||
$key =~ s/^\s+|\s+$//g;
|
||
$value =~ s/^\s+|\s+$//g;
|
||
$language->{$key} = $value;
|
||
}
|
||
}
|
||
push(@languages, $language);
|
||
}
|
||
}
|
||
close($LANG);
|
||
@languages = sort {$a->{'desc'} cmp $b->{'desc'}} @languages;
|
||
return wantarray ? @languages : \@languages;
|
||
}
|
||
|
||
# Prepare the string that is going to be sent to translator
|
||
sub translate_substitute
|
||
{
|
||
my ($value, $opt) = @_;
|
||
my $format = $opt->{'translate-format'};
|
||
|
||
$value = language_transcode($value, 'utf-8');
|
||
|
||
# Preserve un-quoted $1, $2, $3 in strings, which are broken in so different ways, in different languages differently
|
||
if ($format eq 'text') {
|
||
$value =~ s/(?<!['|"|`|“|«|=|?|&])(\$(\d+))/%$2/g;
|
||
}
|
||
|
||
# Preserve path in strings
|
||
elsif ($format eq 'html') {
|
||
$value =~ s/(?:(^(\/[\w+].*?),|^(\/[\w+].*?)\.|(\/[\w+].*?) |(\/[\w+].*?)$))/<span translate="no">"$1"<\/span>/g;
|
||
}
|
||
|
||
return $value;
|
||
}
|
||
|
||
# Perform white sorcery on returned string from translator,
|
||
# as it seems to be breaking quite a lot of things
|
||
sub translated_substitute
|
||
{
|
||
my ($translated, $original, $code, $rtl, $opt) = @_;
|
||
|
||
my $format = $opt->{'translate-format'};
|
||
my $remove_spaces = sub {
|
||
my ($str) = @_;
|
||
$str =~ s/[ ]+//g;
|
||
return $str;
|
||
};
|
||
|
||
# Fix broken by translator HTML closing tags
|
||
$translated =~ s/<\/[ ]*(\w+)>/<\/$1>/g;
|
||
$translated =~ s/<[ ](.*?)[ ]/<$1/g;
|
||
|
||
if ($format eq 'text') {
|
||
|
||
# Unescape preserved, un-quoted $1, $2, $3
|
||
$translated =~ s/\%[ ]+(\d+)/ \$$1 /gi;
|
||
|
||
# If replacement is changed to localized version
|
||
$translated =~ s/%(\d+)/ \$$1/gi;
|
||
|
||
# If the string starts with $1 and contains a space
|
||
$translated =~ s/(^[ ]+\$)/\$/gi;
|
||
|
||
# Get rid from undesirable spaces in parentheses
|
||
$translated =~ s/([ ]+\))/)/gi;
|
||
$translated =~ s/([ ]+\))/)/gi;
|
||
$translated =~ s/(\([ ]+)/(/gi;
|
||
$translated =~ s/(\([ ]+)/(/gi;
|
||
|
||
# Print actual values with percent sign
|
||
$translated =~ s/\([ ]+\$(\d+)[ ]*%\)/(\$$1 %)/gi;
|
||
|
||
# The following, is abnormal way to make things work, around of the bugs produced by Google Translate API for (bg).
|
||
if ($code eq 'bg') {
|
||
if ($original =~ /\$(\d+)/ && $translated !~ /\$(\d+)/) {
|
||
$translated =~ s/(\d+)/\$$1/g;
|
||
}
|
||
}
|
||
|
||
# For RTL substitute `\d ٪` with `$\d`
|
||
if ($rtl) {
|
||
$translated =~ s/(٪[ ]+(\d+))/\$$2/g;
|
||
|
||
# Fix stuck, consecutive $2$1
|
||
$translated =~ s/\$(\d+)\$(\d+)/\$$1 \$$2/g;
|
||
|
||
# Urdu returned as ٪1
|
||
if ($code eq 'ur') {
|
||
$translated =~ s/((\d+)٪)/\$$2/g;
|
||
}
|
||
}
|
||
|
||
# Language specific fixes
|
||
if ($code eq 'th') {
|
||
$translated =~ s/(([3-9])%)/\$$2/g;
|
||
}
|
||
if ($code eq 'ro') {
|
||
$translated =~ s/ (([1-9])%) / \$$2 /g;
|
||
}
|
||
|
||
} elsif ($format eq 'html') {
|
||
|
||
# Unescape path in strings
|
||
$translated =~ s/<span.*?translate.*?>(.*?)<.*?>/$1/gi;
|
||
|
||
# Remove any escapes as returned by translator, when run in HTML mode
|
||
$translated =~ s/"//gi;
|
||
$translated =~ s/'/'/gi;
|
||
}
|
||
|
||
# Restore destroyd tags
|
||
$translated =~ s/<[ ]*(?:(\?\w+|((?![br|hr|p|tt|pre|ul|li|ol])\w+).*?))[ ]*>(.*?)<.*?>/<$1>$3<\/$2>/gi;
|
||
|
||
$translated =~ s/([ ]*)\$[ ]*(\d+)/$1\$$2/g;
|
||
$translated =~ s/(['|"|`|“|«])[ ]*\$(\d+)[ ]*(['|"|`|“|»])/$1\$$2$3/g;
|
||
$translated =~ s/(<.*?>)[ ]*(.*?)[ ]*(<.*?>)/$1$2$3/g;
|
||
$translated =~ s/\$(\d+)[ ]*([:|:])[ ]*\$(\d+)[ ]*/ \$$1:\$$3 /g;
|
||
$translated =~ s/\$(\d+)[ ]*:[ ]*\$(\d+)/\$$1:\$$2/g;
|
||
$translated =~ s/(\p{L})\$(\d+)[ ]+/$1 \$$2 /g;
|
||
$translated =~ s/(\p{L})[ ]+([:|:]){2}[ ]+(\p{L})/$1::$3/g;
|
||
|
||
# Remove any whitespaces
|
||
$translated =~ s/([ ]+)/ /g;
|
||
|
||
# Fix trailing dot, which is separated from a word
|
||
$translated =~ s/(.)[ ]+\./$1./g;
|
||
|
||
# Fix trailing comma, which is separated from a word
|
||
$translated =~ s/(.)[ ]+,/$1,/g;
|
||
|
||
# Preserve inner formatting for tag's attrs
|
||
$translated =~ s/<(\w+)[ ]+(\w+)(.*?)(=)[ ]*(.*?)[ ]*(\w+)/<$1 $2=$5$6/g;
|
||
|
||
# Last .. should also be preceded by space
|
||
$translated =~ s/[ ]*(\.\.)$/ ../g;
|
||
|
||
# Preserve dates example formatting
|
||
$translated =~ s/(\p{L}{2,4}) \/ (\p{L}{2,3}) \/ (\p{L}{2,4}) (\(.*?\))/$1\/$2\/$3 $4/g;
|
||
|
||
# If initial value contains in the end of the string `text : ` or `text : $1`,
|
||
# then print it as such and not as `text: ` or `text: $1`
|
||
if ($original =~ /(?:([ ]+:[ ]*$|[ ]+:[ ]*\$\d+$))/) {
|
||
$translated =~ s/(?:(:[ ]*[^:]+$|:$))/ $1/g;
|
||
|
||
# Fix incorrectly positioned $1 :$2
|
||
if ($translated =~ /\$(\d+)[ ]+:\$\d+/) {
|
||
$translated =~ s/\$(\d+)[ ]+:\$(\d+)/\$$1 : \$$2/;
|
||
}
|
||
}
|
||
|
||
# If initial string contains `/$1/` then format translated accordingly
|
||
if ($original =~ /\/\$\d+\//) {
|
||
$translated =~ s/\/[ ]*\$(\d+)[ ]*\//\/\$$1\//g;
|
||
}
|
||
|
||
# If initial string contains `($2 %)` then format translated accordingly
|
||
if ($original =~ /\(\$(\d+)[ ]+%\)/) {
|
||
$translated =~ s/\(\$(\d+)[ ]*%\)/(\$$1 %)/g;
|
||
}
|
||
|
||
# Always fix properly included path
|
||
if ($original =~ /<tt>[\/|~\/].*?<\/tt>/) {
|
||
$translated =~ s/(<tt>)([\/|~\/].*?)(<\/tt>)/$1@{[&$remove_spaces($2)]}$3/g;
|
||
}
|
||
|
||
# Always consider `/` to be a delimiter and replace ` / ` to `/`
|
||
if ($original =~ /\w+\/\w+/) {
|
||
$translated =~ s/[ ]+(\/)[ ]+/$1/g;
|
||
}
|
||
|
||
# Treat `+` it is not not as ` + `
|
||
if ($original =~ /\w+\+\w+/) {
|
||
$translated =~ s/[ ]+(\+)[ ]+/$1/g;
|
||
}
|
||
|
||
return $translated;
|
||
}
|
||
|
||
# Make actual translation using Google Translate API
|
||
sub translate
|
||
{
|
||
my ($data, $opt, $target, $value) = @_;
|
||
|
||
my $source = $opt->{'language-source'};
|
||
my $format = $opt->{'translate-format'};
|
||
|
||
# Updating Google Translate API token to avoid expiration
|
||
my $time = time();
|
||
if ((($time - $data->{'start_time'}) / 60) > 10) {
|
||
say CYAN, "Updating Google Translate API token..", RESET;
|
||
$data->{'token'} = get_google_translate_token();
|
||
$data->{'start_time'} = $time;
|
||
}
|
||
|
||
my $token = $data->{'token'};
|
||
|
||
# Replace language code to match what translator expects
|
||
$target =~ s/_/-/;
|
||
|
||
# Prevent service unavailable error, reducing the call rate
|
||
select(undef, undef, undef, 0.25);
|
||
|
||
my $tr;
|
||
my $rsp = "https://translation.googleapis.com/language/translate/v2?q=@{[urlize($value)]}";
|
||
my $rsh = { 'Authorization' => "Bearer \"@{[trim($token)]}\"",
|
||
'User-Agent' => 'curl/7.29.1',
|
||
'Content-Type' => 'application/json; charset=utf-8', };
|
||
my $rsc = "{ 'source': '" . $source . "', 'target': '" . $target . "', 'format': '" . $format . "'}";
|
||
my $rs = HTTP::Tiny->new()->request('POST' => $rsp, { 'headers' => $rsh, 'content' => $rsc });
|
||
my $ts = $rs->{'success'};
|
||
my $tc = $rs->{'content'};
|
||
|
||
# Exctract translation on success
|
||
if ($ts) {
|
||
$tr = JSON::PP->new->decode($rs->{'content'});
|
||
}
|
||
|
||
# On error just try again in 5 seconds
|
||
else {
|
||
say YELLOW, "Error: Stopped when translating `$target` language", RESET;
|
||
print RED, "Error: Google Translator - $tc", RESET;
|
||
say CYAN, "Retrying automatically in 5 seconds ..", RESET;
|
||
sleep 5;
|
||
my $translated = translate($data, $opt, $target, $value);
|
||
return $translated;
|
||
}
|
||
|
||
return $tr;
|
||
}
|
||
|
||
# Run transcoding or translation of module(s) for the very first time
|
||
sub go
|
||
{
|
||
my ($opt, $data) = @_;
|
||
|
||
my ($module, $language);
|
||
my $path = $data->{'path'};
|
||
my $type = $data->{'type'};
|
||
my $token = $data->{'token'};
|
||
my $modules = $data->{'modules'};
|
||
my $language_source = $opt->{'language-source'};
|
||
my $language_source_encoding = $opt->{'language-source-encoding'};
|
||
my $language_target = $opt->{'language-target'};
|
||
my $language_target_exclude = $opt->{'language-target-exclude'};
|
||
my $keys_exclude = $opt->{'keys-exclude'};
|
||
my $keys_test = $opt->{'keys-test'};
|
||
my $git_commit = $opt->{'git-commit'};
|
||
|
||
foreach $module (@{$modules}) {
|
||
my (%template);
|
||
my ($exists, $mfile, $mpath) = source_data($data, $opt, $module);
|
||
|
||
# Check if there has been something to process, if not print a message
|
||
my $output;
|
||
|
||
# Get source, base language file
|
||
read_file($mfile, \%template);
|
||
|
||
talk_log(("Transcoding/translating " . BLUE BOLD, $module, RESET . " module .."), $data, 1);
|
||
foreach $language (@{ $data->{'languages_source_list'} }) {
|
||
|
||
# Get target language code
|
||
my $code = $language->{'lang'};
|
||
|
||
# Skip translating source, base language
|
||
next if ($code eq $language_source);
|
||
|
||
# Process only user defined languages or do all
|
||
if (@{$language_target}) {
|
||
next if (!any {$_ =~ /^$code$/} @{$language_target});
|
||
}
|
||
|
||
# Do not process excluded languages
|
||
if ($language_target_exclude) {
|
||
my @languages_excluded = split(',', $language_target_exclude);
|
||
if (any {$_ =~ /^$code$/} @languages_excluded) {
|
||
next;
|
||
}
|
||
}
|
||
|
||
# Get other target language attributes
|
||
my $name = $language->{'desc'};
|
||
my $rtl = $language->{'rtl'};
|
||
|
||
my %language;
|
||
my %language_auto;
|
||
|
||
my $message_type = "Processing";
|
||
$message_type = "Testing translations for selected keys with" if (@{$keys_test});
|
||
|
||
talk_log(("" . YELLOW . " .. $message_type $name ($code) language .." . RESET . ""), $data, $opt->{'verbose'});
|
||
my ($language_source_file, $language_source_file_encoding);
|
||
if (!@{$keys_test}) {
|
||
($language_source_file, $language_source_file_encoding) =
|
||
language_source_file($opt,
|
||
$data,
|
||
('language-name' => $name,
|
||
'language-code' => $code,
|
||
'module-path' => $mpath,
|
||
'module-name' => $module,
|
||
'translation-target' => $type
|
||
));
|
||
}
|
||
|
||
while (my ($key, $value) = each %template) {
|
||
|
||
# Don't add special keys
|
||
next if ($key eq '__norefs');
|
||
next if ($key =~ /^\#/);
|
||
|
||
# If set to test certain keys only, skip all other
|
||
if (@{$keys_test} && (!any {$_ =~ /^$key$/} @{$keys_test})) {
|
||
next;
|
||
}
|
||
|
||
# Modify `$key` when type is set to "module", otherwise keep default
|
||
my $key_ = $key;
|
||
my $key__ = $key;
|
||
|
||
# When processing `module.info` file, ignore most keys
|
||
# and add suffix to allowed to return expected format
|
||
if ($type eq 'module') {
|
||
if ($key !~ /name|desc|longdesc/) {
|
||
next;
|
||
}
|
||
my $code_ = $code;
|
||
if ($opt->{'language-source-encoding'} eq 'map') {
|
||
$code_ = language_map($code);
|
||
$key_ = "${key}_${code_}";
|
||
}
|
||
$key__ = "${key}_${code}";
|
||
}
|
||
|
||
# Human translated strings must be added to original `$code` file, e.g. `de` after transcoding
|
||
if ($language_source_file && $language_source_file->{$key_} && !@{$keys_test}) {
|
||
talk_log(
|
||
("" . DARK .
|
||
" .. ($code) found and transcoded human translated value for \`$key\` key to" . RESET . ""
|
||
),
|
||
$data,
|
||
$opt->{'verbose'});
|
||
|
||
# We need to re-encode the string first, depending on which human translated file is being
|
||
# used (iso or UTF-8). We still need to convert all back and forth to get rid of ugly &#... escapes
|
||
$language_source_file->{$key_} =
|
||
language_transcode($language_source_file->{$key_}, $language_source_file_encoding);
|
||
talk_log(" — " . WHITE . " $language_source_file->{$key_}" . RESET . "", $data,
|
||
$opt->{'verbose'});
|
||
|
||
# Add a string to main language file
|
||
$language{$key__} = $language_source_file->{$key_};
|
||
|
||
$output++;
|
||
|
||
}
|
||
|
||
# Machine translated strings must be added to `$code.auto` file, e.g. `de.auto`
|
||
else {
|
||
|
||
# Skip steps below, and only transcode existing values
|
||
if ($opt->{'only-transcode'}) {
|
||
next;
|
||
}
|
||
|
||
# Add original value, from source language, to `$code.auto` file without
|
||
# performing actual translation. It's useful when you want to translate it manually
|
||
# and simply need to find missing language strings on targeted file. Besides, `config.info`
|
||
# options are never translated (so far), just as languages that use right-to-left writing
|
||
if (!@{$keys_test} &&
|
||
($opt->{'only-diff'} ||
|
||
$type eq 'config' ||
|
||
($code eq 'ur' && $value =~ /</) ||
|
||
(any {$_ =~ /^$key$/} @{$keys_exclude})))
|
||
{
|
||
talk_log(("" . DARK . " .. ($code) stored original value for \`$key\` which is" . RESET . ""),
|
||
$data, $opt->{'verbose'});
|
||
talk_log(" — " . CYAN . " $value" . RESET . "", $data, $opt->{'verbose'});
|
||
$language_auto{$key__} = $value;
|
||
next;
|
||
}
|
||
|
||
# Add translated string
|
||
talk_log(("" . BRIGHT_MAGENTA . " .. ($code) translated value for \`$key\` to" . RESET . ""),
|
||
$data, $opt->{'verbose'});
|
||
|
||
# Prepare sent text
|
||
my $_value = $value;
|
||
$value = translate_substitute($value, $opt);
|
||
|
||
# Run actual translation
|
||
my $translated = translate($data, $opt, $code, $value);
|
||
$translated = $translated->{'data'}->{'translations'}[0]->{'translatedText'};
|
||
|
||
talk_log(" — $key --> $_value", $data, $opt->{'verbose'});
|
||
|
||
my $translated_ = $translated;
|
||
|
||
$translated = translated_substitute($translated, $_value, $code, $rtl, $opt);
|
||
|
||
# Store translated string
|
||
$language_auto{$key__} = $translated;
|
||
talk_log(" — $key <-- $translated", $data, $opt->{'verbose'});
|
||
|
||
# Print debug log
|
||
if ($opt->{'verbose'} == 2) {
|
||
say BLACK, " --< $value", RESET;
|
||
say BLACK, " >-- $translated_", RESET;
|
||
}
|
||
|
||
$output++;
|
||
}
|
||
}
|
||
|
||
# If we are converting from old times, where there were different encodings and
|
||
# file extensions for it, we need to delete/clear all old, no longer needed files
|
||
# e.g. will be deleted: uk_UA, zh_TW.Big5, ja_JP.euc, ko_KR.euc and etc.
|
||
my $file_deleted = "";
|
||
if (!@{$keys_test} && $opt->{'language-source-encoding'} eq 'map') {
|
||
my $found;
|
||
find(
|
||
{
|
||
wanted => sub {
|
||
$found = $File::Find::name;
|
||
my $code_ = language_map($code);
|
||
if ($code ne $code_ &&
|
||
( $found eq "$mpath/$code_" ||
|
||
(defined($opt->{'type'}) &&
|
||
$opt->{'type'} &&
|
||
$opt->{'type'} =~ /config|module/ &&
|
||
($found eq "$mpath/$opt->{'type'}.info.$code_"))))
|
||
{
|
||
unlink($found);
|
||
$file_deleted .= " $found";
|
||
$found =~ s/$mpath\///;
|
||
talk_log(
|
||
("" . YELLOW .
|
||
" .. Found no longer used language file ($found) and deleted .." . RESET . ""
|
||
),
|
||
$data,
|
||
1);
|
||
}
|
||
},
|
||
},
|
||
$mpath);
|
||
}
|
||
|
||
# Write transcoded/translated file
|
||
if (!@{$keys_test}) {
|
||
|
||
# Prepend file name for `config.info` or `module.info` modes
|
||
if (defined($opt->{'type'}) && $opt->{'type'} =~ /config|module/) {
|
||
$code = "$opt->{'type'}.info.$code";
|
||
}
|
||
|
||
# Language suffix for automatics
|
||
my $language_suffix = 'auto';
|
||
if ($opt->{'only-diff'} && !$opt->{'only-diff-auto'}) {
|
||
$language_suffix = 'diff';
|
||
}
|
||
my $file = $mpath . "/$code";
|
||
my $file_auto = $mpath . "/$code.$language_suffix";
|
||
write_file($file, \%language);
|
||
write_file($file_auto, \%language_auto, undef, undef, $mfile) if (!$opt->{'only-transcode'});
|
||
|
||
# Git auto-commit after language was transcoded/translated
|
||
if ($git_commit) {
|
||
|
||
# my $gs = "$module";
|
||
my $gf = trim("$file $file_auto $file_deleted");
|
||
say GREEN, ".. creating Git commit to current repo for " . BRIGHT_YELLOW . "$gf" . RESET . " files ..";
|
||
system("git add $gf");
|
||
system("git commit $gf -m 'Add transcoding/translation for \`$module\` module of $name ($code)'");
|
||
}
|
||
}
|
||
}
|
||
if (!$output) {
|
||
talk_log(
|
||
("" . RED . " .. No keys were found to work on for " .
|
||
BLUE . "$module" . RESET . "" . RED . " module .." . RESET . ""
|
||
),
|
||
$data,
|
||
1);
|
||
}
|
||
|
||
say GREEN, ".. done ", RESET;
|
||
}
|
||
}
|
||
|
||
sub get_google_translate_token
|
||
{
|
||
my $gc = `gcloud -v 2>&1`;
|
||
if (!$gc // $gc !~ /Google Cloud SDK/) {
|
||
errors('gcloud-missing');
|
||
return 0;
|
||
}
|
||
|
||
my $token = `gcloud auth application-default print-access-token`;
|
||
|
||
if ($token =~ /ERROR:/) {
|
||
errors('gcloud-error', $token);
|
||
return 0;
|
||
} else {
|
||
return $token;
|
||
}
|
||
}
|
||
|
||
sub prompt
|
||
{
|
||
my ($q) = @_;
|
||
|
||
if ($q eq 'next') {
|
||
return prompt("Do you want to proceed?");
|
||
}
|
||
my $p = sub {
|
||
my ($q, $c) = @_;
|
||
$q = $c if ($c);
|
||
local $| = 1;
|
||
print DARK, "$q", RESET;
|
||
chomp(my $a = <STDIN>);
|
||
$a = lc($a) if ($a);
|
||
return lc($a);
|
||
|
||
};
|
||
my $a = &$p("$q [y/N]: ");
|
||
if ($a ne 'y' && $a ne 'n') {
|
||
my $repeat = ("" . BRIGHT_MAGENTA . "Please enter" .
|
||
RESET . " " . YELLOW . "[y]" . RESET . " " . MAGENTA . "or" . RESET . " " . YELLOW . "[n]" . RESET . ": ");
|
||
$a = &$p(undef, $repeat);
|
||
if ($a ne 'y' && $a ne 'n') {
|
||
$a = &$p(undef, $repeat);
|
||
}
|
||
}
|
||
return $a eq 'y';
|
||
}
|
||
|
||
sub ask
|
||
{
|
||
my ($q) = @_;
|
||
my $p = sub {
|
||
my ($q) = @_;
|
||
local $| = 1;
|
||
print DARK, "$q", RESET;
|
||
chomp(my $a = <STDIN>);
|
||
return $a;
|
||
|
||
};
|
||
my $a = &$p("$q: ");
|
||
return $a;
|
||
}
|
||
|
||
sub errors
|
||
{
|
||
my ($e, $error) = @_;
|
||
my $message;
|
||
if ($e eq 'language-target') {
|
||
$message =
|
||
"Error: Using \`$error\` as a target language(s) is refused. The value(s) should match to one of the language codes from the language list file.";
|
||
}
|
||
|
||
if ($e eq 'language-source') {
|
||
$message =
|
||
"Error: Using \`$error\` as a source language is refused. The value should match to one of the language codes from the language list file.";
|
||
}
|
||
|
||
if ($e eq 'gcloud-missing') {
|
||
$message =
|
||
"Error: Command \`gcloud\` to manage Google Cloud Platform resources and developer workflow is not available.",;
|
||
}
|
||
|
||
if ($e eq 'gcloud-error') {
|
||
$message = "Error: $error";
|
||
}
|
||
say RED, $message, RESET;
|
||
}
|
||
|
||
sub talk_log
|
||
{
|
||
my ($what, $data, $output_to_console) = @_;
|
||
|
||
# Output to console
|
||
if ($output_to_console) {
|
||
say $what;
|
||
}
|
||
|
||
# Store to log
|
||
my $out = $data->{'out'};
|
||
if ($out) {
|
||
$what =~ s/\[\d+m//g;
|
||
say $out $what;
|
||
}
|
||
}
|
||
|
||
sub talk
|
||
{
|
||
my ($what, $opt, $data) = @_;
|
||
if ($what eq 'affected') {
|
||
my $affected = "@{$data->{'modules'}}";
|
||
my $affected_count = scalar(split(' ', $affected));
|
||
$affected =~ s/\b(lang)\b/(lang)/;
|
||
if ($affected) {
|
||
say GREEN, "Affected modules " . RESET, DARK . "[$affected_count]" . RESET . ": ", YELLOW BOLD, $affected, RESET;
|
||
exit if (!prompt('next'));
|
||
} else {
|
||
say RED, "Error: No modules found to operate on", RESET;
|
||
exit;
|
||
}
|
||
my $languages = ("@{$opt->{'language-target'}}" || "@{$data->{'languages_source_list_codes'}}");
|
||
my $languages_count = scalar(split(' ', $languages));
|
||
say GREEN, "Affected languages" . RESET, DARK . " [$languages_count]" . RESET . ": ", YELLOW BOLD,
|
||
"" . $languages . "", RESET;
|
||
exit if (!prompt('next'));
|
||
}
|
||
if ($what eq 'overwrite-pre') {
|
||
say RED, "Warning! ", RESET, WHITE,
|
||
"The following operation will force-translate and overwrite\nmentioned languages in all mentioned modules listed above, using ",
|
||
YELLOW BOLD, $opt->{'language-source'}, RESET, "\nas a template language. The following directory " . YELLOW BOLD,
|
||
$data->{'path'}, RESET,
|
||
"\nwith human translated module strings, will be used instead of machine\ntranslations. The operation log will be written in "
|
||
. YELLOW BOLD, $opt->{'log'}, RESET, " file.", RESET;
|
||
}
|
||
}
|
||
|
||
sub trim
|
||
{
|
||
my $s = shift;
|
||
$s =~ s/^\s+|\s+$//g;
|
||
return $s;
|
||
}
|
||
|
||
=pod
|
||
|
||
=head1 NAME
|
||
|
||
language-manager
|
||
|
||
=head1 DESCRIPTION
|
||
|
||
Manage Webmin/Usermin module language files (lang|ulang|help|config|module), to perform transcoding, translation, spell check and add/remove language strings.
|
||
|
||
=head1 SYNOPSIS
|
||
|
||
language-manager [options]
|
||
|
||
=head1 OPTIONS
|
||
|
||
=over
|
||
|
||
=item --help, -h
|
||
|
||
Give this help list.
|
||
|
||
Examples of usage:
|
||
|
||
Translate all available languages, using old-time encoding map, for BIND module, using as type "lang" directory, discarding human translations for Hebrew, keeping original value (not translating) for key "mass_desc", and printing verbose output.
|
||
|
||
— language-manager -e=map -m=bind8 -w=lang -te=he -ke=mass_desc -v
|
||
|
||
Only transcode language files, using old-time encoding map, from files being in different encodings, to new style, where all language files are in "utf-8" encoding, for Apache module. No translations will be made, and no ".auto" files will be created.
|
||
|
||
— language-manager -e=map -m=apache --ot
|
||
|
||
Test translation output for "mass_desc,trusted_warning" keys, in Russian and German languages, in BIND module and exit.
|
||
|
||
— language-manager -m=bind8 -t=ru,de -kt=mass_desc,trusted_warning -v
|
||
|
||
=item --type, -w <lang|ulang|help|config|module>
|
||
|
||
Type of target to use for operations. Either <lang>, <ulang>, <help> directories or language info file, as <config> or <module>. Default is set to "lang" for Webmin and "ulang" for Usermin.
|
||
|
||
=item --modules, -m
|
||
|
||
Comma separated list of modules to operate on. Default is to operate on all available modules.
|
||
|
||
=item --modules-exclude, -me
|
||
|
||
Comma separated list of modules to exclude
|
||
|
||
=item --language-target, -t
|
||
|
||
Comma separated list of affected language codes to operate on. Default is to operate on all languages defined in language list.
|
||
|
||
=item --language-target-exclude, -te
|
||
|
||
Comma separated list of target language codes that should be excluded from processing.
|
||
|
||
=item --language-source, -s
|
||
|
||
Language to use as a base language. Default is set English.
|
||
|
||
=item --language-source-exclude, -se
|
||
|
||
Comma separated list of human translated language codes to exclude from adding to target file (always translate instead). It might be useful if human translated files are low quality or corrupted.
|
||
|
||
=item --language-source-encoding, -e
|
||
|
||
Encoding of human translated language files. Default is set to "utf-8". Available options are <utf-8>, <auto>, <map>. Option "auto" is used to detect encoding automatically, while option "map" is used to use old-time encoding map, where each language file had different encoding, and nowadays, should not be used, as all language files are going to be encoded in "utf-8" already. It's always best to avoid using this option.
|
||
|
||
=item --only-diff, -o
|
||
|
||
Use this option to extract a difference and find missing keys on target language (against source) and save currently untranslated strings to a separate file, with ".diff" extension.
|
||
|
||
=item --only-transcode, -ot
|
||
|
||
Use this option to simply convert old-time style module language files (where each language had different encoding) into new format, with all files encoded in "utf-8". Old files, such as "ja_JP.euc", "ko_KR.euc", "zh_TW.Big5" and "ru_RU" will be automatically deleted.
|
||
|
||
=item --keys-exclude, -ke
|
||
|
||
Comma separated list of keys that shouldn't be translated and stored with original values
|
||
|
||
=item --keys-test, -kt
|
||
|
||
Comma separated list of keys to test translation engine on; in this mode, translated strings are not saved
|
||
|
||
=item --git-commit, -gc
|
||
|
||
Create a separate commit to the current repo, after each language was transcoded/translated
|
||
|
||
=item --log, -l
|
||
|
||
Saves complete operation log. By default, log file is saved to "/tmp/language-manager-{timestamp}.txt" file.
|
||
|
||
=item --verbose, -v
|
||
|
||
Verbosely print processed files and provide detailed output. Be detault, verbose output is enabled.
|
||
|
||
=back
|
||
|
||
=head1 LICENSE AND COPYRIGHT
|
||
|
||
Copyright 2020 Ilia Rostovtsev <programming@rostovtsev.io>
|