webmin/bin/language-manager

#!/usr/bin/env perl
# language-manager - Automatic translation and/or transcoding language files for all or specific modules

use strict;
use warnings;
use 5.010;

use File::Spec;
use File::Basename;
use File::Find;
use JSON::PP;
use HTTP::Tiny;
use HTML::Entities;
use List::MoreUtils qw(any);
use Cwd qw(cwd);
use Encode qw/encode decode/;
use Encode::Detect::Detector;
use Term::ANSIColor qw(:constants);
use Getopt::Long qw(:config permute pass_through);
use Pod::Usage;

sub main
{
    my %data;

    # Register start time
    $data{'start_time'} = time();

    # Prevent accidental script termination
    my $sigkill = sub {
        if ($data{'sigkill'}++ > 5) {
            say RED, "\nTerminating: Ctrl-C has been pressed more than 5 times ..", RESET;
            exit;
        }
    };
    $SIG{INT} = \&$sigkill;

    # Get user options from the command line
    my %opt;
    GetOptions('help|h'                           => \$opt{'help'},
               'mode|x:s'                         => \$opt{'mode'},
               'type|w:s'                         => \$opt{'type'},
               'modules|m:s'                      => \$opt{'modules'},
               'modules-exclude|me:s'             => \$opt{'modules-exclude'},
               'language-target|t:s'              => \$opt{'language-target'},
               'language-target-exclude|te:s'     => \$opt{'language-target-exclude'},
               'language-source|s:s'              => \$opt{'language-source'},
               'language-source-exclude|se:s'     => \$opt{'language-source-exclude'},
               'language-source-ignore-auto|sia!' => \$opt{'language-source-ignore-auto'},
               'language-source-encoding|e:s'     => \$opt{'language-source-encoding'},
               'only-diff|od!'                    => \$opt{'only-diff'},
               'only-transcode|ot!'               => \$opt{'only-transcode'},
               'keys-exclude|ke:s'                => \$opt{'keys-exclude'},
               'keys-test|kt:s'                   => \$opt{'keys-test'},
               'git-commit|gc!'                   => \$opt{'git-commit'},
               'log|l:s'                          => \$opt{'log'},
               'verbose|v:i'                      => \$opt{'verbose'});

    # Print help and exit
    pod2usage(0) if ($opt{'help'});

    # Enforce verbose output by default
    if (!defined($opt{'verbose'})) {
        $opt{'verbose'} = 1;
    }

    # Get current path and make workaround to run from sub dir
    my $path = cwd;
    if ($path =~ /\/bin$/) {
        $path = Cwd::realpath('..');
    }
    my $type = -d "$path/ulang" ? 'ulang' : 'lang';

    # Load Webmin lib
    require("$path/web-lib-funcs.pl");

    # Enforce user specific target: language directory, config.info or module.info file {lang|ulang|config|uconfig|module}
    $opt{'type'} && ($type = $opt{'type'});

    # Store path and type
    $data{'path'} = $path;
    $data{'type'} = $type;

    # Set default format to text
    $opt{'translate-format'} = 'text';

    # Force HTML format for "module" type translates
    if (defined($opt{'type'}) && $opt{'type'} eq 'module') {
        $opt{'translate-format'} = 'html';
    }

    # Check if we can get Google Translate API token
    $data{'token'} = get_google_translate_token();
    if (!$data{'token'}) {
        $opt{'only-diff'}      = 1;
        $opt{'only-diff-auto'} = 1;
        say YELLOW .
          "Translation will not be done as Google Translate API key is missing. Falling back to `only-diff` mode.." . RESET;
    }

    # Get list of languages from `lang_list.txt` file
    $data{'languages_source_list'}       = list_languages_local(\%data);
    $data{'languages_source_list_codes'} = [map {$_->{'lang'}} @{ $data{'languages_source_list'} }];

    # Define base language. Default is set English
    $opt{'language-source'} ||= 'en';
    my $language_source = $opt{'language-source'};

    # Check, if source language exists
    if (!any {$_ =~ /^$language_source/} @{ $data{'languages_source_list_codes'} }) {
        errors('language-source', $language_source);
        exit;
    }

    # What language encoding do we expect on human translated files
    $opt{'language-source-encoding'} ||= 'utf-8';

    # Always exclude source language
    my $language_target_exclude = $opt{'language-target-exclude'};
    if ($language_target_exclude) {
        $language_target_exclude .= ",$language_source";
    } else {
        $language_target_exclude = $language_source;
    }

    # Don't put on the list user excluded languages
    if ($language_target_exclude) {
        my @languages_excluded = split(',', $language_target_exclude);
        my @languages_allowed;
        for my $language (@{ $data{'languages_source_list_codes'} }) {
            if (!any {$_ =~ /^$language$/} @languages_excluded) {
                push(@languages_allowed, $language);
            }
        }
        if (@languages_allowed) {
            $data{'languages_source_list_codes'} = [@languages_allowed];
        }
    }

    # Define the mode
    $opt{'mode'} ||= 'sync';

    # Find out which modules to update, if exist
    my $modules = $opt{'modules'};
    my @modules;
    my @modules_exclude = $opt{'modules-exclude'} ? split(',', $opt{'modules-exclude'}) : ();
    if ($modules) {
        my @ml = split(',', $modules);
        foreach my $module (@ml) {
            my ($exists) = source_data(\%data, \%opt, $module);
            if ($exists) {
                if (!any {$_ =~ /^$module$/} @modules_exclude) {
                    push(@modules, $module);
                }
            }
        }
        @modules = sort @modules;
    } else {
        my $found;
        find(
            {
               wanted => sub {
                   $found = $File::Find::name;
                   if (!-l $found && ($found =~ /$type\/$language_source$/ || $found =~ /$type\.info$/)) {
                       $found =~ s/^$path\///g;
                       $found =~ s/\/$type\/$language_source//g;
                       $found =~ s/\/$type\.info//g;
                       $found =~ s/\/$language_source//g;
                       $found =~ s/\/$language_source//g;
                       if (!any {$_ =~ /^$found$/} @modules_exclude) {
                           push(@modules, $found);
                       }
                   }
               },
            },
            $path);
        @modules = sort @modules;
    }
    $data{'modules'} = \@modules;

    # Define target language(s) or translate all
    $opt{'language-target'} = [$opt{'language-target'} ? split(',', $opt{'language-target'}) : ()];
    if (@{ $opt{'language-target'} }) {
        my @bad_languages;
        for my $language (@{ $opt{'language-target'} }) {
            push(@bad_languages, $language) if !any {$_ =~ /^$language$/} @{ $data{'languages_source_list_codes'} };
        }
        if (@bad_languages) {
            errors('language-target', join(',', @bad_languages));
            exit;
        }
    }

    # Test only given keys in bulk translation. Translated strings are not saved
    $opt{'keys-test'} = [$opt{'keys-test'} ? split(',', $opt{'keys-test'}) : ()];

    # Exclude listed keys from bulk translation
    $opt{'keys-exclude'} = [$opt{'keys-exclude'} ? split(',', $opt{'keys-exclude'}) : ()];

    # Just run tests, and exit, without writing anything
    if (@{ $opt{'keys-test'} }) {
        say CYAN, "Translation testing for selected keys is about to start ..", RESET;
        if (prompt('next')) {
            go(\%opt, \%data);
        }
    } else {

        # Log the output. Start
        $opt{'log'} ||= "/tmp/language-manager-@{[time()]}.log";
        open $data{'out'}, ">", "$opt{'log'}" or die RED, "Error creating log: $!\n", RESET, "\n";

        # User interactions
        talk('affected', \%opt, \%data);

        # Run in overwrite mode
        if ($opt{'mode'} eq 'full') {

            # Execute force transcode/translate
            talk('overwrite-pre', \%opt, \%data);
            if (prompt('next')) {
                go(\%opt, \%data);
            }
        }

        # Simply translate newly added strings to source language (def. en),
        # to all targets ".auto". Deleted keys on source language will also
        # be removed from targets, in both, human translated and ".auto" files
        else {

            # Execute language sync
            talk('sync-pre', \%opt, \%data);
            if (prompt('next')) {
                go(\%opt, \%data);
            }
        }

        # Log the output. End
        close $data{'out'};
    }

    return 0;
}
main();

# The following is used to get correct language files based on old language table.
# This function, should not be really needed, unless compiling languages with
# Webmin version prior to 1.950 (first time), as Webmin 1.950+ is going to
# use new langauge map and have all related language files in UTF-8 already
sub language_map
{
    my ($code) = @_;
    my %language_map = (

        # Use plain language codes
        'ja'    => 'ja_JP.euc',
        'ko'    => 'ko_KR.euc',
        'ms'    => 'ms_MY',
        'ru'    => 'ru_RU',
        'uk'    => 'uk_UA',
        'zh'    => 'zh_CN',
        'zh_TW' => 'zh_TW.Big5',

        # Czech is `cs` not `cz`, as there is no `cz` at all
        'cs' => 'cz',

        # Slovenian is `sl` not `si`, as `si` is Sinhala
        'sl' => 'si',

        # Greek is `el` not `gr`
        'el' => 'gr',);

    if ($language_map{$code}) {
        $code = $language_map{$code};
    }
    return $code;
}

# Always check, if strings that are about to be translated haven't been
# translated by human already, and if so, extract them with correct
# encoding, to be further converted to UTF-8 and stored on destination file
sub language_source_file
{
    my ($opt, $data, %data) = @_;
    my $language_code = $data{'language-code'};
    my %language_source_file;
    my %language_source_file_auto;
    my $language_source_file;
    my $language_source_file_encoding = 'utf-8';

    # Ignore user defined source languages
    my $language_source_exclude = $opt->{'language-source-exclude'};
    if ($language_source_exclude) {
        my @languages_excluded = split(',', $language_source_exclude);
        if (any {$_ =~ /^$language_code$/} @languages_excluded) {
            return (undef, undef);
        }
    }

    # Correct language code according to the old language map
    if ($opt->{'language-source-encoding'} eq 'map') {
        $language_code = language_map($language_code);
    }

    # Set prefix in case of processing `config.info` or `module.info`
    my $language_source_file_target_ = "/";
    if (defined($opt->{'type'}) && $opt->{'type'} =~ /config|uconfig|module/) {
        $language_source_file_target_ = "/$opt->{'type'}.info.";
    }

    # Get already translated language strings from current directory
    my $language_source_file_target = "$data{'module-path'}$language_source_file_target_$language_code";
    if (-r $language_source_file_target) {
        $language_source_file = $language_source_file_target;
    }

    # Load machine translated file
    if ($opt->{'mode'} ne 'full') {
        my $language_source_file_target_auto = "$language_source_file_target.auto";
        if (-r $language_source_file_target_auto) {
            read_file($language_source_file_target_auto, \%language_source_file_auto);
            talk_log(
                     ("" . GREEN .
                        "  .. Found machine translated file for $data{'language-name'} ($language_code.auto)" . RESET . ""
                     ),
                     $data,
                     1);
        } else {
            say YELLOW, "  .. No machine translated file has be found for $data{'language-name'} ($language_code.auto)",
              RESET;
        }
    }

    # Supply proper encoding for existing language files
    if ($language_source_file) {
        talk_log(("" . GREEN . "  .. Found human translated file for $data{'language-name'} ($language_code)" . RESET . ""),
                 $data, 1);
        read_file("$language_source_file", \%language_source_file);

        # Force encoding based on map
        if ($opt->{'mode'} eq 'full' && $opt->{'language-source-encoding'} eq 'map') {
            my $map_auto;
            my $code = $data{'language-code'};

            if ($code eq 'ja') {
                $language_source_file_encoding = "euc-jp";
            } elsif ($code eq 'ko') {
                $language_source_file_encoding = "euc-kr";
            } elsif ($code eq 'ru' || $code eq 'bg' || $code eq 'uk') {
                $language_source_file_encoding = "cp1251";
            } elsif ($code eq 'ca' ||
                     $code eq 'fr' ||
                     $code eq 'hr' ||
                     $code eq 'lt' ||
                     $code eq 'no')
            {
                $language_source_file_encoding = "cp1252";
            } elsif ($code eq 'cs' ||
                     $code eq 'sk' ||
                     $code eq 'pl' ||
                     $code eq 'sl' ||
                     $code eq 'hu')
            {
                $language_source_file_encoding = "iso-8859-2";
            } elsif ($code eq 'tr') {
                $language_source_file_encoding = "iso-8859-9";
            } elsif ($code eq 'he') {
                $language_source_file_encoding = "cp1255";
            } elsif ($code eq 'th') {
                $language_source_file_encoding = "tis-620";
            } elsif ($code eq 'zh') {
                $language_source_file_encoding = "gb2312";
            } elsif ($code eq 'zh_TW') {
                $language_source_file_encoding = "big5";
            } else {
                my $language_source_file_data = read_file_contents($language_source_file_target);
                my $detected_encoding         = Encode::Detect::Detector::detect($language_source_file_data);
                if ($detected_encoding) {
                    $language_source_file_encoding = $detected_encoding;
                    $map_auto                      = " (auto)";
                } else {
                    $language_source_file_encoding = 'utf-8';
                    $map_auto                      = " (auto enforced)";
                }
            }

            talk_log(
                     ("" . CYAN . "  .. Force file encoding to \`$language_source_file_encoding\`" .
                        ($map_auto // '') . " as derived from language map" . RESET . ""
                     ),
                     $data,
                     1);

        }

        # Figure out encoding automatically
        elsif ($opt->{'mode'} eq 'full' && $opt->{'language-source-encoding'} eq 'auto') {
            my $language_source_file_data = read_file_contents($language_source_file_target);
            $language_source_file_encoding = Encode::Detect::Detector::detect($language_source_file_data);
            if (!$language_source_file_encoding) {
                $language_source_file_encoding =
                  ask('  ' . BRIGHT_RED . '.. Cannot detect encoding, enter it manually' . RESET . '');
                talk_log(
                         ("" . BRIGHT_RED .
                          "  .. Manually set file encoding to \`$language_source_file_encoding\`" . RESET . ""),
                         $data,
                         1);
            } else {
                talk_log(
                         ("" . MAGENTA .
                            "  .. Automatically detected file encoding is \`$language_source_file_encoding\`" . RESET . ""
                         ),
                         $data,
                         1);
            }
        } else {
            $language_source_file_encoding = $opt->{'language-source-encoding'};
            talk_log(("" . MAGENTA . "  .. Set file encoding to default \`$language_source_file_encoding\`" . RESET . ""),
                     $data, 1);
        }
    } else {
        say YELLOW, "  .. No human translated file has been found for $data{'language-name'} ($language_code)", RESET;
    }

    return (\%language_source_file, \%language_source_file_auto, $language_source_file_encoding,
            $language_source_file_target);
}

sub language_transcode
{
    my ($string, $encoding, $opt) = @_;

    # In case it's a `config` or `uconfig` file, preserve
    # &#44; and &#45;, as a literal characters
    my $type_config = $opt->{'type'} =~ /config|uconfig/;
    if ($type_config) {
        $string =~ s/&#44;/~,~,~/g;
        $string =~ s/&#45;/~-~-~/g;
    }

    # Preserve actual tags
    $string =~ s/&nbsp;/~!!SS!!~/g;
    $string =~ s/&lt;/~!~!!~/g;
    $string =~ s/&gt;/~!!~!~/g;

    eval {$string = decode($encoding, $string)};
    if ($@) {
        say "Error found: $@";
        if (!prompt('next')) {
            exit;
        }
    }
    decode_entities($string);
    $string = encode('utf-8', $string);

    # Restore special commas and dashes
    if ($type_config) {
        $string =~ s/~-~-~/&#45;/g;
        $string =~ s/~,~,~/&#44;/g;
    }

    # Restore escaped tags
    $string =~ s/~!!~!~/&gt;/g;
    $string =~ s/~!~!!~/&lt;/g;
    $string =~ s/~!!SS!!~/&nbsp;/g;
    return $string;
}

sub source_data
{
    my ($data, $opt, $module) = @_;
    my ($language_source, $type, $target, $source_file, $source_file_, $exists);

    $language_source = $opt->{'language-source'};
    $type            = $data->{'type'};
    if ($type eq 'config' || $type eq 'uconfig' || $type eq 'module') {
        $language_source = $language_source ne 'en' ? "$type.info.$language_source" : "$type.info";
    }

    $target       = "$data->{'path'}/$module";
    $source_file  = "$target/$type/$language_source";
    $source_file_ = "$target/$language_source";
    $exists =
      (-d $target && !-l $target && ((-r $source_file && !-l $source_file) || (-r $source_file_ && !-l $source_file_))) || 0;

    if (-r $source_file && !-l $source_file) {
        $target .= "/$type";
    } elsif (-r $source_file_ && !-l $source_file_) {
        $source_file = $source_file_;
    }

    return ($exists, $source_file, $target);
}

# Returns an array of supported languages,
# taken from Webmin's lang_list.txt file.
sub list_languages_local
{
    my ($data) = @_;
    my ($key, $value, $options, $language, @languages);

    open(my $LANG, "$data->{'path'}/lang_list.txt");
    while (<$LANG>) {
        if (/^(.*=\w+)\s+(.*)/) {
            $language = { 'desc' => $2 };
            foreach $options (split(/,/, $1)) {
                if ($options =~ /^([^=]+)=(.*)$/) {
                    $key   = $1;
                    $value = $2;
                    $key   =~ s/^\s+|\s+$//g;
                    $value =~ s/^\s+|\s+$//g;
                    $language->{$key} = $value;
                }
            }
            push(@languages, $language);
        }
    }
    close($LANG);
    @languages = sort {$a->{'desc'} cmp $b->{'desc'}} @languages;
    return wantarray ? @languages : \@languages;
}

# Prepare the string that is going to be sent to translator
sub translate_substitute
{
    my ($value, $opt) = @_;
    my $format = $opt->{'translate-format'};

    $value = language_transcode($value, 'utf-8', $opt);

    # Preserve un-quoted $1, $2, $3 in strings, which are broken in so different ways, in different languages differently
    if ($format eq 'text') {
        $value =~ s/(?<!['|"|`|“|«|=|?|&])(\$(\d+))/%$2/g;
    }

    # Preserve path in strings
    elsif ($format eq 'html') {
        $value =~ s/(?:(^(\/[\w+].*?),|^(\/[\w+].*?)\.|(\/[\w+].*?) |(\/[\w+].*?)$))/<span translate="no">"$1"<\/span>/g;
    }

    return $value;
}

# Perform white sorcery on returned string from translator,
# as it seems to be breaking quite a lot of things
sub translated_substitute
{
    my ($translated, $original, $code, $rtl, $opt) = @_;

    my $format        = $opt->{'translate-format'};
    my $remove_spaces = sub {
        my ($str) = @_;
        $str =~ s/[ ]+//g;
        return $str;
    };

    # Fix broken by translator HTML closing tags
    $translated =~ s/<\/[ ]*(\w+)>/<\/$1>/g;
    $translated =~ s/<[ ](.*?)[ ]/<$1/g;

    if ($format eq 'text') {

        # Unescape preserved, un-quoted $1, $2, $3
        $translated =~ s/\%[ ]+(\d+)/ \$$1 /gi;

        # If replacement is changed to localized version
        $translated =~ s/％(\d+)/ \$$1/gi;

        # If the string starts with $1 and contains a space
        $translated =~ s/(^[ ]+\$)/\$/gi;

        # Get rid from undesirable spaces in parentheses
        $translated =~ s/([ ]+\))/)/gi;
        $translated =~ s/([ ]+\）)/）/gi;
        $translated =~ s/(\([ ]+)/(/gi;
        $translated =~ s/(\（[ ]+)/（/gi;

        # Print actual values with percent sign
        $translated =~ s/\([ ]+\$(\d+)[ ]*%\)/(\$$1 %)/gi;

        # The following, is abnormal way to make things work, around of the bugs produced by Google Translate API for (bg).
        if ($code eq 'bg') {
            if ($original =~ /\$(\d+)/ && $translated !~ /\$(\d+)/) {
                $translated =~ s/(\d+)/\$$1/g;
            }
        }

        # For RTL substitute `\d ٪` with `$\d`
        if ($rtl) {
            $translated =~ s/(٪[ ]+(\d+))/\$$2/g;

            # Fix stuck, consecutive $2$1
            $translated =~ s/\$(\d+)\$(\d+)/\$$1 \$$2/g;

            # Urdu returned as ٪1
            if ($code eq 'ur') {
                $translated =~ s/((\d+)٪)/\$$2/g;
            }
        }

        # Language specific fixes
        if ($code eq 'th') {
            $translated =~ s/(([3-9])%)/\$$2/g;
        }
        if ($code eq 'ro') {
            $translated =~ s/ (([1-9])%) / \$$2 /g;
        }

    } elsif ($format eq 'html') {

        # Unescape path in strings
        $translated =~ s/<span.*?translate.*?>(.*?)<.*?>/$1/gi;

        # Remove any escapes as returned by translator, when run in HTML mode
        $translated =~ s/&quot;//gi;
        $translated =~ s/&#39;/'/gi;
    }

    # Restore destroyed tags
    $translated =~ s/<[ ]*(?:(\?\w+|((?![br|hr|p|tt|pre|ul|li|ol])\w+).*?))[ ]*>(.*?)<.*?>/<$1>$3<\/$2>/gi;

    $translated =~ s/([ ]*)\$[ ]*(\d+)/$1\$$2/g;
    $translated =~ s/(['|"|`|“|«])[ ]*\$(\d+)[ ]*(['|"|`|“|»])/$1\$$2$3/g;
    $translated =~ s/(<.*?>)[ ]*(.*?)[ ]*(<.*?>)/$1$2$3/g;
    $translated =~ s/\$(\d+)[ ]*([:|：])[ ]*\$(\d+)[ ]*/ \$$1:\$$3 /g;
    $translated =~ s/\$(\d+)[ ]*：[ ]*\$(\d+)/\$$1:\$$2/g;
    $translated =~ s/(\p{L})\$(\d+)[ ]+/$1 \$$2 /g;
    $translated =~ s/(\p{L})[ ]+([:|：]){2}[ ]+(\p{L})/$1::$3/g;

    # Remove any whitespaces
    $translated =~ s/([ ]+)/ /g;

    # Fix trailing dot, which is separated from a word
    $translated =~ s/(.)[ ]+\./$1./g;

    # Fix trailing comma, which is separated from a word
    $translated =~ s/(.)[ ]+,/$1,/g;

    # Preserve inner formatting for tag's attrs
    $translated =~ s/<(\w+)[ ]+(\w+)(.*?)(=)[ ]*(.*?)[ ]*(\w+)/<$1 $2=$5$6/g;

    # Last .. should also be preceded by space
    $translated =~ s/[ ]*(\.\.)$/ ../g;

    # Preserve dates example formatting
    $translated =~ s/(\p{L}{2,4}) \/ (\p{L}{2,3}) \/ (\p{L}{2,4}) (\(.*?\))/$1\/$2\/$3 $4/g;

    # If initial value contains in the end of the string `text : ` or `text : $1`,
    # then print it as such and not as `text: ` or `text: $1`
    if ($original =~ /(?:([ ]+:[ ]*$|[ ]+:[ ]*\$\d+$))/) {
        $translated =~ s/(?:(:[ ]*[^:]+$|:$))/ $1/g;

        # Fix incorrectly positioned $1 :$2
        if ($translated =~ /\$(\d+)[ ]+:\$\d+/) {
            $translated =~ s/\$(\d+)[ ]+:\$(\d+)/\$$1 : \$$2/;
        }
    }

    # If initial string contains `/$1/` then format translated accordingly
    if ($original =~ /\/\$\d+\//) {
        $translated =~ s/\/[ ]*\$(\d+)[ ]*\//\/\$$1\//g;
    }

    # If initial string contains `($2 %)` then format translated accordingly
    if ($original =~ /\(\$(\d+)[ ]+%\)/) {
        $translated =~ s/\(\$(\d+)[ ]*%\)/(\$$1 %)/g;
    }

    # Always fix properly included path
    if ($original =~ /<tt>[\/|~\/].*?<\/tt>/) {
        $translated =~ s/(<tt>)([\/|~\/].*?)(<\/tt>)/$1@{[&$remove_spaces($2)]}$3/g;
    }

    # Always consider `/` to be a delimiter and replace ` / ` to `/`
    if ($original =~ /\w+\/\w+/) {
        $translated =~ s/[ ]+(\/)[ ]+/$1/g;
    }

    # Treat `+` it is not not as ` + `
    if ($original =~ /\w+\+\w+/) {
        $translated =~ s/[ ]+(\+)[ ]+/$1/g;
    }

    # The following should have no spaces, e.g. `<tt>uid=joe,dc=my-domain,dc=com</tt>`
    $translated =~ s/(<tt>)(uid[ ]*=.*?)(<\/tt>)/$1@{[&$remove_spaces($2)]}$3/g;

    # If original string contains ` % ` make sure output also has it
    if ($original =~ / % /) {
        $translated =~ s/[ ]*%[ ]*/ % /g;
        $translated =~ s/[ ]*٪[ ]*/ ٪ /g;
    }

    return $translated;
}

# Make actual translation using Google Translate API
sub translate
{
    my ($data, $opt, $target, $value) = @_;

    my $source = $opt->{'language-source'};
    my $format = $opt->{'translate-format'};

    # Updating Google Translate API token to avoid expiration
    my $time = time();
    if ((($time - $data->{'start_time'}) / 60) > 10) {
        say CYAN, "Updating Google Translate API token..", RESET;
        $data->{'token'}      = get_google_translate_token();
        $data->{'start_time'} = $time;
    }

    my $token = $data->{'token'};

    # Replace language code to match what translator expects
    $target =~ s/_/-/;

    # Prevent service unavailable error, reducing the call rate
    select(undef, undef, undef, 0.25);

    my $tr;
    my $rsp = "https://translation.googleapis.com/language/translate/v2?q=@{[urlize($value)]}";
    my $rsh = { 'Authorization' => "Bearer \"@{[trim($token)]}\"",
                'User-Agent'    => 'curl/7.29.1',
                'Content-Type'  => 'application/json; charset=utf-8', };
    my $rsc = "{  'source': '" . $source . "',  'target': '" . $target . "',  'format': '" . $format . "'}";
    my $rs  = HTTP::Tiny->new()->request('POST' => $rsp, { 'headers' => $rsh, 'content' => $rsc });
    my $ts  = $rs->{'success'};
    my $tc  = $rs->{'content'};

    # Exctract translation on success
    if ($ts) {
        $tr = JSON::PP->new->decode($rs->{'content'});
    }

    # On error just try again in 5 seconds
    else {
        say YELLOW, "Error: Stopped when translating `$target` language", RESET;
        print RED, "Error: Google Translator - $tc", RESET;
        say CYAN, "Retrying automatically in 5 seconds ..", RESET;
        sleep 5;
        my $translated = translate($data, $opt, $target, $value);
        return $translated;
    }

    return $tr;
}

# Run transcoding or translation of module(s) for the very first time
sub go
{
    my ($opt, $data) = @_;

    my ($module, $language);
    my $path                        = $data->{'path'};
    my $type                        = $data->{'type'};
    my $token                       = $data->{'token'};
    my $modules                     = $data->{'modules'};
    my $language_source             = $opt->{'language-source'};
    my $language_source_encoding    = $opt->{'language-source-encoding'};
    my $language_source_ignore_auto = $opt->{'language-source-ignore-auto'};
    my $language_target             = $opt->{'language-target'};
    my $language_target_exclude     = $opt->{'language-target-exclude'};
    my $keys_exclude                = $opt->{'keys-exclude'};
    my $keys_test                   = $opt->{'keys-test'};
    my $git_commit                  = $opt->{'git-commit'};
    my $verbose                     = $opt->{'verbose'} || @{$keys_test};
    my $mode_sync                   = $opt->{'mode'} ne 'full';
    my $verbose_silent_mode         = $mode_sync && $verbose != 2;

    foreach $module (@{$modules}) {
        my (%template);
        my ($exists, $mfile, $mpath) = source_data($data, $opt, $module);

        # Check if there has been something to process, if not print a message
        my $output;

        # Get source, base language file
        read_file($mfile, \%template);

        talk_log(("Transcoding/translating " . BLUE BOLD, $module, RESET . " module .."), $data, 1);
        foreach $language (@{ $data->{'languages_source_list'} }) {

            # Check if there has been something to process, if not print a message
            my $output_;

            # Get target language code
            my $code = $language->{'lang'};

            # Skip translating source, base language
            next if ($code eq $language_source);

            # Process only user defined languages or do all
            if (@{$language_target}) {
                next if (!any {$_ =~ /^$code$/} @{$language_target});
            }

            # Do not process excluded languages
            if ($language_target_exclude) {
                my @languages_excluded = split(',', $language_target_exclude);
                if (any {$_ =~ /^$code$/} @languages_excluded) {
                    next;
                }
            }

            # Get other target language attributes
            my $name = $language->{'desc'};
            my $rtl  = $language->{'rtl'};

            my %language;
            my %language_auto;

            my $message_type = "Processing";
            $message_type = "Testing translations for selected keys with" if (@{$keys_test});

            talk_log(("" . YELLOW . " .. $message_type $name ($code) language .." . RESET . ""), $data, $opt->{'verbose'});
            my ($language_source_file,
                $language_source_file_auto,
                $language_source_file_encoding,
                $language_source_file_target);
            if (!@{$keys_test}) {

                # Get pre-translated strings
                ($language_source_file,
                 $language_source_file_auto,
                 $language_source_file_encoding,
                 $language_source_file_target)
                  = language_source_file($opt,
                                         $data,
                                         ('language-name'      => $name,
                                          'language-code'      => $code,
                                          'module-path'        => $mpath,
                                          'module-name'        => $module,
                                          'translation-target' => $type
                                         ));

                if (-l $language_source_file_target) {
                    talk_log(
                             ("  " . CYAN BOLD .
                                ".. symlink detected - transcoding/translating skipped for this file $name ($code)" .
                                RESET . " .."
                             ),
                             $data,
                             1);
                    next;
                }
            }

            # Check if the module's language has already been transcoded/translated, to avoid potential waste
            if (!@{$keys_test} &&
                !$language_source_ignore_auto &&
                $language_source_encoding eq 'map' &&
                -r $mpath . "/$code.auto")
            {
                talk_log(("   .. transcoding/translating skipped for language " . RED BOLD, $code, RESET . " .."), $data, 1);
                next;
            }

            while (my ($key, $value) = each %template) {

                # Don't add special keys
                next if ($key eq '__norefs');
                next if ($key =~ /^\#/);

                # If set to test certain keys only, skip all other
                if (@{$keys_test} && (!any {$_ =~ /^$key$/} @{$keys_test})) {
                    next;
                }

                # Modify `$key` when type is set to "module", otherwise keep default
                my $key_  = $key;
                my $key__ = $key;

                # When processing `module.info` file, ignore most keys
                # and add suffix to allowed to return expected format
                if ($type eq 'module') {
                    if ($key !~ /name|desc|longdesc/) {
                        next;
                    }
                    my $code_ = $code;
                    if ($language_source_encoding eq 'map') {
                        $code_ = language_map($code);
                        $key_  = "${key}_${code_}";
                    }
                    $key__ = "${key}_${code}";
                }

                # Previously translated strings from `$code.auto` file, e.g. `de.auto`,
                # should be considered, unless forced to be dropped
                if ($language_source_file_auto &&
                    $language_source_file_auto->{$key_} &&
                    !@{$keys_test} &&
                    !$language_source_ignore_auto)
                {
                    !$verbose_silent_mode && talk_log(
                                      ("" . MAGENTA .
                                         "   .. ($code.auto) found and stored machine translated value for \`$key\` key to" .
                                         RESET . ""
                                      ),
                                      $data,
                                      $opt->{'verbose'});

                    # Add a string to main language file
                    $language_auto{$key__} = $language_source_file_auto->{$key_};

                    $output++;
                }

                # Human translated strings must be added to original `$code` file, e.g. `de` after transcoding
                elsif ($language_source_file && $language_source_file->{$key_} && !@{$keys_test}) {
                    !$verbose_silent_mode && talk_log(
                                         ("" . DARK .
                                            "   .. ($code) found and transcoded human translated value for \`$key\` key to" .
                                            RESET . ""
                                         ),
                                         $data,
                                         $opt->{'verbose'});

                    # We need to re-encode the string first, depending on which human translated file is being
                    # used (iso or UTF-8). We still need to convert all back and forth to get rid of ugly &#... escapes
                    if (!$mode_sync) {
                        $language_source_file->{$key_} =
                          language_transcode($language_source_file->{$key_}, $language_source_file_encoding, $opt);
                        talk_log("      — " . WHITE . " $language_source_file->{$key_}" . RESET . "",
                                 $data, $opt->{'verbose'});
                    }

                    # Add a string to main language file
                    $language{$key__} = $language_source_file->{$key_};

                    $output++;
                }

                # Machine translated strings must be added to `$code.auto` file, e.g. `de.auto`
                else {

                    # Skip steps below, and only transcode existing values
                    if ($opt->{'only-transcode'}) {
                        next;
                    }

                    # Add original value, from source language, to `$code.auto` file without
                    # performing actual translation. It's useful when you want to translate it manually
                    # and simply need to find missing language strings on targeted file. Besides, `config.info`
                    # options are never translated (so far), just as languages that use right-to-left writing
                    if (!@{$keys_test} &&
                        ($opt->{'only-diff'} ||
                            $type eq 'config'                ||
                            $type eq 'uconfig'               ||
                            ($code eq 'ur' && $value =~ /</) ||
                            (any {$_ =~ /^$key$/} @{$keys_exclude})))
                    {
                        if (!$verbose_silent_mode) {
                            talk_log(("" . DARK . "   .. ($code) stored original value for \`$key\` which is" . RESET . ""),
                                     $data, $opt->{'verbose'});
                            talk_log("      — " . CYAN . " $value" . RESET . "", $data, $opt->{'verbose'});
                        }
                        $language_auto{$key__} = $value;
                        next;
                    }

                    # Add translated string
                    talk_log(("" . BRIGHT_MAGENTA . "   .. ($code) translated value for \`$key\` to" . RESET . ""),
                             $data, $verbose);

                    # Prepare sent text
                    my $_value = $value;
                    $value = translate_substitute($value, $opt);

                    # Run actual translation
                    my $translated = translate($data, $opt, $code, $value);
                    $translated = $translated->{'data'}->{'translations'}[0]->{'translatedText'};

                    talk_log("      — $key --> $_value", $data, $verbose);

                    my $translated_ = $translated;

                    $translated = translated_substitute($translated, $_value, $code, $rtl, $opt);

                    # Store translated string
                    $language_auto{$key__} = $translated;
                    talk_log("      — $key <-- $translated", $data, $verbose);

                    # Print debug log
                    if ($verbose == 2) {
                        say BLACK, "      --< $value",       RESET;
                        say BLACK, "      >-- $translated_", RESET;
                    }

                    $output++;
                    $output_++;
                }
            }

            # If we are converting from old times, where there were different encodings and
            # file extensions for it, we need to delete/clear all old, no longer needed files
            # e.g. will be deleted: uk_UA, zh_TW.Big5, ja_JP.euc, ko_KR.euc and etc.
            my $file_deleted = "";
            if (!@{$keys_test} && $language_source_encoding eq 'map') {
                my $found;
                find(
                    {
                       wanted => sub {
                           $found = $File::Find::name;
                           my $code_ = language_map($code);
                           if ($code ne $code_ &&
                               (   $found eq "$mpath/$code_" ||
                                   (defined($opt->{'type'}) &&
                                       $opt->{'type'} &&
                                       $opt->{'type'} =~ /config|uconfig|module/ &&
                                       ($found eq "$mpath/$opt->{'type'}.info.$code_"))))
                           {
                               unlink($found);
                               $file_deleted .= " $found";
                               $found =~ s/$mpath\///;
                               talk_log(
                                        ("" . YELLOW .
                                           "  .. Found no longer used language file ($found) and deleted .." . RESET . ""
                                        ),
                                        $data,
                                        1);
                           }
                       },
                    },
                    $mpath);
            }

            # Write transcoded/translated file
            if (!@{$keys_test}) {

                # Prepend file name for `config.info` or `module.info` modes
                if (defined($opt->{'type'}) && $opt->{'type'} =~ /config|uconfig|module/) {
                    $code = "$opt->{'type'}.info.$code";
                }

                # Language suffix for automatics
                my $language_suffix = 'auto';
                if ($opt->{'only-diff'} && !$opt->{'only-diff-auto'}) {
                    $language_suffix = 'diff';
                }
                my $file      = $mpath . "/$code";
                my $file_auto = $mpath . "/$code.$language_suffix";
                write_file($file, \%language, undef, undef, $mfile)
                  if (%language || (!%language && !$opt->{'only-transcode'}));
                write_file($file_auto, \%language_auto, undef, undef, $mfile) if (!$opt->{'only-transcode'});

                # Git auto-commit after language was transcoded/translated
                if ($git_commit) {

                    # my $gs = "$module";
                    my $gf = trim("$file $file_auto $file_deleted");
                    say GREEN, ".. creating Git commit to current repo for " . BRIGHT_YELLOW . "$gf" . RESET . " files ..";
                    system("git add $gf");
                    system("git commit $gf -m 'Add transcoding/translation for \`$module\` module of $name ($code)'");
                }
            }

            if (!$output_ &&
                $mode_sync &&
                $language_source_encoding eq 'utf-8' &&
                !@{$keys_test})
            {
                talk_log(("" . BRIGHT_CYAN . "  .. All strings are in sync already for $name ($code) language" . RESET . ""),
                         $data, 1);
            }
        }
        if (!$output) {
            talk_log(("Nothing to do for " . BLUE . "$module" . RESET . " module .."), $data, 1);
        }

        say GREEN, ".. done ", RESET;
    }
}

sub get_google_translate_token
{
    my $gc = `gcloud -v 2>&1`;
    if (!$gc // $gc !~ /Google Cloud SDK/) {
        errors('gcloud-missing');
        return 0;
    }

    my $token = `gcloud auth application-default print-access-token`;

    if ($token =~ /ERROR:/) {
        errors('gcloud-error', $token);
        return 0;
    } else {
        return $token;
    }
}

sub prompt
{
    my ($q) = @_;

    if ($q eq 'next') {
        return prompt("Do you want to proceed?");
    }
    my $p = sub {
        my ($q, $c) = @_;
        $q = $c if ($c);
        local $| = 1;
        print DARK, "$q", RESET;
        chomp(my $a = <STDIN>);
        $a = lc($a) if ($a);
        return lc($a);

    };
    my $a = &$p("$q [y/N]: ");
    if ($a ne 'y' && $a ne 'n') {
        my $repeat = ("" . BRIGHT_MAGENTA . "Please enter" .
                  RESET . " " . YELLOW . "[y]" . RESET . " " . MAGENTA . "or" . RESET . " " . YELLOW . "[n]" . RESET . ": ");
        $a = &$p(undef, $repeat);
        if ($a ne 'y' && $a ne 'n') {
            $a = &$p(undef, $repeat);
        }
    }
    return $a eq 'y';
}

sub ask
{
    my ($q) = @_;
    my $p = sub {
        my ($q) = @_;
        local $| = 1;
        print DARK, "$q", RESET;
        chomp(my $a = <STDIN>);
        return $a;

    };
    my $a = &$p("$q: ");
    return $a;
}

sub errors
{
    my ($e, $error) = @_;
    my $message;
    if ($e eq 'language-target') {
        $message =
"Error: Using \`$error\` as a target language(s) is refused. The value(s) should match to one of the language codes from the language list file.";
    }

    if ($e eq 'language-source') {
        $message =
"Error: Using \`$error\` as a source language is refused. The value should match to one of the language codes from the language list file.";
    }

    if ($e eq 'gcloud-missing') {
        $message =
          "Error: Command \`gcloud\` to manage Google Cloud Platform resources and developer workflow is not available.",;
    }

    if ($e eq 'gcloud-error') {
        $message = "Error: $error";
    }
    say RED, $message, RESET;
}

sub talk_log
{
    my ($what, $data, $output_to_console) = @_;

    # Output to console
    if ($output_to_console) {
        say $what;
    }

    # Store to log
    my $out = $data->{'out'};
    if ($out) {
        $what =~ s/\[\d+m//g;
        say $out $what;
    }
}

sub talk
{
    my ($what, $opt, $data) = @_;
    if ($what eq 'affected') {
        my $affected       = "@{$data->{'modules'}}";
        my $affected_count = scalar(split(' ', $affected));
        $affected =~ s/\b(lang)\b/(lang)/;
        if ($affected) {
            say GREEN, "Affected modules " . RESET, DARK . "[$affected_count]" . RESET . ": ", YELLOW BOLD, $affected, RESET;
        } else {
            say RED, "Error: No modules found to operate on", RESET;
            exit;
        }
        my $languages       = ("@{$opt->{'language-target'}}" || "@{$data->{'languages_source_list_codes'}}");
        my $languages_count = scalar(split(' ', $languages));
        say GREEN, "Affected languages" . RESET, DARK . " [$languages_count]" . RESET . ": ", YELLOW BOLD,
          "" . $languages . "", RESET;
    }
    if ($what eq 'overwrite-pre') {
        say RED, "Warning! ", RESET, WHITE,
"The following operation will force-translate and overwrite mentioned\nlanguages in all mentioned modules listed above, using ",
          YELLOW BOLD, $opt->{'language-source'}, RESET,
          " as a template lang-\nuage. The following directory " . YELLOW BOLD,
          $data->{'path'}, RESET,
" with human translated\nmodule strings, will be used instead of machine translations. The operation\nlog will be written in "
          . YELLOW BOLD, $opt->{'log'}, RESET, " file.", RESET;
    }

    if ($what eq 'sync-pre') {
        say BRIGHT_MAGENTA, "Attention! ", RESET, WHITE,
          "The following operation will synchronize mentioned languages,\nfor all mentioned modules listed above, using ",
          YELLOW BOLD, $opt->{'language-source'}, RESET,
" as a template language.\nThe keys found in target languages but missing on source, will be deleted,\nwhile newly added keys on source, will be translated for targets. The ope-\nration log will be written in "
          . YELLOW BOLD, $opt->{'log'}, RESET, " file.", RESET;
    }
}

sub trim
{
    my $s = shift;
    $s =~ s/^\s+|\s+$//g;
    return $s;
}

=pod

=head1 NAME

language-manager

=head1 DESCRIPTION

Manage Webmin/Usermin module language files (lang|ulang|help|config|uconfig|module), to perform transcoding, translation, spell check and add/remove language strings.

=head1 SYNOPSIS

language-manager [options]

=head1 OPTIONS

=over

=item --help, -h

Give this help list.

Examples of usage:

Synchronize all language keys for Apache module, using English as a source language - newly added entries to Apache source language file (def. en), will be translated and inserted into all other language files (.auto), while deleted entries will also be removed on targets, including human translated ones.

 — language-manager -m=apache

Translate all available languages, using old-time encoding map, for BIND module, using as type "lang" directory, discarding human translations for Hebrew, keeping original value (not translating) for key "mass_desc", and printing verbose output.

 — language-manager -x=full -e=map -m=bind8 -w=lang -se=he -ke=mass_desc

Only transcode language files, using old-time encoding map, from files being in different encodings, to new style, where all language files are in "utf-8" encoding, for Apache module. No translations will be made, and no ".auto" files will be created.

 — language-manager -x=full -e=map -m=apache -ot

Test translations for "index_stopmsg,trusted_warning" keys, in Russian and German languages, in BIND module, print on-screen results and exit.

 — language-manager -m=bind8 -t=ru,de -kt=index_stopmsg,trusted_warning

=item --mode, -x <sync|full>

Mode can be either <sync> or <full>. Default is set to "sync" and will only keep the keys found on source language, while missing keys in target languages, will be deleted, and newly added keys to source language, will be translated. Mode "full" is meant to perform full translation, keeping human translated strings and overwriting all machine translations done in the past (not recommended to run).

=item --type, -w <lang|ulang|help|config|uconfig|module>

Type of target to use for operations. Either <lang>, <ulang>, <help> directories or language info file, as <config> or <module>. Default is set to "lang" for Webmin and "ulang" for Usermin.

=item --modules, -m

Comma separated list of modules to operate on. Default is to operate on all available modules.

=item --modules-exclude, -me

Comma separated list of modules to exclude

=item --language-target, -t

Comma separated list of affected language codes to operate on. Default is to operate on all languages defined in language list.

=item --language-target-exclude, -te

Comma separated list of target language codes that should be excluded from processing.

=item --language-source, -s

Language to use as a base language. Default is set English.

=item --language-source-exclude, -se

Comma separated list of human translated language codes to exclude from adding to target file (always translate instead). It might be useful if human translated files are low quality or corrupted.

=item --language-source-ignore-auto, -sia

Drop all previous machine translations (those that are stored in ".auto" files) and re-run translations over again - this option is highly not recommended to use.

=item --language-source-encoding, -e

Encoding of human translated language files. Default is set to "utf-8". Available options are <utf-8>, <auto>, <map>. Option "auto" is used to detect encoding automatically, while option "map" is used to use old-time encoding map, where each language file had different encoding, and nowadays, should not be used, as all language files are going to be encoded in "utf-8" already. It's always best to avoid using this option.

=item --only-diff, -o

Use this option to extract a difference and find missing keys on target language (against source) and save currently untranslated strings to a separate file, with ".diff" extension.

=item --only-transcode, -ot

Use this option to simply convert old-time style module language files (where each language had different encoding) into new format, with all files encoded in "utf-8". Old files, such as "ja_JP.euc", "ko_KR.euc", "zh_TW.Big5" and "ru_RU" will be automatically deleted.

=item --keys-exclude, -ke

Comma separated list of keys that shouldn't be translated and stored with original values

=item --keys-test, -kt

Comma separated list of keys to test translation engine on; in this mode, translated strings are not saved

=item --git-commit, -gc

Create a separate commit to the current repo, after each language was transcoded/translated

=item --log, -l

Saves complete operation log. By default, log file is saved to "/tmp/language-manager-{timestamp}.txt" file.

=item --verbose, -v

Verbosely print processed files and provide detailed output. By detault, verbose output is enabled.

=back

=head1 LICENSE AND COPYRIGHT

Copyright 2020 Ilia Rostovtsev <programming@rostovtsev.io>