source: osm/applications/editors/josm/i18n/i18n.pl@ 35275

Last change on this file since 35275 was 35246, checked in by stoecker, 5 years ago

see #josm18399 - detect \r in translated strings

  • Property svn:executable set to *
File size: 12.8 KB
RevLine 
[19319]1#! /usr/bin/perl -w
[34025]2# fileformat documentation: JOSM I18n.java function load()
[19319]3
4use utf8;
[31111]5use strict;
[30169]6use open qw/:std :encoding(utf8)/;
[19319]7use Term::ReadKey;
8use Encode;
9
10my $waswarn = 0;
[31087]11my $lang_pattern = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}\@[a-z]+)';
[31088]12my $lang_pattern_file = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}-[a-z]+)';
[19319]13
14main();
15
16sub getdate
17{
18 my @t=gmtime();
19 return sprintf("%04d-%02d-%02d %02d:%02d+0000",
20 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1]);
21}
22
[31111]23sub loadpot($)
[19319]24{
[31111]25 my ($file) = @_;
26 my %all = ();
27 my %keys = ();
28 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
29 my %postate = (last => "", type => "");
30 my $linenum = 0;
31 print "Reading file $file\n";
32 while(<FILE>)
33 {
34 ++$linenum;
35 my $fn = "$file:$linenum";
36 chomp;
37 if($_ =~ /^#/ || !$_)
38 {
39 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, 1, undef);
40 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
41 }
42 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
43 elsif($_ =~ /^(msg.+) "(.*)"$/)
44 {
45 my ($n, $d) = ($1, $2);
46 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
47 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, $new, undef);
48 $postate{last} = $d;
49 $postate{type} = $n;
50 $postate{src} = $fn if $new;
51 }
52 else
53 {
54 die "Strange line $linenum in $file: $_.";
55 }
56 }
57 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, 1, undef);
58 close(FILE);
59 return \%all;
60}
61
62sub loadfiles($$@)
63{
[19319]64 my $desc;
[31111]65 my %all = ();
66 my %keys = ();
67 my ($lang,$use,@files) = @_;
[19319]68 foreach my $file (@files)
69 {
70 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
71
[31086]72 if($file =~ /\/$lang_pattern\.po$/)
[19319]73 {
74 my $l = $1;
75 ++$lang->{$l};
76 my %postate = (last => "", type => "");
77 my $linenum = 0;
[31087]78 print "Reading file $file (lang $l)\n";
[19319]79 while(<FILE>)
80 {
81 ++$linenum;
82 my $fn = "$file:$linenum";
83 chomp;
84 if($_ =~ /^#/ || !$_)
85 {
[31111]86 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, 1, $use);
[19319]87 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
88 }
89 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
90 elsif($_ =~ /^(msg.+) "(.*)"$/)
91 {
92 my ($n, $d) = ($1, $2);
[19334]93 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
[31111]94 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, $new, $use);
[19319]95 $postate{last} = $d;
96 $postate{type} = $n;
97 $postate{src} = $fn if $new;
98 }
99 else
100 {
101 die "Strange line $linenum in $file: $_.";
102 }
103 }
[31111]104 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, 1, $use);
[19319]105 }
106 else
107 {
108 die "File format not supported for file $file.";
109 }
110 close(FILE);
111 }
112 return %all;
113}
114
115my $alwayspo = 0;
116my $alwaysup = 0;
117my $noask = 0;
[31111]118my %conflicts;
[19319]119sub copystring($$$$$$$)
120{
121 my ($data, $en, $l, $str, $txt, $context, $ispo) = @_;
122
123 $en = "___${context}___$en" if $context;
124
125 if(exists($data->{$en}{$l}) && $data->{$en}{$l} ne $str)
126 {
127 return if !$str;
128 if($l =~ /^_/)
129 {
130 $data->{$en}{$l} .= ";$str" if !($data->{$en}{$l} =~ /$str/);
131 }
132 elsif(!$data->{$en}{$l})
133 {
134 $data->{$en}{$l} = $str;
135 }
136 else
137 {
138 my $f = $data->{$en}{_file} || "";
139 $f = ($f ? "$f;".$data->{$en}{"_src.$l"} : $data->{$en}{"_src.$l"}) if $data->{$en}{"_src.$l"};
140 my $isotherpo = ($f =~ /\.po\:/);
141 my $pomode = ($ispo && !$isotherpo) || (!$ispo && $isotherpo);
142
143 my $mis = "String mismatch for '$en' **$str** ($txt) != **$data->{$en}{$l}** ($f)\n";
144 my $replace = 0;
145
146 if(($conflicts{$l}{$str} || "") eq $data->{$en}{$l}) {}
147 elsif($pomode && $alwaysup) { $replace=$isotherpo; }
148 elsif($pomode && $alwayspo) { $replace=$ispo; }
149 elsif($noask) { print $mis; ++$waswarn; }
150 else
151 {
152 ReadMode 4; # Turn off controls keys
153 my $arg = "(l)eft, (r)ight";
154 $arg .= ", (p)o, (u)pstream[ts/mat], all p(o), all up(s)tream" if $pomode;
155 $arg .= ", e(x)it: ";
156 print "$mis$arg";
157 while((my $c = getc()))
158 {
159 if($c eq "l") { $replace=1; }
160 elsif($c eq "r") {}
161 elsif($c eq "p" && $pomode) { $replace=$ispo; }
162 elsif($c eq "u" && $pomode) { $replace=$isotherpo; }
163 elsif($c eq "o" && $pomode) { $alwayspo = 1; $replace=$ispo; }
164 elsif($c eq "s" && $pomode) { $alwaysup = 1; $replace=$isotherpo; }
165 elsif($c eq "x") { $noask = 1; ++$waswarn; }
166 else { print "\n$arg"; next; }
167 last;
168 }
169 print("\n");
170 ReadMode 0; # Turn on controls keys
171 }
172 if(!$noask)
173 {
174 if($replace)
175 {
176 $data->{$en}{$l} = $str;
177 $conflicts{$l}{$data->{$en}{$l}} = $str;
178 }
179 else
180 {
181 $conflicts{$l}{$str} = $data->{$en}{$l};
182 }
183 }
184 }
185 }
186 else
187 {
188 $data->{$en}{$l} = $str;
189 }
190}
191
[31111]192# Check a current state for new data
193#
194# @param postate Pointer to current status hash
195# @param data Pointer to final data array
196# @param l current language
197# @param txt output text in case of error, usually file and line number
198# @param keys pointer to hash for info keys extracted from the first msgid "" entry
199# @param new whether a data set is finish or not yet complete
200# @param use hash to strings to use or undef for all strings
201#
202sub checkpo($$$$$$$)
[19319]203{
[31111]204 my ($postate, $data, $l, $txt, $keys, $new, $use) = @_;
[19319]205
206 if($postate->{type} eq "msgid") {$postate->{msgid} = $postate->{last};}
207 elsif($postate->{type} eq "msgid_plural") {$postate->{msgid_1} = $postate->{last};}
208 elsif($postate->{type} =~ /^msgstr(\[0\])?$/) {$postate->{msgstr} = $postate->{last};}
209 elsif($postate->{type} =~ /^msgstr\[(.+)\]$/) {$postate->{"msgstr_$1"} = $postate->{last};}
210 elsif($postate->{type} eq "msgctxt") {$postate->{context} = $postate->{last};}
211 elsif($postate->{type}) { die "Strange type $postate->{type} found\n" }
212
213 if($new)
214 {
[31111]215 my $en = $postate->{context} ? "___$postate->{context}___$postate->{msgid}" : $postate->{msgid};
216 if((!$postate->{fuzzy}) && ($l eq "pot" || $postate->{msgstr}) && $postate->{msgid}
217 && (!$use || $use->{$en}))
[19319]218 {
219 copystring($data, $postate->{msgid}, $l, $postate->{msgstr},$txt,$postate->{context}, 1);
[31111]220 if(!$use || $use->{$en}{"en.1"})
221 {
222 for(my $i = 1; exists($postate->{"msgstr_$i"}); ++$i)
223 { copystring($data, $postate->{msgid}, "$l.$i", $postate->{"msgstr_$i"},$txt,$postate->{context}, 1); }
224 if($postate->{msgid_1})
225 { copystring($data, $postate->{msgid}, "en.1", $postate->{msgid_1},$txt,$postate->{context}, 1); }
226 }
[19319]227 copystring($data, $postate->{msgid}, "_src.$l", $postate->{src},$txt,$postate->{context}, 1);
228 }
229 elsif($postate->{msgstr} && !$postate->{msgid})
230 {
231 my %k = ($postate->{msgstr} =~ /(.+?): +(.+?)\\n/g);
232 # take the first one!
233 for $a (sort keys %k)
234 {
235 $keys->{$l}{$a} = $k{$a} if !$keys->{$l}{$a};
236 }
237 }
238 foreach my $k (keys %{$postate})
239 {
240 delete $postate->{$k};
241 }
242 $postate->{type} = $postate->{last} = "";
243 }
244}
245
[19356]246sub makestring($)
247{
248 my ($str) = @_;
249 $str =~ s/\\"/"/g;
[21523]250 $str =~ s/\\\\/\\/g;
[19356]251 $str =~ s/\\n/\n/g;
252 $str = encode("utf8", $str);
[26338]253 return $str;
[19356]254}
255
[26338]256sub checkstring
257{
[26586]258 my ($la, $tr, $en, $cnt, $en1, $eq) = @_;
[26338]259 $tr = makestring($tr);
260 $en = makestring($en);
[26340]261 $cnt = $cnt || 0;
[26338]262 $en1 = makestring($en1) if defined($en1);
263 my $error = 0;
264
265 # Test one - are there single quotes which don't occur twice
266 my $v = $tr;
267 $v =~ s/''//g; # replace all twice occuring single quotes
[26344]268 $v =~ s/'[{}]'//g; # replace all bracketquoting single quotes
[26340]269 if($v =~ /'/)#&& $la ne "en")
[26338]270 {
[30169]271 warn "JAVA translation issue for language $la: Mismatching single quotes:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
[26339]272 $error = 1;
[26338]273 }
[34996]274 if($tr =~ /<!\[CDATA\[/)#&& $la ne "en")
275 {
276 warn "JAVA translation issue for language $la: CDATA in string:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
277 $error = 1;
278 }
[35246]279 if($tr =~ /\\r/)
280 {
281 warn "JAVA translation issue for language $la: \\r in string:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
282 $error = 1;
283 }
[26338]284 # Test two - check if there are {..} which should not be
285 my @fmt = ();
286 my $fmt;
287 my $fmte;
288 my $fmte1 = "";
[26344]289 my $trt = $tr; $trt =~ s/'[{}]'//g;
[33152]290 while($trt =~ /\{(.*?)\}/g) {push @fmt,$1};
291 while($trt =~ /\%([a-z]+)\%/g) {push @fmt,$1};
292 $fmt = join("_", sort @fmt); @fmt = ();
[26344]293 my $ent = $en; $ent =~ s/'[{}]'//g;
[33152]294 while($ent =~ /\{(.*?)\}/g) {push @fmt,$1};
295 while($ent =~ /\%([a-z]+)\%/g) {push @fmt,$1};
296 $fmte = join("_", sort @fmt); @fmt = ();
[26344]297 if($en1)
298 {
299 my $en1t = $en1; $en1t =~ s/'[{}]'//g;
300 while($en1t =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte1 = join("_", sort @fmt);
301 }
[26338]302 if($fmt ne $fmte && $fmt ne $fmte1)
303 {
304 if(!($fmte eq '0' && $fmt eq "" && $cnt == 1)) # Don't warn when a single value is left for first multi-translation
305 {
[30169]306 warn "JAVA translation issue for language $la ($cnt): Mismatching format entries:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
[26338]307 $error = 1;
308 }
309 }
310
311 #$tr = "" if($error && $la ne "en");
[26586]312 return pack("n",65534) if $eq;
[26338]313
314 return pack("n",length($tr)).$tr;
315}
316
[35180]317sub createlang($$@)
[19319]318{
[35180]319 my ($data, $maxcount, @files) = @_;
[26174]320 my $maxlen = 0;
[19319]321 foreach my $file (@files)
322 {
[26174]323 my $len = length($file);
324 $maxlen = $len if $len > $maxlen;
325 }
326 foreach my $file (@files)
327 {
[19319]328 my $la;
[19496]329 my $cnt = 0;
[31088]330 if($file =~ /^(?:.*\/)?$lang_pattern_file\.lang$/)
[19319]331 {
332 $la = $1;
[31088]333 $la =~ s/-/\@/;
[19319]334 }
335 else
336 {
337 die "Language for file $file unknown.";
338 }
339 die "Could not open outfile $file\n" if !open FILE,">:raw",$file;
340
341 foreach my $en (sort keys %{$data})
342 {
343 next if $data->{$en}{"en.1"};
344 my $val;
[26586]345 my $eq;
[19319]346 if($la eq "en")
347 {
[19496]348 ++$cnt;
[19319]349 $val = $en;
350 $val =~ s/^___(.*)___/_:$1\n/;
351 }
352 else
353 {
354 my $ennoctx = $en;
355 $ennoctx =~ s/^___(.*)___//;
356 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
[19496]357 ++$cnt if $val;
[26586]358 if($ennoctx eq $val)
359 {
360 $val = ""; $eq = 1;
361 }
[19319]362 }
[26586]363 print FILE checkstring($la, $val, $en, undef, undef, $eq);
[19319]364 }
365 print FILE pack "n",0xFFFF;
366 foreach my $en (sort keys %{$data})
367 {
368 next if !$data->{$en}{"en.1"};
369 my $num;
370 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
371 { }
372 my $val;
[31111]373 my $eq = 0;
[19319]374 if($la eq "en")
375 {
[19496]376 ++$cnt;
[19319]377 $val = $en;
378 $val =~ s/^___(.*)___/_:$1\n/;
379 }
380 else
381 {
382 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
383 --$num if(!$val);
[19496]384 ++$cnt if $val;
[19319]385 if($num == 2)
386 {
387 my $ennoctx = $en;
388 $ennoctx =~ s/^___(.*)___//;
[26987]389 if($val eq $ennoctx && $data->{$en}{"$la.1"} eq $data->{$en}{"en.1"})
390 {
391 $num = 0;
392 $eq = 1;
393 }
[19319]394 }
395 }
396
[26987]397 print FILE pack "C",$eq ? 0xFE : $num;
[19319]398 if($num)
399 {
[26338]400 print FILE checkstring($la, $val, $en, 1, $data->{$en}{"en.1"});
[19319]401 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
402 {
[26338]403 print FILE checkstring($la, $data->{$en}{"$la.$num"}, $en, $num+1, $data->{$en}{"en.1"});
[19319]404 }
405 }
406 }
407 close FILE;
[25523]408 if(!$cnt)
409 {
410 unlink $file;
[26174]411 printf "Skipped file %-${maxlen}s: Contained 0 strings out of %5d.\n",$file,$maxcount;
[25523]412 }
413 else
414 {
[31956]415 printf "Created file %-${maxlen}s: Added %5d strings out of %5d (%5.1f%%).\n",$file,$cnt,$maxcount,,$cnt*100.0/$maxcount-5e-2;
[25523]416 }
[19319]417 }
418}
419
420sub main
421{
422 my %lang;
423 my @po;
[31111]424 my $potfile;
425 my $basename = "./";
426 foreach my $arg (@ARGV)
427 {
428 next if $arg !~ /^--/;
429 if($arg =~ /^--basedir=(.+)$/)
430 {
431 $basename = $1;
432 }
433 elsif($arg =~ /^--potfile=(.+)$/)
434 {
435 $potfile = $1;
436 }
437 else
438 {
439 die "Unknown argument $arg.";
440 }
441 }
[28423]442 $basename .= "/" if !($basename =~ /[\/\\:]$/);
[19319]443 foreach my $arg (@ARGV)
444 {
[31111]445 next if $arg =~ /^--/;
[19319]446 foreach my $f (glob $arg)
447 {
448 if($f =~ /\*/) { printf "Skipping $f\n"; }
449 elsif($f =~ /\.po$/) { push(@po, $f); }
450 else { die "unknown file extension."; }
451 }
452 }
[35180]453 my $use = $potfile ? loadpot($potfile) : undef;
454 my %data = loadfiles(\%lang, $use, @po);
[31111]455
[19319]456 my @clang;
457 foreach my $la (sort keys %lang)
458 {
[31088]459 $la =~ s/\@/-/;
[19319]460 push(@clang, "${basename}$la.lang");
461 }
462 push(@clang, "${basename}en.lang");
463 die "There have been warning. No output.\n" if $waswarn;
464
[35180]465 createlang(\%data, ($use ? keys %{$use} : keys %data), @clang);
[19319]466}
Note: See TracBrowser for help on using the repository browser.