source: osm/applications/editors/josm/i18n/i18n.pl@ 31921

Last change on this file since 31921 was 31111, checked in by stoecker, 10 years ago

major speedup of i18n process, add some new features, split core translations into two parts

  • Property svn:executable set to *
File size: 12.2 KB
Line 
1#! /usr/bin/perl -w
2
3use utf8;
4use strict;
5use open qw/:std :encoding(utf8)/;
6use Term::ReadKey;
7use Encode;
8
9my $waswarn = 0;
10my $lang_pattern = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}\@[a-z]+)';
11my $lang_pattern_file = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}-[a-z]+)';
12
13main();
14
15sub getdate
16{
17 my @t=gmtime();
18 return sprintf("%04d-%02d-%02d %02d:%02d+0000",
19 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1]);
20}
21
22sub loadpot($)
23{
24 my ($file) = @_;
25 my %all = ();
26 my %keys = ();
27 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
28 my %postate = (last => "", type => "");
29 my $linenum = 0;
30 print "Reading file $file\n";
31 while(<FILE>)
32 {
33 ++$linenum;
34 my $fn = "$file:$linenum";
35 chomp;
36 if($_ =~ /^#/ || !$_)
37 {
38 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, 1, undef);
39 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
40 }
41 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
42 elsif($_ =~ /^(msg.+) "(.*)"$/)
43 {
44 my ($n, $d) = ($1, $2);
45 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
46 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, $new, undef);
47 $postate{last} = $d;
48 $postate{type} = $n;
49 $postate{src} = $fn if $new;
50 }
51 else
52 {
53 die "Strange line $linenum in $file: $_.";
54 }
55 }
56 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, 1, undef);
57 close(FILE);
58 return \%all;
59}
60
61sub loadfiles($$@)
62{
63 my $desc;
64 my %all = ();
65 my %keys = ();
66 my ($lang,$use,@files) = @_;
67 foreach my $file (@files)
68 {
69 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
70
71 if($file =~ /\/$lang_pattern\.po$/)
72 {
73 my $l = $1;
74 ++$lang->{$l};
75 my %postate = (last => "", type => "");
76 my $linenum = 0;
77 print "Reading file $file (lang $l)\n";
78 while(<FILE>)
79 {
80 ++$linenum;
81 my $fn = "$file:$linenum";
82 chomp;
83 if($_ =~ /^#/ || !$_)
84 {
85 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, 1, $use);
86 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
87 }
88 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
89 elsif($_ =~ /^(msg.+) "(.*)"$/)
90 {
91 my ($n, $d) = ($1, $2);
92 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
93 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, $new, $use);
94 $postate{last} = $d;
95 $postate{type} = $n;
96 $postate{src} = $fn if $new;
97 }
98 else
99 {
100 die "Strange line $linenum in $file: $_.";
101 }
102 }
103 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, 1, $use);
104 }
105 else
106 {
107 die "File format not supported for file $file.";
108 }
109 close(FILE);
110 }
111 return %all;
112}
113
114my $alwayspo = 0;
115my $alwaysup = 0;
116my $noask = 0;
117my %conflicts;
118sub copystring($$$$$$$)
119{
120 my ($data, $en, $l, $str, $txt, $context, $ispo) = @_;
121
122 $en = "___${context}___$en" if $context;
123
124 if(exists($data->{$en}{$l}) && $data->{$en}{$l} ne $str)
125 {
126 return if !$str;
127 if($l =~ /^_/)
128 {
129 $data->{$en}{$l} .= ";$str" if !($data->{$en}{$l} =~ /$str/);
130 }
131 elsif(!$data->{$en}{$l})
132 {
133 $data->{$en}{$l} = $str;
134 }
135 else
136 {
137 my $f = $data->{$en}{_file} || "";
138 $f = ($f ? "$f;".$data->{$en}{"_src.$l"} : $data->{$en}{"_src.$l"}) if $data->{$en}{"_src.$l"};
139 my $isotherpo = ($f =~ /\.po\:/);
140 my $pomode = ($ispo && !$isotherpo) || (!$ispo && $isotherpo);
141
142 my $mis = "String mismatch for '$en' **$str** ($txt) != **$data->{$en}{$l}** ($f)\n";
143 my $replace = 0;
144
145 if(($conflicts{$l}{$str} || "") eq $data->{$en}{$l}) {}
146 elsif($pomode && $alwaysup) { $replace=$isotherpo; }
147 elsif($pomode && $alwayspo) { $replace=$ispo; }
148 elsif($noask) { print $mis; ++$waswarn; }
149 else
150 {
151 ReadMode 4; # Turn off controls keys
152 my $arg = "(l)eft, (r)ight";
153 $arg .= ", (p)o, (u)pstream[ts/mat], all p(o), all up(s)tream" if $pomode;
154 $arg .= ", e(x)it: ";
155 print "$mis$arg";
156 while((my $c = getc()))
157 {
158 if($c eq "l") { $replace=1; }
159 elsif($c eq "r") {}
160 elsif($c eq "p" && $pomode) { $replace=$ispo; }
161 elsif($c eq "u" && $pomode) { $replace=$isotherpo; }
162 elsif($c eq "o" && $pomode) { $alwayspo = 1; $replace=$ispo; }
163 elsif($c eq "s" && $pomode) { $alwaysup = 1; $replace=$isotherpo; }
164 elsif($c eq "x") { $noask = 1; ++$waswarn; }
165 else { print "\n$arg"; next; }
166 last;
167 }
168 print("\n");
169 ReadMode 0; # Turn on controls keys
170 }
171 if(!$noask)
172 {
173 if($replace)
174 {
175 $data->{$en}{$l} = $str;
176 $conflicts{$l}{$data->{$en}{$l}} = $str;
177 }
178 else
179 {
180 $conflicts{$l}{$str} = $data->{$en}{$l};
181 }
182 }
183 }
184 }
185 else
186 {
187 $data->{$en}{$l} = $str;
188 }
189}
190
191# Check a current state for new data
192#
193# @param postate Pointer to current status hash
194# @param data Pointer to final data array
195# @param l current language
196# @param txt output text in case of error, usually file and line number
197# @param keys pointer to hash for info keys extracted from the first msgid "" entry
198# @param new whether a data set is finish or not yet complete
199# @param use hash to strings to use or undef for all strings
200#
201sub checkpo($$$$$$$)
202{
203 my ($postate, $data, $l, $txt, $keys, $new, $use) = @_;
204
205 if($postate->{type} eq "msgid") {$postate->{msgid} = $postate->{last};}
206 elsif($postate->{type} eq "msgid_plural") {$postate->{msgid_1} = $postate->{last};}
207 elsif($postate->{type} =~ /^msgstr(\[0\])?$/) {$postate->{msgstr} = $postate->{last};}
208 elsif($postate->{type} =~ /^msgstr\[(.+)\]$/) {$postate->{"msgstr_$1"} = $postate->{last};}
209 elsif($postate->{type} eq "msgctxt") {$postate->{context} = $postate->{last};}
210 elsif($postate->{type}) { die "Strange type $postate->{type} found\n" }
211
212 if($new)
213 {
214 my $en = $postate->{context} ? "___$postate->{context}___$postate->{msgid}" : $postate->{msgid};
215 if((!$postate->{fuzzy}) && ($l eq "pot" || $postate->{msgstr}) && $postate->{msgid}
216 && (!$use || $use->{$en}))
217 {
218 copystring($data, $postate->{msgid}, $l, $postate->{msgstr},$txt,$postate->{context}, 1);
219 if(!$use || $use->{$en}{"en.1"})
220 {
221 for(my $i = 1; exists($postate->{"msgstr_$i"}); ++$i)
222 { copystring($data, $postate->{msgid}, "$l.$i", $postate->{"msgstr_$i"},$txt,$postate->{context}, 1); }
223 if($postate->{msgid_1})
224 { copystring($data, $postate->{msgid}, "en.1", $postate->{msgid_1},$txt,$postate->{context}, 1); }
225 }
226 copystring($data, $postate->{msgid}, "_src.$l", $postate->{src},$txt,$postate->{context}, 1);
227 }
228 elsif($postate->{msgstr} && !$postate->{msgid})
229 {
230 my %k = ($postate->{msgstr} =~ /(.+?): +(.+?)\\n/g);
231 # take the first one!
232 for $a (sort keys %k)
233 {
234 $keys->{$l}{$a} = $k{$a} if !$keys->{$l}{$a};
235 }
236 }
237 foreach my $k (keys %{$postate})
238 {
239 delete $postate->{$k};
240 }
241 $postate->{type} = $postate->{last} = "";
242 }
243}
244
245sub makestring($)
246{
247 my ($str) = @_;
248 $str =~ s/\\"/"/g;
249 $str =~ s/\\\\/\\/g;
250 $str =~ s/\\n/\n/g;
251 $str = encode("utf8", $str);
252 return $str;
253}
254
255sub checkstring
256{
257 my ($la, $tr, $en, $cnt, $en1, $eq) = @_;
258 $tr = makestring($tr);
259 $en = makestring($en);
260 $cnt = $cnt || 0;
261 $en1 = makestring($en1) if defined($en1);
262 my $error = 0;
263
264 # Test one - are there single quotes which don't occur twice
265 my $v = $tr;
266 $v =~ s/''//g; # replace all twice occuring single quotes
267 $v =~ s/'[{}]'//g; # replace all bracketquoting single quotes
268 if($v =~ /'/)#&& $la ne "en")
269 {
270 warn "JAVA translation issue for language $la: Mismatching single quotes:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
271 $error = 1;
272 }
273 # Test two - check if there are {..} which should not be
274 my @fmt = ();
275 my $fmt;
276 my $fmte;
277 my $fmte1 = "";
278 my $trt = $tr; $trt =~ s/'[{}]'//g;
279 while($trt =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmt = join("_", sort @fmt); @fmt = ();
280 my $ent = $en; $ent =~ s/'[{}]'//g;
281 while($ent =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte = join("_", sort @fmt); @fmt = ();
282 if($en1)
283 {
284 my $en1t = $en1; $en1t =~ s/'[{}]'//g;
285 while($en1t =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte1 = join("_", sort @fmt);
286 }
287 if($fmt ne $fmte && $fmt ne $fmte1)
288 {
289 if(!($fmte eq '0' && $fmt eq "" && $cnt == 1)) # Don't warn when a single value is left for first multi-translation
290 {
291 warn "JAVA translation issue for language $la ($cnt): Mismatching format entries:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
292 $error = 1;
293 }
294 }
295
296 #$tr = "" if($error && $la ne "en");
297 return pack("n",65534) if $eq;
298
299 return pack("n",length($tr)).$tr;
300}
301
302sub createlang($@)
303{
304 my ($data, @files) = @_;
305 my $maxlen = 0;
306 foreach my $file (@files)
307 {
308 my $len = length($file);
309 $maxlen = $len if $len > $maxlen;
310 }
311 my $maxcount = keys(%{$data});
312 foreach my $file (@files)
313 {
314 my $la;
315 my $cnt = 0;
316 if($file =~ /^(?:.*\/)?$lang_pattern_file\.lang$/)
317 {
318 $la = $1;
319 $la =~ s/-/\@/;
320 }
321 else
322 {
323 die "Language for file $file unknown.";
324 }
325 die "Could not open outfile $file\n" if !open FILE,">:raw",$file;
326
327 foreach my $en (sort keys %{$data})
328 {
329 next if $data->{$en}{"en.1"};
330 my $val;
331 my $eq;
332 if($la eq "en")
333 {
334 ++$cnt;
335 $val = $en;
336 $val =~ s/^___(.*)___/_:$1\n/;
337 }
338 else
339 {
340 my $ennoctx = $en;
341 $ennoctx =~ s/^___(.*)___//;
342 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
343 ++$cnt if $val;
344 if($ennoctx eq $val)
345 {
346 $val = ""; $eq = 1;
347 }
348 }
349 print FILE checkstring($la, $val, $en, undef, undef, $eq);
350 }
351 print FILE pack "n",0xFFFF;
352 foreach my $en (sort keys %{$data})
353 {
354 next if !$data->{$en}{"en.1"};
355 my $num;
356 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
357 { }
358 my $val;
359 my $eq = 0;
360 if($la eq "en")
361 {
362 ++$cnt;
363 $val = $en;
364 $val =~ s/^___(.*)___/_:$1\n/;
365 }
366 else
367 {
368 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
369 --$num if(!$val);
370 ++$cnt if $val;
371 if($num == 2)
372 {
373 my $ennoctx = $en;
374 $ennoctx =~ s/^___(.*)___//;
375 if($val eq $ennoctx && $data->{$en}{"$la.1"} eq $data->{$en}{"en.1"})
376 {
377 $num = 0;
378 $eq = 1;
379 }
380 }
381 }
382
383 print FILE pack "C",$eq ? 0xFE : $num;
384 if($num)
385 {
386 print FILE checkstring($la, $val, $en, 1, $data->{$en}{"en.1"});
387 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
388 {
389 print FILE checkstring($la, $data->{$en}{"$la.$num"}, $en, $num+1, $data->{$en}{"en.1"});
390 }
391 }
392 }
393 close FILE;
394 if(!$cnt)
395 {
396 unlink $file;
397 printf "Skipped file %-${maxlen}s: Contained 0 strings out of %5d.\n",$file,$maxcount;
398 }
399 else
400 {
401 printf "Created file %-${maxlen}s: Added %5d strings out of %5d (%5.1f%%).\n",$file,$cnt,$maxcount,,$cnt*100.0/$maxcount;
402 }
403 }
404}
405
406sub main
407{
408 my %lang;
409 my @po;
410 my $potfile;
411 my $basename = "./";
412 foreach my $arg (@ARGV)
413 {
414 next if $arg !~ /^--/;
415 if($arg =~ /^--basedir=(.+)$/)
416 {
417 $basename = $1;
418 }
419 elsif($arg =~ /^--potfile=(.+)$/)
420 {
421 $potfile = $1;
422 }
423 else
424 {
425 die "Unknown argument $arg.";
426 }
427 }
428 $basename .= "/" if !($basename =~ /[\/\\:]$/);
429 foreach my $arg (@ARGV)
430 {
431 next if $arg =~ /^--/;
432 foreach my $f (glob $arg)
433 {
434 if($f =~ /\*/) { printf "Skipping $f\n"; }
435 elsif($f =~ /\.po$/) { push(@po, $f); }
436 else { die "unknown file extension."; }
437 }
438 }
439 my %data = loadfiles(\%lang,$potfile ? loadpot($potfile) : undef, @po);
440
441 my @clang;
442 foreach my $la (sort keys %lang)
443 {
444 $la =~ s/\@/-/;
445 push(@clang, "${basename}$la.lang");
446 }
447 push(@clang, "${basename}en.lang");
448 die "There have been warning. No output.\n" if $waswarn;
449
450 createlang(\%data, @clang);
451}
Note: See TracBrowser for help on using the repository browser.