source: osm/applications/editors/josm/i18n/i18n.pl@ 26859

Last change on this file since 26859 was 26586, checked in by stoecker, 13 years ago

add identical marker for translation, so translation statistics can be extracted from data file

  • Property svn:executable set to *
File size: 9.9 KB
Line 
1#! /usr/bin/perl -w
2
3use utf8;
4use encoding "utf8";
5use Term::ReadKey;
6use Encode;
7
8my $waswarn = 0;
9my $maxcount = 0;
10
11main();
12
13sub getdate
14{
15 my @t=gmtime();
16 return sprintf("%04d-%02d-%02d %02d:%02d+0000",
17 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1]);
18}
19
20sub loadfiles($@)
21{
22 my $desc;
23 my $all;
24 my ($lang,@files) = @_;
25 foreach my $file (@files)
26 {
27 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
28 my $linenum = 0;
29
30 my $cnt = -1; # don't count translators info
31 if($file =~ /\/(.._..)\.po$/ || $file =~ /\/(...?)\.po$/)
32 {
33 my $l = $1;
34 ++$lang->{$l};
35 my %postate = (last => "", type => "");
36 my $linenum = 0;
37 print "Reading file $file\n";
38 while(<FILE>)
39 {
40 ++$linenum;
41 my $fn = "$file:$linenum";
42 chomp;
43 if($_ =~ /^#/ || !$_)
44 {
45 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, 1);
46 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
47 }
48 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
49 elsif($_ =~ /^(msg.+) "(.*)"$/)
50 {
51 my ($n, $d) = ($1, $2);
52 ++$cnt if $n eq "msgid";
53 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
54 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, $new);
55 $postate{last} = $d;
56 $postate{type} = $n;
57 $postate{src} = $fn if $new;
58 }
59 else
60 {
61 die "Strange line $linenum in $file: $_.";
62 }
63 }
64 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, 1);
65 }
66 else
67 {
68 die "File format not supported for file $file.";
69 }
70 $maxcount = $cnt if $cnt > $maxcount;
71 close(FILE);
72 }
73 return %all;
74}
75
76my $alwayspo = 0;
77my $alwaysup = 0;
78my $noask = 0;
79my $conflicts;
80sub copystring($$$$$$$)
81{
82 my ($data, $en, $l, $str, $txt, $context, $ispo) = @_;
83
84 $en = "___${context}___$en" if $context;
85
86 if(exists($data->{$en}{$l}) && $data->{$en}{$l} ne $str)
87 {
88 return if !$str;
89 if($l =~ /^_/)
90 {
91 $data->{$en}{$l} .= ";$str" if !($data->{$en}{$l} =~ /$str/);
92 }
93 elsif(!$data->{$en}{$l})
94 {
95 $data->{$en}{$l} = $str;
96 }
97 else
98 {
99
100 my $f = $data->{$en}{_file} || "";
101 $f = ($f ? "$f;".$data->{$en}{"_src.$l"} : $data->{$en}{"_src.$l"}) if $data->{$en}{"_src.$l"};
102 my $isotherpo = ($f =~ /\.po\:/);
103 my $pomode = ($ispo && !$isotherpo) || (!$ispo && $isotherpo);
104
105 my $mis = "String mismatch for '$en' **$str** ($txt) != **$data->{$en}{$l}** ($f)\n";
106 my $replace = 0;
107
108 if(($conflicts{$l}{$str} || "") eq $data->{$en}{$l}) {}
109 elsif($pomode && $alwaysup) { $replace=$isotherpo; }
110 elsif($pomode && $alwayspo) { $replace=$ispo; }
111 elsif($noask) { print $mis; ++$waswarn; }
112 else
113 {
114 ReadMode 4; # Turn off controls keys
115 my $arg = "(l)eft, (r)ight";
116 $arg .= ", (p)o, (u)pstream[ts/mat], all p(o), all up(s)tream" if $pomode;
117 $arg .= ", e(x)it: ";
118 print "$mis$arg";
119 while((my $c = getc()))
120 {
121 if($c eq "l") { $replace=1; }
122 elsif($c eq "r") {}
123 elsif($c eq "p" && $pomode) { $replace=$ispo; }
124 elsif($c eq "u" && $pomode) { $replace=$isotherpo; }
125 elsif($c eq "o" && $pomode) { $alwayspo = 1; $replace=$ispo; }
126 elsif($c eq "s" && $pomode) { $alwaysup = 1; $replace=$isotherpo; }
127 elsif($c eq "x") { $noask = 1; ++$waswarn; }
128 else { print "\n$arg"; next; }
129 last;
130 }
131 print("\n");
132 ReadMode 0; # Turn on controls keys
133 }
134 if(!$noask)
135 {
136 if($replace)
137 {
138 $data->{$en}{$l} = $str;
139 $conflicts{$l}{$data->{$en}{$l}} = $str;
140 }
141 else
142 {
143 $conflicts{$l}{$str} = $data->{$en}{$l};
144 }
145 }
146 }
147 }
148 else
149 {
150 $data->{$en}{$l} = $str;
151 }
152}
153
154sub checkpo($$$$$$)
155{
156 my ($postate, $data, $l, $txt, $keys, $new) = @_;
157
158 if($postate->{type} eq "msgid") {$postate->{msgid} = $postate->{last};}
159 elsif($postate->{type} eq "msgid_plural") {$postate->{msgid_1} = $postate->{last};}
160 elsif($postate->{type} =~ /^msgstr(\[0\])?$/) {$postate->{msgstr} = $postate->{last};}
161 elsif($postate->{type} =~ /^msgstr\[(.+)\]$/) {$postate->{"msgstr_$1"} = $postate->{last};}
162 elsif($postate->{type} eq "msgctxt") {$postate->{context} = $postate->{last};}
163 elsif($postate->{type}) { die "Strange type $postate->{type} found\n" }
164
165 if($new)
166 {
167 if((!$postate->{fuzzy}) && $postate->{msgstr} && $postate->{msgid})
168 {
169 copystring($data, $postate->{msgid}, $l, $postate->{msgstr},$txt,$postate->{context}, 1);
170 for($i = 1; exists($postate->{"msgstr_$i"}); ++$i)
171 { copystring($data, $postate->{msgid}, "$l.$i", $postate->{"msgstr_$i"},$txt,$postate->{context}, 1); }
172 if($postate->{msgid_1})
173 { copystring($data, $postate->{msgid}, "en.1", $postate->{msgid_1},$txt,$postate->{context}, 1); }
174 copystring($data, $postate->{msgid}, "_src.$l", $postate->{src},$txt,$postate->{context}, 1);
175 }
176 elsif($postate->{msgstr} && !$postate->{msgid})
177 {
178 my %k = ($postate->{msgstr} =~ /(.+?): +(.+?)\\n/g);
179 # take the first one!
180 for $a (sort keys %k)
181 {
182 $keys->{$l}{$a} = $k{$a} if !$keys->{$l}{$a};
183 }
184 }
185 foreach my $k (keys %{$postate})
186 {
187 delete $postate->{$k};
188 }
189 $postate->{type} = $postate->{last} = "";
190 }
191}
192
193sub makestring($)
194{
195 my ($str) = @_;
196 $str =~ s/\\"/"/g;
197 $str =~ s/\\\\/\\/g;
198 $str =~ s/\\n/\n/g;
199 $str = encode("utf8", $str);
200 return $str;
201}
202
203sub checkstring
204{
205 my ($la, $tr, $en, $cnt, $en1, $eq) = @_;
206 $tr = makestring($tr);
207 $en = makestring($en);
208 $cnt = $cnt || 0;
209 $en1 = makestring($en1) if defined($en1);
210 my $error = 0;
211
212 # Test one - are there single quotes which don't occur twice
213 my $v = $tr;
214 $v =~ s/''//g; # replace all twice occuring single quotes
215 $v =~ s/'[{}]'//g; # replace all bracketquoting single quotes
216 if($v =~ /'/)#&& $la ne "en")
217 {
218 warn "JAVA translation issue for language $la: Mismatching single quotes:\nTranslated text: $tr\nOriginal text: $en\n";
219 $error = 1;
220 }
221 # Test two - check if there are {..} which should not be
222 my @fmt = ();
223 my $fmt;
224 my $fmte;
225 my $fmte1 = "";
226 my $trt = $tr; $trt =~ s/'[{}]'//g;
227 while($trt =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmt = join("_", sort @fmt); @fmt = ();
228 my $ent = $en; $ent =~ s/'[{}]'//g;
229 while($ent =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte = join("_", sort @fmt); @fmt = ();
230 if($en1)
231 {
232 my $en1t = $en1; $en1t =~ s/'[{}]'//g;
233 while($en1t =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte1 = join("_", sort @fmt);
234 }
235 if($fmt ne $fmte && $fmt ne $fmte1)
236 {
237 if(!($fmte eq '0' && $fmt eq "" && $cnt == 1)) # Don't warn when a single value is left for first multi-translation
238 {
239 warn "JAVA translation issue for language $la ($cnt): Mismatching format entries:\nTranslated text: $tr\nOriginal text: $en\n";
240 $error = 1;
241 }
242 }
243
244 #$tr = "" if($error && $la ne "en");
245 return pack("n",65534) if $eq;
246
247 return pack("n",length($tr)).$tr;
248}
249
250sub createlang($@)
251{
252 my ($data, @files) = @_;
253 my $maxlen = 0;
254 foreach my $file (@files)
255 {
256 my $len = length($file);
257 $maxlen = $len if $len > $maxlen;
258 }
259 foreach my $file (@files)
260 {
261 my $la;
262 my $cnt = 0;
263 if($file =~ /[-_](.._..)\.lang$/ || $file =~ /^(?:.*\/)?(.._..)\.lang$/ ||
264 $file =~ /[-_](...?)\.lang$/ || $file =~ /^(?:.*\/)?(...?)\.lang$/)
265 {
266 $la = $1;
267 }
268 else
269 {
270 die "Language for file $file unknown.";
271 }
272 die "Could not open outfile $file\n" if !open FILE,">:raw",$file;
273
274 foreach my $en (sort keys %{$data})
275 {
276 next if $data->{$en}{"en.1"};
277 my $val;
278 my $eq;
279 if($la eq "en")
280 {
281 ++$cnt;
282 $val = $en;
283 $val =~ s/^___(.*)___/_:$1\n/;
284 }
285 else
286 {
287 my $ennoctx = $en;
288 $ennoctx =~ s/^___(.*)___//;
289 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
290 ++$cnt if $val;
291 if($ennoctx eq $val)
292 {
293 $val = ""; $eq = 1;
294 }
295 }
296 print FILE checkstring($la, $val, $en, undef, undef, $eq);
297 }
298 print FILE pack "n",0xFFFF;
299 foreach my $en (sort keys %{$data})
300 {
301 next if !$data->{$en}{"en.1"};
302 my $num;
303 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
304 { }
305 my $val;
306 if($la eq "en")
307 {
308 ++$cnt;
309 $val = $en;
310 $val =~ s/^___(.*)___/_:$1\n/;
311 }
312 else
313 {
314 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
315 --$num if(!$val);
316 ++$cnt if $val;
317 if($num == 2)
318 {
319 my $ennoctx = $en;
320 $ennoctx =~ s/^___(.*)___//;
321 $num = 0 if $val eq $ennoctx && $data->{$en}{"$la.1"} eq $data->{$en}{"en.1"};
322 }
323 }
324
325 print FILE pack "C",$num;
326 if($num)
327 {
328 print FILE checkstring($la, $val, $en, 1, $data->{$en}{"en.1"});
329 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
330 {
331 print FILE checkstring($la, $data->{$en}{"$la.$num"}, $en, $num+1, $data->{$en}{"en.1"});
332 }
333 }
334 }
335 close FILE;
336 if(!$cnt)
337 {
338 unlink $file;
339 printf "Skipped file %-${maxlen}s: Contained 0 strings out of %5d.\n",$file,$maxcount;
340 }
341 else
342 {
343 printf "Created file %-${maxlen}s: Added %5d strings out of %5d (%5.1f%%).\n",$file,$cnt,$maxcount,,$cnt*100.0/$maxcount;
344 }
345 }
346}
347
348sub main
349{
350 my %lang;
351 my @po;
352 my $basename = shift @ARGV;
353 foreach my $arg (@ARGV)
354 {
355 foreach my $f (glob $arg)
356 {
357 if($f =~ /\*/) { printf "Skipping $f\n"; }
358 elsif($f =~ /\.po$/) { push(@po, $f); }
359 else { die "unknown file extension."; }
360 }
361 }
362 my %data = loadfiles(\%lang,@po);
363 my @clang;
364 foreach my $la (sort keys %lang)
365 {
366 push(@clang, "${basename}$la.lang");
367 }
368 push(@clang, "${basename}en.lang");
369 die "There have been warning. No output.\n" if $waswarn;
370
371 createlang(\%data, @clang);
372}
Note: See TracBrowser for help on using the repository browser.