source: osm/applications/editors/josm/i18n/i18n.pl@ 27570

Last change on this file since 27570 was 26987, checked in by stoecker, 13 years ago

add language file information tool

  • Property svn:executable set to *
File size: 10.0 KB
Line 
1#! /usr/bin/perl -w
2
3use utf8;
4use encoding "utf8";
5use Term::ReadKey;
6use Encode;
7
8my $waswarn = 0;
9my $maxcount = 0;
10
11main();
12
13sub getdate
14{
15 my @t=gmtime();
16 return sprintf("%04d-%02d-%02d %02d:%02d+0000",
17 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1]);
18}
19
20sub loadfiles($@)
21{
22 my $desc;
23 my $all;
24 my ($lang,@files) = @_;
25 foreach my $file (@files)
26 {
27 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
28 my $linenum = 0;
29
30 my $cnt = -1; # don't count translators info
31 if($file =~ /\/(.._..)\.po$/ || $file =~ /\/(...?)\.po$/)
32 {
33 my $l = $1;
34 ++$lang->{$l};
35 my %postate = (last => "", type => "");
36 my $linenum = 0;
37 print "Reading file $file\n";
38 while(<FILE>)
39 {
40 ++$linenum;
41 my $fn = "$file:$linenum";
42 chomp;
43 if($_ =~ /^#/ || !$_)
44 {
45 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, 1);
46 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
47 }
48 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
49 elsif($_ =~ /^(msg.+) "(.*)"$/)
50 {
51 my ($n, $d) = ($1, $2);
52 ++$cnt if $n eq "msgid";
53 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
54 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, $new);
55 $postate{last} = $d;
56 $postate{type} = $n;
57 $postate{src} = $fn if $new;
58 }
59 else
60 {
61 die "Strange line $linenum in $file: $_.";
62 }
63 }
64 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, 1);
65 }
66 else
67 {
68 die "File format not supported for file $file.";
69 }
70 $maxcount = $cnt if $cnt > $maxcount;
71 close(FILE);
72 }
73 return %all;
74}
75
76my $alwayspo = 0;
77my $alwaysup = 0;
78my $noask = 0;
79my $conflicts;
80sub copystring($$$$$$$)
81{
82 my ($data, $en, $l, $str, $txt, $context, $ispo) = @_;
83
84 $en = "___${context}___$en" if $context;
85
86 if(exists($data->{$en}{$l}) && $data->{$en}{$l} ne $str)
87 {
88 return if !$str;
89 if($l =~ /^_/)
90 {
91 $data->{$en}{$l} .= ";$str" if !($data->{$en}{$l} =~ /$str/);
92 }
93 elsif(!$data->{$en}{$l})
94 {
95 $data->{$en}{$l} = $str;
96 }
97 else
98 {
99
100 my $f = $data->{$en}{_file} || "";
101 $f = ($f ? "$f;".$data->{$en}{"_src.$l"} : $data->{$en}{"_src.$l"}) if $data->{$en}{"_src.$l"};
102 my $isotherpo = ($f =~ /\.po\:/);
103 my $pomode = ($ispo && !$isotherpo) || (!$ispo && $isotherpo);
104
105 my $mis = "String mismatch for '$en' **$str** ($txt) != **$data->{$en}{$l}** ($f)\n";
106 my $replace = 0;
107
108 if(($conflicts{$l}{$str} || "") eq $data->{$en}{$l}) {}
109 elsif($pomode && $alwaysup) { $replace=$isotherpo; }
110 elsif($pomode && $alwayspo) { $replace=$ispo; }
111 elsif($noask) { print $mis; ++$waswarn; }
112 else
113 {
114 ReadMode 4; # Turn off controls keys
115 my $arg = "(l)eft, (r)ight";
116 $arg .= ", (p)o, (u)pstream[ts/mat], all p(o), all up(s)tream" if $pomode;
117 $arg .= ", e(x)it: ";
118 print "$mis$arg";
119 while((my $c = getc()))
120 {
121 if($c eq "l") { $replace=1; }
122 elsif($c eq "r") {}
123 elsif($c eq "p" && $pomode) { $replace=$ispo; }
124 elsif($c eq "u" && $pomode) { $replace=$isotherpo; }
125 elsif($c eq "o" && $pomode) { $alwayspo = 1; $replace=$ispo; }
126 elsif($c eq "s" && $pomode) { $alwaysup = 1; $replace=$isotherpo; }
127 elsif($c eq "x") { $noask = 1; ++$waswarn; }
128 else { print "\n$arg"; next; }
129 last;
130 }
131 print("\n");
132 ReadMode 0; # Turn on controls keys
133 }
134 if(!$noask)
135 {
136 if($replace)
137 {
138 $data->{$en}{$l} = $str;
139 $conflicts{$l}{$data->{$en}{$l}} = $str;
140 }
141 else
142 {
143 $conflicts{$l}{$str} = $data->{$en}{$l};
144 }
145 }
146 }
147 }
148 else
149 {
150 $data->{$en}{$l} = $str;
151 }
152}
153
154sub checkpo($$$$$$)
155{
156 my ($postate, $data, $l, $txt, $keys, $new) = @_;
157
158 if($postate->{type} eq "msgid") {$postate->{msgid} = $postate->{last};}
159 elsif($postate->{type} eq "msgid_plural") {$postate->{msgid_1} = $postate->{last};}
160 elsif($postate->{type} =~ /^msgstr(\[0\])?$/) {$postate->{msgstr} = $postate->{last};}
161 elsif($postate->{type} =~ /^msgstr\[(.+)\]$/) {$postate->{"msgstr_$1"} = $postate->{last};}
162 elsif($postate->{type} eq "msgctxt") {$postate->{context} = $postate->{last};}
163 elsif($postate->{type}) { die "Strange type $postate->{type} found\n" }
164
165 if($new)
166 {
167 if((!$postate->{fuzzy}) && $postate->{msgstr} && $postate->{msgid})
168 {
169 copystring($data, $postate->{msgid}, $l, $postate->{msgstr},$txt,$postate->{context}, 1);
170 for($i = 1; exists($postate->{"msgstr_$i"}); ++$i)
171 { copystring($data, $postate->{msgid}, "$l.$i", $postate->{"msgstr_$i"},$txt,$postate->{context}, 1); }
172 if($postate->{msgid_1})
173 { copystring($data, $postate->{msgid}, "en.1", $postate->{msgid_1},$txt,$postate->{context}, 1); }
174 copystring($data, $postate->{msgid}, "_src.$l", $postate->{src},$txt,$postate->{context}, 1);
175 }
176 elsif($postate->{msgstr} && !$postate->{msgid})
177 {
178 my %k = ($postate->{msgstr} =~ /(.+?): +(.+?)\\n/g);
179 # take the first one!
180 for $a (sort keys %k)
181 {
182 $keys->{$l}{$a} = $k{$a} if !$keys->{$l}{$a};
183 }
184 }
185 foreach my $k (keys %{$postate})
186 {
187 delete $postate->{$k};
188 }
189 $postate->{type} = $postate->{last} = "";
190 }
191}
192
193sub makestring($)
194{
195 my ($str) = @_;
196 $str =~ s/\\"/"/g;
197 $str =~ s/\\\\/\\/g;
198 $str =~ s/\\n/\n/g;
199 $str = encode("utf8", $str);
200 return $str;
201}
202
203sub checkstring
204{
205 my ($la, $tr, $en, $cnt, $en1, $eq) = @_;
206 $tr = makestring($tr);
207 $en = makestring($en);
208 $cnt = $cnt || 0;
209 $en1 = makestring($en1) if defined($en1);
210 my $error = 0;
211
212 # Test one - are there single quotes which don't occur twice
213 my $v = $tr;
214 $v =~ s/''//g; # replace all twice occuring single quotes
215 $v =~ s/'[{}]'//g; # replace all bracketquoting single quotes
216 if($v =~ /'/)#&& $la ne "en")
217 {
218 warn "JAVA translation issue for language $la: Mismatching single quotes:\nTranslated text: $tr\nOriginal text: $en\n";
219 $error = 1;
220 }
221 # Test two - check if there are {..} which should not be
222 my @fmt = ();
223 my $fmt;
224 my $fmte;
225 my $fmte1 = "";
226 my $trt = $tr; $trt =~ s/'[{}]'//g;
227 while($trt =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmt = join("_", sort @fmt); @fmt = ();
228 my $ent = $en; $ent =~ s/'[{}]'//g;
229 while($ent =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte = join("_", sort @fmt); @fmt = ();
230 if($en1)
231 {
232 my $en1t = $en1; $en1t =~ s/'[{}]'//g;
233 while($en1t =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte1 = join("_", sort @fmt);
234 }
235 if($fmt ne $fmte && $fmt ne $fmte1)
236 {
237 if(!($fmte eq '0' && $fmt eq "" && $cnt == 1)) # Don't warn when a single value is left for first multi-translation
238 {
239 warn "JAVA translation issue for language $la ($cnt): Mismatching format entries:\nTranslated text: $tr\nOriginal text: $en\n";
240 $error = 1;
241 }
242 }
243
244 #$tr = "" if($error && $la ne "en");
245 return pack("n",65534) if $eq;
246
247 return pack("n",length($tr)).$tr;
248}
249
250sub createlang($@)
251{
252 my ($data, @files) = @_;
253 my $maxlen = 0;
254 foreach my $file (@files)
255 {
256 my $len = length($file);
257 $maxlen = $len if $len > $maxlen;
258 }
259 foreach my $file (@files)
260 {
261 my $la;
262 my $cnt = 0;
263 if($file =~ /[-_](.._..)\.lang$/ || $file =~ /^(?:.*\/)?(.._..)\.lang$/ ||
264 $file =~ /[-_](...?)\.lang$/ || $file =~ /^(?:.*\/)?(...?)\.lang$/)
265 {
266 $la = $1;
267 }
268 else
269 {
270 die "Language for file $file unknown.";
271 }
272 die "Could not open outfile $file\n" if !open FILE,">:raw",$file;
273
274 foreach my $en (sort keys %{$data})
275 {
276 next if $data->{$en}{"en.1"};
277 my $val;
278 my $eq;
279 if($la eq "en")
280 {
281 ++$cnt;
282 $val = $en;
283 $val =~ s/^___(.*)___/_:$1\n/;
284 }
285 else
286 {
287 my $ennoctx = $en;
288 $ennoctx =~ s/^___(.*)___//;
289 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
290 ++$cnt if $val;
291 if($ennoctx eq $val)
292 {
293 $val = ""; $eq = 1;
294 }
295 }
296 print FILE checkstring($la, $val, $en, undef, undef, $eq);
297 }
298 print FILE pack "n",0xFFFF;
299 foreach my $en (sort keys %{$data})
300 {
301 next if !$data->{$en}{"en.1"};
302 my $num;
303 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
304 { }
305 my $val;
306 $eq = 0;
307 if($la eq "en")
308 {
309 ++$cnt;
310 $val = $en;
311 $val =~ s/^___(.*)___/_:$1\n/;
312 }
313 else
314 {
315 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
316 --$num if(!$val);
317 ++$cnt if $val;
318 if($num == 2)
319 {
320 my $ennoctx = $en;
321 $ennoctx =~ s/^___(.*)___//;
322 if($val eq $ennoctx && $data->{$en}{"$la.1"} eq $data->{$en}{"en.1"})
323 {
324 $num = 0;
325 $eq = 1;
326 }
327 }
328 }
329
330 print FILE pack "C",$eq ? 0xFE : $num;
331 if($num)
332 {
333 print FILE checkstring($la, $val, $en, 1, $data->{$en}{"en.1"});
334 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
335 {
336 print FILE checkstring($la, $data->{$en}{"$la.$num"}, $en, $num+1, $data->{$en}{"en.1"});
337 }
338 }
339 }
340 close FILE;
341 if(!$cnt)
342 {
343 unlink $file;
344 printf "Skipped file %-${maxlen}s: Contained 0 strings out of %5d.\n",$file,$maxcount;
345 }
346 else
347 {
348 printf "Created file %-${maxlen}s: Added %5d strings out of %5d (%5.1f%%).\n",$file,$cnt,$maxcount,,$cnt*100.0/$maxcount;
349 }
350 }
351}
352
353sub main
354{
355 my %lang;
356 my @po;
357 my $basename = shift @ARGV;
358 foreach my $arg (@ARGV)
359 {
360 foreach my $f (glob $arg)
361 {
362 if($f =~ /\*/) { printf "Skipping $f\n"; }
363 elsif($f =~ /\.po$/) { push(@po, $f); }
364 else { die "unknown file extension."; }
365 }
366 }
367 my %data = loadfiles(\%lang,@po);
368 my @clang;
369 foreach my $la (sort keys %lang)
370 {
371 push(@clang, "${basename}$la.lang");
372 }
373 push(@clang, "${basename}en.lang");
374 die "There have been warning. No output.\n" if $waswarn;
375
376 createlang(\%data, @clang);
377}
Note: See TracBrowser for help on using the repository browser.