source: osm/applications/editors/josm/i18n/i18n.pl@ 31100

Last change on this file since 31100 was 31088, checked in by stoecker, 10 years ago

see #josm11148 - fix dialects

  • Property svn:executable set to *
File size: 10.2 KB
Line 
1#! /usr/bin/perl -w
2
3use utf8;
4use open qw/:std :encoding(utf8)/;
5use Term::ReadKey;
6use Encode;
7
8my $waswarn = 0;
9my $maxcount = 0;
10my $lang_pattern = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}\@[a-z]+)';
11my $lang_pattern_file = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}-[a-z]+)';
12
13main();
14
15sub getdate
16{
17 my @t=gmtime();
18 return sprintf("%04d-%02d-%02d %02d:%02d+0000",
19 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1]);
20}
21
22sub loadfiles($@)
23{
24 my $desc;
25 my $all;
26 my ($lang,@files) = @_;
27 foreach my $file (@files)
28 {
29 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
30 my $linenum = 0;
31
32 my $cnt = -1; # don't count translators info
33 if($file =~ /\/$lang_pattern\.po$/)
34 {
35 my $l = $1;
36 ++$lang->{$l};
37 my %postate = (last => "", type => "");
38 my $linenum = 0;
39 print "Reading file $file (lang $l)\n";
40 while(<FILE>)
41 {
42 ++$linenum;
43 my $fn = "$file:$linenum";
44 chomp;
45 if($_ =~ /^#/ || !$_)
46 {
47 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, 1);
48 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
49 }
50 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
51 elsif($_ =~ /^(msg.+) "(.*)"$/)
52 {
53 my ($n, $d) = ($1, $2);
54 ++$cnt if $n eq "msgid";
55 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
56 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, $new);
57 $postate{last} = $d;
58 $postate{type} = $n;
59 $postate{src} = $fn if $new;
60 }
61 else
62 {
63 die "Strange line $linenum in $file: $_.";
64 }
65 }
66 checkpo(\%postate, \%all, $l, "line $linenum in $file", $keys, 1);
67 }
68 else
69 {
70 die "File format not supported for file $file.";
71 }
72 $maxcount = $cnt if $cnt > $maxcount;
73 close(FILE);
74 }
75 return %all;
76}
77
78my $alwayspo = 0;
79my $alwaysup = 0;
80my $noask = 0;
81my $conflicts;
82sub copystring($$$$$$$)
83{
84 my ($data, $en, $l, $str, $txt, $context, $ispo) = @_;
85
86 $en = "___${context}___$en" if $context;
87
88 if(exists($data->{$en}{$l}) && $data->{$en}{$l} ne $str)
89 {
90 return if !$str;
91 if($l =~ /^_/)
92 {
93 $data->{$en}{$l} .= ";$str" if !($data->{$en}{$l} =~ /$str/);
94 }
95 elsif(!$data->{$en}{$l})
96 {
97 $data->{$en}{$l} = $str;
98 }
99 else
100 {
101
102 my $f = $data->{$en}{_file} || "";
103 $f = ($f ? "$f;".$data->{$en}{"_src.$l"} : $data->{$en}{"_src.$l"}) if $data->{$en}{"_src.$l"};
104 my $isotherpo = ($f =~ /\.po\:/);
105 my $pomode = ($ispo && !$isotherpo) || (!$ispo && $isotherpo);
106
107 my $mis = "String mismatch for '$en' **$str** ($txt) != **$data->{$en}{$l}** ($f)\n";
108 my $replace = 0;
109
110 if(($conflicts{$l}{$str} || "") eq $data->{$en}{$l}) {}
111 elsif($pomode && $alwaysup) { $replace=$isotherpo; }
112 elsif($pomode && $alwayspo) { $replace=$ispo; }
113 elsif($noask) { print $mis; ++$waswarn; }
114 else
115 {
116 ReadMode 4; # Turn off controls keys
117 my $arg = "(l)eft, (r)ight";
118 $arg .= ", (p)o, (u)pstream[ts/mat], all p(o), all up(s)tream" if $pomode;
119 $arg .= ", e(x)it: ";
120 print "$mis$arg";
121 while((my $c = getc()))
122 {
123 if($c eq "l") { $replace=1; }
124 elsif($c eq "r") {}
125 elsif($c eq "p" && $pomode) { $replace=$ispo; }
126 elsif($c eq "u" && $pomode) { $replace=$isotherpo; }
127 elsif($c eq "o" && $pomode) { $alwayspo = 1; $replace=$ispo; }
128 elsif($c eq "s" && $pomode) { $alwaysup = 1; $replace=$isotherpo; }
129 elsif($c eq "x") { $noask = 1; ++$waswarn; }
130 else { print "\n$arg"; next; }
131 last;
132 }
133 print("\n");
134 ReadMode 0; # Turn on controls keys
135 }
136 if(!$noask)
137 {
138 if($replace)
139 {
140 $data->{$en}{$l} = $str;
141 $conflicts{$l}{$data->{$en}{$l}} = $str;
142 }
143 else
144 {
145 $conflicts{$l}{$str} = $data->{$en}{$l};
146 }
147 }
148 }
149 }
150 else
151 {
152 $data->{$en}{$l} = $str;
153 }
154}
155
156sub checkpo($$$$$$)
157{
158 my ($postate, $data, $l, $txt, $keys, $new) = @_;
159
160 if($postate->{type} eq "msgid") {$postate->{msgid} = $postate->{last};}
161 elsif($postate->{type} eq "msgid_plural") {$postate->{msgid_1} = $postate->{last};}
162 elsif($postate->{type} =~ /^msgstr(\[0\])?$/) {$postate->{msgstr} = $postate->{last};}
163 elsif($postate->{type} =~ /^msgstr\[(.+)\]$/) {$postate->{"msgstr_$1"} = $postate->{last};}
164 elsif($postate->{type} eq "msgctxt") {$postate->{context} = $postate->{last};}
165 elsif($postate->{type}) { die "Strange type $postate->{type} found\n" }
166
167 if($new)
168 {
169 if((!$postate->{fuzzy}) && $postate->{msgstr} && $postate->{msgid})
170 {
171 copystring($data, $postate->{msgid}, $l, $postate->{msgstr},$txt,$postate->{context}, 1);
172 for($i = 1; exists($postate->{"msgstr_$i"}); ++$i)
173 { copystring($data, $postate->{msgid}, "$l.$i", $postate->{"msgstr_$i"},$txt,$postate->{context}, 1); }
174 if($postate->{msgid_1})
175 { copystring($data, $postate->{msgid}, "en.1", $postate->{msgid_1},$txt,$postate->{context}, 1); }
176 copystring($data, $postate->{msgid}, "_src.$l", $postate->{src},$txt,$postate->{context}, 1);
177 }
178 elsif($postate->{msgstr} && !$postate->{msgid})
179 {
180 my %k = ($postate->{msgstr} =~ /(.+?): +(.+?)\\n/g);
181 # take the first one!
182 for $a (sort keys %k)
183 {
184 $keys->{$l}{$a} = $k{$a} if !$keys->{$l}{$a};
185 }
186 }
187 foreach my $k (keys %{$postate})
188 {
189 delete $postate->{$k};
190 }
191 $postate->{type} = $postate->{last} = "";
192 }
193}
194
195sub makestring($)
196{
197 my ($str) = @_;
198 $str =~ s/\\"/"/g;
199 $str =~ s/\\\\/\\/g;
200 $str =~ s/\\n/\n/g;
201 $str = encode("utf8", $str);
202 return $str;
203}
204
205sub checkstring
206{
207 my ($la, $tr, $en, $cnt, $en1, $eq) = @_;
208 $tr = makestring($tr);
209 $en = makestring($en);
210 $cnt = $cnt || 0;
211 $en1 = makestring($en1) if defined($en1);
212 my $error = 0;
213
214 # Test one - are there single quotes which don't occur twice
215 my $v = $tr;
216 $v =~ s/''//g; # replace all twice occuring single quotes
217 $v =~ s/'[{}]'//g; # replace all bracketquoting single quotes
218 if($v =~ /'/)#&& $la ne "en")
219 {
220 warn "JAVA translation issue for language $la: Mismatching single quotes:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
221 $error = 1;
222 }
223 # Test two - check if there are {..} which should not be
224 my @fmt = ();
225 my $fmt;
226 my $fmte;
227 my $fmte1 = "";
228 my $trt = $tr; $trt =~ s/'[{}]'//g;
229 while($trt =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmt = join("_", sort @fmt); @fmt = ();
230 my $ent = $en; $ent =~ s/'[{}]'//g;
231 while($ent =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte = join("_", sort @fmt); @fmt = ();
232 if($en1)
233 {
234 my $en1t = $en1; $en1t =~ s/'[{}]'//g;
235 while($en1t =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte1 = join("_", sort @fmt);
236 }
237 if($fmt ne $fmte && $fmt ne $fmte1)
238 {
239 if(!($fmte eq '0' && $fmt eq "" && $cnt == 1)) # Don't warn when a single value is left for first multi-translation
240 {
241 warn "JAVA translation issue for language $la ($cnt): Mismatching format entries:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
242 $error = 1;
243 }
244 }
245
246 #$tr = "" if($error && $la ne "en");
247 return pack("n",65534) if $eq;
248
249 return pack("n",length($tr)).$tr;
250}
251
252sub createlang($@)
253{
254 my ($data, @files) = @_;
255 my $maxlen = 0;
256 foreach my $file (@files)
257 {
258 my $len = length($file);
259 $maxlen = $len if $len > $maxlen;
260 }
261 foreach my $file (@files)
262 {
263 my $la;
264 my $cnt = 0;
265 if($file =~ /^(?:.*\/)?$lang_pattern_file\.lang$/)
266 {
267 $la = $1;
268 $la =~ s/-/\@/;
269 }
270 else
271 {
272 die "Language for file $file unknown.";
273 }
274 die "Could not open outfile $file\n" if !open FILE,">:raw",$file;
275
276 foreach my $en (sort keys %{$data})
277 {
278 next if $data->{$en}{"en.1"};
279 my $val;
280 my $eq;
281 if($la eq "en")
282 {
283 ++$cnt;
284 $val = $en;
285 $val =~ s/^___(.*)___/_:$1\n/;
286 }
287 else
288 {
289 my $ennoctx = $en;
290 $ennoctx =~ s/^___(.*)___//;
291 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
292 ++$cnt if $val;
293 if($ennoctx eq $val)
294 {
295 $val = ""; $eq = 1;
296 }
297 }
298 print FILE checkstring($la, $val, $en, undef, undef, $eq);
299 }
300 print FILE pack "n",0xFFFF;
301 foreach my $en (sort keys %{$data})
302 {
303 next if !$data->{$en}{"en.1"};
304 my $num;
305 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
306 { }
307 my $val;
308 $eq = 0;
309 if($la eq "en")
310 {
311 ++$cnt;
312 $val = $en;
313 $val =~ s/^___(.*)___/_:$1\n/;
314 }
315 else
316 {
317 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
318 --$num if(!$val);
319 ++$cnt if $val;
320 if($num == 2)
321 {
322 my $ennoctx = $en;
323 $ennoctx =~ s/^___(.*)___//;
324 if($val eq $ennoctx && $data->{$en}{"$la.1"} eq $data->{$en}{"en.1"})
325 {
326 $num = 0;
327 $eq = 1;
328 }
329 }
330 }
331
332 print FILE pack "C",$eq ? 0xFE : $num;
333 if($num)
334 {
335 print FILE checkstring($la, $val, $en, 1, $data->{$en}{"en.1"});
336 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
337 {
338 print FILE checkstring($la, $data->{$en}{"$la.$num"}, $en, $num+1, $data->{$en}{"en.1"});
339 }
340 }
341 }
342 close FILE;
343 if(!$cnt)
344 {
345 unlink $file;
346 printf "Skipped file %-${maxlen}s: Contained 0 strings out of %5d.\n",$file,$maxcount;
347 }
348 else
349 {
350 printf "Created file %-${maxlen}s: Added %5d strings out of %5d (%5.1f%%).\n",$file,$cnt,$maxcount,,$cnt*100.0/$maxcount;
351 }
352 }
353}
354
355sub main
356{
357 my %lang;
358 my @po;
359 my $basename = shift @ARGV;
360 $basename .= "/" if !($basename =~ /[\/\\:]$/);
361 foreach my $arg (@ARGV)
362 {
363 foreach my $f (glob $arg)
364 {
365 if($f =~ /\*/) { printf "Skipping $f\n"; }
366 elsif($f =~ /\.po$/) { push(@po, $f); }
367 else { die "unknown file extension."; }
368 }
369 }
370 my %data = loadfiles(\%lang,@po);
371 my @clang;
372 foreach my $la (sort keys %lang)
373 {
374 $la =~ s/\@/-/;
375 push(@clang, "${basename}$la.lang");
376 }
377 push(@clang, "${basename}en.lang");
378 die "There have been warning. No output.\n" if $waswarn;
379
380 createlang(\%data, @clang);
381}
Note: See TracBrowser for help on using the repository browser.