source: osm/applications/editors/josm/i18n/i18n.pl@ 33664

Last change on this file since 33664 was 33152, checked in by stoecker, 8 years ago

support tag2link format as well

  • Property svn:executable set to *
File size: 12.3 KB
RevLine 
[19319]1#! /usr/bin/perl -w
2
3use utf8;
[31111]4use strict;
[30169]5use open qw/:std :encoding(utf8)/;
[19319]6use Term::ReadKey;
7use Encode;
8
9my $waswarn = 0;
[31087]10my $lang_pattern = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}\@[a-z]+)';
[31088]11my $lang_pattern_file = '([a-z]{2}_[A-Z]{2}|[a-z]{2,3}|[a-z]{2}-[a-z]+)';
[19319]12
13main();
14
15sub getdate
16{
17 my @t=gmtime();
18 return sprintf("%04d-%02d-%02d %02d:%02d+0000",
19 1900+$t[5],$t[4]+1,$t[3],$t[2],$t[1]);
20}
21
[31111]22sub loadpot($)
[19319]23{
[31111]24 my ($file) = @_;
25 my %all = ();
26 my %keys = ();
27 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
28 my %postate = (last => "", type => "");
29 my $linenum = 0;
30 print "Reading file $file\n";
31 while(<FILE>)
32 {
33 ++$linenum;
34 my $fn = "$file:$linenum";
35 chomp;
36 if($_ =~ /^#/ || !$_)
37 {
38 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, 1, undef);
39 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
40 }
41 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
42 elsif($_ =~ /^(msg.+) "(.*)"$/)
43 {
44 my ($n, $d) = ($1, $2);
45 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
46 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, $new, undef);
47 $postate{last} = $d;
48 $postate{type} = $n;
49 $postate{src} = $fn if $new;
50 }
51 else
52 {
53 die "Strange line $linenum in $file: $_.";
54 }
55 }
56 checkpo(\%postate, \%all, "pot", "line $linenum in $file", \%keys, 1, undef);
57 close(FILE);
58 return \%all;
59}
60
61sub loadfiles($$@)
62{
[19319]63 my $desc;
[31111]64 my %all = ();
65 my %keys = ();
66 my ($lang,$use,@files) = @_;
[19319]67 foreach my $file (@files)
68 {
69 die "Could not open file $file." if(!open FILE,"<:utf8",$file);
70
[31086]71 if($file =~ /\/$lang_pattern\.po$/)
[19319]72 {
73 my $l = $1;
74 ++$lang->{$l};
75 my %postate = (last => "", type => "");
76 my $linenum = 0;
[31087]77 print "Reading file $file (lang $l)\n";
[19319]78 while(<FILE>)
79 {
80 ++$linenum;
81 my $fn = "$file:$linenum";
82 chomp;
83 if($_ =~ /^#/ || !$_)
84 {
[31111]85 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, 1, $use);
[19319]86 $postate{fuzzy} = 1 if ($_ =~ /fuzzy/);
87 }
88 elsif($_ =~ /^"(.*)"$/) {$postate{last} .= $1;}
89 elsif($_ =~ /^(msg.+) "(.*)"$/)
90 {
91 my ($n, $d) = ($1, $2);
[19334]92 my $new = !${postate}{fuzzy} && (($n eq "msgid" && $postate{type} ne "msgctxt") || ($n eq "msgctxt"));
[31111]93 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, $new, $use);
[19319]94 $postate{last} = $d;
95 $postate{type} = $n;
96 $postate{src} = $fn if $new;
97 }
98 else
99 {
100 die "Strange line $linenum in $file: $_.";
101 }
102 }
[31111]103 checkpo(\%postate, \%all, $l, "line $linenum in $file", \%keys, 1, $use);
[19319]104 }
105 else
106 {
107 die "File format not supported for file $file.";
108 }
109 close(FILE);
110 }
111 return %all;
112}
113
114my $alwayspo = 0;
115my $alwaysup = 0;
116my $noask = 0;
[31111]117my %conflicts;
[19319]118sub copystring($$$$$$$)
119{
120 my ($data, $en, $l, $str, $txt, $context, $ispo) = @_;
121
122 $en = "___${context}___$en" if $context;
123
124 if(exists($data->{$en}{$l}) && $data->{$en}{$l} ne $str)
125 {
126 return if !$str;
127 if($l =~ /^_/)
128 {
129 $data->{$en}{$l} .= ";$str" if !($data->{$en}{$l} =~ /$str/);
130 }
131 elsif(!$data->{$en}{$l})
132 {
133 $data->{$en}{$l} = $str;
134 }
135 else
136 {
137 my $f = $data->{$en}{_file} || "";
138 $f = ($f ? "$f;".$data->{$en}{"_src.$l"} : $data->{$en}{"_src.$l"}) if $data->{$en}{"_src.$l"};
139 my $isotherpo = ($f =~ /\.po\:/);
140 my $pomode = ($ispo && !$isotherpo) || (!$ispo && $isotherpo);
141
142 my $mis = "String mismatch for '$en' **$str** ($txt) != **$data->{$en}{$l}** ($f)\n";
143 my $replace = 0;
144
145 if(($conflicts{$l}{$str} || "") eq $data->{$en}{$l}) {}
146 elsif($pomode && $alwaysup) { $replace=$isotherpo; }
147 elsif($pomode && $alwayspo) { $replace=$ispo; }
148 elsif($noask) { print $mis; ++$waswarn; }
149 else
150 {
151 ReadMode 4; # Turn off controls keys
152 my $arg = "(l)eft, (r)ight";
153 $arg .= ", (p)o, (u)pstream[ts/mat], all p(o), all up(s)tream" if $pomode;
154 $arg .= ", e(x)it: ";
155 print "$mis$arg";
156 while((my $c = getc()))
157 {
158 if($c eq "l") { $replace=1; }
159 elsif($c eq "r") {}
160 elsif($c eq "p" && $pomode) { $replace=$ispo; }
161 elsif($c eq "u" && $pomode) { $replace=$isotherpo; }
162 elsif($c eq "o" && $pomode) { $alwayspo = 1; $replace=$ispo; }
163 elsif($c eq "s" && $pomode) { $alwaysup = 1; $replace=$isotherpo; }
164 elsif($c eq "x") { $noask = 1; ++$waswarn; }
165 else { print "\n$arg"; next; }
166 last;
167 }
168 print("\n");
169 ReadMode 0; # Turn on controls keys
170 }
171 if(!$noask)
172 {
173 if($replace)
174 {
175 $data->{$en}{$l} = $str;
176 $conflicts{$l}{$data->{$en}{$l}} = $str;
177 }
178 else
179 {
180 $conflicts{$l}{$str} = $data->{$en}{$l};
181 }
182 }
183 }
184 }
185 else
186 {
187 $data->{$en}{$l} = $str;
188 }
189}
190
[31111]191# Check a current state for new data
192#
193# @param postate Pointer to current status hash
194# @param data Pointer to final data array
195# @param l current language
196# @param txt output text in case of error, usually file and line number
197# @param keys pointer to hash for info keys extracted from the first msgid "" entry
198# @param new whether a data set is finish or not yet complete
199# @param use hash to strings to use or undef for all strings
200#
201sub checkpo($$$$$$$)
[19319]202{
[31111]203 my ($postate, $data, $l, $txt, $keys, $new, $use) = @_;
[19319]204
205 if($postate->{type} eq "msgid") {$postate->{msgid} = $postate->{last};}
206 elsif($postate->{type} eq "msgid_plural") {$postate->{msgid_1} = $postate->{last};}
207 elsif($postate->{type} =~ /^msgstr(\[0\])?$/) {$postate->{msgstr} = $postate->{last};}
208 elsif($postate->{type} =~ /^msgstr\[(.+)\]$/) {$postate->{"msgstr_$1"} = $postate->{last};}
209 elsif($postate->{type} eq "msgctxt") {$postate->{context} = $postate->{last};}
210 elsif($postate->{type}) { die "Strange type $postate->{type} found\n" }
211
212 if($new)
213 {
[31111]214 my $en = $postate->{context} ? "___$postate->{context}___$postate->{msgid}" : $postate->{msgid};
215 if((!$postate->{fuzzy}) && ($l eq "pot" || $postate->{msgstr}) && $postate->{msgid}
216 && (!$use || $use->{$en}))
[19319]217 {
218 copystring($data, $postate->{msgid}, $l, $postate->{msgstr},$txt,$postate->{context}, 1);
[31111]219 if(!$use || $use->{$en}{"en.1"})
220 {
221 for(my $i = 1; exists($postate->{"msgstr_$i"}); ++$i)
222 { copystring($data, $postate->{msgid}, "$l.$i", $postate->{"msgstr_$i"},$txt,$postate->{context}, 1); }
223 if($postate->{msgid_1})
224 { copystring($data, $postate->{msgid}, "en.1", $postate->{msgid_1},$txt,$postate->{context}, 1); }
225 }
[19319]226 copystring($data, $postate->{msgid}, "_src.$l", $postate->{src},$txt,$postate->{context}, 1);
227 }
228 elsif($postate->{msgstr} && !$postate->{msgid})
229 {
230 my %k = ($postate->{msgstr} =~ /(.+?): +(.+?)\\n/g);
231 # take the first one!
232 for $a (sort keys %k)
233 {
234 $keys->{$l}{$a} = $k{$a} if !$keys->{$l}{$a};
235 }
236 }
237 foreach my $k (keys %{$postate})
238 {
239 delete $postate->{$k};
240 }
241 $postate->{type} = $postate->{last} = "";
242 }
243}
244
[19356]245sub makestring($)
246{
247 my ($str) = @_;
248 $str =~ s/\\"/"/g;
[21523]249 $str =~ s/\\\\/\\/g;
[19356]250 $str =~ s/\\n/\n/g;
251 $str = encode("utf8", $str);
[26338]252 return $str;
[19356]253}
254
[26338]255sub checkstring
256{
[26586]257 my ($la, $tr, $en, $cnt, $en1, $eq) = @_;
[26338]258 $tr = makestring($tr);
259 $en = makestring($en);
[26340]260 $cnt = $cnt || 0;
[26338]261 $en1 = makestring($en1) if defined($en1);
262 my $error = 0;
263
264 # Test one - are there single quotes which don't occur twice
265 my $v = $tr;
266 $v =~ s/''//g; # replace all twice occuring single quotes
[26344]267 $v =~ s/'[{}]'//g; # replace all bracketquoting single quotes
[26340]268 if($v =~ /'/)#&& $la ne "en")
[26338]269 {
[30169]270 warn "JAVA translation issue for language $la: Mismatching single quotes:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
[26339]271 $error = 1;
[26338]272 }
273 # Test two - check if there are {..} which should not be
274 my @fmt = ();
275 my $fmt;
276 my $fmte;
277 my $fmte1 = "";
[26344]278 my $trt = $tr; $trt =~ s/'[{}]'//g;
[33152]279 while($trt =~ /\{(.*?)\}/g) {push @fmt,$1};
280 while($trt =~ /\%([a-z]+)\%/g) {push @fmt,$1};
281 $fmt = join("_", sort @fmt); @fmt = ();
[26344]282 my $ent = $en; $ent =~ s/'[{}]'//g;
[33152]283 while($ent =~ /\{(.*?)\}/g) {push @fmt,$1};
284 while($ent =~ /\%([a-z]+)\%/g) {push @fmt,$1};
285 $fmte = join("_", sort @fmt); @fmt = ();
[26344]286 if($en1)
287 {
288 my $en1t = $en1; $en1t =~ s/'[{}]'//g;
289 while($en1t =~ /\{(.*?)\}/g) {push @fmt,$1}; $fmte1 = join("_", sort @fmt);
290 }
[26338]291 if($fmt ne $fmte && $fmt ne $fmte1)
292 {
293 if(!($fmte eq '0' && $fmt eq "" && $cnt == 1)) # Don't warn when a single value is left for first multi-translation
294 {
[30169]295 warn "JAVA translation issue for language $la ($cnt): Mismatching format entries:\nTranslated text: ".decode("utf8",$tr)."\nOriginal text: ".decode("utf8",$en)."\n";
[26338]296 $error = 1;
297 }
298 }
299
300 #$tr = "" if($error && $la ne "en");
[26586]301 return pack("n",65534) if $eq;
[26338]302
303 return pack("n",length($tr)).$tr;
304}
305
[19319]306sub createlang($@)
307{
308 my ($data, @files) = @_;
[26174]309 my $maxlen = 0;
[19319]310 foreach my $file (@files)
311 {
[26174]312 my $len = length($file);
313 $maxlen = $len if $len > $maxlen;
314 }
[31111]315 my $maxcount = keys(%{$data});
[26174]316 foreach my $file (@files)
317 {
[19319]318 my $la;
[19496]319 my $cnt = 0;
[31088]320 if($file =~ /^(?:.*\/)?$lang_pattern_file\.lang$/)
[19319]321 {
322 $la = $1;
[31088]323 $la =~ s/-/\@/;
[19319]324 }
325 else
326 {
327 die "Language for file $file unknown.";
328 }
329 die "Could not open outfile $file\n" if !open FILE,">:raw",$file;
330
331 foreach my $en (sort keys %{$data})
332 {
333 next if $data->{$en}{"en.1"};
334 my $val;
[26586]335 my $eq;
[19319]336 if($la eq "en")
337 {
[19496]338 ++$cnt;
[19319]339 $val = $en;
340 $val =~ s/^___(.*)___/_:$1\n/;
341 }
342 else
343 {
344 my $ennoctx = $en;
345 $ennoctx =~ s/^___(.*)___//;
346 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
[19496]347 ++$cnt if $val;
[26586]348 if($ennoctx eq $val)
349 {
350 $val = ""; $eq = 1;
351 }
[19319]352 }
[26586]353 print FILE checkstring($la, $val, $en, undef, undef, $eq);
[19319]354 }
355 print FILE pack "n",0xFFFF;
356 foreach my $en (sort keys %{$data})
357 {
358 next if !$data->{$en}{"en.1"};
359 my $num;
360 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
361 { }
362 my $val;
[31111]363 my $eq = 0;
[19319]364 if($la eq "en")
365 {
[19496]366 ++$cnt;
[19319]367 $val = $en;
368 $val =~ s/^___(.*)___/_:$1\n/;
369 }
370 else
371 {
372 $val = (exists($data->{$en}{$la})) ? $data->{$en}{$la} : "";
373 --$num if(!$val);
[19496]374 ++$cnt if $val;
[19319]375 if($num == 2)
376 {
377 my $ennoctx = $en;
378 $ennoctx =~ s/^___(.*)___//;
[26987]379 if($val eq $ennoctx && $data->{$en}{"$la.1"} eq $data->{$en}{"en.1"})
380 {
381 $num = 0;
382 $eq = 1;
383 }
[19319]384 }
385 }
386
[26987]387 print FILE pack "C",$eq ? 0xFE : $num;
[19319]388 if($num)
389 {
[26338]390 print FILE checkstring($la, $val, $en, 1, $data->{$en}{"en.1"});
[19319]391 for($num = 1; exists($data->{$en}{"$la.$num"}); ++$num)
392 {
[26338]393 print FILE checkstring($la, $data->{$en}{"$la.$num"}, $en, $num+1, $data->{$en}{"en.1"});
[19319]394 }
395 }
396 }
397 close FILE;
[25523]398 if(!$cnt)
399 {
400 unlink $file;
[26174]401 printf "Skipped file %-${maxlen}s: Contained 0 strings out of %5d.\n",$file,$maxcount;
[25523]402 }
403 else
404 {
[31956]405 printf "Created file %-${maxlen}s: Added %5d strings out of %5d (%5.1f%%).\n",$file,$cnt,$maxcount,,$cnt*100.0/$maxcount-5e-2;
[25523]406 }
[19319]407 }
408}
409
410sub main
411{
412 my %lang;
413 my @po;
[31111]414 my $potfile;
415 my $basename = "./";
416 foreach my $arg (@ARGV)
417 {
418 next if $arg !~ /^--/;
419 if($arg =~ /^--basedir=(.+)$/)
420 {
421 $basename = $1;
422 }
423 elsif($arg =~ /^--potfile=(.+)$/)
424 {
425 $potfile = $1;
426 }
427 else
428 {
429 die "Unknown argument $arg.";
430 }
431 }
[28423]432 $basename .= "/" if !($basename =~ /[\/\\:]$/);
[19319]433 foreach my $arg (@ARGV)
434 {
[31111]435 next if $arg =~ /^--/;
[19319]436 foreach my $f (glob $arg)
437 {
438 if($f =~ /\*/) { printf "Skipping $f\n"; }
439 elsif($f =~ /\.po$/) { push(@po, $f); }
440 else { die "unknown file extension."; }
441 }
442 }
[31111]443 my %data = loadfiles(\%lang,$potfile ? loadpot($potfile) : undef, @po);
444
[19319]445 my @clang;
446 foreach my $la (sort keys %lang)
447 {
[31088]448 $la =~ s/\@/-/;
[19319]449 push(@clang, "${basename}$la.lang");
450 }
451 push(@clang, "${basename}en.lang");
452 die "There have been warning. No output.\n" if $waswarn;
453
454 createlang(\%data, @clang);
455}
Note: See TracBrowser for help on using the repository browser.