Changeset 14991 in josm
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java
r14952 r14991 75 75 private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>(); 76 76 77 private static final Pattern NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(78 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200c-\\u200f\\u202a-\\u202e]");77 private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile( 78 "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]"); 79 79 80 80 /** The TagChecker data */ … … 378 378 379 379 /** 380 * Checks given string (key or value) if it contains non-printing control characters (either ASCII or Unicode bidi characters)380 * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters) 381 381 * @param s string to check 382 382 * @return {@code true} if {@code s} contains non-printing control characters 383 383 */ 384 private static boolean containsNonPrintingControlCharacter(String s) { 385 return s != null && s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c)); 384 static boolean containsUnwantedNonPrintingControlCharacter(String s) { 385 return s != null && !s.isEmpty() && ( 386 isJoiningChar(s.charAt(0)) || 387 isJoiningChar(s.charAt(s.length() - 1)) || 388 s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c)) 389 ); 386 390 } 387 391 … … 394 398 } 395 399 400 private static boolean isJoiningChar(int c) { 401 return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ 402 } 403 396 404 private static boolean isBidiControlChar(int c) { 397 /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or405 /* check for range 0x200e to 0x200f (LRM, RLM) or 398 406 0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */ 399 return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e))); 400 } 401 402 static String removeNonPrintingControlCharacters(String s) { 403 return 
NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll(""); 407 return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e); 408 } 409 410 static String removeUnwantedNonPrintingControlCharacters(String s) { 411 // Remove all unwanted characters 412 String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll(""); 413 // Remove joining characters located at the beginning of the string 414 while (!result.isEmpty() && isJoiningChar(result.charAt(0))) { 415 result = result.substring(1); 416 } 417 // Remove joining characters located at the end of the string 418 while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) { 419 result = result.substring(0, result.length() - 1); 420 } 421 return result; 404 422 } 405 423 … … 583 601 if (!checkValues || value == null) 584 602 return; 585 if ((containsNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {603 if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) { 586 604 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE) 587 605 .message(tr("Tag value contains non-printing character"), s, key) 588 606 .primitives(p) 589 .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(value)))607 .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value))) 590 608 .build()); 591 609 withErrors.put(p, "ICV"); … … 639 657 if (!checkKeys || key == null) 640 658 return; 641 if ((containsNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {659 if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) { 642 660 errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY) 643 661 .message(tr("Tag key contains non-printing character"), s, key) 644 662 .primitives(p) 645 .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(key)))663 .fix(() -> new ChangePropertyCommand(p, key, 
removeUnwantedNonPrintingControlCharacters(key))) 646 664 .build()); 647 665 withErrors.put(p, "ICK"); -
trunk/test/unit/org/openstreetmap/josm/data/validation/tests/TagCheckerTest.java
r14933 r14991 9 9 import java.util.ArrayList; 10 10 import java.util.List; 11 11 import java.util.function.Consumer; 12 13 import org.junit.Assert; 12 14 import org.junit.Rule; 13 15 import org.junit.Test; … … 248 250 } 249 251 250 /** 251 * Unit test of {@link TagChecker#removeNonPrintingControlCharacters} 252 */ 253 @Test 254 public void testRemoveUnprintableControlCharacters() { 252 private static void doTestUnwantedNonprintingControlCharacters(String s, Consumer<Boolean> assertionC, String expected) { 253 assertionC.accept(TagChecker.containsUnwantedNonPrintingControlCharacter(s)); 254 assertEquals(expected, TagChecker.removeUnwantedNonPrintingControlCharacters(s)); 255 } 256 257 private static void doTestUnwantedNonprintingControlCharacters(String s) { 258 doTestUnwantedNonprintingControlCharacters(s, Assert::assertTrue, ""); 259 } 260 261 /** 262 * Unit test of {@link TagChecker#containsUnwantedNonPrintingControlCharacter} 263 * / {@link TagChecker#removeUnwantedNonPrintingControlCharacters} 264 */ 265 @Test 266 public void testContainsRemoveUnwantedNonprintingControlCharacters() { 267 // Check empty string is handled 268 doTestUnwantedNonprintingControlCharacters("", Assert::assertFalse, ""); 255 269 // Check 65 ASCII control characters are removed, except new lines 256 270 for (char c = 0x0; c < 0x20; c++) { 257 271 if (c != '\r' && c != '\n') { 258 assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty());272 doTestUnwantedNonprintingControlCharacters(Character.toString(c)); 259 273 } else { 260 assertFalse(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty());274 doTestUnwantedNonprintingControlCharacters(Character.toString(c), Assert::assertFalse, Character.toString(c)); 261 275 } 262 276 } 263 assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString((char) 0x7F)).isEmpty());264 // Check 9 Unicode bidi control characters are removed265 for (char c = 0x200c; c <= 0x200f; c++) 
{266 assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty());277 doTestUnwantedNonprintingControlCharacters(Character.toString((char) 0x7F)); 278 // Check 7 Unicode bidi control characters are removed 279 for (char c = 0x200e; c <= 0x200f; c++) { 280 doTestUnwantedNonprintingControlCharacters(Character.toString(c)); 267 281 } 268 282 for (char c = 0x202a; c <= 0x202e; c++) { 269 assertTrue(TagChecker.removeNonPrintingControlCharacters(Character.toString(c)).isEmpty()); 283 doTestUnwantedNonprintingControlCharacters(Character.toString(c)); 284 } 285 // Check joining characters are removed if located at the beginning or end of the string 286 for (char c = 0x200c; c <= 0x200d; c++) { 287 final String s = Character.toString(c); 288 doTestUnwantedNonprintingControlCharacters(s); 289 doTestUnwantedNonprintingControlCharacters(s + s); 290 doTestUnwantedNonprintingControlCharacters(s + 'a' + s, Assert::assertTrue, "a"); 291 final String ok = 'a' + s + 'b'; 292 doTestUnwantedNonprintingControlCharacters(ok, Assert::assertFalse, ok); 293 doTestUnwantedNonprintingControlCharacters(s + ok, Assert::assertTrue, ok); 294 doTestUnwantedNonprintingControlCharacters(ok + s, Assert::assertTrue, ok); 295 doTestUnwantedNonprintingControlCharacters(s + ok + s, Assert::assertTrue, ok); 270 296 } 271 297 }
Note: See TracChangeset for help on using the changeset viewer.