Changeset 14991 in josm for trunk/src/org/openstreetmap


Ignore:
Timestamp:
2019-04-14T22:17:22+02:00 (6 years ago)
Author:
Don-vip
Message:

fix #17595 - smarter detection of ZWNJ/ZWJ unicode characters

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java

    r14952 r14991  
    7575    private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>();
    7676
    77     private static final Pattern NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
    78             "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200c-\\u200f\\u202a-\\u202e]");
     77    private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
     78            "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]");
    7979
    8080    /** The TagChecker data */
     
    378378
    379379    /**
    380      * Checks given string (key or value) if it contains non-printing control characters (either ASCII or Unicode bidi characters)
     380     * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters)
    381381     * @param s string to check
    382382     * @return {@code true} if {@code s} contains non-printing control characters
    383383     */
    384     private static boolean containsNonPrintingControlCharacter(String s) {
    385         return s != null && s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c));
     384    static boolean containsUnwantedNonPrintingControlCharacter(String s) {
     385        return s != null && !s.isEmpty() && (
     386                isJoiningChar(s.charAt(0)) ||
     387                isJoiningChar(s.charAt(s.length() - 1)) ||
     388                s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
     389                );
    386390    }
    387391
     
    394398    }
    395399
     400    private static boolean isJoiningChar(int c) {
     401        return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ
     402    }
     403
    396404    private static boolean isBidiControlChar(int c) {
    397         /* check for range 0x200c to 0x200f (ZWNJ, ZWJ, LRM, RLM) or
     405        /* check for range 0x200e to 0x200f (LRM, RLM) or
    398406                           0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
    399         return (((c & 0xfffffffc) == 0x200c) || ((c >= 0x202a) && (c <= 0x202e)));
    400     }
    401 
    402     static String removeNonPrintingControlCharacters(String s) {
    403         return NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
     407        return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e);
     408    }
     409
     410    static String removeUnwantedNonPrintingControlCharacters(String s) {
     411        // Remove all unwanted characters
     412        String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
     413        // Remove joining characters located at the beginning of the string
     414        while (!result.isEmpty() && isJoiningChar(result.charAt(0))) {
     415            result = result.substring(1);
     416        }
     417        // Remove joining characters located at the end of the string
     418        while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) {
     419            result = result.substring(0, result.length() - 1);
     420        }
     421        return result;
    404422    }
    405423
     
    583601        if (!checkValues || value == null)
    584602            return;
    585         if ((containsNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
     603        if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
    586604            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
    587605                    .message(tr("Tag value contains non-printing character"), s, key)
    588606                    .primitives(p)
    589                     .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(value)))
     607                    .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value)))
    590608                    .build());
    591609            withErrors.put(p, "ICV");
     
    639657        if (!checkKeys || key == null)
    640658            return;
    641         if ((containsNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
     659        if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
    642660            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
    643661                    .message(tr("Tag key contains non-printing character"), s, key)
    644662                    .primitives(p)
    645                     .fix(() -> new ChangePropertyCommand(p, key, removeNonPrintingControlCharacters(key)))
     663                    .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key)))
    646664                    .build());
    647665            withErrors.put(p, "ICK");
Note: See TracChangeset for help on using the changeset viewer.