Changeset 15613 in josm


Ignore:
Timestamp:
2019-12-25T19:04:23+01:00 (5 years ago)
Author:
Don-vip
Message:

fix #18449 - exclude lowercase schwa character (ə) from "unusual unicode character" check, as it is used in the Azerbaijani alphabet

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java

    r15583 r15613  
    425425
    426426    static boolean containsUnusualUnicodeCharacter(String key, String value) {
    427         return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, UnicodeBlock.of(c)));
     427        return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, c));
    428428    }
    429429
     
    431431     * Detects highly suspicious Unicode characters that have been seen in OSM database.
    432432     * @param key tag key
    433      * @param b Unicode block of the current character
     433     * @param c current character code point
    434434     * @return {@code true} if the current unicode block is very unusual for the given key
    435435     */
    436     private static boolean isUnusualUnicodeBlock(String key, UnicodeBlock b) {
    437         return isUnusualPhoneticUse(key, b) || isUnusualBmpUse(b) || isUnusualSmpUse(b);
    438     }
    439 
    440     private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b) {
    441         return (b == UnicodeBlock.IPA_EXTENSIONS                        // U+0250..U+02AF
     436    private static boolean isUnusualUnicodeBlock(String key, int c) {
     437        UnicodeBlock b = UnicodeBlock.of(c);
     438        return isUnusualPhoneticUse(key, b, c) || isUnusualBmpUse(b) || isUnusualSmpUse(b);
     439    }
     440
     441    private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b, int c) {
     442        return c != 0x0259                                              // U+0259 is used as a standard character in azerbaidjani
     443            && (b == UnicodeBlock.IPA_EXTENSIONS                        // U+0250..U+02AF
    442444             || b == UnicodeBlock.PHONETIC_EXTENSIONS                   // U+1D00..U+1D7F
    443445             || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT)       // U+1D80..U+1DBF
  • trunk/test/unit/org/openstreetmap/josm/data/validation/tests/TagCheckerTest.java

    r15042 r15613  
    315315        assertFalse(TagChecker.containsUnusualUnicodeCharacter("name", "Tallinn — Narva"));
    316316    }
     317
     318    /**
     319     * Non-regression test for <a href="https://josm.openstreetmap.de/ticket/18449">Bug #18449</a>.
     320     */
     321    @Test
     322    public void testTicket18449() {
     323        assertFalse(TagChecker.containsUnusualUnicodeCharacter("name", "Hökumət Evi"));
     324    }
    317325}
Note: See TracChangeset for help on using the changeset viewer.