Changeset 18983 in josm


Ignore:
Timestamp:
2024-02-14T15:57:09+01:00 (3 months ago)
Author:
taylor.smock
Message:

Fix #23471: fix an inconsistency between fast ASCII sort and slower unicode-aware sort

Location:
trunk
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/tools/AlphanumComparator.java

    r18982 r18983  
    3636import java.util.Comparator;
    3737
    38 import org.openstreetmap.josm.gui.MainApplication;
    39 import org.openstreetmap.josm.spi.lifecycle.Lifecycle;
    40 
    4138/**
    4239 * The Alphanum Algorithm is an improved sorting algorithm for strings
     
    5249 */
    5350public final class AlphanumComparator implements Comparator<String>, Serializable {
     51    /** {@code true} to use the faster ASCII sorting algorithm. Set to {@code false} when testing compatibility. */
     52    static boolean useFastASCIISort = true;
     53    /**
     54     * The sort order for the fast ASCII sort method.
     55     */
     56    static final String ASCII_SORT_ORDER =
     57            " \r\t\n\f\u000b-_,;:!?/.`^~'\"()[]{}@$*\\&#%+<=>|0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    5458
    5559    private static final long serialVersionUID = 1L;
     
    5862    /**
    5963     * A mapping from ASCII characters to the default {@link Collator} order.
    60      * At writing, the default rules can be found in {@link sun.util.locale.provider.CollationRules#DEFAULTRULES}.
     64     * At writing, the default rules can be found in CollationRules#DEFAULTRULES.
    6165     */
    6266    private static final byte[] ASCII_MAPPING = new byte[128];
     
    7175        // After the symbols, we have 0-9, and then aA-zZ.
    7276        // The character order
    73         final String order = " \r\t\n\f\u000b-_,;:!?/.`^~'\"()[]{}@$*\\&#%+<=>|0123456789aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ";
    74         for (int i = 0; i < order.length(); i++) {
    75             char c = order.charAt(i);
     77        for (int i = 0; i < ASCII_SORT_ORDER.length(); i++) {
     78            char c = ASCII_SORT_ORDER.charAt(i);
    7679            ASCII_MAPPING[c] = (byte) (i + 1);
    7780        }
     
    101104     */
    102105    private static int compareString(String string1, int len1, String string2, int len2) {
    103         int lim = Math.min(len1, len2);
    104         int k = 0;
    105         while (k < lim) {
    106             final int c1 = ASCII_MAPPING[string1.charAt(k)];
    107             final int c2 = ASCII_MAPPING[string2.charAt(k)];
     106        int loc1 = 0;
     107        int loc2 = 0;
     108        while (loc1 < len1 && loc2 < len2) {
     109            // Ignore control symbols
     110            while (loc1 < len1 - 1 && string1.charAt(loc1) <= 32) {
     111                loc1++;
     112            }
     113            while (loc2 < len2 - 1 && string2.charAt(loc2) <= 32) {
     114                loc2++;
     115            }
     116            if (loc1 >= len1 || loc2 >= len2) break;
     117
     118            char lower1 = Character.toLowerCase(string1.charAt(loc1));
     119            char lower2 = Character.toLowerCase(string2.charAt(loc2));
     120
     121            final int c1 = ASCII_MAPPING[lower1];
     122            final int c2 = ASCII_MAPPING[lower2];
    108123            if (c1 != c2) {
    109124                return c1 - c2;
    110125            }
    111             k++;
     126            loc1++;
     127            loc2++;
    112128        }
    113129        return len1 - len2;
     
    185201            }
    186202        } else {
    187             // Check if both chunks are ascii only
    188             // FIXME: re-enable once #23471 is fixed (the exception at startup keeps JOSM from finishing startup)
    189             if (false && isAscii(thisChunk, thisChunkLength) && isAscii(thatChunk, thatChunkLength)) {
     203            // Check if both chunks are ascii only; if so, use a much faster sorting algorithm.
     204            if (useFastASCIISort && isAscii(thisChunk, thisChunkLength) && isAscii(thatChunk, thatChunkLength)) {
    190205                return Utils.clamp(compareString(thisChunk, thisChunkLength, thatChunk, thatChunkLength), -1, 1);
    191206            }
  • trunk/test/unit/org/openstreetmap/josm/tools/AlphanumComparatorTest.java

    r18690 r18983  
    22package org.openstreetmap.josm.tools;
    33
     4import static org.junit.jupiter.api.Assertions.assertAll;
    45import static org.junit.jupiter.api.Assertions.assertEquals;
    56
     7import java.text.Collator;
     8import java.util.ArrayList;
    69import java.util.Arrays;
    710import java.util.List;
     11import java.util.function.BiFunction;
     12import java.util.stream.Stream;
    813
     14import org.junit.jupiter.api.AfterEach;
    915import org.junit.jupiter.api.Test;
    1016
     
    1319 */
    1420class AlphanumComparatorTest {
     21    @AfterEach
     22    void teardown() {
     23        AlphanumComparator.useFastASCIISort = true;
     24    }
    1525
    1626    /**
     
    3343        assertEquals(Arrays.asList("a5", "a100", "a00999", "b1", "b20"), lst);
    3444    }
     45
     46    private static Stream<String[]> testNonRegression23471Arguments() {
     47        List<String> testStrings = Arrays.asList(
     48                "AMEN",
     49                "Ameriabank",
     50                "America First Credit Union",
     51                "BAC Credomatic",
     52                "BADR Banque",
     53                "BAI",
     54                "Banca Popolare di Cividale",
     55                "Banca Popolare di Sondrio",
     56                "Banca Sella",
     57                "Banca Transilvania",
     58                "Bancaribe",
     59                "BancaStato",
     60                "Banco Agrario",
     61                "Banco AV Villas",
     62                "Banco Azteca",
     63                "Banco Bicentenario",
     64                "Banco BISA",
     65                "Banco BMG",
     66                "Banco BPI (Portugal)",
     67                "Banco BPM",
     68                "Banco Caja Social",
     69                "Banco Ciudad",
     70                "Banco Continental (Paraguay)",
     71                "Banco di Sardegna"
     72        );
     73        List<String> testChars = new ArrayList<>(AlphanumComparator.ASCII_SORT_ORDER.length());
     74        for (char c : AlphanumComparator.ASCII_SORT_ORDER.toCharArray()) {
     75            testChars.add(Character.toString(c));
     76        }
     77        BiFunction<List<String>, String, List<String>> subList = (list, string) -> list.subList(list.indexOf(string), list.size());
     78        return Stream.concat(
     79                testStrings.stream().flatMap(first -> subList.apply(testStrings, first).stream().map(second -> new String[]{first, second})),
     80                testChars.stream().flatMap(first -> subList.apply(testChars, first).stream().map(second -> new String[]{first, second}))
     81        );
     82    }
     83
     84    /**
     85     * Non-regression test for #23471
     86     * This ensures that the comparison contract holds.
     87     * There are ~5300 combinations run in <1s (as of 2024-02-14).
     88     */
     89    @Test
     90    void testNonRegression23471() {
     91        assertAll(testNonRegression23471Arguments().map(strings -> () -> testNonRegression23471(strings[0], strings[1])));
     92    }
     93
     94    private static void testNonRegression23471(String first, String second) {
     95        AlphanumComparator.useFastASCIISort = true;
     96        final AlphanumComparator instance = AlphanumComparator.getInstance();
     97        assertEquals(-instance.compare(first, second), instance.compare(second, first));
     98        // Ensure that the fast sort is equivalent to the slow sort
     99        AlphanumComparator.useFastASCIISort = false;
     100        final int slowFirstSecond = instance.compare(first, second);
     101        final int slowSecondFirst = instance.compare(second, first);
     102        AlphanumComparator.useFastASCIISort = true;
     103        final int fastFirstSecond = instance.compare(first, second);
     104        final int fastSecondFirst = instance.compare(second, first);
     105        assertEquals(slowFirstSecond, fastFirstSecond);
     106        assertEquals(slowSecondFirst, fastSecondFirst);
     107
     108        final Collator collator = Collator.getInstance();
     109        collator.setStrength(Collator.SECONDARY);
     110        // Check against the collator instance
     111        assertEquals(Utils.clamp(collator.compare(first, second), -1, 1),
     112                Utils.clamp(instance.compare(first, second), -1, 1));
     113        assertEquals(Utils.clamp(collator.compare(second, first), -1, 1),
     114                Utils.clamp(instance.compare(second, first), -1, 1));
     115    }
     116
    35117}
Note: See TracChangeset for help on using the changeset viewer.