Ignore:
Timestamp:
2024-02-14T15:57:09+01:00 (4 months ago)
Author:
taylor.smock
Message:

Fix #23471: fix an inconsistency between fast ASCII sort and slower unicode-aware sort

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/src/org/openstreetmap/josm/tools/AlphanumComparator.java

    r18982 r18983  
    3636import java.util.Comparator;
    3737
    38 import org.openstreetmap.josm.gui.MainApplication;
    39 import org.openstreetmap.josm.spi.lifecycle.Lifecycle;
    40 
    4138/**
    4239 * The Alphanum Algorithm is an improved sorting algorithm for strings
     
    5249 */
    5350public final class AlphanumComparator implements Comparator<String>, Serializable {
     51    /** {@code true} to use the faster ASCII sorting algorithm. Set to {@code false} when testing compatibility. */
     52    static boolean useFastASCIISort = true;
     53    /**
     54     * The sort order for the fast ASCII sort method.
     55     */
     56    static final String ASCII_SORT_ORDER =
     57            " \r\t\n\f\u000b-_,;:!?/.`^~'\"()[]{}@$*\\&#%+<=>|0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
    5458
    5559    private static final long serialVersionUID = 1L;
     
    5862    /**
    5963     * A mapping from ASCII characters to the default {@link Collator} order.
    60      * At writing, the default rules can be found in {@link sun.util.locale.provider.CollationRules#DEFAULTRULES}.
     64     * At writing, the default rules can be found in CollationRules#DEFAULTRULES.
    6165     */
    6266    private static final byte[] ASCII_MAPPING = new byte[128];
     
    7175        // After the symbols, we have 0-9, and then aA-zZ.
    7276        // The character order
    73         final String order = " \r\t\n\f\u000b-_,;:!?/.`^~'\"()[]{}@$*\\&#%+<=>|0123456789aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ";
    74         for (int i = 0; i < order.length(); i++) {
    75             char c = order.charAt(i);
     77        for (int i = 0; i < ASCII_SORT_ORDER.length(); i++) {
     78            char c = ASCII_SORT_ORDER.charAt(i);
    7679            ASCII_MAPPING[c] = (byte) (i + 1);
    7780        }
     
    101104     */
    102105    private static int compareString(String string1, int len1, String string2, int len2) {
    103         int lim = Math.min(len1, len2);
    104         int k = 0;
    105         while (k < lim) {
    106             final int c1 = ASCII_MAPPING[string1.charAt(k)];
    107             final int c2 = ASCII_MAPPING[string2.charAt(k)];
     106        int loc1 = 0;
     107        int loc2 = 0;
     108        while (loc1 < len1 && loc2 < len2) {
     109            // Ignore control symbols
     110            while (loc1 < len1 - 1 && string1.charAt(loc1) <= 32) {
     111                loc1++;
     112            }
     113            while (loc2 < len2 - 1 && string2.charAt(loc2) <= 32) {
     114                loc2++;
     115            }
     116            if (loc1 >= len1 || loc2 >= len2) break;
     117
     118            char lower1 = Character.toLowerCase(string1.charAt(loc1));
     119            char lower2 = Character.toLowerCase(string2.charAt(loc2));
     120
     121            final int c1 = ASCII_MAPPING[lower1];
     122            final int c2 = ASCII_MAPPING[lower2];
    108123            if (c1 != c2) {
    109124                return c1 - c2;
    110125            }
    111             k++;
     126            loc1++;
     127            loc2++;
    112128        }
    113129        return len1 - len2;
     
    185201            }
    186202        } else {
    187             // Check if both chunks are ascii only
    188             // FIXME: re-enable once #23471 is fixed (the exception at startup keeps JOSM from finishing startup)
    189             if (false && isAscii(thisChunk, thisChunkLength) && isAscii(thatChunk, thatChunkLength)) {
     203            // Check if both chunks are ascii only; if so, use a much faster sorting algorithm.
     204            if (useFastASCIISort && isAscii(thisChunk, thisChunkLength) && isAscii(thatChunk, thatChunkLength)) {
    190205                return Utils.clamp(compareString(thisChunk, thisChunkLength, thatChunk, thatChunkLength), -1, 1);
    191206            }
Note: See TracChangeset for help on using the changeset viewer.