source: josm/trunk/src/org/openstreetmap/josm/tools/AlphanumComparator.java@ 18982

Last change on this file since 18982 was 18982, checked in by taylor.smock, 4 months ago

See #23471: Temporarily disable fast sorting

  • Property svn:eol-style set to native
File size: 9.0 KB
Line 
1// License: GPL. For details, see LICENSE file.
2package org.openstreetmap.josm.tools;
3
4/*
5 * The Alphanum Algorithm is an improved sorting algorithm for strings
6 * containing numbers. Instead of sorting numbers in ASCII order like a standard
7 * sort, this algorithm sorts numbers in numeric order.
8 *
9 * The Alphanum Algorithm is discussed at http://www.DaveKoelle.com
10 *
11 * Released under the MIT License - https://opensource.org/licenses/MIT
12 *
13 * Copyright 2007-2017 David Koelle
14 *
15 * Permission is hereby granted, free of charge, to any person obtaining
16 * a copy of this software and associated documentation files (the "Software"),
17 * to deal in the Software without restriction, including without limitation
18 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
19 * and/or sell copies of the Software, and to permit persons to whom the
20 * Software is furnished to do so, subject to the following conditions:
21 *
22 * The above copyright notice and this permission notice shall be included
23 * in all copies or substantial portions of the Software.
24 *
25 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
26 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
29 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
30 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
31 * USE OR OTHER DEALINGS IN THE SOFTWARE.
32 */
33import java.io.Serializable;
34import java.text.Collator;
35import java.util.Arrays;
36import java.util.Comparator;
37
38import org.openstreetmap.josm.gui.MainApplication;
39import org.openstreetmap.josm.spi.lifecycle.Lifecycle;
40
41/**
42 * The Alphanum Algorithm is an improved sorting algorithm for strings
43 * containing numbers: Instead of sorting numbers in ASCII order like a standard
44 * sort, this algorithm sorts numbers in numeric order.
45 * <p>
46 * The Alphanum Algorithm is discussed at
47 * <a href="https://web.archive.org/web/20210602024123/http://www.davekoelle.com/alphanum.html">DaveKoelle.com</a>
48 * <p>
49 * This is an updated version with enhancements made by Daniel Migowski, Andre
50 * Bogus, David Koelle and others.
51 *
52 */
53public final class AlphanumComparator implements Comparator<String>, Serializable {
54
55 private static final long serialVersionUID = 1L;
56
57 private static final AlphanumComparator INSTANCE = new AlphanumComparator();
58 /**
59 * A mapping from ASCII characters to the default {@link Collator} order.
60 * At writing, the default rules can be found in {@link sun.util.locale.provider.CollationRules#DEFAULTRULES}.
61 */
62 private static final byte[] ASCII_MAPPING = new byte[128];
63 static {
64 for (int i = 0; i < ASCII_MAPPING.length; i++) {
65 ASCII_MAPPING[i] = (byte) i; // This is kind of pointless, but it is the default ASCII ordering.
66 }
67 // The control characters are "ignored"
68 Arrays.fill(ASCII_MAPPING, 0, 32, (byte) 0);
69 ASCII_MAPPING[127] = 0; // DEL is ignored.
70 // We have 37 order overrides for symbols; ASCII tables has control characters through 31. 32-47 are symbols.
71 // After the symbols, we have 0-9, and then aA-zZ.
72 // The character order
73 final String order = " \r\t\n\f\u000b-_,;:!?/.`^~'\"()[]{}@$*\\&#%+<=>|0123456789aAbBcCdDeEfFgGhHiIjJkKlLmMnNoOpPqQrRsStTuUvVwWxXyYzZ";
74 for (int i = 0; i < order.length(); i++) {
75 char c = order.charAt(i);
76 ASCII_MAPPING[c] = (byte) (i + 1);
77 }
78 }
79
80 /**
81 * Replies the unique instance.
82 * @return the unique instance
83 */
84 public static AlphanumComparator getInstance() {
85 return INSTANCE;
86 }
87
88 /**
89 * Constructs a new Alphanum Comparator.
90 */
91 private AlphanumComparator() {
92 }
93
94 /**
95 * Compare two ASCII strings in a manner compatible with the default {@link Collator}
96 * @param string1 The first string to compare
97 * @param len1 The length of the first string
98 * @param string2 The second string to compare
99 * @param len2 The length of the second string
100 * @return See {@link String#compareToIgnoreCase(String)} (e.g. {@code string1.compareToIgnoreCase(string2)}).
101 */
102 private static int compareString(String string1, int len1, String string2, int len2) {
103 int lim = Math.min(len1, len2);
104 int k = 0;
105 while (k < lim) {
106 final int c1 = ASCII_MAPPING[string1.charAt(k)];
107 final int c2 = ASCII_MAPPING[string2.charAt(k)];
108 if (c1 != c2) {
109 return c1 - c2;
110 }
111 k++;
112 }
113 return len1 - len2;
114 }
115
116 /**
117 * Returns an alphanum chunk.
118 * Length of string is passed in for improved efficiency (only need to calculate it once).
119 * @param s string
120 * @param slength string length
121 * @param marker position
122 * @return alphanum chunk found at given position
123 */
124 private static String getChunk(String s, int slength, int marker) {
125 final int startMarker = marker;
126 char c = s.charAt(marker);
127 marker++;
128 if (Character.isDigit(c)) {
129 while (marker < slength) {
130 c = s.charAt(marker);
131 if (!Character.isDigit(c)) {
132 break;
133 }
134 marker++;
135 }
136 } else {
137 while (marker < slength) {
138 c = s.charAt(marker);
139 if (Character.isDigit(c)) {
140 break;
141 }
142 marker++;
143 }
144 }
145 return s.substring(startMarker, marker);
146 }
147
148 /**
149 * Check if a string is ASCII only
150 * @param string The string to check
151 * @param stringLength The length of the string (for performance reasons)
152 * @return {@code true} if the string only contains ascii characters
153 */
154 private static boolean isAscii(String string, int stringLength) {
155 for (int i = 0; i < stringLength; i++) {
156 char c = string.charAt(i);
157 if (c > ASCII_MAPPING.length) {
158 return false;
159 }
160 }
161 return true;
162 }
163
164 /**
165 * Compare two string chunks
166 * @param thisChunk The first chunk to compare
167 * @param thisChunkLength The length of the first chunk (for performance reasons)
168 * @param thatChunk The second chunk to compare
169 * @param thatChunkLength The length of the second chunk (for performance reasons)
170 * @return The {@link Comparator} result
171 */
172 private static int compareChunk(String thisChunk, int thisChunkLength, String thatChunk, int thatChunkLength) {
173 int result;
174 if (Character.isDigit(thisChunk.charAt(0)) && Character.isDigit(thatChunk.charAt(0))) {
175 // Simple chunk comparison by length.
176 result = thisChunkLength - thatChunkLength;
177 // If equal, the first different number counts
178 if (result == 0) {
179 for (int i = 0; i < thisChunkLength; i++) {
180 result = thisChunk.charAt(i) - thatChunk.charAt(i);
181 if (result != 0) {
182 return result;
183 }
184 }
185 }
186 } else {
187 // Check if both chunks are ascii only
188 // FIXME: re-enable once #23471 is fixed (the exception at startup keeps JOSM from finishing startup)
189 if (false && isAscii(thisChunk, thisChunkLength) && isAscii(thatChunk, thatChunkLength)) {
190 return Utils.clamp(compareString(thisChunk, thisChunkLength, thatChunk, thatChunkLength), -1, 1);
191 }
192 // Instantiate the collator
193 Collator compareOperator = Collator.getInstance();
194 // Compare regardless of accented letters
195 compareOperator.setStrength(Collator.SECONDARY);
196 result = compareOperator.compare(thisChunk, thatChunk);
197 }
198 return result;
199 }
200
201 @Override
202 public int compare(String s1, String s2) {
203 if (s1 == null && s2 == null) {
204 return 0;
205 } else if (s1 == null) {
206 return -1;
207 } else if (s2 == null) {
208 return 1;
209 }
210
211 int thisMarker = 0;
212 int thatMarker = 0;
213 int s1Length = s1.length();
214 int s2Length = s2.length();
215
216 while (thisMarker < s1Length && thatMarker < s2Length) {
217 final String thisChunk = getChunk(s1, s1Length, thisMarker);
218 final int thisChunkLength = thisChunk.length();
219 thisMarker += thisChunkLength;
220
221 String thatChunk = getChunk(s2, s2Length, thatMarker);
222 final int thatChunkLength = thatChunk.length();
223 thatMarker += thatChunkLength;
224
225 // If both chunks contain numeric characters, sort them numerically
226 int result = compareChunk(thisChunk, thisChunkLength, thatChunk, thatChunkLength);
227
228 if (result != 0) {
229 return result;
230 }
231 }
232
233 return s1Length - s2Length;
234 }
235}
Note: See TracBrowser for help on using the repository browser.