- Timestamp:
- 2014-12-20T02:55:42+01:00 (10 years ago)
- Location:
- trunk/src/org/openstreetmap/josm/data/validation
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/src/org/openstreetmap/josm/data/validation/TestError.java
r7005 r7848 327 327 @Override 328 328 public String toString() { 329 return "TestError [tester=" + tester + ", code=" + code + " ]"; 329 return "TestError [tester=" + tester + ", code=" + code + ", message=" + message + "]"; 330 330 } 331 331 } -
trunk/src/org/openstreetmap/josm/data/validation/tests/SimilarNamedWays.java
r7005 r7848 2 2 package org.openstreetmap.josm.data.validation.tests; 3 3 4 import static java.util.regex.Pattern.CASE_INSENSITIVE; 5 import static java.util.regex.Pattern.UNICODE_CASE; 4 6 import static org.openstreetmap.josm.tools.I18n.tr; 5 7 6 8 import java.awt.geom.Point2D; 7 9 import java.util.ArrayList; 10 import java.util.Arrays; 8 11 import java.util.HashMap; 9 12 import java.util.List; 10 13 import java.util.Map; 14 import java.util.regex.Matcher; 15 import java.util.regex.Pattern; 11 16 12 17 import org.openstreetmap.josm.data.osm.OsmPrimitive; … … 35 40 private MultiMap<Way, Way> errorWays; 36 41 42 private ArrayList<NormalizeRule> rules = new ArrayList<NormalizeRule>(); 43 37 44 /** 38 45 * Constructor … … 41 48 super(tr("Similarly named ways"), 42 49 tr("This test checks for ways with similar names that may have been misspelled.")); 50 51 // FIXME: hardcode these rules for now. Replace them with preferences later 52 // See https://josm.openstreetmap.de/ticket/3733#comment:19 53 addRegExprRule("\\d+", "0"); // Highway 66 54 addRegExprRule("\\d+(st|nd|rd|th)", "0st"); // 3rd Ave 55 addRegExprRule("^[A-Z] ", "X"); // E Street 56 addSynonyms("east", "west", "north", "south"); 57 addSynonyms("first", "second", "third"); 43 58 } 44 59 … … 78 93 } 79 94 80 int levenshteinDistance = getLevenshteinDistance(name, name2); 81 if (0 < levenshteinDistance && levenshteinDistance <= 2) { 95 if (similaryName(name, name2)) { 82 96 List<OsmPrimitive> primitives = new ArrayList<>(2); 83 97 primitives.add(w); … … 98 112 * @return The distance between words 99 113 */ 100 public int getLevenshteinDistance(String s, String t) {114 public static int getLevenshteinDistance(String s, String t) { 101 115 int[][] d; // matrix 102 116 int n; // length of s … … 150 164 return d[n][m]; 151 165 } 166 167 /** 168 * Add a regular expression rule. 
169 * @param regExpr the regular expression to search for 170 * @param replacement a string to replace with, which should match the expression. 171 */ 172 public void addRegExprRule(String regExpr, String replacement) { 173 rules.add(new RegExprRule(regExpr, replacement)); 174 } 175 176 /** 177 * Add a rule with synonym words. 178 * @param words words which are synonyms 179 */ 180 public void addSynonyms(String... words) { 181 for (String word : words) { 182 rules.add(new SynonymRule(word, words)); 183 } 184 } 185 186 /** 187 * Check if two names are similar, but not identical. First both names will be "normalized". 188 * Afterwards the Levenshtein distance will be calculated.<br> 189 * Examples for normalization rules:<br> 190 * <code>replaceAll("\\d+", "0")</code><br> 191 * would cause similaryName("track 1", "track 2") = false, but similaryName("Track 1", "track 2") = true 192 * @param name first name to compare 193 * @param name2 second name to compare 194 * @return true if the normalized names are different but only a "little bit" 195 */ 196 public boolean similaryName(String name, String name2) { 197 // check plain strings 198 int distance = getLevenshteinDistance(name, name2); 199 boolean similar = distance>0 && distance<=2; 200 201 // try all rules 202 for (NormalizeRule rule : rules) { 203 int levenshteinDistance = getLevenshteinDistance(rule.normalize(name), rule.normalize(name2)); 204 if (levenshteinDistance == 0) 205 // one rule results in identical names: identical 206 return false; 207 else if (levenshteinDistance <= 2) { 208 // 0 < distance <= 2 209 similar = true; 210 } 211 } 212 return similar; 213 } 214 215 public interface NormalizeRule { 216 217 /** 218 * Normalize the string by replacing parts. 
219 * @param name name to normalize 220 * @return normalized string 221 */ 222 String normalize(String name); 223 } 224 225 public class RegExprRule implements NormalizeRule { 226 private final Pattern regExpr; 227 private final String replacement; 228 229 public RegExprRule(String expression, String replacement) { 230 this.regExpr = Pattern.compile(expression); 231 this.replacement = replacement; 232 } 233 234 @Override 235 public String normalize(String name) { 236 return regExpr.matcher(name).replaceAll(replacement); 237 } 238 239 @Override 240 public String toString() { 241 return "replaceAll(" + regExpr + ", " + replacement + ")"; 242 } 243 } 244 245 public class SynonymRule implements NormalizeRule { 246 247 private final String[] words; 248 private final Pattern regExpr; 249 private final String replacement; 250 251 public SynonymRule(String replacement, String[] words) { 252 this.replacement = replacement.toLowerCase(); 253 this.words = words; 254 255 // build regular expression for other words (for fast match) 256 StringBuilder expression = new StringBuilder(); 257 int maxLength = 0; 258 for (int i = 0; i < words.length; i++) { 259 if (words[i].length() > maxLength) { 260 maxLength = words[i].length(); 261 } 262 if (expression.length() > 0) { 263 expression.append("|"); 264 } 265 expression.append(Pattern.quote(words[i])); 266 } 267 this.regExpr = Pattern.compile(expression.toString(), CASE_INSENSITIVE + UNICODE_CASE); 268 } 269 270 @Override 271 public String normalize(String name) { 272 // find first match 273 Matcher matcher = regExpr.matcher(name); 274 if (!matcher.find()) 275 return name; 276 277 int start = matcher.start(); 278 279 // which word matches? 
280 String part = ""; 281 for (int i = 0; i < words.length; i++) { 282 String word = words[i]; 283 part = name.substring(start, start + word.length()); 284 if (word.equalsIgnoreCase(part)) { 285 break; 286 } 287 } 288 289 // replace the word 290 char[] newName = matcher.replaceFirst(replacement).toCharArray(); 291 292 // adjust case (replacement is not shorter than matching word!) 293 int minLength = Math.min(replacement.length(), part.length()); 294 for (int i = 0; i < minLength; i++) { 295 if (Character.isUpperCase(part.charAt(i))) { 296 newName[start + i] = Character.toUpperCase(newName[start + i]); 297 } 298 } 299 300 return new String(newName); 301 } 302 303 @Override 304 public String toString() { 305 return "synonyms(" + replacement + ", " + Arrays.toString(words) + ")"; 306 } 307 } 152 308 }
Note: See TracChangeset for help on using the changeset viewer.