Changeset 14696 in josm
Legend:
- Unmodified
- Added
- Removed
-
trunk/data/validator/ignoretags.cfg
r14683 r14696 124 124 E:width 125 125 E:length 126 E:layer 127 E:end_date 128 E:start_date 126 129 ; 127 130 ; Ignore valid and semi-valid keys that end with... -
trunk/src/org/openstreetmap/josm/data/validation/tests/TagChecker.java
r14615 r14696 75 75 /** The spell check preset values which are not stored in TaggingPresets */ 76 76 private static volatile MultiMap<String, String> additionalPresetsValueData; 77 /** The spell check preset values which are not stored in TaggingPresets */ 78 private static volatile MultiMap<String, String> oftenUsedValueData = new MultiMap<>(); 79 77 80 /** The TagChecker data */ 78 81 private static final List<CheckerData> checkerData = new ArrayList<>(); … … 81 84 private static final List<String> ignoreDataEndsWith = new ArrayList<>(); 82 85 private static final List<Tag> ignoreDataTag = new ArrayList<>(); 86 /** tag keys that have only numerical values in the presets */ 87 private static final Set<String> ignoreForLevenshtein = new HashSet<>(); 83 88 84 89 /** The preferences prefix */ … … 178 183 initializeData(); 179 184 initializePresets(); 185 analysePresets(); 186 } 187 188 /** 189 * Add presets that contain only numerical values to the ignore list 190 */ 191 private void analysePresets() { 192 for (String key : TaggingPresets.getPresetKeys()) { 193 if (isKeyIgnored(key)) 194 continue; 195 boolean allNumerical = true; 196 Set<String> values = TaggingPresets.getPresetValues(key); 197 if (values.isEmpty()) 198 allNumerical = false; 199 for (String val : values) { 200 if (!isNum(val)) { 201 allNumerical = false; 202 break; 203 } 204 } 205 if (allNumerical) { 206 ignoreForLevenshtein.add(key); 207 } 208 } 180 209 } 181 210 … … 195 224 ignoreDataTag.clear(); 196 225 harmonizedKeys.clear(); 226 ignoreForLevenshtein.clear(); 197 227 198 228 StringBuilder errorSources = new StringBuilder(); … … 241 271 break; 242 272 case "K:": 243 ignoreDataTag.add(Tag.ofString(line)); 273 Tag tag = Tag.ofString(line); 274 ignoreDataTag.add(tag); 275 oftenUsedValueData.put(tag.getKey(), tag.getValue()); 244 276 break; 245 277 default: … … 389 421 390 422 /** 423 * Determines if the given tag key is ignored for checks "key/tag not in presets". 
424 * @param key key 425 * @return true if the given key is ignored 426 */ 427 private static boolean isKeyIgnored(String key) { 428 if (ignoreDataEquals.contains(key)) { 429 return true; 430 } 431 for (String a : ignoreDataStartsWith) { 432 if (key.startsWith(a)) { 433 return true; 434 } 435 } 436 for (String a : ignoreDataEndsWith) { 437 if (key.endsWith(a)) { 438 return true; 439 } 440 } 441 return false; 442 } 443 444 /** 391 445 * Determines if the given tag is ignored for checks "key/tag not in presets". 392 446 * @param key key … … 396 450 */ 397 451 public static boolean isTagIgnored(String key, String value) { 398 if (ignoreDataEquals.contains(key)) { 452 if (isKeyIgnored(key)) 399 453 return true; 400 } 401 for (String a : ignoreDataStartsWith) { 402 if (key.startsWith(a)) { 403 return true; 404 } 405 } 406 for (String a : ignoreDataEndsWith) { 407 if (key.endsWith(a)) { 408 return true; 409 } 410 } 411 412 454 if (!isTagInPresets(key, value)) { 413 455 for (Tag a : ignoreDataTag) { … … 536 578 } 537 579 } else if (!isTagInPresets(key, value)) { 538 // try to fix common typos and check again if value is still unknown 539 final String harmonizedValue = harmonizeValue(prop.getValue()); 540 String fixedValue = null; 541 Set<String> possibleValues = getPresetValues(key); 542 List<String> fixVals = new ArrayList<>(); 543 int maxPresetValueLen = 0; 544 if (possibleValues.contains(harmonizedValue)) { 545 fixedValue = harmonizedValue; 546 } else { 547 // use Levenshtein distance to find typical typos 548 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1; 549 String closest = null; 550 for (String possibleVal : possibleValues) { 551 if (possibleVal.isEmpty()) 552 continue; 553 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length()); 554 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) { 555 // don't suggest fix value when given value is short and lengths are too different 556 // for example surface=u
would result in surface=mud 557 continue; 558 } 559 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue); 560 if (dist >= harmonizedValue.length()) { 561 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'. 562 continue; 563 } 564 if (dist < minDist) { 565 closest = possibleVal; 566 minDist = dist; 567 fixVals.clear(); 568 fixVals.add(possibleVal); 569 } else if (dist == minDist) { 570 fixVals.add(possibleVal); 571 } 572 } 573 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE 574 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) { 575 if (fixVals.size() < 2) { 576 fixedValue = closest; 577 } else { 578 Collections.sort(fixVals); 579 // misspelled preset value with multiple good alternatives 580 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX) 581 .message(tr("Misspelled property value"), 582 marktr("Value ''{0}'' for key ''{1}'' looks like one of {2}."), 583 prop.getValue(), key, fixVals) 584 .primitives(p).build()); 585 withErrors.put(p, "WPV"); 586 continue; 587 } 588 } 580 tryGuess(p, key, value, withErrors); 581 } 582 } 583 if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) { 584 errors.add(TestError.builder(this, Severity.OTHER, FIXME) 585 .message(tr("FIXMES")) 586 .primitives(p) 587 .build()); 588 withErrors.put(p, "FIXME"); 589 } 590 } 591 } 592 593 private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) { 594 // try to fix common typos and check again if value is still unknown 595 final String harmonizedValue = harmonizeValue(value); 596 String fixedValue = null; 597 Set<String> presetValues = getPresetValues(key); 598 Set<String> oftenUsedValues = oftenUsedValueData.get(key); 599 for (Set<String> possibleValues: Arrays.asList(presetValues, oftenUsedValues)) { 600 if 
(possibleValues != null && possibleValues.contains(harmonizedValue)) { 601 fixedValue = harmonizedValue; 602 break; 603 } 604 } 605 if (fixedValue == null && !ignoreForLevenshtein.contains(key)) { 606 int maxPresetValueLen = 0; 607 List<String> fixVals = new ArrayList<>(); 608 // use Levenshtein distance to find typical typos 609 int minDist = MAX_LEVENSHTEIN_DISTANCE + 1; 610 String closest = null; 611 for (Set<String> possibleValues: Arrays.asList(presetValues, oftenUsedValues)) { 612 if (possibleValues == null) 613 continue; 614 for (String possibleVal : possibleValues) { 615 if (possibleVal.isEmpty()) 616 continue; 617 maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length()); 618 if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) { 619 // don't suggest fix value when given value is short and lengths are too different 620 // for example surface=u would result in surface=mud 621 continue; 589 622 } 590 if (fixedValue != null && possibleValues.contains(fixedValue)) { 591 final String newValue = fixedValue; 592 // misspelled preset value 593 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE) 594 .message(tr("Misspelled property value"), 595 marktr("Value ''{0}'' for key ''{1}'' looks like ''{2}''."), prop.getValue(), key, newValue) 596 .primitives(p) 597 .build()); 598 withErrors.put(p, "WPV"); 599 } else { 600 // unknown preset value 601 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 602 .message(tr("Presets do not contain property value"), 603 marktr("Value ''{0}'' for key ''{1}'' not in presets."), prop.getValue(), key) 604 .primitives(p) 605 .build()); 606 withErrors.put(p, "UPV"); 623 int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue); 624 if (dist >= harmonizedValue.length()) { 625 // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'. 
626 continue; 607 627 } 608 } 609 } 610 if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) { 611 errors.add(TestError.builder(this, Severity.OTHER, FIXME) 612 .message(tr("FIXMES")) 613 .primitives(p) 614 .build()); 615 withErrors.put(p, "FIXME"); 616 } 628 if (dist < minDist) { 629 closest = possibleVal; 630 minDist = dist; 631 fixVals.clear(); 632 fixVals.add(possibleVal); 633 } else if (dist == minDist) { 634 fixVals.add(possibleVal); 635 } 636 } 637 } 638 639 if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE 640 && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) { 641 if (fixVals.size() < 2) { 642 fixedValue = closest; 643 } else { 644 Collections.sort(fixVals); 645 // misspelled preset value with multiple good alternatives 646 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX) 647 .message(tr("Unknown property value"), 648 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"), 649 value, key, fixVals) 650 .primitives(p).build()); 651 withErrors.put(p, "WPV"); 652 return; 653 } 654 } 655 } 656 if (fixedValue != null) { 657 final String newValue = fixedValue; 658 // misspelled preset value 659 errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE) 660 .message(tr("Unknown property value"), 661 marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue) 662 .primitives(p) 663 .build()); 664 withErrors.put(p, "WPV"); 665 } else { 666 // unknown preset value 667 errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE) 668 .message(tr("Presets do not contain property value"), 669 marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key) 670 .primitives(p) 671 .build()); 672 withErrors.put(p, "UPV"); 673 } 674 } 675 676 private boolean isNum(String harmonizedValue) { 677 try { 678 
Double.parseDouble(harmonizedValue); 679 return true; 680 } catch (NumberFormatException e) { 681 return false; 617 682 } 618 683 } -
trunk/test/unit/org/openstreetmap/josm/data/validation/tests/TagCheckerTest.java
r14585 r14696 105 105 final List<TestError> errors = test(OsmUtils.createPrimitive("node landuse=forrest")); 106 106 assertEquals(1, errors.size()); 107 assertEquals("Misspelled property value", errors.get(0).getMessage()); 108 assertEquals("Value 'forrest' for key 'landuse' looks like 'forest'.", errors.get(0).getDescription()); 107 assertEquals("Unknown property value", errors.get(0).getMessage()); 108 assertEquals("Value 'forrest' for key 'landuse' is unknown, maybe 'forest' is meant?", errors.get(0).getDescription()); 109 109 assertEquals(Severity.WARNING, errors.get(0).getSeverity()); 110 110 assertFalse(errors.get(0).isFixable()); … … 119 119 final List<TestError> errors = test(OsmUtils.createPrimitive("node highway=servics")); 120 120 assertEquals(1, errors.size()); 121 assertEquals("Misspelled property value", errors.get(0).getMessage()); 122 assertEquals("Value 'servics' for key 'highway' looks like one of [service, services].", errors.get(0).getDescription()); 121 assertEquals("Unknown property value", errors.get(0).getMessage()); 122 assertEquals( 123 "Value 'servics' for key 'highway' is unknown, maybe one of [service, services] is meant?", 124 errors.get(0).getDescription()); 123 125 assertEquals(Severity.WARNING, errors.get(0).getSeverity()); 124 126 assertFalse(errors.get(0).isFixable()); … … 133 135 final List<TestError> errors = test(OsmUtils.createPrimitive("node highway=residentail")); 134 136 assertEquals(1, errors.size()); 135 assertEquals("Misspelled property value", errors.get(0).getMessage()); 136 assertEquals("Value 'residentail' for key 'highway' looks like 'residential'.", errors.get(0).getDescription()); 137 assertEquals(Severity.WARNING, errors.get(0).getSeverity()); 138 assertFalse(errors.get(0).isFixable()); 139 } 140 141 /** 142 * Check for misspelled value.
143 * @throws IOException if any I/O error occurs 144 */ 145 @Test 146 public void testShortValNotInPreset() throws IOException { 147 final List<TestError> errors = test(OsmUtils.createPrimitive("node layer=6")); 148 assertEquals(1, errors.size()); 149 assertEquals("Presets do not contain property value", errors.get(0).getMessage()); 150 assertEquals("Value '6' for key 'layer' not in presets.", errors.get(0).getDescription()); 151 assertEquals(Severity.OTHER, errors.get(0).getSeverity()); 137 assertEquals("Unknown property value", errors.get(0).getMessage()); 138 assertEquals("Value 'residentail' for key 'highway' is unknown, maybe 'residential' is meant?", 139 errors.get(0).getDescription()); 140 assertEquals(Severity.WARNING, errors.get(0).getSeverity()); 152 141 assertFalse(errors.get(0).isFixable()); 153 142 } … … 205 194 final List<TestError> errors = test(OsmUtils.createPrimitive("node highway=Residential")); 206 195 assertEquals(1, errors.size()); 207 assertEquals("Misspelled property value", errors.get(0).getMessage()); 208 assertEquals("Value 'Residential' for key 'highway' looks like 'residential'.", errors.get(0).getDescription()); 196 assertEquals("Unknown property value", errors.get(0).getMessage()); 197 assertEquals("Value 'Residential' for key 'highway' is unknown, maybe 'residential' is meant?", 198 errors.get(0).getDescription()); 209 199 assertEquals(Severity.WARNING, errors.get(0).getSeverity()); 210 200 assertFalse(errors.get(0).isFixable());
Note:
See TracChangeset
for help on using the changeset viewer.