Changeset 33098 in osm for applications/editors/josm
- Timestamp: 2016-12-09T09:23:47+01:00 (8 years ago)
- Location: applications/editors/josm/plugins/wikipedia
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
applications/editors/josm/plugins/wikipedia/src/org/wikipedia/WikipediaApp.java
r33076 r33098 21 21 import java.util.function.Function; 22 22 import java.util.regex.Pattern; 23 import java.util.stream.Collector; 23 24 import java.util.stream.Collectors; 24 25 import java.util.stream.Stream; … … 229 230 .values() 230 231 .stream() 231 .flatMap(chunk -> getWikidataForArticles0(chunk).entrySet().stream())232 .flatMap(chunk -> resolveWikidataItems(chunk).entrySet().stream()) 232 233 .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue)); 233 234 } 234 235 235 private Map<String, String> getWikidataForArticles0(List<String> articles) { 236 /** 237 * Get Wikidata IDs. For any unknown IDs, resolve them (normalize and get redirects), 238 * and try getting Wikidata IDs again 239 */ 240 private Map<String, String> resolveWikidataItems(Collection<String> articles) { 241 final Map<String, String> result = getWikidataForArticles0(articles); 242 final List<String> unresolved = articles.stream() 243 .filter(title -> !result.containsKey(title)) 244 .collect(Collectors.toList()); 245 if (!unresolved.isEmpty()) { 246 final Map<String, String> redirects = resolveRedirectsForArticles(unresolved); 247 final Map<String, String> result2 = getWikidataForArticles0(redirects.values()); 248 redirects.forEach((original, resolved) -> { 249 if (result2.containsKey(resolved)) { 250 result.put(original, result2.get(resolved)); 251 } 252 }); 253 } 254 return result; 255 } 256 257 private Map<String, String> getWikidataForArticles0(Collection<String> articles) { 236 258 if (articles.isEmpty()) { 237 259 return Collections.emptyMap(); … … 257 279 } 258 280 return r; 281 } catch (Exception ex) { 282 throw new RuntimeException(ex); 283 } 284 } 285 286 /** 287 * Given a list of wikipedia titles, returns a map of corresponding normalized title names, 288 * or if the title is a redirect page, the result is the redirect target. 
289 */ 290 private Map<String, String> resolveRedirectsForArticles(Collection<String> articles) { 291 try { 292 final String url = getSiteUrl() + "/w/api.php" + 293 "?action=query" + 294 "&redirects" + 295 "&format=xml" + 296 "&titles=" + articles.stream().map(Utils::encodeUrl).collect(Collectors.joining("|")); 297 try (final InputStream in = connect(url).getContent()) { 298 final Document xml = newDocumentBuilder().parse(in); 299 300 // Add both redirects and normalization results to the same map 301 final Collector<Node, ?, Map<String, String>> fromToCollector = Collectors.toMap( 302 node -> X_PATH.evaluateString("./@from", node), 303 node -> X_PATH.evaluateString("./@to", node) 304 ); 305 final Map<String, String> normalized = X_PATH.evaluateNodes("//normalized/n", xml) 306 .stream() 307 .collect(fromToCollector); 308 final Map<String, String> redirects = X_PATH.evaluateNodes("//redirects/r", xml) 309 .stream() 310 .collect(fromToCollector); 311 // We should only return those keys that were originally requested, excluding titles that are both normalized and redirected 312 return articles.stream() 313 .collect(Collectors.toMap(Function.identity(), title -> { 314 final String normalizedTitle = normalized.getOrDefault(title, title); 315 return redirects.getOrDefault(normalizedTitle, normalizedTitle); 316 } 317 )); 318 } 259 319 } catch (Exception ex) { 260 320 throw new RuntimeException(ex); -
applications/editors/josm/plugins/wikipedia/test/unit/org/wikipedia/WikipediaAppTest.java
r33075 r33098 119 119 120 120 @Test 121 public void testGetWikidataForArticlesResolveRedirects() throws Exception { 122 final Map<String, String> map = WikipediaApp.forLanguage("en") 123 .getWikidataForArticles(Arrays.asList("einstein", "USA")); 124 assertThat(map.get("einstein"), is("Q937")); 125 assertThat(map.get("USA"), is("Q30")); 126 assertThat(map.size(), is(2)); 127 } 128 129 @Test 121 130 public void testTicket13991() throws Exception { 122 131 final Map<String, String> map = WikipediaApp.forLanguage("en")
Note: See TracChangeset for help on using the changeset viewer.