001 // License: GPL. See LICENSE file for details.
002 package org.openstreetmap.josm.data.validation.tests;
003
004 import static org.openstreetmap.josm.tools.I18n.tr;
005
006 import java.awt.geom.Point2D;
007 import java.util.ArrayList;
008 import java.util.HashMap;
009 import java.util.List;
010 import java.util.Map;
011
012 import org.openstreetmap.josm.data.osm.OsmPrimitive;
013 import org.openstreetmap.josm.data.osm.Way;
014 import org.openstreetmap.josm.data.validation.Severity;
015 import org.openstreetmap.josm.data.validation.Test;
016 import org.openstreetmap.josm.data.validation.TestError;
017 import org.openstreetmap.josm.data.validation.util.ValUtil;
018 import org.openstreetmap.josm.gui.progress.ProgressMonitor;
019 import org.openstreetmap.josm.tools.MultiMap;
020 import org.openstreetmap.josm.tools.Utils;
021
022 /**
023 * Checks for similar named ways, symptom of a possible typo. It uses the
024 * Levenshtein distance to check for similarity
025 *
026 * @author frsantos
027 */
028 public class SimilarNamedWays extends Test {
029
030 protected static final int SIMILAR_NAMED = 701;
031
032 /** All ways, grouped by cells */
033 Map<Point2D,List<Way>> cellWays;
034 /** The already detected errors */
035 MultiMap<Way, Way> errorWays;
036
037 /**
038 * Constructor
039 */
040 public SimilarNamedWays() {
041 super(tr("Similarly named ways"),
042 tr("This test checks for ways with similar names that may have been misspelled."));
043 }
044
045 @Override
046 public void startTest(ProgressMonitor monitor) {
047 super.startTest(monitor);
048 cellWays = new HashMap<Point2D,List<Way>>(1000);
049 errorWays = new MultiMap<Way, Way>();
050 }
051
052 @Override
053 public void endTest() {
054 cellWays = null;
055 errorWays = null;
056 super.endTest();
057 }
058
059 @Override
060 public void visit(Way w) {
061 if (!w.isUsable())
062 return;
063
064 String name = w.get("name");
065 if (name == null || name.length() < 6)
066 return;
067
068 List<List<Way>> theCellWays = ValUtil.getWaysInCell(w, cellWays);
069 for (List<Way> ways : theCellWays) {
070 for (Way w2 : ways) {
071 if (errorWays.contains(w, w2) || errorWays.contains(w2, w)) {
072 continue;
073 }
074
075 String name2 = w2.get("name");
076 if (name2 == null || name2.length() < 6) {
077 continue;
078 }
079
080 int levenshteinDistance = getLevenshteinDistance(name, name2);
081 if (0 < levenshteinDistance && levenshteinDistance <= 2) {
082 List<OsmPrimitive> primitives = new ArrayList<OsmPrimitive>();
083 primitives.add(w);
084 primitives.add(w2);
085 errors.add(new TestError(this, Severity.WARNING, tr("Similarly named ways"), SIMILAR_NAMED, primitives));
086 errorWays.put(w, w2);
087 }
088 }
089 ways.add(w);
090 }
091 }
092
093 /**
094 * Compute Levenshtein distance
095 *
096 * @param s First word
097 * @param t Second word
098 * @return The distance between words
099 */
100 public int getLevenshteinDistance(String s, String t) {
101 int d[][]; // matrix
102 int n; // length of s
103 int m; // length of t
104 int i; // iterates through s
105 int j; // iterates through t
106 char s_i; // ith character of s
107 char t_j; // jth character of t
108 int cost; // cost
109
110 // Step 1
111 n = s.length();
112 m = t.length();
113 if (n == 0)
114 return m;
115 if (m == 0)
116 return n;
117 d = new int[n + 1][m + 1];
118
119 // Step 2
120 for (i = 0; i <= n; i++) {
121 d[i][0] = i;
122 }
123 for (j = 0; j <= m; j++) {
124 d[0][j] = j;
125 }
126
127 // Step 3
128 for (i = 1; i <= n; i++) {
129
130 s_i = s.charAt(i - 1);
131
132 // Step 4
133 for (j = 1; j <= m; j++) {
134
135 t_j = t.charAt(j - 1);
136
137 // Step 5
138 if (s_i == t_j) {
139 cost = 0;
140 } else {
141 cost = 1;
142 }
143
144 // Step 6
145 d[i][j] = Utils.min(d[i - 1][j] + 1, d[i][j - 1] + 1, d[i - 1][j - 1] + cost);
146 }
147 }
148
149 // Step 7
150 return d[n][m];
151 }
152 }