001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.validation.tests;
003
004import static org.openstreetmap.josm.tools.I18n.marktr;
005import static org.openstreetmap.josm.tools.I18n.tr;
006
007import java.awt.GridBagConstraints;
008import java.awt.event.ActionListener;
009import java.io.BufferedReader;
010import java.io.IOException;
011import java.lang.Character.UnicodeBlock;
012import java.util.ArrayList;
013import java.util.Arrays;
014import java.util.Collection;
015import java.util.Collections;
016import java.util.HashMap;
017import java.util.HashSet;
018import java.util.List;
019import java.util.Locale;
020import java.util.Map;
021import java.util.Map.Entry;
022import java.util.Set;
023import java.util.regex.Pattern;
024
025import javax.swing.JCheckBox;
026import javax.swing.JLabel;
027import javax.swing.JPanel;
028
029import org.openstreetmap.josm.command.ChangePropertyCommand;
030import org.openstreetmap.josm.command.ChangePropertyKeyCommand;
031import org.openstreetmap.josm.command.Command;
032import org.openstreetmap.josm.command.SequenceCommand;
033import org.openstreetmap.josm.data.osm.AbstractPrimitive;
034import org.openstreetmap.josm.data.osm.OsmPrimitive;
035import org.openstreetmap.josm.data.osm.Tag;
036import org.openstreetmap.josm.data.osm.Tagged;
037import org.openstreetmap.josm.data.preferences.sources.ValidatorPrefHelper;
038import org.openstreetmap.josm.data.validation.Severity;
039import org.openstreetmap.josm.data.validation.Test.TagTest;
040import org.openstreetmap.josm.data.validation.TestError;
041import org.openstreetmap.josm.data.validation.util.Entities;
042import org.openstreetmap.josm.gui.progress.ProgressMonitor;
043import org.openstreetmap.josm.gui.tagging.presets.TaggingPreset;
044import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetItem;
045import org.openstreetmap.josm.gui.tagging.presets.TaggingPresetListener;
046import org.openstreetmap.josm.gui.tagging.presets.TaggingPresets;
047import org.openstreetmap.josm.gui.tagging.presets.items.Check;
048import org.openstreetmap.josm.gui.tagging.presets.items.CheckGroup;
049import org.openstreetmap.josm.gui.tagging.presets.items.KeyedItem;
050import org.openstreetmap.josm.gui.widgets.EditableList;
051import org.openstreetmap.josm.io.CachedFile;
052import org.openstreetmap.josm.spi.preferences.Config;
053import org.openstreetmap.josm.tools.GBC;
054import org.openstreetmap.josm.tools.Logging;
055import org.openstreetmap.josm.tools.MultiMap;
056import org.openstreetmap.josm.tools.Utils;
057
058/**
059 * Check for misspelled or wrong tags
060 *
061 * @author frsantos
062 * @since 3669
063 */
064public class TagChecker extends TagTest implements TaggingPresetListener {
065
    /** The config file of ignored tags */
    public static final String IGNORE_FILE = "resource://data/validator/ignoretags.cfg";
    /** The config file of dictionary words */
    public static final String SPELL_FILE = "resource://data/validator/words.cfg";

    /**
     * Normalized keys: the key should be substituted by the value if the key was not found in presets.
     * The map key is the harmonized spelling (result of harmonizeKey), the map value is the spelling to use instead.
     */
    private static final Map<String, String> harmonizedKeys = new HashMap<>();
    /**
     * Keys that are treated as known although TaggingPresets holds no values for them
     * (the uninteresting keys plus the "knownkeys" preference list).
     * Stays {@code null} until initAdditionalPresetsValueData() has been executed.
     */
    private static volatile HashSet<String> additionalPresetsValueData;
    /** Often used tags which are not in presets; filled from the "K:" lines of the ignore files */
    private static volatile MultiMap<String, String> oftenUsedTags = new MultiMap<>();

    /**
     * Matches the characters that are removed from keys and values: the ASCII control characters
     * except line feed (0x0A) and carriage return (0x0D), DEL (0x7F) and the bidirectional control
     * characters U+200E..U+200F and U+202A..U+202E.
     */
    private static final Pattern UNWANTED_NON_PRINTING_CONTROL_CHARACTERS = Pattern.compile(
            "[\\x00-\\x09\\x0B\\x0C\\x0E-\\x1F\\x7F\\u200e-\\u200f\\u202a-\\u202e]");

    /** The TagChecker data: key prefixes that are ignored ("S:" lines of the ignore files) */
    private static final List<String> ignoreDataStartsWith = new ArrayList<>();
    /** Complete keys that are ignored ("E:" lines of the ignore files) */
    private static final Set<String> ignoreDataEquals = new HashSet<>();
    /** Key suffixes that are ignored ("F:" lines of the ignore files) */
    private static final List<String> ignoreDataEndsWith = new ArrayList<>();
    /** Complete tags that are ignored ("K:" lines of the ignore files) */
    private static final List<Tag> ignoreDataTag = new ArrayList<>();
    /** tag keys that have only numerical values in the presets; filled by analysePresets() */
    private static final Set<String> ignoreForLevenshtein = new HashSet<>();
088
    /** The preferences prefix: the validator prefix, a dot and the simple name of this class */
    protected static final String PREFIX = ValidatorPrefHelper.PREFIX + "." + TagChecker.class.getSimpleName();

    /**
     * The preference key to check values.
     * When the preference is {@code false}, initializePresets() does not read the presets.
     */
    public static final String PREF_CHECK_VALUES = PREFIX + ".checkValues";
    /**
     * The preference key to check keys
     */
    public static final String PREF_CHECK_KEYS = PREFIX + ".checkKeys";
    /**
     * The preference key to enable complex checks
     */
    public static final String PREF_CHECK_COMPLEX = PREFIX + ".checkComplex";
    /**
     * The preference key to search for fixme tags
     */
    public static final String PREF_CHECK_FIXMES = PREFIX + ".checkFixmes";

    /**
     * The preference key for source files (list preference, read by initializeData())
     * @see #DEFAULT_SOURCES
     */
    public static final String PREF_SOURCES = PREFIX + ".source";

    /** Suffix appended to a preference key to build the key of its "before upload" variant */
    private static final String BEFORE_UPLOAD = "BeforeUpload";
    /**
     * The preference key to check keys - used before upload
     */
    public static final String PREF_CHECK_KEYS_BEFORE_UPLOAD = PREF_CHECK_KEYS + BEFORE_UPLOAD;
    /**
     * The preference key to check values - used before upload
     */
    public static final String PREF_CHECK_VALUES_BEFORE_UPLOAD = PREF_CHECK_VALUES + BEFORE_UPLOAD;
    /**
     * The preference key to run complex tests - used before upload
     */
    public static final String PREF_CHECK_COMPLEX_BEFORE_UPLOAD = PREF_CHECK_COMPLEX + BEFORE_UPLOAD;
    /**
     * The preference key to search for fixmes - used before upload
     */
    public static final String PREF_CHECK_FIXMES_BEFORE_UPLOAD = PREF_CHECK_FIXMES + BEFORE_UPLOAD;
132
    /**
     * NOTE(review): presumably the largest edit distance for which a misspelled value is still
     * matched to a known value - confirm against the code that uses this constant.
     */
    private static final int MAX_LEVENSHTEIN_DISTANCE = 2;

    /** If {@code true}, {@link #check} runs the simple checks on tag keys */
    protected boolean checkKeys;
    /** If {@code true}, {@link #check} runs the simple and the preset based checks on tag values */
    protected boolean checkValues;
    /** Was used for special configuration file, might be used to disable value spell checker. */
    protected boolean checkComplex;
    /** If {@code true}, {@link #check} reports fixme tags */
    protected boolean checkFixmes;

    // Check boxes of the preference panel. Judging by the names there is one per check
    // - TODO confirm against the code that builds the panel.
    protected JCheckBox prefCheckKeys;
    protected JCheckBox prefCheckValues;
    protected JCheckBox prefCheckComplex;
    protected JCheckBox prefCheckFixmes;
    protected JCheckBox prefCheckPaint;

    // Same as above, presumably for the "before upload" variants of the preferences - TODO confirm.
    protected JCheckBox prefCheckKeysBeforeUpload;
    protected JCheckBox prefCheckValuesBeforeUpload;
    protected JCheckBox prefCheckComplexBeforeUpload;
    protected JCheckBox prefCheckFixmesBeforeUpload;
    protected JCheckBox prefCheckPaintBeforeUpload;

    // Error codes handed to TestError.builder(...). Each code identifies one kind of finding;
    // a retired code (see 1207) is not reused.
    // CHECKSTYLE.OFF: SingleSpaceSeparator
    protected static final int EMPTY_VALUES             = 1200;
    protected static final int INVALID_KEY              = 1201;
    protected static final int INVALID_VALUE            = 1202;
    protected static final int FIXME                    = 1203;
    protected static final int INVALID_SPACE            = 1204;
    protected static final int INVALID_KEY_SPACE        = 1205;
    protected static final int INVALID_HTML             = 1206; /* 1207 was PAINT */
    protected static final int LONG_VALUE               = 1208;
    protected static final int LONG_KEY                 = 1209;
    protected static final int LOW_CHAR_VALUE           = 1210;
    protected static final int LOW_CHAR_KEY             = 1211;
    protected static final int MISSPELLED_VALUE         = 1212;
    protected static final int MISSPELLED_KEY           = 1213;
    protected static final int MULTIPLE_SPACES          = 1214;
    protected static final int MISSPELLED_VALUE_NO_FIX  = 1215;
    protected static final int UNUSUAL_UNICODE_CHAR_VALUE = 1216;
    // CHECKSTYLE.ON: SingleSpaceSeparator

    /** NOTE(review): presumably the editable list of source files shown in the preferences - confirm */
    protected EditableList sourcesList;

    /** The source files read by initializeData() when the {@link #PREF_SOURCES} preference is not set */
    private static final List<String> DEFAULT_SOURCES = Arrays.asList(IGNORE_FILE, SPELL_FILE);
175
176    /**
177     * Constructor
178     */
    public TagChecker() {
        // both strings are translated with tr(); presumably TagTest takes them as the name and
        // the description of the test - confirm against the TagTest constructor
        super(tr("Tag checker"), tr("This test checks for errors in tag keys and values."));
    }
182
    /**
     * Registers this test as listener of the tagging presets and (re)builds the static data:
     * the content of the configured source files and the data derived from the presets.
     *
     * @throws IOException if a configured source file could not be read
     */
    @Override
    public void initialize() throws IOException {
        TaggingPresets.addListener(this);
        // The order matters: initializeData() clears harmonizedKeys and ignoreForLevenshtein and
        // loads the ignore lists. initializePresets() adds to harmonizedKeys afterwards and
        // analysePresets() consults the ignore lists before it refills ignoreForLevenshtein.
        initializeData();
        initializePresets();
        analysePresets();
    }
190
191    /**
192     * Add presets that contain only numerical values to the ignore list
193     */
194    private static void analysePresets() {
195        for (String key : TaggingPresets.getPresetKeys()) {
196            if (isKeyIgnored(key))
197                continue;
198            boolean allNumerical = true;
199            Set<String> values = TaggingPresets.getPresetValues(key);
200            if (values.isEmpty())
201                allNumerical = false;
202            for (String val : values) {
203                if (!isNum(val)) {
204                    allNumerical = false;
205                    break;
206                }
207            }
208            if (allNumerical) {
209                ignoreForLevenshtein.add(key);
210            }
211        }
212    }
213
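    /**
     * Reads all configured source files (preference {@link #PREF_SOURCES}) and fills the ignore lists
     * and the dictionary used to spell-check keys.
     * A spell-check file is a list of words, each beginning with + or -. A line starting with +
     * declares a valid word; a line starting with - declares a misspelling that should be replaced
     * by the nearest + word before it.
     * A file that contains the header line "# JOSM IgnoreTags" is parsed as a list of ignored tags instead.
     *
     * @throws IOException if any I/O error occurs
     */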
222    private static void initializeData() throws IOException {
223        ignoreDataStartsWith.clear();
224        ignoreDataEquals.clear();
225        ignoreDataEndsWith.clear();
226        ignoreDataTag.clear();
227        harmonizedKeys.clear();
228        ignoreForLevenshtein.clear();
229        oftenUsedTags.clear();
230
231        StringBuilder errorSources = new StringBuilder();
232        for (String source : Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES)) {
233            try (
234                CachedFile cf = new CachedFile(source);
235                BufferedReader reader = cf.getContentReader()
236            ) {
237                String okValue = null;
238                boolean tagcheckerfile = false;
239                boolean ignorefile = false;
240                boolean isFirstLine = true;
241                String line;
242                while ((line = reader.readLine()) != null) {
243                    if (line.isEmpty()) {
244                        // ignore
245                    } else if (line.startsWith("#")) {
246                        if (line.startsWith("# JOSM TagChecker")) {
247                            tagcheckerfile = true;
248                            Logging.error(tr("Ignoring {0}. Support was dropped", source));
249                        } else
250                        if (line.startsWith("# JOSM IgnoreTags")) {
251                            ignorefile = true;
252                            if (!DEFAULT_SOURCES.contains(source)) {
253                                Logging.info(tr("Adding {0} to ignore tags", source));
254                            }
255                        }
256                    } else if (ignorefile) {
257                        parseIgnoreFileLine(source, line);
258                    } else if (tagcheckerfile) {
259                        // ignore
260                    } else if (line.charAt(0) == '+') {
261                        okValue = line.substring(1);
262                    } else if (line.charAt(0) == '-' && okValue != null) {
263                        String hk = harmonizeKey(line.substring(1));
264                        if (!okValue.equals(hk) && harmonizedKeys.put(hk, okValue) != null) {
265                            Logging.debug(tr("Line was ignored: {0}", line));
266                        }
267                    } else {
268                        Logging.error(tr("Invalid spellcheck line: {0}", line));
269                    }
270                    if (isFirstLine) {
271                        isFirstLine = false;
272                        if (!(tagcheckerfile || ignorefile) && !DEFAULT_SOURCES.contains(source)) {
273                            Logging.info(tr("Adding {0} to spellchecker", source));
274                        }
275                    }
276                }
277            } catch (IOException e) {
278                Logging.error(e);
279                errorSources.append(source).append('\n');
280            }
281        }
282
283        if (errorSources.length() > 0)
284            throw new IOException(tr("Could not access data file(s):\n{0}", errorSources));
285    }
286
287    /**
288     * Parse a line found in a configuration file
289     * @param source name of configuration file
290     * @param line the line to parse
291     */
292    private static void parseIgnoreFileLine(String source, String line) {
293        line = line.trim();
294        if (line.length() < 4) {
295            return;
296        }
297        try {
298            String key = line.substring(0, 2);
299            line = line.substring(2);
300
301            switch (key) {
302            case "S:":
303                ignoreDataStartsWith.add(line);
304                break;
305            case "E:":
306                ignoreDataEquals.add(line);
307                addToKeyDictionary(line);
308                break;
309            case "F:":
310                ignoreDataEndsWith.add(line);
311                break;
312            case "K:":
313                Tag tag = Tag.ofString(line);
314                ignoreDataTag.add(tag);
315                oftenUsedTags.put(tag.getKey(), tag.getValue());
316                addToKeyDictionary(tag.getKey());
317                break;
318            default:
319                if (!key.startsWith(";")) {
320                    Logging.warn("Unsupported TagChecker key: " + key);
321                }
322            }
323        } catch (IllegalArgumentException e) {
324            Logging.error("Invalid line in {0} : {1}", source, e.getMessage());
325            Logging.trace(e);
326        }
327    }
328
329    private static void addToKeyDictionary(String key) {
330        if (key != null) {
331            String hk = harmonizeKey(key);
332            if (!key.equals(hk)) {
333                harmonizedKeys.put(hk, key);
334            }
335        }
336    }
337
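    /**
     * Reads the presets data and registers the preset keys for the key spell check.
     * Does nothing if the check of values is disabled in the preferences or if no presets are loaded.
     */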
342    public static void initializePresets() {
343
344        if (!Config.getPref().getBoolean(PREF_CHECK_VALUES, true))
345            return;
346
347        Collection<TaggingPreset> presets = TaggingPresets.getTaggingPresets();
348        if (!presets.isEmpty()) {
349            initAdditionalPresetsValueData();
350            for (TaggingPreset p : presets) {
351                for (TaggingPresetItem i : p.data) {
352                    if (i instanceof KeyedItem) {
353                        addPresetValue((KeyedItem) i);
354                    } else if (i instanceof CheckGroup) {
355                        for (Check c : ((CheckGroup) i).checks) {
356                            addPresetValue(c);
357                        }
358                    }
359                }
360            }
361        }
362    }
363
364    private static void initAdditionalPresetsValueData() {
365        additionalPresetsValueData = new HashSet<>();
366        for (String a : AbstractPrimitive.getUninterestingKeys()) {
367            additionalPresetsValueData.add(a);
368        }
369        for (String a : Config.getPref().getList(ValidatorPrefHelper.PREFIX + ".knownkeys",
370                Arrays.asList("is_in", "int_ref", "fixme", "population"))) {
371            additionalPresetsValueData.add(a);
372        }
373    }
374
375    private static void addPresetValue(KeyedItem ky) {
376        if (ky.key != null && ky.getValues() != null) {
377            addToKeyDictionary(ky.key);
378        }
379    }
380
381    /**
382     * Checks given string (key or value) if it contains unwanted non-printing control characters (either ASCII or Unicode bidi characters)
383     * @param s string to check
384     * @return {@code true} if {@code s} contains non-printing control characters
385     */
386    static boolean containsUnwantedNonPrintingControlCharacter(String s) {
387        return s != null && !s.isEmpty() && (
388                isJoiningChar(s.charAt(0)) ||
389                isJoiningChar(s.charAt(s.length() - 1)) ||
390                s.chars().anyMatch(c -> (isAsciiControlChar(c) && !isNewLineChar(c)) || isBidiControlChar(c))
391                );
392    }
393
394    private static boolean isAsciiControlChar(int c) {
395        return c < 0x20 || c == 0x7F;
396    }
397
398    private static boolean isNewLineChar(int c) {
399        return c == 0x0a || c == 0x0d;
400    }
401
402    private static boolean isJoiningChar(int c) {
403        return c == 0x200c || c == 0x200d; // ZWNJ, ZWJ
404    }
405
406    private static boolean isBidiControlChar(int c) {
407        /* check for range 0x200e to 0x200f (LRM, RLM) or
408                           0x202a to 0x202e (LRE, RLE, PDF, LRO, RLO) */
409        return (c >= 0x200e && c <= 0x200f) || (c >= 0x202a && c <= 0x202e);
410    }
411
412    static String removeUnwantedNonPrintingControlCharacters(String s) {
413        // Remove all unwanted characters
414        String result = UNWANTED_NON_PRINTING_CONTROL_CHARACTERS.matcher(s).replaceAll("");
415        // Remove joining characters located at the beginning of the string
416        while (!result.isEmpty() && isJoiningChar(result.charAt(0))) {
417            result = result.substring(1);
418        }
419        // Remove joining characters located at the end of the string
420        while (!result.isEmpty() && isJoiningChar(result.charAt(result.length() - 1))) {
421            result = result.substring(0, result.length() - 1);
422        }
423        return result;
424    }
425
426    static boolean containsUnusualUnicodeCharacter(String key, String value) {
427        return value != null && value.chars().anyMatch(c -> isUnusualUnicodeBlock(key, c));
428    }
429
430    /**
431     * Detects highly suspicious Unicode characters that have been seen in OSM database.
432     * @param key tag key
433     * @param c current character code point
434     * @return {@code true} if the current unicode block is very unusual for the given key
435     */
436    private static boolean isUnusualUnicodeBlock(String key, int c) {
437        UnicodeBlock b = UnicodeBlock.of(c);
438        return isUnusualPhoneticUse(key, b, c) || isUnusualBmpUse(b) || isUnusualSmpUse(b);
439    }
440
441    private static boolean isAllowedPhoneticCharacter(String key, int c) {
442        return c == 0x0259                                          // U+0259 is used as a standard character in azerbaidjani
443            || (key.endsWith("ref") && 0x1D2C <= c && c <= 0x1D42); // allow uppercase superscript latin characters in *ref tags
444    }
445
446    private static boolean isUnusualPhoneticUse(String key, UnicodeBlock b, int c) {
447        return !isAllowedPhoneticCharacter(key, c)
448            && (b == UnicodeBlock.IPA_EXTENSIONS                        // U+0250..U+02AF
449             || b == UnicodeBlock.PHONETIC_EXTENSIONS                   // U+1D00..U+1D7F
450             || b == UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT)       // U+1D80..U+1DBF
451                && !key.endsWith(":pronunciation");
452    }
453
454    private static boolean isUnusualBmpUse(UnicodeBlock b) {
455        // CHECKSTYLE.OFF: BooleanExpressionComplexity
456        return b == UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS            // U+20D0..U+20FF
457            || b == UnicodeBlock.MATHEMATICAL_OPERATORS                 // U+2200..U+22FF
458            || b == UnicodeBlock.ENCLOSED_ALPHANUMERICS                 // U+2460..U+24FF
459            || b == UnicodeBlock.BOX_DRAWING                            // U+2500..U+257F
460            || b == UnicodeBlock.GEOMETRIC_SHAPES                       // U+25A0..U+25FF
461            || b == UnicodeBlock.DINGBATS                               // U+2700..U+27BF
462            || b == UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS       // U+2B00..U+2BFF
463            || b == UnicodeBlock.GLAGOLITIC                             // U+2C00..U+2C5F
464            || b == UnicodeBlock.HANGUL_COMPATIBILITY_JAMO              // U+3130..U+318F
465            || b == UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS        // U+3200..U+32FF
466            || b == UnicodeBlock.LATIN_EXTENDED_D                       // U+A720..U+A7FF
467            || b == UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS           // U+F900..U+FAFF
468            || b == UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS          // U+FB00..U+FB4F
469            || b == UnicodeBlock.VARIATION_SELECTORS                    // U+FE00..U+FE0F
470            || b == UnicodeBlock.SPECIALS;                              // U+FFF0..U+FFFF
471            // CHECKSTYLE.ON: BooleanExpressionComplexity
472    }
473
474    private static boolean isUnusualSmpUse(UnicodeBlock b) {
475        // UnicodeBlock.SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS is only defined in Java 9+
476        return b == UnicodeBlock.MUSICAL_SYMBOLS                        // U+1D100..U+1D1FF
477            || b == UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT       // U+1F100..U+1F1FF
478            || b == UnicodeBlock.EMOTICONS                              // U+1F600..U+1F64F
479            || b == UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS;             // U+1F680..U+1F6FF
480    }
481
482    /**
483     * Get set of preset values for the given key.
484     * @param key the key
485     * @return null if key is not in presets or in additionalPresetsValueData,
486     *  else a set which might be empty.
487     */
488    private static Set<String> getPresetValues(String key) {
489        Set<String> res = TaggingPresets.getPresetValues(key);
490        if (res != null)
491            return res;
492        if (additionalPresetsValueData.contains(key))
493            return Collections.emptySet();
494        // null means key is not known
495        return null;
496    }
497
498    /**
499     * Determines if the given key is in internal presets.
500     * @param key key
501     * @return {@code true} if the given key is in internal presets
502     * @since 9023
503     */
504    public static boolean isKeyInPresets(String key) {
505        return TaggingPresets.getPresetValues(key) != null;
506    }
507
508    /**
509     * Determines if the given tag is in internal presets.
510     * @param key key
511     * @param value value
512     * @return {@code true} if the given tag is in internal presets
513     * @since 9023
514     */
515    public static boolean isTagInPresets(String key, String value) {
516        final Set<String> values = getPresetValues(key);
517        return values != null && values.contains(value);
518    }
519
520    /**
521     * Returns the list of ignored tags.
522     * @return the list of ignored tags
523     * @since 9023
524     */
525    public static List<Tag> getIgnoredTags() {
526        return new ArrayList<>(ignoreDataTag);
527    }
528
529    /**
530     * Determines if the given tag key is ignored for checks "key/tag not in presets".
531     * @param key key
532     * @return true if the given key is ignored
533     */
534    private static boolean isKeyIgnored(String key) {
535        if (ignoreDataEquals.contains(key)) {
536            return true;
537        }
538        for (String a : ignoreDataStartsWith) {
539            if (key.startsWith(a)) {
540                return true;
541            }
542        }
543        for (String a : ignoreDataEndsWith) {
544            if (key.endsWith(a)) {
545                return true;
546            }
547        }
548        return false;
549    }
550
551    /**
552     * Determines if the given tag is ignored for checks "key/tag not in presets".
553     * @param key key
554     * @param value value
555     * @return {@code true} if the given tag is ignored
556     * @since 9023
557     */
558    public static boolean isTagIgnored(String key, String value) {
559        if (isKeyIgnored(key))
560            return true;
561        final Set<String> values = getPresetValues(key);
562        if (values != null && values.isEmpty())
563            return true;
564        if (!isTagInPresets(key, value)) {
565            for (Tag a : ignoreDataTag) {
566                if (key.equals(a.getKey()) && value.equals(a.getValue())) {
567                    return true;
568                }
569            }
570        }
571        return false;
572    }
573
574    /**
575     * Checks the primitive tags
576     * @param p The primitive to check
577     */
578    @Override
579    public void check(OsmPrimitive p) {
580        if (!p.isTagged())
581            return;
582
583        // Just a collection to know if a primitive has been already marked with error
584        MultiMap<OsmPrimitive, String> withErrors = new MultiMap<>();
585
586        for (Entry<String, String> prop : p.getKeys().entrySet()) {
587            String s = marktr("Tag ''{0}'' invalid.");
588            String key = prop.getKey();
589            String value = prop.getValue();
590
591            if (checkKeys) {
592                checkSingleTagKeySimple(withErrors, p, s, key);
593            }
594            if (checkValues) {
595                checkSingleTagValueSimple(withErrors, p, s, key, value);
596                checkSingleTagComplex(withErrors, p, key, value);
597            }
598            if (checkFixmes && key != null && value != null && !value.isEmpty() && isFixme(key, value) && !withErrors.contains(p, "FIXME")) {
599                errors.add(TestError.builder(this, Severity.OTHER, FIXME)
600                        .message(tr("FIXMES"))
601                        .primitives(p)
602                        .build());
603                withErrors.put(p, "FIXME");
604            }
605        }
606    }
607
608    private void checkSingleTagValueSimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key, String value) {
609        if (!checkValues || value == null)
610            return;
611        if ((containsUnwantedNonPrintingControlCharacter(value)) && !withErrors.contains(p, "ICV")) {
612            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_VALUE)
613                    .message(tr("Tag value contains non-printing (usually invisible) character"), s, key)
614                    .primitives(p)
615                    .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(value)))
616                    .build());
617            withErrors.put(p, "ICV");
618        }
619        if ((containsUnusualUnicodeCharacter(key, value)) && !withErrors.contains(p, "UUCV")) {
620            errors.add(TestError.builder(this, Severity.WARNING, UNUSUAL_UNICODE_CHAR_VALUE)
621                    .message(tr("Tag value contains unusual Unicode character"), s, key)
622                    .primitives(p)
623                    .build());
624            withErrors.put(p, "UUCV");
625        }
626        if ((value.length() > Tagged.MAX_TAG_LENGTH) && !withErrors.contains(p, "LV")) {
627            errors.add(TestError.builder(this, Severity.ERROR, LONG_VALUE)
628                    .message(tr("Tag value longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, value.length()), s, key)
629                    .primitives(p)
630                    .build());
631            withErrors.put(p, "LV");
632        }
633        if ((value.trim().isEmpty()) && !withErrors.contains(p, "EV")) {
634            errors.add(TestError.builder(this, Severity.WARNING, EMPTY_VALUES)
635                    .message(tr("Tags with empty values"), s, key)
636                    .primitives(p)
637                    .build());
638            withErrors.put(p, "EV");
639        }
640        final String errTypeSpace = "SPACE";
641        if ((value.startsWith(" ") || value.endsWith(" ")) && !withErrors.contains(p, errTypeSpace)) {
642            errors.add(TestError.builder(this, Severity.WARNING, INVALID_SPACE)
643                    .message(tr("Property values start or end with white space"), s, key)
644                    .primitives(p)
645                    .build());
646            withErrors.put(p, errTypeSpace);
647        }
648        if (value.contains("  ") && !withErrors.contains(p, errTypeSpace)) {
649            errors.add(TestError.builder(this, Severity.WARNING, MULTIPLE_SPACES)
650                    .message(tr("Property values contain multiple white spaces"), s, key)
651                    .primitives(p)
652                    .build());
653            withErrors.put(p, errTypeSpace);
654        }
655        if (!value.equals(Entities.unescape(value)) && !withErrors.contains(p, "HTML")) {
656            errors.add(TestError.builder(this, Severity.OTHER, INVALID_HTML)
657                    .message(tr("Property values contain HTML entity"), s, key)
658                    .primitives(p)
659                    .build());
660            withErrors.put(p, "HTML");
661        }
662    }
663
664    private void checkSingleTagKeySimple(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String s, String key) {
665        if (!checkKeys || key == null)
666            return;
667        if ((containsUnwantedNonPrintingControlCharacter(key)) && !withErrors.contains(p, "ICK")) {
668            errors.add(TestError.builder(this, Severity.WARNING, LOW_CHAR_KEY)
669                    .message(tr("Tag key contains non-printing character"), s, key)
670                    .primitives(p)
671                    .fix(() -> new ChangePropertyCommand(p, key, removeUnwantedNonPrintingControlCharacters(key)))
672                    .build());
673            withErrors.put(p, "ICK");
674        }
675        if (key.length() > Tagged.MAX_TAG_LENGTH && !withErrors.contains(p, "LK")) {
676            errors.add(TestError.builder(this, Severity.ERROR, LONG_KEY)
677                    .message(tr("Tag key longer than {0} characters ({1} characters)", Tagged.MAX_TAG_LENGTH, key.length()), s, key)
678                    .primitives(p)
679                    .build());
680            withErrors.put(p, "LK");
681        }
682        if (key.indexOf(' ') >= 0 && !withErrors.contains(p, "IPK")) {
683            errors.add(TestError.builder(this, Severity.WARNING, INVALID_KEY_SPACE)
684                    .message(tr("Invalid white space in property key"), s, key)
685                    .primitives(p)
686                    .build());
687            withErrors.put(p, "IPK");
688        }
689    }
690
691    private void checkSingleTagComplex(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key, String value) {
692        if (!checkValues || key == null || value == null || value.isEmpty())
693            return;
694        if (additionalPresetsValueData != null && !isTagIgnored(key, value)) {
695            if (!isKeyInPresets(key)) {
696                spellCheckKey(withErrors, p, key);
697            } else if (!isTagInPresets(key, value)) {
698                if (oftenUsedTags.contains(key, value)) {
699                    // tag is quite often used but not in presets
700                    errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
701                            .message(tr("Presets do not contain property value"),
702                                    marktr("Value ''{0}'' for key ''{1}'' not in presets, but is known."), value, key)
703                            .primitives(p)
704                            .build());
705                    withErrors.put(p, "UPV");
706                } else {
707                    tryGuess(p, key, value, withErrors);
708                }
709            }
710        }
711    }
712
713    private void spellCheckKey(MultiMap<OsmPrimitive, String> withErrors, OsmPrimitive p, String key) {
714        String prettifiedKey = harmonizeKey(key);
715        String fixedKey;
716        if (ignoreDataEquals.contains(prettifiedKey)) {
717            fixedKey = prettifiedKey;
718        } else {
719            fixedKey = isKeyInPresets(prettifiedKey) ? prettifiedKey : harmonizedKeys.get(prettifiedKey);
720        }
721        if (fixedKey == null) {
722            for (Tag a : ignoreDataTag) {
723                if (a.getKey().equals(prettifiedKey)) {
724                    fixedKey = prettifiedKey;
725                    break;
726                }
727            }
728        }
729
730        if (fixedKey != null && !"".equals(fixedKey) && !fixedKey.equals(key)) {
731            final String proposedKey = fixedKey;
732            // misspelled preset key
733            final TestError.Builder error = TestError.builder(this, Severity.WARNING, MISSPELLED_KEY)
734                    .message(tr("Misspelled property key"), marktr("Key ''{0}'' looks like ''{1}''."), key, proposedKey)
735                    .primitives(p);
736            if (p.hasKey(fixedKey)) {
737                errors.add(error.build());
738            } else {
739                errors.add(error.fix(() -> new ChangePropertyKeyCommand(p, key, proposedKey)).build());
740            }
741            withErrors.put(p, "WPK");
742        } else {
743            errors.add(TestError.builder(this, Severity.OTHER, INVALID_KEY)
744                    .message(tr("Presets do not contain property key"), marktr("Key ''{0}'' not in presets."), key)
745                    .primitives(p)
746                    .build());
747            withErrors.put(p, "UPK");
748        }
749    }
750
751    private void tryGuess(OsmPrimitive p, String key, String value, MultiMap<OsmPrimitive, String> withErrors) {
752        // try to fix common typos and check again if value is still unknown
753        final String harmonizedValue = harmonizeValue(value);
754        if (harmonizedValue == null || harmonizedValue.isEmpty())
755            return;
756        String fixedValue = null;
757        List<Set<String>> sets = new ArrayList<>();
758        Set<String> presetValues = getPresetValues(key);
759        if (presetValues != null)
760            sets.add(presetValues);
761        Set<String> usedValues = oftenUsedTags.get(key);
762        if (usedValues != null)
763            sets.add(usedValues);
764        for (Set<String> possibleValues: sets) {
765            if (possibleValues.contains(harmonizedValue)) {
766                fixedValue = harmonizedValue;
767                break;
768            }
769        }
770        if (fixedValue == null && !ignoreForLevenshtein.contains(key)) {
771            int maxPresetValueLen = 0;
772            List<String> fixVals = new ArrayList<>();
773            // use Levenshtein distance to find typical typos
774            int minDist = MAX_LEVENSHTEIN_DISTANCE + 1;
775            String closest = null;
776            for (Set<String> possibleValues: sets) {
777                for (String possibleVal : possibleValues) {
778                    if (possibleVal.isEmpty())
779                        continue;
780                    maxPresetValueLen = Math.max(maxPresetValueLen, possibleVal.length());
781                    if (harmonizedValue.length() < 3 && possibleVal.length() >= harmonizedValue.length() + MAX_LEVENSHTEIN_DISTANCE) {
782                        // don't suggest fix value when given value is short and lengths are too different
783                        // for example surface=u would result in surface=mud
784                        continue;
785                    }
786                    int dist = Utils.getLevenshteinDistance(possibleVal, harmonizedValue);
787                    if (dist >= harmonizedValue.length()) {
788                        // short value, all characters are different. Don't warn, might say Value '10' for key 'fee' looks like 'no'.
789                        continue;
790                    }
791                    if (dist < minDist) {
792                        closest = possibleVal;
793                        minDist = dist;
794                        fixVals.clear();
795                        fixVals.add(possibleVal);
796                    } else if (dist == minDist) {
797                        fixVals.add(possibleVal);
798                    }
799                }
800            }
801
802            if (minDist <= MAX_LEVENSHTEIN_DISTANCE && maxPresetValueLen > MAX_LEVENSHTEIN_DISTANCE
803                    && (harmonizedValue.length() > 3 || minDist < MAX_LEVENSHTEIN_DISTANCE)) {
804                if (fixVals.size() < 2) {
805                    fixedValue = closest;
806                } else {
807                    Collections.sort(fixVals);
808                    // misspelled preset value with multiple good alternatives
809                    errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE_NO_FIX)
810                            .message(tr("Unknown property value"),
811                                    marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe one of {2} is meant?"),
812                                    value, key, fixVals)
813                            .primitives(p).build());
814                    withErrors.put(p, "WPV");
815                    return;
816                }
817            }
818        }
819        if (fixedValue != null && !fixedValue.equals(value)) {
820            final String newValue = fixedValue;
821            // misspelled preset value
822            errors.add(TestError.builder(this, Severity.WARNING, MISSPELLED_VALUE)
823                    .message(tr("Unknown property value"),
824                            marktr("Value ''{0}'' for key ''{1}'' is unknown, maybe ''{2}'' is meant?"), value, key, newValue)
825                    .primitives(p)
826                    .build());
827            withErrors.put(p, "WPV");
828        } else {
829            // unknown preset value
830            errors.add(TestError.builder(this, Severity.OTHER, INVALID_VALUE)
831                    .message(tr("Presets do not contain property value"),
832                            marktr("Value ''{0}'' for key ''{1}'' not in presets."), value, key)
833                    .primitives(p)
834                    .build());
835            withErrors.put(p, "UPV");
836        }
837    }
838
839    private static boolean isNum(String harmonizedValue) {
840        try {
841            Double.parseDouble(harmonizedValue);
842            return true;
843        } catch (NumberFormatException e) {
844            return false;
845        }
846    }
847
848    private static boolean isFixme(String key, String value) {
849        return key.toLowerCase(Locale.ENGLISH).contains("fixme") || key.contains("todo")
850          || value.toLowerCase(Locale.ENGLISH).contains("fixme") || value.contains("check and delete");
851    }
852
853    private static String harmonizeKey(String key) {
854        return Utils.strip(key.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(':', '_').replace(' ', '_'), "-_;:,");
855    }
856
857    private static String harmonizeValue(String value) {
858        return Utils.strip(value.toLowerCase(Locale.ENGLISH).replace('-', '_').replace(' ', '_'), "-_;:,");
859    }
860
861    @Override
862    public void startTest(ProgressMonitor monitor) {
863        super.startTest(monitor);
864        checkKeys = Config.getPref().getBoolean(PREF_CHECK_KEYS, true);
865        if (isBeforeUpload) {
866            checkKeys = checkKeys && Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true);
867        }
868
869        checkValues = Config.getPref().getBoolean(PREF_CHECK_VALUES, true);
870        if (isBeforeUpload) {
871            checkValues = checkValues && Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true);
872        }
873
874        checkComplex = Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true);
875        if (isBeforeUpload) {
876            checkComplex = checkComplex && Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true);
877        }
878
879        checkFixmes = Config.getPref().getBoolean(PREF_CHECK_FIXMES, true);
880        if (isBeforeUpload) {
881            checkFixmes = checkFixmes && Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true);
882        }
883    }
884
885    @Override
886    public void visit(Collection<OsmPrimitive> selection) {
887        if (checkKeys || checkValues || checkComplex || checkFixmes) {
888            super.visit(selection);
889        }
890    }
891
892    @Override
893    public void addGui(JPanel testPanel) {
894        GBC a = GBC.eol();
895        a.anchor = GridBagConstraints.EAST;
896
897        testPanel.add(new JLabel(name+" :"), GBC.eol().insets(3, 0, 0, 0));
898
899        prefCheckKeys = new JCheckBox(tr("Check property keys."), Config.getPref().getBoolean(PREF_CHECK_KEYS, true));
900        prefCheckKeys.setToolTipText(tr("Validate that property keys are valid checking against list of words."));
901        testPanel.add(prefCheckKeys, GBC.std().insets(20, 0, 0, 0));
902
903        prefCheckKeysBeforeUpload = new JCheckBox();
904        prefCheckKeysBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, true));
905        testPanel.add(prefCheckKeysBeforeUpload, a);
906
907        prefCheckComplex = new JCheckBox(tr("Use complex property checker."), Config.getPref().getBoolean(PREF_CHECK_COMPLEX, true));
908        prefCheckComplex.setToolTipText(tr("Validate property values and tags using complex rules."));
909        testPanel.add(prefCheckComplex, GBC.std().insets(20, 0, 0, 0));
910
911        prefCheckComplexBeforeUpload = new JCheckBox();
912        prefCheckComplexBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, true));
913        testPanel.add(prefCheckComplexBeforeUpload, a);
914
915        final Collection<String> sources = Config.getPref().getList(PREF_SOURCES, DEFAULT_SOURCES);
916        sourcesList = new EditableList(tr("TagChecker source"));
917        sourcesList.setItems(sources);
918        testPanel.add(new JLabel(tr("Data sources ({0})", "*.cfg")), GBC.eol().insets(23, 0, 0, 0));
919        testPanel.add(sourcesList, GBC.eol().fill(GridBagConstraints.HORIZONTAL).insets(23, 0, 0, 0));
920
921        ActionListener disableCheckActionListener = e -> handlePrefEnable();
922        prefCheckKeys.addActionListener(disableCheckActionListener);
923        prefCheckKeysBeforeUpload.addActionListener(disableCheckActionListener);
924        prefCheckComplex.addActionListener(disableCheckActionListener);
925        prefCheckComplexBeforeUpload.addActionListener(disableCheckActionListener);
926
927        handlePrefEnable();
928
929        prefCheckValues = new JCheckBox(tr("Check property values."), Config.getPref().getBoolean(PREF_CHECK_VALUES, true));
930        prefCheckValues.setToolTipText(tr("Validate that property values are valid checking against presets."));
931        testPanel.add(prefCheckValues, GBC.std().insets(20, 0, 0, 0));
932
933        prefCheckValuesBeforeUpload = new JCheckBox();
934        prefCheckValuesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, true));
935        testPanel.add(prefCheckValuesBeforeUpload, a);
936
937        prefCheckFixmes = new JCheckBox(tr("Check for FIXMES."), Config.getPref().getBoolean(PREF_CHECK_FIXMES, true));
938        prefCheckFixmes.setToolTipText(tr("Looks for nodes or ways with FIXME in any property value."));
939        testPanel.add(prefCheckFixmes, GBC.std().insets(20, 0, 0, 0));
940
941        prefCheckFixmesBeforeUpload = new JCheckBox();
942        prefCheckFixmesBeforeUpload.setSelected(Config.getPref().getBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, true));
943        testPanel.add(prefCheckFixmesBeforeUpload, a);
944    }
945
946    /**
947     * Enables/disables the source list field
948     */
949    public void handlePrefEnable() {
950        boolean selected = prefCheckKeys.isSelected() || prefCheckKeysBeforeUpload.isSelected()
951                || prefCheckComplex.isSelected() || prefCheckComplexBeforeUpload.isSelected();
952        sourcesList.setEnabled(selected);
953    }
954
955    @Override
956    public boolean ok() {
957        enabled = prefCheckKeys.isSelected() || prefCheckValues.isSelected() || prefCheckComplex.isSelected() || prefCheckFixmes.isSelected();
958        testBeforeUpload = prefCheckKeysBeforeUpload.isSelected() || prefCheckValuesBeforeUpload.isSelected()
959                || prefCheckFixmesBeforeUpload.isSelected() || prefCheckComplexBeforeUpload.isSelected();
960
961        Config.getPref().putBoolean(PREF_CHECK_VALUES, prefCheckValues.isSelected());
962        Config.getPref().putBoolean(PREF_CHECK_COMPLEX, prefCheckComplex.isSelected());
963        Config.getPref().putBoolean(PREF_CHECK_KEYS, prefCheckKeys.isSelected());
964        Config.getPref().putBoolean(PREF_CHECK_FIXMES, prefCheckFixmes.isSelected());
965        Config.getPref().putBoolean(PREF_CHECK_VALUES_BEFORE_UPLOAD, prefCheckValuesBeforeUpload.isSelected());
966        Config.getPref().putBoolean(PREF_CHECK_COMPLEX_BEFORE_UPLOAD, prefCheckComplexBeforeUpload.isSelected());
967        Config.getPref().putBoolean(PREF_CHECK_KEYS_BEFORE_UPLOAD, prefCheckKeysBeforeUpload.isSelected());
968        Config.getPref().putBoolean(PREF_CHECK_FIXMES_BEFORE_UPLOAD, prefCheckFixmesBeforeUpload.isSelected());
969        return Config.getPref().putList(PREF_SOURCES, sourcesList.getItems());
970    }
971
972    @Override
973    public Command fixError(TestError testError) {
974        List<Command> commands = new ArrayList<>(50);
975
976        Collection<? extends OsmPrimitive> primitives = testError.getPrimitives();
977        for (OsmPrimitive p : primitives) {
978            Map<String, String> tags = p.getKeys();
979            if (tags.isEmpty()) {
980                continue;
981            }
982
983            for (Entry<String, String> prop: tags.entrySet()) {
984                String key = prop.getKey();
985                String value = prop.getValue();
986                if (value == null || value.trim().isEmpty()) {
987                    commands.add(new ChangePropertyCommand(p, key, null));
988                } else if (value.startsWith(" ") || value.endsWith(" ") || value.contains("  ")) {
989                    commands.add(new ChangePropertyCommand(p, key, Utils.removeWhiteSpaces(value)));
990                } else if (key.startsWith(" ") || key.endsWith(" ") || key.contains("  ")) {
991                    commands.add(new ChangePropertyKeyCommand(p, key, Utils.removeWhiteSpaces(key)));
992                } else {
993                    String evalue = Entities.unescape(value);
994                    if (!evalue.equals(value)) {
995                        commands.add(new ChangePropertyCommand(p, key, evalue));
996                    }
997                }
998            }
999        }
1000
1001        if (commands.isEmpty())
1002            return null;
1003        if (commands.size() == 1)
1004            return commands.get(0);
1005
1006        return new SequenceCommand(tr("Fix tags"), commands);
1007    }
1008
1009    @Override
1010    public boolean isFixable(TestError testError) {
1011        if (testError.getTester() instanceof TagChecker) {
1012            int code = testError.getCode();
1013            return code == EMPTY_VALUES || code == INVALID_SPACE ||
1014                   code == INVALID_KEY_SPACE || code == INVALID_HTML ||
1015                   code == MULTIPLE_SPACES;
1016        }
1017
1018        return false;
1019    }
1020
1021    @Override
1022    public void taggingPresetsModified() {
1023        try {
1024            initializeData();
1025            initializePresets();
1026            analysePresets();
1027        } catch (IOException e) {
1028            Logging.error(e);
1029        }
1030    }
1031}