001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.tools;
003
004import java.util.Arrays;
005import java.util.HashMap;
006import java.util.Map;
007
008/**
009 * A helper class that analyzes the text and attempts to parse tags from it
010 * @since 13544 (extracted from {@link TextTagParser})
011 */
012public class TextAnalyzer {
013    private boolean quotesStarted;
014    private boolean esc;
015    private final StringBuilder s = new StringBuilder(200);
016    private String valueStops = "\n\r\t";
017    private int pos;
018    private final String data;
019    private final int n;
020
021    /**
022     * Create a new {@link TextAnalyzer}
023     * @param text The text to parse
024     */
025    public TextAnalyzer(String text) {
026        pos = 0;
027        data = Utils.strip(text);
028        n = data.length();
029        // fix #1604: allow space characters as value stops for single-line input only
030        if (data.indexOf('\r') == -1 && data.indexOf('\n') == -1) {
031            valueStops += " ";
032        }
033    }
034
035    /**
036     * Read tags from "Free format"
037     * @return map of tags
038     */
039    public Map<String, String> getFreeParsedTags() {
040        String k, v;
041        Map<String, String> tags = new HashMap<>();
042
043        while (true) {
044            skipEmpty();
045            if (pos == n) {
046                break;
047            }
048            k = parseString("\n\r\t= ");
049            if (pos == n) {
050                tags.clear();
051                break;
052            }
053            skipSign();
054            if (pos == n) {
055                tags.clear();
056                break;
057            }
058            v = parseString(valueStops);
059            tags.put(k, v);
060        }
061        return tags;
062    }
063
064    /**
065     * Parses current text to extract a key or value depending on given stop characters.
066     * @param stopChars Parsing will stop when one character of this string is found
067     * @return key or value extracted from current text
068     */
069    public String parseString(String stopChars) {
070        char[] stop = stopChars.toCharArray();
071        Arrays.sort(stop);
072        char c;
073        while (pos < n) {
074            c = data.charAt(pos);
075            if (esc) {
076                esc = false;
077                s.append(c); //  \" \\
078            } else if (c == '\\') {
079                esc = true;
080            } else if (c == '\"' && !quotesStarted) { // opening "
081                if (!s.toString().trim().isEmpty()) { // we had   ||some text"||
082                    s.append(c); // just add ", not open
083                } else {
084                    s.delete(0, s.length()); // forget that empty characthers and start reading "....
085                    quotesStarted = true;
086                }
087            } else if (c == '\"' && quotesStarted) {  // closing "
088                quotesStarted = false;
089                pos++;
090                break;
091            } else if (!quotesStarted && (Arrays.binarySearch(stop, c) >= 0)) {
092                // stop-symbol found
093                pos++;
094                break;
095            } else {
096                // skip non-printable characters
097                if (c >= 32) s.append(c);
098            }
099            pos++;
100        }
101
102        String res = s.toString();
103        s.delete(0, s.length());
104        return res.trim();
105    }
106
107    private void skipSign() {
108        char c;
109        boolean signFound = false;
110        while (pos < n) {
111            c = data.charAt(pos);
112            if (c == '\t' || c == '\n' || c == ' ') {
113                pos++;
114            } else if (c == '=') {
115                if (signFound) break; // a  =  =qwerty means "a"="=qwerty"
116                signFound = true;
117                pos++;
118            } else {
119                break;
120            }
121        }
122    }
123
124    private void skipEmpty() {
125        char c;
126        while (pos < n) {
127            c = data.charAt(pos);
128            if (c == '\t' || c == '\n' || c == '\r' || c == ' ') {
129                pos++;
130            } else {
131                break;
132            }
133        }
134    }
135}