// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.tools.template_engine;

import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;

/**
 * This class converts a template string (stream of characters) into a stream of tokens.
 *
 * The result of the tokenization (also called lexical analysis) serves as input for the
 * parser {@link TemplateParser}.
 */
public class Tokenizer {

    /**
     * A single lexical unit produced by the {@link Tokenizer}.
     */
    public static class Token {
        private final TokenType type;
        private final int position;
        private final String text;

        /**
         * Creates a token that carries no text.
         * @param type the kind of token
         * @param position the tokenizer's read index at the moment the token was started
         */
        public Token(TokenType type, int position) {
            this(type, position, null);
        }

        /**
         * Creates a token.
         * @param type the kind of token
         * @param position the tokenizer's read index at the moment the token was started
         * @param text the text carried by the token, may be {@code null}
         */
        public Token(TokenType type, int position, String text) {
            this.type = type;
            this.position = position;
            this.text = text;
        }

        /**
         * Returns the kind of this token.
         * @return the token type
         */
        public TokenType getType() {
            return type;
        }

        /**
         * Returns the read index that was recorded when this token was started.
         * The tokenizer always holds one character of read-ahead, so for a token that
         * begins with a character this is that character's zero-based offset plus one.
         * @return the recorded position
         */
        public int getPosition() {
            return position;
        }

        /**
         * Returns the text carried by this token.
         * @return the text, or {@code null} if the token was created without text
         */
        public String getText() {
            return text;
        }

        @Override
        public String toString() {
            if (text == null) {
                return String.valueOf(type);
            }
            return type + " " + text;
        }
    }

    /**
     * The kinds of token the tokenizer emits:
     * {@code ?{} (CONDITION_START), <code>{</code> (VARIABLE_START), {@code !{} (CONTEXT_SWITCH_START),
     * <code>}</code> (END), {@code |} (PIPE), {@code '} (APOSTROPHE), a run of literal characters (TEXT)
     * and the end of the template (EOF).
     */
    public enum TokenType { CONDITION_START, VARIABLE_START, CONTEXT_SWITCH_START, END, PIPE, APOSTROPHE, TEXT, EOF }

    /** Value of the current character once the template is exhausted. */
    private static final int END_OF_INPUT = -1;

    /** Characters that terminate a run of literal text unless they are escaped with a backslash. */
    private static final Set<Character> SPECIAL_CHARACTERS =
            new HashSet<>(Arrays.asList('$', '?', '{', '}', '|', '\'', '!'));

    private final String template;

    /** The current (not yet consumed) character, or {@link #END_OF_INPUT}. */
    private int c;
    /** Index of the next character to read from {@link #template}. */
    private int index;
    /** Token produced by {@link #lookAhead()} and not yet handed out by {@link #nextToken()}. */
    private Token currentToken;
    /** Reusable buffer for the text of the TEXT token being built. */
    private final StringBuilder text = new StringBuilder();

    /**
     * Creates a new {@link Tokenizer}
     * @param template the template as a user input string
     */
    public Tokenizer(String template) {
        this.template = template;
        getChar();
    }

    /**
     * Loads the next character of the template into {@link #c}, or {@link #END_OF_INPUT}
     * if there is none left. The read index only advances when a character was read.
     */
    private void getChar() {
        if (index >= template.length()) {
            c = END_OF_INPUT;
        } else {
            c = template.charAt(index++);
        }
    }

    /**
     * Returns the next token and consumes it. If a token was previously obtained through
     * {@link #lookAhead()}, that token is returned.
     * <p>
     * A backslash inside literal text escapes the following character; {@code \n} yields a
     * line feed. A backslash that is the very last character of the template is kept as a
     * literal backslash. The character {@code $} has no token of its own and is treated as
     * literal text.
     *
     * @return the next token; an {@link TokenType#EOF} token once the template is exhausted
     * @throws ParseError if {@code ?} or {@code !} is not immediately followed by <code>{</code>
     */
    public Token nextToken() throws ParseError {
        if (currentToken != null) {
            Token pending = currentToken;
            currentToken = null;
            return pending;
        }
        int position = index;

        switch (c) {
        case END_OF_INPUT:
            return new Token(TokenType.EOF, position);
        case '{':
            return singleCharToken(TokenType.VARIABLE_START, position);
        case '?':
            return bracePrefixedToken(TokenType.CONDITION_START, position);
        case '!':
            return bracePrefixedToken(TokenType.CONTEXT_SWITCH_START, position);
        case '}':
            return singleCharToken(TokenType.END, position);
        case '|':
            return singleCharToken(TokenType.PIPE, position);
        case '\'':
            return singleCharToken(TokenType.APOSTROPHE, position);
        default:
            return textToken(position);
        }
    }

    /** Consumes the current character and returns a text-less token of the given type. */
    private Token singleCharToken(TokenType type, int position) {
        getChar();
        return new Token(type, position);
    }

    /** Consumes a two-character opener (the current character followed by an opening brace). */
    private Token bracePrefixedToken(TokenType type, int position) throws ParseError {
        getChar();
        if (c != '{') {
            // NOTE(review): at end of input this passes (char) -1 (U+FFFF) as the offending
            // character; left as is because the ParseError API is not visible from here.
            throw ParseError.unexpectedChar('{', (char) c, position);
        }
        getChar();
        return new Token(type, position);
    }

    /**
     * Reads a run of literal text starting at the current character, which must not be
     * {@link #END_OF_INPUT}. The first character is always consumed so that the tokenizer
     * makes progress even on a special character that has no dedicated token ({@code $}).
     */
    private Token textToken(int position) {
        text.setLength(0);
        do {
            consumeInto(text, true);
        } while (c != END_OF_INPUT && !SPECIAL_CHARACTERS.contains((char) c));
        return new Token(TokenType.TEXT, position, text.toString());
    }

    /**
     * Appends the current character to {@code target}, resolving a backslash escape, and
     * advances to the next character. Must not be called at end of input.
     *
     * @param target buffer that receives the character
     * @param translateNewline whether the escape {@code \n} is turned into a line feed
     */
    private void consumeInto(StringBuilder target, boolean translateNewline) {
        if (c == '\\') {
            getChar();
            if (c == END_OF_INPUT) {
                // Nothing left to escape: keep the backslash itself instead of appending (char) -1.
                target.append('\\');
                return;
            }
            if (translateNewline && c == 'n') {
                c = '\n';
            }
        }
        target.append((char) c);
        getChar();
    }

    /**
     * Returns the next token without consuming it; the same token is returned by the
     * following call to {@link #nextToken()}.
     *
     * @return the upcoming token
     * @throws ParseError if the upcoming token cannot be read, see {@link #nextToken()}
     */
    public Token lookAhead() throws ParseError {
        if (currentToken == null) {
            currentToken = nextToken();
        }
        return currentToken;
    }

    /**
     * Reads raw characters up to, but not including, the next unescaped occurrence of
     * {@code lastChar} or the end of the template, ignoring the special meaning of all other
     * characters. A backslash escapes the following character (no {@code \n} translation
     * takes place here); a backslash at the very end of the template is kept literally.
     * <p>
     * Any token buffered by {@link #lookAhead()} is discarded; its characters have already
     * been consumed and are not part of the result.
     *
     * @param lastChar the character at which to stop; it is left unconsumed
     * @return a {@link TokenType#TEXT} token holding the characters that were read
     */
    public Token skip(char lastChar) {
        currentToken = null;
        int position = index;
        StringBuilder skipped = new StringBuilder();
        while (c != lastChar && c != END_OF_INPUT) {
            consumeInto(skipped, false);
        }
        return new Token(TokenType.TEXT, position, skipped.toString());
    }
}