001 package org.maltparser.core.symbol.trie;
002
003 import java.io.BufferedReader;
004 import java.io.BufferedWriter;
005 import java.io.IOException;
006 import java.util.Set;
007 import java.util.SortedMap;
008 import java.util.TreeMap;
009
010 import org.apache.log4j.Logger;
011 import org.maltparser.core.helper.HashMap;
012 import org.maltparser.core.exception.MaltChainedException;
013 import org.maltparser.core.io.dataformat.ColumnDescription;
014 import org.maltparser.core.symbol.SymbolException;
015 import org.maltparser.core.symbol.SymbolTable;
016 import org.maltparser.core.symbol.nullvalue.InputNullValues;
017 import org.maltparser.core.symbol.nullvalue.NullValues;
018 import org.maltparser.core.symbol.nullvalue.OutputNullValues;
019 import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
020 /**
021
022 @author Johan Hall
023 @since 1.0
024 */
025 public class TrieSymbolTable implements SymbolTable {
026 private final String name;
027 private final Trie trie;
028 private final SortedMap<Integer, TrieNode> codeTable;
029 private int columnCategory;
030 private final NullValues nullValues;
031 private int valueCounter;
032 /** Cache the hash code for the symbol table */
033 private int cachedHash;
034
035 /** Special treatment during parsing */
036 private final int symbolTableMode;
037 private HashMap<String, Integer> tmpStorageStrIntMap;
038 private HashMap<Integer, String> tmpStorageIntStrMap;
039 private int tmpStorageValueCounter;
040
041 public TrieSymbolTable(String name, Trie trie, int columnCategory, String nullValueStrategy, int symbolTableMode) throws MaltChainedException {
042 this.name = name;
043 this.trie = trie;
044 this.columnCategory = columnCategory;
045
046 codeTable = new TreeMap<Integer, TrieNode>();
047 if (columnCategory == ColumnDescription.INPUT) {
048 nullValues = new InputNullValues(nullValueStrategy, this);
049 } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
050 nullValues = new OutputNullValues(nullValueStrategy, this);
051 } else {
052 nullValues = new InputNullValues(nullValueStrategy, this);
053 }
054 valueCounter = nullValues.getNextCode();
055
056 this.symbolTableMode = symbolTableMode;
057 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) {
058 tmpStorageStrIntMap = new HashMap<String, Integer>();
059 tmpStorageIntStrMap = new HashMap<Integer, String>();
060 tmpStorageValueCounter = -1;
061 }
062 }
063
064 public TrieSymbolTable(String name, Trie trie, int symbolTableMode) {
065 this.name = name;
066 this.trie = trie;
067 codeTable = new TreeMap<Integer, TrieNode>();
068 nullValues = new InputNullValues("one", this);
069 valueCounter = 1;
070 this.symbolTableMode = symbolTableMode;
071 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) {
072 tmpStorageStrIntMap = new HashMap<String, Integer>();
073 tmpStorageIntStrMap = new HashMap<Integer, String>();
074 tmpStorageValueCounter = -1;
075 }
076 }
077
078 public int addSymbol(String symbol) throws MaltChainedException {
079 if (nullValues == null || !nullValues.isNullValue(symbol)) {
080 if (symbol == null || symbol.length() == 0) {
081 throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
082 }
083
084 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
085 final TrieNode node = trie.addValue(symbol, this, -1);
086 final int code = node.getEntry(this);
087 if (!codeTable.containsKey(code)) {
088 codeTable.put(code, node);
089 }
090 return code;
091 } else { // this.symbolTableMode == ADD_NEW_TO_TMP_STORAGE
092 Integer entry = trie.getEntry(symbol, this);
093 if (entry != null) {
094 return entry.intValue();
095 }
096 if (!tmpStorageStrIntMap.containsKey(symbol)) {
097 // System.out.println("!tmpStorageStrIntMap.containsKey(symbol) : " + this.getName() + ": " + symbol.toString());
098 if (tmpStorageValueCounter == -1) {
099 tmpStorageValueCounter = valueCounter + 1;
100 } else {
101 tmpStorageValueCounter++;
102 }
103 tmpStorageStrIntMap.put(symbol, tmpStorageValueCounter);
104 tmpStorageIntStrMap.put(tmpStorageValueCounter, symbol);
105 return tmpStorageValueCounter;
106 } else {
107 return tmpStorageStrIntMap.get(symbol);
108 }
109 }
110 } else {
111 return nullValues.symbolToCode(symbol);
112 }
113 }
114
115 public int addSymbol(StringBuilder symbol) throws MaltChainedException {
116 if (nullValues == null || !nullValues.isNullValue(symbol)) {
117 if (symbol == null || symbol.length() == 0) {
118 throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
119 }
120
121 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
122 final TrieNode node = trie.addValue(symbol, this, -1);
123 final int code = node.getEntry(this);
124 if (!codeTable.containsKey(code)) {
125 codeTable.put(code, node);
126 }
127 return code;
128 } else { // this.symbolTableMode == ADD_NEW_TO_TMP_STORAGE
129 Integer entry = trie.getEntry(symbol.toString(), this);
130
131 if (entry != null) {
132 return entry.intValue();
133 }
134 if (!tmpStorageStrIntMap.containsKey(symbol)) {
135 if (tmpStorageValueCounter == -1) {
136 tmpStorageValueCounter = valueCounter + 1;
137 } else {
138 tmpStorageValueCounter++;
139 }
140 tmpStorageStrIntMap.put(symbol.toString(), tmpStorageValueCounter);
141 tmpStorageIntStrMap.put(tmpStorageValueCounter, symbol.toString());
142 return tmpStorageValueCounter;
143 } else {
144 return tmpStorageStrIntMap.get(symbol);
145 }
146 }
147 } else {
148 return nullValues.symbolToCode(symbol);
149 }
150 }
151
152 public String getSymbolCodeToString(int code) throws MaltChainedException {
153 if (code >= 0) {
154 if (nullValues == null || !nullValues.isNullValue(code)) {
155 if (trie == null) {
156 throw new SymbolException("The symbol table is corrupt. ");
157 }
158 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
159 return trie.getValue(codeTable.get(code), this);
160 } else {
161 TrieNode node = codeTable.get(code);
162 if (node != null) {
163 return trie.getValue(node, this);
164 } else {
165 return tmpStorageIntStrMap.get(code);
166 }
167 }
168 } else {
169 return nullValues.codeToSymbol(code);
170 }
171 } else {
172 throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. ");
173 }
174 }
175
176 public int getSymbolStringToCode(String symbol) throws MaltChainedException {
177 if (symbol != null) {
178 if (nullValues == null || !nullValues.isNullValue(symbol)) {
179 if (trie == null) {
180 throw new SymbolException("The symbol table is corrupt. ");
181 }
182 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
183 final Integer entry = trie.getEntry(symbol, this);
184 if (entry == null) {
185 throw new SymbolException("Could not find the symbol '"+symbol+"' in the symbol table. ");
186 }
187 return entry.intValue();
188 } else {
189 final Integer entry = trie.getEntry(symbol, this);
190 if (entry != null) {
191 return entry.intValue();
192 } else {
193 Integer tmpEntry = tmpStorageStrIntMap.get(symbol);
194 if (tmpEntry == null) {
195 throw new SymbolException("Could not find the symbol '"+symbol+"' in the symbol table. ");
196 }
197 return tmpEntry.intValue();
198 }
199 }
200 } else {
201 return nullValues.symbolToCode(symbol);
202 }
203 } else {
204 throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. ");
205 }
206 }
207
208 public void clearTmpStorage() {
209 if (symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) {
210 tmpStorageIntStrMap.clear();
211 tmpStorageStrIntMap.clear();
212 tmpStorageValueCounter = -1;
213 }
214 }
215 public String getNullValueStrategy() {
216 if (nullValues == null) {
217 return null;
218 }
219 return nullValues.getNullValueStrategy();
220 }
221
222
223 public int getColumnCategory() {
224 return columnCategory;
225 }
226
227 public void printSymbolTable(Logger logger) throws MaltChainedException {
228 for (Integer code : codeTable.keySet()) {
229 logger.info(code+"\t"+trie.getValue(codeTable.get(code), this)+"\n");
230 }
231 }
232
233 public void saveHeader(BufferedWriter out) throws MaltChainedException {
234 try {
235 out.append('\t');
236 out.append(getName());
237 out.append('\t');
238 out.append(Integer.toString(getColumnCategory()));
239 out.append('\t');
240 out.append(getNullValueStrategy());
241 out.append('\n');
242 } catch (IOException e) {
243 throw new SymbolException("Could not save the symbol table. ", e);
244 }
245 }
246
247 public int size() {
248 return codeTable.size();
249 }
250
251
252 public void save(BufferedWriter out) throws MaltChainedException {
253 try {
254 out.write(name);
255 out.write('\n');
256 for (Integer code : codeTable.keySet()) {
257 out.write(code+"");
258 out.write('\t');
259 out.write(trie.getValue(codeTable.get(code), this));
260 out.write('\n');
261 }
262 out.write('\n');
263 } catch (IOException e) {
264 throw new SymbolException("Could not save the symbol table. ", e);
265 }
266 }
267
268 public void load(BufferedReader in) throws MaltChainedException {
269 int max = 0;
270 int index = 0;
271 String fileLine;
272 try {
273 while ((fileLine = in.readLine()) != null) {
274 if (fileLine.length() == 0 || (index = fileLine.indexOf('\t')) == -1) {
275 setValueCounter(max+1);
276 break;
277 }
278 int code = Integer.parseInt(fileLine.substring(0,index));
279 final String str = fileLine.substring(index+1);
280 final TrieNode node = trie.addValue(str, this, code);
281 codeTable.put(node.getEntry(this), node); //.getCode(), node);
282 if (max < code) {
283 max = code;
284 }
285 }
286 } catch (NumberFormatException e) {
287 throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e);
288 } catch (IOException e) {
289 throw new SymbolException("Could not load the symbol table. ", e);
290 }
291 }
292
293 public String getName() {
294 return name;
295 }
296
297 public int getValueCounter() {
298 return valueCounter;
299 }
300
301 private void setValueCounter(int valueCounter) {
302 this.valueCounter = valueCounter;
303 }
304
305 protected void updateValueCounter(int code) {
306 if (code > valueCounter) {
307 valueCounter = code;
308 }
309 }
310
311 protected int increaseValueCounter() {
312 return valueCounter++;
313 }
314
315 public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException {
316 if (nullValues == null) {
317 throw new SymbolException("The symbol table does not have any null-values. ");
318 }
319 return nullValues.nullvalueToCode(nullValueIdentifier);
320 }
321
322 public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException {
323 if (nullValues == null) {
324 throw new SymbolException("The symbol table does not have any null-values. ");
325 }
326 return nullValues.nullvalueToSymbol(nullValueIdentifier);
327 }
328
329 public boolean isNullValue(String symbol) throws MaltChainedException {
330 if (nullValues != null) {
331 return nullValues.isNullValue(symbol);
332 }
333 return false;
334 }
335
336 public boolean isNullValue(int code) throws MaltChainedException {
337 if (nullValues != null) {
338 return nullValues.isNullValue(code);
339 }
340 return false;
341 }
342
343 public void copy(SymbolTable fromTable) throws MaltChainedException {
344 final SortedMap<Integer, TrieNode> fromCodeTable = ((TrieSymbolTable)fromTable).getCodeTable();
345 int max = getValueCounter()-1;
346 for (Integer code : fromCodeTable.keySet()) {
347 final String str = trie.getValue(fromCodeTable.get(code), this);
348 final TrieNode node = trie.addValue(str, this, code);
349 codeTable.put(node.getEntry(this), node); //.getCode(), node);
350 if (max < code) {
351 max = code;
352 }
353 }
354 setValueCounter(max+1);
355 }
356
357 public SortedMap<Integer, TrieNode> getCodeTable() {
358 return codeTable;
359 }
360
361 public Set<Integer> getCodes() {
362 return codeTable.keySet();
363 }
364
365 protected Trie getTrie() {
366 return trie;
367 }
368
369 public boolean equals(Object obj) {
370 if (this == obj)
371 return true;
372 if (obj == null)
373 return false;
374 if (getClass() != obj.getClass())
375 return false;
376 final TrieSymbolTable other = (TrieSymbolTable)obj;
377 return ((name == null) ? other.name == null : name.equals(other.name));
378 }
379
380 public int hashCode() {
381 if (cachedHash == 0) {
382 cachedHash = 217 + (null == name ? 0 : name.hashCode());
383 }
384 return cachedHash;
385 }
386
387 public String toString() {
388 final StringBuilder sb = new StringBuilder();
389 sb.append(name);
390 sb.append(' ');
391 sb.append(valueCounter);
392 return sb.toString();
393 }
394 }