001 package org.maltparser.core.symbol.trie; 002 003 import java.io.BufferedReader; 004 import java.io.BufferedWriter; 005 import java.io.IOException; 006 import java.util.Set; 007 import java.util.SortedMap; 008 import java.util.TreeMap; 009 010 import org.apache.log4j.Logger; 011 import org.maltparser.core.helper.HashMap; 012 import org.maltparser.core.exception.MaltChainedException; 013 import org.maltparser.core.io.dataformat.ColumnDescription; 014 import org.maltparser.core.symbol.SymbolException; 015 import org.maltparser.core.symbol.SymbolTable; 016 import org.maltparser.core.symbol.nullvalue.InputNullValues; 017 import org.maltparser.core.symbol.nullvalue.NullValues; 018 import org.maltparser.core.symbol.nullvalue.OutputNullValues; 019 import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId; 020 /** 021 022 @author Johan Hall 023 @since 1.0 024 */ 025 public class TrieSymbolTable implements SymbolTable { 026 private final String name; 027 private final Trie trie; 028 private final SortedMap<Integer, TrieNode> codeTable; 029 private int columnCategory; 030 private final NullValues nullValues; 031 private int valueCounter; 032 /** Cache the hash code for the symbol table */ 033 private int cachedHash; 034 035 /** Special treatment during parsing */ 036 private final int symbolTableMode; 037 private HashMap<String, Integer> tmpStorageStrIntMap; 038 private HashMap<Integer, String> tmpStorageIntStrMap; 039 private int tmpStorageValueCounter; 040 041 public TrieSymbolTable(String name, Trie trie, int columnCategory, String nullValueStrategy, int symbolTableMode) throws MaltChainedException { 042 this.name = name; 043 this.trie = trie; 044 this.columnCategory = columnCategory; 045 046 codeTable = new TreeMap<Integer, TrieNode>(); 047 if (columnCategory == ColumnDescription.INPUT) { 048 nullValues = new InputNullValues(nullValueStrategy, this); 049 } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 050 nullValues = new OutputNullValues(nullValueStrategy, this); 051 } else { 052 nullValues = new InputNullValues(nullValueStrategy, this); 053 } 054 valueCounter = nullValues.getNextCode(); 055 056 this.symbolTableMode = symbolTableMode; 057 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) { 058 tmpStorageStrIntMap = new HashMap<String, Integer>(); 059 tmpStorageIntStrMap = new HashMap<Integer, String>(); 060 tmpStorageValueCounter = -1; 061 } 062 } 063 064 public TrieSymbolTable(String name, Trie trie, int symbolTableMode) { 065 this.name = name; 066 this.trie = trie; 067 codeTable = new TreeMap<Integer, TrieNode>(); 068 nullValues = new InputNullValues("one", this); 069 valueCounter = 1; 070 this.symbolTableMode = symbolTableMode; 071 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) { 072 tmpStorageStrIntMap = new HashMap<String, Integer>(); 073 tmpStorageIntStrMap = new HashMap<Integer, String>(); 074 tmpStorageValueCounter = -1; 075 } 076 } 077 078 public int addSymbol(String symbol) throws MaltChainedException { 079 if (nullValues == null || !nullValues.isNullValue(symbol)) { 080 if (symbol == null || symbol.length() == 0) { 081 throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table"); 082 } 083 084 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) { 085 final TrieNode node = trie.addValue(symbol, this, -1); 086 final int code = node.getEntry(this); 087 if (!codeTable.containsKey(code)) { 088 codeTable.put(code, node); 089 } 090 return code; 091 } else { // this.symbolTableMode == ADD_NEW_TO_TMP_STORAGE 092 Integer entry = trie.getEntry(symbol, this); 093 if (entry != null) { 094 return entry.intValue(); 095 } 096 if (!tmpStorageStrIntMap.containsKey(symbol)) { 097 // System.out.println("!tmpStorageStrIntMap.containsKey(symbol) : " + this.getName() + ": " + symbol.toString()); 098 if (tmpStorageValueCounter == -1) { 099 tmpStorageValueCounter = valueCounter + 1; 100 } else { 101 tmpStorageValueCounter++; 102 } 103 tmpStorageStrIntMap.put(symbol, tmpStorageValueCounter); 104 tmpStorageIntStrMap.put(tmpStorageValueCounter, symbol); 105 return tmpStorageValueCounter; 106 } else { 107 return tmpStorageStrIntMap.get(symbol); 108 } 109 } 110 } else { 111 return nullValues.symbolToCode(symbol); 112 } 113 } 114 115 public int addSymbol(StringBuilder symbol) throws MaltChainedException { 116 if (nullValues == null || !nullValues.isNullValue(symbol)) { 117 if (symbol == null || symbol.length() == 0) { 118 throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table"); 119 } 120 121 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) { 122 final TrieNode node = trie.addValue(symbol, this, -1); 123 final int code = node.getEntry(this); 124 if (!codeTable.containsKey(code)) { 125 codeTable.put(code, node); 126 } 127 return code; 128 } else { // this.symbolTableMode == ADD_NEW_TO_TMP_STORAGE 129 Integer entry = trie.getEntry(symbol.toString(), this); 130 131 if (entry != null) { 132 return entry.intValue(); 133 } 134 if (!tmpStorageStrIntMap.containsKey(symbol)) { 135 if (tmpStorageValueCounter == -1) { 136 tmpStorageValueCounter = valueCounter + 1; 137 } else { 138 tmpStorageValueCounter++; 139 } 140 tmpStorageStrIntMap.put(symbol.toString(), tmpStorageValueCounter); 141 tmpStorageIntStrMap.put(tmpStorageValueCounter, symbol.toString()); 142 return tmpStorageValueCounter; 143 } else { 144 return tmpStorageStrIntMap.get(symbol); 145 } 146 } 147 } else { 148 return nullValues.symbolToCode(symbol); 149 } 150 } 151 152 public String getSymbolCodeToString(int code) throws MaltChainedException { 153 if (code >= 0) { 154 if (nullValues == null || !nullValues.isNullValue(code)) { 155 if (trie == null) { 156 throw new SymbolException("The symbol table is corrupt. "); 157 } 158 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) { 159 return trie.getValue(codeTable.get(code), this); 160 } else { 161 TrieNode node = codeTable.get(code); 162 if (node != null) { 163 return trie.getValue(node, this); 164 } else { 165 return tmpStorageIntStrMap.get(code); 166 } 167 } 168 } else { 169 return nullValues.codeToSymbol(code); 170 } 171 } else { 172 throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. "); 173 } 174 } 175 176 public int getSymbolStringToCode(String symbol) throws MaltChainedException { 177 if (symbol != null) { 178 if (nullValues == null || !nullValues.isNullValue(symbol)) { 179 if (trie == null) { 180 throw new SymbolException("The symbol table is corrupt. "); 181 } 182 if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) { 183 final Integer entry = trie.getEntry(symbol, this); 184 if (entry == null) { 185 throw new SymbolException("Could not find the symbol '"+symbol+"' in the symbol table. "); 186 } 187 return entry.intValue(); 188 } else { 189 final Integer entry = trie.getEntry(symbol, this); 190 if (entry != null) { 191 return entry.intValue(); 192 } else { 193 Integer tmpEntry = tmpStorageStrIntMap.get(symbol); 194 if (tmpEntry == null) { 195 throw new SymbolException("Could not find the symbol '"+symbol+"' in the symbol table. "); 196 } 197 return tmpEntry.intValue(); 198 } 199 } 200 } else { 201 return nullValues.symbolToCode(symbol); 202 } 203 } else { 204 throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. "); 205 } 206 } 207 208 public void clearTmpStorage() { 209 if (symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) { 210 tmpStorageIntStrMap.clear(); 211 tmpStorageStrIntMap.clear(); 212 tmpStorageValueCounter = -1; 213 } 214 } 215 public String getNullValueStrategy() { 216 if (nullValues == null) { 217 return null; 218 } 219 return nullValues.getNullValueStrategy(); 220 } 221 222 223 public int getColumnCategory() { 224 return columnCategory; 225 } 226 227 public void printSymbolTable(Logger logger) throws MaltChainedException { 228 for (Integer code : codeTable.keySet()) { 229 logger.info(code+"\t"+trie.getValue(codeTable.get(code), this)+"\n"); 230 } 231 } 232 233 public void saveHeader(BufferedWriter out) throws MaltChainedException { 234 try { 235 out.append('\t'); 236 out.append(getName()); 237 out.append('\t'); 238 out.append(Integer.toString(getColumnCategory())); 239 out.append('\t'); 240 out.append(getNullValueStrategy()); 241 out.append('\n'); 242 } catch (IOException e) { 243 throw new SymbolException("Could not save the symbol table. ", e); 244 } 245 } 246 247 public int size() { 248 return codeTable.size(); 249 } 250 251 252 public void save(BufferedWriter out) throws MaltChainedException { 253 try { 254 out.write(name); 255 out.write('\n'); 256 for (Integer code : codeTable.keySet()) { 257 out.write(code+""); 258 out.write('\t'); 259 out.write(trie.getValue(codeTable.get(code), this)); 260 out.write('\n'); 261 } 262 out.write('\n'); 263 } catch (IOException e) { 264 throw new SymbolException("Could not save the symbol table. ", e); 265 } 266 } 267 268 public void load(BufferedReader in) throws MaltChainedException { 269 int max = 0; 270 int index = 0; 271 String fileLine; 272 try { 273 while ((fileLine = in.readLine()) != null) { 274 if (fileLine.length() == 0 || (index = fileLine.indexOf('\t')) == -1) { 275 setValueCounter(max+1); 276 break; 277 } 278 int code = Integer.parseInt(fileLine.substring(0,index)); 279 final String str = fileLine.substring(index+1); 280 final TrieNode node = trie.addValue(str, this, code); 281 codeTable.put(node.getEntry(this), node); //.getCode(), node); 282 if (max < code) { 283 max = code; 284 } 285 } 286 } catch (NumberFormatException e) { 287 throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e); 288 } catch (IOException e) { 289 throw new SymbolException("Could not load the symbol table. ", e); 290 } 291 } 292 293 public String getName() { 294 return name; 295 } 296 297 public int getValueCounter() { 298 return valueCounter; 299 } 300 301 private void setValueCounter(int valueCounter) { 302 this.valueCounter = valueCounter; 303 } 304 305 protected void updateValueCounter(int code) { 306 if (code > valueCounter) { 307 valueCounter = code; 308 } 309 } 310 311 protected int increaseValueCounter() { 312 return valueCounter++; 313 } 314 315 public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException { 316 if (nullValues == null) { 317 throw new SymbolException("The symbol table does not have any null-values. "); 318 } 319 return nullValues.nullvalueToCode(nullValueIdentifier); 320 } 321 322 public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException { 323 if (nullValues == null) { 324 throw new SymbolException("The symbol table does not have any null-values. "); 325 } 326 return nullValues.nullvalueToSymbol(nullValueIdentifier); 327 } 328 329 public boolean isNullValue(String symbol) throws MaltChainedException { 330 if (nullValues != null) { 331 return nullValues.isNullValue(symbol); 332 } 333 return false; 334 } 335 336 public boolean isNullValue(int code) throws MaltChainedException { 337 if (nullValues != null) { 338 return nullValues.isNullValue(code); 339 } 340 return false; 341 } 342 343 public void copy(SymbolTable fromTable) throws MaltChainedException { 344 final SortedMap<Integer, TrieNode> fromCodeTable = ((TrieSymbolTable)fromTable).getCodeTable(); 345 int max = getValueCounter()-1; 346 for (Integer code : fromCodeTable.keySet()) { 347 final String str = trie.getValue(fromCodeTable.get(code), this); 348 final TrieNode node = trie.addValue(str, this, code); 349 codeTable.put(node.getEntry(this), node); //.getCode(), node); 350 if (max < code) { 351 max = code; 352 } 353 } 354 setValueCounter(max+1); 355 } 356 357 public SortedMap<Integer, TrieNode> getCodeTable() { 358 return codeTable; 359 } 360 361 public Set<Integer> getCodes() { 362 return codeTable.keySet(); 363 } 364 365 protected Trie getTrie() { 366 return trie; 367 } 368 369 public boolean equals(Object obj) { 370 if (this == obj) 371 return true; 372 if (obj == null) 373 return false; 374 if (getClass() != obj.getClass()) 375 return false; 376 final TrieSymbolTable other = (TrieSymbolTable)obj; 377 return ((name == null) ? other.name == null : name.equals(other.name)); 378 } 379 380 public int hashCode() { 381 if (cachedHash == 0) { 382 cachedHash = 217 + (null == name ? 0 : name.hashCode()); 383 } 384 return cachedHash; 385 } 386 387 public String toString() { 388 final StringBuilder sb = new StringBuilder(); 389 sb.append(name); 390 sb.append(' '); 391 sb.append(valueCounter); 392 return sb.toString(); 393 } 394 }