package org.maltparser.core.symbol.trie;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.TreeMap;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.symbol.SymbolException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.nullvalue.InputNullValues;
import org.maltparser.core.symbol.nullvalue.NullValues;
import org.maltparser.core.symbol.nullvalue.OutputNullValues;
import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;

/**
 * A {@link SymbolTable} that maps symbol strings to integer codes and back.
 * <p>
 * Symbol strings are stored in the {@link Trie} handed to the constructor; this
 * table additionally keeps a sorted map from code to trie node for the reverse
 * lookup. Symbols recognised by the table's {@link NullValues} object are never
 * stored in the trie; every lookup for them is delegated to that object.
 * <p>
 * The {@code nullValues} field is final and assigned by both constructors, so it
 * can only be observed as {@code null} while a constructor is still running (the
 * {@code NullValues} constructors receive this table as an argument). The
 * {@code nullValues == null} checks throughout the class cover that window.
 *
 * @author Johan Hall
 * @since 1.0
 */
public class TrieSymbolTable implements SymbolTable {
    private final String name;
    private final Trie trie;
    private final SortedMap<Integer, TrieNode> codeTable;
    private int columnCategory;
    private final NullValues nullValues;
    private int valueCounter;
    /** Cache the hash code for the symbol table */
    private int cachedHash;

    /**
     * Creates a symbol table for a column of the given category.
     * <p>
     * Tables for {@code ColumnDescription.DEPENDENCY_EDGE_LABEL} columns use
     * {@link OutputNullValues}; every other category uses {@link InputNullValues}.
     * The value counter starts at the next free code reported by the null values.
     *
     * @param name the name of the symbol table
     * @param trie the trie in which the symbol strings are stored
     * @param columnCategory the column category (a {@code ColumnDescription} constant)
     * @param nullValueStrategy the null-value strategy passed on to the null values object
     * @throws MaltChainedException declared for callers; see the null values classes
     */
    public TrieSymbolTable(String name, Trie trie, int columnCategory, String nullValueStrategy) throws MaltChainedException {
        this.name = name;
        this.trie = trie;
        this.columnCategory = columnCategory;

        codeTable = new TreeMap<Integer, TrieNode>();
        if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
            nullValues = new OutputNullValues(nullValueStrategy, this);
        } else {
            // ColumnDescription.INPUT and any other category share the input null values.
            nullValues = new InputNullValues(nullValueStrategy, this);
        }
        valueCounter = nullValues.getNextCode();
    }

    /**
     * Creates a symbol table with input null values using the strategy
     * {@code "one"} and a value counter starting at 1. The column category is
     * left at its default value (0).
     *
     * @param name the name of the symbol table
     * @param trie the trie in which the symbol strings are stored
     */
    public TrieSymbolTable(String name, Trie trie) {
        this.name = name;
        this.trie = trie;
        codeTable = new TreeMap<Integer, TrieNode>();
        nullValues = new InputNullValues("one", this);
        valueCounter = 1;
    }

    /**
     * Adds a symbol to the table (if not already present) and returns its code.
     * Null-value symbols are not added; their code is obtained from the null values.
     *
     * @param symbol the symbol string to add
     * @return the code of the symbol
     * @throws MaltChainedException if the symbol is {@code null} or empty
     */
    public int addSymbol(String symbol) throws MaltChainedException {
        if (nullValues != null && nullValues.isNullValue(symbol)) {
            return nullValues.symbolToCode(symbol);
        }
        if (symbol == null || symbol.length() == 0) {
            throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
        }

        // -1 asks the trie to pick the code itself instead of using a given one.
        final TrieNode node = trie.addValue(symbol, this, -1);
        final int code = node.getEntry(this);
        if (!codeTable.containsKey(code)) {
            codeTable.put(code, node);
        }
        return code;
    }

    /**
     * Returns the symbol string for a code.
     *
     * @param code the symbol code
     * @return the symbol string, or {@code null} if the code is not in the table
     * @throws MaltChainedException if the code is negative
     */
    public String getSymbolCodeToString(int code) throws MaltChainedException {
        if (code < 0) {
            throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. ");
        }
        if (nullValues != null && nullValues.isNullValue(code)) {
            return nullValues.codeToSymbol(code);
        }
        final TrieNode node = codeTable.get(code);
        return (node != null) ? trie.getValue(node, this) : null;
    }

    /**
     * Returns the code for a symbol string.
     *
     * @param symbol the symbol string
     * @return the code of the symbol, or -1 if the symbol is not in the table
     * @throws MaltChainedException if the symbol is {@code null}
     */
    public int getSymbolStringToCode(String symbol) throws MaltChainedException {
        if (symbol == null) {
            throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. ");
        }
        if (nullValues != null && nullValues.isNullValue(symbol)) {
            return nullValues.symbolToCode(symbol);
        }
        final Integer entry = trie.getEntry(symbol, this);
        return (entry != null) ? entry.intValue() : -1;
    }

    /**
     * Does nothing in this implementation.
     */
    public void clearTmpStorage() {

    }

    /**
     * Returns the null-value strategy reported by the null values object.
     *
     * @return the strategy, or {@code null} if the null values are not yet assigned
     */
    public String getNullValueStrategy() {
        if (nullValues == null) {
            return null;
        }
        return nullValues.getNullValueStrategy();
    }

    /**
     * Returns the column category this table was created with.
     *
     * @return the column category
     */
    public int getColumnCategory() {
        return columnCategory;
    }

    /**
     * Renders the table as text, one {@code code<TAB>symbol} line per entry in
     * ascending code order.
     *
     * @return the textual representation of the table
     * @throws MaltChainedException declared for callers; see {@link Trie}
     */
    public String printSymbolTable() throws MaltChainedException {
        final StringBuilder sb = new StringBuilder();
        for (Map.Entry<Integer, TrieNode> entry : codeTable.entrySet()) {
            sb.append(entry.getKey());
            sb.append('\t');
            sb.append(trie.getValue(entry.getValue(), this));
            sb.append('\n');
        }
        return sb.toString();
    }

    /**
     * Writes a one-line header: a tab, the table name, a tab, the column
     * category, a tab, the null-value strategy and a newline.
     *
     * @param out the writer to append to
     * @throws MaltChainedException if writing fails
     */
    public void saveHeader(BufferedWriter out) throws MaltChainedException {
        try {
            out.append('\t');
            out.append(getName());
            out.append('\t');
            out.append(Integer.toString(getColumnCategory()));
            out.append('\t');
            out.append(getNullValueStrategy());
            out.append('\n');
        } catch (IOException e) {
            throw new SymbolException("Could not save the symbol table. ", e);
        }
    }

    /**
     * Returns the number of codes stored in the code table.
     *
     * @return the number of entries
     */
    public int size() {
        return codeTable.size();
    }

    /**
     * Writes the table: the name on the first line, then one
     * {@code code<TAB>symbol} line per entry in ascending code order, then an
     * empty line that terminates the table.
     *
     * @param out the writer to write to
     * @throws MaltChainedException if writing fails
     */
    public void save(BufferedWriter out) throws MaltChainedException {
        try {
            out.write(name);
            out.write('\n');
            for (Map.Entry<Integer, TrieNode> entry : codeTable.entrySet()) {
                out.write(entry.getKey().toString());
                out.write('\t');
                out.write(trie.getValue(entry.getValue(), this));
                out.write('\n');
            }
            out.write('\n');
        } catch (IOException e) {
            throw new SymbolException("Could not save the symbol table. ", e);
        }
    }

    /**
     * Reads {@code code<TAB>symbol} lines and adds them to the table, stopping at
     * the first empty line or line without a tab (the terminator written by
     * {@link #save(BufferedWriter)}) or at the end of the input.
     * <p>
     * On a terminator line the value counter is set to one more than the highest
     * code read. If the input ends without a terminator, the counter is raised
     * (never lowered) so that it is past the highest code read.
     *
     * @param in the reader positioned at the first entry line
     * @throws MaltChainedException if reading fails or a code is not an integer
     */
    public void load(BufferedReader in) throws MaltChainedException {
        int max = 0;
        boolean terminated = false;
        String fileLine;
        try {
            while ((fileLine = in.readLine()) != null) {
                final int index = (fileLine.length() == 0) ? -1 : fileLine.indexOf('\t');
                if (index == -1) {
                    setValueCounter(max+1);
                    terminated = true;
                    break;
                }
                final int code = Integer.parseInt(fileLine.substring(0, index));
                final String str = fileLine.substring(index+1);
                final TrieNode node = trie.addValue(str, this, code);
                codeTable.put(node.getEntry(this), node);
                if (max < code) {
                    max = code;
                }
            }
            // Input ended without the terminating empty line: previously the counter
            // was left untouched here. Make sure it is past the highest code read so
            // that codes handed out later by increaseValueCounter() cannot repeat a
            // loaded one.
            if (!terminated && valueCounter <= max) {
                setValueCounter(max+1);
            }
        } catch (NumberFormatException e) {
            throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e);
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol table. ", e);
        }
    }

    /**
     * Returns the name of the symbol table.
     *
     * @return the table name
     */
    public String getName() {
        return name;
    }

    /**
     * Returns the current value counter, i.e. the value the next call to
     * {@link #increaseValueCounter()} will return.
     *
     * @return the value counter
     */
    public int getValueCounter() {
        return valueCounter;
    }

    private void setValueCounter(int valueCounter) {
        this.valueCounter = valueCounter;
    }

    /**
     * Raises the value counter to {@code code} if {@code code} is larger than
     * the current counter; otherwise leaves it unchanged.
     *
     * @param code the code to compare against the counter
     */
    protected void updateValueCounter(int code) {
        if (code > valueCounter) {
            valueCounter = code;
        }
    }

    /**
     * Returns the current value counter and then increments it.
     *
     * @return the counter value before the increment
     */
    protected int increaseValueCounter() {
        return valueCounter++;
    }

    /**
     * Returns the code of the given null value.
     *
     * @param nullValueIdentifier the null value identifier
     * @return the code of the null value
     * @throws MaltChainedException if the null values are not assigned
     */
    public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException {
        if (nullValues == null) {
            throw new SymbolException("The symbol table does not have any null-values. ");
        }
        return nullValues.nullvalueToCode(nullValueIdentifier);
    }

    /**
     * Returns the symbol string of the given null value.
     *
     * @param nullValueIdentifier the null value identifier
     * @return the symbol of the null value
     * @throws MaltChainedException if the null values are not assigned
     */
    public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException {
        if (nullValues == null) {
            throw new SymbolException("The symbol table does not have any null-values. ");
        }
        return nullValues.nullvalueToSymbol(nullValueIdentifier);
    }

    /**
     * Tells whether the symbol string is a null value of this table.
     *
     * @param symbol the symbol string
     * @return the answer of the null values object, or {@code false} if it is not assigned
     * @throws MaltChainedException declared for callers; see {@link NullValues}
     */
    public boolean isNullValue(String symbol) throws MaltChainedException {
        return nullValues != null && nullValues.isNullValue(symbol);
    }

    /**
     * Tells whether the code is a null value of this table.
     *
     * @param code the symbol code
     * @return the answer of the null values object, or {@code false} if it is not assigned
     * @throws MaltChainedException declared for callers; see {@link NullValues}
     */
    public boolean isNullValue(int code) throws MaltChainedException {
        return nullValues != null && nullValues.isNullValue(code);
    }

    /**
     * Returns the internal code-to-node map itself (not a copy).
     *
     * @return the sorted code table
     */
    public SortedMap<Integer, TrieNode> getCodeTable() {
        return codeTable;
    }

    /**
     * Returns the key set view of the code table, in ascending code order.
     *
     * @return the codes stored in the table
     */
    public Set<Integer> getCodes() {
        return codeTable.keySet();
    }

    /**
     * Returns the trie backing this table.
     *
     * @return the trie
     */
    protected Trie getTrie() {
        return trie;
    }

    /**
     * Two symbol tables are equal when they are of the same class and have
     * equal names (or both names are {@code null}).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final TrieSymbolTable other = (TrieSymbolTable)obj;
        return (name == null) ? other.name == null : name.equals(other.name);
    }

    /**
     * Hash code derived from the name only, computed on first use and cached.
     * A cached value of 0 means "not computed yet".
     */
    @Override
    public int hashCode() {
        if (cachedHash == 0) {
            cachedHash = 217 + (null == name ? 0 : name.hashCode());
        }
        return cachedHash;
    }

    /**
     * Returns the table name and the current value counter separated by a space.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append(name).append(' ').append(valueCounter);
        return sb.toString();
    }
}