001package org.maltparser.core.symbol.trie; 002 003import java.io.BufferedReader; 004import java.io.BufferedWriter; 005import java.io.IOException; 006import java.util.Set; 007import java.util.SortedMap; 008import java.util.TreeMap; 009 010import org.maltparser.core.exception.MaltChainedException; 011import org.maltparser.core.symbol.SymbolException; 012import org.maltparser.core.symbol.SymbolTable; 013import org.maltparser.core.symbol.nullvalue.InputNullValues; 014import org.maltparser.core.symbol.nullvalue.NullValues; 015import org.maltparser.core.symbol.nullvalue.OutputNullValues; 016import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId; 017/** 018 019@author Johan Hall 020@since 1.0 021*/ 022public class TrieSymbolTable implements SymbolTable { 023 private final String name; 024 private final Trie trie; 025 private final SortedMap<Integer, TrieNode> codeTable; 026 private int category; 027 private final NullValues nullValues; 028 private int valueCounter; 029 /** Cache the hash code for the symbol table */ 030 private int cachedHash; 031 032 033 public TrieSymbolTable(String _name, Trie _trie, int _category, String nullValueStrategy) throws MaltChainedException { 034 this.name = _name; 035 this.trie = _trie; 036 this.category = _category; 037 codeTable = new TreeMap<Integer, TrieNode>(); 038 if (this.category != SymbolTable.OUTPUT) { 039 nullValues = new OutputNullValues(nullValueStrategy, this); 040 } else { 041 nullValues = new InputNullValues(nullValueStrategy, this); 042 } 043 valueCounter = nullValues.getNextCode(); 044 } 045 046 public TrieSymbolTable(String _name, Trie trie) { 047 this.name = _name; 048 this.trie = trie; 049 codeTable = new TreeMap<Integer, TrieNode>(); 050 nullValues = new InputNullValues("one", this); 051 valueCounter = 1; 052 } 053 054 public int addSymbol(String symbol) throws MaltChainedException { 055 if (nullValues == null || !nullValues.isNullValue(symbol)) { 056 if (symbol == null || symbol.length() == 0) { 057 throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table"); 058 } 059 060 final TrieNode node = trie.addValue(symbol, this, -1); 061 final int code = node.getEntry(this); 062 if (!codeTable.containsKey(code)) { 063 codeTable.put(code, node); 064 } 065 return code; 066 } else { 067 return nullValues.symbolToCode(symbol); 068 } 069 } 070 071 public String getSymbolCodeToString(int code) throws MaltChainedException { 072 if (code >= 0) { 073 if (nullValues == null || !nullValues.isNullValue(code)) { 074 TrieNode node = codeTable.get(code); 075 if (node != null) { 076 return trie.getValue(node, this); 077 } else { 078 return null; 079 } 080 } else { 081 return nullValues.codeToSymbol(code); 082 } 083 } else { 084 throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. "); 085 } 086 } 087 088 public int getSymbolStringToCode(String symbol) throws MaltChainedException { 089 if (symbol != null) { 090 if (nullValues == null || !nullValues.isNullValue(symbol)) { 091 final Integer entry = trie.getEntry(symbol, this); 092 if (entry != null) { 093 return entry.intValue(); 094 } else { 095 return -1; 096 } 097 } else { 098 return nullValues.symbolToCode(symbol); 099 } 100 } else { 101 throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. "); 102 } 103 } 104 105 public double getSymbolStringToValue(String symbol) throws MaltChainedException { 106 if (symbol == null) { 107 throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. "); 108 } 109 110 return 1.0; 111 } 112 public void clearTmpStorage() { 113 114 } 115 116 public String getNullValueStrategy() { 117 if (nullValues == null) { 118 return null; 119 } 120 return nullValues.getNullValueStrategy(); 121 } 122 123 124 public int getCategory() { 125 return category; 126 } 127 128 public void saveHeader(BufferedWriter out) throws MaltChainedException { 129 try { 130 out.append('\t'); 131 out.append(getName()); 132 out.append('\t'); 133 out.append(Integer.toString(getCategory())); 134 out.append('\t'); 135 out.append(Integer.toString(SymbolTable.STRING)); 136 out.append('\t'); 137 out.append(getNullValueStrategy()); 138 out.append('\n'); 139 } catch (IOException e) { 140 throw new SymbolException("Could not save the symbol table. ", e); 141 } 142 } 143 144 public int size() { 145 return codeTable.size(); 146 } 147 148 149 public void save(BufferedWriter out) throws MaltChainedException { 150 try { 151 out.write(name); 152 out.write('\n'); 153 for (Integer code : codeTable.keySet()) { 154 out.write(code+""); 155 out.write('\t'); 156 out.write(trie.getValue(codeTable.get(code), this)); 157 out.write('\n'); 158 } 159 out.write('\n'); 160 } catch (IOException e) { 161 throw new SymbolException("Could not save the symbol table. ", e); 162 } 163 } 164 165 public void load(BufferedReader in) throws MaltChainedException { 166 int max = 0; 167 int index = 0; 168 String fileLine; 169 try { 170 while ((fileLine = in.readLine()) != null) { 171 if (fileLine.length() == 0 || (index = fileLine.indexOf('\t')) == -1) { 172 setValueCounter(max+1); 173 break; 174 } 175 int code = Integer.parseInt(fileLine.substring(0,index)); 176 final String str = fileLine.substring(index+1); 177 final TrieNode node = trie.addValue(str, this, code); 178 codeTable.put(node.getEntry(this), node); 179 if (max < code) { 180 max = code; 181 } 182 } 183 } catch (NumberFormatException e) { 184 throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e); 185 } catch (IOException e) { 186 throw new SymbolException("Could not load the symbol table. ", e); 187 } 188 } 189 190 public String getName() { 191 return name; 192 } 193 194 public int getValueCounter() { 195 return valueCounter; 196 } 197 198 private void setValueCounter(int valueCounter) { 199 this.valueCounter = valueCounter; 200 } 201 202 protected void updateValueCounter(int code) { 203 if (code > valueCounter) { 204 valueCounter = code; 205 } 206 } 207 208 protected int increaseValueCounter() { 209 return valueCounter++; 210 } 211 212 public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException { 213 if (nullValues == null) { 214 throw new SymbolException("The symbol table does not have any null-values. "); 215 } 216 return nullValues.nullvalueToCode(nullValueIdentifier); 217 } 218 219 public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException { 220 if (nullValues == null) { 221 throw new SymbolException("The symbol table does not have any null-values. "); 222 } 223 return nullValues.nullvalueToSymbol(nullValueIdentifier); 224 } 225 226 public boolean isNullValue(String symbol) throws MaltChainedException { 227 if (nullValues != null) { 228 return nullValues.isNullValue(symbol); 229 } 230 return false; 231 } 232 233 public boolean isNullValue(int code) throws MaltChainedException { 234 if (nullValues != null) { 235 return nullValues.isNullValue(code); 236 } 237 return false; 238 } 239 240// public void copy(SymbolTable fromTable) throws MaltChainedException { 241// final SortedMap<Integer, TrieNode> fromCodeTable = ((TrieSymbolTable)fromTable).getCodeTable(); 242// int max = getValueCounter()-1; 243// for (Integer code : fromCodeTable.keySet()) { 244// final String str = trie.getValue(fromCodeTable.get(code), this); 245// final TrieNode node = trie.addValue(str, this, code); 246// codeTable.put(node.getEntry(this), node); //.getCode(), node); 247// if (max < code) { 248// max = code; 249// } 250// } 251// setValueCounter(max+1); 252// } 253 254 public SortedMap<Integer, TrieNode> getCodeTable() { 255 return codeTable; 256 } 257 258 public Set<Integer> getCodes() { 259 return codeTable.keySet(); 260 } 261 262 protected Trie getTrie() { 263 return trie; 264 } 265 266 public boolean equals(Object obj) { 267 if (this == obj) 268 return true; 269 if (obj == null) 270 return false; 271 if (getClass() != obj.getClass()) 272 return false; 273 final TrieSymbolTable other = (TrieSymbolTable)obj; 274 return ((name == null) ? other.name == null : name.equals(other.name)); 275 } 276 277 public int hashCode() { 278 if (cachedHash == 0) { 279 cachedHash = 217 + (null == name ? 0 : name.hashCode()); 280 } 281 return cachedHash; 282 } 283 284 public String toString() { 285 final StringBuilder sb = new StringBuilder(); 286 sb.append(name); 287 sb.append(' '); 288 sb.append(valueCounter); 289 return sb.toString(); 290 } 291}