001package org.maltparser.core.symbol.hash; 002 003import java.io.BufferedReader; 004import java.io.BufferedWriter; 005import java.io.IOException; 006import java.util.Map; 007import java.util.Set; 008import java.util.SortedMap; 009import java.util.TreeMap; 010import java.util.regex.Pattern; 011 012 013import org.maltparser.core.exception.MaltChainedException; 014import org.maltparser.core.helper.HashMap; 015import org.maltparser.core.io.dataformat.ColumnDescription; 016import org.maltparser.core.symbol.SymbolException; 017import org.maltparser.core.symbol.SymbolTable; 018import org.maltparser.core.symbol.nullvalue.InputNullValues; 019import org.maltparser.core.symbol.nullvalue.NullValues; 020import org.maltparser.core.symbol.nullvalue.OutputNullValues; 021import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId; 022 023 024public final class HashSymbolTable implements SymbolTable { 025 private final String name; 026 private final Map<String, Integer> symbolCodeMap; 027 private final SortedMap<Integer, String> codeSymbolMap; 028 private final NullValues nullValues; 029 private final int columnCategory; 030 private int valueCounter; 031 032 public HashSymbolTable(String name, int columnCategory, String nullValueStrategy) throws MaltChainedException { 033 this.name = name; 034 this.columnCategory = columnCategory; 035 this.symbolCodeMap = new HashMap<String, Integer>(); 036 this.codeSymbolMap = new TreeMap<Integer, String>(); 037 if (columnCategory == ColumnDescription.INPUT) { 038 this.nullValues = new InputNullValues(nullValueStrategy, this); 039 } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 040 this.nullValues = new OutputNullValues(nullValueStrategy, this); 041 } else { 042 this.nullValues = new InputNullValues(nullValueStrategy, this); 043 } 044 this.valueCounter = nullValues.getNextCode(); 045 } 046 047 public HashSymbolTable(String name) { 048 this.name = name; 049 this.columnCategory = -1; 050 this.symbolCodeMap = new HashMap<String, Integer>(); 051 this.codeSymbolMap = new TreeMap<Integer, String>(); 052 this.nullValues = new InputNullValues("one", this); 053 this.valueCounter = 1; 054 } 055 056 public int addSymbol(String symbol) throws MaltChainedException { 057 if (nullValues == null || !nullValues.isNullValue(symbol)) { 058 if (symbol == null || symbol.length() == 0) { 059 throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table"); 060 } 061 062 if (!symbolCodeMap.containsKey(symbol)) { 063 int code = valueCounter; 064 symbolCodeMap.put(symbol, code); 065 codeSymbolMap.put(code, symbol); 066 valueCounter++; 067 return code; 068 } else { 069 return symbolCodeMap.get(symbol); 070 } 071 } else { 072 return nullValues.symbolToCode(symbol); 073 } 074 075 } 076 077 public String getSymbolCodeToString(int code) throws MaltChainedException { 078 if (code >= 0) { 079 if (nullValues == null || !nullValues.isNullValue(code)) { 080 if (codeSymbolMap.containsKey(code)) { 081 return codeSymbolMap.get(code); 082 } else { 083 return null; 084 } 085 } else { 086 return nullValues.codeToSymbol(code); 087 } 088 } else { 089 throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. "); 090 } 091 } 092 093 public int getSymbolStringToCode(String symbol) throws MaltChainedException { 094 if (symbol != null) { 095 if (nullValues == null || !nullValues.isNullValue(symbol)) { 096 if (symbolCodeMap.containsKey(symbol)) { 097 return symbolCodeMap.get(symbol); 098 } else { 099 return -1; 100 } 101 } else { 102 return nullValues.symbolToCode(symbol); 103 } 104 } else { 105 throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. "); 106 } 107 } 108 109 public String printSymbolTable() throws MaltChainedException { 110 StringBuilder sb = new StringBuilder(); 111 for (Integer code : codeSymbolMap.keySet()) { 112 sb.append(code+"\t"+codeSymbolMap.get(code)+"\n"); 113 } 114 return sb.toString(); 115 } 116 117 public void saveHeader(BufferedWriter out) throws MaltChainedException { 118 try { 119 out.append('\t'); 120 out.append(getName()); 121 out.append('\t'); 122 out.append(Integer.toString(getColumnCategory())); 123 out.append('\t'); 124 out.append(getNullValueStrategy()); 125 out.append('\n'); 126 } catch (IOException e) { 127 throw new SymbolException("Could not save the symbol table. ", e); 128 } 129 } 130 131 public int getColumnCategory() { 132 return columnCategory; 133 } 134 135 public String getNullValueStrategy() { 136 if (nullValues == null) { 137 return null; 138 } 139 return nullValues.getNullValueStrategy(); 140 } 141 142 public int size() { 143 return symbolCodeMap.size(); 144 } 145 146 public void save(BufferedWriter out) throws MaltChainedException { 147 try { 148 out.write(name); 149 out.write('\n'); 150 for (Integer code : codeSymbolMap.keySet()) { 151 out.write(Integer.toString(code)); 152 out.write('\t'); 153 out.write(codeSymbolMap.get(code)); 154 out.write('\n'); 155 } 156 out.write('\n'); 157 } catch (IOException e) { 158 throw new SymbolException("Could not save the symbol table. ", e); 159 } 160 } 161 162 public void load(BufferedReader in) throws MaltChainedException { 163 int max = 0; 164 String fileLine; 165 Pattern splitPattern = Pattern.compile("\t"); 166 try { 167 while ((fileLine = in.readLine()) != null) { 168 if (fileLine.length() == 0) { 169 valueCounter = max+1; 170 break; 171 } 172 String[] items = splitPattern.split(fileLine); 173 int code = Integer.parseInt(items[0]); 174 symbolCodeMap.put(items[1], code); 175 codeSymbolMap.put(code, items[1]); 176 if (max < code) { 177 max = code; 178 } 179 } 180 } catch (NumberFormatException e) { 181 throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e); 182 } catch (IOException e) { 183 throw new SymbolException("Could not load the symbol table. ", e); 184 } 185 } 186 187 public String getName() { 188 return name; 189 } 190 191 public int getValueCounter() { 192 return valueCounter; 193 } 194 195 public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException { 196 if (nullValues == null) { 197 throw new SymbolException("The symbol table does not have any null-values. "); 198 } 199 return nullValues.nullvalueToCode(nullValueIdentifier); 200 } 201 202 public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException { 203 if (nullValues == null) { 204 throw new SymbolException("The symbol table does not have any null-values. "); 205 } 206 return nullValues.nullvalueToSymbol(nullValueIdentifier); 207 } 208 209 public boolean isNullValue(String symbol) throws MaltChainedException { 210 if (nullValues != null) { 211 return nullValues.isNullValue(symbol); 212 } 213 return false; 214 } 215 216 public boolean isNullValue(int code) throws MaltChainedException { 217 if (nullValues != null) { 218 return nullValues.isNullValue(code); 219 } 220 return false; 221 } 222 223 public Set<Integer> getCodes() { 224 return codeSymbolMap.keySet(); 225 } 226 227 public boolean equals(Object obj) { 228 if (this == obj) 229 return true; 230 if (obj == null) 231 return false; 232 if (getClass() != obj.getClass()) 233 return false; 234 final HashSymbolTable other = (HashSymbolTable)obj; 235 return ((name == null) ? other.name == null : name.equals(other.name)); 236 } 237 238 public int hashCode() { 239 return 217 + (null == name ? 0 : name.hashCode()); 240 } 241 242 public String toString() { 243 final StringBuilder sb = new StringBuilder(); 244 sb.append(name); 245 sb.append(' '); 246 sb.append(valueCounter); 247 return sb.toString(); 248 } 249}