001    package org.maltparser.core.symbol.trie;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.IOException;
006    import java.util.Set;
007    import java.util.SortedMap;
008    import java.util.TreeMap;
009    
010    import org.apache.log4j.Logger;
011    import org.maltparser.core.helper.HashMap;
012    import org.maltparser.core.exception.MaltChainedException;
013    import org.maltparser.core.io.dataformat.ColumnDescription;
014    import org.maltparser.core.symbol.SymbolException;
015    import org.maltparser.core.symbol.SymbolTable;
016    import org.maltparser.core.symbol.nullvalue.InputNullValues;
017    import org.maltparser.core.symbol.nullvalue.NullValues;
018    import org.maltparser.core.symbol.nullvalue.OutputNullValues;
019    import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
020    /**
021    
022    @author Johan Hall
023    @since 1.0
024    */
025    public class TrieSymbolTable implements SymbolTable {
026            private final String name;
027            private final Trie trie;
028            private final SortedMap<Integer, TrieNode> codeTable;
029            private int columnCategory;
030            private final NullValues nullValues;
031            private int valueCounter;
032        /** Cache the hash code for the symbol table */
033        private int cachedHash;
034        
035            /** Special treatment during parsing */
036            private final int symbolTableMode;
037            private HashMap<String, Integer> tmpStorageStrIntMap;
038            private HashMap<Integer, String> tmpStorageIntStrMap;
039            private int tmpStorageValueCounter;
040        
041            public TrieSymbolTable(String name, Trie trie, int columnCategory, String nullValueStrategy, int symbolTableMode) throws MaltChainedException {
042                    this.name = name;
043                    this.trie = trie;
044                    this.columnCategory = columnCategory;
045                    
046                    codeTable = new TreeMap<Integer, TrieNode>();
047                    if (columnCategory == ColumnDescription.INPUT) {
048                            nullValues = new InputNullValues(nullValueStrategy, this);
049                    } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
050                            nullValues = new OutputNullValues(nullValueStrategy, this);
051                    } else {
052                            nullValues = new InputNullValues(nullValueStrategy, this);
053                    }
054                    valueCounter = nullValues.getNextCode();
055                    
056                    this.symbolTableMode = symbolTableMode;
057                    if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) {
058                            tmpStorageStrIntMap = new HashMap<String, Integer>();
059                            tmpStorageIntStrMap = new HashMap<Integer, String>();
060                            tmpStorageValueCounter = -1;
061                    }
062            }
063            
064            public TrieSymbolTable(String name, Trie trie, int symbolTableMode) {
065                    this.name = name;
066                    this.trie = trie;
067                    codeTable = new TreeMap<Integer, TrieNode>();
068                    nullValues = new InputNullValues("one", this);
069                    valueCounter = 1;
070                    this.symbolTableMode = symbolTableMode;
071                    if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) {
072                            tmpStorageStrIntMap = new HashMap<String, Integer>();
073                            tmpStorageIntStrMap = new HashMap<Integer, String>();
074                            tmpStorageValueCounter = -1;
075                    }
076            }
077            
078            public int addSymbol(String symbol) throws MaltChainedException {
079                    if (nullValues == null || !nullValues.isNullValue(symbol)) {
080                            if (symbol == null || symbol.length() == 0) {
081                                    throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
082                            }
083                            
084                            if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
085                                    final TrieNode node = trie.addValue(symbol, this, -1);
086                                    final int code = node.getEntry(this); 
087                                    if (!codeTable.containsKey(code)) {
088                                            codeTable.put(code, node);
089                                    }
090                                    return code;
091                            } else { // this.symbolTableMode == ADD_NEW_TO_TMP_STORAGE      
092                                    Integer entry = trie.getEntry(symbol, this);
093                                    if (entry != null) {
094                                            return entry.intValue();
095                                    }
096                                    if (!tmpStorageStrIntMap.containsKey(symbol)) {
097    //                                      System.out.println("!tmpStorageStrIntMap.containsKey(symbol) : " + this.getName() + ": " + symbol.toString());
098                                            if (tmpStorageValueCounter == -1) {
099                                                    tmpStorageValueCounter = valueCounter + 1;
100                                            } else {
101                                                    tmpStorageValueCounter++;
102                                            }
103                                            tmpStorageStrIntMap.put(symbol, tmpStorageValueCounter);
104                                            tmpStorageIntStrMap.put(tmpStorageValueCounter, symbol);
105                                            return tmpStorageValueCounter;
106                                    } else {
107                                            return tmpStorageStrIntMap.get(symbol);
108                                    }
109                            } 
110                    } else {
111                            return nullValues.symbolToCode(symbol);
112                    }
113            }
114            
115            public int addSymbol(StringBuilder symbol) throws MaltChainedException {
116                    if (nullValues == null || !nullValues.isNullValue(symbol)) {
117                            if (symbol == null || symbol.length() == 0) {
118                                    throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
119                            }
120                            
121                            if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
122                                    final TrieNode node = trie.addValue(symbol, this, -1);
123                                    final int code = node.getEntry(this);
124                                    if (!codeTable.containsKey(code)) {
125                                            codeTable.put(code, node);
126                                    }
127                                    return code;
128                            } else { // this.symbolTableMode == ADD_NEW_TO_TMP_STORAGE
129                                    Integer entry = trie.getEntry(symbol.toString(), this);
130                                    
131                                    if (entry != null) {
132                                            return entry.intValue();
133                                    }
134                                    if (!tmpStorageStrIntMap.containsKey(symbol)) {
135                                            if (tmpStorageValueCounter == -1) {
136                                                    tmpStorageValueCounter = valueCounter + 1;
137                                            } else {
138                                                    tmpStorageValueCounter++;
139                                            }
140                                            tmpStorageStrIntMap.put(symbol.toString(), tmpStorageValueCounter);
141                                            tmpStorageIntStrMap.put(tmpStorageValueCounter, symbol.toString());
142                                            return tmpStorageValueCounter;
143                                    } else {
144                                            return tmpStorageStrIntMap.get(symbol);
145                                    }
146                            }
147                    } else {
148                            return nullValues.symbolToCode(symbol);
149                    }
150            }
151            
152            public String getSymbolCodeToString(int code) throws MaltChainedException {
153                    if (code >= 0) {
154                            if (nullValues == null || !nullValues.isNullValue(code)) {
155                                    if (trie == null) {
156                                            throw new SymbolException("The symbol table is corrupt. ");
157                                    }
158                                    if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
159                                            return trie.getValue(codeTable.get(code), this);
160                                    } else {
161                                            TrieNode node = codeTable.get(code);
162                                            if (node != null) {
163                                                    return trie.getValue(node, this);
164                                            } else {
165                                                    return tmpStorageIntStrMap.get(code);
166                                            }
167                                    }
168                            } else {
169                                    return nullValues.codeToSymbol(code);
170                            }
171                    } else {
172                            throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. ");
173                    }
174            }
175            
176            public int getSymbolStringToCode(String symbol) throws MaltChainedException {
177                    if (symbol != null) {
178                            if (nullValues == null || !nullValues.isNullValue(symbol)) {
179                                    if (trie == null) {
180                                            throw new SymbolException("The symbol table is corrupt. ");
181                                    } 
182                                    if (this.symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TRIE) {
183                                            final Integer entry = trie.getEntry(symbol, this);
184                                            if (entry == null) {
185                                                    throw new SymbolException("Could not find the symbol '"+symbol+"' in the symbol table. ");
186                                            }
187                                            return entry.intValue(); 
188                                    } else {
189                                            final Integer entry = trie.getEntry(symbol, this);
190                                            if (entry != null) {
191                                                    return entry.intValue(); 
192                                            } else {
193                                                    Integer tmpEntry = tmpStorageStrIntMap.get(symbol);
194                                                    if (tmpEntry == null) {
195                                                            throw new SymbolException("Could not find the symbol '"+symbol+"' in the symbol table. "); 
196                                                    } 
197                                                    return tmpEntry.intValue();
198                                            }
199                                    }
200                            } else {
201                                    return nullValues.symbolToCode(symbol);
202                            }
203                    } else {
204                            throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. ");
205                    }
206            }
207    
208            public void clearTmpStorage() {
209                    if (symbolTableMode == TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE) {
210                            tmpStorageIntStrMap.clear();
211                            tmpStorageStrIntMap.clear();
212                            tmpStorageValueCounter = -1;
213                    }
214            }
215            public String getNullValueStrategy() {
216                    if (nullValues == null) {
217                            return null;
218                    }
219                    return nullValues.getNullValueStrategy();
220            }
221            
222            
223            public int getColumnCategory() {
224                    return columnCategory;
225            }
226            
227            public void printSymbolTable(Logger logger) throws MaltChainedException {
228                    for (Integer code : codeTable.keySet()) {
229                            logger.info(code+"\t"+trie.getValue(codeTable.get(code), this)+"\n");
230                    }
231            }
232            
233            public void saveHeader(BufferedWriter out) throws MaltChainedException  {
234                    try {
235                            out.append('\t');
236                            out.append(getName());
237                            out.append('\t');
238                            out.append(Integer.toString(getColumnCategory()));
239                            out.append('\t');
240                            out.append(getNullValueStrategy());
241                            out.append('\n');
242                    } catch (IOException e) {
243                            throw new SymbolException("Could not save the symbol table. ", e);
244                    }
245            }
246            
247            public int size() {
248                    return codeTable.size();
249            }
250            
251            
252            public void save(BufferedWriter out) throws MaltChainedException  {
253                    try {
254                            out.write(name);
255                            out.write('\n');
256                            for (Integer code : codeTable.keySet()) {
257                                    out.write(code+"");
258                                    out.write('\t');
259                                    out.write(trie.getValue(codeTable.get(code), this));
260                                    out.write('\n');
261                            }
262                            out.write('\n');
263                    } catch (IOException e) {
264                            throw new SymbolException("Could not save the symbol table. ", e);
265                    }
266            }
267            
268            public void load(BufferedReader in) throws MaltChainedException {
269                    int max = 0;
270                    int index = 0;
271                    String fileLine;
272                    try {
273                            while ((fileLine = in.readLine()) != null) {
274                                    if (fileLine.length() == 0 || (index = fileLine.indexOf('\t')) == -1) {
275                                            setValueCounter(max+1);
276                                            break;
277                                    }
278                                    int code = Integer.parseInt(fileLine.substring(0,index));
279                                    final String str = fileLine.substring(index+1);
280                                    final TrieNode node = trie.addValue(str, this, code);
281                                    codeTable.put(node.getEntry(this), node); //.getCode(), node);
282                                    if (max < code) {
283                                            max = code;
284                                    }
285                            }
286                    } catch (NumberFormatException e) {
287                            throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e);
288                    } catch (IOException e) {
289                            throw new SymbolException("Could not load the symbol table. ", e);
290                    }
291            }
292            
293            public String getName() {
294                    return name;
295            }
296    
297            public int getValueCounter() {
298                    return valueCounter;
299            }
300    
301            private void setValueCounter(int valueCounter) {
302                    this.valueCounter = valueCounter;
303            }
304            
305            protected void updateValueCounter(int code) {
306                    if (code > valueCounter) {
307                            valueCounter = code;
308                    }
309            }
310            
311            protected int increaseValueCounter() {
312                    return valueCounter++;
313            }
314            
315            public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException {
316                    if (nullValues == null) {
317                            throw new SymbolException("The symbol table does not have any null-values. ");
318                    }
319                    return nullValues.nullvalueToCode(nullValueIdentifier);
320            }
321            
322            public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException {
323                    if (nullValues == null) {
324                            throw new SymbolException("The symbol table does not have any null-values. ");
325                    }
326                    return nullValues.nullvalueToSymbol(nullValueIdentifier);
327            }
328            
329            public boolean isNullValue(String symbol) throws MaltChainedException {
330                    if (nullValues != null) {
331                            return nullValues.isNullValue(symbol);
332                    } 
333                    return false;
334            }
335            
336            public boolean isNullValue(int code) throws MaltChainedException {
337                    if (nullValues != null) {
338                            return nullValues.isNullValue(code);
339                    } 
340                    return false;
341            }
342            
343            public void copy(SymbolTable fromTable) throws MaltChainedException {
344                    final SortedMap<Integer, TrieNode> fromCodeTable =  ((TrieSymbolTable)fromTable).getCodeTable();
345                    int max = getValueCounter()-1;
346                    for (Integer code : fromCodeTable.keySet()) {
347                            final String str = trie.getValue(fromCodeTable.get(code), this);
348                            final TrieNode node = trie.addValue(str, this, code);
349                            codeTable.put(node.getEntry(this), node); //.getCode(), node);
350                            if (max < code) {
351                                    max = code;
352                            }
353                    }
354                    setValueCounter(max+1);
355            }
356    
357            public SortedMap<Integer, TrieNode> getCodeTable() {
358                    return codeTable;
359            }
360            
361            public Set<Integer> getCodes() {
362                    return codeTable.keySet();
363            }
364            
365            protected Trie getTrie() {
366                    return trie;
367            }
368            
369            public boolean equals(Object obj) {
370                    if (this == obj)
371                            return true;
372                    if (obj == null)
373                            return false;
374                    if (getClass() != obj.getClass())
375                            return false;
376                    final TrieSymbolTable other = (TrieSymbolTable)obj;
377                    return ((name == null) ? other.name == null : name.equals(other.name));
378            }
379    
380            public int hashCode() {
381                    if (cachedHash == 0) {
382                            cachedHash = 217 + (null == name ? 0 : name.hashCode());
383                    }
384                    return cachedHash;
385            }
386            
387            public String toString() {
388                    final StringBuilder sb = new StringBuilder();
389                    sb.append(name);
390                    sb.append(' ');
391                    sb.append(valueCounter);
392                    return sb.toString();
393            }
394    }