001package org.maltparser.core.symbol.hash;
002
003import java.io.BufferedReader;
004import java.io.BufferedWriter;
005import java.io.IOException;
006import java.util.Map;
007import java.util.Set;
008import java.util.SortedMap;
009import java.util.TreeMap;
010import java.util.regex.Pattern;
011
012
013import org.maltparser.core.exception.MaltChainedException;
014import org.maltparser.core.helper.HashMap;
015import org.maltparser.core.io.dataformat.ColumnDescription;
016import org.maltparser.core.symbol.SymbolException;
017import org.maltparser.core.symbol.SymbolTable;
018import org.maltparser.core.symbol.nullvalue.InputNullValues;
019import org.maltparser.core.symbol.nullvalue.NullValues;
020import org.maltparser.core.symbol.nullvalue.OutputNullValues;
021import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
022
023
024public final class HashSymbolTable implements SymbolTable {
025        private final String name;
026        private final Map<String, Integer> symbolCodeMap;
027        private final SortedMap<Integer, String> codeSymbolMap;
028        private final NullValues nullValues;
029        private final int columnCategory;
030        private int valueCounter;
031        
032        public HashSymbolTable(String name, int columnCategory, String nullValueStrategy) throws MaltChainedException {
033                this.name = name;
034                this.columnCategory = columnCategory;
035                this.symbolCodeMap = new HashMap<String, Integer>();
036                this.codeSymbolMap = new TreeMap<Integer, String>();
037                if (columnCategory == ColumnDescription.INPUT) {
038                        this.nullValues = new InputNullValues(nullValueStrategy, this);
039                } else if (columnCategory == ColumnDescription.DEPENDENCY_EDGE_LABEL) {
040                        this.nullValues = new OutputNullValues(nullValueStrategy, this);
041                } else {
042                        this.nullValues = new InputNullValues(nullValueStrategy, this);
043                }
044                this.valueCounter = nullValues.getNextCode();
045        }
046        
047        public HashSymbolTable(String name) { 
048                this.name = name;
049                this.columnCategory = -1;
050                this.symbolCodeMap = new HashMap<String, Integer>();
051                this.codeSymbolMap = new TreeMap<Integer, String>();
052                this.nullValues = new InputNullValues("one", this);
053                this.valueCounter = 1;
054        }
055        
056        public int addSymbol(String symbol) throws MaltChainedException {
057                if (nullValues == null || !nullValues.isNullValue(symbol)) {
058                        if (symbol == null || symbol.length() == 0) {
059                                throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
060                        }
061        
062                        if (!symbolCodeMap.containsKey(symbol)) {
063                                int code = valueCounter;
064                                symbolCodeMap.put(symbol, code);
065                                codeSymbolMap.put(code, symbol);
066                                valueCounter++;
067                                return code;
068                        } else {
069                                return symbolCodeMap.get(symbol);
070                        }
071                } else {
072                        return nullValues.symbolToCode(symbol);
073                }
074
075        }
076        
077        public String getSymbolCodeToString(int code) throws MaltChainedException {
078                if (code >= 0) {
079                        if (nullValues == null || !nullValues.isNullValue(code)) {
080                                if (codeSymbolMap.containsKey(code)) {
081                                        return codeSymbolMap.get(code);
082                                } else {
083                                        return null;
084                                }
085                        } else {
086                                return nullValues.codeToSymbol(code);
087                        }
088                } else {
089                        throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. ");
090                }
091        }
092        
093        public int getSymbolStringToCode(String symbol) throws MaltChainedException {
094                if (symbol != null) {
095                        if (nullValues == null || !nullValues.isNullValue(symbol)) {
096                                if (symbolCodeMap.containsKey(symbol)) {
097                                        return symbolCodeMap.get(symbol);
098                                } else {
099                                        return -1;
100                                }
101                        } else {
102                                return nullValues.symbolToCode(symbol);
103                        }
104                } else {
105                        throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. ");
106                }
107        }
108        
109        public String printSymbolTable() throws MaltChainedException {
110                StringBuilder sb = new StringBuilder();
111                for (Integer code : codeSymbolMap.keySet()) {
112                        sb.append(code+"\t"+codeSymbolMap.get(code)+"\n");
113                }
114                return sb.toString();
115        }
116        
117        public void saveHeader(BufferedWriter out) throws MaltChainedException  {
118                try {
119                        out.append('\t');
120                        out.append(getName());
121                        out.append('\t');
122                        out.append(Integer.toString(getColumnCategory()));
123                        out.append('\t');
124                        out.append(getNullValueStrategy());
125                        out.append('\n');
126                } catch (IOException e) {
127                        throw new SymbolException("Could not save the symbol table. ", e);
128                }
129        }
130        
131        public int getColumnCategory() {
132                return columnCategory;
133        }
134        
135        public String getNullValueStrategy() {
136                if (nullValues == null) {
137                        return null;
138                }
139                return nullValues.getNullValueStrategy();
140        }
141        
142        public int size() {
143                return symbolCodeMap.size();
144        }
145        
146        public void save(BufferedWriter out) throws MaltChainedException  {
147                try {
148                        out.write(name);
149                        out.write('\n');
150                        for (Integer code : codeSymbolMap.keySet()) {
151                                out.write(Integer.toString(code));
152                                out.write('\t');
153                                out.write(codeSymbolMap.get(code));
154                                out.write('\n');
155                        }
156                        out.write('\n');
157                } catch (IOException e) {
158                        throw new SymbolException("Could not save the symbol table. ", e);
159                }
160        }
161        
162        public void load(BufferedReader in) throws MaltChainedException {               
163                int max = 0;
164                String fileLine;
165                Pattern splitPattern = Pattern.compile("\t");
166                try {
167                        while ((fileLine = in.readLine()) != null) {
168                                if (fileLine.length() == 0) {
169                                        valueCounter = max+1;
170                                        break;
171                                }
172                                String[] items = splitPattern.split(fileLine);
173                                int code = Integer.parseInt(items[0]);
174                                symbolCodeMap.put(items[1], code);
175                                codeSymbolMap.put(code, items[1]);
176                                if (max < code) {
177                                        max = code;
178                                }
179                        }
180                } catch (NumberFormatException e) {
181                        throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e);
182                } catch (IOException e) {
183                        throw new SymbolException("Could not load the symbol table. ", e);
184                }
185        }
186        
187        public String getName() {
188                return name;
189        }
190
191        public int getValueCounter() {
192                return valueCounter;
193        }
194
195        public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException {
196                if (nullValues == null) {
197                        throw new SymbolException("The symbol table does not have any null-values. ");
198                }
199                return nullValues.nullvalueToCode(nullValueIdentifier);
200        }
201        
202        public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException {
203                if (nullValues == null) {
204                        throw new SymbolException("The symbol table does not have any null-values. ");
205                }
206                return nullValues.nullvalueToSymbol(nullValueIdentifier);
207        }
208        
209        public boolean isNullValue(String symbol) throws MaltChainedException {
210                if (nullValues != null) {
211                        return nullValues.isNullValue(symbol);
212                } 
213                return false;
214        }
215        
216        public boolean isNullValue(int code) throws MaltChainedException {
217                if (nullValues != null) {
218                        return nullValues.isNullValue(code);
219                } 
220                return false;
221        }
222        
223        public Set<Integer> getCodes() {
224                return codeSymbolMap.keySet();
225        }
226        
227        public boolean equals(Object obj) {
228                if (this == obj)
229                        return true;
230                if (obj == null)
231                        return false;
232                if (getClass() != obj.getClass())
233                        return false;
234                final HashSymbolTable other = (HashSymbolTable)obj;
235                return ((name == null) ? other.name == null : name.equals(other.name));
236        }
237
238        public int hashCode() {
239                return 217 + (null == name ? 0 : name.hashCode());
240        }
241        
242        public String toString() {
243                final StringBuilder sb = new StringBuilder();
244                sb.append(name);
245                sb.append(' ');
246                sb.append(valueCounter);
247                return sb.toString();
248        }
249}