001package org.maltparser.core.symbol.hash;
002
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashMap;
import org.maltparser.core.symbol.SymbolException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.nullvalue.InputNullValues;
import org.maltparser.core.symbol.nullvalue.NullValues;
import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
import org.maltparser.core.symbol.nullvalue.OutputNullValues;
017
018
019public final class HashSymbolTable implements SymbolTable {
020        private final String name;
021        private final Map<String, Integer> symbolCodeMap;
022        private final Map<Integer, String> codeSymbolMap;
023        private final Map<String, Double> symbolValueMap;
024        private final NullValues nullValues;
025        private final int category;
026        private final int type;
027        private int valueCounter;
028        
029        public HashSymbolTable(String _name, int _category, int _type, String nullValueStrategy) throws MaltChainedException {
030                this.name = _name;
031                this.category = _category;
032                this.type = _type;
033                this.symbolCodeMap = new HashMap<String, Integer>();
034                this.codeSymbolMap = new HashMap<Integer, String>();
035                this.symbolValueMap = new HashMap<String, Double>();
036                if (this.category == SymbolTable.OUTPUT) {
037                        this.nullValues = new OutputNullValues(nullValueStrategy, this);
038                } else {
039                        this.nullValues = new InputNullValues(nullValueStrategy, this);
040                }
041                this.valueCounter = nullValues.getNextCode();
042        }
043        
044        public HashSymbolTable(String _name) { 
045                this.name = _name;
046                this.category = SymbolTable.NA;
047                this.type = SymbolTable.STRING;
048                this.symbolCodeMap = new HashMap<String, Integer>();
049                this.codeSymbolMap = new HashMap<Integer, String>();
050                this.symbolValueMap = new HashMap<String, Double>();
051                this.nullValues = new InputNullValues("one", this);
052                this.valueCounter = 1;
053        }
054        
055        public int addSymbol(String symbol) throws MaltChainedException {
056                if (nullValues == null || !nullValues.isNullValue(symbol)) {
057                        if (symbol == null || symbol.length() == 0) {
058                                throw new SymbolException("Symbol table error: empty string cannot be added to the symbol table");
059                        }
060
061                        if (this.type == SymbolTable.REAL) {
062                                addSymbolValue(symbol);
063                        }
064                        if (!symbolCodeMap.containsKey(symbol)) {
065                                int code = valueCounter;
066                                symbolCodeMap.put(symbol, code);
067                                codeSymbolMap.put(code, symbol);
068                                valueCounter++;
069                                return code;
070                        } else {
071                                return symbolCodeMap.get(symbol);
072                        }
073                } else {
074                        return nullValues.symbolToCode(symbol);
075                }
076        }
077        
078        public double addSymbolValue(String symbol) throws MaltChainedException {
079                if (!symbolValueMap.containsKey(symbol)) {
080                        Double value = Double.valueOf(symbol);
081                        symbolValueMap.put(symbol, value);
082                        return value;
083                } else {
084                        return symbolValueMap.get(symbol);
085                }
086        }
087        
088        public String getSymbolCodeToString(int code) throws MaltChainedException {
089                if (code >= 0) {
090                        if (nullValues == null || !nullValues.isNullValue(code)) {
091                                return codeSymbolMap.get(code);
092                        } else {
093                                return nullValues.codeToSymbol(code);
094                        }
095                } else {
096                        throw new SymbolException("The symbol code '"+code+"' cannot be found in the symbol table. ");
097                }
098        }
099        
100        public int getSymbolStringToCode(String symbol) throws MaltChainedException {
101                if (symbol != null) {
102                        if (nullValues == null || !nullValues.isNullValue(symbol)) {
103                                Integer value = symbolCodeMap.get(symbol);
104                                return (value != null) ? value.intValue() : -1; 
105                        } else {
106                                return nullValues.symbolToCode(symbol);
107                        }
108                } else {
109                        throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. ");
110                }
111        }
112        
113        public double getSymbolStringToValue(String symbol) throws MaltChainedException {
114                if (symbol != null) {
115                        if (type == SymbolTable.REAL && nullValues == null || !nullValues.isNullValue(symbol)) {
116                                Double value = symbolValueMap.get(symbol);
117                                return (value != null) ? value.doubleValue() : Double.parseDouble(symbol); 
118                        } else {
119                                return 1.0;
120                        }
121                } else {
122                        throw new SymbolException("The symbol code '"+symbol+"' cannot be found in the symbol table. ");
123                }
124        }
125        
126        public void saveHeader(BufferedWriter out) throws MaltChainedException  {
127                try {
128                        out.append('\t');
129                        out.append(getName());
130                        out.append('\t');
131                        out.append(Integer.toString(getCategory()));
132                        out.append('\t');
133                        out.append(Integer.toString(getType()));
134                        out.append('\t');
135                        out.append(getNullValueStrategy());
136                        out.append('\n');
137                } catch (IOException e) {
138                        throw new SymbolException("Could not save the symbol table. ", e);
139                }
140        }
141        
142        public int getCategory() {
143                return category;
144        }
145        
146        public int getType() {
147                return type;
148        }
149        
150        public String getNullValueStrategy() {
151                if (nullValues == null) {
152                        return null;
153                }
154                return nullValues.getNullValueStrategy();
155        }
156        
157        public int size() {
158                return symbolCodeMap.size();
159        }
160        
161        public void save(BufferedWriter out) throws MaltChainedException  {
162                try {
163                        out.write(name);
164                        out.write('\n');
165                        if (this.type != SymbolTable.REAL) {
166                                // TODO sort codes before writing due to change from TreeMap to HashMap
167                                for (Integer code : codeSymbolMap.keySet()) {
168                                        out.write(Integer.toString(code));
169                                        out.write('\t');
170                                        out.write(codeSymbolMap.get(code));
171                                        out.write('\n');
172                                }
173                        } else {
174                                for (String symbol : symbolValueMap.keySet()) {
175                                        out.write(1);
176                                        out.write('\t');
177                                        out.write(symbol);
178                                        out.write('\n');
179                                }
180                        }
181                        out.write('\n');
182                } catch (IOException e) {
183                        throw new SymbolException("Could not save the symbol table. ", e);
184                }
185        }
186
187        public void load(BufferedReader in) throws MaltChainedException {       
188                int max = 0;
189                String fileLine;
190                try {
191                        while ((fileLine = in.readLine()) != null) {
192                                int index;
193                                if (fileLine.length() == 0 || (index = fileLine.indexOf('\t')) == -1) {
194                                        valueCounter = max+1;
195                                        break;
196                                }
197                                
198                                if (this.type != SymbolTable.REAL) {
199                                        int code;
200                                    try {
201                                        code = Integer.parseInt(fileLine.substring(0,index));
202                                        } catch (NumberFormatException e) {
203                                                throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the first column. ", e);
204                                        }
205                                    final String symbol = fileLine.substring(index+1);
206                                        symbolCodeMap.put(symbol, code);
207                                        codeSymbolMap.put(code, symbol);
208                                                        
209                                        if (max < code) {
210                                                max = code;
211                                        }
212                                } else {
213                                    final String symbol = fileLine.substring(index+1);
214                                        symbolValueMap.put(symbol, Double.parseDouble(symbol));
215                                        
216                                        max = 1;
217                                }
218                        }
219                } catch (IOException e) {
220                        throw new SymbolException("Could not load the symbol table. ", e);
221                }
222        }
223        
224        public String getName() {
225                return name;
226        }
227
228        public int getValueCounter() {
229                return valueCounter;
230        }
231
232        public int getNullValueCode(NullValueId nullValueIdentifier) throws MaltChainedException {
233                if (nullValues == null) {
234                        throw new SymbolException("The symbol table does not have any null-values. ");
235                }
236                return nullValues.nullvalueToCode(nullValueIdentifier);
237        }
238        
239        public String getNullValueSymbol(NullValueId nullValueIdentifier) throws MaltChainedException {
240                if (nullValues == null) {
241                        throw new SymbolException("The symbol table does not have any null-values. ");
242                }
243                return nullValues.nullvalueToSymbol(nullValueIdentifier);
244        }
245        
246        public boolean isNullValue(String symbol) throws MaltChainedException {
247                if (nullValues != null) {
248                        return nullValues.isNullValue(symbol);
249                } 
250                return false;
251        }
252        
253        public boolean isNullValue(int code) throws MaltChainedException {
254                if (nullValues != null) {
255                        return nullValues.isNullValue(code);
256                } 
257                return false;
258        }
259        
260        public Set<Integer> getCodes() {
261                return codeSymbolMap.keySet();
262        }
263        
264        public boolean equals(Object obj) {
265                if (this == obj)
266                        return true;
267                if (obj == null)
268                        return false;
269                if (getClass() != obj.getClass())
270                        return false;
271                final HashSymbolTable other = (HashSymbolTable)obj;
272                return ((name == null) ? other.name == null : name.equals(other.name));
273        }
274
275        public int hashCode() {
276                return 217 + (null == name ? 0 : name.hashCode());
277        }
278        
279        public String toString() {
280                final StringBuilder sb = new StringBuilder();
281                sb.append(name);
282                sb.append(' ');
283                sb.append(valueCounter);
284                return sb.toString();
285        }
286}