001package org.maltparser.core.symbol.hash;
002
003import java.io.BufferedReader;
004import java.io.BufferedWriter;
005import java.io.FileInputStream;
006import java.io.FileNotFoundException;
007import java.io.FileOutputStream;
008import java.io.IOException;
009import java.io.InputStreamReader;
010import java.io.OutputStreamWriter;
011import java.io.UnsupportedEncodingException;
012import java.util.Map;
013import java.util.Set;
014import java.util.regex.Pattern;
015import java.util.regex.PatternSyntaxException;
016
017import org.maltparser.core.exception.MaltChainedException;
018import org.maltparser.core.helper.HashMap;
019import org.maltparser.core.symbol.SymbolException;
020import org.maltparser.core.symbol.SymbolTable;
021import org.maltparser.core.symbol.SymbolTableHandler;
022
023
024public class HashSymbolTableHandler implements SymbolTableHandler {
025        private final Map<String,  HashSymbolTable> symbolTables;
026        
027        public HashSymbolTableHandler() { 
028                this.symbolTables = new HashMap<String, HashSymbolTable>();
029        }
030        
031        public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
032                HashSymbolTable symbolTable = symbolTables.get(tableName);
033                if (symbolTable == null) {
034                        symbolTable = new HashSymbolTable(tableName);
035                        symbolTables.put(tableName, symbolTable);
036                }
037                return symbolTable;
038        }
039        
040        public SymbolTable addSymbolTable(String tableName, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
041                HashSymbolTable symbolTable = symbolTables.get(tableName);
042                if (symbolTable == null) {
043                        symbolTable = new HashSymbolTable(tableName, columnCategory, columnType, nullValueStrategy);
044                        symbolTables.put(tableName, symbolTable);
045                }
046                return symbolTable;
047        }
048        
049        public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
050                HashSymbolTable symbolTable = symbolTables.get(tableName);
051                if (symbolTable == null) {
052                        HashSymbolTable hashParentTable = (HashSymbolTable)parentTable;
053                        symbolTable = new HashSymbolTable(tableName, hashParentTable.getCategory(), hashParentTable.getType(), hashParentTable.getNullValueStrategy());
054                        symbolTables.put(tableName, symbolTable);
055                }
056                return symbolTable;
057        }
058        
059        public SymbolTable getSymbolTable(String tableName) {
060                return symbolTables.get(tableName);
061        }
062        
063        public Set<String> getSymbolTableNames() {
064                return symbolTables.keySet();
065        }
066        
067        public void cleanUp() {}
068        
069        public void save(OutputStreamWriter osw) throws MaltChainedException  {
070                try {
071                        BufferedWriter bout = new BufferedWriter(osw);
072                        for (HashSymbolTable table : symbolTables.values()) {
073                                table.saveHeader(bout);
074                        }
075                        bout.write('\n');
076                        for (HashSymbolTable table : symbolTables.values()) {
077                                table.save(bout);
078                        }
079                        bout.close();
080                } catch (IOException e) {
081                        throw new SymbolException("Could not save the symbol tables. ", e);
082                }       
083        }
084        
085        public void save(String fileName, String charSet) throws MaltChainedException  {
086                try {
087                        save(new OutputStreamWriter(new FileOutputStream(fileName), charSet));
088                } catch (FileNotFoundException e) {
089                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be created. ", e);
090                } catch (UnsupportedEncodingException e) {
091                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
092                }
093        }
094        
095        public void loadHeader(BufferedReader bin) throws MaltChainedException {
096                String fileLine = "";
097                Pattern tabPattern = Pattern.compile("\t");
098                try {
099                        while ((fileLine = bin.readLine()) != null) {
100                                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
101                                        break;
102                                }
103                                String items[];
104                                try {
105                                        items = tabPattern.split(fileLine.substring(1));
106                                } catch (PatternSyntaxException e) {
107                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' could not split into atomic parts. ", e);
108                                }
109                                if (items.length == 4)
110                                        addSymbolTable(items[0], Integer.parseInt(items[1]), Integer.parseInt(items[2]), items[3]);
111                                else if (items.length == 3) 
112                                        addSymbolTable(items[0], Integer.parseInt(items[1]), SymbolTable.STRING, items[2]);
113                                else
114                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' must contain three or four columns. ");
115                        }
116                } catch (NumberFormatException e) {
117                        throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
118                } catch (IOException e) {
119                        throw new SymbolException("Could not load the symbol table. ", e);
120                }
121        }
122        
123        public void load(InputStreamReader isr) throws MaltChainedException  {
124                try {
125                        BufferedReader bin = new BufferedReader(isr);
126                        String fileLine;
127                        SymbolTable table = null;
128                        bin.mark(2);
129                        if (bin.read() == '\t') {
130                                bin.reset();
131                                loadHeader(bin);
132                        } else {
133                                bin.reset();
134                        }
135                        while ((fileLine = bin.readLine()) != null) {
136                                if (fileLine.length() > 0) {
137                                        table = addSymbolTable(fileLine);
138                                        table.load(bin);
139                                }
140                        }
141                        bin.close();
142                } catch (IOException e) {
143                        throw new SymbolException("Could not load the symbol tables. ", e);
144                }
145        }
146        
147        public void load(String fileName, String charSet) throws MaltChainedException  {
148                try {
149                        load(new InputStreamReader(new FileInputStream(fileName), charSet));
150                } catch (FileNotFoundException e) {
151                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
152                } catch (UnsupportedEncodingException e) {
153                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
154                }               
155        }
156        
157        public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
158                try {
159                        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charSet));
160                        String fileLine;
161                        SymbolTable table = addSymbolTable(tableName, columnCategory, columnType, nullValueStrategy);
162
163                        while ((fileLine = br.readLine()) != null) {
164                                table.addSymbol(fileLine.trim());
165                        }
166                        br.close();
167                        return table;
168                } catch (FileNotFoundException e) {
169                        throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
170                } catch (UnsupportedEncodingException e) {
171                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
172                } catch (IOException e) {
173                        throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
174                }
175        }
176        
177//      public String printSymbolTables() throws MaltChainedException  {
178//              StringBuilder sb = new StringBuilder();
179//              for (HashSymbolTable table : symbolTables.values()) {
180//                      sb.append(table.printSymbolTable());
181//              }
182//              return sb.toString();
183//      }
184}