001package org.maltparser.core.symbol.parse;
002
003import java.io.BufferedReader;
004import java.io.FileInputStream;
005import java.io.FileNotFoundException;
006import java.io.IOException;
007import java.io.InputStreamReader;
008import java.io.OutputStreamWriter;
009import java.io.UnsupportedEncodingException;
010import java.util.Set;
011import java.util.regex.Pattern;
012import java.util.regex.PatternSyntaxException;
013
014import org.maltparser.core.exception.MaltChainedException;
015import org.maltparser.core.helper.HashMap;
016import org.maltparser.core.symbol.SymbolException;
017import org.maltparser.core.symbol.SymbolTable;
018import org.maltparser.core.symbol.SymbolTableHandler;
019
020public class ParseSymbolTableHandler implements SymbolTableHandler {
021        private final SymbolTableHandler parentSymbolTableHandler;
022        private final HashMap<String,  ParseSymbolTable> symbolTables;
023        
024        public ParseSymbolTableHandler(SymbolTableHandler parentSymbolTableHandler) throws MaltChainedException {
025                this.parentSymbolTableHandler = parentSymbolTableHandler;
026                this.symbolTables = new HashMap<String, ParseSymbolTable>();
027                for (String tableName : parentSymbolTableHandler.getSymbolTableNames()) {
028                        addSymbolTable(tableName);
029                }
030        }
031
032        public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
033                ParseSymbolTable symbolTable = symbolTables.get(tableName);
034                if (symbolTable == null) {
035                        symbolTable = new ParseSymbolTable(tableName, parentSymbolTableHandler);
036                        symbolTables.put(tableName, symbolTable);
037                }
038                return symbolTable;
039        }
040        
041        public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
042                ParseSymbolTable symbolTable = symbolTables.get(tableName);
043                if (symbolTable == null) {
044                        symbolTable = new ParseSymbolTable(tableName, parentTable, parentSymbolTableHandler);
045                        symbolTables.put(tableName, symbolTable);
046                }
047                return symbolTable;
048        }
049        
050        public SymbolTable addSymbolTable(String tableName, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
051                ParseSymbolTable symbolTable = symbolTables.get(tableName);
052                if (symbolTable == null) {
053                        symbolTable = new ParseSymbolTable(tableName, columnCategory, columnType, nullValueStrategy, parentSymbolTableHandler);
054                        symbolTables.put(tableName, symbolTable);
055                }
056                return symbolTable;
057        }
058        
059        public SymbolTable getSymbolTable(String tableName) {
060                return symbolTables.get(tableName);
061        }
062        
063        public Set<String> getSymbolTableNames() {
064                return symbolTables.keySet();
065        }
066        
067        public void cleanUp() {
068                for (ParseSymbolTable table : symbolTables.values()) {
069                        table.clearTmpStorage();
070                }
071        }
072        
073        public void save(OutputStreamWriter osw) throws MaltChainedException  {
074                parentSymbolTableHandler.save(osw);     
075        }
076        
077        public void save(String fileName, String charSet) throws MaltChainedException  {
078                parentSymbolTableHandler.save(fileName, charSet);
079        }
080        
081        public void loadHeader(BufferedReader bin) throws MaltChainedException {
082                String fileLine = "";
083                Pattern tabPattern = Pattern.compile("\t");
084                try {
085                        while ((fileLine = bin.readLine()) != null) {
086                                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
087                                        break;
088                                }
089                                String items[];
090                                try {
091                                        items = tabPattern.split(fileLine.substring(1));
092                                } catch (PatternSyntaxException e) {
093                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' could not split into atomic parts. ", e);
094                                }
095                                if (items.length == 4)
096                                        addSymbolTable(items[0], Integer.parseInt(items[1]), Integer.parseInt(items[2]), items[3]);
097                                else if (items.length == 3) 
098                                        addSymbolTable(items[0], Integer.parseInt(items[1]), SymbolTable.STRING, items[2]);
099                                else
100                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' must contain three or four columns. ");
101
102                        }
103                } catch (NumberFormatException e) {
104                        throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
105                } catch (IOException e) {
106                        throw new SymbolException("Could not load the symbol table. ", e);
107                }
108        }
109        
110        public void load(InputStreamReader isr) throws MaltChainedException  {
111                try {
112                        BufferedReader bin = new BufferedReader(isr);
113                        String fileLine;
114                        SymbolTable table = null;
115                        bin.mark(2);
116                        if (bin.read() == '\t') {
117                                bin.reset();
118                                loadHeader(bin);
119                        } else {
120                                bin.reset();
121                        }
122                        while ((fileLine = bin.readLine()) != null) {
123                                if (fileLine.length() > 0) {
124                                        table = addSymbolTable(fileLine);
125                                        table.load(bin);
126                                }
127                        }
128                        bin.close();
129                } catch (IOException e) {
130                        throw new SymbolException("Could not load the symbol tables. ", e);
131                }
132        }
133        
134        public void load(String fileName, String charSet) throws MaltChainedException  {
135                try {
136                        load(new InputStreamReader(new FileInputStream(fileName), charSet));
137                } catch (FileNotFoundException e) {
138                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
139                } catch (UnsupportedEncodingException e) {
140                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
141                }
142        }
143        
144        public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
145                try {
146                        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charSet));
147                        String fileLine;
148                        SymbolTable table = addSymbolTable(tableName, columnCategory, columnType, nullValueStrategy);
149
150                        while ((fileLine = br.readLine()) != null) {
151                                table.addSymbol(fileLine.trim());
152                        }
153                        return table;
154                } catch (FileNotFoundException e) {
155                        throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
156                } catch (UnsupportedEncodingException e) {
157                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
158                } catch (IOException e) {
159                        throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
160                }
161        }
162        
163//      public String printSymbolTables() throws MaltChainedException  {
164//              return parentSymbolTableHandler.printSymbolTables();
165//      }
166}