001package org.maltparser.core.symbol.parse;
002
003import java.io.BufferedReader;
004import java.io.FileInputStream;
005import java.io.FileNotFoundException;
006import java.io.IOException;
007import java.io.InputStreamReader;
008import java.io.OutputStreamWriter;
009import java.io.UnsupportedEncodingException;
010import java.util.Set;
011import java.util.regex.Pattern;
012import java.util.regex.PatternSyntaxException;
013
014import org.maltparser.core.exception.MaltChainedException;
015import org.maltparser.core.helper.HashMap;
016
017import org.maltparser.core.symbol.SymbolException;
018import org.maltparser.core.symbol.SymbolTable;
019import org.maltparser.core.symbol.SymbolTableHandler;
020
021public class ParseSymbolTableHandler implements SymbolTableHandler {
022        private final SymbolTableHandler parentSymbolTableHandler;
023        private final HashMap<String,  ParseSymbolTable> symbolTables;
024        
025        public ParseSymbolTableHandler(SymbolTableHandler parentSymbolTableHandler) throws MaltChainedException {
026                this.parentSymbolTableHandler = parentSymbolTableHandler;
027                this.symbolTables = new HashMap<String, ParseSymbolTable>();
028                for (String tableName : parentSymbolTableHandler.getSymbolTableNames()) {
029                        addSymbolTable(tableName);
030                }
031        }
032
033        public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
034                ParseSymbolTable symbolTable = symbolTables.get(tableName);
035                if (symbolTable == null) {
036                        symbolTable = new ParseSymbolTable(tableName, parentSymbolTableHandler);
037                        symbolTables.put(tableName, symbolTable);
038                }
039                return symbolTable;
040        }
041        
042        public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
043                ParseSymbolTable symbolTable = symbolTables.get(tableName);
044                if (symbolTable == null) {
045                        symbolTable = new ParseSymbolTable(tableName, parentTable, parentSymbolTableHandler);
046                        symbolTables.put(tableName, symbolTable);
047                }
048                return symbolTable;
049        }
050        
051        public SymbolTable addSymbolTable(String tableName, int columnCategory, String nullValueStrategy) throws MaltChainedException {
052                ParseSymbolTable symbolTable = symbolTables.get(tableName);
053                if (symbolTable == null) {
054                        symbolTable = new ParseSymbolTable(tableName, columnCategory, nullValueStrategy, parentSymbolTableHandler);
055                        symbolTables.put(tableName, symbolTable);
056                }
057                return symbolTable;
058        }
059        
060        public SymbolTable getSymbolTable(String tableName) {
061                return symbolTables.get(tableName);
062        }
063        
064        public Set<String> getSymbolTableNames() {
065                return symbolTables.keySet();
066        }
067        
068        public void cleanUp() {
069//              System.out.println("cleanUp");
070                for (ParseSymbolTable table : symbolTables.values()) {
071                        table.clearTmpStorage();
072                }
073        }
074        
075        public void save(OutputStreamWriter osw) throws MaltChainedException  {
076                parentSymbolTableHandler.save(osw);     
077        }
078        
079        public void save(String fileName, String charSet) throws MaltChainedException  {
080                parentSymbolTableHandler.save(fileName, charSet);
081        }
082        
083        public void loadHeader(BufferedReader bin) throws MaltChainedException {
084                String fileLine = "";
085                Pattern tabPattern = Pattern.compile("\t");
086                try {
087                        while ((fileLine = bin.readLine()) != null) {
088                                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
089                                        break;
090                                }
091                                String items[];
092                                try {
093                                        items = tabPattern.split(fileLine.substring(1));
094                                } catch (PatternSyntaxException e) {
095                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' could not split into atomic parts. ", e);
096                                }
097                                if (items.length != 3) {
098                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' must contain four columns. ");
099                                }
100                                addSymbolTable(items[0], Integer.parseInt(items[1]), items[2]);
101                        }
102                } catch (NumberFormatException e) {
103                        throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
104                } catch (IOException e) {
105                        throw new SymbolException("Could not load the symbol table. ", e);
106                }
107        }
108        
109        public void load(InputStreamReader isr) throws MaltChainedException  {
110                try {
111                        BufferedReader bin = new BufferedReader(isr);
112                        String fileLine;
113                        SymbolTable table = null;
114                        bin.mark(2);
115                        if (bin.read() == '\t') {
116                                bin.reset();
117                                loadHeader(bin);
118                        } else {
119                                bin.reset();
120                        }
121                        while ((fileLine = bin.readLine()) != null) {
122                                if (fileLine.length() > 0) {
123                                        table = addSymbolTable(fileLine);
124                                        table.load(bin);
125                                }
126                        }
127                        bin.close();
128                } catch (IOException e) {
129                        throw new SymbolException("Could not load the symbol tables. ", e);
130                }
131        }
132        
133        public void load(String fileName, String charSet) throws MaltChainedException  {
134                try {
135                        load(new InputStreamReader(new FileInputStream(fileName), charSet));
136                } catch (FileNotFoundException e) {
137                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
138                } catch (UnsupportedEncodingException e) {
139                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
140                }
141        }
142        
143        public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, String nullValueStrategy) throws MaltChainedException {
144                try {
145                        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charSet));
146                        String fileLine;
147                        SymbolTable table = addSymbolTable(tableName, columnCategory, nullValueStrategy);
148
149                        while ((fileLine = br.readLine()) != null) {
150                                table.addSymbol(fileLine.trim());
151                        }
152                        return table;
153                } catch (FileNotFoundException e) {
154                        throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
155                } catch (UnsupportedEncodingException e) {
156                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
157                } catch (IOException e) {
158                        throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
159                }
160        }
161        
162        public String printSymbolTables() throws MaltChainedException  {
163                return parentSymbolTableHandler.printSymbolTables();
164        }
165}