001package org.maltparser.core.symbol.trie;
002
003import java.io.BufferedReader;
004import java.io.BufferedWriter;
005import java.io.FileInputStream;
006import java.io.FileNotFoundException;
007import java.io.FileOutputStream;
008import java.io.UnsupportedEncodingException;
009
010import java.io.IOException;
011import java.io.InputStreamReader;
012import java.io.OutputStreamWriter;
013import java.util.Set;
014import java.util.regex.Pattern;
015import java.util.regex.PatternSyntaxException;
016
017import org.maltparser.core.exception.MaltChainedException;
018import org.maltparser.core.helper.HashMap;
019import org.maltparser.core.symbol.SymbolException;
020import org.maltparser.core.symbol.SymbolTable;
021import org.maltparser.core.symbol.SymbolTableHandler;
022
023
024/**
025
026@author Johan Hall
027*/
028public class TrieSymbolTableHandler implements SymbolTableHandler {
029        private final Trie trie;
030        private final HashMap<String, TrieSymbolTable> symbolTables;
031
032        public TrieSymbolTableHandler() { 
033                trie = new Trie();
034                symbolTables = new HashMap<String, TrieSymbolTable>();
035        }
036
037        public TrieSymbolTable addSymbolTable(String tableName) throws MaltChainedException {
038                TrieSymbolTable symbolTable = symbolTables.get(tableName);
039                if (symbolTable == null) {
040                        symbolTable = new TrieSymbolTable(tableName, trie); 
041                        symbolTables.put(tableName, symbolTable);
042                }
043                return symbolTable;
044        }
045        
046        public TrieSymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
047                TrieSymbolTable symbolTable = symbolTables.get(tableName);
048                if (symbolTable == null) {
049                        TrieSymbolTable trieParentTable = (TrieSymbolTable)parentTable;
050                        symbolTable = new TrieSymbolTable(tableName, trie, trieParentTable.getColumnCategory(), trieParentTable.getNullValueStrategy());
051                        symbolTables.put(tableName, symbolTable);
052                }
053                return symbolTable;
054        }
055        
056        public TrieSymbolTable addSymbolTable(String tableName, int columnCategory, String nullValueStrategy) throws MaltChainedException {
057                TrieSymbolTable symbolTable = symbolTables.get(tableName);
058                if (symbolTable == null) {
059                        symbolTable = new TrieSymbolTable(tableName, trie, columnCategory, nullValueStrategy);
060                        symbolTables.put(tableName, symbolTable);
061                }
062                return symbolTable;
063        }
064        
065        public TrieSymbolTable getSymbolTable(String tableName) {
066                return symbolTables.get(tableName);
067        }
068        
069        public Set<String> getSymbolTableNames() {
070                return symbolTables.keySet();
071        }
072        
073        public void cleanUp() {
074        }
075        
076        public void save(OutputStreamWriter osw) throws MaltChainedException  {
077                try {
078                        BufferedWriter bout = new BufferedWriter(osw);
079                        for (TrieSymbolTable table : symbolTables.values()) {
080                                table.saveHeader(bout);
081                        }
082                        bout.write('\n');
083                        for (TrieSymbolTable table : symbolTables.values()) {
084                                table.save(bout);
085                        }
086                        bout.close();
087                } catch (IOException e) {
088                        throw new SymbolException("Could not save the symbol tables. ", e);
089                }               
090        }
091        
092        public void save(String fileName, String charSet) throws MaltChainedException  {
093                try {
094                        save(new OutputStreamWriter(new FileOutputStream(fileName), charSet));
095                } catch (FileNotFoundException e) {
096                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be created. ", e);
097                } catch (UnsupportedEncodingException e) {
098                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
099                }
100        }
101        
102        public void loadHeader(BufferedReader bin) throws MaltChainedException {
103                String fileLine = "";
104                Pattern tabPattern = Pattern.compile("\t");
105                try {
106                        while ((fileLine = bin.readLine()) != null) {
107                                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
108                                        break;
109                                }
110                                String items[];
111                                try {
112                                        items = tabPattern.split(fileLine.substring(1));
113                                } catch (PatternSyntaxException e) {
114                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' could not split into atomic parts. ", e);
115                                }
116                                if (items.length != 3) {
117                                        throw new SymbolException("The header line of the symbol table  '"+fileLine.substring(1)+"' must contain four columns. ");
118                                }
119                                addSymbolTable(items[0], Integer.parseInt(items[1]), items[2]);
120                        }
121                } catch (NumberFormatException e) {
122                        throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
123                } catch (IOException e) {
124                        throw new SymbolException("Could not load the symbol table. ", e);
125                }
126        }
127        
128        
129        public void load(InputStreamReader isr) throws MaltChainedException  {
130                try {
131                        BufferedReader bin = new BufferedReader(isr);
132                        String fileLine;
133                        SymbolTable table = null;
134                        bin.mark(2);
135                        if (bin.read() == '\t') {
136                                bin.reset();
137                                loadHeader(bin);
138                        } else {
139                                bin.reset();
140                        }
141                        while ((fileLine = bin.readLine()) != null) {
142                                if (fileLine.length() > 0) {
143                                        table = addSymbolTable(fileLine);
144                                        table.load(bin);
145                                }
146                        }
147                        bin.close();
148                } catch (IOException e) {
149                        throw new SymbolException("Could not load the symbol tables. ", e);
150                }                       
151        }
152        
153        public void load(String fileName, String charSet) throws MaltChainedException  {
154                try {
155                        load(new InputStreamReader(new FileInputStream(fileName), charSet));
156                } catch (FileNotFoundException e) {
157                        throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
158                } catch (UnsupportedEncodingException e) {
159                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
160                }               
161        }
162        
163        
164        public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, String nullValueStrategy) throws MaltChainedException {
165                try {
166                        BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), charSet));
167                        String fileLine;
168                        TrieSymbolTable table = addSymbolTable(tableName, columnCategory, nullValueStrategy);
169
170                        while ((fileLine = br.readLine()) != null) {
171                                table.addSymbol(fileLine.trim());
172                        }
173                        br.close();
174                        return table;
175                } catch (FileNotFoundException e) {
176                        throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
177                } catch (UnsupportedEncodingException e) {
178                        throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
179                } catch (IOException e) {
180                        throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
181                }
182        }
183        
184        public String printSymbolTables() throws MaltChainedException  {
185                StringBuilder sb = new StringBuilder();
186                for (TrieSymbolTable table : symbolTables.values()) {
187                        sb.append(table.printSymbolTable());
188                }
189                return sb.toString();
190        }
191}