package org.maltparser.core.symbol.parse;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashMap;

import org.maltparser.core.symbol.SymbolException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;

/**
 * A {@link SymbolTableHandler} that keeps one {@link ParseSymbolTable} per table name.
 *
 * <p>Every table created here is constructed with a reference to the parent handler
 * supplied to the constructor. Saving and printing are delegated to that parent
 * handler; loading creates tables in this handler.
 */
public class ParseSymbolTableHandler implements SymbolTableHandler {
    /** Separator between the columns of a header line; compiled once and reused. */
    private static final Pattern TAB_PATTERN = Pattern.compile("\t");

    private final SymbolTableHandler parentSymbolTableHandler;
    private final HashMap<String, ParseSymbolTable> symbolTables;

    /**
     * Creates a handler containing one table for every table name currently known
     * to the parent handler.
     *
     * @param parentSymbolTableHandler the handler that is passed to every table created here
     *                                 and that receives the delegated save/print calls
     * @throws MaltChainedException if one of the tables cannot be created
     */
    public ParseSymbolTableHandler(SymbolTableHandler parentSymbolTableHandler) throws MaltChainedException {
        this.parentSymbolTableHandler = parentSymbolTableHandler;
        this.symbolTables = new HashMap<String, ParseSymbolTable>();
        for (String tableName : parentSymbolTableHandler.getSymbolTableNames()) {
            addSymbolTable(tableName);
        }
    }

    /**
     * Returns the table registered under {@code tableName}, creating and registering
     * it first if no such table exists yet.
     *
     * @param tableName the name of the table
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
        ParseSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new ParseSymbolTable(tableName, parentSymbolTableHandler);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the table registered under {@code tableName}, creating and registering
     * it first if no such table exists yet. {@code parentTable} is only used when a
     * new table has to be created.
     *
     * @param tableName   the name of the table
     * @param parentTable the table handed to the constructor of a newly created table
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
        ParseSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new ParseSymbolTable(tableName, parentTable, parentSymbolTableHandler);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the table registered under {@code tableName}, creating and registering
     * it first if no such table exists yet. {@code columnCategory} and
     * {@code nullValueStrategy} are only used when a new table has to be created.
     *
     * @param tableName         the name of the table
     * @param columnCategory    the column category handed to a newly created table
     * @param nullValueStrategy the null-value strategy handed to a newly created table
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName, int columnCategory, String nullValueStrategy) throws MaltChainedException {
        ParseSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new ParseSymbolTable(tableName, columnCategory, nullValueStrategy, parentSymbolTableHandler);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Looks up a table by name.
     *
     * @param tableName the name of the table
     * @return the result of the map lookup for {@code tableName}; the add methods of this
     *         class treat {@code null} as "not registered"
     */
    public SymbolTable getSymbolTable(String tableName) {
        return symbolTables.get(tableName);
    }

    /**
     * Returns the names of all tables registered in this handler.
     *
     * @return the key set of the internal table map (not a copy)
     */
    public Set<String> getSymbolTableNames() {
        return symbolTables.keySet();
    }

    /**
     * Calls {@code clearTmpStorage()} on every table registered in this handler.
     */
    public void cleanUp() {
        for (ParseSymbolTable table : symbolTables.values()) {
            table.clearTmpStorage();
        }
    }

    /**
     * Delegates saving to the parent handler.
     *
     * @param osw the writer passed on to the parent handler
     * @throws MaltChainedException if the parent handler fails to save
     */
    public void save(OutputStreamWriter osw) throws MaltChainedException {
        parentSymbolTableHandler.save(osw);
    }

    /**
     * Delegates saving to the parent handler.
     *
     * @param fileName the file name passed on to the parent handler
     * @param charSet  the character set name passed on to the parent handler
     * @throws MaltChainedException if the parent handler fails to save
     */
    public void save(String fileName, String charSet) throws MaltChainedException {
        parentSymbolTableHandler.save(fileName, charSet);
    }

    /**
     * Reads header lines from {@code bin} and registers one table per line.
     *
     * <p>A header line starts with a tab, followed by exactly three tab-separated
     * columns: table name, integer column category and null-value strategy. Reading
     * stops at end of input or at the first line that is empty or does not start with
     * a tab; that terminating line is consumed.
     *
     * @param bin the reader positioned at the first header line
     * @throws MaltChainedException if a header line does not have three columns, the
     *                              column category is not an integer, or reading fails
     */
    public void loadHeader(BufferedReader bin) throws MaltChainedException {
        String fileLine;
        try {
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
                    break;
                }
                // Drop the leading tab before splitting so that items[0] is the table name.
                final String headerColumns = fileLine.substring(1);
                final String[] items = TAB_PATTERN.split(headerColumns);
                if (items.length != 3) {
                    throw new SymbolException("The header line of the symbol table '"+headerColumns+"' must contain three columns. ");
                }
                addSymbolTable(items[0], Integer.parseInt(items[1]), items[2]);
            }
        } catch (NumberFormatException e) {
            throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol table. ", e);
        }
    }

    /**
     * Loads symbol tables from {@code isr}.
     *
     * <p>If the input starts with a tab, the header is read first via
     * {@link #loadHeader(BufferedReader)}. After that, every non-empty line is taken as
     * a table name; the table is looked up or created and asked to load its content
     * from the same reader. The reader is closed before this method returns, whether
     * loading succeeds or fails.
     *
     * @param isr the reader to load from; it is closed by this method
     * @throws MaltChainedException if reading fails or a table cannot be created or loaded
     */
    public void load(InputStreamReader isr) throws MaltChainedException {
        final BufferedReader bin = new BufferedReader(isr);
        try {
            // Peek at the first character to find out whether a header is present.
            bin.mark(2);
            final boolean hasHeader = bin.read() == '\t';
            bin.reset();
            if (hasHeader) {
                loadHeader(bin);
            }
            String fileLine;
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() > 0) {
                    addSymbolTable(fileLine).load(bin);
                }
            }
            // Close here so that a failing close on the success path is still reported.
            bin.close();
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol tables. ", e);
        } finally {
            // No-op after a successful close; releases the reader when loading failed.
            closeQuietly(bin);
        }
    }

    /**
     * Opens {@code fileName} with the given character set and loads symbol tables from it.
     *
     * @param fileName the path of the symbol table file
     * @param charSet  the name of the character set used to decode the file
     * @throws MaltChainedException if the file cannot be found, the character set is not
     *                              supported, or loading fails
     */
    public void load(String fileName, String charSet) throws MaltChainedException {
        final InputStreamReader isr;
        try {
            isr = openReader(fileName, charSet);
        } catch (FileNotFoundException e) {
            throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
        } catch (UnsupportedEncodingException e) {
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        }
        load(isr);
    }

    /**
     * Reads a tagset file and adds every line, trimmed, as a symbol to the table named
     * {@code tableName}. The table is created if it does not exist yet. The file is
     * closed before this method returns.
     *
     * @param fileName          the path of the tagset file
     * @param tableName         the name of the table that receives the symbols
     * @param charSet           the name of the character set used to decode the file
     * @param columnCategory    the column category used if the table has to be created
     * @param nullValueStrategy the null-value strategy used if the table has to be created
     * @return the table the symbols were added to
     * @throws MaltChainedException if the file cannot be found or read, the character set
     *                              is not supported, or a symbol cannot be added
     */
    public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, String nullValueStrategy) throws MaltChainedException {
        BufferedReader br = null;
        try {
            br = new BufferedReader(openReader(fileName, charSet));
            final SymbolTable table = addSymbolTable(tableName, columnCategory, nullValueStrategy);
            String fileLine;
            while ((fileLine = br.readLine()) != null) {
                table.addSymbol(fileLine.trim());
            }
            return table;
        } catch (FileNotFoundException e) {
            throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
        } catch (UnsupportedEncodingException e) {
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        } catch (IOException e) {
            throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
        } finally {
            closeQuietly(br);
        }
    }

    /**
     * Delegates printing to the parent handler.
     *
     * @return whatever the parent handler's {@code printSymbolTables()} returns
     * @throws MaltChainedException if the parent handler fails
     */
    public String printSymbolTables() throws MaltChainedException {
        return parentSymbolTableHandler.printSymbolTables();
    }

    /**
     * Opens a file as a character reader, closing the file stream again if the reader
     * cannot be constructed (for example because the character set is unknown).
     */
    private static InputStreamReader openReader(String fileName, String charSet)
            throws FileNotFoundException, UnsupportedEncodingException {
        final FileInputStream fis = new FileInputStream(fileName);
        boolean wrapped = false;
        try {
            final InputStreamReader reader = new InputStreamReader(fis, charSet);
            wrapped = true;
            return reader;
        } finally {
            if (!wrapped) {
                closeQuietly(fis);
            }
        }
    }

    /**
     * Closes {@code resource} if it is non-null, ignoring any failure of the close itself.
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Deliberately ignored: this runs during cleanup, and a close failure must
            // not replace the exception (or result) already leaving the caller.
        }
    }
}