package org.maltparser.core.symbol.parse;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashMap;
import org.maltparser.core.symbol.SymbolException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;

/**
 * A {@link SymbolTableHandler} that keeps its own map of {@link ParseSymbolTable}
 * instances, keyed by table name, on top of a parent handler.
 *
 * <p>On construction one {@code ParseSymbolTable} is registered for every table
 * name reported by the parent handler. Every table created here is handed a
 * reference to the parent handler, and both {@code save} methods delegate
 * directly to the parent.
 */
public class ParseSymbolTableHandler implements SymbolTableHandler {
    /** Separator between the columns of a header line; compiled once and reused. */
    private static final Pattern TAB_PATTERN = Pattern.compile("\t");

    private final SymbolTableHandler parentSymbolTableHandler;
    private final HashMap<String, ParseSymbolTable> symbolTables;

    /**
     * Creates a handler and registers one table per table name known to the parent.
     *
     * @param parentSymbolTableHandler the handler whose table names are mirrored and
     *                                 to which {@code save} calls are delegated
     * @throws MaltChainedException if a table cannot be created
     */
    public ParseSymbolTableHandler(SymbolTableHandler parentSymbolTableHandler) throws MaltChainedException {
        this.parentSymbolTableHandler = parentSymbolTableHandler;
        this.symbolTables = new HashMap<String, ParseSymbolTable>();
        for (String tableName : parentSymbolTableHandler.getSymbolTableNames()) {
            addSymbolTable(tableName);
        }
    }

    /**
     * Returns the table registered under {@code tableName}, creating and
     * registering it first if no such table exists yet.
     *
     * @param tableName the name of the table
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
        ParseSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new ParseSymbolTable(tableName, parentSymbolTableHandler);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the table registered under {@code tableName}, creating it from
     * {@code parentTable} if no such table exists yet. When the table already
     * exists, {@code parentTable} is ignored.
     *
     * @param tableName   the name of the table
     * @param parentTable the table passed to the new {@code ParseSymbolTable}
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
        ParseSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new ParseSymbolTable(tableName, parentTable, parentSymbolTableHandler);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the table registered under {@code tableName}, creating it with the
     * given column settings if no such table exists yet. When the table already
     * exists, the column settings are ignored.
     *
     * @param tableName         the name of the table
     * @param columnCategory    the column category passed to the new table
     * @param columnType        the column type passed to the new table
     * @param nullValueStrategy the null-value strategy passed to the new table
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
        ParseSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new ParseSymbolTable(tableName, columnCategory, columnType, nullValueStrategy, parentSymbolTableHandler);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Looks up a table by name.
     *
     * @param tableName the name of the table
     * @return whatever the underlying map holds for {@code tableName}
     */
    public SymbolTable getSymbolTable(String tableName) {
        return symbolTables.get(tableName);
    }

    /**
     * Returns the names of all tables registered in this handler.
     *
     * @return the key set of the internal table map (not a copy)
     */
    public Set<String> getSymbolTableNames() {
        return symbolTables.keySet();
    }

    /**
     * Clears the temporary storage of every registered table.
     */
    public void cleanUp() {
        for (ParseSymbolTable table : symbolTables.values()) {
            table.clearTmpStorage();
        }
    }

    /**
     * Saves the symbol tables by delegating to the parent handler.
     *
     * @param osw the writer handed to the parent handler
     * @throws MaltChainedException if the parent handler fails to save
     */
    public void save(OutputStreamWriter osw) throws MaltChainedException {
        parentSymbolTableHandler.save(osw);
    }

    /**
     * Saves the symbol tables by delegating to the parent handler.
     *
     * @param fileName the file name handed to the parent handler
     * @param charSet  the character set name handed to the parent handler
     * @throws MaltChainedException if the parent handler fails to save
     */
    public void save(String fileName, String charSet) throws MaltChainedException {
        parentSymbolTableHandler.save(fileName, charSet);
    }

    /**
     * Reads header lines from {@code bin} and registers one table per line.
     *
     * <p>A header line starts with a tab followed by three or four tab-separated
     * columns: {@code name, category, nullValueStrategy} or
     * {@code name, category, type, nullValueStrategy}. With three columns the
     * column type defaults to {@link SymbolTable#STRING}. Reading stops at end of
     * input or at the first line that is empty or does not start with a tab; that
     * terminating line is consumed from the reader.
     *
     * @param bin the reader positioned at the first header line
     * @throws MaltChainedException if a line has the wrong number of columns,
     *                              contains a non-integer where an integer is
     *                              expected, or the reader fails
     */
    public void loadHeader(BufferedReader bin) throws MaltChainedException {
        String fileLine;
        try {
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
                    break;
                }
                // Drop the leading tab that marks the line as a header line.
                final String headerLine = fileLine.substring(1);
                String[] items;
                try {
                    items = TAB_PATTERN.split(headerLine);
                } catch (PatternSyntaxException e) {
                    throw new SymbolException("The header line of the symbol table '"+headerLine+"' could not split into atomic parts. ", e);
                }
                if (items.length == 4) {
                    addSymbolTable(items[0], Integer.parseInt(items[1]), Integer.parseInt(items[2]), items[3]);
                } else if (items.length == 3) {
                    addSymbolTable(items[0], Integer.parseInt(items[1]), SymbolTable.STRING, items[2]);
                } else {
                    throw new SymbolException("The header line of the symbol table '"+headerLine+"' must contain three or four columns. ");
                }
            }
        } catch (NumberFormatException e) {
            throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol table. ", e);
        }
    }

    /**
     * Loads symbol tables from {@code isr}.
     *
     * <p>If the input starts with a tab, the header is read first via
     * {@link #loadHeader(BufferedReader)}. Afterwards every non-empty line is
     * taken as a table name: the table is looked up or created and asked to load
     * its own content from the same reader. The reader (and therefore
     * {@code isr}) is closed when this method returns, whether it succeeds or
     * fails.
     *
     * @param isr the input to read from; closed by this method
     * @throws MaltChainedException if reading fails or a table cannot be loaded
     */
    public void load(InputStreamReader isr) throws MaltChainedException {
        final BufferedReader bin = new BufferedReader(isr);
        try {
            String fileLine;
            // Peek at the first character to decide whether a header is present.
            bin.mark(2);
            if (bin.read() == '\t') {
                bin.reset();
                loadHeader(bin);
            } else {
                bin.reset();
            }
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() > 0) {
                    final SymbolTable table = addSymbolTable(fileLine);
                    table.load(bin);
                }
            }
            // Close explicitly on the success path so a failing close is still reported.
            bin.close();
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol tables. ", e);
        } finally {
            // Releases the reader on failure paths; a no-op if it is already closed.
            closeQuietly(bin);
        }
    }

    /**
     * Loads symbol tables from the named file using the given character set.
     *
     * @param fileName the path of the symbol table file
     * @param charSet  the name of the character set used to decode the file
     * @throws MaltChainedException if the file cannot be found, the character set
     *                              is not supported, or loading fails
     */
    public void load(String fileName, String charSet) throws MaltChainedException {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(fileName);
            load(new InputStreamReader(fis, charSet));
        } catch (FileNotFoundException e) {
            throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
        } catch (UnsupportedEncodingException e) {
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        } finally {
            // Covers the case where the reader could not be created (unsupported
            // charset) and the stream would otherwise stay open.
            closeQuietly(fis);
        }
    }

    /**
     * Reads a tagset file and adds each line, trimmed, as a symbol to the table
     * named {@code tableName}. The table is created with the given column
     * settings if it does not exist yet. The file is closed before returning.
     *
     * @param fileName          the path of the tagset file
     * @param tableName         the name of the table that receives the symbols
     * @param charSet           the name of the character set used to decode the file
     * @param columnCategory    the column category used if the table is created
     * @param columnType        the column type used if the table is created
     * @param nullValueStrategy the null-value strategy used if the table is created
     * @return the table the symbols were added to
     * @throws MaltChainedException if the file cannot be found or read, the
     *                              character set is not supported, or a symbol
     *                              cannot be added
     */
    public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
        FileInputStream fis = null;
        BufferedReader br = null;
        try {
            fis = new FileInputStream(fileName);
            br = new BufferedReader(new InputStreamReader(fis, charSet));
            final SymbolTable table = addSymbolTable(tableName, columnCategory, columnType, nullValueStrategy);

            String fileLine;
            while ((fileLine = br.readLine()) != null) {
                table.addSymbol(fileLine.trim());
            }
            return table;
        } catch (FileNotFoundException e) {
            throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
        } catch (UnsupportedEncodingException e) {
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        } catch (IOException e) {
            throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
        } finally {
            // Closing the reader also closes the stream; the second call handles
            // the case where the reader was never created.
            closeQuietly(br);
            closeQuietly(fis);
        }
    }

    /**
     * Closes {@code resource} if it is non-null, ignoring any failure to close.
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: a failure to close must not mask the primary outcome.
        }
    }

//    public String printSymbolTables() throws MaltChainedException {
//        return parentSymbolTableHandler.printSymbolTables();
//    }
}