package org.maltparser.core.symbol.hash;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashMap;
import org.maltparser.core.symbol.SymbolException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;


/**
 * A {@link SymbolTableHandler} that keeps its symbol tables as
 * {@link HashSymbolTable} instances in a map keyed by table name, and that
 * can save all of them to, or load them from, a single character stream.
 */
public class HashSymbolTableHandler implements SymbolTableHandler {
    /** Separator between the columns of a header line. Compiled once instead of on every call. */
    private static final Pattern TAB_PATTERN = Pattern.compile("\t");

    /** Registered symbol tables, keyed by table name. */
    private final Map<String, HashSymbolTable> symbolTables;

    /**
     * Creates a handler with no registered symbol tables.
     */
    public HashSymbolTableHandler() {
        this.symbolTables = new HashMap<String, HashSymbolTable>();
    }

    /**
     * Returns the symbol table registered under the given name, creating and
     * registering a new one if none exists yet.
     *
     * @param tableName the name of the symbol table
     * @return the existing or newly created symbol table
     * @throws MaltChainedException declared by the signature; see {@link HashSymbolTable}
     */
    public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
        HashSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new HashSymbolTable(tableName);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the symbol table registered under the given name, creating and
     * registering a new one with the given settings if none exists yet. The
     * settings are ignored when a table with that name is already registered.
     *
     * @param tableName the name of the symbol table
     * @param columnCategory the column category passed to the new table
     * @param columnType the column type passed to the new table
     * @param nullValueStrategy the null value strategy passed to the new table
     * @return the existing or newly created symbol table
     * @throws MaltChainedException declared by the signature; see {@link HashSymbolTable}
     */
    public SymbolTable addSymbolTable(String tableName, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
        HashSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new HashSymbolTable(tableName, columnCategory, columnType, nullValueStrategy);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the symbol table registered under the given name, creating and
     * registering a new one that copies category, type and null value strategy
     * from {@code parentTable} if none exists yet. The parent table is not
     * inspected when a table with that name is already registered.
     *
     * @param tableName the name of the symbol table
     * @param parentTable the table whose settings are copied; must be a {@link HashSymbolTable}
     *                    when a new table has to be created
     * @return the existing or newly created symbol table
     * @throws MaltChainedException if a new table is needed and {@code parentTable} is
     *                              {@code null} or not a {@link HashSymbolTable}
     */
    public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
        HashSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            // Report an unusable parent as a symbol error instead of a bare
            // ClassCastException/NullPointerException from the cast below.
            if (!(parentTable instanceof HashSymbolTable)) {
                throw new SymbolException("The parent symbol table of '"+tableName+"' must be a hash symbol table. ");
            }
            HashSymbolTable hashParentTable = (HashSymbolTable)parentTable;
            symbolTable = new HashSymbolTable(tableName, hashParentTable.getCategory(), hashParentTable.getType(), hashParentTable.getNullValueStrategy());
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Looks up a registered symbol table.
     *
     * @param tableName the name of the symbol table
     * @return whatever the underlying map returns for that name
     *         (presumably {@code null} when no such table is registered)
     */
    public SymbolTable getSymbolTable(String tableName) {
        return symbolTables.get(tableName);
    }

    /**
     * Returns the names of all registered symbol tables.
     *
     * @return the key set of the internal map (not a copy)
     */
    public Set<String> getSymbolTableNames() {
        return symbolTables.keySet();
    }

    /**
     * Does nothing in this implementation.
     */
    public void cleanUp() {}

    /**
     * Writes all registered symbol tables to the given writer: first the header
     * of every table, then a single newline, then the content of every table.
     * The writer is closed before this method returns, whether or not the
     * save succeeded.
     *
     * @param osw the writer to save to
     * @throws MaltChainedException if writing fails or a table cannot be saved
     */
    public void save(OutputStreamWriter osw) throws MaltChainedException {
        BufferedWriter bout = new BufferedWriter(osw);
        try {
            for (HashSymbolTable table : symbolTables.values()) {
                table.saveHeader(bout);
            }
            bout.write('\n');
            for (HashSymbolTable table : symbolTables.values()) {
                table.save(bout);
            }
            // Explicit close on the success path so that a failed flush is reported.
            bout.close();
        } catch (IOException e) {
            throw new SymbolException("Could not save the symbol tables. ", e);
        } finally {
            // No-op after a successful close; releases the stream on any failure.
            closeQuietly(bout);
        }
    }

    /**
     * Saves all registered symbol tables to a file.
     *
     * @param fileName the path of the file to write
     * @param charSet the name of the character encoding to use
     * @throws MaltChainedException if the file cannot be created, the encoding is not
     *                              supported, or saving fails
     */
    public void save(String fileName, String charSet) throws MaltChainedException {
        FileOutputStream fos = null;
        try {
            fos = new FileOutputStream(fileName);
            save(new OutputStreamWriter(fos, charSet));
        } catch (FileNotFoundException e) {
            throw new SymbolException("The symbol table file '"+fileName+"' cannot be created. ", e);
        } catch (UnsupportedEncodingException e) {
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        } finally {
            // Needed when the writer could not be created (unsupported char set);
            // otherwise the stream has already been closed by save(OutputStreamWriter).
            closeQuietly(fos);
        }
    }

    /**
     * Reads symbol table header lines and registers a table for each of them.
     * A header line starts with a tab followed by three or four tab-separated
     * columns: name, category, optionally type, and null value strategy. When
     * the type column is absent {@link SymbolTable#STRING} is used. Reading
     * stops at the end of the stream or at the first line that is empty or
     * does not start with a tab; that line is consumed.
     *
     * @param bin the reader positioned at the first header line
     * @throws MaltChainedException if a header line has the wrong number of columns,
     *                              contains a non-integer category or type, or reading fails
     */
    public void loadHeader(BufferedReader bin) throws MaltChainedException {
        String fileLine;
        try {
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
                    break;
                }
                String[] items = TAB_PATTERN.split(fileLine.substring(1));
                if (items.length == 4) {
                    addSymbolTable(items[0], Integer.parseInt(items[1]), Integer.parseInt(items[2]), items[3]);
                } else if (items.length == 3) {
                    addSymbolTable(items[0], Integer.parseInt(items[1]), SymbolTable.STRING, items[2]);
                } else {
                    throw new SymbolException("The header line of the symbol table '"+fileLine.substring(1)+"' must contain three or four columns. ");
                }
            }
        } catch (NumberFormatException e) {
            throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol table. ", e);
        }
    }

    /**
     * Loads symbol tables from the given reader. If the first character is a
     * tab the header section is read first (see {@link #loadHeader(BufferedReader)}).
     * After that every non-empty line is taken as a table name; the table is
     * looked up or created and asked to load its content from the reader.
     * The reader is closed before this method returns, whether or not the
     * load succeeded.
     *
     * @param isr the reader to load from
     * @throws MaltChainedException if reading fails or a table cannot be loaded
     */
    public void load(InputStreamReader isr) throws MaltChainedException {
        BufferedReader bin = new BufferedReader(isr);
        try {
            // Peek at the first character to detect a header section.
            bin.mark(2);
            int firstChar = bin.read();
            bin.reset();
            if (firstChar == '\t') {
                loadHeader(bin);
            }
            String fileLine;
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() > 0) {
                    SymbolTable table = addSymbolTable(fileLine);
                    table.load(bin);
                }
            }
            bin.close();
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol tables. ", e);
        } finally {
            // No-op after a successful close; releases the stream on any failure.
            closeQuietly(bin);
        }
    }

    /**
     * Loads symbol tables from a file.
     *
     * @param fileName the path of the file to read
     * @param charSet the name of the character encoding to use
     * @throws MaltChainedException if the file cannot be found, the encoding is not
     *                              supported, or loading fails
     */
    public void load(String fileName, String charSet) throws MaltChainedException {
        FileInputStream fis = null;
        try {
            fis = new FileInputStream(fileName);
            load(new InputStreamReader(fis, charSet));
        } catch (FileNotFoundException e) {
            throw new SymbolException("The symbol table file '"+fileName+"' cannot be found. ", e);
        } catch (UnsupportedEncodingException e) {
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        } finally {
            // Needed when the reader could not be created (unsupported char set);
            // otherwise the stream has already been closed by load(InputStreamReader).
            closeQuietly(fis);
        }
    }

    /**
     * Reads a tagset file and adds every line, trimmed, as a symbol to the
     * table registered under {@code tableName}; the table is created with the
     * given settings if it does not exist yet.
     *
     * @param fileName the path of the tagset file
     * @param tableName the name of the symbol table to fill
     * @param charSet the name of the character encoding of the file
     * @param columnCategory the column category used if the table has to be created
     * @param columnType the column type used if the table has to be created
     * @param nullValueStrategy the null value strategy used if the table has to be created
     * @return the symbol table the symbols were added to
     * @throws MaltChainedException if the file cannot be found or read, the encoding is
     *                              not supported, or a symbol cannot be added
     */
    public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, int columnType, String nullValueStrategy) throws MaltChainedException {
        FileInputStream fis = null;
        BufferedReader br = null;
        try {
            // Open the file before touching the table so that a missing file
            // does not leave an empty table registered.
            fis = new FileInputStream(fileName);
            br = new BufferedReader(new InputStreamReader(fis, charSet));
            SymbolTable table = addSymbolTable(tableName, columnCategory, columnType, nullValueStrategy);

            String fileLine;
            while ((fileLine = br.readLine()) != null) {
                table.addSymbol(fileLine.trim());
            }
            br.close();
            return table;
        } catch (FileNotFoundException e) {
            throw new SymbolException("The tagset file '"+fileName+"' cannot be found. ", e);
        } catch (UnsupportedEncodingException e) {
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        } catch (IOException e) {
            throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
        } finally {
            closeQuietly(br);
            closeQuietly(fis);
        }
    }

    /**
     * Closes a resource during cleanup, ignoring a {@code null} argument and
     * any failure of the close itself.
     */
    private static void closeQuietly(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // Best-effort cleanup: either the stream was already closed on the
            // success path, or a more relevant exception is already propagating.
        }
    }

//    public String printSymbolTables() throws MaltChainedException {
//        StringBuilder sb = new StringBuilder();
//        for (HashSymbolTable table : symbolTables.values()) {
//            sb.append(table.printSymbolTable());
//        }
//        return sb.toString();
//    }
}