package org.maltparser.core.symbol.hash;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashMap;
import org.maltparser.core.symbol.SymbolException;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;


/**
 * A {@link SymbolTableHandler} that keeps its symbol tables in a map keyed by table name
 * and can write them to, and read them back from, a character stream.
 *
 * <p>Stream layout produced by {@link #save(OutputStreamWriter)} and consumed by
 * {@link #load(InputStreamReader)}: one header entry per table, a single newline, then the
 * content of every table.
 */
public class HashSymbolTableHandler implements SymbolTableHandler {
    /** Separator between the fields of a header line; compiled once instead of per call. */
    private static final Pattern TAB_PATTERN = Pattern.compile("\t");

    /** Number of tab-separated fields expected on a header line after the leading tab. */
    private static final int HEADER_FIELD_COUNT = 3;

    /** All registered tables, keyed by table name. */
    private final Map<String, HashSymbolTable> symbolTables;

    /** Creates a handler with no symbol tables. */
    public HashSymbolTableHandler() {
        this.symbolTables = new HashMap<String, HashSymbolTable>();
    }

    /**
     * Returns the table registered under {@code tableName}, creating and registering a new
     * one with the single-argument {@code HashSymbolTable} constructor if none exists.
     *
     * @param tableName the name of the table
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName) throws MaltChainedException {
        HashSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new HashSymbolTable(tableName);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the table registered under {@code tableName}, creating and registering a new
     * one if none exists. When the table already exists it is returned unchanged and
     * {@code columnCategory} and {@code nullValueStrategy} are ignored.
     *
     * @param tableName the name of the table
     * @param columnCategory the column category passed to a newly created table
     * @param nullValueStrategy the null-value strategy passed to a newly created table
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName, int columnCategory, String nullValueStrategy) throws MaltChainedException {
        HashSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            symbolTable = new HashSymbolTable(tableName, columnCategory, nullValueStrategy);
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Returns the table registered under {@code tableName}, creating and registering a new
     * one that copies the column category and null-value strategy of {@code parentTable}
     * if none exists. {@code parentTable} is only consulted when a new table is created,
     * and in that case it is cast to {@code HashSymbolTable}.
     *
     * @param tableName the name of the table
     * @param parentTable the table whose column category and null-value strategy are copied
     * @return the existing or newly created table
     * @throws MaltChainedException if the table cannot be created
     */
    public SymbolTable addSymbolTable(String tableName, SymbolTable parentTable) throws MaltChainedException {
        HashSymbolTable symbolTable = symbolTables.get(tableName);
        if (symbolTable == null) {
            HashSymbolTable hashParentTable = (HashSymbolTable)parentTable;
            symbolTable = new HashSymbolTable(tableName, hashParentTable.getColumnCategory(), hashParentTable.getNullValueStrategy());
            symbolTables.put(tableName, symbolTable);
        }
        return symbolTable;
    }

    /**
     * Looks up a table by name.
     *
     * @param tableName the name of the table
     * @return whatever the underlying map holds for {@code tableName}
     */
    public SymbolTable getSymbolTable(String tableName) {
        return symbolTables.get(tableName);
    }

    /**
     * Returns the names of all registered tables.
     *
     * @return the key set of the underlying map (not a copy)
     */
    public Set<String> getSymbolTableNames() {
        return symbolTables.keySet();
    }

    /** Does nothing in this implementation. */
    public void cleanUp() {}

    /**
     * Writes every table to {@code osw}: first the header of each table, then a newline,
     * then the content of each table. The writer is closed before this method returns,
     * whether or not saving succeeded.
     *
     * @param osw the destination writer
     * @throws MaltChainedException if writing fails or a table fails to save itself
     */
    public void save(OutputStreamWriter osw) throws MaltChainedException {
        final BufferedWriter bout = new BufferedWriter(osw);
        boolean saved = false;
        try {
            for (HashSymbolTable table : symbolTables.values()) {
                table.saveHeader(bout);
            }
            bout.write('\n');
            for (HashSymbolTable table : symbolTables.values()) {
                table.save(bout);
            }
            // Close on the success path inside the try so that a failing flush is reported.
            bout.close();
            saved = true;
        } catch (IOException e) {
            throw new SymbolException("Could not save the symbol tables. ", e);
        } finally {
            if (!saved) {
                // Something already failed; release the stream without masking that failure.
                closeQuietly(bout);
            }
        }
    }

    /**
     * Writes every table to the file {@code fileName} using the character set
     * {@code charSet}.
     *
     * @param fileName the path of the file to write
     * @param charSet the name of the character set to encode with
     * @throws MaltChainedException if the file cannot be created, the character set is not
     *         supported, or saving fails
     */
    public void save(String fileName, String charSet) throws MaltChainedException {
        final FileOutputStream fos;
        try {
            fos = new FileOutputStream(fileName);
        } catch (FileNotFoundException e) {
            throw new SymbolException("The symbol table file '"+fileName+"' cannot be created. ", e);
        }
        final OutputStreamWriter osw;
        try {
            osw = new OutputStreamWriter(fos, charSet);
        } catch (UnsupportedEncodingException e) {
            // The file stream is already open at this point and must not be leaked.
            closeQuietly(fos);
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        }
        save(osw);
    }

    /**
     * Reads header lines from {@code bin} and registers one table per line. A header line
     * starts with a tab followed by three tab-separated fields: table name, integer column
     * category and null-value strategy. Reading stops at end of stream or at the first line
     * that is empty or does not start with a tab; that terminating line is consumed.
     *
     * @param bin the reader positioned at the first header line
     * @throws MaltChainedException if a header line does not have three fields, the column
     *         category is not an integer, or reading fails
     */
    public void loadHeader(BufferedReader bin) throws MaltChainedException {
        String fileLine;
        try {
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() == 0 || fileLine.charAt(0) != '\t') {
                    break;
                }
                final String header = fileLine.substring(1);
                final String[] items = TAB_PATTERN.split(header);
                if (items.length != HEADER_FIELD_COUNT) {
                    throw new SymbolException("The header line of the symbol table '"+header+"' must contain three columns. ");
                }
                addSymbolTable(items[0], Integer.parseInt(items[1]), items[2]);
            }
        } catch (NumberFormatException e) {
            throw new SymbolException("The symbol table file (.sym) contains a non-integer value in the header. ", e);
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol table. ", e);
        }
    }

    /**
     * Reads symbol tables from {@code isr}. If the stream starts with a tab, the header
     * section is read first with {@link #loadHeader(BufferedReader)}. After that, every
     * non-empty line is taken as a table name and the table of that name loads its content
     * from the reader. The reader is closed before this method returns, whether or not
     * loading succeeded.
     *
     * @param isr the source reader
     * @throws MaltChainedException if reading fails or a table fails to load
     */
    public void load(InputStreamReader isr) throws MaltChainedException {
        final BufferedReader bin = new BufferedReader(isr);
        boolean loaded = false;
        try {
            // Peek at the first character to find out whether a header section is present.
            bin.mark(2);
            final int firstChar = bin.read();
            bin.reset();
            if (firstChar == '\t') {
                loadHeader(bin);
            }
            String fileLine;
            while ((fileLine = bin.readLine()) != null) {
                if (fileLine.length() > 0) {
                    final SymbolTable table = addSymbolTable(fileLine);
                    table.load(bin);
                }
            }
            bin.close();
            loaded = true;
        } catch (IOException e) {
            throw new SymbolException("Could not load the symbol tables. ", e);
        } finally {
            if (!loaded) {
                // Something already failed; release the stream without masking that failure.
                closeQuietly(bin);
            }
        }
    }

    /**
     * Reads symbol tables from the file {@code fileName} decoded with the character set
     * {@code charSet}.
     *
     * @param fileName the path of the file to read
     * @param charSet the name of the character set to decode with
     * @throws MaltChainedException if the file cannot be found, the character set is not
     *         supported, or loading fails
     */
    public void load(String fileName, String charSet) throws MaltChainedException {
        load(openReader(fileName, charSet, "symbol table file"));
    }

    /**
     * Reads a tagset file and adds each line, trimmed, as a symbol to the table named
     * {@code tableName}. The table is obtained with
     * {@link #addSymbolTable(String, int, String)}.
     *
     * @param fileName the path of the tagset file
     * @param tableName the name of the table that receives the symbols
     * @param charSet the name of the character set to decode with
     * @param columnCategory the column category used if the table has to be created
     * @param nullValueStrategy the null-value strategy used if the table has to be created
     * @return the table the symbols were added to
     * @throws MaltChainedException if the file cannot be found or read, the character set
     *         is not supported, or a symbol cannot be added
     */
    public SymbolTable loadTagset(String fileName, String tableName, String charSet, int columnCategory, String nullValueStrategy) throws MaltChainedException {
        final BufferedReader br = new BufferedReader(openReader(fileName, charSet, "tagset file"));
        boolean loaded = false;
        try {
            final SymbolTable table = addSymbolTable(tableName, columnCategory, nullValueStrategy);
            String fileLine;
            while ((fileLine = br.readLine()) != null) {
                table.addSymbol(fileLine.trim());
            }
            br.close();
            loaded = true;
            return table;
        } catch (IOException e) {
            throw new SymbolException("The tagset file '"+fileName+"' cannot be loaded. ", e);
        } finally {
            if (!loaded) {
                // Something already failed; release the stream without masking that failure.
                closeQuietly(br);
            }
        }
    }

    /**
     * Concatenates the printed form of every registered table.
     *
     * @return the concatenation of {@code printSymbolTable()} of all tables
     * @throws MaltChainedException if a table cannot be printed
     */
    public String printSymbolTables() throws MaltChainedException {
        StringBuilder sb = new StringBuilder();
        for (HashSymbolTable table : symbolTables.values()) {
            sb.append(table.printSymbolTable());
        }
        return sb.toString();
    }

    /**
     * Opens {@code fileName} for reading with the character set {@code charSet}. The file
     * stream is closed again if the character set turns out to be unsupported.
     *
     * @param fileName the path of the file to open
     * @param charSet the name of the character set to decode with
     * @param fileDescription how the file is named in the "cannot be found" error message
     * @return a reader over the file
     * @throws MaltChainedException if the file cannot be found or the character set is not
     *         supported
     */
    private static InputStreamReader openReader(String fileName, String charSet, String fileDescription) throws MaltChainedException {
        final FileInputStream fis;
        try {
            fis = new FileInputStream(fileName);
        } catch (FileNotFoundException e) {
            throw new SymbolException("The "+fileDescription+" '"+fileName+"' cannot be found. ", e);
        }
        try {
            return new InputStreamReader(fis, charSet);
        } catch (UnsupportedEncodingException e) {
            closeQuietly(fis);
            throw new SymbolException("The char set '"+charSet+"' is not supported. ", e);
        }
    }

    /**
     * Closes {@code resource} and deliberately ignores any {@link IOException}. Only used on
     * paths where another failure is already being reported to the caller.
     *
     * @param resource the resource to close
     */
    private static void closeQuietly(Closeable resource) {
        try {
            resource.close();
        } catch (IOException ignored) {
            // Intentionally ignored: the primary failure is the one worth reporting.
        }
    }
}