package org.maltparser.core.syntaxgraph.reader;

import java.io.File;

import org.maltparser.core.config.ConfigurationDir;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.flow.FlowChartInstance;
import org.maltparser.core.flow.item.ChartItem;
import org.maltparser.core.flow.spec.ChartItemSpecification;
import org.maltparser.core.helper.URLFinder;
import org.maltparser.core.io.dataformat.DataFormatException;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.io.dataformat.DataFormatManager;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.TokenStructure;

/**
 * Chart item for the "read" flow chart element.
 *
 * <p>During {@link #initialize} it resolves its {@code id}, {@code target} and
 * {@code optiongroup} attributes, reads the input options of that option group,
 * creates the input data format instance and opens a {@link SyntaxGraphReader}.
 * Each call to {@link #process(int)} then asks the reader for the next sentence
 * and stores it in the {@link TokenStructure} registered under the target name.
 */
public class ReadChartItem extends ChartItem {
    private String idName;
    private String inputFormatName;
    private String inputFileName;
    private String inputCharSet;
    private String readerOptions;
    private int iterations;
    private Class<? extends SyntaxGraphReader> graphReaderClass;

    private String nullValueStrategy;

    private SyntaxGraphReader reader;
    private String targetName;
    private String optiongroupName;
    private DataFormatInstance inputDataFormatInstance;
    /** Token structure looked up from the flow chart registry on the first call to process(). */
    private TokenStructure cachedGraph = null;

    public ReadChartItem() {
        super();
    }

    /**
     * Reads the chart item attributes and options, then sets up the input data
     * format instance and the reader.
     *
     * <p>Each of the attributes {@code id}, {@code target} and {@code optiongroup}
     * that is not given in the specification falls back, independently of the
     * others, to the default value declared for the "read" chart element.
     *
     * @param flowChartinstance the flow chart instance this item belongs to
     * @param chartItemSpecification the specification holding the item's attributes
     * @throws MaltChainedException if an option cannot be read or the reader cannot be set up
     */
    @Override
    public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
        super.initialize(flowChartinstance, chartItemSpecification);

        for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
            if (key.equals("id")) {
                idName = chartItemSpecification.getChartItemAttributes().get(key);
            } else if (key.equals("target")) {
                targetName = chartItemSpecification.getChartItemAttributes().get(key);
            } else if (key.equals("optiongroup")) {
                optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
            }
        }

        // Each default must be checked on its own: with an else-if chain only the
        // first missing attribute would be filled in and the others would stay null.
        if (idName == null) {
            idName = getChartElement("read").getAttributes().get("id").getDefaultValue();
        }
        if (targetName == null) {
            targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
        }
        if (optiongroupName == null) {
            optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
        }

        setInputFormatName(optionValue(optiongroupName, "format").toString());
        setInputFileName(optionValue(optiongroupName, "infile").toString());
        setInputCharSet(optionValue(optiongroupName, "charset").toString());
        setReaderOptions(optionValue(optiongroupName, "reader_options").toString());

        // The "iterations" option is optional; a single pass is used when it is absent.
        final Object iterationsValue = optionValue(optiongroupName, "iterations");
        if (iterationsValue != null) {
            setIterations((Integer) iterationsValue);
        } else {
            setIterations(1);
        }
        setSyntaxGraphReaderClass((Class<?>) optionValue(optiongroupName, "reader"));

        // The null value strategy is always taken from the "singlemalt" option group.
        setNullValueStrategy(optionValue("singlemalt", "null_value").toString());

        initInput(getNullValueStrategy());
        initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations);
    }

    /**
     * Looks up an option value in this item's option container.
     *
     * @param groupName the option group name
     * @param optionName the option name
     * @return the value returned by the option manager (may be {@code null})
     * @throws MaltChainedException if the option manager fails to resolve the option
     */
    private Object optionValue(String groupName, String optionName) throws MaltChainedException {
        return OptionManager.instance().getOptionValue(getOptionContainerIndex(), groupName, optionName);
    }

    /**
     * Does nothing and passes the signal through.
     *
     * @param signal the incoming signal
     * @return the unchanged signal
     */
    public int preprocess(int signal) throws MaltChainedException {
        return signal;
    }

    /**
     * Reads the next sentence into the token structure registered under the target name.
     *
     * @param signal the incoming signal (not inspected)
     * @return {@link ChartItem#TERMINATE} when the reader reports no more input,
     *         {@link ChartItem#NEWITERATION} when the reader's iteration counter
     *         increased during the read, otherwise {@link ChartItem#CONTINUE}
     * @throws MaltChainedException if the registry lookup or the read fails
     */
    public int process(int signal) throws MaltChainedException {
        if (cachedGraph == null) {
            cachedGraph = (TokenStructure) flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
        }
        // Remember the counter before reading so a change can be detected afterwards.
        final int prevIterationCounter = reader.getIterationCounter();
        final boolean moreInput = reader.readSentence(cachedGraph);
        if (!moreInput) {
            return ChartItem.TERMINATE;
        }
        if (prevIterationCounter < reader.getIterationCounter()) {
            return ChartItem.NEWITERATION;
        }
        return ChartItem.CONTINUE;
    }

    /**
     * Does nothing and passes the signal through.
     *
     * @param signal the incoming signal
     * @return the unchanged signal
     */
    public int postprocess(int signal) throws MaltChainedException {
        return signal;
    }

    /**
     * Closes the reader, if one is open, and drops the references to the reader,
     * the cached token structure and the input data format instance.
     *
     * @throws MaltChainedException if closing the reader fails
     */
    public void terminate() throws MaltChainedException {
        if (reader != null) {
            reader.close();
            reader = null;
        }
        cachedGraph = null;
        inputDataFormatInstance = null;
    }

    /**
     * @return the input format name, or {@code "/appdata/dataformat/conllx.xml"} if none is set
     */
    public String getInputFormatName() {
        if (inputFormatName == null) {
            return "/appdata/dataformat/conllx.xml";
        }
        return inputFormatName;
    }

    public void setInputFormatName(String inputFormatName) {
        this.inputFormatName = inputFormatName;
    }

    /**
     * @return the input file name, or {@code "/dev/stdin"} if none is set
     */
    public String getInputFileName() {
        if (inputFileName == null) {
            return "/dev/stdin";
        }
        return inputFileName;
    }

    public void setInputFileName(String inputFileName) {
        this.inputFileName = inputFileName;
    }

    /**
     * @return the input character set name, or {@code "UTF-8"} if none is set
     */
    public String getInputCharSet() {
        if (inputCharSet == null) {
            return "UTF-8";
        }
        return inputCharSet;
    }

    public void setInputCharSet(String inputCharSet) {
        this.inputCharSet = inputCharSet;
    }

    /**
     * @return the reader options string, or the empty string if none is set
     */
    public String getReaderOptions() {
        if (readerOptions == null) {
            return "";
        }
        return readerOptions;
    }

    public void setReaderOptions(String readerOptions) {
        this.readerOptions = readerOptions;
    }

    public int getIterations() {
        return iterations;
    }

    public void setIterations(int iterations) {
        this.iterations = iterations;
    }

    public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
        return graphReaderClass;
    }

    /**
     * Sets the reader class. A {@code null} argument is ignored and leaves the
     * current reader class unchanged.
     *
     * @param graphReaderClass the class to use for reading, or {@code null}
     * @throws MaltChainedException (a {@link DataFormatException}) if the class is
     *         not a subclass of {@link SyntaxGraphReader}
     */
    public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
        try {
            if (graphReaderClass != null) {
                this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
            }
        } catch (ClassCastException e) {
            throw new DataFormatException("The class '" + graphReaderClass.getName() + "' is not a subclass of '" + org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName() + "'. ", e);
        }
    }

    /**
     * @return the null value strategy, or {@code "one"} if none is set
     */
    public String getNullValueStrategy() {
        if (nullValueStrategy == null) {
            return "one";
        }
        return nullValueStrategy;
    }

    public void setNullValueStrategy(String nullValueStrategy) {
        this.nullValueStrategy = nullValueStrategy;
    }

    public String getTargetName() {
        return targetName;
    }

    public void setTargetName(String targetName) {
        this.targetName = targetName;
    }

    public SyntaxGraphReader getReader() {
        return reader;
    }

    public DataFormatInstance getInputDataFormatInstance() {
        return inputDataFormatInstance;
    }

    /**
     * Creates the input data format instance from the input data format
     * specification of the configuration directory registered under this item's
     * id, and adds that instance to the configuration directory under the
     * specification's data format name.
     *
     * @param nullValueStategy the null value strategy passed to the data format instance
     * @throws MaltChainedException if the configuration lookup or the instance creation fails
     */
    public void initInput(String nullValueStategy) throws MaltChainedException {
        ConfigurationDir configDir = (ConfigurationDir) flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
        DataFormatManager dataFormatManager = configDir.getDataFormatManager();
        SymbolTableHandler symbolTables = configDir.getSymbolTables();
        inputDataFormatInstance = dataFormatManager.getInputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy);
        configDir.addDataFormatInstance(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
    }

    /**
     * Instantiates the reader class and opens it on the input.
     *
     * <p>The input is {@code System.in} when {@code inputFile} is {@code null},
     * empty or {@code "/dev/stdin"}; an existing local file is opened by name;
     * anything else is resolved through {@link URLFinder}. The iteration count is
     * only passed to the reader for file and URL input, not for standard input.
     *
     * @param syntaxGraphReader the reader class to instantiate
     * @param inputFile the input file name or URL, or a standard-input marker
     * @param inputCharSet the character set name used to open the input
     * @param readerOptions the option string handed to the reader
     * @param iterations the number of iterations to set on the reader
     * @throws MaltChainedException (a {@link DataFormatException}) if the reader
     *         class cannot be instantiated, or whatever the reader throws on open
     */
    public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException {
        try {
            reader = syntaxGraphReader.newInstance();
            if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
                reader.open(System.in, inputCharSet);
            } else if (new File(inputFile).exists()) {
                reader.setNIterations(iterations);
                reader.open(inputFile, inputCharSet);
            } else {
                reader.setNIterations(iterations);
                reader.open(new URLFinder().findURL(inputFile), inputCharSet);
            }
            reader.setDataFormatInstance(inputDataFormatInstance);
            reader.setOptions(readerOptions);
        } catch (InstantiationException e) {
            throw new DataFormatException("The data reader '" + syntaxGraphReader.getName() + "' cannot be initialized. ", e);
        } catch (IllegalAccessException e) {
            throw new DataFormatException("The data reader '" + syntaxGraphReader.getName() + "' cannot be initialized. ", e);
        }
    }

    /**
     * Two items are equal when they have the same runtime class and the same
     * {@link #toString()} representation (id, target and option group).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null) {
            return false;
        }
        if (getClass() != obj.getClass()) {
            return false;
        }
        return obj.toString().equals(this.toString());
    }

    /** Hash code derived from {@link #toString()}, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        // toString() always returns a non-null string, so no null check is needed.
        return 217 + toString().hashCode();
    }

    /**
     * @return a description of the form {@code " read id:<id> target:<target> optiongroup:<group>"}
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("    read ");
        sb.append("id:");
        sb.append(idName);
        sb.append(' ');
        sb.append("target:");
        sb.append(targetName);
        sb.append(' ');
        sb.append("optiongroup:");
        sb.append(optiongroupName);
        return sb.toString();
    }
}