001 package org.maltparser.core.syntaxgraph.reader; 002 003 import java.io.File; 004 005 import org.maltparser.core.exception.MaltChainedException; 006 import org.maltparser.core.flow.FlowChartInstance; 007 import org.maltparser.core.flow.item.ChartItem; 008 import org.maltparser.core.flow.spec.ChartItemSpecification; 009 import org.maltparser.core.helper.Util; 010 import org.maltparser.core.io.dataformat.DataFormatException; 011 import org.maltparser.core.io.dataformat.DataFormatInstance; 012 import org.maltparser.core.options.OptionManager; 013 import org.maltparser.core.syntaxgraph.TokenStructure; 014 015 public class ReadChartItem extends ChartItem { 016 private String inputFormatName; 017 private String inputFileName; 018 private String inputCharSet; 019 private String readerOptions; 020 private int iterations; 021 private Class<? extends SyntaxGraphReader> graphReaderClass; 022 023 private String nullValueStrategy; 024 private String rootLabels; 025 026 private SyntaxGraphReader reader; 027 private String targetName; 028 private String optiongroupName; 029 private DataFormatInstance inputDataFormatInstance; 030 private TokenStructure cachedGraph = null; 031 032 public ReadChartItem() { super(); } 033 034 public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException { 035 super.initialize(flowChartinstance, chartItemSpecification); 036 037 for (String key : chartItemSpecification.getChartItemAttributes().keySet()) { 038 if (key.equals("target")) { 039 targetName = chartItemSpecification.getChartItemAttributes().get(key); 040 } else if (key.equals("optiongroup")) { 041 optiongroupName = chartItemSpecification.getChartItemAttributes().get(key); 042 } 043 } 044 045 if (targetName == null) { 046 targetName = getChartElement("read").getAttributes().get("target").getDefaultValue(); 047 } else if (optiongroupName == null) { 048 optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue(); 049 } 050 051 setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString()); 052 setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString()); 053 setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString()); 054 setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString()); 055 if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) { 056 setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations")); 057 } else { 058 setIterations(1); 059 } 060 setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader")); 061 062 setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString()); 063 setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString()); 064 065 066 initInput(getNullValueStrategy(), getRootLabels()); 067 initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations); 068 } 069 070 public int preprocess(int signal) throws MaltChainedException { 071 return signal; 072 } 073 074 public int process(int signal) throws MaltChainedException { 075 if (cachedGraph == null) { 076 cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName); 077 } 078 int prevIterationCounter = reader.getIterationCounter(); 079 boolean moreInput = reader.readSentence(cachedGraph); 080 // System.out.println(cachedGraph); 081 // System.exit(1); 082 if (!moreInput) { 083 return ChartItem.TERMINATE; 084 } else if (prevIterationCounter < reader.getIterationCounter()) { 085 return ChartItem.NEWITERATION; 086 } 087 return ChartItem.CONTINUE; 088 // return continueNextSentence && moreInput; 089 } 090 091 public int postprocess(int signal) throws MaltChainedException { 092 return signal; 093 } 094 095 public void terminate() throws MaltChainedException { 096 if (reader != null) { 097 reader.close(); 098 reader = null; 099 } 100 cachedGraph = null; 101 inputDataFormatInstance = null; 102 } 103 104 public String getInputFormatName() { 105 if (inputFormatName == null) { 106 return "/appdata/dataformat/conllx.xml"; 107 } 108 return inputFormatName; 109 } 110 111 public void setInputFormatName(String inputFormatName) { 112 this.inputFormatName = inputFormatName; 113 } 114 115 public String getInputFileName() { 116 if (inputFileName == null) { 117 return "/dev/stdin"; 118 } 119 return inputFileName; 120 } 121 122 public void setInputFileName(String inputFileName) { 123 this.inputFileName = inputFileName; 124 } 125 126 public String getInputCharSet() { 127 if (inputCharSet == null) { 128 return "UTF-8"; 129 } 130 return inputCharSet; 131 } 132 133 public void setInputCharSet(String inputCharSet) { 134 this.inputCharSet = inputCharSet; 135 } 136 137 public String getReaderOptions() { 138 if (readerOptions == null) { 139 return ""; 140 } 141 return readerOptions; 142 } 143 144 public void setReaderOptions(String readerOptions) { 145 this.readerOptions = readerOptions; 146 } 147 148 149 public int getIterations() { 150 return iterations; 151 } 152 153 public void setIterations(int iterations) { 154 this.iterations = iterations; 155 } 156 157 public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() { 158 return graphReaderClass; 159 } 160 161 public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException { 162 try { 163 if (graphReaderClass != null) { 164 this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class); 165 } 166 } catch (ClassCastException e) { 167 throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e); 168 } 169 } 170 171 public String getNullValueStrategy() { 172 if (nullValueStrategy == null) { 173 return "one"; 174 } 175 return nullValueStrategy; 176 } 177 178 public void setNullValueStrategy(String nullValueStrategy) { 179 this.nullValueStrategy = nullValueStrategy; 180 } 181 182 public String getRootLabels() { 183 if (nullValueStrategy == null) { 184 return "ROOT"; 185 } 186 return rootLabels; 187 } 188 189 public void setRootLabels(String rootLabels) { 190 this.rootLabels = rootLabels; 191 } 192 193 194 public String getTargetName() { 195 return targetName; 196 } 197 198 public void setTargetName(String targetName) { 199 this.targetName = targetName; 200 } 201 202 public SyntaxGraphReader getReader() { 203 return reader; 204 } 205 206 public DataFormatInstance getInputDataFormatInstance() { 207 return inputDataFormatInstance; 208 } 209 210 public void initInput(String nullValueStategy, String rootLabels) throws MaltChainedException { 211 inputDataFormatInstance = flowChartinstance.getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(flowChartinstance.getSymbolTables(), nullValueStategy, rootLabels); 212 if (!flowChartinstance.getDataFormatInstances().containsKey(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName())) { 213 flowChartinstance.getDataFormatInstances().put(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance); 214 } 215 } 216 217 public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException { 218 try { 219 reader = syntaxGraphReader.newInstance(); 220 if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) { 221 reader.open(System.in, inputCharSet); 222 } else if (new File(inputFile).exists()) { 223 reader.setNIterations(iterations); 224 reader.open(inputFile, inputCharSet); 225 } else { 226 reader.setNIterations(iterations); 227 reader.open(Util.findURL(inputFile), inputCharSet); 228 } 229 reader.setDataFormatInstance(inputDataFormatInstance); 230 reader.setOptions(readerOptions); 231 } catch (InstantiationException e) { 232 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e); 233 } catch (IllegalAccessException e) { 234 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e); 235 } 236 } 237 238 public boolean equals(Object obj) { 239 if (this == obj) 240 return true; 241 if (obj == null) 242 return false; 243 if (getClass() != obj.getClass()) 244 return false; 245 return obj.toString().equals(this.toString()); 246 } 247 248 public int hashCode() { 249 return 217 + (null == toString() ? 0 : toString().hashCode()); 250 } 251 252 public String toString() { 253 final StringBuilder sb = new StringBuilder(); 254 sb.append(" read "); 255 sb.append("target:"); 256 sb.append(targetName); 257 sb.append(' '); 258 sb.append("optiongroup:"); 259 sb.append(optiongroupName); 260 return sb.toString(); 261 } 262 }