001 package org.maltparser.core.syntaxgraph.reader; 002 003 import java.io.File; 004 005 import org.maltparser.core.exception.MaltChainedException; 006 import org.maltparser.core.flow.FlowChartInstance; 007 import org.maltparser.core.flow.item.ChartItem; 008 import org.maltparser.core.flow.spec.ChartItemSpecification; 009 import org.maltparser.core.helper.Util; 010 import org.maltparser.core.io.dataformat.DataFormatException; 011 import org.maltparser.core.io.dataformat.DataFormatInstance; 012 import org.maltparser.core.options.OptionManager; 013 import org.maltparser.core.syntaxgraph.TokenStructure; 014 015 public class ReadChartItem extends ChartItem { 016 private String inputFormatName; 017 private String inputFileName; 018 private String inputCharSet; 019 private String readerOptions; 020 private Class<? extends SyntaxGraphReader> graphReaderClass; 021 022 private String nullValueStrategy; 023 private String rootLabels; 024 025 private SyntaxGraphReader reader; 026 private String targetName; 027 private String optiongroupName; 028 private DataFormatInstance inputDataFormatInstance; 029 private TokenStructure cachedGraph = null; 030 031 public ReadChartItem() { super(); } 032 033 public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException { 034 super.initialize(flowChartinstance, chartItemSpecification); 035 036 for (String key : chartItemSpecification.getChartItemAttributes().keySet()) { 037 if (key.equals("target")) { 038 targetName = chartItemSpecification.getChartItemAttributes().get(key); 039 } else if (key.equals("optiongroup")) { 040 optiongroupName = chartItemSpecification.getChartItemAttributes().get(key); 041 } 042 } 043 044 if (targetName == null) { 045 targetName = getChartElement("read").getAttributes().get("target").getDefaultValue(); 046 } else if (optiongroupName == null) { 047 optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue(); 048 } 049 050 setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString()); 051 setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString()); 052 setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString()); 053 setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString()); 054 setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader")); 055 056 setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString()); 057 setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString()); 058 059 initInput(getNullValueStrategy(), getRootLabels()); 060 initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions()); 061 } 062 063 public boolean preprocess() throws MaltChainedException { 064 return true; 065 } 066 067 public boolean process(boolean continueNextSentence) throws MaltChainedException { 068 if (cachedGraph == null) { 069 cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName); 070 } 071 boolean moreInput = reader.readSentence(cachedGraph); 072 // System.out.println(cachedGraph); 073 // System.exit(1); 074 return continueNextSentence && moreInput; 075 } 076 077 public boolean postprocess() throws MaltChainedException { 078 return true; 079 } 080 081 public void terminate() throws MaltChainedException { 082 if (reader != null) { 083 reader.close(); 084 reader = null; 085 } 086 cachedGraph = null; 087 inputDataFormatInstance = null; 088 } 089 090 public String getInputFormatName() { 091 if (inputFormatName == null) { 092 return "/appdata/dataformat/conllx.xml"; 093 } 094 return inputFormatName; 095 } 096 097 public void setInputFormatName(String inputFormatName) { 098 this.inputFormatName = inputFormatName; 099 } 100 101 public String getInputFileName() { 102 if (inputFileName == null) { 103 return "/dev/stdin"; 104 } 105 return inputFileName; 106 } 107 108 public void setInputFileName(String inputFileName) { 109 this.inputFileName = inputFileName; 110 } 111 112 public String getInputCharSet() { 113 if (inputCharSet == null) { 114 return "UTF-8"; 115 } 116 return inputCharSet; 117 } 118 119 public void setInputCharSet(String inputCharSet) { 120 this.inputCharSet = inputCharSet; 121 } 122 123 public String getReaderOptions() { 124 if (readerOptions == null) { 125 return ""; 126 } 127 return readerOptions; 128 } 129 130 public void setReaderOptions(String readerOptions) { 131 this.readerOptions = readerOptions; 132 } 133 134 public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() { 135 return graphReaderClass; 136 } 137 138 public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException { 139 try { 140 if (graphReaderClass != null) { 141 this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class); 142 } 143 } catch (ClassCastException e) { 144 throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e); 145 } 146 } 147 148 public String getNullValueStrategy() { 149 if (nullValueStrategy == null) { 150 return "one"; 151 } 152 return nullValueStrategy; 153 } 154 155 public void setNullValueStrategy(String nullValueStrategy) { 156 this.nullValueStrategy = nullValueStrategy; 157 } 158 159 public String getRootLabels() { 160 if (nullValueStrategy == null) { 161 return "ROOT"; 162 } 163 return rootLabels; 164 } 165 166 public void setRootLabels(String rootLabels) { 167 this.rootLabels = rootLabels; 168 } 169 170 171 public String getTargetName() { 172 return targetName; 173 } 174 175 public void setTargetName(String targetName) { 176 this.targetName = targetName; 177 } 178 179 public SyntaxGraphReader getReader() { 180 return reader; 181 } 182 183 public DataFormatInstance getInputDataFormatInstance() { 184 return inputDataFormatInstance; 185 } 186 187 public void initInput(String nullValueStategy, String rootLabels) throws MaltChainedException { 188 inputDataFormatInstance = flowChartinstance.getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(flowChartinstance.getSymbolTables(), nullValueStategy, rootLabels); 189 if (!flowChartinstance.getDataFormatInstances().containsKey(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName())) { 190 flowChartinstance.getDataFormatInstances().put(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance); 191 } 192 } 193 194 public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions) throws MaltChainedException { 195 try { 196 reader = syntaxGraphReader.newInstance(); 197 if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) { 198 reader.open(System.in, inputCharSet); 199 } else if (new File(inputFile).exists()) { 200 reader.open(inputFile, inputCharSet); 201 } else { 202 reader.open(Util.findURL(inputFile), inputCharSet); 203 } 204 reader.setDataFormatInstance(inputDataFormatInstance); 205 reader.setOptions(readerOptions); 206 } catch (InstantiationException e) { 207 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e); 208 } catch (IllegalAccessException e) { 209 throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e); 210 } 211 } 212 213 public boolean equals(Object obj) { 214 if (this == obj) 215 return true; 216 if (obj == null) 217 return false; 218 if (getClass() != obj.getClass()) 219 return false; 220 return obj.toString().equals(this.toString()); 221 } 222 223 public int hashCode() { 224 return 217 + (null == toString() ? 0 : toString().hashCode()); 225 } 226 227 public String toString() { 228 final StringBuilder sb = new StringBuilder(); 229 sb.append(" read "); 230 sb.append("target:"); 231 sb.append(targetName); 232 sb.append(' '); 233 sb.append("optiongroup:"); 234 sb.append(optiongroupName); 235 return sb.toString(); 236 } 237 }