001    package org.maltparser.core.syntaxgraph.reader;
002    
003    import java.io.File;
004    
005    import org.maltparser.core.config.ConfigurationDir;
006    import org.maltparser.core.exception.MaltChainedException;
007    import org.maltparser.core.flow.FlowChartInstance;
008    import org.maltparser.core.flow.item.ChartItem;
009    import org.maltparser.core.flow.spec.ChartItemSpecification;
010    import org.maltparser.core.helper.Util;
011    import org.maltparser.core.io.dataformat.DataFormatException;
012    import org.maltparser.core.io.dataformat.DataFormatInstance;
013    import org.maltparser.core.io.dataformat.DataFormatManager;
014    import org.maltparser.core.options.OptionManager;
015    import org.maltparser.core.symbol.SymbolTableHandler;
016    import org.maltparser.core.syntaxgraph.TokenStructure;
017    
018    public class ReadChartItem extends ChartItem {
019            private String idName;
020            private String inputFormatName;
021            private String inputFileName;
022            private String inputCharSet;
023            private String readerOptions;
024            private int iterations;
025            private Class<? extends SyntaxGraphReader> graphReaderClass;
026            
027            private String nullValueStrategy;
028            
029            private SyntaxGraphReader reader;
030            private String targetName;
031            private String optiongroupName;
032            private DataFormatInstance inputDataFormatInstance;
033            private TokenStructure cachedGraph = null;
034            
035            public ReadChartItem() { super(); }
036    
037            public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
038                    super.initialize(flowChartinstance, chartItemSpecification);
039                    
040                    for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
041                            if (key.equals("id")) {
042                                    idName = chartItemSpecification.getChartItemAttributes().get(key);
043                            } else if (key.equals("target")) {
044                                    targetName = chartItemSpecification.getChartItemAttributes().get(key);
045                            } else if (key.equals("optiongroup")) {
046                                    optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
047                            }
048                    }
049                    
050                    if (idName == null) {
051                            idName = getChartElement("read").getAttributes().get("id").getDefaultValue();
052                    } else if (targetName == null) {
053                            targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
054                    } else if (optiongroupName == null) {
055                            optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
056                    }
057                    
058                    setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
059                    setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString());
060                    setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
061                    setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString());
062                    if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) {
063                            setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations"));
064                    } else {
065                            setIterations(1);
066                    }
067                    setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader"));
068    
069                    setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
070                    
071                    initInput(getNullValueStrategy());
072                    initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations);
073            }
074            
075            public int preprocess(int signal) throws MaltChainedException {
076                    return signal;
077            }
078            
079            public int process(int signal) throws MaltChainedException {
080                    if (cachedGraph == null) {
081                            cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
082                    }
083                    int prevIterationCounter = reader.getIterationCounter();
084                    boolean moreInput = reader.readSentence(cachedGraph);
085                    if (!moreInput) {
086                            return ChartItem.TERMINATE;
087                    } else if (prevIterationCounter < reader.getIterationCounter()) {
088                            return ChartItem.NEWITERATION;
089                    }
090                    return ChartItem.CONTINUE;
091    //              return continueNextSentence && moreInput;
092            }
093            
094            public int postprocess(int signal) throws MaltChainedException {
095                    return signal;
096            }
097            
098            public void terminate() throws MaltChainedException {
099                    if (reader != null) {
100                            reader.close();
101                            reader = null;
102                    }
103                    cachedGraph = null;
104                    inputDataFormatInstance = null;
105            }
106            
107            public String getInputFormatName() {
108                    if (inputFormatName == null) {
109                            return "/appdata/dataformat/conllx.xml";
110                    }
111                    return inputFormatName;
112            }
113    
114            public void setInputFormatName(String inputFormatName) {
115                    this.inputFormatName = inputFormatName;
116            }
117    
118            public String getInputFileName() {
119                    if (inputFileName == null) {
120                            return "/dev/stdin";
121                    }
122                    return inputFileName;
123            }
124    
125            public void setInputFileName(String inputFileName) {
126                    this.inputFileName = inputFileName;
127            }
128    
129            public String getInputCharSet() {
130                    if (inputCharSet == null) {
131                            return "UTF-8";
132                    }
133                    return inputCharSet;
134            }
135    
136            public void setInputCharSet(String inputCharSet) {
137                    this.inputCharSet = inputCharSet;
138            }
139    
140            public String getReaderOptions() {
141                    if (readerOptions == null) {
142                            return "";
143                    }
144                    return readerOptions;
145            }
146    
147            public void setReaderOptions(String readerOptions) {
148                    this.readerOptions = readerOptions;
149            }
150    
151            
152            public int getIterations() {
153                    return iterations;
154            }
155    
156            public void setIterations(int iterations) {
157                    this.iterations = iterations;
158            }
159    
160            public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
161                    return graphReaderClass;
162            }
163    
164            public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
165                    try {
166                            if (graphReaderClass != null) {
167                                    this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
168                            }
169                    } catch (ClassCastException e) {
170                            throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e);
171                    }
172            }
173            
174            public String getNullValueStrategy() {
175                    if (nullValueStrategy == null) {
176                            return "one";
177                    }
178                    return nullValueStrategy;
179            }
180    
181            public void setNullValueStrategy(String nullValueStrategy) {
182                    this.nullValueStrategy = nullValueStrategy;
183            }
184    
185            public String getTargetName() {
186                    return targetName;
187            }
188    
189            public void setTargetName(String targetName) {
190                    this.targetName = targetName;
191            }
192    
193            public SyntaxGraphReader getReader() {
194                    return reader;
195            }
196    
197            public DataFormatInstance getInputDataFormatInstance() {
198                    return inputDataFormatInstance;
199            }
200    
201            public void initInput(String nullValueStategy) throws MaltChainedException {
202                    ConfigurationDir configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
203                    DataFormatManager dataFormatManager = configDir.getDataFormatManager();
204                    SymbolTableHandler symbolTables = configDir.getSymbolTables();
205                    inputDataFormatInstance = dataFormatManager.getInputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy);
206                    configDir.addDataFormatInstance(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
207    
208            }
209            
210            public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException {
211                    try {   
212                            reader = syntaxGraphReader.newInstance();
213                            if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
214                                    reader.open(System.in, inputCharSet);
215                            } else if (new File(inputFile).exists()) {
216                                    reader.setNIterations(iterations);
217                                    reader.open(inputFile, inputCharSet);
218                            } else {
219                                    reader.setNIterations(iterations);
220                                    reader.open(Util.findURL(inputFile), inputCharSet);
221                            }
222                            reader.setDataFormatInstance(inputDataFormatInstance); 
223                            reader.setOptions(readerOptions);
224                    } catch (InstantiationException e) {
225                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
226                    } catch (IllegalAccessException e) {
227                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
228                    }       
229            }
230            
231            public boolean equals(Object obj) {
232                    if (this == obj)
233                            return true;
234                    if (obj == null)
235                            return false;
236                    if (getClass() != obj.getClass())
237                            return false;
238                    return obj.toString().equals(this.toString());
239            }
240            
241            public int hashCode() {
242                    return 217 + (null == toString() ? 0 : toString().hashCode());
243            }
244            
245            public String toString() {
246                    final StringBuilder sb = new StringBuilder();
247                    sb.append("    read ");
248                    sb.append("id:");sb.append(idName);
249                    sb.append(' ');
250                    sb.append("target:");
251                    sb.append(targetName);
252                    sb.append(' ');
253                    sb.append("optiongroup:");
254                    sb.append(optiongroupName);
255                    return sb.toString();
256            }
257    }