001    package org.maltparser.core.syntaxgraph.reader;
002    
003    import java.io.File;
004    
005    import org.maltparser.core.exception.MaltChainedException;
006    import org.maltparser.core.flow.FlowChartInstance;
007    import org.maltparser.core.flow.item.ChartItem;
008    import org.maltparser.core.flow.spec.ChartItemSpecification;
009    import org.maltparser.core.helper.Util;
010    import org.maltparser.core.io.dataformat.DataFormatException;
011    import org.maltparser.core.io.dataformat.DataFormatInstance;
012    import org.maltparser.core.options.OptionManager;
013    import org.maltparser.core.syntaxgraph.TokenStructure;
014    
015    public class ReadChartItem extends ChartItem {
016            private String inputFormatName;
017            private String inputFileName;
018            private String inputCharSet;
019            private String readerOptions;
020            private int iterations;
021            private Class<? extends SyntaxGraphReader> graphReaderClass;
022            
023            private String nullValueStrategy;
024            private String rootLabels;
025            
026            private SyntaxGraphReader reader;
027            private String targetName;
028            private String optiongroupName;
029            private DataFormatInstance inputDataFormatInstance;
030            private TokenStructure cachedGraph = null;
031            
032            public ReadChartItem() { super(); }
033    
034            public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
035                    super.initialize(flowChartinstance, chartItemSpecification);
036                    
037                    for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
038                            if (key.equals("target")) {
039                                    targetName = chartItemSpecification.getChartItemAttributes().get(key);
040                            } else if (key.equals("optiongroup")) {
041                                    optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
042                            }
043                    }
044                    
045                    if (targetName == null) {
046                            targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
047                    } else if (optiongroupName == null) {
048                            optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
049                    }
050                    
051                    setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
052                    setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString());
053                    setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
054                    setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString());
055                    if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) {
056                            setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations"));
057                    } else {
058                            setIterations(1);
059                    }
060                    setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader"));
061    
062                    setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
063                    setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString());
064                    
065                    
066                    initInput(getNullValueStrategy(), getRootLabels());
067                    initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations);
068            }
069            
070            public int preprocess(int signal) throws MaltChainedException {
071                    return signal;
072            }
073            
074            public int process(int signal) throws MaltChainedException {
075                    if (cachedGraph == null) {
076                            cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
077                    }
078                    int prevIterationCounter = reader.getIterationCounter();
079                    boolean moreInput = reader.readSentence(cachedGraph);
080    //              System.out.println(cachedGraph);
081    //              System.exit(1);
082                    if (!moreInput) {
083                            return ChartItem.TERMINATE;
084                    } else if (prevIterationCounter < reader.getIterationCounter()) {
085                            return ChartItem.NEWITERATION;
086                    }
087                    return ChartItem.CONTINUE;
088    //              return continueNextSentence && moreInput;
089            }
090            
091            public int postprocess(int signal) throws MaltChainedException {
092                    return signal;
093            }
094            
095            public void terminate() throws MaltChainedException {
096                    if (reader != null) {
097                            reader.close();
098                            reader = null;
099                    }
100                    cachedGraph = null;
101                    inputDataFormatInstance = null;
102            }
103            
104            public String getInputFormatName() {
105                    if (inputFormatName == null) {
106                            return "/appdata/dataformat/conllx.xml";
107                    }
108                    return inputFormatName;
109            }
110    
111            public void setInputFormatName(String inputFormatName) {
112                    this.inputFormatName = inputFormatName;
113            }
114    
115            public String getInputFileName() {
116                    if (inputFileName == null) {
117                            return "/dev/stdin";
118                    }
119                    return inputFileName;
120            }
121    
122            public void setInputFileName(String inputFileName) {
123                    this.inputFileName = inputFileName;
124            }
125    
126            public String getInputCharSet() {
127                    if (inputCharSet == null) {
128                            return "UTF-8";
129                    }
130                    return inputCharSet;
131            }
132    
133            public void setInputCharSet(String inputCharSet) {
134                    this.inputCharSet = inputCharSet;
135            }
136    
137            public String getReaderOptions() {
138                    if (readerOptions == null) {
139                            return "";
140                    }
141                    return readerOptions;
142            }
143    
144            public void setReaderOptions(String readerOptions) {
145                    this.readerOptions = readerOptions;
146            }
147    
148            
149            public int getIterations() {
150                    return iterations;
151            }
152    
153            public void setIterations(int iterations) {
154                    this.iterations = iterations;
155            }
156    
157            public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
158                    return graphReaderClass;
159            }
160    
161            public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
162                    try {
163                            if (graphReaderClass != null) {
164                                    this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
165                            }
166                    } catch (ClassCastException e) {
167                            throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e);
168                    }
169            }
170            
171            public String getNullValueStrategy() {
172                    if (nullValueStrategy == null) {
173                            return "one";
174                    }
175                    return nullValueStrategy;
176            }
177    
178            public void setNullValueStrategy(String nullValueStrategy) {
179                    this.nullValueStrategy = nullValueStrategy;
180            }
181    
182            public String getRootLabels() {
183                    if (nullValueStrategy == null) {
184                            return "ROOT";
185                    }
186                    return rootLabels;
187            }
188    
189            public void setRootLabels(String rootLabels) {
190                    this.rootLabels = rootLabels;
191            }
192            
193    
194            public String getTargetName() {
195                    return targetName;
196            }
197    
198            public void setTargetName(String targetName) {
199                    this.targetName = targetName;
200            }
201    
202            public SyntaxGraphReader getReader() {
203                    return reader;
204            }
205    
206            public DataFormatInstance getInputDataFormatInstance() {
207                    return inputDataFormatInstance;
208            }
209    
210            public void initInput(String nullValueStategy, String rootLabels) throws MaltChainedException {
211                    inputDataFormatInstance = flowChartinstance.getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(flowChartinstance.getSymbolTables(), nullValueStategy, rootLabels);
212                    if (!flowChartinstance.getDataFormatInstances().containsKey(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName())) {
213                            flowChartinstance.getDataFormatInstances().put(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
214                    }
215            }
216            
217            public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException {
218                    try {   
219                            reader = syntaxGraphReader.newInstance();
220                            if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
221                                    reader.open(System.in, inputCharSet);
222                            } else if (new File(inputFile).exists()) {
223                                    reader.setNIterations(iterations);
224                                    reader.open(inputFile, inputCharSet);
225                            } else {
226                                    reader.setNIterations(iterations);
227                                    reader.open(Util.findURL(inputFile), inputCharSet);
228                            }
229                            reader.setDataFormatInstance(inputDataFormatInstance); 
230                            reader.setOptions(readerOptions);
231                    } catch (InstantiationException e) {
232                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
233                    } catch (IllegalAccessException e) {
234                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
235                    }       
236            }
237            
238            public boolean equals(Object obj) {
239                    if (this == obj)
240                            return true;
241                    if (obj == null)
242                            return false;
243                    if (getClass() != obj.getClass())
244                            return false;
245                    return obj.toString().equals(this.toString());
246            }
247            
248            public int hashCode() {
249                    return 217 + (null == toString() ? 0 : toString().hashCode());
250            }
251            
252            public String toString() {
253                    final StringBuilder sb = new StringBuilder();
254                    sb.append("    read ");
255                    sb.append("target:");
256                    sb.append(targetName);
257                    sb.append(' ');
258                    sb.append("optiongroup:");
259                    sb.append(optiongroupName);
260                    return sb.toString();
261            }
262    }