001package org.maltparser.core.syntaxgraph.reader;
002
003import java.io.File;
004
005import org.maltparser.core.config.ConfigurationDir;
006import org.maltparser.core.exception.MaltChainedException;
007import org.maltparser.core.flow.FlowChartInstance;
008import org.maltparser.core.flow.item.ChartItem;
009import org.maltparser.core.flow.spec.ChartItemSpecification;
010import org.maltparser.core.helper.URLFinder;
011import org.maltparser.core.io.dataformat.DataFormatException;
012import org.maltparser.core.io.dataformat.DataFormatInstance;
013import org.maltparser.core.io.dataformat.DataFormatManager;
014import org.maltparser.core.options.OptionManager;
015import org.maltparser.core.symbol.SymbolTableHandler;
016import org.maltparser.core.syntaxgraph.TokenStructure;
017
018public class ReadChartItem extends ChartItem {
019        private String idName;
020        private String inputFormatName;
021        private String inputFileName;
022        private String inputCharSet;
023        private String readerOptions;
024        private int iterations;
025        private Class<? extends SyntaxGraphReader> graphReaderClass;
026        
027        private String nullValueStrategy;
028        
029        private SyntaxGraphReader reader;
030        private String targetName;
031        private String optiongroupName;
032        private DataFormatInstance inputDataFormatInstance;
033        private TokenStructure cachedGraph = null;
034        
035        public ReadChartItem() { super(); }
036
037        public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
038                super.initialize(flowChartinstance, chartItemSpecification);
039                
040                for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
041                        if (key.equals("id")) {
042                                idName = chartItemSpecification.getChartItemAttributes().get(key);
043                        } else if (key.equals("target")) {
044                                targetName = chartItemSpecification.getChartItemAttributes().get(key);
045                        } else if (key.equals("optiongroup")) {
046                                optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
047                        }
048                }
049                
050                if (idName == null) {
051                        idName = getChartElement("read").getAttributes().get("id").getDefaultValue();
052                } else if (targetName == null) {
053                        targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
054                } else if (optiongroupName == null) {
055                        optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
056                }
057                
058                setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
059                setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString());
060                setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
061                setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString());
062                if (OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations") != null) {
063                        setIterations((Integer)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "iterations"));
064                } else {
065                        setIterations(1);
066                }
067                setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader"));
068
069                setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
070                
071                initInput(getNullValueStrategy());
072                initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions(), iterations);
073        }
074        
075        public int preprocess(int signal) throws MaltChainedException {
076                return signal;
077        }
078        
079        public int process(int signal) throws MaltChainedException {
080                if (cachedGraph == null) {
081                        cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
082                }
083                int prevIterationCounter = reader.getIterationCounter();
084                boolean moreInput = reader.readSentence(cachedGraph);
085                if (!moreInput) {
086                        return ChartItem.TERMINATE;
087                } else if (prevIterationCounter < reader.getIterationCounter()) {
088                        return ChartItem.NEWITERATION;
089                }
090                return ChartItem.CONTINUE;
091//              return continueNextSentence && moreInput;
092        }
093        
094        public int postprocess(int signal) throws MaltChainedException {
095                return signal;
096        }
097        
098        public void terminate() throws MaltChainedException {
099                if (reader != null) {
100                        reader.close();
101                        reader = null;
102                }
103                cachedGraph = null;
104                inputDataFormatInstance = null;
105        }
106        
107        public String getInputFormatName() {
108                if (inputFormatName == null) {
109                        return "/appdata/dataformat/conllx.xml";
110                }
111                return inputFormatName;
112        }
113
114        public void setInputFormatName(String inputFormatName) {
115                this.inputFormatName = inputFormatName;
116        }
117
118        public String getInputFileName() {
119                if (inputFileName == null) {
120                        return "/dev/stdin";
121                }
122                return inputFileName;
123        }
124
125        public void setInputFileName(String inputFileName) {
126                this.inputFileName = inputFileName;
127        }
128
129        public String getInputCharSet() {
130                if (inputCharSet == null) {
131                        return "UTF-8";
132                }
133                return inputCharSet;
134        }
135
136        public void setInputCharSet(String inputCharSet) {
137                this.inputCharSet = inputCharSet;
138        }
139
140        public String getReaderOptions() {
141                if (readerOptions == null) {
142                        return "";
143                }
144                return readerOptions;
145        }
146
147        public void setReaderOptions(String readerOptions) {
148                this.readerOptions = readerOptions;
149        }
150
151        
152        public int getIterations() {
153                return iterations;
154        }
155
156        public void setIterations(int iterations) {
157                this.iterations = iterations;
158        }
159
160        public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
161                return graphReaderClass;
162        }
163
164        public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
165                try {
166                        if (graphReaderClass != null) {
167                                this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
168                        }
169                } catch (ClassCastException e) {
170                        throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e);
171                }
172        }
173        
174        public String getNullValueStrategy() {
175                if (nullValueStrategy == null) {
176                        return "one";
177                }
178                return nullValueStrategy;
179        }
180
181        public void setNullValueStrategy(String nullValueStrategy) {
182                this.nullValueStrategy = nullValueStrategy;
183        }
184
185        public String getTargetName() {
186                return targetName;
187        }
188
189        public void setTargetName(String targetName) {
190                this.targetName = targetName;
191        }
192
193        public SyntaxGraphReader getReader() {
194                return reader;
195        }
196
197        public DataFormatInstance getInputDataFormatInstance() {
198                return inputDataFormatInstance;
199        }
200
201        public void initInput(String nullValueStategy) throws MaltChainedException {
202                ConfigurationDir configDir = (ConfigurationDir)flowChartinstance.getFlowChartRegistry(org.maltparser.core.config.ConfigurationDir.class, idName);
203                DataFormatManager dataFormatManager = configDir.getDataFormatManager();
204                SymbolTableHandler symbolTables = configDir.getSymbolTables();
205                inputDataFormatInstance = dataFormatManager.getInputDataFormatSpec().createDataFormatInstance(symbolTables, nullValueStategy);
206                configDir.addDataFormatInstance(dataFormatManager.getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
207
208        }
209        
210        public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions, int iterations) throws MaltChainedException {
211                try {
212                        final URLFinder f = new URLFinder();
213                        reader = syntaxGraphReader.newInstance();
214                        if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
215                                reader.open(System.in, inputCharSet);
216                        } else if (new File(inputFile).exists()) {
217                                reader.setNIterations(iterations);
218                                reader.open(inputFile, inputCharSet);
219                        } else {
220                                reader.setNIterations(iterations);
221                                reader.open(f.findURL(inputFile), inputCharSet);
222                        }
223                        reader.setDataFormatInstance(inputDataFormatInstance); 
224                        reader.setOptions(readerOptions);
225                } catch (InstantiationException e) {
226                        throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
227                } catch (IllegalAccessException e) {
228                        throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
229                }       
230        }
231        
232        public boolean equals(Object obj) {
233                if (this == obj)
234                        return true;
235                if (obj == null)
236                        return false;
237                if (getClass() != obj.getClass())
238                        return false;
239                return obj.toString().equals(this.toString());
240        }
241        
242        public int hashCode() {
243                return 217 + (null == toString() ? 0 : toString().hashCode());
244        }
245        
246        public String toString() {
247                final StringBuilder sb = new StringBuilder();
248                sb.append("    read ");
249                sb.append("id:");sb.append(idName);
250                sb.append(' ');
251                sb.append("target:");
252                sb.append(targetName);
253                sb.append(' ');
254                sb.append("optiongroup:");
255                sb.append(optiongroupName);
256                return sb.toString();
257        }
258}