001    package org.maltparser.core.syntaxgraph.reader;
002    
003    import java.io.File;
004    
005    import org.maltparser.core.exception.MaltChainedException;
006    import org.maltparser.core.flow.FlowChartInstance;
007    import org.maltparser.core.flow.item.ChartItem;
008    import org.maltparser.core.flow.spec.ChartItemSpecification;
009    import org.maltparser.core.helper.Util;
010    import org.maltparser.core.io.dataformat.DataFormatException;
011    import org.maltparser.core.io.dataformat.DataFormatInstance;
012    import org.maltparser.core.options.OptionManager;
013    import org.maltparser.core.syntaxgraph.TokenStructure;
014    
015    public class ReadChartItem extends ChartItem {
016            private String inputFormatName;
017            private String inputFileName;
018            private String inputCharSet;
019            private String readerOptions;
020            private Class<? extends SyntaxGraphReader> graphReaderClass;
021            
022            private String nullValueStrategy;
023            private String rootLabels;
024            
025            private SyntaxGraphReader reader;
026            private String targetName;
027            private String optiongroupName;
028            private DataFormatInstance inputDataFormatInstance;
029            private TokenStructure cachedGraph = null;
030            
031            public ReadChartItem() { super(); }
032    
033            public void initialize(FlowChartInstance flowChartinstance, ChartItemSpecification chartItemSpecification) throws MaltChainedException {
034                    super.initialize(flowChartinstance, chartItemSpecification);
035                    
036                    for (String key : chartItemSpecification.getChartItemAttributes().keySet()) {
037                            if (key.equals("target")) {
038                                    targetName = chartItemSpecification.getChartItemAttributes().get(key);
039                            } else if (key.equals("optiongroup")) {
040                                    optiongroupName = chartItemSpecification.getChartItemAttributes().get(key);
041                            }
042                    }
043                    
044                    if (targetName == null) {
045                            targetName = getChartElement("read").getAttributes().get("target").getDefaultValue();
046                    } else if (optiongroupName == null) {
047                            optiongroupName = getChartElement("read").getAttributes().get("optiongroup").getDefaultValue();
048                    }
049                    
050                    setInputFormatName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "format").toString());
051                    setInputFileName(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "infile").toString());
052                    setInputCharSet(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "charset").toString());
053                    setReaderOptions(OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader_options").toString());
054                    setSyntaxGraphReaderClass((Class<?>)OptionManager.instance().getOptionValue(getOptionContainerIndex(), optiongroupName, "reader"));
055    
056                    setNullValueStrategy(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "singlemalt", "null_value").toString());
057                    setRootLabels(OptionManager.instance().getOptionValue(getOptionContainerIndex(), "graph", "root_label").toString());
058                    
059                    initInput(getNullValueStrategy(), getRootLabels());
060                    initReader(getSyntaxGraphReaderClass(), getInputFileName(), getInputCharSet(), getReaderOptions());
061            }
062            
063            public boolean preprocess() throws MaltChainedException {
064                    return true;
065            }
066            
067            public boolean process(boolean continueNextSentence) throws MaltChainedException {
068                    if (cachedGraph == null) {
069                            cachedGraph = (TokenStructure)flowChartinstance.getFlowChartRegistry(org.maltparser.core.syntaxgraph.TokenStructure.class, targetName);
070                    }
071                    boolean moreInput = reader.readSentence(cachedGraph);
072    //              System.out.println(cachedGraph);
073    //              System.exit(1);
074                    return continueNextSentence && moreInput;
075            }
076            
077            public boolean postprocess() throws MaltChainedException {
078                    return true;
079            }
080            
081            public void terminate() throws MaltChainedException {
082                    if (reader != null) {
083                            reader.close();
084                            reader = null;
085                    }
086                    cachedGraph = null;
087                    inputDataFormatInstance = null;
088            }
089            
090            public String getInputFormatName() {
091                    if (inputFormatName == null) {
092                            return "/appdata/dataformat/conllx.xml";
093                    }
094                    return inputFormatName;
095            }
096    
097            public void setInputFormatName(String inputFormatName) {
098                    this.inputFormatName = inputFormatName;
099            }
100    
101            public String getInputFileName() {
102                    if (inputFileName == null) {
103                            return "/dev/stdin";
104                    }
105                    return inputFileName;
106            }
107    
108            public void setInputFileName(String inputFileName) {
109                    this.inputFileName = inputFileName;
110            }
111    
112            public String getInputCharSet() {
113                    if (inputCharSet == null) {
114                            return "UTF-8";
115                    }
116                    return inputCharSet;
117            }
118    
119            public void setInputCharSet(String inputCharSet) {
120                    this.inputCharSet = inputCharSet;
121            }
122    
123            public String getReaderOptions() {
124                    if (readerOptions == null) {
125                            return "";
126                    }
127                    return readerOptions;
128            }
129    
130            public void setReaderOptions(String readerOptions) {
131                    this.readerOptions = readerOptions;
132            }
133    
134            public Class<? extends SyntaxGraphReader> getSyntaxGraphReaderClass() {
135                    return graphReaderClass;
136            }
137    
138            public void setSyntaxGraphReaderClass(Class<?> graphReaderClass) throws MaltChainedException {
139                    try {
140                            if (graphReaderClass != null) {
141                                    this.graphReaderClass = graphReaderClass.asSubclass(org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class);
142                            }
143                    } catch (ClassCastException e) {
144                            throw new DataFormatException("The class '"+graphReaderClass.getName()+"' is not a subclass of '"+org.maltparser.core.syntaxgraph.reader.SyntaxGraphReader.class.getName()+"'. ", e);
145                    }
146            }
147            
148            public String getNullValueStrategy() {
149                    if (nullValueStrategy == null) {
150                            return "one";
151                    }
152                    return nullValueStrategy;
153            }
154    
155            public void setNullValueStrategy(String nullValueStrategy) {
156                    this.nullValueStrategy = nullValueStrategy;
157            }
158    
159            public String getRootLabels() {
160                    if (nullValueStrategy == null) {
161                            return "ROOT";
162                    }
163                    return rootLabels;
164            }
165    
166            public void setRootLabels(String rootLabels) {
167                    this.rootLabels = rootLabels;
168            }
169            
170    
171            public String getTargetName() {
172                    return targetName;
173            }
174    
175            public void setTargetName(String targetName) {
176                    this.targetName = targetName;
177            }
178    
179            public SyntaxGraphReader getReader() {
180                    return reader;
181            }
182    
183            public DataFormatInstance getInputDataFormatInstance() {
184                    return inputDataFormatInstance;
185            }
186    
187            public void initInput(String nullValueStategy, String rootLabels) throws MaltChainedException {
188                    inputDataFormatInstance = flowChartinstance.getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(flowChartinstance.getSymbolTables(), nullValueStategy, rootLabels);
189                    if (!flowChartinstance.getDataFormatInstances().containsKey(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName())) {
190                            flowChartinstance.getDataFormatInstances().put(flowChartinstance.getDataFormatManager().getInputDataFormatSpec().getDataFormatName(), inputDataFormatInstance);
191                    }
192            }
193            
194            public void initReader(Class<? extends SyntaxGraphReader> syntaxGraphReader, String inputFile, String inputCharSet, String readerOptions) throws MaltChainedException {
195                    try {   
196                            reader = syntaxGraphReader.newInstance();
197                            if (inputFile == null || inputFile.length() == 0 || inputFile.equals("/dev/stdin")) {
198                                    reader.open(System.in, inputCharSet);
199                            } else if (new File(inputFile).exists()) {
200                                    reader.open(inputFile, inputCharSet);
201                            } else {
202                                    reader.open(Util.findURL(inputFile), inputCharSet);
203                            }
204                            reader.setDataFormatInstance(inputDataFormatInstance); 
205                            reader.setOptions(readerOptions);
206                    } catch (InstantiationException e) {
207                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
208                    } catch (IllegalAccessException e) {
209                            throw new DataFormatException("The data reader '"+syntaxGraphReader.getName()+"' cannot be initialized. ", e);
210                    }       
211            }
212            
213            public boolean equals(Object obj) {
214                    if (this == obj)
215                            return true;
216                    if (obj == null)
217                            return false;
218                    if (getClass() != obj.getClass())
219                            return false;
220                    return obj.toString().equals(this.toString());
221            }
222            
223            public int hashCode() {
224                    return 217 + (null == toString() ? 0 : toString().hashCode());
225            }
226            
227            public String toString() {
228                    final StringBuilder sb = new StringBuilder();
229                    sb.append("    read ");
230                    sb.append("target:");
231                    sb.append(targetName);
232                    sb.append(' ');
233                    sb.append("optiongroup:");
234                    sb.append(optiongroupName);
235                    return sb.toString();
236            }
237    }