001package org.maltparser.core.lw.parser;
002
003import java.io.File;
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.InputStreamReader;
007import java.io.OutputStreamWriter;
008import java.lang.reflect.InvocationTargetException;
009import java.net.URL;
010import java.util.ArrayList;
011import java.util.Collections;
012import java.util.List;
013
014import org.maltparser.concurrent.graph.dataformat.DataFormat;
015import org.maltparser.core.config.ConfigurationException;
016import org.maltparser.core.exception.MaltChainedException;
017import org.maltparser.core.feature.FeatureModelManager;
018import org.maltparser.core.io.dataformat.DataFormatInstance;
019import org.maltparser.core.lw.graph.LWDependencyGraph;
020import org.maltparser.core.lw.graph.LWDeprojectivizer;
021import org.maltparser.core.options.OptionManager;
022import org.maltparser.core.propagation.PropagationManager;
023import org.maltparser.core.symbol.SymbolTableHandler;
024import org.maltparser.core.symbol.parse.ParseSymbolTableHandler;
025import org.maltparser.core.syntaxgraph.DependencyStructure;
026import org.maltparser.parser.AbstractParserFactory;
027import org.maltparser.parser.DependencyParserConfig;
028
029/**
030 *  A lightweight version of org.maltparser.parser.SingleMalt. This class can only perform parsing and is used by 
031 *  the concurrent MaltParser model. 
032 * 
033 * @author Johan Hall
034 *
035 */
036public final class LWSingleMalt implements DependencyParserConfig {
037        public final static Class<?>[] paramTypes = { org.maltparser.parser.DependencyParserConfig.class };
038        private final McoModel mcoModel;
039        private final int optionContainerIndex;
040        private final DataFormatInstance dataFormatInstance;
041        private final PropagationManager propagationManager;
042        private final FeatureModelManager featureModelManager;
043        private final AbstractParserFactory parserFactory;
044        private final String decisionSettings;
045        private final int kBestSize;
046        private final String classitem_separator;
047        private final URL featureModelURL;
048        private final String dataSplitColumn;
049        private final String dataSplitStructure;
050        private final boolean excludeNullValues; 
051        private final LWDecisionModel decisionModel;
052        
053        public LWSingleMalt(int containerIndex, DataFormatInstance dataFormatInstance, McoModel _mcoModel, PropagationManager _propagationManager, FeatureModelManager _featureModelManager) throws MaltChainedException {
054                this.optionContainerIndex = containerIndex;
055                this.mcoModel = _mcoModel;
056                this.dataFormatInstance = dataFormatInstance;
057                this.propagationManager = _propagationManager;
058                this.featureModelManager = _featureModelManager;
059                this.parserFactory = makeParserFactory();
060                this.decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
061                this.kBestSize = ((Integer)getOptionValue("guide", "kbest")).intValue();
062                this.classitem_separator = getOptionValue("guide", "classitem_separator").toString().trim();
063                this.featureModelURL = getConfigFileEntryURL(getOptionValue("guide", "features").toString().trim());
064                this.dataSplitColumn = getOptionValue("guide", "data_split_column").toString().trim();
065                this.dataSplitStructure = getOptionValue("guide", "data_split_structure").toString().trim();
066                this.excludeNullValues = getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none");
067                this.decisionModel = new LWDecisionModel(mcoModel, excludeNullValues, getOptionValueString("guide","learner"));
068        }
069        
070        private AbstractParserFactory makeParserFactory() throws MaltChainedException {
071                Class<?> clazz = (Class<?>)getOptionValue("singlemalt", "parsing_algorithm");
072                try {   
073                        Object[] arguments = { this };
074                        return (AbstractParserFactory)clazz.getConstructor(paramTypes).newInstance(arguments);
075                } catch (NoSuchMethodException e) {
076                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);
077                } catch (InstantiationException e) {
078                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);
079                } catch (IllegalAccessException e) {
080                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);
081                } catch (InvocationTargetException e) {
082                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);                        
083                }
084        }
085        public FeatureModelManager getFeatureModelManager() {
086                return featureModelManager;
087        }
088        
089        public AbstractParserFactory getParserFactory() {
090                return parserFactory;
091        }
092
093        public void parse(DependencyStructure graph) throws MaltChainedException {
094                if (graph.hasTokens()) {
095                        LWDeterministicParser parser = new LWDeterministicParser(this, graph.getSymbolTables());
096                        parser.parse(graph);
097                }
098        }
099        
100    public List<String[]> parseSentences(List<String[]> inputSentences, String defaultRootLabel, int markingStrategy, boolean coveredRoot, SymbolTableHandler parentSymbolTableHandler, DataFormat concurrentDataFormat) throws MaltChainedException {
101        List<String[]> outputSentences = Collections.synchronizedList(new ArrayList<String[]>());
102        SymbolTableHandler parseSymbolTableHandler = new ParseSymbolTableHandler(parentSymbolTableHandler);
103        LWDependencyGraph parseGraph = new LWDependencyGraph(concurrentDataFormat, parseSymbolTableHandler);
104        LWDeterministicParser parser = new LWDeterministicParser(this, parseSymbolTableHandler);
105        
106                for (int i = 0; i < inputSentences.size(); i++) {
107                        String[] tokens = inputSentences.get(i);
108                        // TODO nothing to parse
109                        parseGraph.resetTokens(tokens, defaultRootLabel, false);
110                        parser.parse(parseGraph);
111                        if (markingStrategy != 0 || coveredRoot) { 
112                                new LWDeprojectivizer().deprojectivize(parseGraph, markingStrategy);
113                        }
114                        String[] outputTokens = new String[tokens.length];
115                        for (int j = 0; j < outputTokens.length; j++) {
116                                outputTokens[j] = parseGraph.getDependencyNode(j+1).toString();
117                        }
118                        outputSentences.add(outputTokens);
119                }
120                return outputSentences;
121    }
122        
123        public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {}
124        
125        public void terminate(Object[] arguments) throws MaltChainedException {}
126        
127        public boolean isLoggerInfoEnabled() {
128                return false;
129        }
130        public boolean isLoggerDebugEnabled() {
131                return false;
132        }
133        public void logErrorMessage(String message) {}
134        public void logInfoMessage(String message) {}
135        public void logInfoMessage(char character) {}
136        public void logDebugMessage(String message) {}
137        public void writeInfoToConfigFile(String message) throws MaltChainedException {}
138        
139        public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException {
140                return null;
141        }
142        
143        public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException {
144                return null;
145        }
146        
147        public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException {
148                try {
149                        return mcoModel.getInputStreamReader(fileName, "UTF-8");
150                } catch (IOException e) {
151                        throw new ConfigurationException("Couldn't read file "+fileName+" from mco-file ", e);
152                }
153        }
154        
155        public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException {
156                try {
157                        return mcoModel.getInputStream(fileName);
158                } catch (IOException e) {
159                        throw new ConfigurationException("Couldn't read file "+fileName+" from mco-file ", e);
160                }
161        }
162        
163        public URL getConfigFileEntryURL(String fileName) throws MaltChainedException {
164                try {
165                        return mcoModel.getMcoEntryURL(fileName);
166                } catch (IOException e) {
167                        throw new ConfigurationException("Couldn't read file "+fileName+" from mco-file ", e);
168                }
169        }
170        
171        public Object getConfigFileEntryObject(String fileName) throws MaltChainedException {
172                return mcoModel.getMcoEntryObject(fileName);
173        }
174        
175        public String getConfigFileEntryString(String fileName) throws MaltChainedException {
176                return mcoModel.getMcoEntryString(fileName);
177        }
178        
179        public File getFile(String fileName) throws MaltChainedException {
180                return new File(System.getProperty("user.dir")+File.separator+fileName);
181        }
182
183        public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
184                return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
185        }
186        
187        public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
188                return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
189        }
190
191        public SymbolTableHandler getSymbolTables() {
192                return null;
193        }
194        
195        public DataFormatInstance getDataFormatInstance() {
196                return dataFormatInstance;
197        }
198        
199        public PropagationManager getPropagationManager() {
200                return propagationManager;
201        }
202
203        public String getDecisionSettings() {
204                return decisionSettings;
205        }
206
207        public int getkBestSize() {
208                return kBestSize;
209        }
210
211        public String getClassitem_separator() {
212                return classitem_separator;
213        }
214
215        public URL getFeatureModelURL() {
216                return featureModelURL;
217        }
218
219        public String getDataSplitColumn() {
220                return dataSplitColumn;
221        }
222
223        public String getDataSplitStructure() {
224                return dataSplitStructure;
225        }
226
227        public boolean isExcludeNullValues() {
228                return excludeNullValues;
229        }
230
231        public LWDecisionModel getDecisionModel() {
232                return decisionModel;
233        }
234}