001package org.maltparser.core.lw.parser; 002 003import java.io.File; 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.InputStreamReader; 007import java.io.OutputStreamWriter; 008import java.lang.reflect.InvocationTargetException; 009import java.net.URL; 010import java.util.ArrayList; 011import java.util.Collections; 012import java.util.List; 013 014import org.maltparser.concurrent.graph.dataformat.DataFormat; 015import org.maltparser.core.config.ConfigurationException; 016import org.maltparser.core.exception.MaltChainedException; 017import org.maltparser.core.feature.FeatureModelManager; 018import org.maltparser.core.io.dataformat.DataFormatInstance; 019import org.maltparser.core.lw.graph.LWDependencyGraph; 020import org.maltparser.core.lw.graph.LWDeprojectivizer; 021import org.maltparser.core.options.OptionManager; 022import org.maltparser.core.propagation.PropagationManager; 023import org.maltparser.core.symbol.SymbolTableHandler; 024import org.maltparser.core.symbol.parse.ParseSymbolTableHandler; 025import org.maltparser.core.syntaxgraph.DependencyStructure; 026import org.maltparser.parser.AbstractParserFactory; 027import org.maltparser.parser.DependencyParserConfig; 028 029/** 030 * A lightweight version of org.maltparser.parser.SingleMalt. This class can only perform parsing and is used by 031 * the concurrent MaltParser model. 032 * 033 * @author Johan Hall 034 * 035 */ 036public final class LWSingleMalt implements DependencyParserConfig { 037 public final static Class<?>[] paramTypes = { org.maltparser.parser.DependencyParserConfig.class }; 038 private final McoModel mcoModel; 039 private final int optionContainerIndex; 040 private final DataFormatInstance dataFormatInstance; 041 private final PropagationManager propagationManager; 042 private final FeatureModelManager featureModelManager; 043 private final AbstractParserFactory parserFactory; 044 private final String decisionSettings; 045 private final int kBestSize; 046 private final String classitem_separator; 047 private final URL featureModelURL; 048 private final String dataSplitColumn; 049 private final String dataSplitStructure; 050 private final boolean excludeNullValues; 051 private final LWDecisionModel decisionModel; 052 053 public LWSingleMalt(int containerIndex, DataFormatInstance dataFormatInstance, McoModel _mcoModel, PropagationManager _propagationManager, FeatureModelManager _featureModelManager) throws MaltChainedException { 054 this.optionContainerIndex = containerIndex; 055 this.mcoModel = _mcoModel; 056 this.dataFormatInstance = dataFormatInstance; 057 this.propagationManager = _propagationManager; 058 this.featureModelManager = _featureModelManager; 059 this.parserFactory = makeParserFactory(); 060 this.decisionSettings = getOptionValue("guide", "decision_settings").toString().trim(); 061 this.kBestSize = ((Integer)getOptionValue("guide", "kbest")).intValue(); 062 this.classitem_separator = getOptionValue("guide", "classitem_separator").toString().trim(); 063 this.featureModelURL = getConfigFileEntryURL(getOptionValue("guide", "features").toString().trim()); 064 this.dataSplitColumn = getOptionValue("guide", "data_split_column").toString().trim(); 065 this.dataSplitStructure = getOptionValue("guide", "data_split_structure").toString().trim(); 066 this.excludeNullValues = getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none"); 067 this.decisionModel = new LWDecisionModel(mcoModel, excludeNullValues, getOptionValueString("guide","learner")); 068 } 069 070 private AbstractParserFactory makeParserFactory() throws MaltChainedException { 071 Class<?> clazz = (Class<?>)getOptionValue("singlemalt", "parsing_algorithm"); 072 try { 073 Object[] arguments = { this }; 074 return (AbstractParserFactory)clazz.getConstructor(paramTypes).newInstance(arguments); 075 } catch (NoSuchMethodException e) { 076 throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e); 077 } catch (InstantiationException e) { 078 throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e); 079 } catch (IllegalAccessException e) { 080 throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e); 081 } catch (InvocationTargetException e) { 082 throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e); 083 } 084 } 085 public FeatureModelManager getFeatureModelManager() { 086 return featureModelManager; 087 } 088 089 public AbstractParserFactory getParserFactory() { 090 return parserFactory; 091 } 092 093 public void parse(DependencyStructure graph) throws MaltChainedException { 094 if (graph.hasTokens()) { 095 LWDeterministicParser parser = new LWDeterministicParser(this, graph.getSymbolTables()); 096 parser.parse(graph); 097 } 098 } 099 100 public List<String[]> parseSentences(List<String[]> inputSentences, String defaultRootLabel, int markingStrategy, boolean coveredRoot, SymbolTableHandler parentSymbolTableHandler, DataFormat concurrentDataFormat) throws MaltChainedException { 101 List<String[]> outputSentences = Collections.synchronizedList(new ArrayList<String[]>()); 102 SymbolTableHandler parseSymbolTableHandler = new ParseSymbolTableHandler(parentSymbolTableHandler); 103 LWDependencyGraph parseGraph = new LWDependencyGraph(concurrentDataFormat, parseSymbolTableHandler); 104 LWDeterministicParser parser = new LWDeterministicParser(this, parseSymbolTableHandler); 105 106 for (int i = 0; i < inputSentences.size(); i++) { 107 String[] tokens = inputSentences.get(i); 108 // TODO nothing to parse 109 parseGraph.resetTokens(tokens, defaultRootLabel, false); 110 parser.parse(parseGraph); 111 if (markingStrategy != 0 || coveredRoot) { 112 new LWDeprojectivizer().deprojectivize(parseGraph, markingStrategy); 113 } 114 String[] outputTokens = new String[tokens.length]; 115 for (int j = 0; j < outputTokens.length; j++) { 116 outputTokens[j] = parseGraph.getDependencyNode(j+1).toString(); 117 } 118 outputSentences.add(outputTokens); 119 } 120 return outputSentences; 121 } 122 123 public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {} 124 125 public void terminate(Object[] arguments) throws MaltChainedException {} 126 127 public boolean isLoggerInfoEnabled() { 128 return false; 129 } 130 public boolean isLoggerDebugEnabled() { 131 return false; 132 } 133 public void logErrorMessage(String message) {} 134 public void logInfoMessage(String message) {} 135 public void logInfoMessage(char character) {} 136 public void logDebugMessage(String message) {} 137 public void writeInfoToConfigFile(String message) throws MaltChainedException {} 138 139 public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException { 140 return null; 141 } 142 143 public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException { 144 return null; 145 } 146 147 public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException { 148 try { 149 return mcoModel.getInputStreamReader(fileName, "UTF-8"); 150 } catch (IOException e) { 151 throw new ConfigurationException("Couldn't read file "+fileName+" from mco-file ", e); 152 } 153 } 154 155 public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException { 156 try { 157 return mcoModel.getInputStream(fileName); 158 } catch (IOException e) { 159 throw new ConfigurationException("Couldn't read file "+fileName+" from mco-file ", e); 160 } 161 } 162 163 public URL getConfigFileEntryURL(String fileName) throws MaltChainedException { 164 try { 165 return mcoModel.getMcoEntryURL(fileName); 166 } catch (IOException e) { 167 throw new ConfigurationException("Couldn't read file "+fileName+" from mco-file ", e); 168 } 169 } 170 171 public Object getConfigFileEntryObject(String fileName) throws MaltChainedException { 172 return mcoModel.getMcoEntryObject(fileName); 173 } 174 175 public String getConfigFileEntryString(String fileName) throws MaltChainedException { 176 return mcoModel.getMcoEntryString(fileName); 177 } 178 179 public File getFile(String fileName) throws MaltChainedException { 180 return new File(System.getProperty("user.dir")+File.separator+fileName); 181 } 182 183 public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException { 184 return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname); 185 } 186 187 public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException { 188 return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname); 189 } 190 191 public SymbolTableHandler getSymbolTables() { 192 return null; 193 } 194 195 public DataFormatInstance getDataFormatInstance() { 196 return dataFormatInstance; 197 } 198 199 public PropagationManager getPropagationManager() { 200 return propagationManager; 201 } 202 203 public String getDecisionSettings() { 204 return decisionSettings; 205 } 206 207 public int getkBestSize() { 208 return kBestSize; 209 } 210 211 public String getClassitem_separator() { 212 return classitem_separator; 213 } 214 215 public URL getFeatureModelURL() { 216 return featureModelURL; 217 } 218 219 public String getDataSplitColumn() { 220 return dataSplitColumn; 221 } 222 223 public String getDataSplitStructure() { 224 return dataSplitStructure; 225 } 226 227 public boolean isExcludeNullValues() { 228 return excludeNullValues; 229 } 230 231 public LWDecisionModel getDecisionModel() { 232 return decisionModel; 233 } 234}