001 package org.maltparser.parser; 002 003 import java.io.File; 004 import java.io.IOException; 005 import java.lang.reflect.Constructor; 006 import java.lang.reflect.InvocationTargetException; 007 import java.util.Formatter; 008 import java.util.regex.Pattern; 009 010 import org.apache.log4j.FileAppender; 011 import org.apache.log4j.Level; 012 import org.apache.log4j.Logger; 013 import org.apache.log4j.PatternLayout; 014 import org.maltparser.core.config.ConfigurationDir; 015 import org.maltparser.core.config.ConfigurationException; 016 import org.maltparser.core.config.ConfigurationRegistry; 017 import org.maltparser.core.exception.MaltChainedException; 018 import org.maltparser.core.helper.SystemLogger; 019 import org.maltparser.core.io.dataformat.DataFormatInstance; 020 import org.maltparser.core.options.OptionManager; 021 import org.maltparser.core.symbol.SymbolTableHandler; 022 import org.maltparser.core.syntaxgraph.DependencyStructure; 023 import org.maltparser.parser.algorithm.ParsingAlgorithm; 024 import org.maltparser.parser.algorithm.nivre.malt04.NivreEagerMalt04; 025 import org.maltparser.parser.algorithm.nivre.malt04.NivreStandardMalt04; 026 import org.maltparser.parser.guide.Guidable; 027 import org.maltparser.parser.guide.Guide; 028 import org.maltparser.parser.history.GuideHistory; 029 import org.maltparser.parser.history.action.GuideDecision; 030 import org.maltparser.parser.history.action.GuideUserAction; 031 032 public class SingleMalt implements DependencyParserConfig, Guidable { 033 public static final int LEARN = 0; 034 public static final int PARSE = 1; 035 protected ConfigurationDir configDir; 036 protected Logger configLogger; 037 protected int optionContainerIndex; 038 protected ParsingAlgorithm parsingAlgorithm = null; 039 protected Guide guide = null; 040 protected int mode; 041 protected ConfigurationRegistry registry; 042 protected SymbolTableHandler symbolTableHandler; 043 protected long startTime; 044 protected long endTime; 045 046 public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException { 047 this.optionContainerIndex = containerIndex; 048 this.mode = mode; 049 setConfigurationDir(configDir); 050 startTime = System.currentTimeMillis(); 051 registry = new ConfigurationRegistry(); 052 053 symbolTableHandler = dataFormatInstance.getSymbolTables(); 054 configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString()); 055 if (mode == SingleMalt.LEARN) { 056 checkOptionDependency(); 057 // initDecisionSettings(); 058 } else if (mode == SingleMalt.PARSE) { 059 060 } 061 registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables()); 062 registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance); 063 registry.put(org.maltparser.parser.DependencyParserConfig.class, this); 064 initParsingAlgorithm(); 065 initGuide(); 066 } 067 068 069 /** 070 * Initialize the parsing algorithm 071 * 072 * @throws MaltChainedException 073 */ 074 protected void initParsingAlgorithm() throws MaltChainedException { 075 if (((Boolean)getOptionValue("malt0.4", "behavior")).booleanValue() == true && getOptionValueString("singlemalt", "parsing_algorithm").equals("nivreeager")) { 076 this.parsingAlgorithm = new NivreEagerMalt04(this); 077 } else if (((Boolean)getOptionValue("malt0.4", "behavior")).booleanValue() == true && getOptionValueString("singlemalt", "parsing_algorithm").equals("nivrestandard")) { 078 this.parsingAlgorithm = new NivreStandardMalt04(this); 079 } else { 080 Class<?> clazz = (Class<?>)getOptionValue("singlemalt", "parsing_algorithm"); 081 082 Class<?>[] argTypes = { org.maltparser.parser.SingleMalt.class }; 083 Object[] arguments = new Object[1]; 084 arguments[0] = this; 085 if (getConfigLogger().isInfoEnabled()) { 086 getConfigLogger().info("Initialize the parsing algorithm...\n"); 087 } 088 try { 089 Constructor<?> constructor = clazz.getConstructor(argTypes); 090 this.parsingAlgorithm = (ParsingAlgorithm)constructor.newInstance(arguments); 091 } catch (NoSuchMethodException e) { 092 throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e); 093 } catch (InstantiationException e) { 094 throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e); 095 } catch (IllegalAccessException e) { 096 throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e); 097 } catch (InvocationTargetException e) { 098 throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e); 099 } 100 } 101 registry.put(org.maltparser.parser.algorithm.ParsingAlgorithm.class, parsingAlgorithm); 102 } 103 104 public void initGuide() throws MaltChainedException { 105 Class<?> clazz = (Class<?>)getOptionValue("singlemalt", "guide_model"); 106 107 Class<?>[] argTypes = { org.maltparser.parser.DependencyParserConfig.class, org.maltparser.parser.history.GuideHistory.class, org.maltparser.parser.guide.Guide.GuideMode.class }; 108 Object[] arguments = new Object[3]; 109 arguments[0] = this; 110 arguments[1] = (GuideHistory)parsingAlgorithm.getHistory(); 111 if (mode == LEARN) { 112 arguments[2] = Guide.GuideMode.TRAIN; 113 } else if (mode == PARSE) { 114 arguments[2] = Guide.GuideMode.CLASSIFY; 115 } 116 117 if (configLogger.isInfoEnabled()) { 118 configLogger.info("Initialize the guide model...\n"); 119 } 120 try { 121 Constructor<?> constructor = clazz.getConstructor(argTypes); 122 this.guide = (Guide)constructor.newInstance(arguments); 123 } catch (NoSuchMethodException e) { 124 throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e); 125 } catch (InstantiationException e) { 126 throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e); 127 } catch (IllegalAccessException e) { 128 throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e); 129 } catch (InvocationTargetException e) { 130 throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e); 131 } 132 } 133 134 public void process(Object[] arguments) throws MaltChainedException { 135 if (mode == LEARN) { 136 if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) { 137 throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. "); 138 } 139 DependencyStructure systemGraph = (DependencyStructure)arguments[0]; 140 DependencyStructure goldGraph = (DependencyStructure)arguments[1]; 141 if (systemGraph.hasTokens()) { 142 getGuide().finalizeSentence(getParsingAlgorithm().oracleParse(goldGraph, systemGraph)); 143 } 144 } else if (mode == PARSE) { 145 if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) { 146 throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. "); 147 } 148 DependencyStructure processGraph = (DependencyStructure)arguments[0]; 149 if (processGraph.hasTokens()) { 150 getParsingAlgorithm().parse(processGraph); 151 } 152 } 153 } 154 155 public void parse(DependencyStructure graph) throws MaltChainedException { 156 if (graph.hasTokens()) { 157 getParsingAlgorithm().parse(graph); 158 } 159 } 160 161 public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException { 162 if (oracleGraph.hasTokens()) { 163 getGuide().finalizeSentence(getParsingAlgorithm().oracleParse(goldGraph, oracleGraph)); 164 } 165 } 166 167 public void terminate(Object[] arguments) throws MaltChainedException { 168 if (guide != null) { 169 guide.terminate(); 170 } 171 if (mode == LEARN) { 172 endTime = System.currentTimeMillis(); 173 long elapsed = endTime - startTime; 174 if (configLogger.isInfoEnabled()) { 175 configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n"); 176 } 177 } else if (mode == PARSE) { 178 endTime = System.currentTimeMillis(); 179 long elapsed = endTime - startTime; 180 if (configLogger.isInfoEnabled()) { 181 configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n"); 182 } 183 } 184 if (SystemLogger.logger() != configLogger && configLogger != null) { 185 configLogger.removeAllAppenders(); 186 } 187 } 188 189 /** 190 * Initialize the configuration logger 191 * 192 * @return the configuration logger 193 * @throws MaltChainedException 194 */ 195 public Logger initConfigLogger(String logfile, String level) throws MaltChainedException { 196 if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) { 197 configLogger = Logger.getLogger(logfile); 198 FileAppender fileAppender = null; 199 try { 200 fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true); 201 } catch(IOException e) { 202 throw new ConfigurationException("It is not possible to create a configuration log file. ", e); 203 } 204 fileAppender.setThreshold(Level.toLevel(level, Level.INFO)); 205 configLogger.addAppender(fileAppender); 206 configLogger.setLevel(Level.toLevel(level, Level.INFO)); 207 } else { 208 configLogger = SystemLogger.logger(); 209 } 210 211 return configLogger; 212 } 213 214 public Logger getConfigLogger() { 215 return configLogger; 216 } 217 218 public void setConfigLogger(Logger logger) { 219 configLogger = logger; 220 } 221 222 public ConfigurationDir getConfigurationDir() { 223 return configDir; 224 } 225 226 public void setConfigurationDir(ConfigurationDir configDir) { 227 this.configDir = configDir; 228 } 229 230 public int getMode() { 231 return mode; 232 } 233 234 public ConfigurationRegistry getRegistry() { 235 return registry; 236 } 237 238 public void setRegistry(ConfigurationRegistry registry) { 239 this.registry = registry; 240 } 241 242 public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException { 243 return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname); 244 } 245 246 public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException { 247 return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname); 248 } 249 250 public OptionManager getOptionManager() throws MaltChainedException { 251 return OptionManager.instance(); 252 } 253 /******************************** MaltParserConfiguration specific ********************************/ 254 255 /** 256 * Returns the list of symbol tables 257 * 258 * @return the list of symbol tables 259 */ 260 public SymbolTableHandler getSymbolTables() { 261 return symbolTableHandler; 262 } 263 264 /** 265 * Returns the parsing algorithm in use 266 * 267 * @return the parsing algorithm in use 268 */ 269 public ParsingAlgorithm getParsingAlgorithm() { 270 return parsingAlgorithm; 271 } 272 273 /** 274 * Returns the guide 275 * 276 * @return the guide 277 */ 278 public Guide getGuide() { 279 return guide; 280 } 281 282 public void checkOptionDependency() throws MaltChainedException { 283 try { 284 configDir.getInfoFileWriter().write("\nDEPENDENCIES\n"); 285 if ((Boolean)getOptionValue("malt0.4", "behavior") == true) { 286 if (!getOptionValueString("singlemalt", "null_value").equals("rootlabel")) { 287 OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "null_value", "rootlabel"); 288 configDir.getInfoFileWriter().write("--singlemalt-null_value (-nv) rootlabel\n"); 289 configLogger.warn("Option --malt0.4-behavior = true and --singlemalt-null_value != 'rootlabel'. Option --singlemalt-null_value is overloaded with value 'rootlabel'\n"); 290 } 291 if (getOptionValue("malt0.4", "depset").toString().equals("")) { 292 configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-depset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n"); 293 } 294 if (getOptionValue("malt0.4", "posset").toString().equals("")) { 295 configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-posset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n"); 296 } 297 if (getOptionValue("malt0.4", "cposset").toString().equals("")) { 298 configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-cposset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n"); 299 } 300 if (!getOptionValue("guide", "kbest").toString().equals("1")) { 301 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "kbest", "1"); 302 configDir.getInfoFileWriter().write("--guide-kbest ( -k) 1\n"); 303 configLogger.warn("Option --malt0.4-behavior = true and --guide-kbest != '1'. Option --guide-kbest is overloaded with value '1'\n"); 304 } 305 } 306 if (getOptionValue("guide", "features").toString().equals("")) { 307 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm")); 308 configDir.getInfoFileWriter().write("--guide-features ( -F) "+getOptionValue("guide", "features").toString()+"\n"); 309 } else { 310 configDir.copyToConfig(getOptionValue("guide", "features").toString()); 311 } 312 if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) { 313 configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n "); 314 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", ""); 315 configDir.getInfoFileWriter().write("--guide-data_split_structure ( -s)\n"); 316 } 317 if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) { 318 configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n"); 319 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", ""); 320 configDir.getInfoFileWriter().write("--guide-data_split_column ( -d)\n"); 321 } 322 // if (!getOptionValue("input", "format").toString().equals(getOptionValue("output", "format").toString())) { 323 // OptionManager.instance().overloadOptionValue(containerIndex, "output", "format", getOptionValue("input", "format").toString()); 324 // configDir.getInfoFileWriter().write("--output-format ( -of) "+getOptionValue("input", "format").toString()+"\n"); 325 // } 326 // decision settings 327 328 String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim(); 329 String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim(); 330 String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim(); 331 StringBuilder newDecisionSettings = new StringBuilder(); 332 if ((Boolean)getOptionValue("malt0.4", "behavior") == true) { 333 decisionSettings = "T.TRANS+A.DEPREL"; 334 } 335 if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) { 336 decisionSettings = "T.TRANS+A.DEPREL"; 337 } else { 338 decisionSettings = decisionSettings.toUpperCase(); 339 } 340 341 if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) { 342 if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) { 343 newDecisionSettings.append("+A.PPLIFTED"); 344 } 345 } 346 if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) { 347 if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) { 348 newDecisionSettings.append("+A.PPPATH"); 349 } 350 } 351 if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) { 352 newDecisionSettings.append("+A.PPCOVERED"); 353 } 354 if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) { 355 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString()); 356 configDir.getInfoFileWriter().write("--guide-decision_settings ( -gds) "+getOptionValue("guide", "decision_settings").toString()+"\n"); 357 } 358 359 configDir.getInfoFileWriter().flush(); 360 } catch (IOException e) { 361 throw new ConfigurationException("Could not write to the configuration information file. ", e); 362 } 363 } 364 365 /******************************** Guidable interface ********************************/ 366 367 /** 368 * This method is used during learning. Currently, 369 * the MaltParserConfiguration redirect the instance to the guide. 370 * Maybe in the future this method will 371 * be re-implemented to add some interesting things or maybe not. 372 * 373 * @param action 374 * @throws MaltChainedException 375 */ 376 public void setInstance(GuideUserAction action) throws MaltChainedException { 377 if (mode != SingleMalt.LEARN) { 378 throw new ConfigurationException("It is only possible to set an instance during learning. "); 379 } 380 try { 381 // if (diagnostics == true && diaLogger != null) { 382 // SingleDecision singleDecision; 383 // if (((GuideDecision)action) instanceof SingleDecision) { 384 // singleDecision = (SingleDecision)((GuideDecision)action); 385 // if (singleDecision.getDecisionCode() >= 0) { 386 // diaLogger.info(singleDecision.getDecisionSymbol()); 387 // diaLogger.info("\n"); 388 // } 389 // } else { 390 // for (int i = 0; i < ((MultipleDecision)((GuideDecision)action)).numberOfDecisions(); i++) { 391 // singleDecision = ((MultipleDecision)((GuideDecision)action)).getSingleDecision(i); 392 // if (singleDecision.getDecisionCode() >= 0) { 393 // diaLogger.info(singleDecision.getDecisionSymbol()); 394 // diaLogger.info("\t"); 395 // } 396 // } 397 // diaLogger.info("\n"); 398 // } 399 // } 400 401 guide.addInstance((GuideDecision)action); 402 } catch (NullPointerException e) { 403 throw new ConfigurationException("The guide cannot be found. ", e); 404 } 405 } 406 407 /** 408 * This method is used during parsing. Currently, 409 * the MaltParserConfiguration redirect the request to the guide. 410 * Maybe in the future this method will 411 * be re-implemented to add some interesting things or maybe not. 412 * 413 * @throws MaltChainedException 414 */ 415 public boolean predictFromKBestList(GuideUserAction action) throws MaltChainedException { 416 try { 417 return guide.predictFromKBestList((GuideDecision)action); 418 } catch (NullPointerException e) { 419 throw new ConfigurationException("The guide cannot be found. ", e); 420 } 421 } 422 423 /** 424 * This method is used during parsing. Currently, 425 * the MaltParserConfiguration redirect the request to the guide. 426 * Maybe in the future this method will 427 * be re-implemented to add some interesting things or maybe not. 428 * 429 * @throws MaltChainedException 430 */ 431 public void predict(GuideUserAction action) throws MaltChainedException { 432 try { 433 guide.predict((GuideDecision)action); 434 } catch (NullPointerException e) { 435 throw new ConfigurationException("The guide cannot be found. ", e); 436 } 437 } 438 439 }