package org.maltparser.parser;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.log4j.FileAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.maltparser.core.config.ConfigurationDir;
import org.maltparser.core.config.ConfigurationException;
import org.maltparser.core.config.ConfigurationRegistry;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.helper.URLFinder;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.propagation.PropagationManager;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.parser.guide.ClassifierGuide;

/**
 * Parser configuration that drives a single parsing algorithm: a
 * {@link BatchTrainer} when initialized in {@link #LEARN} mode and a
 * {@link DeterministicParser} when initialized in {@link #PARSE} mode.
 *
 * @author Johan Hall
 *
 */
public class SingleMalt implements DependencyParserConfig {
	public static final int LEARN = 0;
	public static final int PARSE = 1;

	/* Constant patterns used by checkOptionDependency(); compiled once instead of on every call. */
	private static final Pattern PP_LIFTED_PATTERN = Pattern.compile(".*A\\.PPLIFTED.*");
	private static final Pattern PP_PATH_PATTERN = Pattern.compile(".*A\\.PPPATH.*");
	private static final Pattern PP_COVERED_PATTERN = Pattern.compile(".*A\\.PPCOVERED.*");

	protected ConfigurationDir configDir;
	protected Logger configLogger;
	protected int optionContainerIndex;
	protected Algorithm parsingAlgorithm = null;
	protected int mode;
	protected ConfigurationRegistry registry;
	protected SymbolTableHandler symbolTableHandler;
	protected DataFormatInstance dataFormatInstance;
	protected long startTime;
	protected long endTime;
	protected int nIterations = 0;
	protected PropagationManager propagationManager;
	private Parser parser;
	private Trainer trainer;

	/**
	 * Sets up this configuration: stores the arguments, creates the configuration
	 * logger and the registry, validates option dependencies (learn mode only),
	 * loads the optional propagation specification, creates the parsing algorithm
	 * and finally logs which data format(s) are in use.
	 *
	 * @param containerIndex the option container index used for all option lookups
	 * @param dataFormatInstance the data format instance; also supplies the symbol tables
	 * @param configDir the configuration directory
	 * @param mode either {@link #LEARN} or {@link #PARSE}
	 * @throws MaltChainedException if any initialization step fails
	 */
	public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
		this.optionContainerIndex = containerIndex;
		this.mode = mode;
		setConfigurationDir(configDir);
		startTime = System.currentTimeMillis();
		configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
		registry = new ConfigurationRegistry();
		this.dataFormatInstance = dataFormatInstance;
		symbolTableHandler = dataFormatInstance.getSymbolTables();

		if (mode == SingleMalt.LEARN) {
			checkOptionDependency();
		}
		registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
		registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
		initPropagation();
		initParsingAlgorithm();
		// initConfigLogger() tolerates a missing configuration directory, so the
		// purely informational logging below must not fail on one either.
		if (configDir != null && configLogger.isInfoEnabled()) {
			logDataFormats(configDir.getInputFormatURL(), configDir.getOutputFormatURL());
		}
	}

	/**
	 * Logs the input/output data format locations. A single line is logged when
	 * there is no separate output format or when both URLs are textually equal;
	 * nothing is logged when the input format URL is missing.
	 */
	private void logDataFormats(URL inputFormatURL, URL outputFormatURL) {
		if (inputFormatURL == null) {
			return;
		}
		final String input = inputFormatURL.toString();
		if (outputFormatURL == null || outputFormatURL.toString().equals(input)) {
			configLogger.info(" Data Format : "+afterArchiveSeparator(input)+"\n");
		} else {
			configLogger.info(" Input Data Format : "+afterArchiveSeparator(input)+"\n");
			configLogger.info(" Output Data Format : "+afterArchiveSeparator(outputFormatURL.toString())+"\n");
		}
	}

	/**
	 * Returns the part of <code>url</code> that follows the first '!' character,
	 * or <code>url</code> unchanged when it contains no '!'.
	 */
	private static String afterArchiveSeparator(String url) {
		final int index = url.indexOf('!');
		return index == -1 ? url : url.substring(index + 1);
	}

	/**
	 * Loads the propagation specification named by the option
	 * <code>singlemalt/propagation</code>. Does nothing when the option is unset
	 * or empty. In learn mode the specification is first copied into the
	 * configuration directory and the option is overloaded with the copied name.
	 *
	 * @throws MaltChainedException if the option lookup, the copy or the loading fails
	 */
	private void initPropagation() throws MaltChainedException {
		// Test the option value itself for null: calling toString() first would
		// turn a missing value into a NullPointerException before any check runs.
		final Object propagationOption = getOptionValue("singlemalt", "propagation");
		if (propagationOption == null) {
			return;
		}
		String propagationSpecFileName = propagationOption.toString();
		if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
			return;
		}
		propagationManager = new PropagationManager(configDir);
		if (mode == SingleMalt.LEARN) {
			propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
			OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
		}
		getConfigLogger().info(" Propagation : " + propagationSpecFileName+"\n");
		propagationManager.loadSpecification(propagationSpecFileName);
	}

	/**
	 * Initialize the parsing algorithm: a batch trainer in learn mode, a
	 * deterministic parser in parse mode. Any other mode leaves the algorithm untouched.
	 *
	 * @throws MaltChainedException
	 */
	protected void initParsingAlgorithm() throws MaltChainedException {
		if (mode == LEARN) {
			trainer = new BatchTrainer(this);
			parsingAlgorithm = trainer;
		} else if (mode == PARSE) {
			parser = new DeterministicParser(this);
			parsingAlgorithm = parser;
		}
	}

	/**
	 * Adds an entry to the configuration registry.
	 *
	 * @param clazz the registry key
	 * @param o the object registered under that key
	 */
	public void addRegistry(Class<?> clazz, Object o) {
		registry.put(clazz, o);
	}

	/**
	 * Processes one sentence according to the current mode.
	 * <p>
	 * In learn mode <code>arguments[0]</code> is the system graph and
	 * <code>arguments[1]</code> the gold graph; in parse mode
	 * <code>arguments[0]</code> is the graph to parse. Graphs without tokens are skipped.
	 *
	 * @param arguments the dependency structures to work on
	 * @throws MaltChainedException if the arguments do not match the mode or processing fails
	 */
	public void process(Object[] arguments) throws MaltChainedException {
		if (mode == LEARN) {
			if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
				throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
			}
			DependencyStructure systemGraph = (DependencyStructure)arguments[0];
			DependencyStructure goldGraph = (DependencyStructure)arguments[1];
			if (systemGraph.hasTokens() && getGuide() != null) {
				getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
			}
		} else if (mode == PARSE) {
			if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
				throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
			}
			DependencyStructure processGraph = (DependencyStructure)arguments[0];
			if (processGraph.hasTokens()) {
				parser.parse(processGraph);
			}
		}
	}

	/**
	 * Parses the given graph with the parser created in parse mode. Graphs
	 * without tokens are left untouched.
	 *
	 * @param graph the dependency structure to parse
	 * @throws MaltChainedException if parsing fails
	 */
	public void parse(DependencyStructure graph) throws MaltChainedException {
		if (graph.hasTokens()) {
			parser.parse(graph);
		}
	}

	/**
	 * Runs the trainer on a gold/oracle graph pair. When a guide is available
	 * the result of the trainer is handed to the guide's
	 * <code>finalizeSentence</code>. Oracle graphs without tokens are skipped.
	 *
	 * @param goldGraph the gold-standard dependency structure
	 * @param oracleGraph the dependency structure passed to the trainer together with the gold graph
	 * @throws MaltChainedException if the trainer or the guide fails
	 */
	public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
		if (oracleGraph.hasTokens()) {
			if (getGuide() != null) {
				getGuide().finalizeSentence(trainer.parse(goldGraph, oracleGraph));
			} else {
				trainer.parse(goldGraph, oracleGraph);
			}
		}
	}

	/**
	 * Invokes <code>train()</code> on the algorithm, but only when the algorithm
	 * currently has no guide.
	 *
	 * @throws MaltChainedException if training fails
	 */
	public void train() throws MaltChainedException {
		if (getGuide() == null) {
			((Trainer)getAlgorithm()).train();
		}
	}

	/**
	 * Terminates the algorithm and its guide, logs the elapsed learning or
	 * parsing time and detaches all appenders from a configuration-specific logger.
	 *
	 * @param arguments not used by this implementation
	 * @throws MaltChainedException if the algorithm or the guide fails to terminate
	 */
	public void terminate(Object[] arguments) throws MaltChainedException {
		getAlgorithm().terminate();
		if (getGuide() != null) {
			getGuide().terminate();
		}
		if (mode == LEARN) {
			logElapsedTime("Learning time: ");
		} else if (mode == PARSE) {
			logElapsedTime("Parsing time: ");
		}
		// The shared system logger keeps its appenders; only a logger created for
		// this configuration is cleaned up. The null test comes first so it
		// actually protects the dereference.
		if (configLogger != null && SystemLogger.logger() != configLogger) {
			configLogger.removeAllAppenders();
		}
	}

	/**
	 * Records the end time and, when info logging is enabled, logs the time
	 * elapsed since initialization prefixed with <code>label</code>.
	 */
	private void logElapsedTime(String label) {
		endTime = System.currentTimeMillis();
		final long elapsed = endTime - startTime;
		if (configLogger != null && configLogger.isInfoEnabled()) {
			configLogger.info(label +formatDuration(elapsed)+" ("+elapsed+" ms)\n");
		}
	}

	/**
	 * Formats a duration given in milliseconds as <code>hh:mm:ss</code>.
	 */
	private static String formatDuration(long elapsed) {
		final Formatter formatter = new Formatter();
		try {
			return formatter.format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000).toString();
		} finally {
			formatter.close();
		}
	}

	/**
	 * Initialize the configuration logger. A file-backed logger is created inside
	 * the working directory of the configuration directory when a log file name
	 * other than "stdout" is given and a configuration directory is set;
	 * otherwise the system logger is used.
	 *
	 * @param logfile the log file name; null, empty or "stdout" selects the system logger
	 * @param level the log level name; {@link Level#INFO} is used as fallback
	 * @return the configuration logger
	 * @throws MaltChainedException if the log file cannot be created
	 */
	public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
		if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
			configLogger = Logger.getLogger(logfile);
			FileAppender fileAppender = null;
			try {
				fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
			} catch(IOException e) {
				throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
			}
			final Level threshold = Level.toLevel(level, Level.INFO);
			fileAppender.setThreshold(threshold);
			configLogger.addAppender(fileAppender);
			configLogger.setLevel(threshold);
		} else {
			configLogger = SystemLogger.logger();
		}

		return configLogger;
	}

	/**
	 * @return the configuration logger
	 */
	public Logger getConfigLogger() {
		return configLogger;
	}

	/**
	 * @param logger the configuration logger to use
	 */
	public void setConfigLogger(Logger logger) {
		configLogger = logger;
	}

	/**
	 * @return the configuration directory
	 */
	public ConfigurationDir getConfigurationDir() {
		return configDir;
	}

	/**
	 * @param configDir the configuration directory to use
	 */
	public void setConfigurationDir(ConfigurationDir configDir) {
		this.configDir = configDir;
	}

	/**
	 * @return the mode passed to the last call of <code>initialize</code>
	 */
	public int getMode() {
		return mode;
	}

	/**
	 * @return the configuration registry
	 */
	public ConfigurationRegistry getRegistry() {
		return registry;
	}

	/**
	 * @param registry the configuration registry to use
	 */
	public void setRegistry(ConfigurationRegistry registry) {
		this.registry = registry;
	}

	/**
	 * Looks up an option value in this configuration's option container.
	 *
	 * @param optiongroup the option group name
	 * @param optionname the option name
	 * @return the value reported by the option manager
	 * @throws MaltChainedException if the lookup fails
	 */
	public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
		return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
	}

	/**
	 * Looks up the string form of an option value in this configuration's option container.
	 *
	 * @param optiongroup the option group name
	 * @param optionname the option name
	 * @return the string value reported by the option manager
	 * @throws MaltChainedException if the lookup fails
	 */
	public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
		return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
	}

	/**
	 * @return the option manager instance
	 * @throws MaltChainedException
	 */
	public OptionManager getOptionManager() throws MaltChainedException {
		return OptionManager.instance();
	}
	/******************************** MaltParserConfiguration specific  ********************************/

	/**
	 * Returns the symbol table handler
	 *
	 * @return the symbol table handler
	 */
	public SymbolTableHandler getSymbolTables() {
		return symbolTableHandler;
	}

	/**
	 * @return the propagation manager, or null when no propagation specification was loaded
	 */
	public PropagationManager getPropagationManager() {
		return propagationManager;
	}

	/**
	 * @return the parsing algorithm, or null when none has been created
	 */
	public Algorithm getAlgorithm() {
		return parsingAlgorithm;
	}

	/**
	 * Returns the guide of the parsing algorithm
	 *
	 * @return the guide
	 */
	public ClassifierGuide getGuide() {
		return parsingAlgorithm.getGuide();
	}

	/**
	 * Resolves options that depend on each other and records every adjustment in
	 * the configuration information file (when one is available):
	 * <ul>
	 * <li>copies the feature model into the configuration directory, falling back
	 * to a default derived from the parsing algorithm and the learner;</li>
	 * <li>clears a data split option when its companion option is empty;</li>
	 * <li>normalizes the decision settings and appends the pseudo-projective
	 * decisions required by the marking strategy and covered-root options.</li>
	 * </ul>
	 *
	 * @throws MaltChainedException if an option cannot be resolved or the information file cannot be written
	 */
	public void checkOptionDependency() throws MaltChainedException {
		try {
			writeInfo("\nDEPENDENCIES\n");

			// Copy the feature model file into the configuration directory
			String featureModelFileName = getOptionValue("guide", "features").toString().trim();
			if (featureModelFileName.equals("")) {
				// use default feature model depending on the selected parser algorithm
				OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
				featureModelFileName = getOptionValue("guide", "features").toString().trim();
				/* START: Temp fix during development of new liblinear and libsvm interface */
				String learner = getOptionValueString("guide", "learner");
				if (!learner.startsWith("lib")) {
					learner = "lib"+learner;
				}
				/* END: Temp fix during development of new liblinear and libsvm interface */
				featureModelFileName = featureModelFileName.replace("{learner}", learner);
				final URLFinder f = new URLFinder();
				featureModelFileName = configDir.copyToConfig(f.findURLinJars(featureModelFileName));
			} else {
				featureModelFileName = configDir.copyToConfig(featureModelFileName);
			}
			OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
			if (configDir.getInfoFileWriter() != null) {
				configDir.getInfoFileWriter().write("--guide-features (  -F)                 "+getOptionValue("guide", "features").toString()+"\n");
			}

			// A data split needs both a column and a structure; when only one of
			// them is given it is cleared so that a single model is induced.
			final String dataSplitColumn = getOptionValue("guide", "data_split_column").toString();
			final String dataSplitStructure = getOptionValue("guide", "data_split_structure").toString();
			if (dataSplitColumn.equals("") && !dataSplitStructure.equals("")) {
				configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
				OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
				writeInfo("--guide-data_split_structure (  -s)\n");
			}
			if (!dataSplitColumn.equals("") && dataSplitStructure.equals("")) {
				configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
				OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
				writeInfo("--guide-data_split_column (  -d)\n");
			}

			// Null is tested on the raw option value; a missing value is treated
			// like an empty one and therefore falls back to the default settings.
			final Object decisionOption = getOptionValue("guide", "decision_settings");
			final String originalDecisionSettings = decisionOption == null ? null : decisionOption.toString();
			String decisionSettings = originalDecisionSettings == null ? "" : originalDecisionSettings.trim();
			final String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
			final String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
			final StringBuilder newDecisionSettings = new StringBuilder();

			if (decisionSettings.length() < 1 || decisionSettings.equals("default")) {
				decisionSettings = "T.TRANS+A.DEPREL";
			} else {
				// Fixed locale: the default-locale variant would map 'i' to a dotted
				// capital I under a Turkish locale and break the identifiers below.
				decisionSettings = decisionSettings.toUpperCase(Locale.ENGLISH);
			}

			final boolean pathMarking = markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path");
			final boolean liftedMarking = markingStrategy.equalsIgnoreCase("head") || pathMarking;
			if (liftedMarking && !PP_LIFTED_PATTERN.matcher(decisionSettings).matches()) {
				newDecisionSettings.append("+A.PPLIFTED");
			}
			if (pathMarking && !PP_PATH_PATTERN.matcher(decisionSettings).matches()) {
				newDecisionSettings.append("+A.PPPATH");
			}
			if (!coveredRoot.equalsIgnoreCase("none") && !PP_COVERED_PATTERN.matcher(decisionSettings).matches()) {
				newDecisionSettings.append("+A.PPCOVERED");
			}
			if (originalDecisionSettings == null || !originalDecisionSettings.equals(decisionSettings) || newDecisionSettings.length() > 0) {
				OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
				if (configDir.getInfoFileWriter() != null) {
					configDir.getInfoFileWriter().write("--guide-decision_settings (  -gds)     "+getOptionValue("guide", "decision_settings").toString()+"\n");
				}
			}
			if (configDir.getInfoFileWriter() != null) {
				configDir.getInfoFileWriter().flush();
			}
		} catch (IOException e) {
			throw new ConfigurationException("Could not write to the configuration information file. ", e);
		}
	}

	/**
	 * Writes <code>text</code> to the configuration information file, if the
	 * configuration directory provides a writer for it.
	 */
	private void writeInfo(String text) throws IOException {
		if (configDir.getInfoFileWriter() != null) {
			configDir.getInfoFileWriter().write(text);
		}
	}
}