package org.maltparser.parser;

import java.io.File;
import java.io.IOException;
import java.util.Formatter;
import java.util.regex.Pattern;

import org.apache.log4j.FileAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.maltparser.core.config.ConfigurationDir;
import org.maltparser.core.config.ConfigurationException;
import org.maltparser.core.config.ConfigurationRegistry;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.parser.guide.ClassifierGuide;

/**
 * Dependency parser configuration that drives one parsing algorithm, either a
 * {@link BatchTrainer} (mode {@link #LEARN}) or a {@link DeterministicParser}
 * (mode {@link #PARSE}).
 *
 * @author Johan Hall
 */
public class SingleMalt implements DependencyParserConfig {
    /** Mode value selecting the learning (training) algorithm. */
    public static final int LEARN = 0;
    /** Mode value selecting the parsing algorithm. */
    public static final int PARSE = 1;

    protected ConfigurationDir configDir;
    protected Logger configLogger;
    protected int optionContainerIndex;
    protected Algorithm parsingAlgorithm = null;
    protected int mode;
    protected ConfigurationRegistry registry;
    protected SymbolTableHandler symbolTableHandler;
    /** Wall-clock time (ms) recorded by {@link #initialize}. */
    protected long startTime;
    /** Wall-clock time (ms) recorded by {@link #terminate}. */
    protected long endTime;
    protected int nIterations = 0;

    /**
     * Prepares this configuration for use: stores the option container index, the mode and the
     * configuration directory, records the start time, sets up the configuration logger from the
     * options {@code config/logfile} and {@code config/logging}, creates a fresh registry and
     * finally instantiates the parsing algorithm. In {@link #LEARN} mode the option dependencies
     * are checked before the algorithm is created.
     *
     * @param containerIndex the option container index used for all option look-ups
     * @param dataFormatInstance the data format instance supplying the symbol tables
     * @param configDir the configuration directory
     * @param mode {@link #LEARN} or {@link #PARSE}
     * @throws MaltChainedException if an option cannot be read or the set-up fails
     */
    public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
        this.optionContainerIndex = containerIndex;
        this.mode = mode;
        setConfigurationDir(configDir);
        startTime = System.currentTimeMillis();
        configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
        registry = new ConfigurationRegistry();
        symbolTableHandler = dataFormatInstance.getSymbolTables();

        if (mode == SingleMalt.LEARN) {
            checkOptionDependency();
        }
        registry.put(SymbolTableHandler.class, getSymbolTables());
        registry.put(DataFormatInstance.class, dataFormatInstance);
        initParsingAlgorithm();
    }

    /**
     * Initialize the parsing algorithm: a {@link BatchTrainer} in {@link #LEARN} mode, a
     * {@link DeterministicParser} in {@link #PARSE} mode. For any other mode value the
     * algorithm is left unchanged.
     *
     * @throws MaltChainedException if the algorithm cannot be created
     */
    protected void initParsingAlgorithm() throws MaltChainedException {
        if (mode == LEARN) {
            parsingAlgorithm = new BatchTrainer(this);
        } else if (mode == PARSE) {
            parsingAlgorithm = new DeterministicParser(this);
        }
    }

    /**
     * Registers an object in the configuration registry under the given class key.
     *
     * @param clazz the registry key
     * @param o the object to register
     */
    public void addRegistry(Class<?> clazz, Object o) {
        registry.put(clazz, o);
    }

    /**
     * Processes one sentence according to the current mode.
     * <ul>
     * <li>{@link #LEARN}: {@code arguments[0]} is the system graph and {@code arguments[1]} the
     * gold graph; the sentence is handled only if the system graph has tokens and a guide
     * exists.</li>
     * <li>{@link #PARSE}: {@code arguments[0]} is the graph to parse; it is parsed only if it
     * has tokens.</li>
     * </ul>
     *
     * @param arguments the dependency structures described above
     * @throws MaltChainedException if the required structures are missing (including a
     *         {@code null} argument array) or processing fails
     */
    public void process(Object[] arguments) throws MaltChainedException {
        if (mode == LEARN) {
            if (arguments == null || arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
                throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
            }
            DependencyStructure systemGraph = (DependencyStructure)arguments[0];
            DependencyStructure goldGraph = (DependencyStructure)arguments[1];
            if (systemGraph.hasTokens() && getGuide() != null) {
                getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
            }
        } else if (mode == PARSE) {
            if (arguments == null || arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
                throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
            }
            DependencyStructure processGraph = (DependencyStructure)arguments[0];
            if (processGraph.hasTokens()) {
                ((Parser)getAlgorithm()).parse(processGraph);
            }
        }
    }

    /**
     * Parses the given graph with the current algorithm; graphs without tokens are ignored.
     *
     * @param graph the dependency structure to parse
     * @throws MaltChainedException if parsing fails
     */
    public void parse(DependencyStructure graph) throws MaltChainedException {
        if (graph.hasTokens()) {
            ((Parser)getAlgorithm()).parse(graph);
        }
    }

    /**
     * Runs the trainer on a gold/oracle graph pair; oracle graphs without tokens are ignored.
     * When a guide exists, the result is additionally passed to its {@code finalizeSentence}.
     *
     * @param goldGraph the gold-standard dependency structure
     * @param oracleGraph the dependency structure built by the trainer
     * @throws MaltChainedException if the trainer fails
     */
    public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
        if (!oracleGraph.hasTokens()) {
            return;
        }
        if (getGuide() != null) {
            getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph));
        } else {
            ((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph);
        }
    }

    /**
     * Invokes the trainer's {@code train()} method, but only when the algorithm has no guide.
     *
     * @throws MaltChainedException if training fails
     */
    public void train() throws MaltChainedException {
        if (getGuide() == null) {
            ((Trainer)getAlgorithm()).train();
        }
    }

    /**
     * Terminates the algorithm and its guide (when present), logs the elapsed learning or
     * parsing time, and removes all appenders from the configuration logger unless that logger
     * is the system logger.
     *
     * @param arguments not used by this implementation
     * @throws MaltChainedException if the algorithm or the guide fails to terminate
     */
    public void terminate(Object[] arguments) throws MaltChainedException {
        // The algorithm is null when the mode is neither LEARN nor PARSE.
        final Algorithm algorithm = getAlgorithm();
        if (algorithm != null) {
            algorithm.terminate();
        }
        if (getGuide() != null) {
            getGuide().terminate();
        }
        if (mode == LEARN) {
            logElapsedTime("Learning time: ");
        } else if (mode == PARSE) {
            logElapsedTime("Parsing time: ");
        }
        if (configLogger != null && SystemLogger.logger() != configLogger) {
            configLogger.removeAllAppenders();
        }
    }

    /**
     * Records the end time and, when info logging is enabled, logs the time elapsed since
     * {@link #startTime} as {@code hh:mm:ss} followed by the millisecond count.
     */
    private void logElapsedTime(String label) {
        endTime = System.currentTimeMillis();
        final long elapsed = endTime - startTime;
        // The logger can be null, e.g. after setConfigLogger(null); skip logging in that case.
        if (configLogger != null && configLogger.isInfoEnabled()) {
            configLogger.info(label + new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000) + " (" + elapsed + " ms)\n");
        }
    }

    /**
     * Initialize the configuration logger. When a log file name other than {@code stdout} is
     * given and a configuration directory is set, a logger named after the log file is obtained
     * and a file appender (append mode) writing into the working directory of the configuration
     * directory is attached to it. Otherwise the system logger is used.
     *
     * @param logfile the log file name; {@code null}, empty or {@code stdout} selects the system logger
     * @param level the log level name; {@link Level#INFO} is the fall-back passed to {@code Level.toLevel}
     * @return the configuration logger
     * @throws MaltChainedException if the log file cannot be created
     */
    public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
        final boolean useLogFile = logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null;
        if (!useLogFile) {
            configLogger = SystemLogger.logger();
            return configLogger;
        }

        configLogger = Logger.getLogger(logfile);
        final FileAppender fileAppender;
        try {
            fileAppender = new FileAppender(new PatternLayout("%m"), configDir.getWorkingDirectory().getPath() + File.separator + logfile, true);
        } catch (IOException e) {
            throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
        }
        fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
        configLogger.addAppender(fileAppender);
        configLogger.setLevel(Level.toLevel(level, Level.INFO));
        return configLogger;
    }

    /**
     * @return the configuration logger, possibly {@code null} if none has been set
     */
    public Logger getConfigLogger() {
        return configLogger;
    }

    /**
     * @param logger the configuration logger to use from now on
     */
    public void setConfigLogger(Logger logger) {
        configLogger = logger;
    }

    /**
     * @return the configuration directory
     */
    public ConfigurationDir getConfigurationDir() {
        return configDir;
    }

    /**
     * @param configDir the configuration directory
     */
    public void setConfigurationDir(ConfigurationDir configDir) {
        this.configDir = configDir;
    }

    /**
     * @return the mode passed to {@link #initialize}
     */
    public int getMode() {
        return mode;
    }

    /**
     * @return the configuration registry
     */
    public ConfigurationRegistry getRegistry() {
        return registry;
    }

    /**
     * @param registry the configuration registry
     */
    public void setRegistry(ConfigurationRegistry registry) {
        this.registry = registry;
    }

    /**
     * Looks up an option value in this configuration's option container.
     *
     * @param optiongroup the option group name
     * @param optionname the option name
     * @return the option value as returned by the option manager
     * @throws MaltChainedException if the look-up fails
     */
    public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
        return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
    }

    /**
     * Looks up an option value as a string in this configuration's option container.
     *
     * @param optiongroup the option group name
     * @param optionname the option name
     * @return the option value string as returned by the option manager
     * @throws MaltChainedException if the look-up fails
     */
    public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
        return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
    }

    /**
     * @return the shared option manager instance
     * @throws MaltChainedException declared for interface compatibility
     */
    public OptionManager getOptionManager() throws MaltChainedException {
        return OptionManager.instance();
    }

    /* ------------------------ MaltParserConfiguration specific ------------------------ */

    /**
     * Returns the list of symbol tables
     *
     * @return the list of symbol tables
     */
    public SymbolTableHandler getSymbolTables() {
        return symbolTableHandler;
    }

    /**
     * @return the parsing algorithm, or {@code null} if none has been created
     */
    public Algorithm getAlgorithm() {
        return parsingAlgorithm;
    }

    /**
     * Returns the guide
     *
     * @return the guide of the parsing algorithm, or {@code null} when there is no algorithm
     *         or the algorithm has no guide
     */
    public ClassifierGuide getGuide() {
        return parsingAlgorithm == null ? null : parsingAlgorithm.getGuide();
    }

    /**
     * Reconciles interdependent options and records every overloaded option in the
     * configuration information file (when one is available):
     * <ul>
     * <li>an empty {@code guide/features} is overloaded with the value of
     * {@code singlemalt/parsing_algorithm}; a non-empty one is copied into the configuration;</li>
     * <li>{@code guide/data_split_column} and {@code guide/data_split_structure} are both
     * cleared when only one of them is set;</li>
     * <li>{@code guide/decision_settings} is normalised (default
     * {@code T.TRANS+A.DEPREL}, otherwise upper-cased) and extended with
     * {@code A.PPLIFTED}, {@code A.PPPATH} and {@code A.PPCOVERED} as required by
     * {@code pproj/marking_strategy} and {@code pproj/covered_root}.</li>
     * </ul>
     *
     * @throws MaltChainedException if an option cannot be read or overloaded, or if the
     *         configuration information file cannot be written
     */
    public void checkOptionDependency() throws MaltChainedException {
        try {
            writeInfo("\nDEPENDENCIES\n");

            if (getOptionValue("guide", "features").toString().equals("")) {
                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
                if (configDir.getInfoFileWriter() != null) {
                    configDir.getInfoFileWriter().write("--guide-features ( -F) " + getOptionValue("guide", "features").toString() + "\n");
                }
            } else {
                configDir.copyToConfig(getOptionValue("guide", "features").toString());
            }

            if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
                configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
                writeInfo("--guide-data_split_structure ( -s)\n");
            }
            if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
                configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
                writeInfo("--guide-data_split_column ( -d)\n");
            }

            String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
            final String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
            final String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
            final StringBuilder newDecisionSettings = new StringBuilder();

            if (decisionSettings.length() < 1 || decisionSettings.equals("default")) {
                decisionSettings = "T.TRANS+A.DEPREL";
            } else {
                // Use a fixed locale: under e.g. a Turkish default locale 'i' would upper-case
                // to a dotted capital I and the A.PPLIFTED check below would never match.
                decisionSettings = decisionSettings.toUpperCase(java.util.Locale.ENGLISH);
            }

            final boolean pathMarking = markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path");
            final boolean headMarking = markingStrategy.equalsIgnoreCase("head") || pathMarking;
            if (headMarking && !Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
                newDecisionSettings.append("+A.PPLIFTED");
            }
            if (pathMarking && !Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
                newDecisionSettings.append("+A.PPPATH");
            }
            if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
                newDecisionSettings.append("+A.PPCOVERED");
            }

            if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings + newDecisionSettings.toString());
                if (configDir.getInfoFileWriter() != null) {
                    configDir.getInfoFileWriter().write("--guide-decision_settings ( -gds) " + getOptionValue("guide", "decision_settings").toString() + "\n");
                }
            }
            if (configDir.getInfoFileWriter() != null) {
                configDir.getInfoFileWriter().flush();
            }
        } catch (IOException e) {
            throw new ConfigurationException("Could not write to the configuration information file. ", e);
        }
    }

    /**
     * Writes the given text to the configuration information file; does nothing when the
     * configuration directory has no information file writer.
     */
    private void writeInfo(String text) throws IOException, MaltChainedException {
        if (configDir.getInfoFileWriter() != null) {
            configDir.getInfoFileWriter().write(text);
        }
    }
}