001 package org.maltparser.parser;
002
003 import java.io.File;
004 import java.io.IOException;
005 import java.net.URL;
006 import java.util.Formatter;
007 import java.util.regex.Pattern;
008
009 import org.apache.log4j.FileAppender;
010 import org.apache.log4j.Level;
011 import org.apache.log4j.Logger;
012 import org.apache.log4j.PatternLayout;
013 import org.maltparser.core.config.ConfigurationDir;
014 import org.maltparser.core.config.ConfigurationException;
015 import org.maltparser.core.config.ConfigurationRegistry;
016 import org.maltparser.core.exception.MaltChainedException;
017 import org.maltparser.core.helper.SystemLogger;
018 import org.maltparser.core.helper.Util;
019 import org.maltparser.core.io.dataformat.DataFormatInstance;
020 import org.maltparser.core.options.OptionManager;
021 import org.maltparser.core.propagation.PropagationManager;
022 import org.maltparser.core.symbol.SymbolTableHandler;
023 import org.maltparser.core.syntaxgraph.DependencyStructure;
024 import org.maltparser.parser.guide.ClassifierGuide;
025
026 /**
027 * @author Johan Hall
028 *
029 */
030 public class SingleMalt implements DependencyParserConfig {
031 public static final int LEARN = 0;
032 public static final int PARSE = 1;
033 protected ConfigurationDir configDir;
034 protected Logger configLogger;
035 protected int optionContainerIndex;
036 protected Algorithm parsingAlgorithm = null;
037 protected int mode;
038 protected ConfigurationRegistry registry;
039 protected SymbolTableHandler symbolTableHandler;
040 protected DataFormatInstance dataFormatInstance;
041 protected long startTime;
042 protected long endTime;
043 protected int nIterations = 0;
044 protected PropagationManager propagationManager;
045
046 public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
047
048 this.optionContainerIndex = containerIndex;
049 this.mode = mode;
050 setConfigurationDir(configDir);
051 startTime = System.currentTimeMillis();
052 configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
053 registry = new ConfigurationRegistry();
054 this.dataFormatInstance = dataFormatInstance;
055 symbolTableHandler = dataFormatInstance.getSymbolTables();
056
057 if (mode == SingleMalt.LEARN) {
058 checkOptionDependency();
059 }
060 registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
061 registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
062 // registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
063 initPropagation();
064 initParsingAlgorithm();
065 if (configLogger.isInfoEnabled()) {
066 URL inputFormatURL = configDir.getInputFormatURL();
067 URL outputFormatURL = configDir.getOutputFormatURL();
068 if (inputFormatURL != null) {
069 if (outputFormatURL == null || outputFormatURL.toString().equals(inputFormatURL.toString())) {
070 int index = inputFormatURL.toString().indexOf('!');
071 if (index == -1) {
072 configLogger.info(" Data Format : "+inputFormatURL.toString()+"\n");
073 } else {
074 configLogger.info(" Data Format : "+inputFormatURL.toString().substring(index+1)+"\n");
075 }
076 } else {
077 int indexIn = inputFormatURL.toString().indexOf('!');
078 int indexOut = outputFormatURL.toString().indexOf('!');
079 if (indexIn == -1) {
080 configLogger.info(" Input Data Format : "+inputFormatURL.toString()+"\n");
081 } else {
082 configLogger.info(" Input Data Format : "+inputFormatURL.toString().substring(indexIn+1)+"\n");
083 }
084 if (indexOut == -1) {
085 configLogger.info(" Output Data Format : "+outputFormatURL.toString()+"\n");
086 } else {
087 configLogger.info(" Output Data Format : "+outputFormatURL.toString().substring(indexOut+1)+"\n");
088 }
089 }
090 }
091 }
092 }
093
094 private void initPropagation() throws MaltChainedException {
095 String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString();
096 if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
097 return;
098 }
099 propagationManager = new PropagationManager(configDir);
100 if (mode == SingleMalt.LEARN) {
101 propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
102 OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
103 }
104 getConfigLogger().info(" Propagation : " + propagationSpecFileName+"\n");
105 propagationManager.loadSpecification(propagationSpecFileName);
106 }
107
108 /**
109 * Initialize the parsing algorithm
110 *
111 * @throws MaltChainedException
112 */
113 protected void initParsingAlgorithm() throws MaltChainedException {
114 if (mode == LEARN) {
115 parsingAlgorithm = new BatchTrainer(this);
116 } else if (mode == PARSE) {
117 parsingAlgorithm = new DeterministicParser(this);
118 }
119 }
120
121 public void addRegistry(Class<?> clazz, Object o) {
122 registry.put(clazz, o);
123 }
124
125 public void process(Object[] arguments) throws MaltChainedException {
126 if (mode == LEARN) {
127 if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
128 throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
129 }
130 DependencyStructure systemGraph = (DependencyStructure)arguments[0];
131 DependencyStructure goldGraph = (DependencyStructure)arguments[1];
132 if (systemGraph.hasTokens() && getGuide() != null) {
133 getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
134 }
135 } else if (mode == PARSE) {
136 if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
137 throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
138 }
139 DependencyStructure processGraph = (DependencyStructure)arguments[0];
140 if (processGraph.hasTokens()) {
141 ((Parser)getAlgorithm()).parse(processGraph);
142 }
143 }
144 }
145
146 public void parse(DependencyStructure graph) throws MaltChainedException {
147 if (graph.hasTokens()) {
148 ((Parser)getAlgorithm()).parse(graph);
149 }
150 }
151
152 public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
153 if (oracleGraph.hasTokens()) {
154 if (getGuide() != null) {
155 getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph));
156 } else {
157 ((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph);
158 }
159 }
160 }
161
162 public void train() throws MaltChainedException {
163 if (getGuide() == null) {
164 ((Trainer)getAlgorithm()).train();
165 }
166 }
167
168 public void terminate(Object[] arguments) throws MaltChainedException {
169 // if (getAlgorithm() instanceof Trainer) {
170 // ((Trainer)getAlgorithm()).terminate();
171 // }
172 getAlgorithm().terminate();
173 if (getGuide() != null) {
174 getGuide().terminate();
175 }
176 if (mode == LEARN) {
177 endTime = System.currentTimeMillis();
178 long elapsed = endTime - startTime;
179 if (configLogger.isInfoEnabled()) {
180 configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
181 }
182 } else if (mode == PARSE) {
183 endTime = System.currentTimeMillis();
184 long elapsed = endTime - startTime;
185 if (configLogger.isInfoEnabled()) {
186 configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
187 }
188 }
189 if (SystemLogger.logger() != configLogger && configLogger != null) {
190 configLogger.removeAllAppenders();
191 }
192 }
193
194 /**
195 * Initialize the configuration logger
196 *
197 * @return the configuration logger
198 * @throws MaltChainedException
199 */
200 public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
201 if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
202 configLogger = Logger.getLogger(logfile);
203 FileAppender fileAppender = null;
204 try {
205 fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
206 } catch(IOException e) {
207 throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
208 }
209 fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
210 configLogger.addAppender(fileAppender);
211 configLogger.setLevel(Level.toLevel(level, Level.INFO));
212 } else {
213 configLogger = SystemLogger.logger();
214 }
215
216 return configLogger;
217 }
218
219 public Logger getConfigLogger() {
220 return configLogger;
221 }
222
223 public void setConfigLogger(Logger logger) {
224 configLogger = logger;
225 }
226
227 public ConfigurationDir getConfigurationDir() {
228 return configDir;
229 }
230
231 public void setConfigurationDir(ConfigurationDir configDir) {
232 this.configDir = configDir;
233 }
234
235 public int getMode() {
236 return mode;
237 }
238
239 public ConfigurationRegistry getRegistry() {
240 return registry;
241 }
242
243 public void setRegistry(ConfigurationRegistry registry) {
244 this.registry = registry;
245 }
246
247 public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
248 return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
249 }
250
251 public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
252 return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
253 }
254
255 public OptionManager getOptionManager() throws MaltChainedException {
256 return OptionManager.instance();
257 }
258 /******************************** MaltParserConfiguration specific ********************************/
259
260 /**
261 * Returns the list of symbol tables
262 *
263 * @return the list of symbol tables
264 */
265 public SymbolTableHandler getSymbolTables() {
266 return symbolTableHandler;
267 }
268
269 public PropagationManager getPropagationManager() {
270 return propagationManager;
271 }
272
273 public Algorithm getAlgorithm() {
274 return parsingAlgorithm;
275 }
276 /**
277 * Returns the guide
278 *
279 * @return the guide
280 */
281 public ClassifierGuide getGuide() {
282 return parsingAlgorithm.getGuide();
283 }
284
285 public void checkOptionDependency() throws MaltChainedException {
286 try {
287 if (configDir.getInfoFileWriter() != null) {
288 configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
289 }
290
291 // Copy the feature model file into the configuration directory
292 String featureModelFileName = getOptionValue("guide", "features").toString().trim();
293 if (featureModelFileName.equals("")) {
294 // use default feature model depending on the selected parser algorithm
295 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
296 featureModelFileName = getOptionValue("guide", "features").toString().trim();
297 /* START: Temp fix during development of new liblinear and libsvm interface */
298 String learner = getOptionValueString("guide", "learner");
299 if (!learner.startsWith("lib")) {
300 learner = "lib"+learner;
301 }
302 /* END: Temp fix during development of new liblinear and libsvm interface */
303 featureModelFileName = featureModelFileName.replace("{learner}", learner);
304 featureModelFileName = configDir.copyToConfig(Util.findURLinJars(featureModelFileName));
305 } else {
306 featureModelFileName = configDir.copyToConfig(featureModelFileName);
307 }
308 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
309 if (configDir.getInfoFileWriter() != null) {
310 configDir.getInfoFileWriter().write("--guide-features ( -F) "+getOptionValue("guide", "features").toString()+"\n");
311 }
312
313 if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
314 configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
315 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
316 if (configDir.getInfoFileWriter() != null) {
317 configDir.getInfoFileWriter().write("--guide-data_split_structure ( -s)\n");
318 }
319 }
320 if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
321 configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
322 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
323 if (configDir.getInfoFileWriter() != null) {
324 configDir.getInfoFileWriter().write("--guide-data_split_column ( -d)\n");
325 }
326 }
327
328 String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
329 String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
330 String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
331 StringBuilder newDecisionSettings = new StringBuilder();
332
333 if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
334 decisionSettings = "T.TRANS+A.DEPREL";
335 } else {
336 decisionSettings = decisionSettings.toUpperCase();
337 }
338
339 if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
340 if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
341 newDecisionSettings.append("+A.PPLIFTED");
342 }
343 }
344 if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
345 if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
346 newDecisionSettings.append("+A.PPPATH");
347 }
348 }
349 if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
350 newDecisionSettings.append("+A.PPCOVERED");
351 }
352 if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
353 OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
354 if (configDir.getInfoFileWriter() != null) {
355 configDir.getInfoFileWriter().write("--guide-decision_settings ( -gds) "+getOptionValue("guide", "decision_settings").toString()+"\n");
356 }
357 }
358 if (configDir.getInfoFileWriter() != null) {
359 configDir.getInfoFileWriter().flush();
360 }
361 } catch (IOException e) {
362 throw new ConfigurationException("Could not write to the configuration information file. ", e);
363 }
364 }
365 }