001    package org.maltparser.parser;
002    
003    import java.io.File;
004    import java.io.IOException;
005    import java.lang.reflect.Constructor;
006    import java.lang.reflect.InvocationTargetException;
007    import java.util.Formatter;
008    import java.util.regex.Pattern;
009    
010    import org.apache.log4j.FileAppender;
011    import org.apache.log4j.Level;
012    import org.apache.log4j.Logger;
013    import org.apache.log4j.PatternLayout;
014    import org.maltparser.core.config.ConfigurationDir;
015    import org.maltparser.core.config.ConfigurationException;
016    import org.maltparser.core.config.ConfigurationRegistry;
017    import org.maltparser.core.exception.MaltChainedException;
018    import org.maltparser.core.helper.SystemLogger;
019    import org.maltparser.core.io.dataformat.DataFormatInstance;
020    import org.maltparser.core.options.OptionManager;
021    import org.maltparser.core.symbol.SymbolTableHandler;
022    import org.maltparser.core.syntaxgraph.DependencyStructure;
023    import org.maltparser.parser.algorithm.ParsingAlgorithm;
024    import org.maltparser.parser.algorithm.nivre.malt04.NivreEagerMalt04;
025    import org.maltparser.parser.algorithm.nivre.malt04.NivreStandardMalt04;
026    import org.maltparser.parser.guide.Guidable;
027    import org.maltparser.parser.guide.Guide;
028    import org.maltparser.parser.history.GuideHistory;
029    import org.maltparser.parser.history.action.GuideDecision;
030    import org.maltparser.parser.history.action.GuideUserAction;
031    
032    public class SingleMalt implements DependencyParserConfig, Guidable {
033            public static final int LEARN = 0;
034            public static final int PARSE = 1;
035            protected ConfigurationDir configDir;
036            protected Logger configLogger;
037            protected int optionContainerIndex;
038            protected ParsingAlgorithm parsingAlgorithm = null;
039            protected Guide guide = null;
040            protected int mode;
041            protected ConfigurationRegistry registry;
042            protected SymbolTableHandler symbolTableHandler;
043            protected long startTime;
044            protected long endTime;
045            
046            public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
047                    this.optionContainerIndex = containerIndex;
048                    this.mode = mode;
049                    setConfigurationDir(configDir);
050                    startTime = System.currentTimeMillis();
051                    registry = new ConfigurationRegistry();
052    
053                    symbolTableHandler = dataFormatInstance.getSymbolTables();
054                    configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
055                    if (mode == SingleMalt.LEARN) {
056                            checkOptionDependency();
057    //                      initDecisionSettings();
058                    } else if (mode == SingleMalt.PARSE) {
059                            
060                    } 
061                    registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
062                    registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
063                    registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
064                    initParsingAlgorithm(); 
065                    initGuide();
066            }
067            
068            
069            /**
070             * Initialize the parsing algorithm
071             * 
072             * @throws MaltChainedException
073             */
074            protected void initParsingAlgorithm() throws MaltChainedException {
075                    if (((Boolean)getOptionValue("malt0.4", "behavior")).booleanValue() == true && getOptionValueString("singlemalt", "parsing_algorithm").equals("nivreeager")) {
076                            this.parsingAlgorithm = new NivreEagerMalt04(this);
077                    } else if (((Boolean)getOptionValue("malt0.4", "behavior")).booleanValue() == true && getOptionValueString("singlemalt", "parsing_algorithm").equals("nivrestandard")) {
078                            this.parsingAlgorithm = new NivreStandardMalt04(this);
079                    } else {
080                            Class<?> clazz = (Class<?>)getOptionValue("singlemalt", "parsing_algorithm");
081            
082                            Class<?>[] argTypes = { org.maltparser.parser.SingleMalt.class };
083                            Object[] arguments = new Object[1];
084                            arguments[0] = this;
085                            if (getConfigLogger().isInfoEnabled()) {
086                                    getConfigLogger().info("Initialize the parsing algorithm...\n");
087                            }
088                            try {   
089                                    Constructor<?> constructor = clazz.getConstructor(argTypes);
090                                    this.parsingAlgorithm = (ParsingAlgorithm)constructor.newInstance(arguments);
091                            } catch (NoSuchMethodException e) {
092                                    throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e);
093                            } catch (InstantiationException e) {
094                                    throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e);
095                            } catch (IllegalAccessException e) {
096                                    throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e);
097                            } catch (InvocationTargetException e) {
098                                    throw new ConfigurationException("The parsing algorithm '"+clazz.getName()+"' cannot be initialized. ", e);
099                            }
100                    }
101                    registry.put(org.maltparser.parser.algorithm.ParsingAlgorithm.class, parsingAlgorithm);
102            }
103            
104            public void initGuide() throws MaltChainedException {
105                    Class<?> clazz = (Class<?>)getOptionValue("singlemalt", "guide_model");
106    
107                    Class<?>[] argTypes = { org.maltparser.parser.DependencyParserConfig.class, org.maltparser.parser.history.GuideHistory.class, org.maltparser.parser.guide.Guide.GuideMode.class };
108                    Object[] arguments = new Object[3];
109                    arguments[0] = this;
110                    arguments[1] = (GuideHistory)parsingAlgorithm.getHistory();
111                    if (mode == LEARN) {
112                            arguments[2] = Guide.GuideMode.TRAIN;
113                    } else if (mode == PARSE) {
114                            arguments[2] = Guide.GuideMode.CLASSIFY;
115                    }
116                    
117                    if (configLogger.isInfoEnabled()) {
118                            configLogger.info("Initialize the guide model...\n");
119                    }
120                    try {   
121                            Constructor<?> constructor = clazz.getConstructor(argTypes);
122                            this.guide = (Guide)constructor.newInstance(arguments);
123                    } catch (NoSuchMethodException e) {
124                            throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e);
125                    } catch (InstantiationException e) {
126                            throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e);
127                    } catch (IllegalAccessException e) {
128                            throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e);
129                    } catch (InvocationTargetException e) {
130                            throw new ConfigurationException("The guide model '"+clazz.getName()+"' cannot be initialized. ", e);
131                    }
132            }
133            
134            public void process(Object[] arguments) throws MaltChainedException {
135                    if (mode == LEARN) {
136                            if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
137                                    throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
138                            }
139                            DependencyStructure systemGraph = (DependencyStructure)arguments[0];
140                            DependencyStructure goldGraph = (DependencyStructure)arguments[1];
141                            if (systemGraph.hasTokens()) {
142                                    getGuide().finalizeSentence(getParsingAlgorithm().oracleParse(goldGraph, systemGraph));
143                            }
144                    } else if (mode == PARSE) {
145                            if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
146                                    throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
147                            }
148                            DependencyStructure processGraph = (DependencyStructure)arguments[0];
149                            if (processGraph.hasTokens()) {
150                                    getParsingAlgorithm().parse(processGraph);
151                            }
152                    }
153            }
154            
155            public void parse(DependencyStructure graph) throws MaltChainedException {
156                    if (graph.hasTokens()) {
157                            getParsingAlgorithm().parse(graph);
158                    }
159            }
160            
161            public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
162                    if (oracleGraph.hasTokens()) {
163                            getGuide().finalizeSentence(getParsingAlgorithm().oracleParse(goldGraph, oracleGraph));
164                    }
165            }
166            
167            public void terminate(Object[] arguments) throws MaltChainedException {
168                    if (guide != null) {
169                            guide.terminate();
170                    }
171                    if (mode == LEARN) {
172                            endTime = System.currentTimeMillis();
173                            long elapsed = endTime - startTime;
174                            if (configLogger.isInfoEnabled()) {
175                                    configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
176                            }
177                    } else if (mode == PARSE) {
178                            endTime = System.currentTimeMillis();
179                            long elapsed = endTime - startTime;
180                            if (configLogger.isInfoEnabled()) {
181                                    configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
182                            }
183                    }
184                    if (SystemLogger.logger() != configLogger && configLogger != null) {
185                            configLogger.removeAllAppenders();
186                    }
187            }
188            
189            /**
190             * Initialize the configuration logger
191             * 
192             * @return the configuration logger
193             * @throws MaltChainedException
194             */
195            public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
196                    if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
197                            configLogger = Logger.getLogger(logfile);
198                            FileAppender fileAppender = null;
199                            try {
200                                    fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
201                            } catch(IOException e) {
202                                    throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
203                            }
204                            fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
205                            configLogger.addAppender(fileAppender);
206                            configLogger.setLevel(Level.toLevel(level, Level.INFO));        
207                    } else {
208                            configLogger = SystemLogger.logger();
209                    }
210    
211                    return configLogger;
212            }
213            
214            public Logger getConfigLogger() {
215                    return configLogger;
216            }
217    
218            public void setConfigLogger(Logger logger) {
219                    configLogger = logger;
220            }
221            
222            public ConfigurationDir getConfigurationDir() {
223                    return configDir;
224            }
225            
226            public void setConfigurationDir(ConfigurationDir configDir) {
227                    this.configDir = configDir;
228            }
229            
230            public int getMode() {
231                    return mode;
232            }
233            
234            public ConfigurationRegistry getRegistry() {
235                    return registry;
236            }
237    
238            public void setRegistry(ConfigurationRegistry registry) {
239                    this.registry = registry;
240            }
241    
242            public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
243                    return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
244            }
245            
246            public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
247                    return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
248            }
249            
250            public OptionManager getOptionManager() throws MaltChainedException {
251                    return OptionManager.instance();
252            }
253            /******************************** MaltParserConfiguration specific  ********************************/
254            
255            /**
256             * Returns the list of symbol tables
257             * 
258             * @return the list of symbol tables
259             */
260            public SymbolTableHandler getSymbolTables() {
261                    return symbolTableHandler;
262            }
263            
264            /**
265             * Returns the parsing algorithm in use
266             * 
267             * @return the parsing algorithm in use
268             */
269            public ParsingAlgorithm getParsingAlgorithm() {
270                    return parsingAlgorithm;
271            }
272            
273            /**
274             * Returns the guide
275             * 
276             * @return the guide
277             */
278            public Guide getGuide() {
279                    return guide;
280            }
281            
282            public void checkOptionDependency() throws MaltChainedException {
283                    try {
284                            configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
285                            if ((Boolean)getOptionValue("malt0.4", "behavior") == true) {
286                                    if (!getOptionValueString("singlemalt", "null_value").equals("rootlabel")) {
287                                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "null_value", "rootlabel");
288                                            configDir.getInfoFileWriter().write("--singlemalt-null_value (-nv)     rootlabel\n");
289                                            configLogger.warn("Option --malt0.4-behavior = true and --singlemalt-null_value != 'rootlabel'. Option --singlemalt-null_value is overloaded with value 'rootlabel'\n");
290                                    }
291                                    if (getOptionValue("malt0.4", "depset").toString().equals("")) {                                
292                                            configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-depset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n");
293                                    }
294                                    if (getOptionValue("malt0.4", "posset").toString().equals("")) {                                
295                                            configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-posset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n");
296                                    }
297                                    if (getOptionValue("malt0.4", "cposset").toString().equals("")) {                               
298                                            configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-cposset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n");
299                                    }
300                                    if (!getOptionValue("guide", "kbest").toString().equals("1")) {
301                                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "kbest", "1");
302                                            configDir.getInfoFileWriter().write("--guide-kbest (  -k)                    1\n");
303                                            configLogger.warn("Option --malt0.4-behavior = true and --guide-kbest != '1'. Option --guide-kbest is overloaded with value '1'\n");
304                                    }
305                            }
306                            if (getOptionValue("guide", "features").toString().equals("")) {
307                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
308                                    configDir.getInfoFileWriter().write("--guide-features (  -F)                 "+getOptionValue("guide", "features").toString()+"\n");
309                            } else {
310                                    configDir.copyToConfig(getOptionValue("guide", "features").toString());
311                            }
312                            if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
313                                    configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
314                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
315                                    configDir.getInfoFileWriter().write("--guide-data_split_structure (  -s)\n");
316                            }
317                            if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
318                                    configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
319                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
320                                    configDir.getInfoFileWriter().write("--guide-data_split_column (  -d)\n");
321                            }
322    //                      if (!getOptionValue("input", "format").toString().equals(getOptionValue("output", "format").toString())) {
323    //                              OptionManager.instance().overloadOptionValue(containerIndex, "output", "format", getOptionValue("input", "format").toString());
324    //                              configDir.getInfoFileWriter().write("--output-format (  -of)                 "+getOptionValue("input", "format").toString()+"\n");
325    //                      }
326                            // decision settings
327    
328                            String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
329                            String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
330                            String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
331                            StringBuilder newDecisionSettings = new StringBuilder();
332                            if ((Boolean)getOptionValue("malt0.4", "behavior") == true) {
333                                    decisionSettings = "T.TRANS+A.DEPREL";
334                            }
335                            if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
336                                    decisionSettings = "T.TRANS+A.DEPREL";
337                            } else {
338                                    decisionSettings = decisionSettings.toUpperCase();
339                            }
340                            
341                            if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
342                                    if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
343                                            newDecisionSettings.append("+A.PPLIFTED");
344                                    }
345                            }
346                            if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
347                                    if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
348                                            newDecisionSettings.append("+A.PPPATH");
349                                    }
350                            }
351                            if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
352                                    newDecisionSettings.append("+A.PPCOVERED");
353                            }
354                            if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
355                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
356                                    configDir.getInfoFileWriter().write("--guide-decision_settings (  -gds)                 "+getOptionValue("guide", "decision_settings").toString()+"\n");
357                            }
358                            
359                            configDir.getInfoFileWriter().flush();
360                    } catch (IOException e) {
361                            throw new ConfigurationException("Could not write to the configuration information file. ", e);
362                    }
363            }
364            
365            /******************************** Guidable interface ********************************/
366            
367            /**
368             * This method is used during learning. Currently, 
369             * the MaltParserConfiguration redirect the instance to the guide. 
370             * Maybe in the future this method will
371             * be re-implemented to add some interesting things or maybe not.
372             * 
373             * @param action
374             * @throws MaltChainedException
375             */
376            public void setInstance(GuideUserAction action) throws MaltChainedException {
377                    if (mode != SingleMalt.LEARN) {
378                            throw new ConfigurationException("It is only possible to set an instance during learning. ");
379                    }
380                    try {
381    //                      if (diagnostics == true && diaLogger != null) {
382    //                              SingleDecision singleDecision;
383    //                              if (((GuideDecision)action) instanceof SingleDecision) {
384    //                                      singleDecision = (SingleDecision)((GuideDecision)action);
385    //                                      if (singleDecision.getDecisionCode() >= 0) {
386    //                                              diaLogger.info(singleDecision.getDecisionSymbol());
387    //                                              diaLogger.info("\n");
388    //                                      }
389    //                              } else {
390    //                                      for (int i = 0; i < ((MultipleDecision)((GuideDecision)action)).numberOfDecisions(); i++) {
391    //                                              singleDecision = ((MultipleDecision)((GuideDecision)action)).getSingleDecision(i);
392    //                                              if (singleDecision.getDecisionCode() >= 0) {
393    //                                                      diaLogger.info(singleDecision.getDecisionSymbol());
394    //                                                      diaLogger.info("\t");
395    //                                              }
396    //                                      }
397    //                                      diaLogger.info("\n");
398    //                              }
399    //                      }
400                            
401                            guide.addInstance((GuideDecision)action);
402                    } catch (NullPointerException e) {
403                            throw new ConfigurationException("The guide cannot be found. ", e);
404                    }
405            }
406    
407            /**
408             * This method is used during parsing. Currently, 
409             * the MaltParserConfiguration redirect the request to the guide. 
410             * Maybe in the future this method will
411             * be re-implemented to add some interesting things or maybe not.
412             * 
413             * @throws MaltChainedException
414             */
415            public boolean predictFromKBestList(GuideUserAction action) throws MaltChainedException {
416                    try {
417                            return guide.predictFromKBestList((GuideDecision)action);
418                    } catch (NullPointerException e) {
419                            throw new ConfigurationException("The guide cannot be found. ", e);
420                    }
421            }
422            
423            /**
424             * This method is used during parsing. Currently, 
425             * the MaltParserConfiguration redirect the request to the guide. 
426             * Maybe in the future this method will
427             * be re-implemented to add some interesting things or maybe not.
428             * 
429             * @throws MaltChainedException
430             */
431            public void predict(GuideUserAction action) throws MaltChainedException {
432                    try {
433                            guide.predict((GuideDecision)action);
434                    } catch (NullPointerException e) {
435                            throw new ConfigurationException("The guide cannot be found. ", e);
436                    }
437            }
438    
439    }