001    package org.maltparser.parser;
002    
003    import java.io.File;
004    import java.io.IOException;
005    import java.util.Formatter;
006    import java.util.regex.Pattern;
007    
008    import org.apache.log4j.FileAppender;
009    import org.apache.log4j.Level;
010    import org.apache.log4j.Logger;
011    import org.apache.log4j.PatternLayout;
012    import org.maltparser.core.config.ConfigurationDir;
013    import org.maltparser.core.config.ConfigurationException;
014    import org.maltparser.core.config.ConfigurationRegistry;
015    import org.maltparser.core.exception.MaltChainedException;
016    import org.maltparser.core.helper.SystemLogger;
017    import org.maltparser.core.io.dataformat.DataFormatInstance;
018    import org.maltparser.core.options.OptionManager;
019    import org.maltparser.core.symbol.SymbolTableHandler;
020    import org.maltparser.core.syntaxgraph.DependencyStructure;
021    import org.maltparser.parser.guide.ClassifierGuide;
022    
023    /**
024     * @author Johan Hall
025     *
026     */
027    public class SingleMalt implements DependencyParserConfig {
028            public static final int LEARN = 0;
029            public static final int PARSE = 1;
030            protected ConfigurationDir configDir;
031            protected Logger configLogger;
032            protected int optionContainerIndex;
033            protected Algorithm parsingAlgorithm = null;
034            protected int mode;
035            protected ConfigurationRegistry registry;
036            protected SymbolTableHandler symbolTableHandler;
037            protected long startTime;
038            protected long endTime;
039            protected int nIterations = 0;
040            
041            public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
042    
043                    this.optionContainerIndex = containerIndex;
044                    this.mode = mode;
045                    setConfigurationDir(configDir);
046                    startTime = System.currentTimeMillis();
047                    configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
048                    registry = new ConfigurationRegistry();
049                    symbolTableHandler = dataFormatInstance.getSymbolTables();
050    
051                    if (mode == SingleMalt.LEARN) {
052                            checkOptionDependency();
053                    }
054                    registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
055                    registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
056    //              registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
057                    initParsingAlgorithm(); 
058    
059            }
060            
061            
062            /**
063             * Initialize the parsing algorithm
064             * 
065             * @throws MaltChainedException
066             */
067            protected void initParsingAlgorithm() throws MaltChainedException {
068                    if (mode == LEARN) {
069                            parsingAlgorithm = new BatchTrainer(this);
070                    } else if (mode == PARSE) {
071                            parsingAlgorithm = new DeterministicParser(this);
072                    }
073            }
074            
075            public void addRegistry(Class<?> clazz, Object o) {
076                    registry.put(clazz, o);
077            }
078            
079            public void process(Object[] arguments) throws MaltChainedException {
080                    if (mode == LEARN) {
081                            if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
082                                    throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
083                            }
084                            DependencyStructure systemGraph = (DependencyStructure)arguments[0];
085                            DependencyStructure goldGraph = (DependencyStructure)arguments[1];
086                            if (systemGraph.hasTokens() && getGuide() != null) {
087                                    getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
088                            }
089                    } else if (mode == PARSE) {
090                            if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
091                                    throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
092                            }
093                            DependencyStructure processGraph = (DependencyStructure)arguments[0];
094                            if (processGraph.hasTokens()) {
095                                    ((Parser)getAlgorithm()).parse(processGraph);
096                            }
097                    }
098            }
099            
100            public void parse(DependencyStructure graph) throws MaltChainedException {
101                    if (graph.hasTokens()) {
102                            ((Parser)getAlgorithm()).parse(graph);
103                    }
104            }
105            
106            public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
107                    if (oracleGraph.hasTokens()) {
108                            if (getGuide() != null) {
109                                    getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph));
110                            } else {
111                                    ((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph);
112                            }
113                    }
114            }
115            
116            public void train() throws MaltChainedException {
117                    if (getGuide() == null) {
118                            ((Trainer)getAlgorithm()).train();
119                    }
120            }
121            
122            public void terminate(Object[] arguments) throws MaltChainedException {
123    //              if (getAlgorithm() instanceof Trainer) {
124    //                      ((Trainer)getAlgorithm()).terminate();
125    //              }
126                    getAlgorithm().terminate();
127                    if (getGuide() != null) {
128                            getGuide().terminate();
129                    }
130                    if (mode == LEARN) {
131                            endTime = System.currentTimeMillis();
132                            long elapsed = endTime - startTime;
133                            if (configLogger.isInfoEnabled()) {
134                                    configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
135                            }
136                    } else if (mode == PARSE) {
137                            endTime = System.currentTimeMillis();
138                            long elapsed = endTime - startTime;
139                            if (configLogger.isInfoEnabled()) {
140                                    configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
141                            }
142                    }
143                    if (SystemLogger.logger() != configLogger && configLogger != null) {
144                            configLogger.removeAllAppenders();
145                    }
146            }
147            
148            /**
149             * Initialize the configuration logger
150             * 
151             * @return the configuration logger
152             * @throws MaltChainedException
153             */
154            public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
155                    if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
156                            configLogger = Logger.getLogger(logfile);
157                            FileAppender fileAppender = null;
158                            try {
159                                    fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
160                            } catch(IOException e) {
161                                    throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
162                            }
163                            fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
164                            configLogger.addAppender(fileAppender);
165                            configLogger.setLevel(Level.toLevel(level, Level.INFO));        
166                    } else {
167                            configLogger = SystemLogger.logger();
168                    }
169    
170                    return configLogger;
171            }
172            
173            public Logger getConfigLogger() {
174                    return configLogger;
175            }
176    
177            public void setConfigLogger(Logger logger) {
178                    configLogger = logger;
179            }
180            
181            public ConfigurationDir getConfigurationDir() {
182                    return configDir;
183            }
184            
185            public void setConfigurationDir(ConfigurationDir configDir) {
186                    this.configDir = configDir;
187            }
188            
189            public int getMode() {
190                    return mode;
191            }
192            
193            public ConfigurationRegistry getRegistry() {
194                    return registry;
195            }
196    
197            public void setRegistry(ConfigurationRegistry registry) {
198                    this.registry = registry;
199            }
200    
201            public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
202                    return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
203            }
204            
205            public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
206                    return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
207            }
208            
209            public OptionManager getOptionManager() throws MaltChainedException {
210                    return OptionManager.instance();
211            }
212            /******************************** MaltParserConfiguration specific  ********************************/
213            
214            /**
215             * Returns the list of symbol tables
216             * 
217             * @return the list of symbol tables
218             */
219            public SymbolTableHandler getSymbolTables() {
220                    return symbolTableHandler;
221            }
222            
223            public Algorithm getAlgorithm() {
224                    return parsingAlgorithm;
225            }
226            /**
227             * Returns the guide
228             * 
229             * @return the guide
230             */
231            public ClassifierGuide getGuide() {
232                    return parsingAlgorithm.getGuide();
233            }
234            
235            public void checkOptionDependency() throws MaltChainedException {
236                    try {
237                            if (configDir.getInfoFileWriter() != null) {
238                                    configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
239                            }
240    //                      if ((Boolean)getOptionValue("malt0.4", "behavior") == true) {
241    //                              if (!getOptionValueString("singlemalt", "null_value").equals("rootlabel")) {
242    //                                      OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "null_value", "rootlabel");
243    //                                      if (configDir.getInfoFileWriter() != null) {
244    //                                              configDir.getInfoFileWriter().write("--singlemalt-null_value (-nv)     rootlabel\n");
245    //                                      }
246    //                                      configLogger.warn("Option --malt0.4-behavior = true and --singlemalt-null_value != 'rootlabel'. Option --singlemalt-null_value is overloaded with value 'rootlabel'\n");
247    //                              }
248    //                              if (getOptionValue("malt0.4", "depset").toString().equals("")) {                                
249    //                                      configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-depset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n");
250    //                              }
251    //                              if (getOptionValue("malt0.4", "posset").toString().equals("")) {                                
252    //                                      configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-posset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n");
253    //                              }
254    //                              if (getOptionValue("malt0.4", "cposset").toString().equals("")) {                               
255    //                                      configLogger.warn("Option --malt0.4-behavior = true and option --malt0.4-cposset has no value. These combination will probably not reproduce the behavior of MaltParser 0.4 (C-impl)\n");
256    //                              }
257    //                              if (!getOptionValue("guide", "kbest").toString().equals("1")) {
258    //                                      OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "kbest", "1");
259    //                                      if (configDir.getInfoFileWriter() != null) {
260    //                                              configDir.getInfoFileWriter().write("--guide-kbest (  -k)                    1\n");
261    //                                      }
262    //                                      configLogger.warn("Option --malt0.4-behavior = true and --guide-kbest != '1'. Option --guide-kbest is overloaded with value '1'\n");
263    //                              }
264    //                      }
265                            if (getOptionValue("guide", "features").toString().equals("")) {
266                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
267                                    if (configDir.getInfoFileWriter() != null) {
268                                            configDir.getInfoFileWriter().write("--guide-features (  -F)                 "+getOptionValue("guide", "features").toString()+"\n");
269                                    }
270                            } else {
271                                    configDir.copyToConfig(getOptionValue("guide", "features").toString());
272                            }
273                            if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
274                                    configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
275                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
276                                    if (configDir.getInfoFileWriter() != null) {
277                                            configDir.getInfoFileWriter().write("--guide-data_split_structure (  -s)\n");
278                                    }
279                            }
280                            if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
281                                    configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
282                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
283                                    if (configDir.getInfoFileWriter() != null) {
284                                            configDir.getInfoFileWriter().write("--guide-data_split_column (  -d)\n");
285                                    }
286                            }
287                            
288                            String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
289                            String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
290                            String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
291                            StringBuilder newDecisionSettings = new StringBuilder();
292    //                      if ((Boolean)getOptionValue("malt0.4", "behavior") == true) {
293    //                              decisionSettings = "T.TRANS+A.DEPREL";
294    //                      }
295                            if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
296                                    decisionSettings = "T.TRANS+A.DEPREL";
297                            } else {
298                                    decisionSettings = decisionSettings.toUpperCase();
299                            }
300                            
301                            if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
302                                    if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
303                                            newDecisionSettings.append("+A.PPLIFTED");
304                                    }
305                            }
306                            if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
307                                    if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
308                                            newDecisionSettings.append("+A.PPPATH");
309                                    }
310                            }
311                            if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
312                                    newDecisionSettings.append("+A.PPCOVERED");
313                            }
314                            if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
315                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
316                                    if (configDir.getInfoFileWriter() != null) {
317                                            configDir.getInfoFileWriter().write("--guide-decision_settings (  -gds)                 "+getOptionValue("guide", "decision_settings").toString()+"\n");
318                                    }
319                            }
320                            if (configDir.getInfoFileWriter() != null) {
321                                    configDir.getInfoFileWriter().flush();
322                            }
323                    } catch (IOException e) {
324                            throw new ConfigurationException("Could not write to the configuration information file. ", e);
325                    }
326            }
327    }