001    package org.maltparser.parser;
002    
003    import java.io.File;
004    import java.io.IOException;
005    import java.net.URL;
006    import java.util.Formatter;
007    import java.util.regex.Pattern;
008    
009    import org.apache.log4j.FileAppender;
010    import org.apache.log4j.Level;
011    import org.apache.log4j.Logger;
012    import org.apache.log4j.PatternLayout;
013    import org.maltparser.core.config.ConfigurationDir;
014    import org.maltparser.core.config.ConfigurationException;
015    import org.maltparser.core.config.ConfigurationRegistry;
016    import org.maltparser.core.exception.MaltChainedException;
017    import org.maltparser.core.helper.SystemLogger;
018    import org.maltparser.core.helper.URLFinder;
019    import org.maltparser.core.io.dataformat.DataFormatInstance;
020    import org.maltparser.core.options.OptionManager;
021    import org.maltparser.core.propagation.PropagationManager;
022    import org.maltparser.core.symbol.SymbolTableHandler;
023    import org.maltparser.core.syntaxgraph.DependencyStructure;
024    import org.maltparser.parser.guide.ClassifierGuide;
025    
026    /**
027     * @author Johan Hall
028     *
029     */
030    public class SingleMalt implements DependencyParserConfig {
031            public static final int LEARN = 0;
032            public static final int PARSE = 1;
033            protected ConfigurationDir configDir;
034            protected Logger configLogger;
035            protected int optionContainerIndex;
036            protected Algorithm parsingAlgorithm = null;
037            protected int mode;
038            protected ConfigurationRegistry registry;
039            protected SymbolTableHandler symbolTableHandler;
040            protected DataFormatInstance dataFormatInstance;
041            protected long startTime;
042            protected long endTime;
043            protected int nIterations = 0;
044            protected PropagationManager propagationManager;
045            private Parser parser;
046            private Trainer trainer;
047            
048            public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
049    
050                    this.optionContainerIndex = containerIndex;
051                    this.mode = mode;
052                    setConfigurationDir(configDir);
053                    startTime = System.currentTimeMillis();
054                    configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
055                    registry = new ConfigurationRegistry();
056                    this.dataFormatInstance = dataFormatInstance;
057                    symbolTableHandler = dataFormatInstance.getSymbolTables();
058    
059                    if (mode == SingleMalt.LEARN) {
060                            checkOptionDependency();
061                    }
062                    registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
063                    registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
064    //              registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
065                    initPropagation();
066                    initParsingAlgorithm(); 
067                    if (configLogger.isInfoEnabled()) {
068                            URL inputFormatURL = configDir.getInputFormatURL(); 
069                            URL outputFormatURL = configDir.getOutputFormatURL();
070                            if (inputFormatURL != null) {
071                                    if (outputFormatURL == null || outputFormatURL.toString().equals(inputFormatURL.toString())) {
072                                            int index = inputFormatURL.toString().indexOf('!');
073                                            if (index == -1) {
074                                                    configLogger.info("  Data Format          : "+inputFormatURL.toString()+"\n");
075                                            } else {
076                                                    configLogger.info("  Data Format          : "+inputFormatURL.toString().substring(index+1)+"\n");
077                                            }
078                                    } else {
079                                            int indexIn = inputFormatURL.toString().indexOf('!');
080                                            int indexOut = outputFormatURL.toString().indexOf('!');
081                                            if (indexIn == -1) {
082                                                    configLogger.info("  Input Data Format    : "+inputFormatURL.toString()+"\n");
083                                            } else {
084                                                    configLogger.info("  Input Data Format    : "+inputFormatURL.toString().substring(indexIn+1)+"\n");
085                                            }
086                                            if (indexOut == -1) {
087                                                    configLogger.info("  Output Data Format   : "+outputFormatURL.toString()+"\n");
088                                            } else {
089                                                    configLogger.info("  Output Data Format   : "+outputFormatURL.toString().substring(indexOut+1)+"\n");
090                                            }
091                                    }
092                            }
093                    }
094            }
095            
096            private void initPropagation()  throws MaltChainedException {
097                    String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString();
098                    if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
099                            return;
100                    }
101                    propagationManager = new PropagationManager(configDir);
102                    if (mode == SingleMalt.LEARN) {
103                            propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
104                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
105                    }
106                    getConfigLogger().info("  Propagation          : " + propagationSpecFileName+"\n");
107                    propagationManager.loadSpecification(propagationSpecFileName);
108            }
109            
110            /**
111             * Initialize the parsing algorithm
112             * 
113             * @throws MaltChainedException
114             */
115            protected void initParsingAlgorithm() throws MaltChainedException {
116                    if (mode == LEARN) {
117                            parsingAlgorithm = trainer = new BatchTrainer(this);
118                    } else if (mode == PARSE) {
119                            parsingAlgorithm = parser = new DeterministicParser(this);
120                    }
121            }
122            
123            public void addRegistry(Class<?> clazz, Object o) {
124                    registry.put(clazz, o);
125            }
126            
127            public void process(Object[] arguments) throws MaltChainedException {
128                    if (mode == LEARN) {
129                            if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
130                                    throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
131                            }
132                            DependencyStructure systemGraph = (DependencyStructure)arguments[0];
133                            DependencyStructure goldGraph = (DependencyStructure)arguments[1];
134                            if (systemGraph.hasTokens() && getGuide() != null) {
135                                    getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
136                            }
137                    } else if (mode == PARSE) {
138                            if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
139                                    throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
140                            }
141                            DependencyStructure processGraph = (DependencyStructure)arguments[0];
142                            if (processGraph.hasTokens()) {
143                                    parser.parse(processGraph);
144    //                              ((Parser)getAlgorithm()).parse(processGraph);
145                            }
146                    }
147            }
148            
149            public void parse(DependencyStructure graph) throws MaltChainedException {
150                    if (graph.hasTokens()) {
151    //                      ((Parser)getAlgorithm()).parse(graph);
152                            parser.parse(graph);
153                    }
154            }
155            
156            public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
157                    if (oracleGraph.hasTokens()) {
158                            if (getGuide() != null) {
159                                    getGuide().finalizeSentence(trainer.parse(goldGraph, oracleGraph));
160                            } else {
161                                    trainer.parse(goldGraph, oracleGraph);
162                            }
163                    }
164            }
165            
166            public void train() throws MaltChainedException {
167                    if (getGuide() == null) {
168                            ((Trainer)getAlgorithm()).train();
169                    }
170            }
171            
172            public void terminate(Object[] arguments) throws MaltChainedException {
173    //              if (getAlgorithm() instanceof Trainer) {
174    //                      ((Trainer)getAlgorithm()).terminate();
175    //              }
176                    getAlgorithm().terminate();
177                    if (getGuide() != null) {
178                            getGuide().terminate();
179                    }
180                    if (mode == LEARN) {
181                            endTime = System.currentTimeMillis();
182                            long elapsed = endTime - startTime;
183                            if (configLogger.isInfoEnabled()) {
184                                    configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
185                            }
186                    } else if (mode == PARSE) {
187                            endTime = System.currentTimeMillis();
188                            long elapsed = endTime - startTime;
189                            if (configLogger.isInfoEnabled()) {
190                                    configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
191                            }
192                    }
193                    if (SystemLogger.logger() != configLogger && configLogger != null) {
194                            configLogger.removeAllAppenders();
195                    }
196            }
197            
198            /**
199             * Initialize the configuration logger
200             * 
201             * @return the configuration logger
202             * @throws MaltChainedException
203             */
204            public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
205                    if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
206                            configLogger = Logger.getLogger(logfile);
207                            FileAppender fileAppender = null;
208                            try {
209                                    fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
210                            } catch(IOException e) {
211                                    throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
212                            }
213                            fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
214                            configLogger.addAppender(fileAppender);
215                            configLogger.setLevel(Level.toLevel(level, Level.INFO));        
216                    } else {
217                            configLogger = SystemLogger.logger();
218                    }
219    
220                    return configLogger;
221            }
222            
223            public Logger getConfigLogger() {
224                    return configLogger;
225            }
226    
227            public void setConfigLogger(Logger logger) {
228                    configLogger = logger;
229            }
230            
231            public ConfigurationDir getConfigurationDir() {
232                    return configDir;
233            }
234            
235            public void setConfigurationDir(ConfigurationDir configDir) {
236                    this.configDir = configDir;
237            }
238            
239            public int getMode() {
240                    return mode;
241            }
242            
243            public ConfigurationRegistry getRegistry() {
244                    return registry;
245            }
246    
247            public void setRegistry(ConfigurationRegistry registry) {
248                    this.registry = registry;
249            }
250    
251            public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
252                    return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
253            }
254            
255            public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
256                    return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
257            }
258            
259            public OptionManager getOptionManager() throws MaltChainedException {
260                    return OptionManager.instance();
261            }
262            /******************************** MaltParserConfiguration specific  ********************************/
263            
264            /**
265             * Returns the list of symbol tables
266             * 
267             * @return the list of symbol tables
268             */
269            public SymbolTableHandler getSymbolTables() {
270                    return symbolTableHandler;
271            }
272            
273            public PropagationManager getPropagationManager() {
274                    return propagationManager;
275            }
276    
277            public Algorithm getAlgorithm() {
278                    return parsingAlgorithm;
279            }
280            /**
281             * Returns the guide
282             * 
283             * @return the guide
284             */
285            public ClassifierGuide getGuide() {
286                    return parsingAlgorithm.getGuide();
287            }
288            
289            public void checkOptionDependency() throws MaltChainedException {
290                    try {
291                            if (configDir.getInfoFileWriter() != null) {
292                                    configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
293                            }
294                            
295                            // Copy the feature model file into the configuration directory
296                            String featureModelFileName = getOptionValue("guide", "features").toString().trim();
297                            if (featureModelFileName.equals("")) {
298                                    // use default feature model depending on the selected parser algorithm
299                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
300                                    featureModelFileName = getOptionValue("guide", "features").toString().trim();
301                                    /* START: Temp fix during development of new liblinear and libsvm interface */
302                                    String learner = getOptionValueString("guide", "learner");
303                                    if (!learner.startsWith("lib")) {
304                                            learner = "lib"+learner;
305                                    }
306                                    /* END: Temp fix during development of new liblinear and libsvm interface */
307                                    featureModelFileName = featureModelFileName.replace("{learner}", learner);
308                                    final URLFinder f = new URLFinder();
309                                    featureModelFileName = configDir.copyToConfig(f.findURLinJars(featureModelFileName));
310                            } else {
311                                    featureModelFileName = configDir.copyToConfig(featureModelFileName);
312                            }
313                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
314                            if (configDir.getInfoFileWriter() != null) {
315                                    configDir.getInfoFileWriter().write("--guide-features (  -F)                 "+getOptionValue("guide", "features").toString()+"\n");
316                            }
317    
318                            if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
319                                    configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
320                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
321                                    if (configDir.getInfoFileWriter() != null) {
322                                            configDir.getInfoFileWriter().write("--guide-data_split_structure (  -s)\n");
323                                    }
324                            }
325                            if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
326                                    configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
327                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
328                                    if (configDir.getInfoFileWriter() != null) {
329                                            configDir.getInfoFileWriter().write("--guide-data_split_column (  -d)\n");
330                                    }
331                            }
332                            
333                            String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
334                            String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
335                            String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
336                            StringBuilder newDecisionSettings = new StringBuilder();
337    
338                            if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
339                                    decisionSettings = "T.TRANS+A.DEPREL";
340                            } else {
341                                    decisionSettings = decisionSettings.toUpperCase();
342                            }
343                            
344                            if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
345                                    if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
346                                            newDecisionSettings.append("+A.PPLIFTED");
347                                    }
348                            }
349                            if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
350                                    if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
351                                            newDecisionSettings.append("+A.PPPATH");
352                                    }
353                            }
354                            if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
355                                    newDecisionSettings.append("+A.PPCOVERED");
356                            }
357                            if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
358                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
359                                    if (configDir.getInfoFileWriter() != null) {
360                                            configDir.getInfoFileWriter().write("--guide-decision_settings (  -gds)                 "+getOptionValue("guide", "decision_settings").toString()+"\n");
361                                    }
362                            }
363                            if (configDir.getInfoFileWriter() != null) {
364                                    configDir.getInfoFileWriter().flush();
365                            }
366                    } catch (IOException e) {
367                            throw new ConfigurationException("Could not write to the configuration information file. ", e);
368                    }
369            }
370    }