001    package org.maltparser.parser;
002    
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.Formatter;
import java.util.Locale;
import java.util.regex.Pattern;

import org.apache.log4j.FileAppender;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.apache.log4j.PatternLayout;
import org.maltparser.core.config.ConfigurationDir;
import org.maltparser.core.config.ConfigurationException;
import org.maltparser.core.config.ConfigurationRegistry;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.helper.URLFinder;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.propagation.PropagationManager;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.parser.guide.ClassifierGuide;
025    
026    /**
027     * @author Johan Hall
028     *
029     */
030    public class SingleMalt implements DependencyParserConfig {
031            public static final int LEARN = 0;
032            public static final int PARSE = 1;
033            protected ConfigurationDir configDir;
034            protected Logger configLogger;
035            protected int optionContainerIndex;
036            protected Algorithm parsingAlgorithm = null;
037            protected int mode;
038            protected ConfigurationRegistry registry;
039            protected SymbolTableHandler symbolTableHandler;
040            protected DataFormatInstance dataFormatInstance;
041            protected long startTime;
042            protected long endTime;
043            protected int nIterations = 0;
044            protected PropagationManager propagationManager;
045            
046            public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, ConfigurationDir configDir, int mode) throws MaltChainedException {
047    
048                    this.optionContainerIndex = containerIndex;
049                    this.mode = mode;
050                    setConfigurationDir(configDir);
051                    startTime = System.currentTimeMillis();
052                    configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
053                    registry = new ConfigurationRegistry();
054                    this.dataFormatInstance = dataFormatInstance;
055                    symbolTableHandler = dataFormatInstance.getSymbolTables();
056    
057                    if (mode == SingleMalt.LEARN) {
058                            checkOptionDependency();
059                    }
060                    registry.put(org.maltparser.core.symbol.SymbolTableHandler.class, getSymbolTables());
061                    registry.put(org.maltparser.core.io.dataformat.DataFormatInstance.class, dataFormatInstance);
062    //              registry.put(org.maltparser.parser.DependencyParserConfig.class, this);
063                    initPropagation();
064                    initParsingAlgorithm(); 
065                    if (configLogger.isInfoEnabled()) {
066                            URL inputFormatURL = configDir.getInputFormatURL(); 
067                            URL outputFormatURL = configDir.getOutputFormatURL();
068                            if (inputFormatURL != null) {
069                                    if (outputFormatURL == null || outputFormatURL.toString().equals(inputFormatURL.toString())) {
070                                            int index = inputFormatURL.toString().indexOf('!');
071                                            if (index == -1) {
072                                                    configLogger.info("  Data Format          : "+inputFormatURL.toString()+"\n");
073                                            } else {
074                                                    configLogger.info("  Data Format          : "+inputFormatURL.toString().substring(index+1)+"\n");
075                                            }
076                                    } else {
077                                            int indexIn = inputFormatURL.toString().indexOf('!');
078                                            int indexOut = outputFormatURL.toString().indexOf('!');
079                                            if (indexIn == -1) {
080                                                    configLogger.info("  Input Data Format    : "+inputFormatURL.toString()+"\n");
081                                            } else {
082                                                    configLogger.info("  Input Data Format    : "+inputFormatURL.toString().substring(indexIn+1)+"\n");
083                                            }
084                                            if (indexOut == -1) {
085                                                    configLogger.info("  Output Data Format   : "+outputFormatURL.toString()+"\n");
086                                            } else {
087                                                    configLogger.info("  Output Data Format   : "+outputFormatURL.toString().substring(indexOut+1)+"\n");
088                                            }
089                                    }
090                            }
091                    }
092            }
093            
094            private void initPropagation()  throws MaltChainedException {
095                    String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString();
096                    if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
097                            return;
098                    }
099                    propagationManager = new PropagationManager(configDir);
100                    if (mode == SingleMalt.LEARN) {
101                            propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
102                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
103                    }
104                    getConfigLogger().info("  Propagation          : " + propagationSpecFileName+"\n");
105                    propagationManager.loadSpecification(propagationSpecFileName);
106            }
107            
108            /**
109             * Initialize the parsing algorithm
110             * 
111             * @throws MaltChainedException
112             */
113            protected void initParsingAlgorithm() throws MaltChainedException {
114                    if (mode == LEARN) {
115                            parsingAlgorithm = new BatchTrainer(this);
116                    } else if (mode == PARSE) {
117                            parsingAlgorithm = new DeterministicParser(this);
118                    }
119            }
120            
121            public void addRegistry(Class<?> clazz, Object o) {
122                    registry.put(clazz, o);
123            }
124            
125            public void process(Object[] arguments) throws MaltChainedException {
126                    if (mode == LEARN) {
127                            if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
128                                    throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
129                            }
130                            DependencyStructure systemGraph = (DependencyStructure)arguments[0];
131                            DependencyStructure goldGraph = (DependencyStructure)arguments[1];
132                            if (systemGraph.hasTokens() && getGuide() != null) {
133                                    getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
134                            }
135                    } else if (mode == PARSE) {
136                            if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
137                                    throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
138                            }
139                            DependencyStructure processGraph = (DependencyStructure)arguments[0];
140                            if (processGraph.hasTokens()) {
141                                    ((Parser)getAlgorithm()).parse(processGraph);
142                            }
143                    }
144            }
145            
146            public void parse(DependencyStructure graph) throws MaltChainedException {
147                    if (graph.hasTokens()) {
148                            ((Parser)getAlgorithm()).parse(graph);
149                    }
150            }
151            
152            public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
153                    if (oracleGraph.hasTokens()) {
154                            if (getGuide() != null) {
155                                    getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph));
156                            } else {
157                                    ((Trainer)getAlgorithm()).parse(goldGraph, oracleGraph);
158                            }
159                    }
160            }
161            
162            public void train() throws MaltChainedException {
163                    if (getGuide() == null) {
164                            ((Trainer)getAlgorithm()).train();
165                    }
166            }
167            
168            public void terminate(Object[] arguments) throws MaltChainedException {
169    //              if (getAlgorithm() instanceof Trainer) {
170    //                      ((Trainer)getAlgorithm()).terminate();
171    //              }
172                    getAlgorithm().terminate();
173                    if (getGuide() != null) {
174                            getGuide().terminate();
175                    }
176                    if (mode == LEARN) {
177                            endTime = System.currentTimeMillis();
178                            long elapsed = endTime - startTime;
179                            if (configLogger.isInfoEnabled()) {
180                                    configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
181                            }
182                    } else if (mode == PARSE) {
183                            endTime = System.currentTimeMillis();
184                            long elapsed = endTime - startTime;
185                            if (configLogger.isInfoEnabled()) {
186                                    configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
187                            }
188                    }
189                    if (SystemLogger.logger() != configLogger && configLogger != null) {
190                            configLogger.removeAllAppenders();
191                    }
192            }
193            
194            /**
195             * Initialize the configuration logger
196             * 
197             * @return the configuration logger
198             * @throws MaltChainedException
199             */
200            public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
201                    if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
202                            configLogger = Logger.getLogger(logfile);
203                            FileAppender fileAppender = null;
204                            try {
205                                    fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
206                            } catch(IOException e) {
207                                    throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
208                            }
209                            fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
210                            configLogger.addAppender(fileAppender);
211                            configLogger.setLevel(Level.toLevel(level, Level.INFO));        
212                    } else {
213                            configLogger = SystemLogger.logger();
214                    }
215    
216                    return configLogger;
217            }
218            
219            public Logger getConfigLogger() {
220                    return configLogger;
221            }
222    
223            public void setConfigLogger(Logger logger) {
224                    configLogger = logger;
225            }
226            
227            public ConfigurationDir getConfigurationDir() {
228                    return configDir;
229            }
230            
231            public void setConfigurationDir(ConfigurationDir configDir) {
232                    this.configDir = configDir;
233            }
234            
235            public int getMode() {
236                    return mode;
237            }
238            
239            public ConfigurationRegistry getRegistry() {
240                    return registry;
241            }
242    
243            public void setRegistry(ConfigurationRegistry registry) {
244                    this.registry = registry;
245            }
246    
247            public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
248                    return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
249            }
250            
251            public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
252                    return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
253            }
254            
255            public OptionManager getOptionManager() throws MaltChainedException {
256                    return OptionManager.instance();
257            }
258            /******************************** MaltParserConfiguration specific  ********************************/
259            
260            /**
261             * Returns the list of symbol tables
262             * 
263             * @return the list of symbol tables
264             */
265            public SymbolTableHandler getSymbolTables() {
266                    return symbolTableHandler;
267            }
268            
269            public PropagationManager getPropagationManager() {
270                    return propagationManager;
271            }
272    
273            public Algorithm getAlgorithm() {
274                    return parsingAlgorithm;
275            }
276            /**
277             * Returns the guide
278             * 
279             * @return the guide
280             */
281            public ClassifierGuide getGuide() {
282                    return parsingAlgorithm.getGuide();
283            }
284            
285            public void checkOptionDependency() throws MaltChainedException {
286                    try {
287                            if (configDir.getInfoFileWriter() != null) {
288                                    configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
289                            }
290                            
291                            // Copy the feature model file into the configuration directory
292                            String featureModelFileName = getOptionValue("guide", "features").toString().trim();
293                            if (featureModelFileName.equals("")) {
294                                    // use default feature model depending on the selected parser algorithm
295                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
296                                    featureModelFileName = getOptionValue("guide", "features").toString().trim();
297                                    /* START: Temp fix during development of new liblinear and libsvm interface */
298                                    String learner = getOptionValueString("guide", "learner");
299                                    if (!learner.startsWith("lib")) {
300                                            learner = "lib"+learner;
301                                    }
302                                    /* END: Temp fix during development of new liblinear and libsvm interface */
303                                    featureModelFileName = featureModelFileName.replace("{learner}", learner);
304                                    final URLFinder f = new URLFinder();
305                                    featureModelFileName = configDir.copyToConfig(f.findURLinJars(featureModelFileName));
306                            } else {
307                                    featureModelFileName = configDir.copyToConfig(featureModelFileName);
308                            }
309                            OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
310                            if (configDir.getInfoFileWriter() != null) {
311                                    configDir.getInfoFileWriter().write("--guide-features (  -F)                 "+getOptionValue("guide", "features").toString()+"\n");
312                            }
313    
314                            if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
315                                    configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
316                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
317                                    if (configDir.getInfoFileWriter() != null) {
318                                            configDir.getInfoFileWriter().write("--guide-data_split_structure (  -s)\n");
319                                    }
320                            }
321                            if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
322                                    configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
323                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
324                                    if (configDir.getInfoFileWriter() != null) {
325                                            configDir.getInfoFileWriter().write("--guide-data_split_column (  -d)\n");
326                                    }
327                            }
328                            
329                            String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
330                            String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
331                            String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
332                            StringBuilder newDecisionSettings = new StringBuilder();
333    
334                            if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
335                                    decisionSettings = "T.TRANS+A.DEPREL";
336                            } else {
337                                    decisionSettings = decisionSettings.toUpperCase();
338                            }
339                            
340                            if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
341                                    if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
342                                            newDecisionSettings.append("+A.PPLIFTED");
343                                    }
344                            }
345                            if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
346                                    if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
347                                            newDecisionSettings.append("+A.PPPATH");
348                                    }
349                            }
350                            if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
351                                    newDecisionSettings.append("+A.PPCOVERED");
352                            }
353                            if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
354                                    OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
355                                    if (configDir.getInfoFileWriter() != null) {
356                                            configDir.getInfoFileWriter().write("--guide-decision_settings (  -gds)                 "+getOptionValue("guide", "decision_settings").toString()+"\n");
357                                    }
358                            }
359                            if (configDir.getInfoFileWriter() != null) {
360                                    configDir.getInfoFileWriter().flush();
361                            }
362                    } catch (IOException e) {
363                            throw new ConfigurationException("Could not write to the configuration information file. ", e);
364                    }
365            }
366    }