001package org.maltparser.parser;
002
003import java.io.BufferedReader;
004import java.io.File;
005import java.io.IOException;
006import java.io.InputStream;
007import java.io.InputStreamReader;
008import java.io.ObjectInputStream;
009import java.io.OutputStreamWriter;
010import java.lang.reflect.InvocationTargetException;
011import java.net.MalformedURLException;
012import java.net.URL;
013import java.util.Formatter;
014import java.util.regex.Pattern;
015
016import org.apache.log4j.FileAppender;
017import org.apache.log4j.Level;
018import org.apache.log4j.Logger;
019import org.apache.log4j.PatternLayout;
020import org.maltparser.core.config.ConfigurationDir;
021import org.maltparser.core.config.ConfigurationException;
022import org.maltparser.core.exception.MaltChainedException;
023import org.maltparser.core.feature.FeatureModelManager;
024import org.maltparser.core.feature.system.FeatureEngine;
025import org.maltparser.core.helper.SystemLogger;
026import org.maltparser.core.helper.URLFinder;
027import org.maltparser.core.io.dataformat.DataFormatInstance;
028import org.maltparser.core.options.OptionManager;
029import org.maltparser.core.plugin.PluginLoader;
030import org.maltparser.core.propagation.PropagationException;
031import org.maltparser.core.propagation.PropagationManager;
032import org.maltparser.core.symbol.SymbolTableHandler;
033import org.maltparser.core.syntaxgraph.DependencyStructure;
034import org.maltparser.parser.guide.ClassifierGuide;
035
036/**
037 * @author Johan Hall
038 *
039 */
040public class SingleMalt implements DependencyParserConfig {
041        public final static Class<?>[] paramTypes = { org.maltparser.parser.DependencyParserConfig.class };
042        public static final int LEARN = 0;
043        public static final int PARSE = 1;
044        protected ConfigurationDir configDir;
045        protected Logger configLogger;
046        protected int optionContainerIndex;
047        protected ParsingAlgorithm parsingAlgorithm = null;
048        protected int mode;
049        protected SymbolTableHandler symbolTableHandler;
050        protected DataFormatInstance dataFormatInstance;
051        protected FeatureModelManager featureModelManager;
052        protected long startTime;
053        protected long endTime;
054        protected int nIterations = 0;
055        protected PropagationManager propagationManager;
056        private Parser parser;
057        private Trainer trainer;
058        private AbstractParserFactory parserFactory;
059        
060        
061        public void initialize(int containerIndex, DataFormatInstance dataFormatInstance, SymbolTableHandler symbolTableHandler, ConfigurationDir configDir, int mode) throws MaltChainedException {
062                this.optionContainerIndex = containerIndex;
063                this.mode = mode;
064                setConfigurationDir(configDir);
065                startTime = System.currentTimeMillis();
066                configLogger = initConfigLogger(getOptionValue("config", "logfile").toString(), getOptionValue("config", "logging").toString());
067                this.dataFormatInstance = dataFormatInstance;
068                this.symbolTableHandler = symbolTableHandler;
069                this.parserFactory = makeParserFactory();
070                if (mode == SingleMalt.LEARN) {
071                        checkOptionDependency();
072                }
073                initPropagation();
074                initFeatureSystem();
075                initParsingAlgorithm(); 
076                
077                if (configLogger.isInfoEnabled()) {
078                        URL inputFormatURL = configDir.getInputFormatURL(); 
079                        URL outputFormatURL = configDir.getOutputFormatURL();
080                        if (inputFormatURL != null) {
081                                if (outputFormatURL == null || outputFormatURL.toString().equals(inputFormatURL.toString())) {
082                                        int index = inputFormatURL.toString().indexOf('!');
083                                        if (index == -1) {
084                                                configLogger.info("  Data Format          : "+inputFormatURL.toString()+"\n");
085                                        } else {
086                                                configLogger.info("  Data Format          : "+inputFormatURL.toString().substring(index+1)+"\n");
087                                        }
088                                } else {
089                                        int indexIn = inputFormatURL.toString().indexOf('!');
090                                        int indexOut = outputFormatURL.toString().indexOf('!');
091                                        if (indexIn == -1) {
092                                                configLogger.info("  Input Data Format    : "+inputFormatURL.toString()+"\n");
093                                        } else {
094                                                configLogger.info("  Input Data Format    : "+inputFormatURL.toString().substring(indexIn+1)+"\n");
095                                        }
096                                        if (indexOut == -1) {
097                                                configLogger.info("  Output Data Format   : "+outputFormatURL.toString()+"\n");
098                                        } else {
099                                                configLogger.info("  Output Data Format   : "+outputFormatURL.toString().substring(indexOut+1)+"\n");
100                                        }
101                                }
102                        }
103                }
104        }
105        
106        private void initPropagation()  throws MaltChainedException {
107                String propagationSpecFileName = getOptionValue("singlemalt", "propagation").toString();
108                if (propagationSpecFileName == null || propagationSpecFileName.length() == 0) {
109                        return;
110                }
111                propagationManager = new PropagationManager();
112                if (mode == SingleMalt.LEARN) {
113                        propagationSpecFileName = configDir.copyToConfig(propagationSpecFileName);
114                        OptionManager.instance().overloadOptionValue(optionContainerIndex, "singlemalt", "propagation", propagationSpecFileName);
115                }
116                if (isLoggerInfoEnabled()) {
117                        logInfoMessage("  Propagation          : " + propagationSpecFileName+"\n");
118                }
119                propagationManager.loadSpecification(findURL(propagationSpecFileName));
120                propagationManager.createPropagations(dataFormatInstance, symbolTableHandler);
121        }
122        
123        /**
124         * Initialize the parsing algorithm
125         * 
126         * @throws MaltChainedException
127         */
128        protected void initParsingAlgorithm() throws MaltChainedException {
129                boolean diagnostics = (Boolean)getOptionValue("singlemalt", "diagnostics");
130                if (mode == LEARN) {
131                        if (!diagnostics) {
132                                parsingAlgorithm = trainer = new BatchTrainer(this, symbolTableHandler);
133                        } else {
134                                parsingAlgorithm = trainer = new BatchTrainerWithDiagnostics(this, symbolTableHandler);
135                        }
136                } else if (mode == PARSE) {
137                        if (!diagnostics) {
138                                parsingAlgorithm = parser = new DeterministicParser(this, symbolTableHandler);
139                        } else {
140                                parsingAlgorithm = parser = new DeterministicParserWithDiagnostics(this, symbolTableHandler);
141                        }
142                }
143        }
144        
145        protected void initFeatureSystem() throws MaltChainedException {
146                final FeatureEngine system = new FeatureEngine();
147                system.load("/appdata/features/ParserFeatureSystem.xml");
148                system.load(PluginLoader.instance());
149                featureModelManager = new FeatureModelManager(system);
150                String featureModelFileName = getOptionValue("guide", "features").toString().trim();
151                if (featureModelFileName.endsWith(".par")) {
152                        String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
153                        String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
154                        featureModelManager.loadParSpecification(findURL(featureModelFileName), markingStrategy, coveredRoot);
155                } else {
156                        featureModelManager.loadSpecification(findURL(featureModelFileName));
157                }
158        }
159        
160        /**
161         * Creates a parser factory specified by the --singlemalt-parsing_algorithm option
162         * 
163         * @return a parser factory
164         * @throws MaltChainedException
165         */
166        private AbstractParserFactory makeParserFactory() throws MaltChainedException {
167                Class<?> clazz = (Class<?>)getOptionValue("singlemalt", "parsing_algorithm");
168                try {   
169                        Object[] arguments = { this };
170                        return (AbstractParserFactory)clazz.getConstructor(paramTypes).newInstance(arguments);
171                } catch (NoSuchMethodException e) {
172                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);
173                } catch (InstantiationException e) {
174                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);
175                } catch (IllegalAccessException e) {
176                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);
177                } catch (InvocationTargetException e) {
178                        throw new ConfigurationException("The parser factory '"+clazz.getName()+"' cannot be initialized. ", e);                        
179                }
180        }
181        
182        public AbstractParserFactory getParserFactory() {
183                return parserFactory;
184        }
185        
186        public FeatureModelManager getFeatureModelManager() {
187                return featureModelManager;
188        }
189        
190        public void process(Object[] arguments) throws MaltChainedException {
191                if (mode == LEARN) {
192                        if (arguments.length < 2 || !(arguments[0] instanceof DependencyStructure) || !(arguments[1] instanceof DependencyStructure)) {
193                                throw new MaltChainedException("The single malt learn task must be supplied with at least two dependency structures. ");
194                        }
195                        DependencyStructure systemGraph = (DependencyStructure)arguments[0];
196                        DependencyStructure goldGraph = (DependencyStructure)arguments[1];
197                        if (systemGraph.hasTokens() && getGuide() != null) {
198                                getGuide().finalizeSentence(((Trainer)getAlgorithm()).parse(goldGraph, systemGraph));
199                        }
200                } else if (mode == PARSE) {
201                        if (arguments.length < 1 || !(arguments[0] instanceof DependencyStructure)) {
202                                throw new MaltChainedException("The single malt parse task must be supplied with at least one input terminal structure and one output dependency structure. ");
203                        }
204                        DependencyStructure processGraph = (DependencyStructure)arguments[0];
205                        if (processGraph.hasTokens()) {
206                                parser.parse(processGraph);
207//                              ((Parser)getAlgorithm()).parse(processGraph);
208                        }
209                }
210        }
211        
212        public void parse(DependencyStructure graph) throws MaltChainedException {
213                if (graph.hasTokens()) {
214//                      ((Parser)getAlgorithm()).parse(graph);
215                        parser.parse(graph);
216                }
217        }
218        
219        public void oracleParse(DependencyStructure goldGraph, DependencyStructure oracleGraph) throws MaltChainedException {
220                if (oracleGraph.hasTokens()) {
221                        if (getGuide() != null) {
222                                getGuide().finalizeSentence(trainer.parse(goldGraph, oracleGraph));
223                        } else {
224                                trainer.parse(goldGraph, oracleGraph);
225                        }
226                }
227        }
228        
229        public void train() throws MaltChainedException {
230                if (getGuide() == null) {
231                        ((Trainer)getAlgorithm()).train();
232                }
233        }
234        
235        public void terminate(Object[] arguments) throws MaltChainedException {
236//              if (getAlgorithm() instanceof Trainer) {
237//                      ((Trainer)getAlgorithm()).terminate();
238//              }
239                getAlgorithm().terminate();
240                if (getGuide() != null) {
241                        getGuide().terminate();
242                }
243                if (mode == LEARN) {
244                        endTime = System.currentTimeMillis();
245                        long elapsed = endTime - startTime;
246                        if (configLogger.isInfoEnabled()) {
247                                configLogger.info("Learning time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
248                        }
249                } else if (mode == PARSE) {
250                        endTime = System.currentTimeMillis();
251                        long elapsed = endTime - startTime;
252                        if (configLogger.isInfoEnabled()) {
253                                configLogger.info("Parsing time: " +new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)\n");
254                        }
255                }
256                if (SystemLogger.logger() != configLogger && configLogger != null) {
257                        configLogger.removeAllAppenders();
258                }
259        }
260        
261        /**
262         * Initialize the configuration logger
263         * 
264         * @return the configuration logger
265         * @throws MaltChainedException
266         */
267        public Logger initConfigLogger(String logfile, String level) throws MaltChainedException {
268                if (logfile != null && logfile.length() > 0 && !logfile.equalsIgnoreCase("stdout") && configDir != null) {
269                        configLogger = Logger.getLogger(logfile);
270                        FileAppender fileAppender = null;
271                        try {
272                                fileAppender = new FileAppender(new PatternLayout("%m"),configDir.getWorkingDirectory().getPath()+File.separator+logfile, true);
273                        } catch(IOException e) {
274                                throw new ConfigurationException("It is not possible to create a configuration log file. ", e);
275                        }
276                        fileAppender.setThreshold(Level.toLevel(level, Level.INFO));
277                        configLogger.addAppender(fileAppender);
278                        configLogger.setLevel(Level.toLevel(level, Level.INFO));        
279                } else {
280                        configLogger = SystemLogger.logger();
281                }
282
283                return configLogger;
284        }
285        
286        public boolean isLoggerInfoEnabled() {
287                return configLogger != null && configLogger.isInfoEnabled();
288        }
289        public boolean isLoggerDebugEnabled() {
290                return configLogger != null && configLogger.isDebugEnabled();
291        }
292        public void logErrorMessage(String message) {
293                configLogger.error(message);
294        }
295        public void logInfoMessage(String message) {
296                configLogger.info(message);
297        }
298        public void logInfoMessage(char character) {
299                configLogger.info(character);
300        }
301        public void logDebugMessage(String message) {
302                configLogger.debug(message);
303        }
304        
305        public void writeInfoToConfigFile(String message) throws MaltChainedException {
306                try {
307                        configDir.getInfoFileWriter().write(message);
308                        configDir.getInfoFileWriter().flush();
309                } catch (IOException e) {
310                        throw new ConfigurationException("Could not write to the configuration information file. ", e);
311        
312                }
313        }
314        
315        public Logger getConfigLogger() {
316                return configLogger;
317        }
318
319        public void setConfigLogger(Logger logger) {
320                configLogger = logger;
321        }
322        
323        public ConfigurationDir getConfigurationDir() {
324                return configDir;
325        }
326        
327        public void setConfigurationDir(ConfigurationDir configDir) {
328                this.configDir = configDir;
329        }
330        
331        public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException {
332                return configDir.getOutputStreamWriter(fileName);
333        }
334        
335        public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException {
336                return configDir.getAppendOutputStreamWriter(fileName);
337        }
338        
339        public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException {
340                return configDir.getInputStreamReader(fileName);
341        }
342        
343        public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException {
344                return configDir.getInputStreamFromConfigFileEntry(fileName);
345        }
346        
347        public URL getConfigFileEntryURL(String fileName) throws MaltChainedException {
348                return configDir.getConfigFileEntryURL(fileName);
349        }
350        
351        public File getFile(String fileName) throws MaltChainedException {
352                return configDir.getFile(fileName);
353        }
354        
355        public Object getConfigFileEntryObject(String fileName) throws MaltChainedException {
356                Object object = null;
357                try {
358                    ObjectInputStream input = new ObjectInputStream(getInputStreamFromConfigFileEntry(fileName));
359                    try {
360                        object = input.readObject();
361                        } catch (ClassNotFoundException e) {
362                                throw new ConfigurationException("Could not load object '"+fileName+"' from mco-file", e);
363                        } catch (Exception e) {
364                                throw new ConfigurationException("Could not load object '"+fileName+"' from mco-file", e);
365                    } finally {
366                        input.close();
367                    }
368                } catch (IOException e) {
369                        throw new ConfigurationException("Could not load object from '"+fileName+"' in mco-file", e);
370                }
371            return object;
372        }
373        
374        public String getConfigFileEntryString(String fileName) throws MaltChainedException {
375                StringBuilder sb = new StringBuilder();
376                try {
377                        final BufferedReader in = new BufferedReader(new InputStreamReader(getInputStreamFromConfigFileEntry(fileName), "UTF-8"));
378                        String line;
379                        
380                        while((line = in.readLine()) != null) {
381                                 sb.append(line);
382                                 sb.append('\n');
383                        }
384                } catch (IOException e) {
385                        throw new ConfigurationException("Could not load string from '"+fileName+"' in mco-file", e);
386                }
387            return sb.toString();
388        }
389        
390        public int getMode() {
391                return mode;
392        }
393
394        public Object getOptionValue(String optiongroup, String optionname) throws MaltChainedException {
395                return OptionManager.instance().getOptionValue(optionContainerIndex, optiongroup, optionname);
396        }
397        
398        public String getOptionValueString(String optiongroup, String optionname) throws MaltChainedException {
399                return OptionManager.instance().getOptionValueString(optionContainerIndex, optiongroup, optionname);
400        }
401        
402        public OptionManager getOptionManager() throws MaltChainedException {
403                return OptionManager.instance();
404        }
405        /******************************** MaltParserConfiguration specific  ********************************/
406        
407        /**
408         * Returns the list of symbol tables
409         * 
410         * @return the list of symbol tables
411         */
412        public SymbolTableHandler getSymbolTables() {
413                return symbolTableHandler;
414        }
415        
416        public DataFormatInstance getDataFormatInstance() {
417                return dataFormatInstance;
418        }
419        
420        public PropagationManager getPropagationManager() {
421                return propagationManager;
422        }
423        
424        public ParsingAlgorithm getAlgorithm() {
425                return parsingAlgorithm;
426        }
427        /**
428         * Returns the guide
429         * 
430         * @return the guide
431         */
432        public ClassifierGuide getGuide() {
433                return parsingAlgorithm.getGuide();
434        }
435        
436        public void checkOptionDependency() throws MaltChainedException {
437                try {
438                        if (configDir.getInfoFileWriter() != null) {
439                                configDir.getInfoFileWriter().write("\nDEPENDENCIES\n");
440                        }
441                        
442                        // Copy the feature model file into the configuration directory
443                        String featureModelFileName = getOptionValue("guide", "features").toString().trim();
444                        if (featureModelFileName.equals("")) {
445                                
446                                // use default feature model depending on the selected parser algorithm
447                                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", getOptionValueString("singlemalt", "parsing_algorithm"));
448                                featureModelFileName = getOptionValue("guide", "features").toString().trim();
449                                
450                                /* START: Temp fix during development of new liblinear and libsvm interface */
451                                String learner = getOptionValueString("guide", "learner");
452                                if (!learner.startsWith("lib")) {
453                                        learner = "lib"+learner;
454                                }
455                                /* END: Temp fix during development of new liblinear and libsvm interface */
456                                featureModelFileName = featureModelFileName.replace("{learner}", learner);
457                                featureModelFileName = featureModelFileName.replace("{dataformat}", getOptionValue("input", "format").toString().trim().replace(".xml", ""));
458                                
459                                final URLFinder f = new URLFinder();
460                                featureModelFileName = configDir.copyToConfig(f.findURLinJars(featureModelFileName));
461                        } else {
462                                featureModelFileName = configDir.copyToConfig(featureModelFileName);
463                        }
464                        OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "features", featureModelFileName);
465                        if (configDir.getInfoFileWriter() != null) {
466                                configDir.getInfoFileWriter().write("--guide-features (  -F)                 "+getOptionValue("guide", "features").toString()+"\n");
467                        }
468
469                        if (getOptionValue("guide", "data_split_column").toString().equals("") && !getOptionValue("guide", "data_split_structure").toString().equals("")) {
470                                configLogger.warn("Option --guide-data_split_column = '' and --guide-data_split_structure != ''. Option --guide-data_split_structure is overloaded with '', this will cause the parser to induce a single model.\n ");
471                                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_structure", "");
472                                if (configDir.getInfoFileWriter() != null) {
473                                        configDir.getInfoFileWriter().write("--guide-data_split_structure (  -s)\n");
474                                }
475                        }
476                        if (!getOptionValue("guide", "data_split_column").toString().equals("") && getOptionValue("guide", "data_split_structure").toString().equals("")) {
477                                configLogger.warn("Option --guide-data_split_column != '' and --guide-data_split_structure = ''. Option --guide-data_split_column is overloaded with '', this will cause the parser to induce a single model.\n");
478                                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "data_split_column", "");
479                                if (configDir.getInfoFileWriter() != null) {
480                                        configDir.getInfoFileWriter().write("--guide-data_split_column (  -d)\n");
481                                }
482                        }
483                        
484                        String decisionSettings = getOptionValue("guide", "decision_settings").toString().trim();
485                        String markingStrategy = getOptionValue("pproj", "marking_strategy").toString().trim();
486                        String coveredRoot = getOptionValue("pproj", "covered_root").toString().trim();
487                        StringBuilder newDecisionSettings = new StringBuilder();
488
489                        if (decisionSettings == null || decisionSettings.length() < 1 || decisionSettings.equals("default")) {
490                                decisionSettings = "T.TRANS+A.DEPREL";
491                        } else {
492                                decisionSettings = decisionSettings.toUpperCase();
493                        }
494                        
495                        if (markingStrategy.equalsIgnoreCase("head") || markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
496                                if (!Pattern.matches(".*A\\.PPLIFTED.*", decisionSettings)) {
497                                        newDecisionSettings.append("+A.PPLIFTED");
498                                }
499                        }
500                        if (markingStrategy.equalsIgnoreCase("path") || markingStrategy.equalsIgnoreCase("head+path")) {
501                                if (!Pattern.matches(".*A\\.PPPATH.*", decisionSettings)) {
502                                        newDecisionSettings.append("+A.PPPATH");
503                                }
504                        }
505                        if (!coveredRoot.equalsIgnoreCase("none") && !Pattern.matches(".*A\\.PPCOVERED.*", decisionSettings)) {
506                                newDecisionSettings.append("+A.PPCOVERED");
507                        }
508                        if (!getOptionValue("guide", "decision_settings").toString().equals(decisionSettings) || newDecisionSettings.length() > 0) {
509                                OptionManager.instance().overloadOptionValue(optionContainerIndex, "guide", "decision_settings", decisionSettings+newDecisionSettings.toString());
510                                if (configDir.getInfoFileWriter() != null) {
511                                        configDir.getInfoFileWriter().write("--guide-decision_settings (  -gds)                 "+getOptionValue("guide", "decision_settings").toString()+"\n");
512                                }
513                        }
514                        if (configDir.getInfoFileWriter() != null) {
515                                configDir.getInfoFileWriter().flush();
516                        }
517                } catch (IOException e) {
518                        throw new ConfigurationException("Could not write to the configuration information file. ", e);
519                }
520        }
521        
522        private URL findURL(String propagationSpecFileName) throws MaltChainedException {
523                URL url = null;
524                File specFile = configDir.getFile(propagationSpecFileName);
525                if (specFile.exists()) {
526                        try {
527                                url = new URL("file:///"+specFile.getAbsolutePath());
528                        } catch (MalformedURLException e) {
529                                throw new PropagationException("Malformed URL: "+specFile, e);
530                        }
531                } else {
532                        url = configDir.getConfigFileEntryURL(propagationSpecFileName);
533                }
534                return url;
535        }
536}