001    package org.maltparser;
002    
003    import java.net.MalformedURLException;
004    import java.net.URL;
005    import java.util.Iterator;
006    
007    import org.maltparser.core.exception.MaltChainedException;
008    import org.maltparser.core.flow.FlowChartInstance;
009    import org.maltparser.core.helper.SystemInfo;
010    import org.maltparser.core.helper.Util;
011    import org.maltparser.core.io.dataformat.ColumnDescription;
012    import org.maltparser.core.io.dataformat.DataFormatException;
013    import org.maltparser.core.io.dataformat.DataFormatInstance;
014    import org.maltparser.core.options.OptionManager;
015    import org.maltparser.core.symbol.SymbolTable;
016    import org.maltparser.core.syntaxgraph.DependencyGraph;
017    import org.maltparser.core.syntaxgraph.DependencyStructure;
018    import org.maltparser.core.syntaxgraph.edge.Edge;
019    import org.maltparser.core.syntaxgraph.node.DependencyNode;
020    import org.maltparser.parser.SingleMalt;
021    
022    /**
023     * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser. 
024     * 
025     *  There are two ways to call the MaltParserService:
026     *  1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser.
027     *  2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is
028     *  done by the third-party program.
029     *  
030     *  How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex'
031     * 
032     * @author Johan Hall
033     */
034    public class MaltParserService {
035            private URL urlMaltJar;
036            private Engine engine;
037            private FlowChartInstance flowChartInstance;
038            private DataFormatInstance dataFormatInstance;
039            private SingleMalt singleMalt;
040            private int optionContainer;
041            private boolean initialized = false;
042            
043            /**
044             * Creates a MaltParserService with the option container 0
045             * 
046             * @throws MaltChainedException
047             */
048            public MaltParserService() throws MaltChainedException {
049                    this(0);
050            }
051            
052            /**
053             * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 
054             * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
055             * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
056             * 
057             * @param optionContainer an integer from 0 to max value of data type Integer
058             * @throws MaltChainedException
059             */
060            public MaltParserService(int optionContainer) throws MaltChainedException {
061                    initialize();
062                    setOptionContainer(optionContainer);
063            }
064            
065            /**
066             * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
067             * 
068             * @param commandLine a commandLine string that controls the MaltParser.
069             * @throws MaltChainedException
070             */
071            public void runExperiment(String commandLine) throws MaltChainedException {
072                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
073                    engine = new Engine();
074                    engine.initialize(optionContainer);
075                    engine.process(optionContainer);
076                    engine.terminate(optionContainer);
077            }
078            
079            /**
080             * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
081             * 
082             * @param commandLine a commandLine string that controls the MaltParser
083             * @throws MaltChainedException
084             */
085            public void initializeParserModel(String commandLine) throws MaltChainedException {
086                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
087                    // Creates an engine
088                    engine = new Engine();
089                    // Initialize the engine with option container and gets a flow chart instance
090                    flowChartInstance = engine.initialize(optionContainer);
091                    // Runs the preprocess chart items of the "parse" flow chart
092                    if (flowChartInstance.hasPreProcessChartItems()) {
093                            flowChartInstance.preprocess();
094                    }
095                    singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt");
096                    singleMalt.getConfigurationDir().initDataFormat();
097                    dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
098                                    singleMalt.getSymbolTables(),
099                                    OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); //, 
100    //                              OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label"));
101                    initialized = true;
102            }
103            
104            /**
105             * Parses an array of tokens and returns a dependency structure. 
106             * 
107             * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 
108             * 
109             * @param tokens an array of tokens 
110             * @return a dependency structure
111             * @throws MaltChainedException
112             */
113            public DependencyStructure parse(String[] tokens) throws MaltChainedException {
114                    if (!initialized) {
115                            throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
116                    }
117                    if (tokens == null || tokens.length == 0) {
118                            throw new MaltChainedException("Nothing to parse. ");
119                    }
120    
121                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
122                    
123                    for (int i = 0; i < tokens.length; i++) {
124                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
125                            DependencyNode node = outputGraph.addDependencyNode(i+1);
126                            String[] items = tokens[i].split("\t");
127                            for (int j = 0; j < items.length; j++) {
128                                    if (columns.hasNext()) {
129                                            ColumnDescription column = columns.next();
130                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
131                                                    outputGraph.addLabel(node, column.getName(), items[j]);
132                                            }
133                                    }
134                            }
135                    }
136                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
137                    // Invoke parse with the output graph
138                    singleMalt.parse(outputGraph);
139                    return outputGraph;
140            }
141            
142            /**
143             * Converts an array of tokens to a dependency structure
144             * 
145             * @param tokens an array of tokens
146             * @return a dependency structure
147             * @throws MaltChainedException
148             */
149            public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
150                    if (tokens == null || tokens.length == 0) {
151                            throw new MaltChainedException("Nothing to convert. ");
152                    }
153                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
154                    
155                    for (int i = 0; i < tokens.length; i++) {
156                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
157                            DependencyNode node = outputGraph.addDependencyNode(i+1);
158                            String[] items = tokens[i].split("\t");
159                            Edge edge = null;
160                            for (int j = 0; j < items.length; j++) {
161                                    if (columns.hasNext()) {
162                                            ColumnDescription column = columns.next();
163                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
164                                                    outputGraph.addLabel(node, column.getName(), items[j]);
165                                            } else if (column.getCategory() == ColumnDescription.HEAD) {
166                                                    if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
167                                                            edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
168                                                    }
169                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
170                                                    outputGraph.addLabel(edge, column.getName(), items[j]);
171                                            }
172                                    }
173                            }
174                    }
175                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
176                    return outputGraph;
177            }
178            
179            /**
180             * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string
181             * 
182             * @param tokens an array of tokens to parse
183             * @return an array of tokens with a head index and a dependency type at the end of string
184             * @throws MaltChainedException
185             */
186            public String[] parseTokens(String[] tokens) throws MaltChainedException {
187                    DependencyStructure outputGraph = parse(tokens);
188                    StringBuilder sb = new StringBuilder();
189                    String[] outputTokens = new String[tokens.length];
190                    SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
191                    for (Integer index : outputGraph.getTokenIndices()) {
192                            sb.setLength(0);
193                            if (index <= tokens.length) {
194                                    DependencyNode node = outputGraph.getDependencyNode(index);
195                                    sb.append(tokens[index -1]);
196                                    sb.append('\t');
197                                    sb.append(node.getHead().getIndex());
198                                    sb.append('\t');
199                                    if (node.getHeadEdge().hasLabel(deprelTable)) {
200                                            sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
201                                    } else {
202                                            sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
203                                    }
204                                    outputTokens[index-1] = sb.toString();
205                            }
206                    }
207                    return outputTokens;
208            }
209            
210            /**
211             * Terminates the parser model.
212             * 
213             * @throws MaltChainedException
214             */
215            public void terminateParserModel() throws MaltChainedException {
216                    // Runs the postprocess chart items of the "parse" flow chart
217                    if (flowChartInstance.hasPostProcessChartItems()) {
218                            flowChartInstance.postprocess();
219                    }
220                    
221                    // Terminate the flow chart with an option container
222                    engine.terminate(optionContainer);
223            }
224            
225            private void initialize() throws MaltChainedException {
226                    if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
227                            return; // OptionManager is already initialized
228                    }
229                    String maltpath = getMaltJarPath();
230                    if (maltpath == null) {
231                            throw new MaltChainedException("malt.jar could not be found. ");
232                    }
233                    urlMaltJar = Util.findURL(maltpath);
234                    try {
235                            OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml"));
236                            
237                    } catch (MalformedURLException e) {
238                            throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
239                    }
240                    OptionManager.instance().generateMaps();
241            }
242            
243            
244            /**
245             * Returns the option container index
246             * 
247             * @return the option container index
248             */
249            public int getOptionContainer() {
250                    return optionContainer;
251            }
252    
253            private void setOptionContainer(int optionContainer) {
254                    this.optionContainer = optionContainer;
255            }
256    
257            /**
258             * Returns the path of malt.jar file
259             * 
260             * @return the path of malt.jar file
261             */
262            public static String getMaltJarPath() {
263                    if (SystemInfo.getMaltJarPath() != null) {
264                            return SystemInfo.getMaltJarPath().toString();
265                    }
266                    return null;
267            }
268            
269            
270    }