001    package org.maltparser;
002    
003    import java.net.MalformedURLException;
004    import java.net.URL;
005    import java.util.Iterator;
006    
007    import org.maltparser.core.exception.MaltChainedException;
008    import org.maltparser.core.flow.FlowChartInstance;
009    import org.maltparser.core.helper.SystemInfo;
010    import org.maltparser.core.helper.Util;
011    import org.maltparser.core.io.dataformat.ColumnDescription;
012    import org.maltparser.core.io.dataformat.DataFormatException;
013    import org.maltparser.core.io.dataformat.DataFormatInstance;
014    import org.maltparser.core.io.dataformat.DataFormatSpecification;
015    import org.maltparser.core.options.OptionManager;
016    import org.maltparser.core.symbol.SymbolTable;
017    import org.maltparser.core.symbol.SymbolTableHandler;
018    import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
019    import org.maltparser.core.syntaxgraph.DependencyGraph;
020    import org.maltparser.core.syntaxgraph.DependencyStructure;
021    import org.maltparser.core.syntaxgraph.edge.Edge;
022    import org.maltparser.core.syntaxgraph.node.DependencyNode;
023    import org.maltparser.parser.SingleMalt;
024    
/**
 * MaltParserService makes it easy to write third-party programs that use MaltParser.
 *
 * There are two ways to use MaltParserService:
 * 1. By running experiments, which allows other programs to train a parser model or to parse with a parser model. IO-handling is done by MaltParser.
 * 2. By first initializing a parser model and then calling the method parse() with an array of tokens that MaltParser parses. IO-handling of the
 *    sentence is done by the third-party program.
 *
 * For examples of how to use MaltParserService, please see the directory 'examples/apiexamples/srcex'.
 *
 * @author Johan Hall
 */
037    public class MaltParserService {
038            private URL urlMaltJar;
039            private Engine engine;
040            private FlowChartInstance flowChartInstance;
041            private DataFormatInstance dataFormatInstance;
042            private SingleMalt singleMalt;
043            private int optionContainer;
044            private boolean initialized = false;
045            
046            /**
047             * Creates a MaltParserService with the option container 0
048             * 
049             * @throws MaltChainedException
050             */
051            public MaltParserService() throws MaltChainedException {
052                    this(0);
053            }
054            
055            /**
056             * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 
057             * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
058             * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
059             * 
060             * @param optionContainer an integer from 0 to max value of data type Integer
061             * @throws MaltChainedException
062             */
063            public MaltParserService(int optionContainer) throws MaltChainedException {
064                    initialize();
065                    setOptionContainer(optionContainer);
066            }
067            
068            /**
069             * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
070             * 
071             * @param commandLine a commandLine string that controls the MaltParser.
072             * @throws MaltChainedException
073             */
074            public void runExperiment(String commandLine) throws MaltChainedException {
075                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
076                    engine = new Engine();
077                    engine.initialize(optionContainer);
078                    engine.process(optionContainer);
079                    engine.terminate(optionContainer);
080            }
081            
082            /**
083             * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
084             * 
085             * @param commandLine a commandLine string that controls the MaltParser
086             * @throws MaltChainedException
087             */
088            public void initializeParserModel(String commandLine) throws MaltChainedException {
089                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
090                    // Creates an engine
091                    engine = new Engine();
092                    // Initialize the engine with option container and gets a flow chart instance
093                    flowChartInstance = engine.initialize(optionContainer);
094                    // Runs the preprocess chart items of the "parse" flow chart
095                    if (flowChartInstance.hasPreProcessChartItems()) {
096                            flowChartInstance.preprocess();
097                    }
098                    singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt");
099                    singleMalt.getConfigurationDir().initDataFormat();
100                    dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
101                                    singleMalt.getSymbolTables(),
102                                    OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); //, 
103    //                              OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label"));
104                    initialized = true;
105            }
106            
107    
108            
109            /**
110             * Parses an array of tokens and returns a dependency structure. 
111             * 
112             * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 
113             * 
114             * @param tokens an array of tokens 
115             * @return a dependency structure
116             * @throws MaltChainedException
117             */
118            public DependencyStructure parse(String[] tokens) throws MaltChainedException {
119                    if (!initialized) {
120                            throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
121                    }
122                    if (tokens == null || tokens.length == 0) {
123                            throw new MaltChainedException("Nothing to parse. ");
124                    }
125    
126                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
127                    
128                    for (int i = 0; i < tokens.length; i++) {
129                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
130                            DependencyNode node = outputGraph.addDependencyNode(i+1);
131                            String[] items = tokens[i].split("\t");
132                            for (int j = 0; j < items.length; j++) {
133                                    if (columns.hasNext()) {
134                                            ColumnDescription column = columns.next();
135                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
136                                                    outputGraph.addLabel(node, column.getName(), items[j]);
137                                            }
138                                    }
139                            }
140                    }
141                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
142                    // Invoke parse with the output graph
143                    singleMalt.parse(outputGraph);
144                    return outputGraph;
145            }
146            
147            /**
148             * Converts an array of tokens to a dependency structure
149             * 
150             * @param tokens an array of tokens
151             * @return a dependency structure
152             * @throws MaltChainedException
153             */
154            public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
155                    if (tokens == null || tokens.length == 0) {
156                            throw new MaltChainedException("Nothing to convert. ");
157                    }
158                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
159                    
160                    for (int i = 0; i < tokens.length; i++) {
161                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
162                            DependencyNode node = outputGraph.addDependencyNode(i+1);
163                            String[] items = tokens[i].split("\t");
164                            Edge edge = null;
165                            for (int j = 0; j < items.length; j++) {
166                                    if (columns.hasNext()) {
167                                            ColumnDescription column = columns.next();
168                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
169                                                    outputGraph.addLabel(node, column.getName(), items[j]);
170                                            } else if (column.getCategory() == ColumnDescription.HEAD) {
171                                                    if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
172                                                            edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
173                                                    }
174                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
175                                                    outputGraph.addLabel(edge, column.getName(), items[j]);
176                                            }
177                                    }
178                            }
179                    }
180                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
181                    return outputGraph;
182            }
183            
184            public  DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException {
185                    // Creates a symbol table handler
186                    SymbolTableHandler symbolTables = new TrieSymbolTableHandler();
187                    
188                    // Initialize data format instance of the CoNLL data format from conllx.xml (conllx.xml located in same directory)
189                    DataFormatSpecification dataFormat = new DataFormatSpecification();
190                    dataFormat.parseDataFormatXMLfile(dataFormatFileName);
191                    DataFormatInstance dataFormatInstance = dataFormat.createDataFormatInstance(symbolTables, "none");
192    
193                    // Creates a dependency graph
194                    if (tokens == null || tokens.length == 0) {
195                            throw new MaltChainedException("Nothing to convert. ");
196                    }
197                    DependencyStructure outputGraph = new DependencyGraph(symbolTables);
198                    
199                    for (int i = 0; i < tokens.length; i++) {
200                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
201                            DependencyNode node = outputGraph.addDependencyNode(i+1);
202                            String[] items = tokens[i].split("\t");
203                            Edge edge = null;
204                            for (int j = 0; j < items.length; j++) {
205                                    if (columns.hasNext()) {
206                                            ColumnDescription column = columns.next();
207                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
208                                                    outputGraph.addLabel(node, column.getName(), items[j]);
209                                            } else if (column.getCategory() == ColumnDescription.HEAD) {
210                                                    if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
211                                                            edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
212                                                    }
213                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
214                                                    outputGraph.addLabel(edge, column.getName(), items[j]);
215                                            }
216                                    }
217                            }
218                    }
219                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
220                    return outputGraph;
221            }
222            
223            /**
224             * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string
225             * 
226             * @param tokens an array of tokens to parse
227             * @return an array of tokens with a head index and a dependency type at the end of string
228             * @throws MaltChainedException
229             */
230            public String[] parseTokens(String[] tokens) throws MaltChainedException {
231                    DependencyStructure outputGraph = parse(tokens);
232                    StringBuilder sb = new StringBuilder();
233                    String[] outputTokens = new String[tokens.length];
234                    SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
235                    for (Integer index : outputGraph.getTokenIndices()) {
236                            sb.setLength(0);
237                            if (index <= tokens.length) {
238                                    DependencyNode node = outputGraph.getDependencyNode(index);
239                                    sb.append(tokens[index -1]);
240                                    sb.append('\t');
241                                    sb.append(node.getHead().getIndex());
242                                    sb.append('\t');
243                                    if (node.getHeadEdge().hasLabel(deprelTable)) {
244                                            sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
245                                    } else {
246                                            sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
247                                    }
248                                    outputTokens[index-1] = sb.toString();
249                            }
250                    }
251                    return outputTokens;
252            }
253            
254            /**
255             * Terminates the parser model.
256             * 
257             * @throws MaltChainedException
258             */
259            public void terminateParserModel() throws MaltChainedException {
260                    // Runs the postprocess chart items of the "parse" flow chart
261                    if (flowChartInstance.hasPostProcessChartItems()) {
262                            flowChartInstance.postprocess();
263                    }
264                    
265                    // Terminate the flow chart with an option container
266                    engine.terminate(optionContainer);
267            }
268            
269            private void initialize() throws MaltChainedException {
270                    if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
271                            return; // OptionManager is already initialized
272                    }
273                    String maltpath = getMaltJarPath();
274                    if (maltpath == null) {
275                            throw new MaltChainedException("malt.jar could not be found. ");
276                    }
277                    urlMaltJar = Util.findURL(maltpath);
278                    try {
279                            OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml"));
280                            
281                    } catch (MalformedURLException e) {
282                            throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
283                    }
284                    OptionManager.instance().generateMaps();
285            }
286            
287            
288            /**
289             * Returns the option container index
290             * 
291             * @return the option container index
292             */
293            public int getOptionContainer() {
294                    return optionContainer;
295            }
296    
297            private void setOptionContainer(int optionContainer) {
298                    this.optionContainer = optionContainer;
299            }
300    
301            /**
302             * Returns the path of malt.jar file
303             * 
304             * @return the path of malt.jar file
305             */
306            public static String getMaltJarPath() {
307                    if (SystemInfo.getMaltJarPath() != null) {
308                            return SystemInfo.getMaltJarPath().toString();
309                    }
310                    return null;
311            }
312            
313            
314    }