001    package org.maltparser;
002    
003    import java.net.MalformedURLException;
004    import java.net.URL;
005    import java.util.Iterator;
006    
007    import org.maltparser.core.exception.MaltChainedException;
008    import org.maltparser.core.flow.FlowChartInstance;
009    import org.maltparser.core.helper.SystemInfo;
010    import org.maltparser.core.helper.URLFinder;
011    import org.maltparser.core.io.dataformat.ColumnDescription;
012    import org.maltparser.core.io.dataformat.DataFormatInstance;
013    import org.maltparser.core.io.dataformat.DataFormatSpecification;
014    import org.maltparser.core.options.OptionManager;
015    import org.maltparser.core.symbol.SymbolTable;
016    import org.maltparser.core.symbol.SymbolTableHandler;
017    import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
018    import org.maltparser.core.syntaxgraph.DependencyGraph;
019    import org.maltparser.core.syntaxgraph.DependencyStructure;
020    import org.maltparser.core.syntaxgraph.edge.Edge;
021    import org.maltparser.core.syntaxgraph.node.DependencyNode;
022    import org.maltparser.parser.SingleMalt;
023    
024    /**
025     * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser. 
026     * 
027     *  There are two ways to call the MaltParserService:
028     *  1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser.
029     *  2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is
030     *  done by the third-party program.
031     *  
032     *  How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex'
033     * 
034     * @author Johan Hall
035     */
036    public class MaltParserService {
037            private URL urlMaltJar;
038            private Engine engine;
039            private FlowChartInstance flowChartInstance;
040            private DataFormatInstance dataFormatInstance;
041            private SingleMalt singleMalt;
042            private int optionContainer;
043            private boolean initialized = false;
044            
045            /**
046             * Creates a MaltParserService with the option container 0
047             * 
048             * @throws MaltChainedException
049             */
050            public MaltParserService() throws MaltChainedException {
051                    this(0);
052            }
053            
054            /**
055             * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 
056             * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
057             * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
058             * 
059             * @param optionContainer an integer from 0 to max value of data type Integer
060             * @throws MaltChainedException
061             */
062            public MaltParserService(int optionContainer) throws MaltChainedException {
063                    setOptionContainer(optionContainer);
064                    initialize();
065            }
066            
067            /**
068             * Use this constructor only when you want a MaltParserService without an option manager. Without the option manager MaltParser cannot
069             * load or create a parser model. 
070             * 
071             * @param optionFreeInitialization true, means that MaltParserService is created without an option manager, false will do the same as MaltParserService(). 
072             * @throws MaltChainedException
073             */
074            public MaltParserService(boolean optionFreeInitialization) throws MaltChainedException {
075                    if (optionFreeInitialization == false) {
076                            setOptionContainer(0);
077                            initialize();
078                    } else {
079                            setOptionContainer(-1);
080                    }
081            }
082            
083            /**
084             * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
085             * 
086             * @param commandLine a commandLine string that controls the MaltParser.
087             * @throws MaltChainedException
088             */
089            public void runExperiment(String commandLine) throws MaltChainedException {
090                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
091                    engine = new Engine();
092                    engine.initialize(optionContainer);
093                    engine.process(optionContainer);
094                    engine.terminate(optionContainer);
095            }
096            
097            /**
098             * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
099             * 
100             * @param commandLine a commandLine string that controls the MaltParser
101             * @throws MaltChainedException
102             */
103            public void initializeParserModel(String commandLine) throws MaltChainedException {
104                    if (optionContainer == -1) {
105                            throw new MaltChainedException("MaltParserService has been initialized as an option free initialization and therefore no parser model can be initialized.");
106                    }
107                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
108                    // Creates an engine
109                    engine = new Engine();
110                    // Initialize the engine with option container and gets a flow chart instance
111                    flowChartInstance = engine.initialize(optionContainer);
112                    // Runs the preprocess chart items of the "parse" flow chart
113                    if (flowChartInstance.hasPreProcessChartItems()) {
114                            flowChartInstance.preprocess();
115                    }
116                    singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt");
117                    singleMalt.getConfigurationDir().initDataFormat();
118                    dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
119                                    singleMalt.getSymbolTables(),
120                                    OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); 
121                    initialized = true;
122            }
123            
124    
125            
126            /**
127             * Parses an array of tokens and returns a dependency structure. 
128             * 
129             * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 
130             * 
131             * @param tokens an array of tokens 
132             * @return a dependency structure
133             * @throws MaltChainedException
134             */
135            public DependencyStructure parse(String[] tokens) throws MaltChainedException {
136                    if (!initialized) {
137                            throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
138                    }
139                    if (tokens == null || tokens.length == 0) {
140                            throw new MaltChainedException("Nothing to parse. ");
141                    }
142    
143                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
144                    
145                    for (int i = 0; i < tokens.length; i++) {
146                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
147                            DependencyNode node = outputGraph.addDependencyNode(i+1);
148                            String[] items = tokens[i].split("\t");
149                            for (int j = 0; j < items.length; j++) {
150                                    if (columns.hasNext()) {
151                                            ColumnDescription column = columns.next();
152                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
153                                                    outputGraph.addLabel(node, column.getName(), items[j]);
154                                            }
155                                    }
156                            }
157                    }
158                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
159                    // Invoke parse with the output graph
160                    singleMalt.parse(outputGraph);
161                    return outputGraph;
162            }
163            
164            /**
165             * Converts an array of tokens to a dependency structure. 
166             * 
167             * Note that this method uses the same data format specification and symbol table as the parser engine. This can cause problem in multi-threaded 
168             * environment. 
169             * 
170             * Please use (in multi-threaded environment)
171             * toDependencyStructure(String[] tokens, DataFormatSpecification dataFormatSpecification)
172             * or
173             * toDependencyStructure(String[] tokens, String dataFormatFileName)
174             * 
175             * @param tokens an array of tokens
176             * @return a dependency structure
177             * @throws MaltChainedException
178             */
179            public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
180                    if (!initialized) {
181                            throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
182                    }
183                    if (tokens == null || tokens.length == 0) {
184                            throw new MaltChainedException("Nothing to convert. ");
185                    }
186                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
187                    
188                    for (int i = 0; i < tokens.length; i++) {
189                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
190                            DependencyNode node = outputGraph.addDependencyNode(i+1);
191                            String[] items = tokens[i].split("\t");
192                            Edge edge = null;
193                            for (int j = 0; j < items.length; j++) {
194                                    if (columns.hasNext()) {
195                                            ColumnDescription column = columns.next();
196                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
197                                                    outputGraph.addLabel(node, column.getName(), items[j]);
198                                            } else if (column.getCategory() == ColumnDescription.HEAD) {
199                                                    if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
200                                                            edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
201                                                    }
202                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
203                                                    outputGraph.addLabel(edge, column.getName(), items[j]);
204                                            }
205                                    }
206                            }
207                    }
208                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
209                    return outputGraph;
210            }
211            
212            /**
213             * Reads the data format specification file
214             * 
215             * @param dataFormatFileName the path to the data format specification file
216             * @return a data format specification
217             * @throws MaltChainedException
218             */
219            public DataFormatSpecification readDataFormatSpecification(String dataFormatFileName) throws MaltChainedException {
220                    DataFormatSpecification dataFormat = new DataFormatSpecification();
221                    dataFormat.parseDataFormatXMLfile(dataFormatFileName);
222                    return dataFormat;
223            }
224            
225            /**
226             * Converts an array of tokens to a dependency structure
227             * 
228             * @param tokens tokens an array of tokens
229             * @param dataFormatSpecification a data format specification
230             * @return a dependency structure
231             * @throws MaltChainedException
232             */
233            public DependencyStructure toDependencyStructure(String[] tokens, DataFormatSpecification dataFormatSpecification) throws MaltChainedException {
234                    // Creates a symbol table handler
235                    SymbolTableHandler symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);
236                    
237                    // Initialize data format instance
238                    DataFormatInstance dataFormatInstance = dataFormatSpecification.createDataFormatInstance(symbolTables, "none");
239    
240                    // Creates a dependency graph
241                    if (tokens == null || tokens.length == 0) {
242                            throw new MaltChainedException("Nothing to convert. ");
243                    }
244                    DependencyStructure outputGraph = new DependencyGraph(symbolTables);
245                    
246                    for (int i = 0; i < tokens.length; i++) {
247                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
248                            DependencyNode node = outputGraph.addDependencyNode(i+1);
249                            String[] items = tokens[i].split("\t");
250                            Edge edge = null;
251                            for (int j = 0; j < items.length; j++) {
252                                    if (columns.hasNext()) {
253                                            ColumnDescription column = columns.next();
254                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
255                                                    outputGraph.addLabel(node, column.getName(), items[j]);
256                                            } else if (column.getCategory() == ColumnDescription.HEAD) {
257                                                    if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
258                                                            edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
259                                                    }
260                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
261                                                    outputGraph.addLabel(edge, column.getName(), items[j]);
262                                            }
263                                    }
264                            }
265                    }
266                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
267                    return outputGraph;
268            }
269            
270            /**
271             * Converts an array of tokens to a dependency structure
272             * 
273             * @param tokens an array of tokens
274             * @param dataFormatFileName the path to the data format file
275             * @return a dependency structure
276             * @throws MaltChainedException
277             */
278            public DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException {
279                    return toDependencyStructure(tokens, readDataFormatSpecification(dataFormatFileName));
280            }
281            
282            /**
283             * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string
284             * 
285             * @param tokens an array of tokens to parse
286             * @return an array of tokens with a head index and a dependency type at the end of string
287             * @throws MaltChainedException
288             */
289            public String[] parseTokens(String[] tokens) throws MaltChainedException {
290                    DependencyStructure outputGraph = parse(tokens);
291                    StringBuilder sb = new StringBuilder();
292                    String[] outputTokens = new String[tokens.length];
293                    SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
294                    for (Integer index : outputGraph.getTokenIndices()) {
295                            sb.setLength(0);
296                            if (index <= tokens.length) {
297                                    DependencyNode node = outputGraph.getDependencyNode(index);
298                                    sb.append(tokens[index -1]);
299                                    sb.append('\t');
300                                    sb.append(node.getHead().getIndex());
301                                    sb.append('\t');
302                                    if (node.getHeadEdge().hasLabel(deprelTable)) {
303                                            sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
304                                    } else {
305                                            sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
306                                    }
307                                    outputTokens[index-1] = sb.toString();
308                            }
309                    }
310                    return outputTokens;
311            }
312            
313            /**
314             * Terminates the parser model.
315             * 
316             * @throws MaltChainedException
317             */
318            public void terminateParserModel() throws MaltChainedException {
319                    if (!initialized) {
320                            throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
321                    }
322                    // Runs the postprocess chart items of the "parse" flow chart
323                    if (flowChartInstance.hasPostProcessChartItems()) {
324                            flowChartInstance.postprocess();
325                    }
326                    
327                    // Terminate the flow chart with an option container
328                    engine.terminate(optionContainer);
329            }
330            
331            private void initialize() throws MaltChainedException {
332                    if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
333                            return; // OptionManager is already initialized
334                    }
335                    String maltpath = getMaltJarPath();
336                    if (maltpath == null) {
337                            throw new MaltChainedException("malt.jar could not be found. ");
338                    }
339                    final URLFinder f = new URLFinder();
340                    urlMaltJar = f.findURL(maltpath);
341                    try {
342                            OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml"));
343                            
344                    } catch (MalformedURLException e) {
345                            throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
346                    }
347                    OptionManager.instance().generateMaps();
348            }
349            
350            
351            /**
352             * Returns the option container index
353             * 
354             * @return the option container index
355             */
356            public int getOptionContainer() {
357                    return optionContainer;
358            }
359    
360            private void setOptionContainer(int optionContainer) {
361                    this.optionContainer = optionContainer;
362            }
363    
364            /**
365             * Returns the path of malt.jar file
366             * 
367             * @return the path of malt.jar file
368             */
369            public static String getMaltJarPath() {
370                    if (SystemInfo.getMaltJarPath() != null) {
371                            return SystemInfo.getMaltJarPath().toString();
372                    }
373                    return null;
374            }
375            
376            
377    }