001    package org.maltparser;
002    
003    import java.net.MalformedURLException;
004    import java.net.URL;
005    import java.util.Iterator;
006    
007    import org.maltparser.core.exception.MaltChainedException;
008    import org.maltparser.core.flow.FlowChartInstance;
009    import org.maltparser.core.helper.SystemInfo;
010    import org.maltparser.core.helper.URLFinder;
011    import org.maltparser.core.io.dataformat.ColumnDescription;
012    import org.maltparser.core.io.dataformat.DataFormatInstance;
013    import org.maltparser.core.io.dataformat.DataFormatSpecification;
014    import org.maltparser.core.options.OptionManager;
015    import org.maltparser.core.symbol.SymbolTable;
016    import org.maltparser.core.symbol.SymbolTableHandler;
017    import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
018    import org.maltparser.core.syntaxgraph.DependencyGraph;
019    import org.maltparser.core.syntaxgraph.DependencyStructure;
020    import org.maltparser.core.syntaxgraph.edge.Edge;
021    import org.maltparser.core.syntaxgraph.node.DependencyNode;
022    import org.maltparser.parser.SingleMalt;
023    
024    /**
025     * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser. 
026     * 
027     *  There are two ways to call the MaltParserService:
028     *  1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser.
029     *  2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is
030     *  done by the third-party program.
031     *  
032     *  How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex'
033     * 
034     * @author Johan Hall
035     */
036    public class MaltParserService {
037            private URL urlMaltJar;
038            private Engine engine;
039            private FlowChartInstance flowChartInstance;
040            private DataFormatInstance dataFormatInstance;
041            private SingleMalt singleMalt;
042            private int optionContainer;
043            private boolean initialized = false;
044            
045            /**
046             * Creates a MaltParserService with the option container 0
047             * 
048             * @throws MaltChainedException
049             */
050            public MaltParserService() throws MaltChainedException {
051                    this(0);
052            }
053            
054            /**
055             * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 
056             * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
057             * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
058             * 
059             * @param optionContainer an integer from 0 to max value of data type Integer
060             * @throws MaltChainedException
061             */
062            public MaltParserService(int optionContainer) throws MaltChainedException {
063                    initialize();
064                    setOptionContainer(optionContainer);
065            }
066            
067            /**
068             * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
069             * 
070             * @param commandLine a commandLine string that controls the MaltParser.
071             * @throws MaltChainedException
072             */
073            public void runExperiment(String commandLine) throws MaltChainedException {
074                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
075                    engine = new Engine();
076                    engine.initialize(optionContainer);
077                    engine.process(optionContainer);
078                    engine.terminate(optionContainer);
079            }
080            
081            /**
082             * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
083             * 
084             * @param commandLine a commandLine string that controls the MaltParser
085             * @throws MaltChainedException
086             */
087            public void initializeParserModel(String commandLine) throws MaltChainedException {
088                    OptionManager.instance().parseCommandLine(commandLine, optionContainer);
089                    // Creates an engine
090                    engine = new Engine();
091                    // Initialize the engine with option container and gets a flow chart instance
092                    flowChartInstance = engine.initialize(optionContainer);
093                    // Runs the preprocess chart items of the "parse" flow chart
094                    if (flowChartInstance.hasPreProcessChartItems()) {
095                            flowChartInstance.preprocess();
096                    }
097                    singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt");
098                    singleMalt.getConfigurationDir().initDataFormat();
099                    dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
100                                    singleMalt.getSymbolTables(),
101                                    OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); //, 
102    //                              OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label"));
103                    initialized = true;
104            }
105            
106    
107            
108            /**
109             * Parses an array of tokens and returns a dependency structure. 
110             * 
111             * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 
112             * 
113             * @param tokens an array of tokens 
114             * @return a dependency structure
115             * @throws MaltChainedException
116             */
117            public DependencyStructure parse(String[] tokens) throws MaltChainedException {
118                    if (!initialized) {
119                            throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
120                    }
121                    if (tokens == null || tokens.length == 0) {
122                            throw new MaltChainedException("Nothing to parse. ");
123                    }
124    
125                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
126                    
127                    for (int i = 0; i < tokens.length; i++) {
128                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
129                            DependencyNode node = outputGraph.addDependencyNode(i+1);
130                            String[] items = tokens[i].split("\t");
131                            for (int j = 0; j < items.length; j++) {
132                                    if (columns.hasNext()) {
133                                            ColumnDescription column = columns.next();
134                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
135                                                    outputGraph.addLabel(node, column.getName(), items[j]);
136                                            }
137                                    }
138                            }
139                    }
140                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
141                    // Invoke parse with the output graph
142                    singleMalt.parse(outputGraph);
143                    return outputGraph;
144            }
145            
146            /**
147             * Converts an array of tokens to a dependency structure
148             * 
149             * @param tokens an array of tokens
150             * @return a dependency structure
151             * @throws MaltChainedException
152             */
153            public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
154                    if (tokens == null || tokens.length == 0) {
155                            throw new MaltChainedException("Nothing to convert. ");
156                    }
157                    DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
158                    
159                    for (int i = 0; i < tokens.length; i++) {
160                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
161                            DependencyNode node = outputGraph.addDependencyNode(i+1);
162                            String[] items = tokens[i].split("\t");
163                            Edge edge = null;
164                            for (int j = 0; j < items.length; j++) {
165                                    if (columns.hasNext()) {
166                                            ColumnDescription column = columns.next();
167                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
168                                                    outputGraph.addLabel(node, column.getName(), items[j]);
169                                            } else if (column.getCategory() == ColumnDescription.HEAD) {
170                                                    if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
171                                                            edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
172                                                    }
173                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
174                                                    outputGraph.addLabel(edge, column.getName(), items[j]);
175                                            }
176                                    }
177                            }
178                    }
179                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
180                    return outputGraph;
181            }
182            
183            public  DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException {
184                    // Creates a symbol table handler
185                    SymbolTableHandler symbolTables = new TrieSymbolTableHandler();
186                    
187                    // Initialize data format instance of the CoNLL data format from conllx.xml (conllx.xml located in same directory)
188                    DataFormatSpecification dataFormat = new DataFormatSpecification();
189                    dataFormat.parseDataFormatXMLfile(dataFormatFileName);
190                    DataFormatInstance dataFormatInstance = dataFormat.createDataFormatInstance(symbolTables, "none");
191    
192                    // Creates a dependency graph
193                    if (tokens == null || tokens.length == 0) {
194                            throw new MaltChainedException("Nothing to convert. ");
195                    }
196                    DependencyStructure outputGraph = new DependencyGraph(symbolTables);
197                    
198                    for (int i = 0; i < tokens.length; i++) {
199                            Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
200                            DependencyNode node = outputGraph.addDependencyNode(i+1);
201                            String[] items = tokens[i].split("\t");
202                            Edge edge = null;
203                            for (int j = 0; j < items.length; j++) {
204                                    if (columns.hasNext()) {
205                                            ColumnDescription column = columns.next();
206                                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
207                                                    outputGraph.addLabel(node, column.getName(), items[j]);
208                                            } else if (column.getCategory() == ColumnDescription.HEAD) {
209                                                    if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
210                                                            edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
211                                                    }
212                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
213                                                    outputGraph.addLabel(edge, column.getName(), items[j]);
214                                            }
215                                    }
216                            }
217                    }
218                    outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
219                    return outputGraph;
220            }
221            
222            /**
223             * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string
224             * 
225             * @param tokens an array of tokens to parse
226             * @return an array of tokens with a head index and a dependency type at the end of string
227             * @throws MaltChainedException
228             */
229            public String[] parseTokens(String[] tokens) throws MaltChainedException {
230                    DependencyStructure outputGraph = parse(tokens);
231                    StringBuilder sb = new StringBuilder();
232                    String[] outputTokens = new String[tokens.length];
233                    SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
234                    for (Integer index : outputGraph.getTokenIndices()) {
235                            sb.setLength(0);
236                            if (index <= tokens.length) {
237                                    DependencyNode node = outputGraph.getDependencyNode(index);
238                                    sb.append(tokens[index -1]);
239                                    sb.append('\t');
240                                    sb.append(node.getHead().getIndex());
241                                    sb.append('\t');
242                                    if (node.getHeadEdge().hasLabel(deprelTable)) {
243                                            sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
244                                    } else {
245                                            sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
246                                    }
247                                    outputTokens[index-1] = sb.toString();
248                            }
249                    }
250                    return outputTokens;
251            }
252            
253            /**
254             * Terminates the parser model.
255             * 
256             * @throws MaltChainedException
257             */
258            public void terminateParserModel() throws MaltChainedException {
259                    // Runs the postprocess chart items of the "parse" flow chart
260                    if (flowChartInstance.hasPostProcessChartItems()) {
261                            flowChartInstance.postprocess();
262                    }
263                    
264                    // Terminate the flow chart with an option container
265                    engine.terminate(optionContainer);
266            }
267            
268            private void initialize() throws MaltChainedException {
269                    if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
270                            return; // OptionManager is already initialized
271                    }
272                    String maltpath = getMaltJarPath();
273                    if (maltpath == null) {
274                            throw new MaltChainedException("malt.jar could not be found. ");
275                    }
276                    final URLFinder f = new URLFinder();
277                    urlMaltJar = f.findURL(maltpath);
278                    try {
279                            OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml"));
280                            
281                    } catch (MalformedURLException e) {
282                            throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
283                    }
284                    OptionManager.instance().generateMaps();
285            }
286            
287            
288            /**
289             * Returns the option container index
290             * 
291             * @return the option container index
292             */
293            public int getOptionContainer() {
294                    return optionContainer;
295            }
296    
297            private void setOptionContainer(int optionContainer) {
298                    this.optionContainer = optionContainer;
299            }
300    
301            /**
302             * Returns the path of malt.jar file
303             * 
304             * @return the path of malt.jar file
305             */
306            public static String getMaltJarPath() {
307                    if (SystemInfo.getMaltJarPath() != null) {
308                            return SystemInfo.getMaltJarPath().toString();
309                    }
310                    return null;
311            }
312            
313            
314    }