001 package org.maltparser;
002
003 import java.net.MalformedURLException;
004 import java.net.URL;
005 import java.util.Iterator;
006
007 import org.maltparser.core.exception.MaltChainedException;
008 import org.maltparser.core.flow.FlowChartInstance;
009 import org.maltparser.core.helper.SystemInfo;
010 import org.maltparser.core.helper.URLFinder;
011 import org.maltparser.core.io.dataformat.ColumnDescription;
012 import org.maltparser.core.io.dataformat.DataFormatInstance;
013 import org.maltparser.core.io.dataformat.DataFormatSpecification;
014 import org.maltparser.core.options.OptionManager;
015 import org.maltparser.core.symbol.SymbolTable;
016 import org.maltparser.core.symbol.SymbolTableHandler;
017 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
018 import org.maltparser.core.syntaxgraph.DependencyGraph;
019 import org.maltparser.core.syntaxgraph.DependencyStructure;
020 import org.maltparser.core.syntaxgraph.edge.Edge;
021 import org.maltparser.core.syntaxgraph.node.DependencyNode;
022 import org.maltparser.parser.SingleMalt;
023
024 /**
025 * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser.
026 *
027 * There are two ways to call the MaltParserService:
028 * 1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser.
029 * 2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is
030 * done by the third-party program.
031 *
032 * How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex'
033 *
034 * @author Johan Hall
035 */
036 public class MaltParserService {
037 private URL urlMaltJar;
038 private Engine engine;
039 private FlowChartInstance flowChartInstance;
040 private DataFormatInstance dataFormatInstance;
041 private SingleMalt singleMalt;
042 private int optionContainer;
043 private boolean initialized = false;
044
045 /**
046 * Creates a MaltParserService with the option container 0
047 *
048 * @throws MaltChainedException
049 */
050 public MaltParserService() throws MaltChainedException {
051 this(0);
052 }
053
054 /**
055 * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program
056 * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
057 * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
058 *
059 * @param optionContainer an integer from 0 to max value of data type Integer
060 * @throws MaltChainedException
061 */
062 public MaltParserService(int optionContainer) throws MaltChainedException {
063 initialize();
064 setOptionContainer(optionContainer);
065 }
066
067 /**
068 * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
069 *
070 * @param commandLine a commandLine string that controls the MaltParser.
071 * @throws MaltChainedException
072 */
073 public void runExperiment(String commandLine) throws MaltChainedException {
074 OptionManager.instance().parseCommandLine(commandLine, optionContainer);
075 engine = new Engine();
076 engine.initialize(optionContainer);
077 engine.process(optionContainer);
078 engine.terminate(optionContainer);
079 }
080
081 /**
082 * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
083 *
084 * @param commandLine a commandLine string that controls the MaltParser
085 * @throws MaltChainedException
086 */
087 public void initializeParserModel(String commandLine) throws MaltChainedException {
088 OptionManager.instance().parseCommandLine(commandLine, optionContainer);
089 // Creates an engine
090 engine = new Engine();
091 // Initialize the engine with option container and gets a flow chart instance
092 flowChartInstance = engine.initialize(optionContainer);
093 // Runs the preprocess chart items of the "parse" flow chart
094 if (flowChartInstance.hasPreProcessChartItems()) {
095 flowChartInstance.preprocess();
096 }
097 singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt");
098 singleMalt.getConfigurationDir().initDataFormat();
099 dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
100 singleMalt.getSymbolTables(),
101 OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); //,
102 // OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label"));
103 initialized = true;
104 }
105
106
107
108 /**
109 * Parses an array of tokens and returns a dependency structure.
110 *
111 * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel().
112 *
113 * @param tokens an array of tokens
114 * @return a dependency structure
115 * @throws MaltChainedException
116 */
117 public DependencyStructure parse(String[] tokens) throws MaltChainedException {
118 if (!initialized) {
119 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
120 }
121 if (tokens == null || tokens.length == 0) {
122 throw new MaltChainedException("Nothing to parse. ");
123 }
124
125 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
126
127 for (int i = 0; i < tokens.length; i++) {
128 Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
129 DependencyNode node = outputGraph.addDependencyNode(i+1);
130 String[] items = tokens[i].split("\t");
131 for (int j = 0; j < items.length; j++) {
132 if (columns.hasNext()) {
133 ColumnDescription column = columns.next();
134 if (column.getCategory() == ColumnDescription.INPUT && node != null) {
135 outputGraph.addLabel(node, column.getName(), items[j]);
136 }
137 }
138 }
139 }
140 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
141 // Invoke parse with the output graph
142 singleMalt.parse(outputGraph);
143 return outputGraph;
144 }
145
146 /**
147 * Converts an array of tokens to a dependency structure
148 *
149 * @param tokens an array of tokens
150 * @return a dependency structure
151 * @throws MaltChainedException
152 */
153 public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
154 if (tokens == null || tokens.length == 0) {
155 throw new MaltChainedException("Nothing to convert. ");
156 }
157 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
158
159 for (int i = 0; i < tokens.length; i++) {
160 Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
161 DependencyNode node = outputGraph.addDependencyNode(i+1);
162 String[] items = tokens[i].split("\t");
163 Edge edge = null;
164 for (int j = 0; j < items.length; j++) {
165 if (columns.hasNext()) {
166 ColumnDescription column = columns.next();
167 if (column.getCategory() == ColumnDescription.INPUT && node != null) {
168 outputGraph.addLabel(node, column.getName(), items[j]);
169 } else if (column.getCategory() == ColumnDescription.HEAD) {
170 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
171 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
172 }
173 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
174 outputGraph.addLabel(edge, column.getName(), items[j]);
175 }
176 }
177 }
178 }
179 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
180 return outputGraph;
181 }
182
183 public DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException {
184 // Creates a symbol table handler
185 SymbolTableHandler symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);
186
187 // Initialize data format instance of the CoNLL data format from conllx.xml (conllx.xml located in same directory)
188 DataFormatSpecification dataFormat = new DataFormatSpecification();
189 dataFormat.parseDataFormatXMLfile(dataFormatFileName);
190 DataFormatInstance dataFormatInstance = dataFormat.createDataFormatInstance(symbolTables, "none");
191
192 // Creates a dependency graph
193 if (tokens == null || tokens.length == 0) {
194 throw new MaltChainedException("Nothing to convert. ");
195 }
196 DependencyStructure outputGraph = new DependencyGraph(symbolTables);
197
198 for (int i = 0; i < tokens.length; i++) {
199 Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
200 DependencyNode node = outputGraph.addDependencyNode(i+1);
201 String[] items = tokens[i].split("\t");
202 Edge edge = null;
203 for (int j = 0; j < items.length; j++) {
204 if (columns.hasNext()) {
205 ColumnDescription column = columns.next();
206 if (column.getCategory() == ColumnDescription.INPUT && node != null) {
207 outputGraph.addLabel(node, column.getName(), items[j]);
208 } else if (column.getCategory() == ColumnDescription.HEAD) {
209 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
210 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
211 }
212 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
213 outputGraph.addLabel(edge, column.getName(), items[j]);
214 }
215 }
216 }
217 }
218 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
219 return outputGraph;
220 }
221
222 /**
223 * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string
224 *
225 * @param tokens an array of tokens to parse
226 * @return an array of tokens with a head index and a dependency type at the end of string
227 * @throws MaltChainedException
228 */
229 public String[] parseTokens(String[] tokens) throws MaltChainedException {
230 DependencyStructure outputGraph = parse(tokens);
231 StringBuilder sb = new StringBuilder();
232 String[] outputTokens = new String[tokens.length];
233 SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
234 for (Integer index : outputGraph.getTokenIndices()) {
235 sb.setLength(0);
236 if (index <= tokens.length) {
237 DependencyNode node = outputGraph.getDependencyNode(index);
238 sb.append(tokens[index -1]);
239 sb.append('\t');
240 sb.append(node.getHead().getIndex());
241 sb.append('\t');
242 if (node.getHeadEdge().hasLabel(deprelTable)) {
243 sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
244 } else {
245 sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
246 }
247 outputTokens[index-1] = sb.toString();
248 }
249 }
250 return outputTokens;
251 }
252
253 /**
254 * Terminates the parser model.
255 *
256 * @throws MaltChainedException
257 */
258 public void terminateParserModel() throws MaltChainedException {
259 // Runs the postprocess chart items of the "parse" flow chart
260 if (flowChartInstance.hasPostProcessChartItems()) {
261 flowChartInstance.postprocess();
262 }
263
264 // Terminate the flow chart with an option container
265 engine.terminate(optionContainer);
266 }
267
268 private void initialize() throws MaltChainedException {
269 if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
270 return; // OptionManager is already initialized
271 }
272 String maltpath = getMaltJarPath();
273 if (maltpath == null) {
274 throw new MaltChainedException("malt.jar could not be found. ");
275 }
276 final URLFinder f = new URLFinder();
277 urlMaltJar = f.findURL(maltpath);
278 try {
279 OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml"));
280
281 } catch (MalformedURLException e) {
282 throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
283 }
284 OptionManager.instance().generateMaps();
285 }
286
287
288 /**
289 * Returns the option container index
290 *
291 * @return the option container index
292 */
293 public int getOptionContainer() {
294 return optionContainer;
295 }
296
297 private void setOptionContainer(int optionContainer) {
298 this.optionContainer = optionContainer;
299 }
300
301 /**
302 * Returns the path of malt.jar file
303 *
304 * @return the path of malt.jar file
305 */
306 public static String getMaltJarPath() {
307 if (SystemInfo.getMaltJarPath() != null) {
308 return SystemInfo.getMaltJarPath().toString();
309 }
310 return null;
311 }
312
313
314 }