001 package org.maltparser;
002
003 import java.net.MalformedURLException;
004 import java.net.URL;
005 import java.util.Iterator;
006
007 import org.maltparser.core.exception.MaltChainedException;
008 import org.maltparser.core.flow.FlowChartInstance;
009 import org.maltparser.core.helper.SystemInfo;
010 import org.maltparser.core.helper.Util;
011 import org.maltparser.core.io.dataformat.ColumnDescription;
012 import org.maltparser.core.io.dataformat.DataFormatException;
013 import org.maltparser.core.io.dataformat.DataFormatInstance;
014 import org.maltparser.core.options.OptionManager;
015 import org.maltparser.core.symbol.SymbolTable;
016 import org.maltparser.core.syntaxgraph.DependencyGraph;
017 import org.maltparser.core.syntaxgraph.DependencyStructure;
018 import org.maltparser.core.syntaxgraph.edge.Edge;
019 import org.maltparser.core.syntaxgraph.node.DependencyNode;
020 import org.maltparser.parser.SingleMalt;
021
022 /**
023 * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser.
024 *
025 * There are two ways to call the MaltParserService:
026 * 1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser.
027 * 2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is
028 * done by the third-party program.
029 *
030 * How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex'
031 *
032 * @author Johan Hall
033 */
034 public class MaltParserService {
035 private URL urlMaltJar;
036 private Engine engine;
037 private FlowChartInstance flowChartInstance;
038 private DataFormatInstance dataFormatInstance;
039 private SingleMalt singleMalt;
040 private int optionContainer;
041 private boolean initialized = false;
042
043 /**
044 * Creates a MaltParserService with the option container 0
045 *
046 * @throws MaltChainedException
047 */
048 public MaltParserService() throws MaltChainedException {
049 this(0);
050 }
051
052 /**
053 * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program
054 * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only
055 * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers.
056 *
057 * @param optionContainer an integer from 0 to max value of data type Integer
058 * @throws MaltChainedException
059 */
060 public MaltParserService(int optionContainer) throws MaltChainedException {
061 initialize();
062 setOptionContainer(optionContainer);
063 }
064
065 /**
066 * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
067 *
068 * @param commandLine a commandLine string that controls the MaltParser.
069 * @throws MaltChainedException
070 */
071 public void runExperiment(String commandLine) throws MaltChainedException {
072 OptionManager.instance().parseCommandLine(commandLine, optionContainer);
073 engine = new Engine();
074 engine.initialize(optionContainer);
075 engine.process(optionContainer);
076 engine.terminate(optionContainer);
077 }
078
079 /**
080 * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options.
081 *
082 * @param commandLine a commandLine string that controls the MaltParser
083 * @throws MaltChainedException
084 */
085 public void initializeParserModel(String commandLine) throws MaltChainedException {
086 OptionManager.instance().parseCommandLine(commandLine, optionContainer);
087 // Creates an engine
088 engine = new Engine();
089 // Initialize the engine with option container and gets a flow chart instance
090 flowChartInstance = engine.initialize(optionContainer);
091 // Runs the preprocess chart items of the "parse" flow chart
092 if (flowChartInstance.hasPreProcessChartItems()) {
093 flowChartInstance.preprocess();
094 }
095 singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt");
096 singleMalt.getConfigurationDir().initDataFormat();
097 dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance(
098 singleMalt.getSymbolTables(),
099 OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); //,
100 // OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label"));
101 initialized = true;
102 }
103
104 /**
105 * Parses an array of tokens and returns a dependency structure.
106 *
107 * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel().
108 *
109 * @param tokens an array of tokens
110 * @return a dependency structure
111 * @throws MaltChainedException
112 */
113 public DependencyStructure parse(String[] tokens) throws MaltChainedException {
114 if (!initialized) {
115 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.");
116 }
117 if (tokens == null || tokens.length == 0) {
118 throw new MaltChainedException("Nothing to parse. ");
119 }
120
121 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
122
123 for (int i = 0; i < tokens.length; i++) {
124 Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
125 DependencyNode node = outputGraph.addDependencyNode(i+1);
126 String[] items = tokens[i].split("\t");
127 for (int j = 0; j < items.length; j++) {
128 if (columns.hasNext()) {
129 ColumnDescription column = columns.next();
130 if (column.getCategory() == ColumnDescription.INPUT && node != null) {
131 outputGraph.addLabel(node, column.getName(), items[j]);
132 }
133 }
134 }
135 }
136 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
137 // Invoke parse with the output graph
138 singleMalt.parse(outputGraph);
139 return outputGraph;
140 }
141
142 /**
143 * Converts an array of tokens to a dependency structure
144 *
145 * @param tokens an array of tokens
146 * @return a dependency structure
147 * @throws MaltChainedException
148 */
149 public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
150 if (tokens == null || tokens.length == 0) {
151 throw new MaltChainedException("Nothing to convert. ");
152 }
153 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
154
155 for (int i = 0; i < tokens.length; i++) {
156 Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
157 DependencyNode node = outputGraph.addDependencyNode(i+1);
158 String[] items = tokens[i].split("\t");
159 Edge edge = null;
160 for (int j = 0; j < items.length; j++) {
161 if (columns.hasNext()) {
162 ColumnDescription column = columns.next();
163 if (column.getCategory() == ColumnDescription.INPUT && node != null) {
164 outputGraph.addLabel(node, column.getName(), items[j]);
165 } else if (column.getCategory() == ColumnDescription.HEAD) {
166 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) {
167 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1);
168 }
169 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
170 outputGraph.addLabel(edge, column.getName(), items[j]);
171 }
172 }
173 }
174 }
175 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
176 return outputGraph;
177 }
178
179 /**
180 * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string
181 *
182 * @param tokens an array of tokens to parse
183 * @return an array of tokens with a head index and a dependency type at the end of string
184 * @throws MaltChainedException
185 */
186 public String[] parseTokens(String[] tokens) throws MaltChainedException {
187 DependencyStructure outputGraph = parse(tokens);
188 StringBuilder sb = new StringBuilder();
189 String[] outputTokens = new String[tokens.length];
190 SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
191 for (Integer index : outputGraph.getTokenIndices()) {
192 sb.setLength(0);
193 if (index <= tokens.length) {
194 DependencyNode node = outputGraph.getDependencyNode(index);
195 sb.append(tokens[index -1]);
196 sb.append('\t');
197 sb.append(node.getHead().getIndex());
198 sb.append('\t');
199 if (node.getHeadEdge().hasLabel(deprelTable)) {
200 sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
201 } else {
202 sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
203 }
204 outputTokens[index-1] = sb.toString();
205 }
206 }
207 return outputTokens;
208 }
209
210 /**
211 * Terminates the parser model.
212 *
213 * @throws MaltChainedException
214 */
215 public void terminateParserModel() throws MaltChainedException {
216 // Runs the postprocess chart items of the "parse" flow chart
217 if (flowChartInstance.hasPostProcessChartItems()) {
218 flowChartInstance.postprocess();
219 }
220
221 // Terminate the flow chart with an option container
222 engine.terminate(optionContainer);
223 }
224
225 private void initialize() throws MaltChainedException {
226 if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
227 return; // OptionManager is already initialized
228 }
229 String maltpath = getMaltJarPath();
230 if (maltpath == null) {
231 throw new MaltChainedException("malt.jar could not be found. ");
232 }
233 urlMaltJar = Util.findURL(maltpath);
234 try {
235 OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml"));
236
237 } catch (MalformedURLException e) {
238 throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
239 }
240 OptionManager.instance().generateMaps();
241 }
242
243
244 /**
245 * Returns the option container index
246 *
247 * @return the option container index
248 */
249 public int getOptionContainer() {
250 return optionContainer;
251 }
252
253 private void setOptionContainer(int optionContainer) {
254 this.optionContainer = optionContainer;
255 }
256
257 /**
258 * Returns the path of malt.jar file
259 *
260 * @return the path of malt.jar file
261 */
262 public static String getMaltJarPath() {
263 if (SystemInfo.getMaltJarPath() != null) {
264 return SystemInfo.getMaltJarPath().toString();
265 }
266 return null;
267 }
268
269
270 }