001 package org.maltparser; 002 003 import java.net.MalformedURLException; 004 import java.net.URL; 005 import java.util.Iterator; 006 007 import org.maltparser.core.exception.MaltChainedException; 008 import org.maltparser.core.flow.FlowChartInstance; 009 import org.maltparser.core.helper.SystemInfo; 010 import org.maltparser.core.helper.URLFinder; 011 import org.maltparser.core.io.dataformat.ColumnDescription; 012 import org.maltparser.core.io.dataformat.DataFormatInstance; 013 import org.maltparser.core.io.dataformat.DataFormatSpecification; 014 import org.maltparser.core.options.OptionManager; 015 import org.maltparser.core.symbol.SymbolTable; 016 import org.maltparser.core.symbol.SymbolTableHandler; 017 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler; 018 import org.maltparser.core.syntaxgraph.DependencyGraph; 019 import org.maltparser.core.syntaxgraph.DependencyStructure; 020 import org.maltparser.core.syntaxgraph.edge.Edge; 021 import org.maltparser.core.syntaxgraph.node.DependencyNode; 022 import org.maltparser.parser.SingleMalt; 023 024 /** 025 * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser. 026 * 027 * There are two ways to call the MaltParserService: 028 * 1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser. 029 * 2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is 030 * done by the third-party program. 031 * 032 * How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex' 033 * 034 * @author Johan Hall 035 */ 036 public class MaltParserService { 037 private URL urlMaltJar; 038 private Engine engine; 039 private FlowChartInstance flowChartInstance; 040 private DataFormatInstance dataFormatInstance; 041 private SingleMalt singleMalt; 042 private int optionContainer; 043 private boolean initialized = false; 044 045 /** 046 * Creates a MaltParserService with the option container 0 047 * 048 * @throws MaltChainedException 049 */ 050 public MaltParserService() throws MaltChainedException { 051 this(0); 052 } 053 054 /** 055 * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 056 * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only 057 * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers. 058 * 059 * @param optionContainer an integer from 0 to max value of data type Integer 060 * @throws MaltChainedException 061 */ 062 public MaltParserService(int optionContainer) throws MaltChainedException { 063 setOptionContainer(optionContainer); 064 initialize(); 065 } 066 067 /** 068 * Use this constructor only when you want a MaltParserService without an option manager. Without the option manager MaltParser cannot 069 * load or create a parser model. 070 * 071 * @param optionFreeInitialization true, means that MaltParserService is created without an option manager, false will do the same as MaltParserService(). 072 * @throws MaltChainedException 073 */ 074 public MaltParserService(boolean optionFreeInitialization) throws MaltChainedException { 075 if (optionFreeInitialization == false) { 076 setOptionContainer(0); 077 initialize(); 078 } else { 079 setOptionContainer(-1); 080 } 081 } 082 083 /** 084 * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 085 * 086 * @param commandLine a commandLine string that controls the MaltParser. 087 * @throws MaltChainedException 088 */ 089 public void runExperiment(String commandLine) throws MaltChainedException { 090 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 091 engine = new Engine(); 092 engine.initialize(optionContainer); 093 engine.process(optionContainer); 094 engine.terminate(optionContainer); 095 } 096 097 /** 098 * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 099 * 100 * @param commandLine a commandLine string that controls the MaltParser 101 * @throws MaltChainedException 102 */ 103 public void initializeParserModel(String commandLine) throws MaltChainedException { 104 if (optionContainer == -1) { 105 throw new MaltChainedException("MaltParserService has been initialized as an option free initialization and therefore no parser model can be initialized."); 106 } 107 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 108 // Creates an engine 109 engine = new Engine(); 110 // Initialize the engine with option container and gets a flow chart instance 111 flowChartInstance = engine.initialize(optionContainer); 112 // Runs the preprocess chart items of the "parse" flow chart 113 if (flowChartInstance.hasPreProcessChartItems()) { 114 flowChartInstance.preprocess(); 115 } 116 singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt"); 117 singleMalt.getConfigurationDir().initDataFormat(); 118 dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance( 119 singleMalt.getSymbolTables(), 120 OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); 121 initialized = true; 122 } 123 124 125 126 /** 127 * Parses an array of tokens and returns a dependency structure. 128 * 129 * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 130 * 131 * @param tokens an array of tokens 132 * @return a dependency structure 133 * @throws MaltChainedException 134 */ 135 public DependencyStructure parse(String[] tokens) throws MaltChainedException { 136 if (!initialized) { 137 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); 138 } 139 if (tokens == null || tokens.length == 0) { 140 throw new MaltChainedException("Nothing to parse. "); 141 } 142 143 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); 144 145 for (int i = 0; i < tokens.length; i++) { 146 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 147 DependencyNode node = outputGraph.addDependencyNode(i+1); 148 String[] items = tokens[i].split("\t"); 149 for (int j = 0; j < items.length; j++) { 150 if (columns.hasNext()) { 151 ColumnDescription column = columns.next(); 152 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 153 outputGraph.addLabel(node, column.getName(), items[j]); 154 } 155 } 156 } 157 } 158 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 159 // Invoke parse with the output graph 160 singleMalt.parse(outputGraph); 161 return outputGraph; 162 } 163 164 /** 165 * Converts an array of tokens to a dependency structure. 166 * 167 * Note that this method uses the same data format specification and symbol table as the parser engine. This can cause problem in multi-threaded 168 * environment. 169 * 170 * Please use (in multi-threaded environment) 171 * toDependencyStructure(String[] tokens, DataFormatSpecification dataFormatSpecification) 172 * or 173 * toDependencyStructure(String[] tokens, String dataFormatFileName) 174 * 175 * @param tokens an array of tokens 176 * @return a dependency structure 177 * @throws MaltChainedException 178 */ 179 public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException { 180 if (!initialized) { 181 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); 182 } 183 if (tokens == null || tokens.length == 0) { 184 throw new MaltChainedException("Nothing to convert. "); 185 } 186 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); 187 188 for (int i = 0; i < tokens.length; i++) { 189 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 190 DependencyNode node = outputGraph.addDependencyNode(i+1); 191 String[] items = tokens[i].split("\t"); 192 Edge edge = null; 193 for (int j = 0; j < items.length; j++) { 194 if (columns.hasNext()) { 195 ColumnDescription column = columns.next(); 196 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 197 outputGraph.addLabel(node, column.getName(), items[j]); 198 } else if (column.getCategory() == ColumnDescription.HEAD) { 199 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) { 200 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1); 201 } 202 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { 203 outputGraph.addLabel(edge, column.getName(), items[j]); 204 } 205 } 206 } 207 } 208 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 209 return outputGraph; 210 } 211 212 /** 213 * Reads the data format specification file 214 * 215 * @param dataFormatFileName the path to the data format specification file 216 * @return a data format specification 217 * @throws MaltChainedException 218 */ 219 public DataFormatSpecification readDataFormatSpecification(String dataFormatFileName) throws MaltChainedException { 220 DataFormatSpecification dataFormat = new DataFormatSpecification(); 221 dataFormat.parseDataFormatXMLfile(dataFormatFileName); 222 return dataFormat; 223 } 224 225 /** 226 * Converts an array of tokens to a dependency structure 227 * 228 * @param tokens tokens an array of tokens 229 * @param dataFormatSpecification a data format specification 230 * @return a dependency structure 231 * @throws MaltChainedException 232 */ 233 public DependencyStructure toDependencyStructure(String[] tokens, DataFormatSpecification dataFormatSpecification) throws MaltChainedException { 234 // Creates a symbol table handler 235 SymbolTableHandler symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE); 236 237 // Initialize data format instance 238 DataFormatInstance dataFormatInstance = dataFormatSpecification.createDataFormatInstance(symbolTables, "none"); 239 240 // Creates a dependency graph 241 if (tokens == null || tokens.length == 0) { 242 throw new MaltChainedException("Nothing to convert. "); 243 } 244 DependencyStructure outputGraph = new DependencyGraph(symbolTables); 245 246 for (int i = 0; i < tokens.length; i++) { 247 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 248 DependencyNode node = outputGraph.addDependencyNode(i+1); 249 String[] items = tokens[i].split("\t"); 250 Edge edge = null; 251 for (int j = 0; j < items.length; j++) { 252 if (columns.hasNext()) { 253 ColumnDescription column = columns.next(); 254 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 255 outputGraph.addLabel(node, column.getName(), items[j]); 256 } else if (column.getCategory() == ColumnDescription.HEAD) { 257 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) { 258 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1); 259 } 260 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { 261 outputGraph.addLabel(edge, column.getName(), items[j]); 262 } 263 } 264 } 265 } 266 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 267 return outputGraph; 268 } 269 270 /** 271 * Converts an array of tokens to a dependency structure 272 * 273 * @param tokens an array of tokens 274 * @param dataFormatFileName the path to the data format file 275 * @return a dependency structure 276 * @throws MaltChainedException 277 */ 278 public DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException { 279 return toDependencyStructure(tokens, readDataFormatSpecification(dataFormatFileName)); 280 } 281 282 /** 283 * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string 284 * 285 * @param tokens an array of tokens to parse 286 * @return an array of tokens with a head index and a dependency type at the end of string 287 * @throws MaltChainedException 288 */ 289 public String[] parseTokens(String[] tokens) throws MaltChainedException { 290 DependencyStructure outputGraph = parse(tokens); 291 StringBuilder sb = new StringBuilder(); 292 String[] outputTokens = new String[tokens.length]; 293 SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL"); 294 for (Integer index : outputGraph.getTokenIndices()) { 295 sb.setLength(0); 296 if (index <= tokens.length) { 297 DependencyNode node = outputGraph.getDependencyNode(index); 298 sb.append(tokens[index -1]); 299 sb.append('\t'); 300 sb.append(node.getHead().getIndex()); 301 sb.append('\t'); 302 if (node.getHeadEdge().hasLabel(deprelTable)) { 303 sb.append(node.getHeadEdge().getLabelSymbol(deprelTable)); 304 } else { 305 sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable)); 306 } 307 outputTokens[index-1] = sb.toString(); 308 } 309 } 310 return outputTokens; 311 } 312 313 /** 314 * Terminates the parser model. 315 * 316 * @throws MaltChainedException 317 */ 318 public void terminateParserModel() throws MaltChainedException { 319 if (!initialized) { 320 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); 321 } 322 // Runs the postprocess chart items of the "parse" flow chart 323 if (flowChartInstance.hasPostProcessChartItems()) { 324 flowChartInstance.postprocess(); 325 } 326 327 // Terminate the flow chart with an option container 328 engine.terminate(optionContainer); 329 } 330 331 private void initialize() throws MaltChainedException { 332 if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) { 333 return; // OptionManager is already initialized 334 } 335 String maltpath = getMaltJarPath(); 336 if (maltpath == null) { 337 throw new MaltChainedException("malt.jar could not be found. "); 338 } 339 final URLFinder f = new URLFinder(); 340 urlMaltJar = f.findURL(maltpath); 341 try { 342 OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml")); 343 344 } catch (MalformedURLException e) { 345 throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e); 346 } 347 OptionManager.instance().generateMaps(); 348 } 349 350 351 /** 352 * Returns the option container index 353 * 354 * @return the option container index 355 */ 356 public int getOptionContainer() { 357 return optionContainer; 358 } 359 360 private void setOptionContainer(int optionContainer) { 361 this.optionContainer = optionContainer; 362 } 363 364 /** 365 * Returns the path of malt.jar file 366 * 367 * @return the path of malt.jar file 368 */ 369 public static String getMaltJarPath() { 370 if (SystemInfo.getMaltJarPath() != null) { 371 return SystemInfo.getMaltJarPath().toString(); 372 } 373 return null; 374 } 375 376 377 }