001 package org.maltparser; 002 003 import java.net.MalformedURLException; 004 import java.net.URL; 005 import java.util.Iterator; 006 007 import org.maltparser.core.exception.MaltChainedException; 008 import org.maltparser.core.flow.FlowChartInstance; 009 import org.maltparser.core.helper.SystemInfo; 010 import org.maltparser.core.helper.URLFinder; 011 import org.maltparser.core.io.dataformat.ColumnDescription; 012 import org.maltparser.core.io.dataformat.DataFormatInstance; 013 import org.maltparser.core.io.dataformat.DataFormatSpecification; 014 import org.maltparser.core.options.OptionManager; 015 import org.maltparser.core.symbol.SymbolTable; 016 import org.maltparser.core.symbol.SymbolTableHandler; 017 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler; 018 import org.maltparser.core.syntaxgraph.DependencyGraph; 019 import org.maltparser.core.syntaxgraph.DependencyStructure; 020 import org.maltparser.core.syntaxgraph.edge.Edge; 021 import org.maltparser.core.syntaxgraph.node.DependencyNode; 022 import org.maltparser.parser.SingleMalt; 023 024 /** 025 * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser. 026 * 027 * There are two ways to call the MaltParserService: 028 * 1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser. 029 * 2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is 030 * done by the third-party program. 031 * 032 * How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex' 033 * 034 * @author Johan Hall 035 */ 036 public class MaltParserService { 037 private URL urlMaltJar; 038 private Engine engine; 039 private FlowChartInstance flowChartInstance; 040 private DataFormatInstance dataFormatInstance; 041 private SingleMalt singleMalt; 042 private int optionContainer; 043 private boolean initialized = false; 044 045 /** 046 * Creates a MaltParserService with the option container 0 047 * 048 * @throws MaltChainedException 049 */ 050 public MaltParserService() throws MaltChainedException { 051 this(0); 052 } 053 054 /** 055 * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 056 * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only 057 * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers. 058 * 059 * @param optionContainer an integer from 0 to max value of data type Integer 060 * @throws MaltChainedException 061 */ 062 public MaltParserService(int optionContainer) throws MaltChainedException { 063 initialize(); 064 setOptionContainer(optionContainer); 065 } 066 067 /** 068 * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 069 * 070 * @param commandLine a commandLine string that controls the MaltParser. 071 * @throws MaltChainedException 072 */ 073 public void runExperiment(String commandLine) throws MaltChainedException { 074 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 075 engine = new Engine(); 076 engine.initialize(optionContainer); 077 engine.process(optionContainer); 078 engine.terminate(optionContainer); 079 } 080 081 /** 082 * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 083 * 084 * @param commandLine a commandLine string that controls the MaltParser 085 * @throws MaltChainedException 086 */ 087 public void initializeParserModel(String commandLine) throws MaltChainedException { 088 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 089 // Creates an engine 090 engine = new Engine(); 091 // Initialize the engine with option container and gets a flow chart instance 092 flowChartInstance = engine.initialize(optionContainer); 093 // Runs the preprocess chart items of the "parse" flow chart 094 if (flowChartInstance.hasPreProcessChartItems()) { 095 flowChartInstance.preprocess(); 096 } 097 singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt"); 098 singleMalt.getConfigurationDir().initDataFormat(); 099 dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance( 100 singleMalt.getSymbolTables(), 101 OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); //, 102 // OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label")); 103 initialized = true; 104 } 105 106 107 108 /** 109 * Parses an array of tokens and returns a dependency structure. 110 * 111 * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 112 * 113 * @param tokens an array of tokens 114 * @return a dependency structure 115 * @throws MaltChainedException 116 */ 117 public DependencyStructure parse(String[] tokens) throws MaltChainedException { 118 if (!initialized) { 119 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); 120 } 121 if (tokens == null || tokens.length == 0) { 122 throw new MaltChainedException("Nothing to parse. "); 123 } 124 125 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); 126 127 for (int i = 0; i < tokens.length; i++) { 128 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 129 DependencyNode node = outputGraph.addDependencyNode(i+1); 130 String[] items = tokens[i].split("\t"); 131 for (int j = 0; j < items.length; j++) { 132 if (columns.hasNext()) { 133 ColumnDescription column = columns.next(); 134 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 135 outputGraph.addLabel(node, column.getName(), items[j]); 136 } 137 } 138 } 139 } 140 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 141 // Invoke parse with the output graph 142 singleMalt.parse(outputGraph); 143 return outputGraph; 144 } 145 146 /** 147 * Converts an array of tokens to a dependency structure 148 * 149 * @param tokens an array of tokens 150 * @return a dependency structure 151 * @throws MaltChainedException 152 */ 153 public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException { 154 if (tokens == null || tokens.length == 0) { 155 throw new MaltChainedException("Nothing to convert. "); 156 } 157 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); 158 159 for (int i = 0; i < tokens.length; i++) { 160 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 161 DependencyNode node = outputGraph.addDependencyNode(i+1); 162 String[] items = tokens[i].split("\t"); 163 Edge edge = null; 164 for (int j = 0; j < items.length; j++) { 165 if (columns.hasNext()) { 166 ColumnDescription column = columns.next(); 167 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 168 outputGraph.addLabel(node, column.getName(), items[j]); 169 } else if (column.getCategory() == ColumnDescription.HEAD) { 170 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) { 171 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1); 172 } 173 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { 174 outputGraph.addLabel(edge, column.getName(), items[j]); 175 } 176 } 177 } 178 } 179 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 180 return outputGraph; 181 } 182 183 public DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException { 184 // Creates a symbol table handler 185 SymbolTableHandler symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE); 186 187 // Initialize data format instance of the CoNLL data format from conllx.xml (conllx.xml located in same directory) 188 DataFormatSpecification dataFormat = new DataFormatSpecification(); 189 dataFormat.parseDataFormatXMLfile(dataFormatFileName); 190 DataFormatInstance dataFormatInstance = dataFormat.createDataFormatInstance(symbolTables, "none"); 191 192 // Creates a dependency graph 193 if (tokens == null || tokens.length == 0) { 194 throw new MaltChainedException("Nothing to convert. "); 195 } 196 DependencyStructure outputGraph = new DependencyGraph(symbolTables); 197 198 for (int i = 0; i < tokens.length; i++) { 199 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 200 DependencyNode node = outputGraph.addDependencyNode(i+1); 201 String[] items = tokens[i].split("\t"); 202 Edge edge = null; 203 for (int j = 0; j < items.length; j++) { 204 if (columns.hasNext()) { 205 ColumnDescription column = columns.next(); 206 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 207 outputGraph.addLabel(node, column.getName(), items[j]); 208 } else if (column.getCategory() == ColumnDescription.HEAD) { 209 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) { 210 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1); 211 } 212 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { 213 outputGraph.addLabel(edge, column.getName(), items[j]); 214 } 215 } 216 } 217 } 218 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 219 return outputGraph; 220 } 221 222 /** 223 * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string 224 * 225 * @param tokens an array of tokens to parse 226 * @return an array of tokens with a head index and a dependency type at the end of string 227 * @throws MaltChainedException 228 */ 229 public String[] parseTokens(String[] tokens) throws MaltChainedException { 230 DependencyStructure outputGraph = parse(tokens); 231 StringBuilder sb = new StringBuilder(); 232 String[] outputTokens = new String[tokens.length]; 233 SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL"); 234 for (Integer index : outputGraph.getTokenIndices()) { 235 sb.setLength(0); 236 if (index <= tokens.length) { 237 DependencyNode node = outputGraph.getDependencyNode(index); 238 sb.append(tokens[index -1]); 239 sb.append('\t'); 240 sb.append(node.getHead().getIndex()); 241 sb.append('\t'); 242 if (node.getHeadEdge().hasLabel(deprelTable)) { 243 sb.append(node.getHeadEdge().getLabelSymbol(deprelTable)); 244 } else { 245 sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable)); 246 } 247 outputTokens[index-1] = sb.toString(); 248 } 249 } 250 return outputTokens; 251 } 252 253 /** 254 * Terminates the parser model. 255 * 256 * @throws MaltChainedException 257 */ 258 public void terminateParserModel() throws MaltChainedException { 259 // Runs the postprocess chart items of the "parse" flow chart 260 if (flowChartInstance.hasPostProcessChartItems()) { 261 flowChartInstance.postprocess(); 262 } 263 264 // Terminate the flow chart with an option container 265 engine.terminate(optionContainer); 266 } 267 268 private void initialize() throws MaltChainedException { 269 if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) { 270 return; // OptionManager is already initialized 271 } 272 String maltpath = getMaltJarPath(); 273 if (maltpath == null) { 274 throw new MaltChainedException("malt.jar could not be found. "); 275 } 276 final URLFinder f = new URLFinder(); 277 urlMaltJar = f.findURL(maltpath); 278 try { 279 OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml")); 280 281 } catch (MalformedURLException e) { 282 throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e); 283 } 284 OptionManager.instance().generateMaps(); 285 } 286 287 288 /** 289 * Returns the option container index 290 * 291 * @return the option container index 292 */ 293 public int getOptionContainer() { 294 return optionContainer; 295 } 296 297 private void setOptionContainer(int optionContainer) { 298 this.optionContainer = optionContainer; 299 } 300 301 /** 302 * Returns the path of malt.jar file 303 * 304 * @return the path of malt.jar file 305 */ 306 public static String getMaltJarPath() { 307 if (SystemInfo.getMaltJarPath() != null) { 308 return SystemInfo.getMaltJarPath().toString(); 309 } 310 return null; 311 } 312 313 314 }