package org.maltparser;

import java.net.MalformedURLException;
import java.net.URL;
import java.util.Iterator;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.flow.FlowChartInstance;
import org.maltparser.core.helper.SystemInfo;
import org.maltparser.core.helper.Util;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatException;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.io.dataformat.DataFormatSpecification;
import org.maltparser.core.options.OptionManager;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyGraph;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.edge.Edge;
import org.maltparser.core.syntaxgraph.node.DependencyNode;
import org.maltparser.parser.SingleMalt;

/**
 * MaltParserService makes it easy to write third-party programs that use MaltParser.
 *
 * There are two ways to use the MaltParserService:
 * 1. By running experiments, which allows other programs to train a parser model or parse with a parser model.
 *    IO-handling is done by MaltParser.
 * 2. By first initializing a parser model and then calling the method parse() with an array of tokens that
 *    MaltParser parses. IO-handling of the sentence is done by the third-party program.
 *
 * For examples of how to use MaltParserService, please see the directory 'examples/apiexamples/srcex'.
 *
 * @author Johan Hall
 */
public class MaltParserService {
    /** Message used whenever a method that needs a parser model is invoked before initializeParserModel(). */
    private static final String NO_MODEL_MESSAGE =
            "No parser model has been initialized. Please use the method initializeParserModel() before invoking this method.";

    private URL urlMaltJar;
    private Engine engine;
    private FlowChartInstance flowChartInstance;
    private DataFormatInstance dataFormatInstance;
    private SingleMalt singleMalt;
    private int optionContainer;
    private boolean initialized = false;

    /**
     * Creates a MaltParserService with the option container 0
     *
     * @throws MaltChainedException
     */
    public MaltParserService() throws MaltChainedException {
        this(0);
    }

    /**
     * Creates a MaltParserService with the specified option container. Using different option containers allows the
     * calling program to load several parser models or several experiments. The option management in MaltParser uses
     * the singleton design pattern, which means that there can only be one instance of the option manager. To be able
     * to have several parser models or experiments at the same time, please use different option containers.
     *
     * @param optionContainer an integer from 0 to max value of data type Integer
     * @throws MaltChainedException
     */
    public MaltParserService(int optionContainer) throws MaltChainedException {
        initialize();
        setOptionContainer(optionContainer);
    }

    /**
     * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the
     * documentation of MaltParser to see all available options.
     *
     * @param commandLine a commandLine string that controls the MaltParser.
     * @throws MaltChainedException
     */
    public void runExperiment(String commandLine) throws MaltChainedException {
        OptionManager.instance().parseCommandLine(commandLine, optionContainer);
        engine = new Engine();
        engine.initialize(optionContainer);
        engine.process(optionContainer);
        engine.terminate(optionContainer);
    }

    /**
     * Initializes a parser model that can later be used to parse sentences. MaltParser is controlled by a
     * commandLine string, please see the documentation of MaltParser to see all available options.
     *
     * @param commandLine a commandLine string that controls the MaltParser
     * @throws MaltChainedException
     */
    public void initializeParserModel(String commandLine) throws MaltChainedException {
        OptionManager.instance().parseCommandLine(commandLine, optionContainer);
        // Creates an engine
        engine = new Engine();
        // Initializes the engine with the option container and gets a flow chart instance
        flowChartInstance = engine.initialize(optionContainer);
        // Runs the preprocess chart items of the flow chart
        if (flowChartInstance.hasPreProcessChartItems()) {
            flowChartInstance.preprocess();
        }
        singleMalt = (SingleMalt) flowChartInstance.getFlowChartRegistry(SingleMalt.class, "singlemalt");
        singleMalt.getConfigurationDir().initDataFormat();
        dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec()
                .createDataFormatInstance(
                        singleMalt.getSymbolTables(),
                        OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value"));
        // Only flag the service as usable once every collaborator above has been set up.
        initialized = true;
    }

    /**
     * Parses an array of tokens and returns a dependency structure.
     *
     * Note: Calling this method requires that a parser model has been initialized by using initializeParserModel().
     *
     * @param tokens an array of tokens, each token being a tab-separated string of column values
     * @return a dependency structure
     * @throws MaltChainedException if no parser model has been initialized, if there are no tokens,
     *         if a token is null, or if parsing fails
     */
    public DependencyStructure parse(String[] tokens) throws MaltChainedException {
        if (!initialized) {
            throw new MaltChainedException(NO_MODEL_MESSAGE);
        }
        if (tokens == null || tokens.length == 0) {
            throw new MaltChainedException("Nothing to parse. ");
        }

        DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
        // Only the input columns are read here; heads and labels are produced by the parser.
        populateGraph(outputGraph, tokens, dataFormatInstance, false);
        // Invoke parse with the output graph
        singleMalt.parse(outputGraph);
        return outputGraph;
    }

    /**
     * Converts an array of tokens to a dependency structure using the data format and symbol tables of the
     * initialized parser model. Head and dependency edge label columns are read from the tokens.
     *
     * Note: Calling this method requires that a parser model has been initialized by using initializeParserModel().
     *
     * @param tokens an array of tokens, each token being a tab-separated string of column values
     * @return a dependency structure
     * @throws MaltChainedException if no parser model has been initialized, if there are no tokens,
     *         if a token is null, or if a head column does not contain an integer
     */
    public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException {
        // Without this guard singleMalt and dataFormatInstance are null and the method fails with a NullPointerException.
        if (!initialized) {
            throw new MaltChainedException(NO_MODEL_MESSAGE);
        }
        if (tokens == null || tokens.length == 0) {
            throw new MaltChainedException("Nothing to convert. ");
        }
        DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables());
        populateGraph(outputGraph, tokens, dataFormatInstance, true);
        return outputGraph;
    }

    /**
     * Converts an array of tokens to a dependency structure using the data format described by the given
     * data format specification file. This method does not use the initialized parser model; it creates its own
     * symbol table handler and data format instance.
     *
     * @param tokens an array of tokens, each token being a tab-separated string of column values
     * @param dataFormatFileName the name of the data format specification XML file
     * @return a dependency structure
     * @throws MaltChainedException if there are no tokens, if the data format cannot be loaded,
     *         if a token is null, or if a head column does not contain an integer
     */
    public DependencyStructure toDependencyStructure(String[] tokens, String dataFormatFileName) throws MaltChainedException {
        // Validate the cheap precondition first so that no data format file is parsed for nothing.
        if (tokens == null || tokens.length == 0) {
            throw new MaltChainedException("Nothing to convert. ");
        }

        // Creates a symbol table handler
        SymbolTableHandler symbolTables = new TrieSymbolTableHandler();

        // Creates a data format instance from the specified data format file
        DataFormatSpecification dataFormat = new DataFormatSpecification();
        dataFormat.parseDataFormatXMLfile(dataFormatFileName);
        DataFormatInstance localDataFormatInstance = dataFormat.createDataFormatInstance(symbolTables, "none");

        // Creates a dependency graph
        DependencyStructure outputGraph = new DependencyGraph(symbolTables);
        populateGraph(outputGraph, tokens, localDataFormatInstance, true);
        return outputGraph;
    }

    /**
     * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency
     * type at the end of each string
     *
     * @param tokens an array of tokens to parse
     * @return an array of tokens with a head index and a dependency type at the end of each string
     * @throws MaltChainedException
     */
    public String[] parseTokens(String[] tokens) throws MaltChainedException {
        DependencyStructure outputGraph = parse(tokens);
        StringBuilder sb = new StringBuilder();
        String[] outputTokens = new String[tokens.length];
        SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL");
        for (Integer index : outputGraph.getTokenIndices()) {
            sb.setLength(0);
            // Token indices are 1-based; indices beyond the input array are skipped.
            if (index <= tokens.length) {
                DependencyNode node = outputGraph.getDependencyNode(index);
                sb.append(tokens[index - 1]);
                sb.append('\t');
                sb.append(node.getHead().getIndex());
                sb.append('\t');
                if (node.getHeadEdge().hasLabel(deprelTable)) {
                    sb.append(node.getHeadEdge().getLabelSymbol(deprelTable));
                } else {
                    // No explicit label on the head edge: fall back to the default root edge label.
                    sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable));
                }
                outputTokens[index - 1] = sb.toString();
            }
        }
        return outputTokens;
    }

    /**
     * Terminates the parser model.
     *
     * @throws MaltChainedException if no parser model has been initialized or if the termination fails
     */
    public void terminateParserModel() throws MaltChainedException {
        // Both collaborators are only available after initializeParserModel(); report that
        // through the declared exception instead of failing with a NullPointerException.
        if (flowChartInstance == null || engine == null) {
            throw new MaltChainedException(NO_MODEL_MESSAGE);
        }
        // Runs the postprocess chart items of the flow chart
        if (flowChartInstance.hasPostProcessChartItems()) {
            flowChartInstance.postprocess();
        }

        // Terminate the flow chart with an option container
        engine.terminate(optionContainer);
    }

    /**
     * Fills the graph with one dependency node per token and sets the default root edge label to "ROOT"
     * in the "DEPREL" symbol table.
     *
     * The tab-separated fields of a token are matched positionally against the columns of the data format;
     * fields without a corresponding column are ignored. INPUT columns become node labels. When
     * readDependencies is true, a HEAD column whose value is not "_" creates an edge from that head index to
     * the token, and DEPENDENCY_EDGE_LABEL columns label that edge if it has already been created for the token.
     *
     * @param graph the graph to fill
     * @param tokens a non-empty array of tokens
     * @param format the data format instance that describes the columns of a token
     * @param readDependencies true if head and dependency edge label columns should be read from the tokens
     * @throws MaltChainedException if a token is null or a head column does not contain an integer
     */
    private static void populateGraph(DependencyStructure graph, String[] tokens, DataFormatInstance format,
            boolean readDependencies) throws MaltChainedException {
        for (int i = 0; i < tokens.length; i++) {
            if (tokens[i] == null) {
                throw new MaltChainedException("The token at position " + (i + 1) + " is null. ");
            }
            Iterator<ColumnDescription> columns = format.iterator();
            DependencyNode node = graph.addDependencyNode(i + 1);
            String[] items = tokens[i].split("\t");
            Edge edge = null;
            for (int j = 0; j < items.length && columns.hasNext(); j++) {
                ColumnDescription column = columns.next();
                int category = column.getCategory();
                if (category == ColumnDescription.INPUT) {
                    if (node != null) {
                        graph.addLabel(node, column.getName(), items[j]);
                    }
                } else if (readDependencies) {
                    if (category == ColumnDescription.HEAD) {
                        if (!items[j].equals("_")) {
                            edge = graph.addDependencyEdge(parseHeadIndex(items[j], i + 1), i + 1);
                        }
                    } else if (category == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
                        graph.addLabel(edge, column.getName(), items[j]);
                    }
                }
            }
        }
        graph.setDefaultRootEdgeLabel(graph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
    }

    /**
     * Parses the value of a head column, reporting malformed values through the checked exception
     * that the public methods declare instead of leaking a NumberFormatException.
     *
     * @param value the value of the head column
     * @param tokenIndex the 1-based index of the token the value belongs to, used in the error message
     * @return the head index
     * @throws MaltChainedException if the value is not an integer
     */
    private static int parseHeadIndex(String value, int tokenIndex) throws MaltChainedException {
        try {
            return Integer.parseInt(value);
        } catch (NumberFormatException e) {
            throw new MaltChainedException(
                    "The head index '" + value + "' of token " + tokenIndex + " is not an integer. ", e);
        }
    }

    /**
     * Loads the option descriptions from 'appdata/options.xml' inside malt.jar, unless the option manager
     * already contains option groups.
     *
     * @throws MaltChainedException if malt.jar or its option description file cannot be located
     */
    private void initialize() throws MaltChainedException {
        if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) {
            return; // OptionManager is already initialized
        }
        String maltpath = getMaltJarPath();
        if (maltpath == null) {
            throw new MaltChainedException("malt.jar could not be found. ");
        }
        urlMaltJar = Util.findURL(maltpath);
        // NOTE(review): guards against Util.findURL returning null -- confirm against its contract.
        if (urlMaltJar == null) {
            throw new MaltChainedException("The URL of malt.jar could not be resolved from '" + maltpath + "'. ");
        }
        try {
            OptionManager.instance().loadOptionDescriptionFile(new URL("jar:" + urlMaltJar.toString() + "!/appdata/options.xml"));
        } catch (MalformedURLException e) {
            throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e);
        }
        OptionManager.instance().generateMaps();
    }

    /**
     * Returns the option container index
     *
     * @return the option container index
     */
    public int getOptionContainer() {
        return optionContainer;
    }

    private void setOptionContainer(int optionContainer) {
        this.optionContainer = optionContainer;
    }

    /**
     * Returns the path of malt.jar file
     *
     * @return the path of malt.jar file, or null if SystemInfo does not provide one
     */
    public static String getMaltJarPath() {
        if (SystemInfo.getMaltJarPath() != null) {
            return SystemInfo.getMaltJarPath().toString();
        }
        return null;
    }
}