001 package org.maltparser; 002 003 import java.net.MalformedURLException; 004 import java.net.URL; 005 import java.util.Iterator; 006 007 import org.maltparser.core.exception.MaltChainedException; 008 import org.maltparser.core.flow.FlowChartInstance; 009 import org.maltparser.core.helper.SystemInfo; 010 import org.maltparser.core.helper.Util; 011 import org.maltparser.core.io.dataformat.ColumnDescription; 012 import org.maltparser.core.io.dataformat.DataFormatException; 013 import org.maltparser.core.io.dataformat.DataFormatInstance; 014 import org.maltparser.core.options.OptionManager; 015 import org.maltparser.core.symbol.SymbolTable; 016 import org.maltparser.core.syntaxgraph.DependencyGraph; 017 import org.maltparser.core.syntaxgraph.DependencyStructure; 018 import org.maltparser.core.syntaxgraph.edge.Edge; 019 import org.maltparser.core.syntaxgraph.node.DependencyNode; 020 import org.maltparser.parser.SingleMalt; 021 022 /** 023 * The purpose of MaltParserService is to easily write third-party programs that uses MaltParser. 024 * 025 * There are two ways to call the MaltParserService: 026 * 1. By running experiments, which allow other programs to train a parser model or parse with a parser model. IO-handling is done by MaltParser. 027 * 2. By first initialize a parser model and then call the method parse() with an array of tokens that MaltParser parses. IO-handling of the sentence is 028 * done by the third-party program. 029 * 030 * How to use MaltParserService, please see the examples provided in the directory 'examples/apiexamples/srcex' 031 * 032 * @author Johan Hall 033 */ 034 public class MaltParserService { 035 private URL urlMaltJar; 036 private Engine engine; 037 private FlowChartInstance flowChartInstance; 038 private DataFormatInstance dataFormatInstance; 039 private SingleMalt singleMalt; 040 private int optionContainer; 041 private boolean initialized = false; 042 043 /** 044 * Creates a MaltParserService with the option container 0 045 * 046 * @throws MaltChainedException 047 */ 048 public MaltParserService() throws MaltChainedException { 049 this(0); 050 } 051 052 /** 053 * Creates a MaltParserService with the specified option container. To use different option containers allows the calling program 054 * to load several parser models or several experiments. The option management in MaltParser uses the singleton design pattern, which means that there can only 055 * be one instance of the option manager. To be able to have several parser models or experiments at same time please use different option containers. 056 * 057 * @param optionContainer an integer from 0 to max value of data type Integer 058 * @throws MaltChainedException 059 */ 060 public MaltParserService(int optionContainer) throws MaltChainedException { 061 initialize(); 062 setOptionContainer(optionContainer); 063 } 064 065 /** 066 * Runs a MaltParser experiment. The experiment is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 067 * 068 * @param commandLine a commandLine string that controls the MaltParser. 069 * @throws MaltChainedException 070 */ 071 public void runExperiment(String commandLine) throws MaltChainedException { 072 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 073 engine = new Engine(); 074 engine.initialize(optionContainer); 075 engine.process(optionContainer); 076 engine.terminate(optionContainer); 077 } 078 079 /** 080 * Initialize a parser model that later can by used to parse sentences. MaltParser is controlled by a commandLine string, please see the documentation of MaltParser to see all available options. 081 * 082 * @param commandLine a commandLine string that controls the MaltParser 083 * @throws MaltChainedException 084 */ 085 public void initializeParserModel(String commandLine) throws MaltChainedException { 086 OptionManager.instance().parseCommandLine(commandLine, optionContainer); 087 // Creates an engine 088 engine = new Engine(); 089 // Initialize the engine with option container and gets a flow chart instance 090 flowChartInstance = engine.initialize(optionContainer); 091 // Runs the preprocess chart items of the "parse" flow chart 092 if (flowChartInstance.hasPreProcessChartItems()) { 093 flowChartInstance.preprocess(); 094 } 095 singleMalt = (SingleMalt)flowChartInstance.getFlowChartRegistry(org.maltparser.parser.SingleMalt.class, "singlemalt"); 096 singleMalt.getConfigurationDir().initDataFormat(); 097 dataFormatInstance = singleMalt.getConfigurationDir().getDataFormatManager().getInputDataFormatSpec().createDataFormatInstance( 098 singleMalt.getSymbolTables(), 099 OptionManager.instance().getOptionValueString(optionContainer, "singlemalt", "null_value")); //, 100 // OptionManager.instance().getOptionValueString(optionContainer, "graph", "root_label")); 101 initialized = true; 102 } 103 104 /** 105 * Parses an array of tokens and returns a dependency structure. 106 * 107 * Note: To call this method requires that a parser model has been initialized by using the initializeParserModel(). 108 * 109 * @param tokens an array of tokens 110 * @return a dependency structure 111 * @throws MaltChainedException 112 */ 113 public DependencyStructure parse(String[] tokens) throws MaltChainedException { 114 if (!initialized) { 115 throw new MaltChainedException("No parser model has been initialized. Please use the method initializeParserModel() before invoking this method."); 116 } 117 if (tokens == null || tokens.length == 0) { 118 throw new MaltChainedException("Nothing to parse. "); 119 } 120 121 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); 122 123 for (int i = 0; i < tokens.length; i++) { 124 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 125 DependencyNode node = outputGraph.addDependencyNode(i+1); 126 String[] items = tokens[i].split("\t"); 127 for (int j = 0; j < items.length; j++) { 128 if (columns.hasNext()) { 129 ColumnDescription column = columns.next(); 130 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 131 outputGraph.addLabel(node, column.getName(), items[j]); 132 } 133 } 134 } 135 } 136 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 137 // Invoke parse with the output graph 138 singleMalt.parse(outputGraph); 139 return outputGraph; 140 } 141 142 /** 143 * Converts an array of tokens to a dependency structure 144 * 145 * @param tokens an array of tokens 146 * @return a dependency structure 147 * @throws MaltChainedException 148 */ 149 public DependencyStructure toDependencyStructure(String[] tokens) throws MaltChainedException { 150 if (tokens == null || tokens.length == 0) { 151 throw new MaltChainedException("Nothing to convert. "); 152 } 153 DependencyStructure outputGraph = new DependencyGraph(singleMalt.getSymbolTables()); 154 155 for (int i = 0; i < tokens.length; i++) { 156 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 157 DependencyNode node = outputGraph.addDependencyNode(i+1); 158 String[] items = tokens[i].split("\t"); 159 Edge edge = null; 160 for (int j = 0; j < items.length; j++) { 161 if (columns.hasNext()) { 162 ColumnDescription column = columns.next(); 163 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 164 outputGraph.addLabel(node, column.getName(), items[j]); 165 } else if (column.getCategory() == ColumnDescription.HEAD) { 166 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals("_")) { 167 edge = ((DependencyStructure)outputGraph).addDependencyEdge(Integer.parseInt(items[j]), i+1); 168 } 169 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { 170 outputGraph.addLabel(edge, column.getName(), items[j]); 171 } 172 } 173 } 174 } 175 outputGraph.setDefaultRootEdgeLabel(outputGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 176 return outputGraph; 177 } 178 179 /** 180 * Same as parse(String[] tokens), but instead it returns an array of tokens with a head index and a dependency type at the end of string 181 * 182 * @param tokens an array of tokens to parse 183 * @return an array of tokens with a head index and a dependency type at the end of string 184 * @throws MaltChainedException 185 */ 186 public String[] parseTokens(String[] tokens) throws MaltChainedException { 187 DependencyStructure outputGraph = parse(tokens); 188 StringBuilder sb = new StringBuilder(); 189 String[] outputTokens = new String[tokens.length]; 190 SymbolTable deprelTable = outputGraph.getSymbolTables().getSymbolTable("DEPREL"); 191 for (Integer index : outputGraph.getTokenIndices()) { 192 sb.setLength(0); 193 if (index <= tokens.length) { 194 DependencyNode node = outputGraph.getDependencyNode(index); 195 sb.append(tokens[index -1]); 196 sb.append('\t'); 197 sb.append(node.getHead().getIndex()); 198 sb.append('\t'); 199 if (node.getHeadEdge().hasLabel(deprelTable)) { 200 sb.append(node.getHeadEdge().getLabelSymbol(deprelTable)); 201 } else { 202 sb.append(outputGraph.getDefaultRootEdgeLabelSymbol(deprelTable)); 203 } 204 outputTokens[index-1] = sb.toString(); 205 } 206 } 207 return outputTokens; 208 } 209 210 /** 211 * Terminates the parser model. 212 * 213 * @throws MaltChainedException 214 */ 215 public void terminateParserModel() throws MaltChainedException { 216 // Runs the postprocess chart items of the "parse" flow chart 217 if (flowChartInstance.hasPostProcessChartItems()) { 218 flowChartInstance.postprocess(); 219 } 220 221 // Terminate the flow chart with an option container 222 engine.terminate(optionContainer); 223 } 224 225 private void initialize() throws MaltChainedException { 226 if (OptionManager.instance().getOptionDescriptions().getOptionGroupNameSet().size() > 0) { 227 return; // OptionManager is already initialized 228 } 229 String maltpath = getMaltJarPath(); 230 if (maltpath == null) { 231 throw new MaltChainedException("malt.jar could not be found. "); 232 } 233 urlMaltJar = Util.findURL(maltpath); 234 try { 235 OptionManager.instance().loadOptionDescriptionFile(new URL("jar:"+urlMaltJar.toString()+"!/appdata/options.xml")); 236 237 } catch (MalformedURLException e) { 238 throw new MaltChainedException("MaltParser couldn't find its options 'malt.jar!/appdata/options.xml'", e); 239 } 240 OptionManager.instance().generateMaps(); 241 } 242 243 244 /** 245 * Returns the option container index 246 * 247 * @return the option container index 248 */ 249 public int getOptionContainer() { 250 return optionContainer; 251 } 252 253 private void setOptionContainer(int optionContainer) { 254 this.optionContainer = optionContainer; 255 } 256 257 /** 258 * Returns the path of malt.jar file 259 * 260 * @return the path of malt.jar file 261 */ 262 public static String getMaltJarPath() { 263 if (SystemInfo.getMaltJarPath() != null) { 264 return SystemInfo.getMaltJarPath().toString(); 265 } 266 return null; 267 } 268 269 270 }