001 package org.maltparser.ml.libsvm.malt04; 002 003 import java.io.BufferedReader; 004 import java.io.BufferedWriter; 005 import java.io.File; 006 import java.io.FileNotFoundException; 007 import java.io.IOException; 008 import java.io.InputStream; 009 import java.io.InputStreamReader; 010 import java.io.OutputStreamWriter; 011 import java.io.PrintStream; 012 import java.text.DecimalFormat; 013 import java.text.DecimalFormatSymbols; 014 import java.util.ArrayList; 015 import java.util.Set; 016 import java.util.StringTokenizer; 017 import java.util.Vector; 018 import java.util.regex.Pattern; 019 import java.util.regex.PatternSyntaxException; 020 021 import org.maltparser.core.exception.MaltChainedException; 022 import org.maltparser.core.feature.FeatureException; 023 import org.maltparser.core.feature.FeatureVector; 024 import org.maltparser.core.feature.function.FeatureFunction; 025 import org.maltparser.core.feature.map.SplitFeature; 026 import org.maltparser.core.feature.value.FeatureValue; 027 import org.maltparser.core.feature.value.MultipleFeatureValue; 028 import org.maltparser.core.feature.value.SingleFeatureValue; 029 import org.maltparser.core.helper.NoPrintStream; 030 import org.maltparser.core.symbol.SymbolTable; 031 import org.maltparser.core.symbol.Table; 032 import org.maltparser.core.syntaxgraph.DependencyStructure; 033 import org.maltparser.core.syntaxgraph.feature.InputColumnFeature; 034 import org.maltparser.core.syntaxgraph.feature.OutputColumnFeature; 035 import org.maltparser.core.syntaxgraph.node.DependencyNode; 036 import org.maltparser.ml.LearningMethod; 037 import org.maltparser.ml.libsvm.LibsvmException; 038 import org.maltparser.parser.DependencyParserConfig; 039 import org.maltparser.parser.algorithm.nivre.malt04.NivreEagerMalt04; 040 import org.maltparser.parser.algorithm.nivre.malt04.NivreStandardMalt04; 041 import org.maltparser.parser.guide.instance.InstanceModel; 042 import org.maltparser.parser.history.action.SingleDecision; 043 
044 import libsvm28.svm; 045 import libsvm28.svm_model; 046 import libsvm28.svm_node; 047 import libsvm28.svm_parameter; 048 import libsvm28.svm_problem; 049 /** 050 * Implements an interface to the LIBSVM learner (LIBSVM 2.80 is used). More information about 051 * LIBSVM can be found at <a href="http://www.csie.ntu.edu.tw/~cjlin/libsvm/" target="_blank">LIBSVM -- A Library for Support Vector Machines</a>. 052 * 053 * This class tries to reproduce the same behavior as MaltParser 0.4. Unfortunately we have to introduce some strange behaviors and bugs to 054 * be able to reproduce the results: 055 * 056 * <ol> 057 * <li>RightArc{CLASSITEM_SEPARATOR}{ROOT_LABEL} is mapped to the Reduce transition for the Nivre Arc-eager and Nivre Arc-standard algorithm, where {ROOT_LABEL} is specified 058 * by the <code>--graph-root_label</code> option and the <code>--guide-classitem_separator</code> option (bug in MaltParser 0.4). 059 * <li>LeftArc{CLASSITEM_SEPARATOR}{ROOT_LABEL} is mapped to the Right Arc transition with the last dependency type in the DEPREL tagset, where {ROOT_LABEL} is specified 060 * by the <code>--graph-root_label</code> option and the <code>--guide-classitem_separator</code> option (bug in MaltParser 0.4). 061 * <li>The mapping of RightArc{CLASSITEM_SEPARATOR}{ROOT_LABEL} into Reduce results in an illegal transition and therefore the default transition (Shift) is used during parsing (indirect bug in MaltParser 0.4). 
062 * <li>Null-value of the LEMMA, FORM, FEATS columns in the CoNLL shared task format is not written into the instance file (this can be controlled 063 * by the <code>--libsvm-libsvm_exclude_null</code> and <code>--libsvm-libsvm_exclude_columns</code> options in the new MaltParser) 064 * <li>If the feature is an output feature and <code>feature != "OutputColumn(DEPREL, Stack[0])"</code> and it points at a node which has the root as head, it will not extract the dependency type of the informative root label, 065 * instead it will extract the root label specified by the <code>--graph-root_label</code> option (bug in MaltParser 0.4). 066 * <li>If <code>feature = "Split(InputColumn(FEATS, X), \|")</code>, where <code>X</code> is an arbitrary node, the set of syntactic and/or morphological features will not be ordered correctly 067 * according to the LIBSVM format (bug in MaltParser 0.4). 068 * <li>If <code>feature = "Split(InputColumn(FEATS, X), \|")</code>, where <code>X</code> is an arbitrary node, it will not regard the set of syntactic and/or morphological features as a set. In some cases, there are treebanks that do not follow the 069 * CoNLL data format and have individual syntactic and/or morphological features twice in the FEATS column (bug in MaltParser 0.4). 070 * <li>Unfortunately there is a minor difference between LIBSVM 2.80 (used by MaltParser 0.4) and the latest version of LIBSVM. Therefore we have to use 071 * LIBSVM 2.80 to be able to reproduce the results. 
072 * </ol> 073 * 074 * @author Johan Hall 075 * @since 1.0 076 */ 077 public class LibsvmMalt04 implements LearningMethod { 078 public final static String LIBSVM_VERSION = "2.80"; 079 private StringBuilder sb; 080 /** 081 * The parent instance model 082 */ 083 protected InstanceModel owner; 084 /** 085 * The learner/classifier mode 086 */ 087 protected int learnerMode; 088 /** 089 * The name of the learner 090 */ 091 protected String name; 092 /** 093 * Number of processed instances 094 */ 095 protected int numberOfInstances; 096 /** 097 * Instance output stream writer 098 */ 099 private BufferedWriter instanceOutput = null; 100 //private BufferedWriter debugTransOut = null; 101 //private int sentenceCount = 1; 102 103 protected String pathExternalSVMTrain = null; 104 /** 105 * LIBSVM svm_model object, only used during classification. 106 */ 107 private svm_model model = null; 108 /** 109 * LIBSVM svm_parameter object 110 */ 111 private svm_parameter svmParam; 112 /** 113 * Parameter string 114 */ 115 private String paramString; 116 /** 117 * An array of LIBSVM svm_node objects, only used during classification. 118 */ 119 private ArrayList<svm_node> xlist = null; 120 /** 121 * RA_ROOT is used for mapping RightArc_ROOT - REDUCE (bug in MaltParser 0.4) 122 */ 123 private String RA_ROOT = ""; 124 /** 125 * LA_ROOT is used for mapping RightArc_ROOT - RightArc_{Last dependency type in the DEPREL tagset} (bug in MaltParser 0.4) 126 */ 127 private String LA_ROOT = ""; 128 /** 129 * Root handling of the Nivre arc-eager and Nivre arc-standard algorithm. Used for introducing a bug in MaltParser 0. 
130 */ 131 private int rootHandling = -1; 132 /** 133 * true if Nivre arc-standard is the current parsing algorthm, otherwise false 134 */ 135 private boolean nivrestandard = false; 136 /** 137 * true if Nivre arc-eager/arc-standard is the current parsing algorthm, otherwise false 138 */ 139 private boolean nivre = false; 140 141 private boolean saveInstanceFiles; 142 /** 143 * Constructs a LIBSVM learner. 144 * 145 * @param owner the guide model owner 146 * @param learnerMode the mode of the learner TRAIN or CLASSIFY 147 */ 148 public LibsvmMalt04(InstanceModel owner, Integer learnerMode) throws MaltChainedException { 149 setOwner(owner); 150 setLearningMethodName("libsvmmalt04"); 151 setLearnerMode(learnerMode.intValue()); 152 setNumberOfInstances(0); 153 initSpecialParameters(); 154 initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString()); 155 if (learnerMode == TRAIN) { 156 instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins")); 157 //debugTransOut = new BufferedWriter(getInstanceOutputStreamWriter(".trans")); 158 } 159 sb = new StringBuilder(6); 160 } 161 162 /* (non-Javadoc) 163 * @see org.maltparser.ml.LearningMethod#addInstance(org.maltparser.parser.guide.classtable.ClassTable, org.maltparser.parser.guide.feature.FeatureVector) 164 */ 165 public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException { 166 if (featureVector == null) { 167 throw new LibsvmException("The feature vector cannot be found"); 168 } else if (decision == null) { 169 throw new LibsvmException("The decision cannot be found"); 170 } 171 try { 172 if (nivre == true && RA_ROOT.equals(decision.getDecisionSymbol()) == true) { 173 instanceOutput.write("2\t"); 174 //debugTransOut.write(2+" "+classCodeTable.getCurrentClassString()+" "+sentenceCount+"\n"); 175 } else if (nivre == true && LA_ROOT.equals(decision.getDecisionSymbol()) == true) { 176 Table table = 
decision.getGuideHistory().getTableHandler("A").getSymbolTable("DEPREL"); 177 int code = 2 + ((SymbolTable)table).getValueCounter() - 1; 178 //int code = 2 + classCodeTable.getParserAction().getOutputSymbolTables().get("DEPREL").getValueCounter() - 1; 179 instanceOutput.write(code+"\t"); 180 //debugTransOut.write(code+" "+classCodeTable.getCurrentClassString()+" "+sentenceCount+"\n"); 181 } else { 182 instanceOutput.write(decision.getDecisionCode()+"\t"); 183 //debugTransOut.write(classCodeTable.getCurrentClassCode()+" "+classCodeTable.getCurrentClassString()+" "+sentenceCount+"\n"); 184 } 185 186 for (int i = 0; i < featureVector.size(); i++) { 187 FeatureValue featureValue = featureVector.get(i).getFeatureValue(); 188 if (featureValue.isNullValue()) { 189 if (featureVector.get(i) instanceof InputColumnFeature) { 190 if (((InputColumnFeature)featureVector.get(i)).getColumnName().equals("FORM") || 191 ((InputColumnFeature)featureVector.get(i)).getColumnName().equals("LEMMA") || 192 ((InputColumnFeature)featureVector.get(i)).getColumnName().equals("FEATS")) { 193 instanceOutput.write("-1"); 194 if (i != featureVector.size()) { 195 instanceOutput.write('\t'); 196 } 197 continue; 198 } 199 } else if (featureVector.get(i) instanceof SplitFeature && ((SplitFeature)featureVector.get(i)).getParentFeature() instanceof InputColumnFeature) { 200 if (((InputColumnFeature)((SplitFeature)featureVector.get(i)).getParentFeature()).getColumnName().equals("FEATS")) { 201 instanceOutput.write("-1"); 202 if (i != featureVector.size()) { 203 instanceOutput.write('\t'); 204 } 205 continue; 206 } 207 } 208 } 209 if (featureVector.get(i) instanceof OutputColumnFeature && !featureVector.get(i).toString().endsWith("DEPREL, Stack[0])")) { 210 OutputColumnFeature ocf = (OutputColumnFeature)featureVector.get(i); 211 DependencyNode node = null; 212 if (ocf.getAddressFunction().getAddressValue().getAddress() instanceof DependencyNode) { 213 node = 
(DependencyNode)ocf.getAddressFunction().getAddressValue().getAddress(); 214 } 215 if (node != null && node.getHead() != null && node.getHead().isRoot()) { 216 instanceOutput.write("0"); 217 } else { 218 if (featureValue instanceof SingleFeatureValue) { 219 instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+""); 220 } else if (featureValue instanceof MultipleFeatureValue) { 221 Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes(); 222 int j=0; 223 for (Integer value : values) { 224 instanceOutput.write(value.toString()); 225 if (j != values.size()-1) { 226 instanceOutput.write("|"); 227 } 228 j++; 229 } 230 } 231 } 232 } else if (featureVector.get(i) instanceof SplitFeature && ((SplitFeature)featureVector.get(i)).getParentFeature() instanceof InputColumnFeature) { 233 if (((InputColumnFeature)((SplitFeature)featureVector.get(i)).getParentFeature()).getColumnName().equals("FEATS")) { 234 SplitFeature sf = (SplitFeature)featureVector.get(i); 235 String value = ((SingleFeatureValue)sf.getParentFeature().getFeatureValue()).getSymbol(); 236 if (sf.getFeatureValue().isNullValue()) { 237 instanceOutput.write("-1"); 238 } else { 239 int code; 240 String items[]; 241 try { 242 items = value.split(sf.getSeparators()); 243 } catch (PatternSyntaxException e) { 244 throw new FeatureException("The split feature '"+featureVector.get(i).toString()+"' could not split the value using the following separators '"+sf.getSeparators()+"'",e); 245 } 246 for (int j = 0; j < items.length; j++) { 247 code = sf.getSymbolTable().addSymbol(items[j]); 248 instanceOutput.write(code+""); 249 if (j != items.length-1) { 250 instanceOutput.write("|"); 251 } 252 } 253 } 254 } 255 } else { 256 if (featureValue instanceof SingleFeatureValue) { 257 instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+""); 258 } else if (featureValue instanceof MultipleFeatureValue) { 259 Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes(); 260 int j=0; 261 
for (Integer value : values) { 262 instanceOutput.write(value.toString()); 263 if (j != values.size()-1) { 264 instanceOutput.write("|"); 265 } 266 j++; 267 } 268 } 269 } 270 271 if (i != featureVector.size()) { 272 instanceOutput.write('\t'); 273 } 274 } 275 276 instanceOutput.write('\n'); 277 increaseNumberOfInstances(); 278 } catch (IOException e) { 279 throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e); 280 } 281 282 } 283 284 /* (non-Javadoc) 285 * @see org.maltparser.ml.LearningMethod#finalizeSentence(org.maltparser.core.sentence.Sentence, org.maltparser.core.graph.DependencyGraph) 286 */ 287 public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { 288 // sentenceCount++; 289 } 290 291 /* (non-Javadoc) 292 * @see org.maltparser.ml.LearningMethod#noMoreInstances() 293 */ 294 public void noMoreInstances() throws MaltChainedException { 295 closeInstanceWriter(); 296 } 297 298 299 /* (non-Javadoc) 300 * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector) 301 */ 302 public void train(FeatureVector featureVector) throws MaltChainedException { 303 if (featureVector == null) { 304 throw new LibsvmException("The feature vector cannot be found. "); 305 } else if (owner == null) { 306 throw new LibsvmException("The parent guide model cannot be found. 
"); 307 } 308 309 if (pathExternalSVMTrain != null) { 310 trainExternal(featureVector); 311 return; 312 } 313 svm_problem prob = new svm_problem(); 314 File modelFile = getFile(".mod"); 315 try { 316 317 ArrayList<Integer> cardinalities = new ArrayList<Integer>(); 318 for (FeatureFunction feature : featureVector) { 319 cardinalities.add(feature.getFeatureValue().getCardinality()); 320 } 321 322 readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), prob, cardinalities, svmParam); 323 324 String errorMessage = svm.svm_check_parameter(prob, svmParam); 325 if(errorMessage != null) { 326 throw new LibsvmException(errorMessage); 327 } 328 getConfiguration().getConfigLogger().info("Creating LIBSVM model "+modelFile.getName()+"\n"); 329 PrintStream out = System.out; 330 PrintStream err = System.err; 331 System.setOut(NoPrintStream.NO_PRINTSTREAM); 332 //System.setErr(new PrintStream(new LoggingOutputStream(owner.getConfiguration().getConfigLogger(), owner.getConfiguration().getConfigLogger().getLevel()), true)); 333 System.setErr(NoPrintStream.NO_PRINTSTREAM); 334 335 svm.svm_save_model(modelFile.getAbsolutePath(), svm.svm_train(prob, svmParam)); 336 337 System.setOut(err); 338 System.setOut(out); 339 if (!saveInstanceFiles) { 340 getFile(".ins").delete(); 341 } 342 } catch (OutOfMemoryError e) { 343 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 344 } catch (IllegalArgumentException e) { 345 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 346 } catch (SecurityException e) { 347 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 348 } catch (IOException e) { 349 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+modelFile.getAbsolutePath()+"'. 
", e); 350 } 351 } 352 353 private void trainExternal(FeatureVector featureVector) throws MaltChainedException { 354 355 try { 356 ArrayList<Integer> cardinalities = new ArrayList<Integer>(); 357 for (FeatureFunction feature : featureVector) { 358 cardinalities.add(feature.getFeatureValue().getCardinality()); 359 } 360 361 maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities); 362 getConfiguration().getConfigLogger().info("Creating LIBSVM model (svm-train) "+getFile(".mod").getName()); 363 364 ArrayList<String> commands = new ArrayList<String>(); 365 commands.add(pathExternalSVMTrain); 366 String[] params = getSVMParamStringArray(svmParam); 367 for (int i=0; i < params.length; i++) { 368 commands.add(params[i]); 369 } 370 commands.add(getFile(".ins.tmp").getAbsolutePath()); 371 commands.add(getFile(".mod").getAbsolutePath()); 372 String[] arrayCommands = commands.toArray(new String[commands.size()]); 373 Process child = Runtime.getRuntime().exec(arrayCommands); 374 InputStream in = child.getInputStream(); 375 while (in.read() != -1){} 376 if (child.waitFor() != 0) { 377 owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")"); 378 } 379 in.close(); 380 if (!saveInstanceFiles) { 381 getFile(".ins").delete(); 382 getFile(".ins.tmp").delete(); 383 } 384 owner.getGuide().getConfiguration().getConfigLogger().info("\n"); 385 } catch (InterruptedException e) { 386 throw new LibsvmException("SVM-trainer is interrupted. ", e); 387 } catch (IllegalArgumentException e) { 388 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 389 } catch (SecurityException e) { 390 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 391 } catch (IOException e) { 392 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. 
", e); 393 } catch (OutOfMemoryError e) { 394 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 395 } 396 } 397 398 /* (non-Javadoc) 399 * @see org.maltparser.ml.LearningMethod#moveAllInstances(java.io.BufferedWriter, org.maltparser.parser.guide.feature.Feature, java.util.ArrayList) 400 */ 401 public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException { 402 if (method == null) { 403 throw new LibsvmException("The learning method cannot be found. "); 404 } else if (divideFeature == null) { 405 throw new LibsvmException("The divide feature cannot be found. "); 406 } 407 try { 408 BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins")); 409 BufferedWriter out = method.getInstanceWriter(); 410 int l = in.read(); 411 char c; 412 int j = 0; 413 while(true) { 414 if (l == -1) { 415 sb.setLength(0); 416 break; 417 } 418 c = (char)l; 419 l = in.read(); 420 if (c == '\t') { 421 out.write(sb.toString()); 422 out.write('\t'); 423 j++; 424 sb.setLength(0); 425 } else if (c == '\n') { 426 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 427 out.write('\n'); 428 sb.setLength(0); 429 method.increaseNumberOfInstances(); 430 this.decreaseNumberOfInstances(); 431 j = 0; 432 } else { 433 sb.append(c); 434 } 435 } 436 in.close(); 437 getFile(".ins").delete(); 438 } catch (SecurityException e) { 439 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 440 } catch (NullPointerException e) { 441 throw new LibsvmException("The instance file cannot be found. ", e); 442 } catch (FileNotFoundException e) { 443 throw new LibsvmException("The instance file cannot be found. ", e); 444 } catch (IOException e) { 445 throw new LibsvmException("The LIBSVM learner read from the instance file. 
", e); 446 } 447 } 448 449 /* (non-Javadoc) 450 * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList) 451 */ 452 public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException { 453 if (model == null) { 454 File modelFile = getFile(".mod"); 455 try { 456 model = svm.svm_load_model(modelFile.getAbsolutePath()); 457 } catch (IOException e) { 458 throw new LibsvmException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e); 459 } 460 } 461 if (xlist == null) { 462 xlist = new ArrayList<svm_node>(featureVector.size()); 463 } 464 if (model == null) { 465 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. "); 466 } else if (featureVector == null) { 467 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. "); 468 } 469 int j = 0; 470 int offset = 0; 471 472 for (FeatureFunction feature : featureVector) { 473 if (feature instanceof SplitFeature && feature.toString().startsWith("Split(InputColumn(FEATS")) { 474 SplitFeature sf = (SplitFeature)feature; 475 String value = ((SingleFeatureValue)sf.getParentFeature().getFeatureValue()).getSymbol(); 476 477 SymbolTable table = sf.getSymbolTable(); 478 String items[]; 479 try { 480 items = value.split(sf.getSeparators()); 481 } catch (PatternSyntaxException e) { 482 throw new FeatureException("The split feature '"+feature.toString()+"' could not split the value using the following separators '"+sf.getSeparators()+"'",e); 483 } 484 for (int k=0; k < items.length; k++) { 485 if (!(table.isNullValue(items[k]) && table.getSymbolStringToCode(items[k]) == 0)) { 486 if (j >= xlist.size()) { 487 svm_node x = new svm_node(); 488 x.value = 1.0; 489 xlist.add(j,x); 490 } 491 xlist.get(j++).index = table.addSymbol(items[k]) + offset; 492 } 493 } 494 } else { 495 FeatureValue 
featureValue = feature.getFeatureValue(); 496 if (featureValue instanceof SingleFeatureValue) { 497 if (((SingleFeatureValue)featureValue).isKnown()) { 498 if (j >= xlist.size()) { 499 svm_node x = new svm_node(); 500 x.value = 1.0; 501 xlist.add(j,x); 502 } 503 if (feature instanceof OutputColumnFeature && !feature.toString().endsWith("DEPREL, Stack[0])")) { 504 OutputColumnFeature ocf = (OutputColumnFeature)feature; 505 DependencyNode node = null; 506 if (ocf.getAddressFunction().getAddressValue().getAddress() instanceof DependencyNode) { 507 node = (DependencyNode)ocf.getAddressFunction().getAddressValue().getAddress(); 508 } 509 if (node != null && node.getHead() != null && node.getHead().isRoot()) { 510 xlist.get(j++).index = 0 + offset; 511 } else { 512 xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset; 513 } 514 } else { 515 xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset; 516 } 517 } 518 } else if (featureValue instanceof MultipleFeatureValue) { 519 Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes(); 520 for (Integer value : values) { 521 if (((MultipleFeatureValue)featureValue).isKnown(value)) { 522 if (j >= xlist.size()) { 523 svm_node x = new svm_node(); 524 x.value = 1.0; 525 xlist.add(j,x); 526 } 527 if (feature instanceof OutputColumnFeature && !feature.toString().endsWith("DEPREL, Stack[0])")) { 528 OutputColumnFeature ocf = (OutputColumnFeature)feature; 529 DependencyNode node = null; 530 if (ocf.getAddressFunction().getAddressValue().getAddress() instanceof DependencyNode) { 531 node = (DependencyNode)ocf.getAddressFunction().getAddressValue().getAddress(); 532 } 533 if (node != null && node.getHead() != null && node.getHead().isRoot()) { 534 xlist.get(j++).index = 0 + offset; 535 } else { 536 xlist.get(j++).index = value + offset; 537 } 538 } else { 539 xlist.get(j++).index = value + offset; 540 } 541 } 542 } 543 } 544 } 545 offset += 
feature.getFeatureValue().getCardinality(); 546 } 547 int transition = (int)svm.svm_predict(model, xlist.subList(0, j).toArray(new svm_node[0])); 548 if (nivrestandard == true && rootHandling == NivreStandardMalt04.NORMAL && transition == 2) { 549 transition = 1; 550 } 551 552 decision.getKBestList().add(transition); 553 554 return true; 555 } 556 557 /* (non-Javadoc) 558 * @see org.maltparser.ml.LearningMethod#terminate() 559 */ 560 public void terminate() throws MaltChainedException { 561 closeInstanceWriter(); 562 model = null; 563 svmParam = null; 564 xlist = null; 565 owner = null; 566 } 567 568 /* (non-Javadoc) 569 * @see org.maltparser.ml.LearningMethod#getInstanceWriter() 570 */ 571 public BufferedWriter getInstanceWriter() { 572 return instanceOutput; 573 } 574 575 /** 576 * Close the instance writer 577 * 578 * @throws MaltChainedException 579 */ 580 protected void closeInstanceWriter() throws MaltChainedException { 581 try { 582 if (instanceOutput != null) { 583 instanceOutput.flush(); 584 instanceOutput.close(); 585 instanceOutput = null; 586 } 587 588 /*if (debugTransOut != null) { 589 debugTransOut.flush(); 590 debugTransOut.close(); 591 debugTransOut = null; 592 }*/ 593 } catch (IOException e) { 594 throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e); 595 } 596 } 597 598 /** 599 * Initialize the LIBSVM according to the parameter string 600 * 601 * @param paramString the parameter string to configure the LIBSVM learner. 602 * @throws MaltChainedException 603 */ 604 protected void initSvmParam(String paramString) throws MaltChainedException { 605 this.paramString = paramString; 606 svmParam = new svm_parameter(); 607 initParameters(svmParam); 608 parseParameters(paramString, svmParam); 609 } 610 611 /** 612 * Initialize the LIBSVM with a coding and a behavior strategy. This strategy parameter is 613 * used for reproduce the behavior of MaltParser 0.4 (C-impl). 
614 * 615 * @throws MaltChainedException 616 */ 617 protected void initSpecialParameters() throws MaltChainedException { 618 if (getConfiguration().getParsingAlgorithm() instanceof NivreEagerMalt04 || getConfiguration().getParsingAlgorithm() instanceof NivreStandardMalt04) { 619 nivre = true; 620 RA_ROOT = "RA"+getConfiguration().getOptionValue("guide", "classitem_separator").toString()+getConfiguration().getOptionValue("graph", "root_label").toString(); 621 LA_ROOT = "LA"+getConfiguration().getOptionValue("guide", "classitem_separator").toString()+getConfiguration().getOptionValue("graph", "root_label").toString(); 622 if (getConfiguration().getParsingAlgorithm() instanceof NivreEagerMalt04) { 623 rootHandling = ((NivreEagerMalt04)getConfiguration().getParsingAlgorithm()).getRootHandling(); 624 } else if (getConfiguration().getParsingAlgorithm() instanceof NivreStandardMalt04) { 625 rootHandling = ((NivreStandardMalt04)getConfiguration().getParsingAlgorithm()).getRootHandling(); 626 nivrestandard = true; 627 } 628 } 629 630 saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue(); 631 if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) { 632 try { 633 if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) { 634 throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong."); 635 } 636 if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) { 637 throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file"); 638 } 639 if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) { 640 throw new LibsvmException("The 
option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. "); 641 } 642 pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString(); 643 } catch (SecurityException e) { 644 throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e); 645 } 646 } 647 } 648 649 /** 650 * Returns the parameter string for used for configure LIBSVM 651 * 652 * @return the parameter string for used for configure LIBSVM 653 */ 654 public String getParamString() { 655 return paramString; 656 } 657 658 /** 659 * Returns the parent instance model 660 * 661 * @return the parent instance model 662 */ 663 public InstanceModel getOwner() { 664 return owner; 665 } 666 667 /** 668 * Sets the parent instance model 669 * 670 * @param owner a instance model 671 */ 672 protected void setOwner(InstanceModel owner) { 673 this.owner = owner; 674 } 675 676 /** 677 * Returns the learner mode 678 * 679 * @return the learner mode 680 */ 681 public int getLearnerMode() { 682 return learnerMode; 683 } 684 685 /** 686 * Sets the learner mode 687 * 688 * @param learnerMode the learner mode 689 */ 690 public void setLearnerMode(int learnerMode) { 691 this.learnerMode = learnerMode; 692 } 693 694 /** 695 * Returns the name of the learning method 696 * 697 * @return the name of the learning method 698 */ 699 public String getLearningMethodName() { 700 return name; 701 } 702 703 /** 704 * Returns the current configuration 705 * 706 * @return the current configuration 707 * @throws MaltChainedException 708 */ 709 public DependencyParserConfig getConfiguration() throws MaltChainedException { 710 return owner.getGuide().getConfiguration(); 711 } 712 713 /** 714 * Returns the number of processed instances 715 * 716 * @return the number of processed instances 717 */ 718 public int getNumberOfInstances() { 719 return numberOfInstances; 720 } 721 722 /* (non-Javadoc) 723 * @see 
org.maltparser.ml.LearningMethod#increaseNumberOfInstances() 724 */ 725 public void increaseNumberOfInstances() { 726 numberOfInstances++; 727 owner.increaseFrequency(); 728 } 729 730 /* (non-Javadoc) 731 * @see org.maltparser.ml.LearningMethod#decreaseNumberOfInstances() 732 */ 733 public void decreaseNumberOfInstances() { 734 numberOfInstances--; 735 owner.decreaseFrequency(); 736 } 737 738 /** 739 * Sets the number of instance 740 * 741 * @param numberOfInstances the number of instance 742 */ 743 protected void setNumberOfInstances(int numberOfInstances) { 744 this.numberOfInstances = 0; 745 } 746 747 /** 748 * Sets the learning method name 749 * 750 * @param name the learning method name 751 */ 752 protected void setLearningMethodName(String name) { 753 this.name = name; 754 } 755 756 /** 757 * Returns the instance output writer. The naming of the file is standardized according to the learning method name, but file suffix can vary. 758 * 759 * @param suffix the file suffix of the file name 760 * @return the instance output writer 761 * @throws MaltChainedException 762 */ 763 protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException { 764 return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix); 765 } 766 767 /** 768 * Returns the instance input reader. The naming of the file is standardized according to the learning method name, but file suffix can vary. 769 * 770 * @param suffix the file suffix of the file name 771 * @return the instance input reader 772 * @throws MaltChainedException 773 */ 774 protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException { 775 return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix); 776 } 777 778 /** 779 * Returns a file object. 
The naming of the file is standardized according to the learning method name, but file suffix can vary. 780 * 781 * @param suffix the file suffix of the file name 782 * @return Returns a file object 783 * @throws MaltChainedException 784 */ 785 protected File getFile(String suffix) throws MaltChainedException { 786 return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix); 787 } 788 789 790 /** 791 * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated). 792 * 793 * @param isr the instance stream reader for the instance file 794 * @param prob a svm_problem object 795 * @param cardinality a vector containing the number of distinct values for a particular column. 796 * @param param a svm_parameter object 797 * @throws LibsvmException 798 */ 799 public void readProblemMaltSVMFormat(InputStreamReader isr, svm_problem prob, ArrayList<Integer> cardinality, svm_parameter param) throws LibsvmException { 800 try { 801 BufferedReader fp = new BufferedReader(isr); 802 int max_index = 0; 803 if (xlist == null) { 804 xlist = new ArrayList<svm_node>(); 805 } 806 prob.l = getNumberOfInstances(); 807 prob.x = new svm_node[prob.l][]; 808 prob.y = new double[prob.l]; 809 int i = 0; 810 Pattern tabPattern = Pattern.compile("\t"); 811 Pattern pipePattern = Pattern.compile("\\|"); 812 while(true) { 813 String line = fp.readLine(); 814 if(line == null) break; 815 String[] columns = tabPattern.split(line); 816 if (columns.length == 0) { 817 continue; 818 } 819 820 int offset = 0; 821 int j = 0; 822 try { 823 prob.y[i] = (double)Integer.parseInt(columns[j]); 824 int p = 0; 825 for(j = 1; j < columns.length; j++) { 826 String[] items = pipePattern.split(columns[j]); 827 for (int k = 0; k < items.length; k++) { 828 try { 829 if (Integer.parseInt(items[k]) != -1) { 830 xlist.add(p, new svm_node()); 831 xlist.get(p).value = 1.0; 832 xlist.get(p).index = 
Integer.parseInt(items[k])+offset; 833 p++; 834 } 835 } catch (NumberFormatException e) { 836 throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e); 837 } 838 } 839 offset += cardinality.get(j-1); 840 } 841 prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]); 842 if(columns.length>0) { 843 max_index = Math.max(max_index, xlist.get(p-1).index); 844 } 845 i++; 846 xlist.clear(); 847 } catch (ArrayIndexOutOfBoundsException e) { 848 throw new LibsvmException("Cannot read from the instance file. ", e); 849 } 850 } 851 fp.close(); 852 if (param.gamma == 0) { 853 param.gamma = 1.0/max_index; 854 } 855 xlist = null; 856 } catch (IOException e) { 857 throw new LibsvmException("Cannot read from the instance file. ", e); 858 } 859 } 860 861 862 /** 863 * Assign a default value to all svm parameters 864 * 865 * @param param a svm_parameter object 866 */ 867 public void initParameters(svm_parameter param) throws LibsvmException { 868 if (param == null) { 869 throw new LibsvmException("Svm-parameters cannot be found. "); 870 } 871 param.svm_type = svm_parameter.C_SVC; 872 param.kernel_type = svm_parameter.POLY; 873 param.degree = 2.0; // libsvm 2.8 874 param.gamma = 0.2; // 1/k 875 param.coef0 = 0; 876 param.nu = 0.5; 877 param.cache_size = 40; 878 param.C = 0.5; 879 param.eps = 1.0; 880 param.p = 0.1; 881 param.shrinking = 1; 882 param.probability = 0; 883 param.nr_weight = 0; 884 param.weight_label = new int[0]; 885 param.weight = new double[0]; 886 } 887 888 /** 889 * Returns a string containing all svm-parameters of interest 890 * 891 * @param param a svm_parameter object 892 * @return a string containing all svm-parameters of interest 893 */ 894 public String toStringParameters(svm_parameter param) { 895 if (param == null) { 896 throw new IllegalArgumentException("Svm-parameters cannot be found. 
"); 897 } 898 StringBuffer sb = new StringBuffer(); 899 900 String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"}; 901 String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"}; 902 DecimalFormat dform = new DecimalFormat("#0.0#"); 903 DecimalFormatSymbols sym = new DecimalFormatSymbols(); 904 sym.setDecimalSeparator('.'); 905 dform.setDecimalFormatSymbols(sym); 906 sb.append("LIBSVM SETTINGS\n"); 907 sb.append(" SVM type : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n"); 908 sb.append(" Kernel : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n"); 909 if (param.kernel_type == svm_parameter.POLY) { 910 sb.append(" Degree : " + param.degree + "\n"); 911 } 912 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) { 913 sb.append(" Gamma : " + dform.format(param.gamma) + "\n"); 914 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) { 915 sb.append(" Coef0 : " + dform.format(param.coef0) + "\n"); 916 } 917 } 918 if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) { 919 sb.append(" Nu : " + dform.format(param.nu) + "\n"); 920 } 921 sb.append(" Cache Size : " + dform.format(param.cache_size) + " MB\n"); 922 if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) { 923 sb.append(" C : " + dform.format(param.C) + "\n"); 924 } 925 sb.append(" Eps : " + dform.format(param.eps) + "\n"); 926 if (param.svm_type == svm_parameter.EPSILON_SVR) { 927 sb.append(" P : " + dform.format(param.p) + "\n"); 928 } 929 sb.append(" Shrinking : " + param.shrinking + "\n"); 930 sb.append(" Probability : " + param.probability + "\n"); 931 if (param.svm_type == svm_parameter.C_SVC) { 932 sb.append(" #Weight : " + param.nr_weight + "\n"); 933 if 
(param.nr_weight > 0) { 934 sb.append(" Weight labels : "); 935 for (int i = 0; i < param.nr_weight; i++) { 936 sb.append(param.weight_label[i]); 937 if (i != param.nr_weight-1) { 938 sb.append(", "); 939 } 940 } 941 sb.append("\n"); 942 for (int i = 0; i < param.nr_weight; i++) { 943 sb.append(dform.format(param.weight)); 944 if (i != param.nr_weight-1) { 945 sb.append(", "); 946 } 947 } 948 sb.append("\n"); 949 } 950 } 951 return sb.toString(); 952 } 953 954 public String[] getSVMParamStringArray(svm_parameter param) { 955 ArrayList<String> params = new ArrayList<String>(); 956 957 if (param.svm_type != 0) { 958 params.add("-s"); params.add(new Integer(param.svm_type).toString()); 959 } 960 if (param.kernel_type != 2) { 961 params.add("-t"); params.add(new Integer(param.kernel_type).toString()); 962 } 963 if (param.degree != 3) { 964 params.add("-d"); params.add(new Double(param.degree).toString()); 965 } 966 params.add("-g"); params.add(new Double(param.gamma).toString()); 967 if (param.coef0 != 0) { 968 params.add("-r"); params.add(new Double(param.coef0).toString()); 969 } 970 if (param.nu != 0.5) { 971 params.add("-n"); params.add(new Double(param.nu).toString()); 972 } 973 if (param.cache_size != 100) { 974 params.add("-m"); params.add(new Double(param.cache_size).toString()); 975 } 976 if (param.C != 1) { 977 params.add("-c"); params.add(new Double(param.C).toString()); 978 } 979 if (param.eps != 0.001) { 980 params.add("-e"); params.add(new Double(param.eps).toString()); 981 } 982 if (param.p != 0.1) { 983 params.add("-p"); params.add(new Double(param.p).toString()); 984 } 985 if (param.shrinking != 1) { 986 params.add("-h"); params.add(new Integer(param.shrinking).toString()); 987 } 988 if (param.probability != 0) { 989 params.add("-b"); params.add(new Integer(param.probability).toString()); 990 } 991 992 return params.toArray(new String[params.size()]); 993 } 994 995 /** 996 * Parses the parameter string. 
The parameter string must contain parameter and value pairs, which are seperated by a blank 997 * or a underscore. The parameter begins with a character '-' followed by a one-character flag and the value must comply with 998 * the parameters data type. Some examples: 999 * 1000 * -s 0 -t 1 -d 2 -g 0.4 -e 0.1 1001 * -s_0_-t_1_-d_2_-g_0.4_-e_0.1 1002 * 1003 * @param paramstring the parameter string 1004 * @param param a svm_parameter object 1005 * @throws LibsvmException 1006 */ 1007 public void parseParameters(String paramstring, svm_parameter param) throws LibsvmException { 1008 if (param == null) { 1009 throw new LibsvmException("Svm-parameters cannot be found. "); 1010 } 1011 if (paramstring == null) { 1012 return; 1013 } 1014 String[] argv; 1015 try { 1016 argv = paramstring.split("[_\\p{Blank}]"); 1017 } catch (PatternSyntaxException e) { 1018 throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e); 1019 } 1020 for (int i=0; i < argv.length-1; i++) { 1021 if(argv[i].charAt(0) != '-') { 1022 throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0)); 1023 } 1024 if(++i>=argv.length) { 1025 throw new LibsvmException("The last argument does not have any value. 
"); 1026 } 1027 try { 1028 switch(argv[i-1].charAt(1)) { 1029 case 's': 1030 param.svm_type = Integer.parseInt(argv[i]); 1031 break; 1032 case 't': 1033 param.kernel_type = Integer.parseInt(argv[i]); 1034 break; 1035 case 'd': 1036 param.degree = Double.valueOf(argv[i]).doubleValue(); //libsvm2.8 1037 break; 1038 case 'g': 1039 param.gamma = Double.valueOf(argv[i]).doubleValue(); 1040 break; 1041 case 'r': 1042 param.coef0 = Double.valueOf(argv[i]).doubleValue(); 1043 break; 1044 case 'n': 1045 param.nu = Double.valueOf(argv[i]).doubleValue(); 1046 break; 1047 case 'm': 1048 param.cache_size = Double.valueOf(argv[i]).doubleValue(); 1049 break; 1050 case 'c': 1051 param.C = Double.valueOf(argv[i]).doubleValue(); 1052 break; 1053 case 'e': 1054 param.eps = Double.valueOf(argv[i]).doubleValue(); 1055 break; 1056 case 'p': 1057 param.p = Double.valueOf(argv[i]).doubleValue(); 1058 break; 1059 case 'h': 1060 param.shrinking = Integer.parseInt(argv[i]); 1061 break; 1062 case 'b': 1063 param.probability = Integer.parseInt(argv[i]); 1064 break; 1065 case 'w': 1066 ++param.nr_weight; 1067 { 1068 int[] old = param.weight_label; 1069 param.weight_label = new int[param.nr_weight]; 1070 System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1); 1071 } 1072 1073 { 1074 double[] old = param.weight; 1075 param.weight = new double[param.nr_weight]; 1076 System.arraycopy(old,0,param.weight,0,param.nr_weight-1); 1077 } 1078 1079 param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2)); 1080 param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue(); 1081 break; 1082 case 'Y': 1083 case 'V': 1084 case 'S': 1085 case 'F': 1086 case 'T': 1087 case 'M': 1088 case 'N': 1089 break; 1090 default: 1091 throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. 
"); 1092 } 1093 } catch (ArrayIndexOutOfBoundsException e) { 1094 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 1095 } catch (NumberFormatException e) { 1096 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 1097 } catch (NullPointerException e) { 1098 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 1099 } 1100 } 1101 } 1102 1103 /** 1104 * Converts the instance file (Malt's own SVM format) into the LIBSVM (SVMLight) format. The input instance file is removed (replaced) 1105 * by the instance file in the LIBSVM (SVMLight) format. If a column contains -1, the value will be removed in destination file. 1106 * 1107 * @param isr the input stream reader for the source instance file 1108 * @param osw the output stream writer for the destination instance file 1109 * @param cardinality a vector containing the number of distinct values for a particular column 1110 * @throws LibsvmException 1111 */ 1112 public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, ArrayList<Integer> cardinality) throws LibsvmException { 1113 try { 1114 final BufferedReader in = new BufferedReader(isr); 1115 final BufferedWriter out = new BufferedWriter(osw); 1116 int c; 1117 int j = 0; 1118 int offset = 0; 1119 int code = 0; 1120 while(true) { 1121 c = in.read(); 1122 if (c == -1) { 1123 break; 1124 } 1125 1126 if (c == '\t' || c == '|') { 1127 if (j == 0) { 1128 out.write(Integer.toString(code)); 1129 j++; 1130 } else { 1131 if (code != -1) { 1132 out.write(' '); 1133 out.write(Integer.toString(code+offset)); 1134 out.write(":1"); 1135 } 1136 if (c == '\t') { 1137 offset += cardinality.get(j-1); 1138 j++; 1139 } 1140 } 1141 code = 0; 1142 } else if (c == '\n') { 
1143 j = 0; 1144 offset = 0; 1145 out.write('\n'); 1146 code = 0; 1147 } else if (c == '-') { 1148 code = -1; 1149 } else if (code != -1) { 1150 if (c > 47 && c < 58) { 1151 code = code * 10 + (c-48); 1152 } else { 1153 throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format."); 1154 } 1155 } 1156 } 1157 in.close(); 1158 out.close(); 1159 } catch (IOException e) { 1160 throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e); 1161 } 1162 } 1163 1164 /** 1165 * Returns the double (floating-point) value of the string s 1166 * 1167 * @param s string value that should be converted into a double. 1168 * @return the double (floating-point) value of the string s 1169 * @throws LibsvmException 1170 */ 1171 public static double atof(String s) throws LibsvmException { 1172 try { 1173 return Double.valueOf(s).doubleValue(); 1174 } catch (NumberFormatException e) { 1175 throw new LibsvmException("Could not convert the string value '"+s+"' into a correct numeric value. ", e); 1176 } catch (NullPointerException e) { 1177 throw new LibsvmException("Could not convert the string value '"+s+"' into a correct numeric value. ", e); 1178 } 1179 } 1180 1181 /** 1182 * Returns the integer value of the string s 1183 * 1184 * @param s string value that should be converted into an integer 1185 * @return the integer value of the string s 1186 * @throws LibsvmException 1187 */ 1188 public static int atoi(String s) throws LibsvmException { 1189 try { 1190 return Integer.parseInt(s); 1191 } catch (NumberFormatException e) { 1192 throw new LibsvmException("Could not convert the string value '"+s+"' into a correct integer value. ", e); 1193 } catch (NullPointerException e) { 1194 throw new LibsvmException("Could not convert the string value '"+s+"' into a correct integer value. 
", e); 1195 } 1196 } 1197 1198 /** 1199 * Reads an instance file into a svm_problem object according to the LIBSVM (SVMLight) format. 1200 * 1201 * @param isr the input stream reader for the source instance file 1202 * @param prob a svm_problem object 1203 * @param param a svm_parameter object 1204 * @throws LibsvmException 1205 */ 1206 public static void readProblemOriginalSVMFormat(InputStreamReader isr, svm_problem prob, svm_parameter param) throws LibsvmException { 1207 BufferedReader fp = new BufferedReader(isr); 1208 1209 Vector<String> vy = new Vector<String>(); 1210 Vector<svm_node[]> vx = new Vector<svm_node[]>(); 1211 int max_index = 0; 1212 1213 while(true) { 1214 String line; 1215 try { 1216 line = fp.readLine(); 1217 } catch (IOException e) { 1218 throw new LibsvmException("", e); 1219 } 1220 if(line == null) break; 1221 1222 StringTokenizer st = new StringTokenizer(line," \t\n\r\f:"); 1223 1224 vy.addElement(st.nextToken()); 1225 int m = st.countTokens()/2; 1226 svm_node[] x = new svm_node[m]; 1227 for(int j=0;j<m;j++) { 1228 x[j] = new svm_node(); 1229 x[j].index = atoi(st.nextToken()); 1230 x[j].value = atof(st.nextToken()); 1231 } 1232 if(m>0) max_index = Math.max(max_index, x[m-1].index); 1233 vx.addElement(x); 1234 } 1235 1236 prob.l = vy.size(); 1237 prob.x = new svm_node[prob.l][]; 1238 for(int i=0;i<prob.l;i++) { 1239 prob.x[i] = (svm_node[])vx.elementAt(i); 1240 } 1241 prob.y = new double[prob.l]; 1242 for(int i=0;i<prob.l;i++) { 1243 prob.y[i] = atof((String)vy.elementAt(i)); 1244 } 1245 if(param.gamma == 0.0) { 1246 param.gamma = 1.0/max_index; 1247 } 1248 1249 try { 1250 fp.close(); 1251 } catch (IOException e) { 1252 throw new LibsvmException("The instance file cannot be closed. 
", e); 1253 } 1254 } 1255 1256 protected void finalize() throws Throwable { 1257 try { 1258 closeInstanceWriter(); 1259 } finally { 1260 super.finalize(); 1261 } 1262 } 1263 /* (non-Javadoc) 1264 * @see java.lang.Object#toString() 1265 */ 1266 public String toString() { 1267 StringBuffer sb = new StringBuffer(); 1268 sb.append("\nLIBSVM INTERFACE\n"); 1269 sb.append(" LIBSVM version: "+LIBSVM_VERSION+"\n"); 1270 sb.append(" SVM-param string: "+paramString+"\n"); 1271 sb.append(" Coding and behavior strategy: MaltParser 0.4\n"); 1272 sb.append(toStringParameters(svmParam)); 1273 return sb.toString(); 1274 } 1275 }