001 package org.maltparser.ml.libsvm; 002 003 import java.io.BufferedReader; 004 import java.io.BufferedWriter; 005 import java.io.File; 006 import java.io.FileNotFoundException; 007 import java.io.IOException; 008 import java.io.InputStream; 009 import java.io.InputStreamReader; 010 import java.io.OutputStreamWriter; 011 import java.io.PrintStream; 012 import java.text.DecimalFormat; 013 import java.text.DecimalFormatSymbols; 014 import java.util.ArrayList; 015 import java.util.Set; 016 import java.util.regex.Pattern; 017 import java.util.regex.PatternSyntaxException; 018 019 import libsvm.svm; 020 import libsvm.svm_model; 021 import libsvm.svm_node; 022 import libsvm.svm_parameter; 023 import libsvm.svm_problem; 024 025 import org.maltparser.core.exception.MaltChainedException; 026 import org.maltparser.core.feature.FeatureVector; 027 import org.maltparser.core.feature.function.FeatureFunction; 028 import org.maltparser.core.feature.value.FeatureValue; 029 import org.maltparser.core.feature.value.MultipleFeatureValue; 030 import org.maltparser.core.feature.value.SingleFeatureValue; 031 import org.maltparser.core.helper.NoPrintStream; 032 import org.maltparser.core.syntaxgraph.DependencyStructure; 033 import org.maltparser.ml.LearningMethod; 034 import org.maltparser.ml.libsvm.LibsvmException; 035 import org.maltparser.parser.DependencyParserConfig; 036 import org.maltparser.parser.guide.instance.InstanceModel; 037 import org.maltparser.parser.history.action.SingleDecision; 038 import org.maltparser.parser.history.kbest.KBestList; 039 import org.maltparser.parser.history.kbest.ScoredKBestList; 040 041 /** 042 Implements an interface to the LIBSVM learner (currently the LIBSVM 2.86 is used). More information 043 about LIBSVM can be found at 044 <a href="http://www.csie.ntu.edu.tw/~cjlin/libsvm/" target="_blank">LIBSVM -- A Library for Support Vector Machines</a>. 045 046 @author Johan Hall 047 @since 1.0 048 */ 049 public class Libsvm implements LearningMethod { 050 public final static String LIBSVM_VERSION = "2.89"; 051 public enum Verbostity { 052 SILENT, ERROR, ALL 053 } 054 protected InstanceModel owner; 055 protected int learnerMode; 056 protected String name; 057 protected int numberOfInstances; 058 protected boolean saveInstanceFiles; 059 protected boolean excludeNullValues; 060 protected String pathExternalSVMTrain = null; 061 private int[] cardinalities; 062 063 /** 064 * Instance output stream writer 065 */ 066 private BufferedWriter instanceOutput = null; 067 /** 068 * LIBSVM svm_model object, only used during classification. 069 */ 070 private svm_model model = null; 071 072 /** 073 * LIBSVM svm_parameter object 074 */ 075 private svm_parameter svmParam; 076 /** 077 * Parameter string 078 */ 079 private String paramString; 080 /** 081 * An array of LIBSVM svm_node objects, only used during classification. 082 */ 083 private ArrayList<svm_node> xlist = null; 084 085 private Verbostity verbosity; 086 /** 087 * Constructs a LIBSVM learner. 088 * 089 * @param owner the guide model owner 090 * @param learnerMode the mode of the learner TRAIN or CLASSIFY 091 */ 092 public Libsvm(InstanceModel owner, Integer learnerMode) throws MaltChainedException { 093 setOwner(owner); 094 setLearningMethodName("libsvm"); 095 setLearnerMode(learnerMode.intValue()); 096 setNumberOfInstances(0); 097 verbosity = Verbostity.SILENT; 098 initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString()); 099 initSpecialParameters(); 100 if (learnerMode == BATCH) { 101 // if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) { 102 // if (pathExternalSVMTrain != null) { 103 // owner.getGuide().getConfiguration().getConfigLogger().info(" Learner : LIBSVM external "+ getParamString() + "\n"); 104 // } else { 105 // owner.getGuide().getConfiguration().getConfigLogger().info(" Learner : LIBSVM "+LIBSVM_VERSION+" "+ getParamString() + "\n"); 106 // } 107 // } 108 instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins")); 109 } 110 // else { 111 // if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) { 112 // owner.getGuide().getConfiguration().getConfigLogger().info(" Classifier : LIBSVM "+LIBSVM_VERSION+" "+ getParamString()+ "\n"); 113 // } 114 // } 115 } 116 117 118 public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException { 119 if (featureVector == null) { 120 throw new LibsvmException("The feature vector cannot be found"); 121 } else if (decision == null) { 122 throw new LibsvmException("The decision cannot be found"); 123 } 124 try { 125 instanceOutput.write(decision.getDecisionCode()+"\t"); 126 for (int i = 0; i < featureVector.size(); i++) { 127 FeatureValue featureValue = featureVector.get(i).getFeatureValue(); 128 if (excludeNullValues == true && featureValue.isNullValue()) { 129 instanceOutput.write("-1"); 130 } else { 131 if (featureValue instanceof SingleFeatureValue) { 132 instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+""); 133 } else if (featureValue instanceof MultipleFeatureValue) { 134 Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes(); 135 int j=0; 136 for (Integer value : values) { 137 instanceOutput.write(value.toString()); 138 if (j != values.size()-1) { 139 instanceOutput.write("|"); 140 } 141 j++; 142 } 143 } 144 } 145 if (i != featureVector.size()) { 146 instanceOutput.write('\t'); 147 } 148 } 149 150 instanceOutput.write('\n'); 151 instanceOutput.flush(); 152 increaseNumberOfInstances(); 153 } catch (IOException e) { 154 throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e); 155 } 156 } 157 158 public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { } 159 160 /* (non-Javadoc) 161 * @see org.maltparser.ml.LearningMethod#noMoreInstances() 162 */ 163 public void noMoreInstances() throws MaltChainedException { 164 closeInstanceWriter(); 165 } 166 167 168 /* (non-Javadoc) 169 * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector) 170 */ 171 public void train(FeatureVector featureVector) throws MaltChainedException { 172 if (featureVector == null) { 173 throw new LibsvmException("The feature vector cannot be found. "); 174 } else if (owner == null) { 175 throw new LibsvmException("The parent guide model cannot be found. "); 176 } 177 cardinalities = getCardinalities(featureVector); 178 if (pathExternalSVMTrain == null) { 179 try { 180 final svm_problem prob = readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), cardinalities, svmParam); 181 if(svm.svm_check_parameter(prob, svmParam) != null) { 182 throw new LibsvmException(svm.svm_check_parameter(prob, svmParam)); 183 } 184 owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model "+getFile(".mod").getName()+"\n"); 185 final PrintStream out = System.out; 186 final PrintStream err = System.err; 187 System.setOut(NoPrintStream.NO_PRINTSTREAM); 188 System.setErr(NoPrintStream.NO_PRINTSTREAM); 189 190 svm.svm_save_model(getFile(".mod").getAbsolutePath(), svm.svm_train(prob, svmParam)); 191 System.setOut(err); 192 System.setOut(out); 193 if (!saveInstanceFiles) { 194 getFile(".ins").delete(); 195 } 196 } catch (OutOfMemoryError e) { 197 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 198 } catch (IllegalArgumentException e) { 199 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 200 } catch (SecurityException e) { 201 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 202 } catch (IOException e) { 203 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 204 } 205 } else { 206 trainExternal(featureVector); 207 } 208 saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities); 209 } 210 211 private void trainExternal(FeatureVector featureVector) throws MaltChainedException { 212 try { 213 maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities); 214 owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model (svm-train) "+getFile(".mod").getName()); 215 216 final ArrayList<String> commands = new ArrayList<String>(); 217 commands.add(pathExternalSVMTrain); 218 final String[] params = getSVMParamStringArray(svmParam); 219 for (int i=0; i < params.length; i++) { 220 commands.add(params[i]); 221 } 222 commands.add(getFile(".ins.tmp").getAbsolutePath()); 223 commands.add(getFile(".mod").getAbsolutePath()); 224 String[] arrayCommands = commands.toArray(new String[commands.size()]); 225 226 if (verbosity == Verbostity.ALL) { 227 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 228 } 229 final Process child = Runtime.getRuntime().exec(arrayCommands); 230 final InputStream in = child.getInputStream(); 231 final InputStream err = child.getErrorStream(); 232 int c; 233 while ((c = in.read()) != -1){ 234 if (verbosity == Verbostity.ALL) { 235 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 236 } 237 } 238 while ((c = err.read()) != -1){ 239 if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) { 240 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 241 } 242 } 243 if (child.waitFor() != 0) { 244 owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")"); 245 } 246 in.close(); 247 err.close(); 248 if (!saveInstanceFiles) { 249 getFile(".ins").delete(); 250 getFile(".ins.tmp").delete(); 251 } 252 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 253 } catch (InterruptedException e) { 254 throw new LibsvmException("SVM-trainer is interrupted. ", e); 255 } catch (IllegalArgumentException e) { 256 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 257 } catch (SecurityException e) { 258 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 259 } catch (IOException e) { 260 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 261 } catch (OutOfMemoryError e) { 262 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 263 } 264 } 265 266 private int[] getCardinalities(FeatureVector featureVector) { 267 int[] cardinalities = new int[featureVector.size()]; 268 int i = 0; 269 for (FeatureFunction feature : featureVector) { 270 cardinalities[i++] = feature.getFeatureValue().getCardinality(); 271 } 272 return cardinalities; 273 } 274 275 private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException { 276 final BufferedWriter out = new BufferedWriter(osw); 277 try { 278 for (int i = 0, n = cardinalities.length; i < n; i++) { 279 out.write(Integer.toString(cardinalities[i])); 280 if (i < n - 1) { 281 out.write(','); 282 } 283 } 284 out.write('\n'); 285 out.close(); 286 } catch (IOException e) { 287 throw new LibsvmException("", e); 288 } 289 } 290 291 private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException { 292 int[] cardinalities = null; 293 try { 294 final BufferedReader in = new BufferedReader(isr); 295 String line; 296 if ((line = in.readLine()) != null) { 297 String[] items = line.split(","); 298 cardinalities = new int[items.length]; 299 for (int i = 0; i < items.length; i++) { 300 cardinalities[i] = Integer.parseInt(items[i]); 301 } 302 } 303 in.close(); 304 } catch (IOException e) { 305 throw new LibsvmException("", e); 306 } catch (NumberFormatException e) { 307 throw new LibsvmException("", e); 308 } 309 return cardinalities; 310 } 311 312 /* (non-Javadoc) 313 * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList) 314 */ 315 public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException { 316 if (method == null) { 317 throw new LibsvmException("The learning method cannot be found. "); 318 } else if (divideFeature == null) { 319 throw new LibsvmException("The divide feature cannot be found. "); 320 } 321 try { 322 final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins")); 323 final BufferedWriter out = method.getInstanceWriter(); 324 final StringBuilder sb = new StringBuilder(6); 325 int l = in.read(); 326 char c; 327 int j = 0; 328 while(true) { 329 if (l == -1) { 330 sb.setLength(0); 331 break; 332 } 333 334 c = (char)l; 335 l = in.read(); 336 if (c == '\t') { 337 if (divideFeatureIndexVector.contains(j-1)) { 338 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 339 out.write('\t'); 340 } 341 out.write(sb.toString()); 342 j++; 343 out.write('\t'); 344 sb.setLength(0); 345 } else if (c == '\n') { 346 out.write(sb.toString()); 347 if (divideFeatureIndexVector.contains(j-1)) { 348 out.write('\t'); 349 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 350 } 351 out.write('\n'); 352 sb.setLength(0); 353 method.increaseNumberOfInstances(); 354 this.decreaseNumberOfInstances(); 355 j = 0; 356 } else { 357 sb.append(c); 358 } 359 } 360 in.close(); 361 getFile(".ins").delete(); 362 } catch (SecurityException e) { 363 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 364 } catch (NullPointerException e) { 365 throw new LibsvmException("The instance file cannot be found. ", e); 366 } catch (FileNotFoundException e) { 367 throw new LibsvmException("The instance file cannot be found. ", e); 368 } catch (IOException e) { 369 throw new LibsvmException("The LIBSVM learner read from the instance file. ", e); 370 } 371 } 372 373 /* (non-Javadoc) 374 * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList) 375 */ 376 public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException { 377 if (model == null) { 378 File modelFile = getFile(".mod"); 379 try { 380 model = svm.svm_load_model(modelFile.getAbsolutePath()); 381 } catch (IOException e) { 382 throw new LibsvmException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e); 383 } 384 } 385 if (cardinalities == null) { 386 if (getFile(".car").exists()) { 387 cardinalities = loadCardinalities(getInstanceInputStreamReader(".car")); 388 } else { 389 cardinalities = getCardinalities(featureVector); 390 } 391 } 392 if (xlist == null) { 393 xlist = new ArrayList<svm_node>(featureVector.size()); 394 } 395 if (model == null) { 396 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. "); 397 } else if (featureVector == null) { 398 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. "); 399 } 400 int j = 0; 401 int offset = 0; 402 int i = 0; 403 for (FeatureFunction feature : featureVector) { 404 final FeatureValue featureValue = feature.getFeatureValue(); 405 if (!(excludeNullValues == true && featureValue.isNullValue())) { 406 if (featureValue instanceof SingleFeatureValue) { 407 if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) { 408 if (j >= xlist.size()) { 409 svm_node x = new svm_node(); 410 x.value = 1; 411 xlist.add(j,x); 412 } 413 xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset; 414 } 415 } else if (featureValue instanceof MultipleFeatureValue) { 416 for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) { 417 if (value < cardinalities[i]) { 418 // if (((MultipleFeatureValue)featureValue).isKnown(value)) { 419 if (j >= xlist.size()) { 420 svm_node x = new svm_node(); 421 x.value = 1; 422 xlist.add(j,x); 423 } 424 xlist.get(j++).index = value + offset; 425 } 426 } 427 } 428 } 429 offset += cardinalities[i]; 430 i++; 431 } 432 433 svm_node[] xarray = new svm_node[j]; 434 for (int k = 0; k < j; k++) { 435 xarray[k] = xlist.get(k); 436 } 437 if (decision.getKBestList().getK() == 1 || svm.svm_get_svm_type(model) == svm_parameter.ONE_CLASS || 438 svm.svm_get_svm_type(model) == svm_parameter.EPSILON_SVR || 439 svm.svm_get_svm_type(model) == svm_parameter.NU_SVR) { 440 decision.getKBestList().add((int)svm.svm_predict(model, xarray)); 441 } else { 442 svm_predict_with_kbestlist(model, xarray, decision.getKBestList()); 443 } 444 445 return true; 446 } 447 448 449 public void terminate() throws MaltChainedException { 450 closeInstanceWriter(); 451 model = null; 452 svmParam = null; 453 xlist = null; 454 owner = null; 455 } 456 457 public BufferedWriter getInstanceWriter() { 458 return instanceOutput; 459 } 460 461 protected void closeInstanceWriter() throws MaltChainedException { 462 try { 463 if (instanceOutput != null) { 464 instanceOutput.flush(); 465 instanceOutput.close(); 466 instanceOutput = null; 467 } 468 } catch (IOException e) { 469 throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e); 470 } 471 } 472 473 /** 474 * Initialize the LIBSVM according to the parameter string 475 * 476 * @param paramString the parameter string to configure the LIBSVM learner. 477 * @throws MaltChainedException 478 */ 479 protected void initSvmParam(String paramString) throws MaltChainedException { 480 this.paramString = paramString; 481 svmParam = new svm_parameter(); 482 initParameters(svmParam); 483 parseParameters(paramString, svmParam); 484 } 485 486 /** 487 * Returns the parameter string for used for configure LIBSVM 488 * 489 * @return the parameter string for used for configure LIBSVM 490 */ 491 public String getParamString() { 492 return paramString; 493 } 494 495 public InstanceModel getOwner() { 496 return owner; 497 } 498 499 protected void setOwner(InstanceModel owner) { 500 this.owner = owner; 501 } 502 503 public int getLearnerMode() { 504 return learnerMode; 505 } 506 507 public void setLearnerMode(int learnerMode) throws MaltChainedException { 508 this.learnerMode = learnerMode; 509 } 510 511 public String getLearningMethodName() { 512 return name; 513 } 514 515 /** 516 * Returns the current configuration 517 * 518 * @return the current configuration 519 * @throws MaltChainedException 520 */ 521 public DependencyParserConfig getConfiguration() throws MaltChainedException { 522 return owner.getGuide().getConfiguration(); 523 } 524 525 public int getNumberOfInstances() { 526 return numberOfInstances; 527 } 528 529 public void increaseNumberOfInstances() { 530 numberOfInstances++; 531 owner.increaseFrequency(); 532 } 533 534 public void decreaseNumberOfInstances() { 535 numberOfInstances--; 536 owner.decreaseFrequency(); 537 } 538 539 protected void setNumberOfInstances(int numberOfInstances) { 540 this.numberOfInstances = 0; 541 } 542 543 protected void setLearningMethodName(String name) { 544 this.name = name; 545 } 546 547 protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException { 548 return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix); 549 } 550 551 protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException { 552 return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix); 553 } 554 555 protected File getFile(String suffix) throws MaltChainedException { 556 return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix); 557 } 558 559 /** 560 * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated). 561 * 562 * @param isr the instance stream reader for the instance file 563 * @param cardinalities a array containing the number of distinct values for a particular column. 564 * @param param a svm_parameter object 565 * @throws LibsvmException 566 */ 567 public final svm_problem readProblemMaltSVMFormat(InputStreamReader isr, int[] cardinalities, svm_parameter param) throws MaltChainedException { 568 final svm_problem prob = new svm_problem(); 569 try { 570 final BufferedReader fp = new BufferedReader(isr); 571 int max_index = 0; 572 if (xlist == null) { 573 xlist = new ArrayList<svm_node>(); 574 } 575 prob.l = getNumberOfInstances(); 576 prob.x = new svm_node[prob.l][]; 577 prob.y = new double[prob.l]; 578 int i = 0; 579 final Pattern tabPattern = Pattern.compile("\t"); 580 final Pattern pipePattern = Pattern.compile("\\|"); 581 while(true) { 582 String line = fp.readLine(); 583 if(line == null) break; 584 String[] columns = tabPattern.split(line); 585 586 if (columns.length == 0) { 587 continue; 588 } 589 590 int offset = 0; 591 int j = 0; 592 try { 593 prob.y[i] = (double)Integer.parseInt(columns[j]); 594 int p = 0; 595 for(j = 1; j < columns.length; j++) { 596 final String[] items = pipePattern.split(columns[j]); 597 for (int k = 0; k < items.length; k++) { 598 try { 599 if (Integer.parseInt(items[k]) != -1) { 600 xlist.add(p, new svm_node()); 601 xlist.get(p).value = 1; 602 xlist.get(p).index = Integer.parseInt(items[k])+offset; 603 p++; 604 } 605 } catch (NumberFormatException e) { 606 throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e); 607 } 608 } 609 offset += cardinalities[j-1]; 610 } 611 prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]); 612 if(columns.length > 1) { 613 max_index = Math.max(max_index, xlist.get(p-1).index); 614 } 615 i++; 616 xlist.clear(); 617 } catch (ArrayIndexOutOfBoundsException e) { 618 throw new LibsvmException("Cannot read from the instance file. ", e); 619 } 620 } 621 fp.close(); 622 if (param.gamma == 0) { 623 param.gamma = 1.0/max_index; 624 } 625 xlist = null; 626 } catch (IOException e) { 627 throw new LibsvmException("Cannot read from the instance file. ", e); 628 } 629 return prob; 630 } 631 632 protected void initSpecialParameters() throws MaltChainedException { 633 if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) { 634 excludeNullValues = true; 635 } else { 636 excludeNullValues = false; 637 } 638 saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue(); 639 640 if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) { 641 try { 642 if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) { 643 throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong."); 644 } 645 if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) { 646 throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file"); 647 } 648 if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) { 649 throw new LibsvmException("The option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. "); 650 } 651 pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString(); 652 } catch (SecurityException e) { 653 throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e); 654 } 655 } 656 if (getConfiguration().getOptionValue("libsvm", "verbosity") != null) { 657 verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("libsvm", "verbosity").toString().toUpperCase()); 658 } 659 } 660 661 /** 662 * Assign a default value to all svm parameters 663 * 664 * @param param a svm_parameter object 665 */ 666 protected void initParameters(svm_parameter param) throws MaltChainedException { 667 if (param == null) { 668 throw new LibsvmException("Svm-parameters cannot be found. "); 669 } 670 param.svm_type = svm_parameter.C_SVC; 671 param.kernel_type = svm_parameter.POLY; 672 param.degree = 2; 673 param.gamma = 0.2; // 1/k 674 param.coef0 = 0; 675 param.nu = 0.5; 676 param.cache_size = 100; 677 param.C = 1; 678 param.eps = 1.0; 679 param.p = 0.1; 680 param.shrinking = 1; 681 param.probability = 0; 682 param.nr_weight = 0; 683 param.weight_label = new int[0]; 684 param.weight = new double[0]; 685 } 686 687 /** 688 * Returns a string containing all svm-parameters of interest 689 * 690 * @param param a svm_parameter object 691 * @return a string containing all svm-parameters of interest 692 */ 693 public String toStringParameters(svm_parameter param) { 694 if (param == null) { 695 throw new IllegalArgumentException("Svm-parameters cannot be found. "); 696 } 697 final StringBuffer sb = new StringBuffer(); 698 699 final String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"}; 700 final String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"}; 701 final DecimalFormat dform = new DecimalFormat("#0.0#"); 702 final DecimalFormatSymbols sym = new DecimalFormatSymbols(); 703 sym.setDecimalSeparator('.'); 704 dform.setDecimalFormatSymbols(sym); 705 sb.append("LIBSVM SETTINGS\n"); 706 sb.append(" SVM type : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n"); 707 sb.append(" Kernel : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n"); 708 if (param.kernel_type == svm_parameter.POLY) { 709 sb.append(" Degree : " + param.degree + "\n"); 710 } 711 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) { 712 sb.append(" Gamma : " + dform.format(param.gamma) + "\n"); 713 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) { 714 sb.append(" Coef0 : " + dform.format(param.coef0) + "\n"); 715 } 716 } 717 if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) { 718 sb.append(" Nu : " + dform.format(param.nu) + "\n"); 719 } 720 sb.append(" Cache Size : " + dform.format(param.cache_size) + " MB\n"); 721 if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) { 722 sb.append(" C : " + dform.format(param.C) + "\n"); 723 } 724 sb.append(" Eps : " + dform.format(param.eps) + "\n"); 725 if (param.svm_type == svm_parameter.EPSILON_SVR) { 726 sb.append(" P : " + dform.format(param.p) + "\n"); 727 } 728 sb.append(" Shrinking : " + param.shrinking + "\n"); 729 sb.append(" Probability : " + param.probability + "\n"); 730 if (param.svm_type == svm_parameter.C_SVC) { 731 sb.append(" #Weight : " + param.nr_weight + "\n"); 732 if (param.nr_weight > 0) { 733 sb.append(" Weight labels : "); 734 for (int i = 0; i < param.nr_weight; i++) { 735 sb.append(param.weight_label[i]); 736 if (i != param.nr_weight-1) { 737 sb.append(", "); 738 } 739 } 740 sb.append("\n"); 741 for (int i = 0; i < param.nr_weight; i++) { 742 sb.append(dform.format(param.weight)); 743 if (i != param.nr_weight-1) { 744 sb.append(", "); 745 } 746 } 747 sb.append("\n"); 748 } 749 } 750 return sb.toString(); 751 } 752 753 public String[] getSVMParamStringArray(svm_parameter param) { 754 final ArrayList<String> params = new ArrayList<String>(); 755 756 if (param.svm_type != 0) { 757 params.add("-s"); params.add(new Integer(param.svm_type).toString()); 758 } 759 if (param.kernel_type != 2) { 760 params.add("-t"); params.add(new Integer(param.kernel_type).toString()); 761 } 762 if (param.degree != 3) { 763 params.add("-d"); params.add(new Integer(param.degree).toString()); 764 } 765 params.add("-g"); params.add(new Double(param.gamma).toString()); 766 if (param.coef0 != 0) { 767 params.add("-r"); params.add(new Double(param.coef0).toString()); 768 } 769 if (param.nu != 0.5) { 770 params.add("-n"); params.add(new Double(param.nu).toString()); 771 } 772 if (param.cache_size != 100) { 773 params.add("-m"); params.add(new Double(param.cache_size).toString()); 774 } 775 if (param.C != 1) { 776 params.add("-c"); params.add(new Double(param.C).toString()); 777 } 778 if (param.eps != 0.001) { 779 params.add("-e"); params.add(new Double(param.eps).toString()); 780 } 781 if (param.p != 0.1) { 782 params.add("-p"); params.add(new Double(param.p).toString()); 783 } 784 if (param.shrinking != 1) { 785 params.add("-h"); params.add(new Integer(param.shrinking).toString()); 786 } 787 if (param.probability != 0) { 788 params.add("-b"); params.add(new Integer(param.probability).toString()); 789 } 790 791 return params.toArray(new String[params.size()]); 792 } 793 794 /** 795 * Parses the parameter string. The parameter string must contain parameter and value pairs, which are separated by a blank 796 * or a underscore. The parameter begins with a character '-' followed by a one-character flag and the value must comply with 797 * the parameters data type. Some examples: 798 * 799 * -s 0 -t 1 -d 2 -g 0.4 -e 0.1 800 * -s_0_-t_1_-d_2_-g_0.4_-e_0.1 801 * 802 * @param paramstring the parameter string 803 * @param param a svm_parameter object 804 * @throws LibsvmException 805 */ 806 public void parseParameters(String paramstring, svm_parameter param) throws MaltChainedException { 807 if (param == null) { 808 throw new LibsvmException("Svm-parameters cannot be found. "); 809 } 810 if (paramstring == null) { 811 return; 812 } 813 final String[] argv; 814 try { 815 argv = paramstring.split("[_\\p{Blank}]"); 816 } catch (PatternSyntaxException e) { 817 throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e); 818 } 819 for (int i=0; i < argv.length-1; i++) { 820 if(argv[i].charAt(0) != '-') { 821 throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0)); 822 } 823 if(++i>=argv.length) { 824 throw new LibsvmException("The last argument does not have any value. "); 825 } 826 try { 827 switch(argv[i-1].charAt(1)) { 828 case 's': 829 param.svm_type = Integer.parseInt(argv[i]); 830 break; 831 case 't': 832 param.kernel_type = Integer.parseInt(argv[i]); 833 break; 834 case 'd': 835 param.degree = Integer.parseInt(argv[i]); 836 break; 837 case 'g': 838 param.gamma = Double.valueOf(argv[i]).doubleValue(); 839 break; 840 case 'r': 841 param.coef0 = Double.valueOf(argv[i]).doubleValue(); 842 break; 843 case 'n': 844 param.nu = Double.valueOf(argv[i]).doubleValue(); 845 break; 846 case 'm': 847 param.cache_size = Double.valueOf(argv[i]).doubleValue(); 848 break; 849 case 'c': 850 param.C = Double.valueOf(argv[i]).doubleValue(); 851 break; 852 case 'e': 853 param.eps = Double.valueOf(argv[i]).doubleValue(); 854 break; 855 case 'p': 856 param.p = Double.valueOf(argv[i]).doubleValue(); 857 break; 858 case 'h': 859 param.shrinking = Integer.parseInt(argv[i]); 860 break; 861 case 'b': 862 param.probability = Integer.parseInt(argv[i]); 863 break; 864 case 'w': 865 ++param.nr_weight; 866 { 867 int[] old = param.weight_label; 868 param.weight_label = new int[param.nr_weight]; 869 System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1); 870 } 871 872 { 873 double[] old = param.weight; 874 param.weight = new double[param.nr_weight]; 875 System.arraycopy(old,0,param.weight,0,param.nr_weight-1); 876 } 877 878 param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2)); 879 param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue(); 880 break; 881 case 'Y': 882 case 'V': 883 case 'S': 884 case 'F': 885 case 'T': 886 case 'M': 887 case 'N': 888 break; 889 default: 890 throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. "); 891 } 892 } catch (ArrayIndexOutOfBoundsException e) { 893 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 894 } catch (NumberFormatException e) { 895 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 896 } catch (NullPointerException e) { 897 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 898 } 899 } 900 } 901 902 public void svm_predict_with_kbestlist(svm_model model, svm_node[] x, KBestList kBestList) throws MaltChainedException { 903 int i; 904 final int nr_class = svm.svm_get_nr_class(model); 905 final double[] dec_values = new double[nr_class*(nr_class-1)/2]; 906 svm.svm_predict_values(model, x, dec_values); 907 908 final int[] vote = new int[nr_class]; 909 final double[] score = new double[nr_class]; 910 final int[] voteindex = new int[nr_class]; 911 for(i=0;i<nr_class;i++) { 912 vote[i] = 0; 913 score[i] = 0.0; 914 voteindex[i] = i; 915 } 916 int pos=0; 917 for(i=0;i<nr_class;i++) { 918 for(int j=i+1;j<nr_class;j++) { 919 if(dec_values[pos] > 0) { 920 vote[i]++; 921 } else { 922 vote[j]++; 923 } 924 score[i] += dec_values[pos]; 925 score[j] += dec_values[pos]; 926 pos++; 927 } 928 } 929 for(i=0;i<nr_class;i++) { 930 score[i] = score[i]/nr_class; 931 } 932 int lagest, tmpint; 933 double tmpdouble; 934 for (i=0;i<nr_class-1;i++) { 935 lagest = i; 936 for (int j=i;j<nr_class;j++) { 937 if (vote[j] > vote[lagest]) { 938 lagest = j; 939 } 940 } 941 tmpint = vote[lagest]; 942 vote[lagest] = vote[i]; 943 vote[i] = tmpint; 944 tmpdouble = score[lagest]; 945 score[lagest] = score[i]; 946 score[i] = tmpdouble; 947 tmpint = voteindex[lagest]; 948 voteindex[lagest] = voteindex[i]; 949 voteindex[i] = tmpint; 950 } 951 final int[] labels = new int[nr_class]; 952 svm.svm_get_labels(model, labels); 953 int k = nr_class-1; 954 if (kBestList.getK() != -1) { 955 k = kBestList.getK() - 1; 956 } 957 958 for (i=0; i<nr_class && k >= 0; i++, k--) { 959 if (vote[i] > 0 || i == 0) { 960 if (kBestList instanceof ScoredKBestList) { 961 ((ScoredKBestList)kBestList).add(labels[voteindex[i]], (float)vote[i]/(float)(nr_class*(nr_class-1)/2)); 962 } else { 963 kBestList.add(labels[voteindex[i]]); 964 } 965 } 966 } 967 } 968 /** 969 * Converts the instance file (Malt's own SVM format) into the LIBSVM (SVMLight) format. The input instance file is removed (replaced) 970 * by the instance file in the LIBSVM (SVMLight) format. If a column contains -1, the value will be removed in destination file. 971 * 972 * @param isr the input stream reader for the source instance file 973 * @param osw the output stream writer for the destination instance file 974 * @param cardinalities a vector containing the number of distinct values for a particular column 975 * @throws LibsvmException 976 */ 977 public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException { 978 try { 979 final BufferedReader in = new BufferedReader(isr); 980 final BufferedWriter out = new BufferedWriter(osw); 981 982 int c; 983 int j = 0; 984 int offset = 0; 985 int code = 0; 986 while(true) { 987 c = in.read(); 988 if (c == -1) { 989 break; 990 } 991 992 if (c == '\t' || c == '|') { 993 if (j == 0) { 994 out.write(Integer.toString(code)); 995 j++; 996 } else { 997 if (code != -1) { 998 out.write(' '); 999 out.write(Integer.toString(code+offset)); 1000 out.write(":1"); 1001 } 1002 if (c == '\t') { 1003 offset += cardinalities[j-1]; 1004 j++; 1005 } 1006 } 1007 code = 0; 1008 } else if (c == '\n') { 1009 j = 0; 1010 offset = 0; 1011 out.write('\n'); 1012 code = 0; 1013 } else if (c == '-') { 1014 code = -1; 1015 } else if (code != -1) { 1016 if (c > 47 && c < 58) { 1017 code = code * 10 + (c-48); 1018 } else { 1019 throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format."); 1020 } 1021 } 1022 } 1023 in.close(); 1024 out.close(); 1025 } catch (IOException e) { 1026 throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e); 1027 } 1028 } 1029 1030 protected void finalize() throws Throwable { 1031 try { 1032 closeInstanceWriter(); 1033 } finally { 1034 super.finalize(); 1035 } 1036 } 1037 1038 /* (non-Javadoc) 1039 * @see java.lang.Object#toString() 1040 */ 1041 public String toString() { 1042 final StringBuffer sb = new StringBuffer(); 1043 sb.append("\nLIBSVM INTERFACE\n"); 1044 sb.append(" LIBSVM version: "+LIBSVM_VERSION+"\n"); 1045 sb.append(" SVM-param string: "+paramString+"\n"); 1046 1047 sb.append(toStringParameters(svmParam)); 1048 return sb.toString(); 1049 } 1050 }