001 package org.maltparser.ml.libsvm; 002 003 import java.io.BufferedReader; 004 import java.io.BufferedWriter; 005 import java.io.File; 006 import java.io.FileNotFoundException; 007 import java.io.IOException; 008 import java.io.InputStream; 009 import java.io.InputStreamReader; 010 import java.io.OutputStreamWriter; 011 import java.io.PrintStream; 012 import java.text.DecimalFormat; 013 import java.text.DecimalFormatSymbols; 014 import java.util.ArrayList; 015 import java.util.Set; 016 import java.util.regex.Pattern; 017 import java.util.regex.PatternSyntaxException; 018 019 import libsvm.svm; 020 import libsvm.svm_model; 021 import libsvm.svm_node; 022 import libsvm.svm_parameter; 023 import libsvm.svm_problem; 024 025 import org.maltparser.core.exception.MaltChainedException; 026 import org.maltparser.core.feature.FeatureVector; 027 import org.maltparser.core.feature.function.FeatureFunction; 028 import org.maltparser.core.feature.value.FeatureValue; 029 import org.maltparser.core.feature.value.MultipleFeatureValue; 030 import org.maltparser.core.feature.value.SingleFeatureValue; 031 import org.maltparser.core.helper.NoPrintStream; 032 import org.maltparser.core.syntaxgraph.DependencyStructure; 033 import org.maltparser.ml.LearningMethod; 034 import org.maltparser.ml.libsvm.LibsvmException; 035 import org.maltparser.parser.DependencyParserConfig; 036 import org.maltparser.parser.guide.instance.InstanceModel; 037 import org.maltparser.parser.history.action.SingleDecision; 038 import org.maltparser.parser.history.kbest.KBestList; 039 040 /** 041 Implements an interface to the LIBSVM learner (currently the LIBSVM 2.86 is used). More information 042 about LIBSVM can be found at 043 <a href="http://www.csie.ntu.edu.tw/~cjlin/libsvm/" target="_blank">LIBSVM -- A Library for Support Vector Machines</a>. 044 045 @author Johan Hall 046 @since 1.0 047 */ 048 public class Libsvm implements LearningMethod { 049 public final static String LIBSVM_VERSION = "2.86"; 050 public enum Verbostity { 051 SILENT, ERROR, ALL 052 } 053 protected InstanceModel owner; 054 protected int learnerMode; 055 protected String name; 056 protected int numberOfInstances; 057 protected boolean saveInstanceFiles; 058 protected boolean excludeNullValues; 059 protected String pathExternalSVMTrain = null; 060 private final StringBuilder sb; 061 /** 062 * Instance output stream writer 063 */ 064 private BufferedWriter instanceOutput = null; 065 /** 066 * LIBSVM svm_model object, only used during classification. 067 */ 068 private svm_model model = null; 069 070 //private FastMulticlassModel fastModel = null; 071 /** 072 * LIBSVM svm_parameter object 073 */ 074 private svm_parameter svmParam; 075 /** 076 * Parameter string 077 */ 078 private String paramString; 079 /** 080 * An array of LIBSVM svm_node objects, only used during classification. 081 */ 082 private ArrayList<svm_node> xlist = null; 083 084 private Verbostity verbosity; 085 /** 086 * Constructs a LIBSVM learner. 087 * 088 * @param owner the guide model owner 089 * @param learnerMode the mode of the learner TRAIN or CLASSIFY 090 */ 091 public Libsvm(InstanceModel owner, Integer learnerMode) throws MaltChainedException { 092 setOwner(owner); 093 setLearningMethodName("libsvm"); 094 setLearnerMode(learnerMode.intValue()); 095 setNumberOfInstances(0); 096 verbosity = Verbostity.SILENT; 097 initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString()); 098 initSpecialParameters(); 099 if (learnerMode == TRAIN) { 100 instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins")); 101 } 102 sb = new StringBuilder(6); 103 104 } 105 106 107 public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException { 108 if (featureVector == null) { 109 throw new LibsvmException("The feature vector cannot be found"); 110 } else if (decision == null) { 111 throw new LibsvmException("The decision cannot be found"); 112 } 113 try { 114 instanceOutput.write(decision.getDecisionCode()+"\t"); 115 for (int i = 0; i < featureVector.size(); i++) { 116 FeatureValue featureValue = featureVector.get(i).getFeatureValue(); 117 if (excludeNullValues == true && featureValue.isNullValue()) { 118 instanceOutput.write("-1"); 119 } else { 120 if (featureValue instanceof SingleFeatureValue) { 121 instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+""); 122 } else if (featureValue instanceof MultipleFeatureValue) { 123 Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes(); 124 int j=0; 125 for (Integer value : values) { 126 instanceOutput.write(value.toString()); 127 if (j != values.size()-1) { 128 instanceOutput.write("|"); 129 } 130 j++; 131 } 132 } 133 } 134 if (i != featureVector.size()) { 135 instanceOutput.write('\t'); 136 } 137 } 138 139 instanceOutput.write('\n'); 140 increaseNumberOfInstances(); 141 } catch (IOException e) { 142 throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e); 143 } 144 } 145 146 147 public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { } 148 149 /* (non-Javadoc) 150 * @see org.maltparser.ml.LearningMethod#noMoreInstances() 151 */ 152 public void noMoreInstances() throws MaltChainedException { 153 closeInstanceWriter(); 154 } 155 156 157 /* (non-Javadoc) 158 * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector) 159 */ 160 public void train(FeatureVector featureVector) throws MaltChainedException { 161 if (featureVector == null) { 162 throw new LibsvmException("The feature vector cannot be found. "); 163 } else if (owner == null) { 164 throw new LibsvmException("The parent guide model cannot be found. "); 165 } 166 if (pathExternalSVMTrain == null) { 167 final svm_problem prob = new svm_problem(); 168 try { 169 final ArrayList<Integer> cardinalities = new ArrayList<Integer>(); 170 171 for (FeatureFunction feature : featureVector) { 172 cardinalities.add(feature.getFeatureValue().getCardinality()); 173 } 174 175 readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), prob, cardinalities, svmParam); 176 final String errorMessage = svm.svm_check_parameter(prob, svmParam); 177 if(errorMessage != null) { 178 throw new LibsvmException(errorMessage); 179 } 180 owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model "+getFile(".mod").getName()+"\n"); 181 final PrintStream out = System.out; 182 final PrintStream err = System.err; 183 System.setOut(NoPrintStream.NO_PRINTSTREAM); 184 //System.setErr(new PrintStream(new LoggingOutputStream(owner.getGuide().getConfiguration().getConfigLogger(), owner.getGuide().getConfiguration().getConfigLogger().getLevel()), true)); 185 System.setErr(NoPrintStream.NO_PRINTSTREAM); 186 187 svm.svm_save_model(getFile(".mod").getAbsolutePath(), svm.svm_train(prob, svmParam)); 188 189 System.setOut(err); 190 System.setOut(out); 191 if (!saveInstanceFiles) { 192 getFile(".ins").delete(); 193 } 194 } catch (OutOfMemoryError e) { 195 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 196 } catch (IllegalArgumentException e) { 197 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 198 } catch (SecurityException e) { 199 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 200 } catch (IOException e) { 201 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 202 } 203 } else { 204 trainExternal(featureVector); 205 } 206 } 207 208 private void trainExternal(FeatureVector featureVector) throws MaltChainedException { 209 210 try { 211 final ArrayList<Integer> cardinalities = new ArrayList<Integer>(); 212 for (FeatureFunction feature : featureVector) { 213 cardinalities.add(feature.getFeatureValue().getCardinality()); 214 } 215 maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities); 216 owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model (svm-train) "+getFile(".mod").getName()); 217 218 final ArrayList<String> commands = new ArrayList<String>(); 219 commands.add(pathExternalSVMTrain); 220 final String[] params = getSVMParamStringArray(svmParam); 221 for (int i=0; i < params.length; i++) { 222 commands.add(params[i]); 223 } 224 commands.add(getFile(".ins.tmp").getAbsolutePath()); 225 commands.add(getFile(".mod").getAbsolutePath()); 226 String[] arrayCommands = commands.toArray(new String[commands.size()]); 227 228 if (verbosity == Verbostity.ALL) { 229 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 230 } 231 final Process child = Runtime.getRuntime().exec(arrayCommands); 232 final InputStream in = child.getInputStream(); 233 final InputStream err = child.getErrorStream(); 234 int c; 235 while ((c = in.read()) != -1){ 236 if (verbosity == Verbostity.ALL) { 237 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 238 } 239 } 240 while ((c = err.read()) != -1){ 241 if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) { 242 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 243 } 244 } 245 if (child.waitFor() != 0) { 246 owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")"); 247 } 248 in.close(); 249 err.close(); 250 if (!saveInstanceFiles) { 251 getFile(".ins").delete(); 252 getFile(".ins.tmp").delete(); 253 } 254 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 255 } catch (InterruptedException e) { 256 throw new LibsvmException("SVM-trainer is interrupted. ", e); 257 } catch (IllegalArgumentException e) { 258 throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e); 259 } catch (SecurityException e) { 260 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 261 } catch (IOException e) { 262 throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 263 } catch (OutOfMemoryError e) { 264 throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 265 } 266 } 267 268 /* (non-Javadoc) 269 * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList) 270 */ 271 public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException { 272 if (method == null) { 273 throw new LibsvmException("The learning method cannot be found. "); 274 } else if (divideFeature == null) { 275 throw new LibsvmException("The divide feature cannot be found. "); 276 } 277 try { 278 final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins")); 279 final BufferedWriter out = method.getInstanceWriter(); 280 int l = in.read(); 281 char c; 282 int j = 0; 283 while(true) { 284 if (l == -1) { 285 sb.setLength(0); 286 break; 287 } 288 289 c = (char)l; 290 l = in.read(); 291 if (c == '\t') { 292 if (divideFeatureIndexVector.contains(j-1)) { 293 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 294 out.write('\t'); 295 } 296 out.write(sb.toString()); 297 j++; 298 out.write('\t'); 299 sb.setLength(0); 300 } else if (c == '\n') { 301 out.write(sb.toString()); 302 if (divideFeatureIndexVector.contains(j-1)) { 303 out.write('\t'); 304 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 305 } 306 out.write('\n'); 307 sb.setLength(0); 308 method.increaseNumberOfInstances(); 309 this.decreaseNumberOfInstances(); 310 j = 0; 311 } else { 312 sb.append(c); 313 } 314 } 315 in.close(); 316 getFile(".ins").delete(); 317 } catch (SecurityException e) { 318 throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e); 319 } catch (NullPointerException e) { 320 throw new LibsvmException("The instance file cannot be found. ", e); 321 } catch (FileNotFoundException e) { 322 throw new LibsvmException("The instance file cannot be found. ", e); 323 } catch (IOException e) { 324 throw new LibsvmException("The LIBSVM learner read from the instance file. ", e); 325 } 326 } 327 328 /* (non-Javadoc) 329 * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList) 330 */ 331 public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException { 332 if (model == null) { 333 File modelFile = getFile(".mod"); 334 try { 335 model = svm.svm_load_model(modelFile.getAbsolutePath()); 336 } catch (IOException e) { 337 throw new LibsvmException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e); 338 } 339 } 340 if (xlist == null) { 341 xlist = new ArrayList<svm_node>(featureVector.size()); 342 } 343 if (model == null) { 344 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. "); 345 } else if (featureVector == null) { 346 throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. "); 347 } 348 int j = 0; 349 int offset = 0; 350 351 for (FeatureFunction feature : featureVector) { 352 final FeatureValue featureValue = feature.getFeatureValue(); 353 if (!(excludeNullValues == true && featureValue.isNullValue())) { 354 if (featureValue instanceof SingleFeatureValue) { 355 if (((SingleFeatureValue)featureValue).isKnown()) { 356 if (j >= xlist.size()) { 357 svm_node x = new svm_node(); 358 x.value = 1; 359 xlist.add(j,x); 360 } 361 xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset; 362 } 363 } else if (featureValue instanceof MultipleFeatureValue) { 364 for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) { 365 if (((MultipleFeatureValue)featureValue).isKnown(value)) { 366 if (j >= xlist.size()) { 367 svm_node x = new svm_node(); 368 x.value = 1; 369 xlist.add(j,x); 370 } 371 xlist.get(j++).index = value + offset; 372 } 373 } 374 } 375 } 376 offset += featureValue.getCardinality(); 377 } 378 379 if (decision.getKBestList().getK() == 1 || svm.svm_get_svm_type(model) == svm_parameter.ONE_CLASS || 380 svm.svm_get_svm_type(model) == svm_parameter.EPSILON_SVR || 381 svm.svm_get_svm_type(model) == svm_parameter.NU_SVR) { 382 decision.getKBestList().add((int)svm.svm_predict(model, xlist.subList(0, j).toArray(new svm_node[0]))); 383 } else { 384 svm_predict_with_kbestlist(model, xlist.subList(0, j).toArray(new svm_node[0]), decision.getKBestList()); 385 } 386 387 return true; 388 } 389 390 391 public void terminate() throws MaltChainedException { 392 closeInstanceWriter(); 393 model = null; 394 svmParam = null; 395 xlist = null; 396 owner = null; 397 } 398 399 public BufferedWriter getInstanceWriter() { 400 return instanceOutput; 401 } 402 403 protected void closeInstanceWriter() throws MaltChainedException { 404 try { 405 if (instanceOutput != null) { 406 instanceOutput.flush(); 407 instanceOutput.close(); 408 instanceOutput = null; 409 } 410 } catch (IOException e) { 411 throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e); 412 } 413 } 414 415 /** 416 * Initialize the LIBSVM according to the parameter string 417 * 418 * @param paramString the parameter string to configure the LIBSVM learner. 419 * @throws MaltChainedException 420 */ 421 protected void initSvmParam(String paramString) throws MaltChainedException { 422 this.paramString = paramString; 423 svmParam = new svm_parameter(); 424 initParameters(svmParam); 425 parseParameters(paramString, svmParam); 426 } 427 428 /** 429 * Returns the parameter string for used for configure LIBSVM 430 * 431 * @return the parameter string for used for configure LIBSVM 432 */ 433 public String getParamString() { 434 return paramString; 435 } 436 437 public InstanceModel getOwner() { 438 return owner; 439 } 440 441 protected void setOwner(InstanceModel owner) { 442 this.owner = owner; 443 } 444 445 public int getLearnerMode() { 446 return learnerMode; 447 } 448 449 public void setLearnerMode(int learnerMode) { 450 this.learnerMode = learnerMode; 451 } 452 453 public String getLearningMethodName() { 454 return name; 455 } 456 457 /** 458 * Returns the current configuration 459 * 460 * @return the current configuration 461 * @throws MaltChainedException 462 */ 463 public DependencyParserConfig getConfiguration() throws MaltChainedException { 464 return owner.getGuide().getConfiguration(); 465 } 466 467 public int getNumberOfInstances() { 468 return numberOfInstances; 469 } 470 471 public void increaseNumberOfInstances() { 472 numberOfInstances++; 473 owner.increaseFrequency(); 474 } 475 476 public void decreaseNumberOfInstances() { 477 numberOfInstances--; 478 owner.decreaseFrequency(); 479 } 480 481 protected void setNumberOfInstances(int numberOfInstances) { 482 this.numberOfInstances = 0; 483 } 484 485 protected void setLearningMethodName(String name) { 486 this.name = name; 487 } 488 489 protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException { 490 return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix); 491 } 492 493 protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException { 494 return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix); 495 } 496 497 protected File getFile(String suffix) throws MaltChainedException { 498 return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix); 499 } 500 501 /** 502 * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated). 503 * 504 * @param isr the instance stream reader for the instance file 505 * @param prob a svm_problem object 506 * @param cardinality a vector containing the number of distinct values for a particular column. 507 * @param param a svm_parameter object 508 * @throws LibsvmException 509 */ 510 public final void readProblemMaltSVMFormat(InputStreamReader isr, svm_problem prob, ArrayList<Integer> cardinality, svm_parameter param) throws MaltChainedException { 511 try { 512 final BufferedReader fp = new BufferedReader(isr); 513 int max_index = 0; 514 if (xlist == null) { 515 xlist = new ArrayList<svm_node>(); 516 } 517 prob.l = getNumberOfInstances(); 518 prob.x = new svm_node[prob.l][]; 519 prob.y = new double[prob.l]; 520 int i = 0; 521 final Pattern tabPattern = Pattern.compile("\t"); 522 final Pattern pipePattern = Pattern.compile("\\|"); 523 while(true) { 524 String line = fp.readLine(); 525 if(line == null) break; 526 String[] columns = tabPattern.split(line); 527 528 if (columns.length == 0) { 529 continue; 530 } 531 532 int offset = 0; 533 int j = 0; 534 try { 535 prob.y[i] = (double)Integer.parseInt(columns[j]); 536 int p = 0; 537 for(j = 1; j < columns.length; j++) { 538 final String[] items = pipePattern.split(columns[j]); 539 for (int k = 0; k < items.length; k++) { 540 try { 541 if (Integer.parseInt(items[k]) != -1) { 542 xlist.add(p, new svm_node()); 543 xlist.get(p).value = 1; 544 xlist.get(p).index = Integer.parseInt(items[k])+offset; 545 p++; 546 } 547 } catch (NumberFormatException e) { 548 throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e); 549 } 550 } 551 offset += cardinality.get(j-1); 552 } 553 prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]); 554 if(columns.length > 1) { 555 max_index = Math.max(max_index, xlist.get(p-1).index); 556 } 557 i++; 558 xlist.clear(); 559 } catch (ArrayIndexOutOfBoundsException e) { 560 throw new LibsvmException("Cannot read from the instance file. ", e); 561 } 562 } 563 fp.close(); 564 if (param.gamma == 0) { 565 param.gamma = 1.0/max_index; 566 } 567 xlist = null; 568 } catch (IOException e) { 569 throw new LibsvmException("Cannot read from the instance file. ", e); 570 } 571 } 572 573 protected void initSpecialParameters() throws MaltChainedException { 574 if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) { 575 excludeNullValues = true; 576 } else { 577 excludeNullValues = false; 578 } 579 saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue(); 580 581 if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) { 582 try { 583 if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) { 584 throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong."); 585 } 586 if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) { 587 throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file"); 588 } 589 if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) { 590 throw new LibsvmException("The option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. "); 591 } 592 pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString(); 593 } catch (SecurityException e) { 594 throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e); 595 } 596 } 597 if (getConfiguration().getOptionValue("libsvm", "verbosity") != null) { 598 verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("libsvm", "verbosity").toString().toUpperCase()); 599 } 600 } 601 602 /** 603 * Assign a default value to all svm parameters 604 * 605 * @param param a svm_parameter object 606 */ 607 protected void initParameters(svm_parameter param) throws MaltChainedException { 608 if (param == null) { 609 throw new LibsvmException("Svm-parameters cannot be found. "); 610 } 611 param.svm_type = svm_parameter.C_SVC; 612 param.kernel_type = svm_parameter.POLY; 613 param.degree = 2; 614 param.gamma = 0.2; // 1/k 615 param.coef0 = 0; 616 param.nu = 0.5; 617 param.cache_size = 100; 618 param.C = 1; 619 param.eps = 1.0; 620 param.p = 0.1; 621 param.shrinking = 1; 622 param.probability = 0; 623 param.nr_weight = 0; 624 param.weight_label = new int[0]; 625 param.weight = new double[0]; 626 } 627 628 /** 629 * Returns a string containing all svm-parameters of interest 630 * 631 * @param param a svm_parameter object 632 * @return a string containing all svm-parameters of interest 633 */ 634 public String toStringParameters(svm_parameter param) { 635 if (param == null) { 636 throw new IllegalArgumentException("Svm-parameters cannot be found. "); 637 } 638 final StringBuffer sb = new StringBuffer(); 639 640 final String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"}; 641 final String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"}; 642 final DecimalFormat dform = new DecimalFormat("#0.0#"); 643 final DecimalFormatSymbols sym = new DecimalFormatSymbols(); 644 sym.setDecimalSeparator('.'); 645 dform.setDecimalFormatSymbols(sym); 646 sb.append("LIBSVM SETTINGS\n"); 647 sb.append(" SVM type : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n"); 648 sb.append(" Kernel : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n"); 649 if (param.kernel_type == svm_parameter.POLY) { 650 sb.append(" Degree : " + param.degree + "\n"); 651 } 652 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) { 653 sb.append(" Gamma : " + dform.format(param.gamma) + "\n"); 654 if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) { 655 sb.append(" Coef0 : " + dform.format(param.coef0) + "\n"); 656 } 657 } 658 if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) { 659 sb.append(" Nu : " + dform.format(param.nu) + "\n"); 660 } 661 sb.append(" Cache Size : " + dform.format(param.cache_size) + " MB\n"); 662 if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) { 663 sb.append(" C : " + dform.format(param.C) + "\n"); 664 } 665 sb.append(" Eps : " + dform.format(param.eps) + "\n"); 666 if (param.svm_type == svm_parameter.EPSILON_SVR) { 667 sb.append(" P : " + dform.format(param.p) + "\n"); 668 } 669 sb.append(" Shrinking : " + param.shrinking + "\n"); 670 sb.append(" Probability : " + param.probability + "\n"); 671 if (param.svm_type == svm_parameter.C_SVC) { 672 sb.append(" #Weight : " + param.nr_weight + "\n"); 673 if (param.nr_weight > 0) { 674 sb.append(" Weight labels : "); 675 for (int i = 0; i < param.nr_weight; i++) { 676 sb.append(param.weight_label[i]); 677 if (i != param.nr_weight-1) { 678 sb.append(", "); 679 } 680 } 681 sb.append("\n"); 682 for (int i = 0; i < param.nr_weight; i++) { 683 sb.append(dform.format(param.weight)); 684 if (i != param.nr_weight-1) { 685 sb.append(", "); 686 } 687 } 688 sb.append("\n"); 689 } 690 } 691 return sb.toString(); 692 } 693 694 public String[] getSVMParamStringArray(svm_parameter param) { 695 final ArrayList<String> params = new ArrayList<String>(); 696 697 if (param.svm_type != 0) { 698 params.add("-s"); params.add(new Integer(param.svm_type).toString()); 699 } 700 if (param.kernel_type != 2) { 701 params.add("-t"); params.add(new Integer(param.kernel_type).toString()); 702 } 703 if (param.degree != 3) { 704 params.add("-d"); params.add(new Integer(param.degree).toString()); 705 } 706 params.add("-g"); params.add(new Double(param.gamma).toString()); 707 if (param.coef0 != 0) { 708 params.add("-r"); params.add(new Double(param.coef0).toString()); 709 } 710 if (param.nu != 0.5) { 711 params.add("-n"); params.add(new Double(param.nu).toString()); 712 } 713 if (param.cache_size != 100) { 714 params.add("-m"); params.add(new Double(param.cache_size).toString()); 715 } 716 if (param.C != 1) { 717 params.add("-c"); params.add(new Double(param.C).toString()); 718 } 719 if (param.eps != 0.001) { 720 params.add("-e"); params.add(new Double(param.eps).toString()); 721 } 722 if (param.p != 0.1) { 723 params.add("-p"); params.add(new Double(param.p).toString()); 724 } 725 if (param.shrinking != 1) { 726 params.add("-h"); params.add(new Integer(param.shrinking).toString()); 727 } 728 if (param.probability != 0) { 729 params.add("-b"); params.add(new Integer(param.probability).toString()); 730 } 731 732 return params.toArray(new String[params.size()]); 733 } 734 /** 735 * Parses the parameter string. The parameter string must contain parameter and value pairs, which are separated by a blank 736 * or a underscore. The parameter begins with a character '-' followed by a one-character flag and the value must comply with 737 * the parameters data type. Some examples: 738 * 739 * -s 0 -t 1 -d 2 -g 0.4 -e 0.1 740 * -s_0_-t_1_-d_2_-g_0.4_-e_0.1 741 * 742 * @param paramstring the parameter string 743 * @param param a svm_parameter object 744 * @throws LibsvmException 745 */ 746 public void parseParameters(String paramstring, svm_parameter param) throws MaltChainedException { 747 if (param == null) { 748 throw new LibsvmException("Svm-parameters cannot be found. "); 749 } 750 if (paramstring == null) { 751 return; 752 } 753 final String[] argv; 754 try { 755 argv = paramstring.split("[_\\p{Blank}]"); 756 } catch (PatternSyntaxException e) { 757 throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e); 758 } 759 for (int i=0; i < argv.length-1; i++) { 760 if(argv[i].charAt(0) != '-') { 761 throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0)); 762 } 763 if(++i>=argv.length) { 764 throw new LibsvmException("The last argument does not have any value. "); 765 } 766 try { 767 switch(argv[i-1].charAt(1)) { 768 case 's': 769 param.svm_type = Integer.parseInt(argv[i]); 770 break; 771 case 't': 772 param.kernel_type = Integer.parseInt(argv[i]); 773 break; 774 case 'd': 775 param.degree = Integer.parseInt(argv[i]); 776 break; 777 case 'g': 778 param.gamma = Double.valueOf(argv[i]).doubleValue(); 779 break; 780 case 'r': 781 param.coef0 = Double.valueOf(argv[i]).doubleValue(); 782 break; 783 case 'n': 784 param.nu = Double.valueOf(argv[i]).doubleValue(); 785 break; 786 case 'm': 787 param.cache_size = Double.valueOf(argv[i]).doubleValue(); 788 break; 789 case 'c': 790 param.C = Double.valueOf(argv[i]).doubleValue(); 791 break; 792 case 'e': 793 param.eps = Double.valueOf(argv[i]).doubleValue(); 794 break; 795 case 'p': 796 param.p = Double.valueOf(argv[i]).doubleValue(); 797 break; 798 case 'h': 799 param.shrinking = Integer.parseInt(argv[i]); 800 break; 801 case 'b': 802 param.probability = Integer.parseInt(argv[i]); 803 break; 804 case 'w': 805 ++param.nr_weight; 806 { 807 int[] old = param.weight_label; 808 param.weight_label = new int[param.nr_weight]; 809 System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1); 810 } 811 812 { 813 double[] old = param.weight; 814 param.weight = new double[param.nr_weight]; 815 System.arraycopy(old,0,param.weight,0,param.nr_weight-1); 816 } 817 818 param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2)); 819 param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue(); 820 break; 821 case 'Y': 822 case 'V': 823 case 'S': 824 case 'F': 825 case 'T': 826 case 'M': 827 case 'N': 828 break; 829 default: 830 throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. "); 831 } 832 } catch (ArrayIndexOutOfBoundsException e) { 833 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 834 } catch (NumberFormatException e) { 835 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 836 } catch (NullPointerException e) { 837 throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 838 } 839 } 840 } 841 842 public void svm_predict_with_kbestlist(svm_model model, svm_node[] x, KBestList kBestList) throws MaltChainedException { 843 int i; 844 final int nr_class = svm.svm_get_nr_class(model); 845 final double[] dec_values = new double[nr_class*(nr_class-1)/2]; 846 svm.svm_predict_values(model, x, dec_values); 847 848 final int[] vote = new int[nr_class]; 849 final int[] voteindex = new int[nr_class]; 850 for(i=0;i<nr_class;i++) { 851 vote[i] = 0; 852 voteindex[i] = i; 853 } 854 int pos=0; 855 for(i=0;i<nr_class;i++) { 856 for(int j=i+1;j<nr_class;j++) { 857 if(dec_values[pos++] > 0) { 858 ++vote[i]; 859 } else { 860 ++vote[j]; 861 } 862 } 863 } 864 865 int small, temp; 866 for (i=0;i<nr_class-1;i++) { 867 small = i; 868 for (int j=i;j<nr_class;j++) { 869 if (vote[j] > vote[small]) { 870 small = j; 871 } 872 } 873 temp = vote[small]; 874 vote[small] = vote[i]; 875 vote[i] = temp; 876 temp = voteindex[small]; 877 voteindex[small] = voteindex[i]; 878 voteindex[i] = temp; 879 } 880 final int[] labels = new int[nr_class]; 881 svm.svm_get_labels(model, labels); 882 int k = nr_class-1; 883 if (kBestList.getK() != -1) { 884 k = kBestList.getK() - 1; 885 } 886 887 for (i=0; i<nr_class && k >= 0; i++, k--) { 888 if (vote[i] > 0 || i == 0) { 889 //kBestList.addKBestItem(labels[voteindex[i]], (double)vote[i]/(double)(nr_class*(nr_class-1)/2)); 890 //kBestList.addKBestItem(labels[voteindex[i]]); 891 kBestList.add(labels[voteindex[i]]); 892 } 893 } 894 } 895 896 /** 897 * Converts the instance file (Malt's own SVM format) into the LIBSVM (SVMLight) format. The input instance file is removed (replaced) 898 * by the instance file in the LIBSVM (SVMLight) format. If a column contains -1, the value will be removed in destination file. 899 * 900 * @param isr the input stream reader for the source instance file 901 * @param osw the output stream writer for the destination instance file 902 * @param cardinality a vector containing the number of distinct values for a particular column 903 * @throws LibsvmException 904 */ 905 public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, ArrayList<Integer> cardinality) throws MaltChainedException { 906 try { 907 final BufferedReader in = new BufferedReader(isr); 908 final BufferedWriter out = new BufferedWriter(osw); 909 910 int c; 911 int j = 0; 912 int offset = 0; 913 int code = 0; 914 while(true) { 915 c = in.read(); 916 if (c == -1) { 917 break; 918 } 919 920 if (c == '\t' || c == '|') { 921 if (j == 0) { 922 out.write(Integer.toString(code)); 923 j++; 924 } else { 925 if (code != -1) { 926 out.write(' '); 927 out.write(Integer.toString(code+offset)); 928 out.write(":1"); 929 } 930 if (c == '\t') { 931 offset += cardinality.get(j-1); 932 j++; 933 } 934 } 935 code = 0; 936 } else if (c == '\n') { 937 j = 0; 938 offset = 0; 939 out.write('\n'); 940 code = 0; 941 } else if (c == '-') { 942 code = -1; 943 } else if (code != -1) { 944 if (c > 47 && c < 58) { 945 code = code * 10 + (c-48); 946 } else { 947 throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format."); 948 } 949 } 950 } 951 in.close(); 952 out.close(); 953 } catch (IOException e) { 954 throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e); 955 } 956 } 957 958 protected void finalize() throws Throwable { 959 try { 960 closeInstanceWriter(); 961 } finally { 962 super.finalize(); 963 } 964 } 965 966 /* (non-Javadoc) 967 * @see java.lang.Object#toString() 968 */ 969 public String toString() { 970 final StringBuffer sb = new StringBuffer(); 971 sb.append("\nLIBSVM INTERFACE\n"); 972 sb.append(" LIBSVM version: "+LIBSVM_VERSION+"\n"); 973 sb.append(" SVM-param string: "+paramString+"\n"); 974 975 sb.append(toStringParameters(svmParam)); 976 return sb.toString(); 977 } 978 }