001 package org.maltparser.ml.liblinear; 002 003 import java.io.BufferedReader; 004 import java.io.BufferedWriter; 005 import java.io.File; 006 import java.io.FileNotFoundException; 007 import java.io.IOException; 008 import java.io.InputStream; 009 import java.io.InputStreamReader; 010 import java.io.OutputStreamWriter; 011 import java.io.PrintStream; 012 import java.util.ArrayList; 013 import java.util.LinkedHashMap; 014 import java.util.Set; 015 import java.util.regex.Pattern; 016 import java.util.regex.PatternSyntaxException; 017 018 import liblinear.FeatureNode; 019 import liblinear.Linear; 020 import liblinear.Model; 021 import liblinear.Parameter; 022 import liblinear.Problem; 023 import liblinear.SolverType; 024 025 import org.maltparser.core.exception.MaltChainedException; 026 import org.maltparser.core.feature.FeatureVector; 027 import org.maltparser.core.feature.function.FeatureFunction; 028 import org.maltparser.core.feature.value.FeatureValue; 029 import org.maltparser.core.feature.value.MultipleFeatureValue; 030 import org.maltparser.core.feature.value.SingleFeatureValue; 031 import org.maltparser.core.helper.NoPrintStream; 032 import org.maltparser.core.syntaxgraph.DependencyStructure; 033 import org.maltparser.ml.LearningMethod; 034 import org.maltparser.parser.DependencyParserConfig; 035 import org.maltparser.parser.guide.instance.InstanceModel; 036 import org.maltparser.parser.history.action.SingleDecision; 037 import org.maltparser.parser.history.kbest.KBestList; 038 import org.maltparser.parser.history.kbest.ScoredKBestList; 039 040 041 public class Liblinear implements LearningMethod { 042 public final static String LIBLINEAR_VERSION = "1.33"; 043 public enum Verbostity { 044 SILENT, ERROR, ALL 045 } 046 private LinkedHashMap<String, String> liblinearOptions; 047 048 protected InstanceModel owner; 049 protected int learnerMode; 050 protected String name; 051 protected int numberOfInstances; 052 protected boolean saveInstanceFiles; 053 protected boolean excludeNullValues; 054 protected String pathExternalLiblinearTrain = null; 055 private int[] cardinalities; 056 /** 057 * Instance output stream writer 058 */ 059 private BufferedWriter instanceOutput = null; 060 /** 061 * Liblinear model object, only used during classification. 062 */ 063 private Model model = null; 064 065 /** 066 * Parameter string 067 */ 068 private String paramString; 069 070 private ArrayList<FeatureNode> xlist = null; 071 072 private Verbostity verbosity; 073 /** 074 * Constructs a Liblinear learner. 075 * 076 * @param owner the guide model owner 077 * @param learnerMode the mode of the learner TRAIN or CLASSIFY 078 */ 079 public Liblinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException { 080 setOwner(owner); 081 setLearningMethodName("liblinear"); 082 setLearnerMode(learnerMode.intValue()); 083 setNumberOfInstances(0); 084 verbosity = Verbostity.SILENT; 085 086 liblinearOptions = new LinkedHashMap<String, String>(); 087 initLiblinearOptions(); 088 parseParameters(getConfiguration().getOptionValue("liblinear", "liblinear_options").toString()); 089 initSpecialParameters(); 090 if (learnerMode == BATCH) { 091 // if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) { 092 // if (pathExternalLiblinearTrain != null) { 093 // owner.getGuide().getConfiguration().getConfigLogger().info(" Learner : Liblinear external "+ getLibLinearOptions() + "\n"); 094 // } else { 095 // owner.getGuide().getConfiguration().getConfigLogger().info(" Learner : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions() + "\n"); 096 // } 097 // } 098 instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins")); 099 } 100 // else { 101 // if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) { 102 // owner.getGuide().getConfiguration().getConfigLogger().info(" Classifier : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions()+ "\n"); 103 // } 104 // } 105 } 106 107 108 public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException { 109 if (featureVector == null) { 110 throw new LiblinearException("The feature vector cannot be found"); 111 } else if (decision == null) { 112 throw new LiblinearException("The decision cannot be found"); 113 } 114 try { 115 instanceOutput.write(decision.getDecisionCode()+"\t"); 116 for (int i = 0; i < featureVector.size(); i++) { 117 FeatureValue featureValue = featureVector.get(i).getFeatureValue(); 118 if (excludeNullValues == true && featureValue.isNullValue()) { 119 instanceOutput.write("-1"); 120 } else { 121 if (featureValue instanceof SingleFeatureValue) { 122 instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+""); 123 } else if (featureValue instanceof MultipleFeatureValue) { 124 Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes(); 125 int j=0; 126 for (Integer value : values) { 127 instanceOutput.write(value.toString()); 128 if (j != values.size()-1) { 129 instanceOutput.write("|"); 130 } 131 j++; 132 } 133 } 134 } 135 if (i != featureVector.size()) { 136 instanceOutput.write('\t'); 137 } 138 } 139 140 instanceOutput.write('\n'); 141 instanceOutput.flush(); 142 increaseNumberOfInstances(); 143 } catch (IOException e) { 144 throw new LiblinearException("The Liblinear learner cannot write to the instance file. ", e); 145 } 146 } 147 148 public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { } 149 150 /* (non-Javadoc) 151 * @see org.maltparser.ml.LearningMethod#noMoreInstances() 152 */ 153 public void noMoreInstances() throws MaltChainedException { 154 closeInstanceWriter(); 155 } 156 157 158 /* (non-Javadoc) 159 * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector) 160 */ 161 public void train(FeatureVector featureVector) throws MaltChainedException { 162 if (featureVector == null) { 163 throw new LiblinearException("The feature vector cannot be found. "); 164 } else if (owner == null) { 165 throw new LiblinearException("The parent guide model cannot be found. "); 166 } 167 cardinalities = getCardinalities(featureVector); 168 if (pathExternalLiblinearTrain == null) { 169 try { 170 final Problem problem = readLibLinearProblem(getInstanceInputStreamReader(".ins"), cardinalities); 171 if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) { 172 owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model "+getFile(".mod").getName()+"\n"); 173 } 174 final PrintStream out = System.out; 175 final PrintStream err = System.err; 176 System.setOut(NoPrintStream.NO_PRINTSTREAM); 177 System.setErr(NoPrintStream.NO_PRINTSTREAM); 178 Linear.saveModel(new File(getFile(".mod").getAbsolutePath()), Linear.train(problem, getLiblinearParameters())); 179 180 System.setOut(err); 181 System.setOut(out); 182 if (!saveInstanceFiles) { 183 getFile(".ins").delete(); 184 } 185 } catch (OutOfMemoryError e) { 186 throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 187 } catch (IllegalArgumentException e) { 188 throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e); 189 } catch (SecurityException e) { 190 throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e); 191 } catch (IOException e) { 192 throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 193 } 194 } else { 195 trainExternal(featureVector); 196 } 197 saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities); 198 } 199 200 private void trainExternal(FeatureVector featureVector) throws MaltChainedException { 201 try { 202 maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities); 203 owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model (external) "+getFile(".mod").getName()); 204 205 final String[] params = getLibLinearParamStringArray(); 206 String[] arrayCommands = new String[params.length+3]; 207 int i = 0; 208 arrayCommands[i++] = pathExternalLiblinearTrain; 209 for (; i <= params.length; i++) { 210 arrayCommands[i] = params[i-1]; 211 } 212 arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath(); 213 arrayCommands[i++] = getFile(".mod").getAbsolutePath(); 214 215 if (verbosity == Verbostity.ALL) { 216 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 217 } 218 final Process child = Runtime.getRuntime().exec(arrayCommands); 219 final InputStream in = child.getInputStream(); 220 final InputStream err = child.getErrorStream(); 221 int c; 222 while ((c = in.read()) != -1){ 223 if (verbosity == Verbostity.ALL) { 224 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 225 } 226 } 227 while ((c = err.read()) != -1){ 228 if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) { 229 owner.getGuide().getConfiguration().getConfigLogger().info((char)c); 230 } 231 } 232 if (child.waitFor() != 0) { 233 owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")"); 234 } 235 in.close(); 236 err.close(); 237 if (!saveInstanceFiles) { 238 getFile(".ins").delete(); 239 getFile(".ins.tmp").delete(); 240 } 241 owner.getGuide().getConfiguration().getConfigLogger().info('\n'); 242 } catch (InterruptedException e) { 243 throw new LiblinearException("Liblinear is interrupted. ", e); 244 } catch (IllegalArgumentException e) { 245 throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e); 246 } catch (SecurityException e) { 247 throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e); 248 } catch (IOException e) { 249 throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e); 250 } catch (OutOfMemoryError e) { 251 throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e); 252 } 253 } 254 255 private int[] getCardinalities(FeatureVector featureVector) { 256 int[] cardinalities = new int[featureVector.size()]; 257 int i = 0; 258 for (FeatureFunction feature : featureVector) { 259 cardinalities[i++] = feature.getFeatureValue().getCardinality(); 260 } 261 return cardinalities; 262 } 263 264 private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException { 265 final BufferedWriter out = new BufferedWriter(osw); 266 try { 267 for (int i = 0, n = cardinalities.length; i < n; i++) { 268 out.write(Integer.toString(cardinalities[i])); 269 if (i < n - 1) { 270 out.write(','); 271 } 272 } 273 out.write('\n'); 274 out.close(); 275 } catch (IOException e) { 276 throw new LiblinearException("", e); 277 } 278 } 279 280 private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException { 281 int[] cardinalities = null; 282 try { 283 final BufferedReader in = new BufferedReader(isr); 284 String line; 285 if ((line = in.readLine()) != null) { 286 String[] items = line.split(","); 287 cardinalities = new int[items.length]; 288 for (int i = 0; i < items.length; i++) { 289 cardinalities[i] = Integer.parseInt(items[i]); 290 } 291 } 292 in.close(); 293 } catch (IOException e) { 294 throw new LiblinearException("", e); 295 } catch (NumberFormatException e) { 296 throw new LiblinearException("", e); 297 } 298 return cardinalities; 299 } 300 301 /* (non-Javadoc) 302 * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList) 303 */ 304 public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException { 305 if (method == null) { 306 throw new LiblinearException("The learning method cannot be found. "); 307 } else if (divideFeature == null) { 308 throw new LiblinearException("The divide feature cannot be found. "); 309 } 310 try { 311 final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins")); 312 final BufferedWriter out = method.getInstanceWriter(); 313 final StringBuilder sb = new StringBuilder(6); 314 int l = in.read(); 315 char c; 316 int j = 0; 317 318 while(true) { 319 if (l == -1) { 320 sb.setLength(0); 321 break; 322 } 323 324 c = (char)l; 325 l = in.read(); 326 if (c == '\t') { 327 if (divideFeatureIndexVector.contains(j-1)) { 328 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 329 out.write('\t'); 330 } 331 out.write(sb.toString()); 332 j++; 333 out.write('\t'); 334 sb.setLength(0); 335 } else if (c == '\n') { 336 out.write(sb.toString()); 337 if (divideFeatureIndexVector.contains(j-1)) { 338 out.write('\t'); 339 out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode())); 340 } 341 out.write('\n'); 342 sb.setLength(0); 343 method.increaseNumberOfInstances(); 344 this.decreaseNumberOfInstances(); 345 j = 0; 346 } else { 347 sb.append(c); 348 } 349 } 350 in.close(); 351 getFile(".ins").delete(); 352 } catch (SecurityException e) { 353 throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e); 354 } catch (NullPointerException e) { 355 throw new LiblinearException("The instance file cannot be found. ", e); 356 } catch (FileNotFoundException e) { 357 throw new LiblinearException("The instance file cannot be found. ", e); 358 } catch (IOException e) { 359 throw new LiblinearException("The Liblinear learner read from the instance file. ", e); 360 } 361 } 362 363 /* (non-Javadoc) 364 * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList) 365 */ 366 public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException { 367 if (model == null) { 368 File modelFile = getFile(".mod"); 369 try { 370 model = Linear.loadModel(new File(modelFile.getAbsolutePath())); 371 } catch (IOException e) { 372 throw new LiblinearException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e); 373 } 374 } 375 376 if (cardinalities == null) { 377 if (getFile(".car").exists()) { 378 cardinalities = loadCardinalities(getInstanceInputStreamReader(".car")); 379 } else { 380 cardinalities = getCardinalities(featureVector); 381 } 382 } 383 if (xlist == null) { 384 xlist = new ArrayList<FeatureNode>(featureVector.size()); 385 } 386 if (model == null) { 387 throw new LiblinearException("The Liblinear learner cannot predict the next class, because the learning model cannot be found. "); 388 } else if (featureVector == null) { 389 throw new LiblinearException("The Liblinear learner cannot predict the next class, because the feature vector cannot be found. "); 390 } 391 int j = 0; 392 int offset = 1; 393 int i = 0; 394 for (FeatureFunction feature : featureVector) { 395 final FeatureValue featureValue = feature.getFeatureValue(); 396 if (!(excludeNullValues == true && featureValue.isNullValue())) { 397 if (featureValue instanceof SingleFeatureValue) { 398 if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) { 399 xlist.add(j++, new FeatureNode(((SingleFeatureValue)featureValue).getCode() + offset, 1)); 400 } 401 } else if (featureValue instanceof MultipleFeatureValue) { 402 for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) { 403 if (value < cardinalities[i]) { 404 xlist.add(j++, new FeatureNode(value + offset, 1)); 405 } 406 } 407 } 408 } 409 offset += cardinalities[i]; 410 i++; 411 } 412 413 FeatureNode[] xarray = new FeatureNode[j]; 414 for (int k = 0; k < j; k++) { 415 xarray[k] = xlist.get(k); 416 } 417 418 if (decision.getKBestList().getK() == 1) { 419 decision.getKBestList().add(Linear.predict(model, xarray)); 420 } else { 421 liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList()); 422 } 423 424 xlist.clear(); 425 426 return true; 427 } 428 429 430 public void terminate() throws MaltChainedException { 431 closeInstanceWriter(); 432 model = null; 433 xlist = null; 434 owner = null; 435 } 436 437 public BufferedWriter getInstanceWriter() { 438 return instanceOutput; 439 } 440 441 protected void closeInstanceWriter() throws MaltChainedException { 442 try { 443 if (instanceOutput != null) { 444 instanceOutput.flush(); 445 instanceOutput.close(); 446 instanceOutput = null; 447 } 448 } catch (IOException e) { 449 throw new LiblinearException("The Liblinear learner cannot close the instance file. ", e); 450 } 451 } 452 453 454 /** 455 * Returns the parameter string for used for configure Liblinear 456 * 457 * @return the parameter string for used for configure Liblinear 458 */ 459 public String getParamString() { 460 return paramString; 461 } 462 463 public InstanceModel getOwner() { 464 return owner; 465 } 466 467 protected void setOwner(InstanceModel owner) { 468 this.owner = owner; 469 } 470 471 public int getLearnerMode() { 472 return learnerMode; 473 } 474 475 public void setLearnerMode(int learnerMode) throws MaltChainedException { 476 this.learnerMode = learnerMode; 477 } 478 479 public String getLearningMethodName() { 480 return name; 481 } 482 483 /** 484 * Returns the current configuration 485 * 486 * @return the current configuration 487 * @throws MaltChainedException 488 */ 489 public DependencyParserConfig getConfiguration() throws MaltChainedException { 490 return owner.getGuide().getConfiguration(); 491 } 492 493 public int getNumberOfInstances() { 494 return numberOfInstances; 495 } 496 497 public void increaseNumberOfInstances() { 498 numberOfInstances++; 499 owner.increaseFrequency(); 500 } 501 502 public void decreaseNumberOfInstances() { 503 numberOfInstances--; 504 owner.decreaseFrequency(); 505 } 506 507 protected void setNumberOfInstances(int numberOfInstances) { 508 this.numberOfInstances = 0; 509 } 510 511 protected void setLearningMethodName(String name) { 512 this.name = name; 513 } 514 515 protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException { 516 return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix); 517 } 518 519 protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException { 520 return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix); 521 } 522 523 protected File getFile(String suffix) throws MaltChainedException { 524 return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix); 525 } 526 527 /** 528 * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated). 529 * 530 * @param isr the instance stream reader for the instance file 531 * @param cardinalities a array containing the number of distinct values for a particular column. 532 * @throws LiblinearException 533 */ 534 public Problem readLibLinearProblem(InputStreamReader isr, int[] cardinalities) throws MaltChainedException { 535 Problem problem = new Problem(); 536 537 try { 538 final BufferedReader fp = new BufferedReader(isr); 539 int max_index = 0; 540 if (xlist == null) { 541 xlist = new ArrayList<FeatureNode>(); 542 } 543 problem.bias = getBias(); 544 problem.l = getNumberOfInstances(); 545 problem.x = new FeatureNode[problem.l][]; 546 problem.y = new int[problem.l]; 547 int i = 0; 548 final Pattern tabPattern = Pattern.compile("\t"); 549 final Pattern pipePattern = Pattern.compile("\\|"); 550 while(true) { 551 String line = fp.readLine(); 552 if(line == null) break; 553 String[] columns = tabPattern.split(line); 554 555 if (columns.length == 0) { 556 continue; 557 } 558 559 int offset = 1; 560 int j = 0; 561 try { 562 problem.y[i] = Integer.parseInt(columns[j]); 563 int p = 0; 564 for(j = 1; j < columns.length; j++) { 565 final String[] items = pipePattern.split(columns[j]); 566 for (int k = 0; k < items.length; k++) { 567 try { 568 if (Integer.parseInt(items[k]) != -1) { 569 xlist.add(p, new FeatureNode(Integer.parseInt(items[k])+offset, 1)); 570 p++; 571 } 572 } catch (NumberFormatException e) { 573 throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e); 574 } 575 } 576 offset += cardinalities[j-1]; 577 } 578 problem.x[i] = xlist.subList(0, p).toArray(new FeatureNode[0]); 579 if(columns.length > 1) { 580 max_index = Math.max(max_index, problem.x[i][p-1].index); 581 } 582 i++; 583 xlist.clear(); 584 } catch (ArrayIndexOutOfBoundsException e) { 585 throw new LiblinearException("Cannot read from the instance file. ", e); 586 } 587 } 588 fp.close(); 589 problem.n = max_index; 590 if ( problem.bias >= 0 ) { 591 problem.n++; 592 } 593 xlist = null; 594 } catch (IOException e) { 595 throw new LiblinearException("Cannot read from the instance file. ", e); 596 } 597 return problem; 598 } 599 600 protected void initSpecialParameters() throws MaltChainedException { 601 if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) { 602 excludeNullValues = true; 603 } else { 604 excludeNullValues = false; 605 } 606 saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("liblinear", "save_instance_files")).booleanValue(); 607 608 if (!getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().equals("")) { 609 try { 610 if (!new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).exists()) { 611 throw new LiblinearException("The path to the external Liblinear trainer 'svm-train' is wrong."); 612 } 613 if (new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).isDirectory()) { 614 throw new LiblinearException("The option --liblinear-liblinear_external points to a directory, the path should point at the 'train' file or the 'train.exe' file"); 615 } 616 if (!(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train") || getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train.exe"))) { 617 throw new LiblinearException("The option --liblinear-liblinear_external does not specify the path to 'train' file or the 'train.exe' file. "); 618 } 619 pathExternalLiblinearTrain = getConfiguration().getOptionValue("liblinear", "liblinear_external").toString(); 620 } catch (SecurityException e) { 621 throw new LiblinearException("Access denied to the file specified by the option --liblinear-liblinear_external. ", e); 622 } 623 } 624 if (getConfiguration().getOptionValue("liblinear", "verbosity") != null) { 625 verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("liblinear", "verbosity").toString().toUpperCase()); 626 } 627 } 628 629 public String getLibLinearOptions() { 630 StringBuilder sb = new StringBuilder(); 631 for (String key : liblinearOptions.keySet()) { 632 sb.append('-'); 633 sb.append(key); 634 sb.append(' '); 635 sb.append(liblinearOptions.get(key)); 636 sb.append(' '); 637 } 638 return sb.toString(); 639 } 640 641 public void parseParameters(String paramstring) throws MaltChainedException { 642 if (paramstring == null) { 643 return; 644 } 645 final String[] argv; 646 String allowedFlags = "sceB"; 647 try { 648 argv = paramstring.split("[_\\p{Blank}]"); 649 } catch (PatternSyntaxException e) { 650 throw new LiblinearException("Could not split the liblinear-parameter string '"+paramstring+"'. ", e); 651 } 652 for (int i=0; i < argv.length-1; i++) { 653 if(argv[i].charAt(0) != '-') { 654 throw new LiblinearException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0)); 655 } 656 if(++i>=argv.length) { 657 throw new LiblinearException("The last argument does not have any value. "); 658 } 659 try { 660 int index = allowedFlags.indexOf(argv[i-1].charAt(1)); 661 if (index != -1) { 662 liblinearOptions.put(Character.toString(argv[i-1].charAt(1)), argv[i]); 663 } else { 664 throw new LiblinearException("Unknown liblinear parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. "); 665 } 666 } catch (ArrayIndexOutOfBoundsException e) { 667 throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 668 } catch (NumberFormatException e) { 669 throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 670 } catch (NullPointerException e) { 671 throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e); 672 } 673 } 674 } 675 676 public double getBias() throws MaltChainedException { 677 try { 678 return Double.valueOf(liblinearOptions.get("B")).doubleValue(); 679 } catch (NumberFormatException e) { 680 throw new LiblinearException("The liblinear bias value is not numerical value. ", e); 681 } 682 } 683 684 public Parameter getLiblinearParameters() throws MaltChainedException { 685 Parameter param = new Parameter(SolverType.L2LOSS_SVM_DUAL, 1, 0.1); 686 String type = liblinearOptions.get("s"); 687 if (type.equals("0")) { 688 param.setSolverType(SolverType.L2_LR); 689 } else if (type.equals("1")) { 690 param.setSolverType(SolverType.L2LOSS_SVM_DUAL); 691 } else if (type.equals("2")) { 692 param.setSolverType(SolverType.L2LOSS_SVM); 693 } else if (type.equals("3")) { 694 param.setSolverType(SolverType.L1LOSS_SVM_DUAL); 695 } else if (type.equals("4")) { 696 param.setSolverType(SolverType.MCSVM_CS); 697 } else { 698 throw new LiblinearException("The liblinear type (-s) is not an integer value between 0 and 4. "); 699 } 700 try { 701 param.setC(Double.valueOf(liblinearOptions.get("c")).doubleValue()); 702 } catch (NumberFormatException e) { 703 throw new LiblinearException("The liblinear cost (-c) value is not numerical value. ", e); 704 } 705 try { 706 param.setEps(Double.valueOf(liblinearOptions.get("e")).doubleValue()); 707 } catch (NumberFormatException e) { 708 throw new LiblinearException("The liblinear epsilon (-e) value is not numerical value. ", e); 709 } 710 return param; 711 } 712 713 public void initLiblinearOptions() { 714 liblinearOptions.put("s", "1"); // type = SolverType.L2LOSS_SVM_DUAL (default) 715 liblinearOptions.put("c", "1"); // cost = 1 (default) 716 liblinearOptions.put("e", "0.1"); // epsilon = 0.1 (default) 717 liblinearOptions.put("B", "1"); // bias = 1 (default) 718 } 719 720 public String[] getLibLinearParamStringArray() { 721 final ArrayList<String> params = new ArrayList<String>(); 722 723 for (String key : liblinearOptions.keySet()) { 724 params.add("-"+key); params.add(liblinearOptions.get(key)); 725 } 726 return params.toArray(new String[params.size()]); 727 } 728 729 730 public void liblinear_predict_with_kbestlist(Model model, FeatureNode[] x, KBestList kBestList) throws MaltChainedException { 731 int i; 732 final int nr_class = model.getNrClass(); 733 final double[] dec_values = new double[nr_class]; 734 735 Linear.predictValues(model, x, dec_values); 736 final int[] labels = model.getLabels(); 737 int[] predictionList = new int[nr_class]; 738 for(i=0;i<nr_class;i++) { 739 predictionList[i] = labels[i]; 740 } 741 742 double tmpDec; 743 int tmpObj; 744 int lagest; 745 for (i=0;i<nr_class-1;i++) { 746 lagest = i; 747 for (int j=i;j<nr_class;j++) { 748 if (dec_values[j] > dec_values[lagest]) { 749 lagest = j; 750 } 751 } 752 tmpDec = dec_values[lagest]; 753 dec_values[lagest] = dec_values[i]; 754 dec_values[i] = tmpDec; 755 tmpObj = predictionList[lagest]; 756 predictionList[lagest] = predictionList[i]; 757 predictionList[i] = tmpObj; 758 } 759 760 int k = nr_class-1; 761 if (kBestList.getK() != -1) { 762 k = kBestList.getK() - 1; 763 } 764 765 for (i=0; i<nr_class && k >= 0; i++, k--) { 766 if (kBestList instanceof ScoredKBestList) { 767 ((ScoredKBestList)kBestList).add(predictionList[i], (float)dec_values[i]); 768 } else { 769 kBestList.add(predictionList[i]); 770 } 771 772 } 773 } 774 775 /** 776 * Converts the instance file (Malt's own SVM format) into the Liblinear (SVMLight) format. The input instance file is removed (replaced) 777 * by the instance file in the Liblinear (SVMLight) format. If a column contains -1, the value will be removed in destination file. 778 * 779 * @param isr the input stream reader for the source instance file 780 * @param osw the output stream writer for the destination instance file 781 * @param cardinalities a vector containing the number of distinct values for a particular column 782 * @throws LiblinearException 783 */ 784 public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException { 785 try { 786 final BufferedReader in = new BufferedReader(isr); 787 final BufferedWriter out = new BufferedWriter(osw); 788 789 int c; 790 int j = 0; 791 int offset = 1; 792 int code = 0; 793 while(true) { 794 c = in.read(); 795 if (c == -1) { 796 break; 797 } 798 799 if (c == '\t' || c == '|') { 800 if (j == 0) { 801 out.write(Integer.toString(code)); 802 j++; 803 } else { 804 if (code != -1) { 805 out.write(' '); 806 out.write(Integer.toString(code+offset)); 807 out.write(":1"); 808 } 809 if (c == '\t') { 810 offset += cardinalities[j-1]; 811 j++; 812 } 813 } 814 code = 0; 815 } else if (c == '\n') { 816 j = 0; 817 offset = 1; 818 out.write('\n'); 819 code = 0; 820 } else if (c == '-') { 821 code = -1; 822 } else if (code != -1) { 823 if (c > 47 && c < 58) { 824 code = code * 10 + (c-48); 825 } else { 826 throw new LiblinearException("The instance file contain a non-integer value, when converting the Malt SVM format into Liblinear format."); 827 } 828 } 829 } 830 in.close(); 831 out.close(); 832 } catch (IOException e) { 833 throw new LiblinearException("Cannot read from the instance file, when converting the Malt SVM format into Liblinear format. ", e); 834 } 835 } 836 837 protected void finalize() throws Throwable { 838 try { 839 closeInstanceWriter(); 840 } finally { 841 super.finalize(); 842 } 843 } 844 845 /* (non-Javadoc) 846 * @see java.lang.Object#toString() 847 */ 848 public String toString() { 849 final StringBuffer sb = new StringBuffer(); 850 sb.append("\nLiblinear INTERFACE\n"); 851 sb.append(" Liblinear version: "+LIBLINEAR_VERSION+"\n"); 852 sb.append(" Liblinear string: "+paramString+"\n"); 853 854 sb.append(getLibLinearOptions()); 855 return sb.toString(); 856 } 857 }