package org.maltparser.ml.liblinear;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import liblinear.FeatureNode;
import liblinear.Linear;
import liblinear.Model;
import liblinear.Parameter;
import liblinear.Problem;
import liblinear.SolverType;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureVector;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.value.FeatureValue;
import org.maltparser.core.feature.value.MultipleFeatureValue;
import org.maltparser.core.feature.value.SingleFeatureValue;
import org.maltparser.core.helper.NoPrintStream;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.ml.LearningMethod;
import org.maltparser.parser.DependencyParserConfig;
import org.maltparser.parser.guide.instance.InstanceModel;
import org.maltparser.parser.history.action.SingleDecision;
import org.maltparser.parser.history.kbest.KBestList;
import org.maltparser.parser.history.kbest.ScoredKBestList;


/**
 * Learning method backed by the Liblinear library.
 * <p>
 * In batch mode the learner appends training instances to a tab-separated instance file
 * (suffix <code>.ins</code>), which {@link #train(FeatureVector)} later turns into a model file
 * (suffix <code>.mod</code>), either in-process or through an external <code>train</code> binary.
 * In classification mode {@link #predict(FeatureVector, SingleDecision)} lazily loads the model
 * and fills the k-best list of the decision.
 */
public class Liblinear implements LearningMethod {
    public final static String LIBLINEAR_VERSION = "1.33";

    /** How much of the external trainer's output is forwarded to the configuration logger. */
    public enum Verbostity {
        SILENT, ERROR, ALL
    }

    /** Liblinear options keyed by their one-letter flag (s, c, e, B), in insertion order. */
    private LinkedHashMap<String, String> liblinearOptions;

    protected InstanceModel owner;
    protected int learnerMode;
    protected String name;
    protected int numberOfInstances;
    protected boolean saveInstanceFiles;
    protected boolean excludeNullValues;
    protected String pathExternalLiblinearTrain = null;
    private int[] cardinalities;
    /**
     * Instance output stream writer
     */
    private BufferedWriter instanceOutput = null;
    /**
     * Liblinear model object, only used during classification.
     */
    private Model model = null;

    /**
     * Parameter string as last handed to {@link #parseParameters(String)}
     */
    private String paramString;

    /** Scratch list reused between calls of {@link #predict(FeatureVector, SingleDecision)}. */
    private ArrayList<FeatureNode> xlist = null;

    private Verbostity verbosity;

    /**
     * Constructs a Liblinear learner.
     *
     * @param owner the guide model owner
     * @param learnerMode the mode of the learner; in BATCH mode the instance file is opened for writing
     * @throws MaltChainedException if the options cannot be parsed or the instance file cannot be opened
     */
    public Liblinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
        setOwner(owner);
        setLearningMethodName("liblinear");
        setLearnerMode(learnerMode.intValue());
        setNumberOfInstances(0);
        verbosity = Verbostity.SILENT;

        liblinearOptions = new LinkedHashMap<String, String>();
        initLiblinearOptions();
        parseParameters(getConfiguration().getOptionValue("liblinear", "liblinear_options").toString());
        initSpecialParameters();
        if (learnerMode.intValue() == BATCH) {
            instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
        }
    }

    /**
     * Appends one training instance to the instance file: the decision code followed by one
     * tab-terminated column per feature. Excluded null values are written as <code>-1</code> and
     * multiple-valued features are written as codes joined with <code>|</code>.
     *
     * @param decision the decision whose code becomes the class label
     * @param featureVector the features of the instance
     * @throws MaltChainedException if an argument is null or the instance file cannot be written
     */
    public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
        if (featureVector == null) {
            throw new LiblinearException("The feature vector cannot be found");
        } else if (decision == null) {
            throw new LiblinearException("The decision cannot be found");
        }
        try {
            instanceOutput.write(decision.getDecisionCode()+"\t");
            for (int i = 0; i < featureVector.size(); i++) {
                final FeatureValue featureValue = featureVector.get(i).getFeatureValue();
                if (excludeNullValues && featureValue.isNullValue()) {
                    instanceOutput.write("-1");
                } else if (featureValue instanceof SingleFeatureValue) {
                    instanceOutput.write(Integer.toString(((SingleFeatureValue)featureValue).getCode()));
                } else if (featureValue instanceof MultipleFeatureValue) {
                    final Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
                    int written = 0;
                    for (Integer value : values) {
                        if (written++ > 0) {
                            instanceOutput.write("|");
                        }
                        instanceOutput.write(value.toString());
                    }
                }
                // Every column, including the last one, is terminated by a tab:
                // maltSVMFormat2OriginalSVMFormat only emits a column when it sees its terminating tab.
                instanceOutput.write('\t');
            }

            instanceOutput.write('\n');
            instanceOutput.flush();
            increaseNumberOfInstances();
        } catch (IOException e) {
            throw new LiblinearException("The Liblinear learner cannot write to the instance file. ", e);
        }
    }

    /** Does nothing for this learner. */
    public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }

    /* (non-Javadoc)
     * @see org.maltparser.ml.LearningMethod#noMoreInstances()
     */
    public void noMoreInstances() throws MaltChainedException {
        closeInstanceWriter();
    }


    /**
     * Trains a model from the instance file and stores it in the model file. Uses the in-process
     * Liblinear trainer unless an external trainer has been configured, and finally writes the
     * feature cardinalities to the <code>.car</code> file.
     *
     * @param featureVector the feature vector that provides the cardinality of every feature column
     * @throws MaltChainedException if training fails or a file cannot be read or written
     */
    public void train(FeatureVector featureVector) throws MaltChainedException {
        if (featureVector == null) {
            throw new LiblinearException("The feature vector cannot be found. ");
        } else if (owner == null) {
            throw new LiblinearException("The parent guide model cannot be found. ");
        }
        cardinalities = getCardinalities(featureVector);
        if (pathExternalLiblinearTrain == null) {
            try {
                final Problem problem = readLibLinearProblem(getInstanceInputStreamReader(".ins"), cardinalities);
                if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
                    owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model "+getFile(".mod").getName()+"\n");
                }
                // Liblinear prints progress to the standard streams; silence them while training
                // and always put the original streams back, even when training fails.
                final PrintStream out = System.out;
                final PrintStream err = System.err;
                try {
                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
                    Linear.saveModel(new File(getFile(".mod").getAbsolutePath()), Linear.train(problem, getLiblinearParameters()));
                } finally {
                    System.setErr(err);
                    System.setOut(out);
                }
                if (!saveInstanceFiles) {
                    getFile(".ins").delete();
                }
            } catch (OutOfMemoryError e) {
                throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
            } catch (IllegalArgumentException e) {
                throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
            } catch (SecurityException e) {
                throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
            } catch (IOException e) {
                throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
            }
        } else {
            trainExternal(featureVector);
        }
        saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities);
    }

    /**
     * Trains the model by converting the instance file to the Liblinear input format
     * (<code>.ins.tmp</code>) and running the configured external trainer on it.
     */
    private void trainExternal(FeatureVector featureVector) throws MaltChainedException {
        InputStream in = null;
        InputStream err = null;
        try {
            maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
            owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model (external) "+getFile(".mod").getName());

            // Command line: <trainer> <options...> <instance file> <model file>
            final String[] params = getLibLinearParamStringArray();
            final String[] arrayCommands = new String[params.length+3];
            arrayCommands[0] = pathExternalLiblinearTrain;
            System.arraycopy(params, 0, arrayCommands, 1, params.length);
            arrayCommands[params.length+1] = getFile(".ins.tmp").getAbsolutePath();
            arrayCommands[params.length+2] = getFile(".mod").getAbsolutePath();

            if (verbosity == Verbostity.ALL) {
                owner.getGuide().getConfiguration().getConfigLogger().info('\n');
            }
            final Process child = Runtime.getRuntime().exec(arrayCommands);
            in = child.getInputStream();
            err = child.getErrorStream();
            // NOTE(review): stdout is drained completely before stderr; a trainer that fills the
            // stderr pipe before closing stdout could stall here.
            int c;
            while ((c = in.read()) != -1){
                if (verbosity == Verbostity.ALL) {
                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
                }
            }
            while ((c = err.read()) != -1){
                if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
                }
            }
            if (child.waitFor() != 0) {
                owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
            }
            in.close();
            err.close();
            if (!saveInstanceFiles) {
                getFile(".ins").delete();
                getFile(".ins.tmp").delete();
            }
            owner.getGuide().getConfiguration().getConfigLogger().info('\n');
        } catch (InterruptedException e) {
            // Keep the interrupt visible to the code that called us.
            Thread.currentThread().interrupt();
            throw new LiblinearException("Liblinear is interrupted. ", e);
        } catch (IllegalArgumentException e) {
            throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
        } catch (SecurityException e) {
            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
        } catch (IOException e) {
            throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
        } catch (OutOfMemoryError e) {
            throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
        } finally {
            closeQuietly(in);
            closeQuietly(err);
        }
    }

    /** Collects the cardinality of every feature in the vector, in vector order. */
    private int[] getCardinalities(FeatureVector featureVector) {
        final int[] result = new int[featureVector.size()];
        int i = 0;
        for (FeatureFunction feature : featureVector) {
            result[i++] = feature.getFeatureValue().getCardinality();
        }
        return result;
    }

    /** Writes the cardinalities as one comma-separated line and closes the writer. */
    private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
        final BufferedWriter out = new BufferedWriter(osw);
        try {
            for (int i = 0, n = cardinalities.length; i < n; i++) {
                out.write(Integer.toString(cardinalities[i]));
                if (i < n - 1) {
                    out.write(',');
                }
            }
            out.write('\n');
            out.close();
        } catch (IOException e) {
            throw new LiblinearException("The Liblinear learner cannot write the cardinalities file. ", e);
        } finally {
            // No-op after the regular close above; releases the file on the error path.
            closeQuietly(out);
        }
    }

    /**
     * Reads the comma-separated cardinalities from the first line of the reader and closes it.
     *
     * @return the cardinalities, or null if the reader has no line to offer
     */
    private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException {
        int[] result = null;
        final BufferedReader in = new BufferedReader(isr);
        try {
            final String line = in.readLine();
            if (line != null) {
                final String[] items = line.split(",");
                result = new int[items.length];
                for (int i = 0; i < items.length; i++) {
                    result[i] = Integer.parseInt(items[i]);
                }
            }
            in.close();
        } catch (IOException e) {
            throw new LiblinearException("The Liblinear learner cannot read the cardinalities file. ", e);
        } catch (NumberFormatException e) {
            throw new LiblinearException("The cardinalities file contains a non-integer value. ", e);
        } finally {
            closeQuietly(in);
        }
        return result;
    }

    /**
     * Moves every instance of this learner's instance file to the instance writer of another
     * learning method, inserting the current code of the divide feature at the column positions
     * listed in the index vector, and then deletes this learner's instance file.
     *
     * @param method the learning method that receives the instances
     * @param divideFeature the feature whose current code is inserted
     * @param divideFeatureIndexVector the column indices at which the code is inserted
     * @throws MaltChainedException if an argument is missing or the instance file cannot be processed
     */
    public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
        if (method == null) {
            throw new LiblinearException("The learning method cannot be found. ");
        } else if (divideFeature == null) {
            throw new LiblinearException("The divide feature cannot be found. ");
        }
        BufferedReader in = null;
        try {
            in = new BufferedReader(getInstanceInputStreamReader(".ins"));
            final BufferedWriter out = method.getInstanceWriter();
            final StringBuilder sb = new StringBuilder(6);
            int j = 0; // number of columns completed on the current line
            int l;

            // Characters after the last newline (an unterminated final line) are dropped.
            while ((l = in.read()) != -1) {
                final char c = (char)l;
                if (c == '\t') {
                    if (divideFeatureIndexVector.contains(j-1)) {
                        out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
                        out.write('\t');
                    }
                    out.write(sb.toString());
                    j++;
                    out.write('\t');
                    sb.setLength(0);
                } else if (c == '\n') {
                    out.write(sb.toString());
                    if (divideFeatureIndexVector.contains(j-1)) {
                        out.write('\t');
                        out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
                    }
                    out.write('\n');
                    sb.setLength(0);
                    method.increaseNumberOfInstances();
                    this.decreaseNumberOfInstances();
                    j = 0;
                } else {
                    sb.append(c);
                }
            }
            in.close();
            getFile(".ins").delete();
        } catch (SecurityException e) {
            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
        } catch (NullPointerException e) {
            throw new LiblinearException("The instance file cannot be found. ", e);
        } catch (FileNotFoundException e) {
            throw new LiblinearException("The instance file cannot be found. ", e);
        } catch (IOException e) {
            throw new LiblinearException("The Liblinear learner cannot read from the instance file. ", e);
        } finally {
            closeQuietly(in);
        }
    }

    /**
     * Predicts the next decision for the given features and stores the result in the k-best list
     * of the decision. The model and the cardinalities are loaded on first use.
     *
     * @param featureVector the features of the current parser state
     * @param decision the decision whose k-best list receives the prediction(s)
     * @return always true
     * @throws MaltChainedException if the feature vector is null or the model cannot be loaded
     */
    public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
        if (featureVector == null) {
            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the feature vector cannot be found. ");
        }
        if (model == null) {
            final File modelFile = getFile(".mod");
            try {
                model = Linear.loadModel(new File(modelFile.getAbsolutePath()));
            } catch (IOException e) {
                throw new LiblinearException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e);
            }
        }
        if (model == null) {
            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the learning model cannot be found. ");
        }

        if (cardinalities == null) {
            if (getFile(".car").exists()) {
                cardinalities = loadCardinalities(getInstanceInputStreamReader(".car"));
            } else {
                cardinalities = getCardinalities(featureVector);
            }
        }
        if (xlist == null) {
            xlist = new ArrayList<FeatureNode>(featureVector.size());
        }
        // The scratch list is reused between calls; start from an empty list so that it does not
        // keep accumulating the nodes of earlier predictions.
        xlist.clear();

        int offset = 1;
        int i = 0;
        for (FeatureFunction feature : featureVector) {
            final FeatureValue featureValue = feature.getFeatureValue();
            if (!(excludeNullValues && featureValue.isNullValue())) {
                // Codes at or above the cardinality are left out of the sparse vector.
                if (featureValue instanceof SingleFeatureValue) {
                    final int code = ((SingleFeatureValue)featureValue).getCode();
                    if (code < cardinalities[i]) {
                        xlist.add(new FeatureNode(code + offset, 1));
                    }
                } else if (featureValue instanceof MultipleFeatureValue) {
                    for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
                        if (value < cardinalities[i]) {
                            xlist.add(new FeatureNode(value + offset, 1));
                        }
                    }
                }
            }
            offset += cardinalities[i];
            i++;
        }

        final FeatureNode[] xarray = xlist.toArray(new FeatureNode[xlist.size()]);

        if (decision.getKBestList().getK() == 1) {
            decision.getKBestList().add(Linear.predict(model, xarray));
        } else {
            liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
        }

        return true;
    }


    /**
     * Closes the instance writer and drops the references to the model, the scratch list and the owner.
     */
    public void terminate() throws MaltChainedException {
        closeInstanceWriter();
        model = null;
        xlist = null;
        owner = null;
    }

    /**
     * @return the writer of the instance file, or null if it is not open
     */
    public BufferedWriter getInstanceWriter() {
        return instanceOutput;
    }

    /** Flushes and closes the instance writer if it is open. */
    protected void closeInstanceWriter() throws MaltChainedException {
        try {
            if (instanceOutput != null) {
                instanceOutput.flush();
                instanceOutput.close();
                instanceOutput = null;
            }
        } catch (IOException e) {
            throw new LiblinearException("The Liblinear learner cannot close the instance file. ", e);
        }
    }


    /**
     * Returns the parameter string used to configure Liblinear.
     *
     * @return the string last passed to {@link #parseParameters(String)}, or null if there was none
     */
    public String getParamString() {
        return paramString;
    }

    public InstanceModel getOwner() {
        return owner;
    }

    protected void setOwner(InstanceModel owner) {
        this.owner = owner;
    }

    public int getLearnerMode() {
        return learnerMode;
    }

    public void setLearnerMode(int learnerMode) throws MaltChainedException {
        this.learnerMode = learnerMode;
    }

    public String getLearningMethodName() {
        return name;
    }

    /**
     * Returns the current configuration
     *
     * @return the current configuration
     * @throws MaltChainedException
     */
    public DependencyParserConfig getConfiguration() throws MaltChainedException {
        return owner.getGuide().getConfiguration();
    }

    public int getNumberOfInstances() {
        return numberOfInstances;
    }

    /** Counts one more instance, both here and in the owner's frequency. */
    public void increaseNumberOfInstances() {
        numberOfInstances++;
        owner.increaseFrequency();
    }

    /** Counts one instance less, both here and in the owner's frequency. */
    public void decreaseNumberOfInstances() {
        numberOfInstances--;
        owner.decreaseFrequency();
    }

    /**
     * Sets the instance counter.
     *
     * @param numberOfInstances the new number of instances
     */
    protected void setNumberOfInstances(int numberOfInstances) {
        this.numberOfInstances = numberOfInstances;
    }

    protected void setLearningMethodName(String name) {
        this.name = name;
    }

    /** Opens a writer for the file named model name + learning method name + suffix. */
    protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
        return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
    }

    /** Opens a reader for the file named model name + learning method name + suffix. */
    protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
        return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
    }

    /** Returns the file named model name + learning method name + suffix. */
    protected File getFile(String suffix) throws MaltChainedException {
        return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix);
    }

    /**
     * Reads an instance file into a Liblinear problem according to the Malt-SVM format, which is
     * a column fixed format (tab-separated). The first column is the class label; every other
     * column holds one code or several codes separated by <code>|</code>, and <code>-1</code>
     * marks a value that is left out. The reader is closed before the method returns.
     *
     * @param isr the instance stream reader for the instance file
     * @param cardinalities an array containing the number of distinct values for a particular column.
     * @return the problem; its size is taken from {@link #getNumberOfInstances()}
     * @throws MaltChainedException if the file cannot be read or does not match the expected format
     */
    public Problem readLibLinearProblem(InputStreamReader isr, int[] cardinalities) throws MaltChainedException {
        final Problem problem = new Problem();
        final BufferedReader fp = new BufferedReader(isr);

        try {
            final ArrayList<FeatureNode> nodes = new ArrayList<FeatureNode>();
            final Pattern tabPattern = Pattern.compile("\t");
            final Pattern pipePattern = Pattern.compile("\\|");
            int maxIndex = 0;
            problem.bias = getBias();
            problem.l = getNumberOfInstances();
            problem.x = new FeatureNode[problem.l][];
            problem.y = new int[problem.l];
            int i = 0;
            String line;
            while ((line = fp.readLine()) != null) {
                final String[] columns = tabPattern.split(line);
                if (columns.length == 0) {
                    continue;
                }

                int offset = 1;
                try {
                    try {
                        problem.y[i] = Integer.parseInt(columns[0]);
                    } catch (NumberFormatException e) {
                        throw new LiblinearException("The instance file contain a non-integer value '"+columns[0]+"'", e);
                    }
                    for (int j = 1; j < columns.length; j++) {
                        final String[] items = pipePattern.split(columns[j]);
                        for (int k = 0; k < items.length; k++) {
                            try {
                                final int code = Integer.parseInt(items[k]);
                                if (code != -1) {
                                    final int index = code + offset;
                                    nodes.add(new FeatureNode(index, 1));
                                    // Track the maximum over all nodes: the codes of a
                                    // multi-valued column are not necessarily in ascending order.
                                    if (index > maxIndex) {
                                        maxIndex = index;
                                    }
                                }
                            } catch (NumberFormatException e) {
                                throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
                            }
                        }
                        offset += cardinalities[j-1];
                    }
                    // An instance may legitimately end up without any feature node.
                    problem.x[i] = nodes.toArray(new FeatureNode[nodes.size()]);
                    i++;
                    nodes.clear();
                } catch (ArrayIndexOutOfBoundsException e) {
                    // More lines than counted instances, or more columns than cardinalities.
                    throw new LiblinearException("Cannot read from the instance file. ", e);
                }
            }
            fp.close();
            problem.n = maxIndex;
            if ( problem.bias >= 0 ) {
                problem.n++;
            }
        } catch (IOException e) {
            throw new LiblinearException("Cannot read from the instance file. ", e);
        } finally {
            closeQuietly(fp);
        }
        return problem;
    }

    /**
     * Reads the options that are not Liblinear flags: null-value exclusion, whether instance
     * files are kept, the path to an external trainer and the verbosity.
     *
     * @throws MaltChainedException if the external trainer path or the verbosity is invalid
     */
    protected void initSpecialParameters() throws MaltChainedException {
        final Object nullValueOption = getConfiguration().getOptionValue("singlemalt", "null_value");
        excludeNullValues = nullValueOption != null && nullValueOption.toString().equalsIgnoreCase("none");
        saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("liblinear", "save_instance_files")).booleanValue();

        final String externalPath = getConfiguration().getOptionValue("liblinear", "liblinear_external").toString();
        if (!externalPath.equals("")) {
            try {
                final File externalFile = new File(externalPath);
                if (!externalFile.exists()) {
                    throw new LiblinearException("The path to the external Liblinear trainer 'train' is wrong.");
                }
                if (externalFile.isDirectory()) {
                    throw new LiblinearException("The option --liblinear-liblinear_external points to a directory, the path should point at the 'train' file or the 'train.exe' file");
                }
                if (!(externalPath.endsWith("train") || externalPath.endsWith("train.exe"))) {
                    throw new LiblinearException("The option --liblinear-liblinear_external does not specify the path to 'train' file or the 'train.exe' file. ");
                }
                pathExternalLiblinearTrain = externalPath;
            } catch (SecurityException e) {
                throw new LiblinearException("Access denied to the file specified by the option --liblinear-liblinear_external. ", e);
            }
        }
        final Object verbosityOption = getConfiguration().getOptionValue("liblinear", "verbosity");
        if (verbosityOption != null) {
            try {
                // Fixed locale: the default locale may upper-case 'i' to a non-ASCII letter.
                verbosity = Verbostity.valueOf(verbosityOption.toString().toUpperCase(Locale.ENGLISH));
            } catch (IllegalArgumentException e) {
                throw new LiblinearException("The liblinear verbosity option '"+verbosityOption+"' is not a valid value (silent, error or all). ", e);
            }
        }
    }

    /**
     * @return the current options formatted as <code>-flag value </code> pairs
     */
    public String getLibLinearOptions() {
        final StringBuilder sb = new StringBuilder();
        for (Map.Entry<String, String> option : liblinearOptions.entrySet()) {
            sb.append('-');
            sb.append(option.getKey());
            sb.append(' ');
            sb.append(option.getValue());
            sb.append(' ');
        }
        return sb.toString();
    }

    /**
     * Parses a Liblinear parameter string of <code>-flag value</code> pairs separated by blanks or
     * underscores and stores the values of the known flags (s, c, e, B). A null or blank string
     * leaves the options untouched.
     *
     * @param paramstring the parameter string
     * @throws MaltChainedException if a flag is malformed, unknown or has no value
     */
    public void parseParameters(String paramstring) throws MaltChainedException {
        if (paramstring == null) {
            return;
        }
        paramString = paramstring;
        final String trimmed = paramstring.trim();
        if (trimmed.length() == 0) {
            return;
        }
        final String[] argv;
        final String allowedFlags = "sceB";
        try {
            argv = trimmed.split("[_\\p{Blank}]");
        } catch (PatternSyntaxException e) {
            throw new LiblinearException("Could not split the liblinear-parameter string '"+paramstring+"'. ", e);
        }
        for (int i = 0; i < argv.length; i += 2) {
            final String flag = argv[i];
            if (flag.length() == 0 || flag.charAt(0) != '-') {
                throw new LiblinearException("The argument flag should start with the following character '-', not with "
                        + (flag.length() == 0 ? "an empty string" : String.valueOf(flag.charAt(0))));
            }
            if (i + 1 >= argv.length) {
                throw new LiblinearException("The last argument does not have any value. ");
            }
            final String value = argv[i + 1];
            if (flag.length() < 2 || allowedFlags.indexOf(flag.charAt(1)) == -1) {
                throw new LiblinearException("Unknown liblinear parameter: '"+flag+"' with value '"+value+"'. ");
            }
            liblinearOptions.put(Character.toString(flag.charAt(1)), value);
        }
    }

    /**
     * @return the value of the bias option (-B)
     * @throws MaltChainedException if the value is not numerical
     */
    public double getBias() throws MaltChainedException {
        try {
            return Double.valueOf(liblinearOptions.get("B")).doubleValue();
        } catch (NumberFormatException e) {
            throw new LiblinearException("The liblinear bias value is not numerical value. ", e);
        }
    }

    /**
     * Builds the Liblinear parameter object from the solver type (-s), cost (-c) and epsilon (-e) options.
     *
     * @return the parameter object
     * @throws MaltChainedException if an option value is invalid
     */
    public Parameter getLiblinearParameters() throws MaltChainedException {
        final Parameter param = new Parameter(SolverType.L2LOSS_SVM_DUAL, 1, 0.1);
        final String type = liblinearOptions.get("s");
        if (type.equals("0")) {
            param.setSolverType(SolverType.L2_LR);
        } else if (type.equals("1")) {
            param.setSolverType(SolverType.L2LOSS_SVM_DUAL);
        } else if (type.equals("2")) {
            param.setSolverType(SolverType.L2LOSS_SVM);
        } else if (type.equals("3")) {
            param.setSolverType(SolverType.L1LOSS_SVM_DUAL);
        } else if (type.equals("4")) {
            param.setSolverType(SolverType.MCSVM_CS);
        } else {
            throw new LiblinearException("The liblinear type (-s) is not an integer value between 0 and 4. ");
        }
        try {
            param.setC(Double.valueOf(liblinearOptions.get("c")).doubleValue());
        } catch (NumberFormatException e) {
            throw new LiblinearException("The liblinear cost (-c) value is not numerical value. ", e);
        }
        try {
            param.setEps(Double.valueOf(liblinearOptions.get("e")).doubleValue());
        } catch (NumberFormatException e) {
            throw new LiblinearException("The liblinear epsilon (-e) value is not numerical value. ", e);
        }
        return param;
    }

    /** Resets the four supported options to their default values. */
    public void initLiblinearOptions() {
        liblinearOptions.put("s", "1"); // type = SolverType.L2LOSS_SVM_DUAL (default)
        liblinearOptions.put("c", "1"); // cost = 1 (default)
        liblinearOptions.put("e", "0.1"); // epsilon = 0.1 (default)
        liblinearOptions.put("B", "1"); // bias = 1 (default)
    }

    /**
     * @return the current options as alternating <code>-flag</code> and value array elements
     */
    public String[] getLibLinearParamStringArray() {
        final ArrayList<String> params = new ArrayList<String>();

        for (Map.Entry<String, String> option : liblinearOptions.entrySet()) {
            params.add("-"+option.getKey());
            params.add(option.getValue());
        }
        return params.toArray(new String[params.size()]);
    }


    /**
     * Ranks all class labels by decreasing decision value and adds the best ones to the k-best
     * list: all of them if the list's k is -1, otherwise at most k. Scores are added as well when
     * the list is a {@link ScoredKBestList}.
     *
     * @param model the Liblinear model
     * @param x the sparse feature vector to classify
     * @param kBestList the list that receives the ranked labels
     * @throws MaltChainedException if a label cannot be added to the list
     */
    public void liblinear_predict_with_kbestlist(Model model, FeatureNode[] x, KBestList kBestList) throws MaltChainedException {
        final int nr_class = model.getNrClass();
        final double[] dec_values = new double[nr_class];

        Linear.predictValues(model, x, dec_values);
        final int[] predictionList = new int[nr_class];
        System.arraycopy(model.getLabels(), 0, predictionList, 0, nr_class);

        // Selection sort of labels and decision values, largest decision value first.
        for (int i = 0; i < nr_class-1; i++) {
            int largest = i;
            for (int j = i; j < nr_class; j++) {
                if (dec_values[j] > dec_values[largest]) {
                    largest = j;
                }
            }
            final double tmpDec = dec_values[largest];
            dec_values[largest] = dec_values[i];
            dec_values[i] = tmpDec;
            final int tmpObj = predictionList[largest];
            predictionList[largest] = predictionList[i];
            predictionList[i] = tmpObj;
        }

        int k = nr_class-1;
        if (kBestList.getK() != -1) {
            k = kBestList.getK() - 1;
        }

        for (int i = 0; i < nr_class && k >= 0; i++, k--) {
            if (kBestList instanceof ScoredKBestList) {
                ((ScoredKBestList)kBestList).add(predictionList[i], (float)dec_values[i]);
            } else {
                kBestList.add(predictionList[i]);
            }
        }
    }

    /**
     * Converts an instance file in Malt's own SVM format into the Liblinear (SVMLight) format:
     * the label followed by <code> index:1</code> for every value, where the index is the code
     * plus the summed cardinalities of the preceding columns plus one. A column that contains -1
     * is left out of the destination file. A column is only emitted when its terminating tab (or
     * <code>|</code>) is read, so every column of the source must be tab-terminated. Both streams
     * are closed before the method returns.
     *
     * @param isr the input stream reader for the source instance file
     * @param osw the output stream writer for the destination instance file
     * @param cardinalities a vector containing the number of distinct values for a particular column
     * @throws MaltChainedException if the source contains an unexpected character or a stream fails
     */
    public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
        final BufferedReader in = new BufferedReader(isr);
        final BufferedWriter out = new BufferedWriter(osw);
        try {
            int c;
            int j = 0;      // column index on the current line; 0 is the label column
            int offset = 1;
            int code = 0;   // value parsed so far; -1 once a '-' has been seen
            while ((c = in.read()) != -1) {
                if (c == '\t' || c == '|') {
                    if (j == 0) {
                        out.write(Integer.toString(code));
                        j++;
                    } else {
                        if (code != -1) {
                            out.write(' ');
                            out.write(Integer.toString(code+offset));
                            out.write(":1");
                        }
                        if (c == '\t') {
                            offset += cardinalities[j-1];
                            j++;
                        }
                    }
                    code = 0;
                } else if (c == '\n') {
                    j = 0;
                    offset = 1;
                    out.write('\n');
                    code = 0;
                } else if (c == '-') {
                    code = -1;
                } else if (code != -1) {
                    if (c >= '0' && c <= '9') {
                        code = code * 10 + (c - '0');
                    } else {
                        throw new LiblinearException("The instance file contain a non-integer value, when converting the Malt SVM format into Liblinear format.");
                    }
                }
            }
            in.close();
            out.close();
        } catch (IOException e) {
            throw new LiblinearException("Cannot read from the instance file, when converting the Malt SVM format into Liblinear format. ", e);
        } finally {
            // No-ops after the regular closes above; release the files on the error path.
            closeQuietly(in);
            closeQuietly(out);
        }
    }

    /** Closes the resource if it is not null, ignoring a failure of the close itself. */
    private static void closeQuietly(Closeable resource) {
        if (resource != null) {
            try {
                resource.close();
            } catch (IOException ignored) {
                // Best effort: used for cleanup only, a more relevant failure is already reported.
            }
        }
    }

    @Override
    protected void finalize() throws Throwable {
        try {
            closeInstanceWriter();
        } finally {
            super.finalize();
        }
    }

    /* (non-Javadoc)
     * @see java.lang.Object#toString()
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("\nLiblinear INTERFACE\n");
        sb.append("  Liblinear version: "+LIBLINEAR_VERSION+"\n");
        sb.append("  Liblinear string: "+paramString+"\n");

        sb.append(getLibLinearOptions());
        return sb.toString();
    }
}