001    package org.maltparser.ml.liblinear;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.File;
006    import java.io.FileNotFoundException;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.OutputStreamWriter;
011    import java.io.PrintStream;
012    import java.util.ArrayList;
013    import java.util.LinkedHashMap;
014    import java.util.Set;
015    import java.util.regex.Pattern;
016    import java.util.regex.PatternSyntaxException;
017    
018    import liblinear.FeatureNode;
019    import liblinear.Linear;
020    import liblinear.Model;
021    import liblinear.Parameter;
022    import liblinear.Problem;
023    import liblinear.SolverType;
024    
025    import org.maltparser.core.exception.MaltChainedException;
026    import org.maltparser.core.feature.FeatureVector;
027    import org.maltparser.core.feature.function.FeatureFunction;
028    import org.maltparser.core.feature.value.FeatureValue;
029    import org.maltparser.core.feature.value.MultipleFeatureValue;
030    import org.maltparser.core.feature.value.SingleFeatureValue;
031    import org.maltparser.core.helper.NoPrintStream;
032    import org.maltparser.core.syntaxgraph.DependencyStructure;
033    import org.maltparser.ml.LearningMethod;
034    import org.maltparser.parser.DependencyParserConfig;
035    import org.maltparser.parser.guide.instance.InstanceModel;
036    import org.maltparser.parser.history.action.SingleDecision;
037    import org.maltparser.parser.history.kbest.KBestList;
038    import org.maltparser.parser.history.kbest.ScoredKBestList;
039    
040    
041    public class Liblinear implements LearningMethod {
042            public final static String LIBLINEAR_VERSION = "1.33";
043            public enum Verbostity {
044                    SILENT, ERROR, ALL
045            }
046            private LinkedHashMap<String, String> liblinearOptions;
047            
048            protected InstanceModel owner;
049            protected int learnerMode;
050            protected String name;
051            protected int numberOfInstances;
052            protected boolean saveInstanceFiles;
053            protected boolean excludeNullValues;
054            protected String pathExternalLiblinearTrain = null;
055            private int[] cardinalities;
056            /**
057             * Instance output stream writer 
058             */
059            private BufferedWriter instanceOutput = null; 
060            /**
061             * Liblinear model object, only used during classification.
062             */
063            private Model model = null;
064            
065            /**
066             * Parameter string
067             */
068            private String paramString;
069    
070            private ArrayList<FeatureNode> xlist = null;
071    
072            private Verbostity verbosity;
073            /**
074             * Constructs a Liblinear learner.
075             * 
076             * @param owner the guide model owner
077             * @param learnerMode the mode of the learner TRAIN or CLASSIFY
078             */
079            public Liblinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
080                    setOwner(owner);
081                    setLearningMethodName("liblinear");
082                    setLearnerMode(learnerMode.intValue());
083                    setNumberOfInstances(0);
084                    verbosity = Verbostity.SILENT;
085    
086                    liblinearOptions = new LinkedHashMap<String, String>();
087                    initLiblinearOptions();
088                    parseParameters(getConfiguration().getOptionValue("liblinear", "liblinear_options").toString());
089                    initSpecialParameters();
090                    if (learnerMode == BATCH) {
091    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
092    //                              if (pathExternalLiblinearTrain != null) {
093    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : Liblinear external "+ getLibLinearOptions() + "\n");
094    //                              } else {
095    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions() + "\n");
096    //                              }
097    //                      }
098                            instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
099                    } 
100    //              else {
101    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
102    //                              owner.getGuide().getConfiguration().getConfigLogger().info("  Classifier           : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions()+ "\n");
103    //                      }
104    //              }
105            }
106            
107            
108            public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
109                    if (featureVector == null) {
110                            throw new LiblinearException("The feature vector cannot be found");
111                    } else if (decision == null) {
112                            throw new LiblinearException("The decision cannot be found");
113                    }       
114                    try {
115                            instanceOutput.write(decision.getDecisionCode()+"\t");
116                            for (int i = 0; i < featureVector.size(); i++) {
117                                    FeatureValue featureValue = featureVector.get(i).getFeatureValue();
118                                    if (excludeNullValues == true && featureValue.isNullValue()) {
119                                            instanceOutput.write("-1");
120                                    } else {
121                                            if (featureValue instanceof SingleFeatureValue) {
122                                                    instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+"");
123                                            } else if (featureValue instanceof MultipleFeatureValue) {
124                                                    Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
125                                                    int j=0;
126                                                    for (Integer value : values) {
127                                                            instanceOutput.write(value.toString());
128                                                            if (j != values.size()-1) {
129                                                                    instanceOutput.write("|");
130                                                            }
131                                                            j++;
132                                                    }
133                                            }
134                                    }
135                                    if (i != featureVector.size()) {
136                                            instanceOutput.write('\t');
137                                    }
138                            }
139    
140                            instanceOutput.write('\n');
141                            instanceOutput.flush();
142                            increaseNumberOfInstances();
143                    } catch (IOException e) {
144                            throw new LiblinearException("The Liblinear learner cannot write to the instance file. ", e);
145                    }
146            }
147            
148            public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }
149            
150            /* (non-Javadoc)
151             * @see org.maltparser.ml.LearningMethod#noMoreInstances()
152             */
153            public void noMoreInstances() throws MaltChainedException {
154                    closeInstanceWriter();
155            }
156    
157    
158            /* (non-Javadoc)
159             * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector)
160             */
161            public void train(FeatureVector featureVector) throws MaltChainedException {
162                    if (featureVector == null) {
163                            throw new LiblinearException("The feature vector cannot be found. ");
164                    } else if (owner == null) {
165                            throw new LiblinearException("The parent guide model cannot be found. ");
166                    }
167                    cardinalities = getCardinalities(featureVector);
168                    if (pathExternalLiblinearTrain == null) {
169                            try {
170                                    final Problem problem = readLibLinearProblem(getInstanceInputStreamReader(".ins"), cardinalities);
171                                    if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
172                                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model "+getFile(".mod").getName()+"\n");
173                                    }
174                                    final PrintStream out = System.out;
175                                    final PrintStream err = System.err;
176                                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
177                                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
178                            Linear.saveModel(new File(getFile(".mod").getAbsolutePath()), Linear.train(problem, getLiblinearParameters()));
179                                    
180                                    System.setOut(err);
181                                    System.setOut(out);
182                                    if (!saveInstanceFiles) {
183                                            getFile(".ins").delete();
184                                    }
185                            } catch (OutOfMemoryError e) {
186                                    throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
187                            } catch (IllegalArgumentException e) {
188                                    throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
189                            } catch (SecurityException e) {
190                                    throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
191                            } catch (IOException e) {
192                                    throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
193                            }
194                    } else {
195                            trainExternal(featureVector);
196                    }
197                    saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities);
198            }
199            
200            private void trainExternal(FeatureVector featureVector) throws MaltChainedException {
201                    try {           
202                            maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
203                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model (external) "+getFile(".mod").getName());
204    
205                            final String[] params = getLibLinearParamStringArray();
206                            String[] arrayCommands = new String[params.length+3];
207                            int i = 0;
208                            arrayCommands[i++] = pathExternalLiblinearTrain;
209                            for (; i <= params.length; i++) {
210                                    arrayCommands[i] = params[i-1];
211                            }
212                            arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath();
213                            arrayCommands[i++] = getFile(".mod").getAbsolutePath();
214                            
215                    if (verbosity == Verbostity.ALL) {
216                            owner.getGuide().getConfiguration().getConfigLogger().info('\n');
217                    }
218                            final Process child = Runtime.getRuntime().exec(arrayCommands);
219                    final InputStream in = child.getInputStream();
220                    final InputStream err = child.getErrorStream();
221                    int c;
222                    while ((c = in.read()) != -1){
223                            if (verbosity == Verbostity.ALL) {
224                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
225                            }
226                    }
227                    while ((c = err.read()) != -1){
228                            if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
229                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
230                            }
231                    }
232                if (child.waitFor() != 0) {
233                    owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
234                }
235                    in.close();
236                    err.close();
237                    if (!saveInstanceFiles) {
238                                    getFile(".ins").delete();
239                                    getFile(".ins.tmp").delete();
240                    }
241                    owner.getGuide().getConfiguration().getConfigLogger().info('\n');
242                    } catch (InterruptedException e) {
243                             throw new LiblinearException("Liblinear is interrupted. ", e);
244                    } catch (IllegalArgumentException e) {
245                            throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
246                    } catch (SecurityException e) {
247                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
248                    } catch (IOException e) {
249                            throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
250                    } catch (OutOfMemoryError e) {
251                            throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
252                    }
253            }
254            
255            private int[] getCardinalities(FeatureVector featureVector) {
256                    int[] cardinalities = new int[featureVector.size()];
257                    int i = 0;
258                    for (FeatureFunction feature : featureVector) {
259                            cardinalities[i++] = feature.getFeatureValue().getCardinality();
260                    }
261                    return cardinalities;
262            }
263            
264            private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
265                    final BufferedWriter out = new BufferedWriter(osw);
266                    try {
267                            for (int i = 0, n = cardinalities.length; i < n; i++) {
268                                    out.write(Integer.toString(cardinalities[i]));
269                                    if (i < n - 1) {
270                                            out.write(',');
271                                    }
272                            }
273                            out.write('\n');
274                            out.close();
275                    } catch (IOException e) {
276                            throw new LiblinearException("", e);
277                    }
278            }
279            
280            private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException {
281                    int[] cardinalities = null;
282                    try {
283                            final BufferedReader in = new BufferedReader(isr); 
284                            String line;
285                            if ((line = in.readLine()) != null) {
286                                    String[] items = line.split(",");
287                                    cardinalities = new int[items.length];
288                                    for (int i = 0; i < items.length; i++) {
289                                            cardinalities[i] = Integer.parseInt(items[i]);
290                                    }
291                            }
292                            in.close();
293                    } catch (IOException e) {
294                            throw new LiblinearException("", e);
295                    } catch (NumberFormatException e) {
296                            throw new LiblinearException("", e);
297                    }
298                    return cardinalities;
299            }
300            
301            /* (non-Javadoc)
302             * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList)
303             */
304            public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
305                    if (method == null) {
306                            throw new LiblinearException("The learning method cannot be found. ");
307                    } else if (divideFeature == null) {
308                            throw new LiblinearException("The divide feature cannot be found. ");
309                    } 
310                    try {
311                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
312                            final BufferedWriter out = method.getInstanceWriter();
313                            final StringBuilder sb = new StringBuilder(6);
314                            int l = in.read();
315                            char c;
316                            int j = 0;
317            
318                            while(true) {
319                                    if (l == -1) {
320                                            sb.setLength(0);
321                                            break;
322                                    }
323                                    
324                                    c = (char)l; 
325                                    l = in.read();
326                                    if (c == '\t') {
327                                            if (divideFeatureIndexVector.contains(j-1)) {
328                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
329                                                    out.write('\t');
330                                            }
331                                            out.write(sb.toString());
332                                            j++;
333                                            out.write('\t');
334                                            sb.setLength(0);
335                                    } else if (c == '\n') {
336                                            out.write(sb.toString());
337                                            if (divideFeatureIndexVector.contains(j-1)) {
338                                                    out.write('\t');
339                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
340                                            }
341                                            out.write('\n');
342                                            sb.setLength(0);
343                                            method.increaseNumberOfInstances();
344                                            this.decreaseNumberOfInstances();
345                                            j = 0;
346                                    } else {
347                                            sb.append(c);
348                                    }
349                            }       
350                            in.close();
351                            getFile(".ins").delete();
352                    } catch (SecurityException e) {
353                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
354                    } catch (NullPointerException  e) {
355                            throw new LiblinearException("The instance file cannot be found. ", e);
356                    } catch (FileNotFoundException e) {
357                            throw new LiblinearException("The instance file cannot be found. ", e);
358                    } catch (IOException e) {
359                            throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
360                    }
361            }
362            
363            /* (non-Javadoc)
364             * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList)
365             */
366            public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
367                    if (model == null) {
368                            File modelFile = getFile(".mod");
369                            try {
370                                    model = Linear.loadModel(new File(modelFile.getAbsolutePath()));
371                            } catch (IOException e) {
372                                    throw new LiblinearException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e);
373                            }
374                    }
375    
376                    if (cardinalities == null) {
377                            if (getFile(".car").exists()) {
378                                    cardinalities = loadCardinalities(getInstanceInputStreamReader(".car"));
379                            } else {
380                                    cardinalities = getCardinalities(featureVector);
381                            }
382                    }
383                    if (xlist == null) {
384                            xlist = new ArrayList<FeatureNode>(featureVector.size()); 
385                    }
386                    if (model == null) { 
387                            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the learning model cannot be found. ");
388                    } else if (featureVector == null) {
389                            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the feature vector cannot be found. ");
390                    }
391                    int j = 0;
392                    int offset = 1;
393                    int i = 0;
394                    for (FeatureFunction feature : featureVector) {
395                            final FeatureValue featureValue = feature.getFeatureValue();
396                            if (!(excludeNullValues == true && featureValue.isNullValue())) {
397                                    if (featureValue instanceof SingleFeatureValue) {
398                                            if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) {
399                                                    xlist.add(j++, new FeatureNode(((SingleFeatureValue)featureValue).getCode() + offset, 1));
400                                            }
401                                    } else if (featureValue instanceof MultipleFeatureValue) {
402                                            for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
403                                                    if (value < cardinalities[i]) {
404                                                            xlist.add(j++, new FeatureNode(value + offset, 1));
405                                                    }
406                                            }
407                                    }
408                            }
409                            offset += cardinalities[i];
410                            i++;
411                    }
412                    
413                    FeatureNode[] xarray = new FeatureNode[j];
414                    for (int k = 0; k < j; k++) {
415                            xarray[k] = xlist.get(k);
416                    }
417    
418                    if (decision.getKBestList().getK() == 1) {
419                            decision.getKBestList().add(Linear.predict(model, xarray));
420                    } else {
421                            liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
422                    }
423    
424                    return true;
425            }
426            
427    
428            public void terminate() throws MaltChainedException { 
429                    closeInstanceWriter();
430                    model = null;
431                    xlist = null;
432                    owner = null;
433            }
434    
435            public BufferedWriter getInstanceWriter() {
436                    return instanceOutput;
437            }
438            
439            protected void closeInstanceWriter() throws MaltChainedException {
440                    try {
441                            if (instanceOutput != null) {
442                                    instanceOutput.flush();
443                                    instanceOutput.close();
444                                    instanceOutput = null;
445                            }
446                    } catch (IOException e) {
447                            throw new LiblinearException("The Liblinear learner cannot close the instance file. ", e);
448                    }
449            }
450            
451            
452            /**
453             * Returns the parameter string for used for configure Liblinear
454             * 
455             * @return the parameter string for used for configure Liblinear
456             */
457            public String getParamString() {
458                    return paramString;
459            }
460            
461            public InstanceModel getOwner() {
462                    return owner;
463            }
464    
465            protected void setOwner(InstanceModel owner) {
466                    this.owner = owner;
467            }
468            
469            public int getLearnerMode() {
470                    return learnerMode;
471            }
472    
473            public void setLearnerMode(int learnerMode) throws MaltChainedException {
474                    this.learnerMode = learnerMode;
475            }
476            
477            public String getLearningMethodName() {
478                    return name;
479            }
480            
481            /**
482             * Returns the current configuration
483             * 
484             * @return the current configuration
485             * @throws MaltChainedException
486             */
487            public DependencyParserConfig getConfiguration() throws MaltChainedException {
488                    return owner.getGuide().getConfiguration();
489            }
490            
491            public int getNumberOfInstances() {
492                    return numberOfInstances;
493            }
494    
495            public void increaseNumberOfInstances() {
496                    numberOfInstances++;
497                    owner.increaseFrequency();
498            }
499            
500            public void decreaseNumberOfInstances() {
501                    numberOfInstances--;
502                    owner.decreaseFrequency();
503            }
504            
505            protected void setNumberOfInstances(int numberOfInstances) {
506                    this.numberOfInstances = 0;
507            }
508    
509            protected void setLearningMethodName(String name) {
510                    this.name = name;
511            }
512            
513            protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
514                    return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
515            }
516            
517            protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
518                    return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
519            }
520            
521            protected File getFile(String suffix) throws MaltChainedException {
522                    return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix);
523            }
524            
525            /**
526             * Reads an instance file into a liblinear Problem object according to the Malt-SVM format, which is a fixed-column, tab-separated format.
527             * 
528             * @param isr   the instance stream reader for the instance file
529             * @param cardinalities an array containing the number of distinct values for each column.
530             * @throws LiblinearException
531             */
532            public Problem readLibLinearProblem(InputStreamReader isr, int[] cardinalities) throws MaltChainedException {
533                    Problem problem = new Problem();
534    
535                    try {
536                            final BufferedReader fp = new BufferedReader(isr);
537                            int max_index = 0;
538                            if (xlist == null) {
539                                    xlist = new ArrayList<FeatureNode>(); 
540                            }
541                            problem.bias = getBias();
542                            problem.l = getNumberOfInstances();
543                            problem.x = new FeatureNode[problem.l][];
544                            problem.y = new int[problem.l];
545                            int i = 0;
546                            final Pattern tabPattern = Pattern.compile("\t");
547                            final Pattern pipePattern = Pattern.compile("\\|");
548                            while(true) {
549                                    String line = fp.readLine();
550                                    if(line == null) break;
551                                    String[] columns = tabPattern.split(line);
552    
553                                    if (columns.length == 0) {
554                                            continue;
555                                    }
556                                    
557                                    int offset = 1; 
558                                    int j = 0;
559                                    try {
560                                            problem.y[i] = Integer.parseInt(columns[j]);
561                                            int p = 0;
562                                            for(j = 1; j < columns.length; j++) {
563                                                    final String[] items = pipePattern.split(columns[j]);   
564                                                    for (int k = 0; k < items.length; k++) {
565                                                            try {
566                                                                    if (Integer.parseInt(items[k]) != -1) {
567                                                                            xlist.add(p, new FeatureNode(Integer.parseInt(items[k])+offset, 1));
568                                                                            p++;
569                                                                    }
570                                                            } catch (NumberFormatException e) {
571                                                                    throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
572                                                            }
573                                                    }
574                                                    offset += cardinalities[j-1];
575                                            }
576                                            problem.x[i] = xlist.subList(0, p).toArray(new FeatureNode[0]);
577                                            if(columns.length > 1) {
578                                                    max_index = Math.max(max_index, problem.x[i][p-1].index);
579                                            }
580                                            i++;
581                                            xlist.clear();
582                                    } catch (ArrayIndexOutOfBoundsException e) {
583                                            throw new LiblinearException("Cannot read from the instance file. ", e);
584                                    }
585                            }
586                            fp.close();     
587                            problem.n = max_index;
588                            if ( problem.bias >= 0 ) {
589                                    problem.n++;
590                            }
591                            xlist = null;
592                    } catch (IOException e) {
593                            throw new LiblinearException("Cannot read from the instance file. ", e);
594                    }
595                    return problem;
596            }
597            
598            protected void initSpecialParameters() throws MaltChainedException {
599                    if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) {
600                            excludeNullValues = true;
601                    } else {
602                            excludeNullValues = false;
603                    }
604                    saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("liblinear", "save_instance_files")).booleanValue();
605                            
606                    if (!getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().equals("")) {
607                            try {
608                                    if (!new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).exists()) {
609                                            throw new LiblinearException("The path to the external Liblinear trainer 'svm-train' is wrong.");
610                                    }
611                                    if (new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).isDirectory()) {
612                                            throw new LiblinearException("The option --liblinear-liblinear_external points to a directory, the path should point at the 'train' file or the 'train.exe' file");
613                                    }
614                                    if (!(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train") || getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train.exe"))) {
615                                            throw new LiblinearException("The option --liblinear-liblinear_external does not specify the path to 'train' file or the 'train.exe' file. ");
616                                    }
617                                    pathExternalLiblinearTrain = getConfiguration().getOptionValue("liblinear", "liblinear_external").toString();
618                            } catch (SecurityException e) {
619                                    throw new LiblinearException("Access denied to the file specified by the option --liblinear-liblinear_external. ", e);
620                            }
621                    }
622                    if (getConfiguration().getOptionValue("liblinear", "verbosity") != null) {
623                            verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("liblinear", "verbosity").toString().toUpperCase());
624                    }
625            }
626            
627            public String getLibLinearOptions() {
628                    StringBuilder sb = new StringBuilder();
629                    for (String key : liblinearOptions.keySet()) {
630                            sb.append('-');
631                            sb.append(key);
632                            sb.append(' ');
633                            sb.append(liblinearOptions.get(key));
634                            sb.append(' ');
635                    }
636                    return sb.toString();
637            }
638            
639            public void parseParameters(String paramstring) throws MaltChainedException {
640                    if (paramstring == null) {
641                            return;
642                    }
643                    final String[] argv;
644                    String allowedFlags = "sceB";
645                    try {
646                            argv = paramstring.split("[_\\p{Blank}]");
647                    } catch (PatternSyntaxException e) {
648                            throw new LiblinearException("Could not split the liblinear-parameter string '"+paramstring+"'. ", e);
649                    }
650                    for (int i=0; i < argv.length-1; i++) {
651                            if(argv[i].charAt(0) != '-') {
652                                    throw new LiblinearException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
653                            }
654                            if(++i>=argv.length) {
655                                    throw new LiblinearException("The last argument does not have any value. ");
656                            }
657                            try {
658                                    int index = allowedFlags.indexOf(argv[i-1].charAt(1));
659                                    if (index != -1) {
660                                            liblinearOptions.put(Character.toString(argv[i-1].charAt(1)), argv[i]);
661                                    } else {
662                                            throw new LiblinearException("Unknown liblinear parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");                
663                                    }
664                            } catch (ArrayIndexOutOfBoundsException e) {
665                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
666                            } catch (NumberFormatException e) {
667                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);     
668                            } catch (NullPointerException e) {
669                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);     
670                            }
671                    }
672            }
673            
674            public double getBias() throws MaltChainedException {
675                    try {
676                            return Double.valueOf(liblinearOptions.get("B")).doubleValue();
677                    } catch (NumberFormatException e) {
678                            throw new LiblinearException("The liblinear bias value is not numerical value. ", e);
679                    }
680            }
681    
682            public Parameter getLiblinearParameters() throws MaltChainedException {
683                    Parameter param = new Parameter(SolverType.L2LOSS_SVM_DUAL, 1, 0.1);
684                    String type = liblinearOptions.get("s");
685                    if (type.equals("0")) {
686                            param.setSolverType(SolverType.L2_LR);
687                    } else if (type.equals("1")) {
688                            param.setSolverType(SolverType.L2LOSS_SVM_DUAL);
689                    } else if (type.equals("2")) {
690                            param.setSolverType(SolverType.L2LOSS_SVM);
691                    } else if (type.equals("3")) {
692                            param.setSolverType(SolverType.L1LOSS_SVM_DUAL);
693                    } else if (type.equals("4")) {
694                            param.setSolverType(SolverType.MCSVM_CS);
695                    } else {
696                            throw new LiblinearException("The liblinear type (-s) is not an integer value between 0 and 4. ");
697                    }
698                    try {
699                            param.setC(Double.valueOf(liblinearOptions.get("c")).doubleValue());
700                    } catch (NumberFormatException e) {
701                            throw new LiblinearException("The liblinear cost (-c) value is not numerical value. ", e);
702                    }
703                    try {
704                            param.setEps(Double.valueOf(liblinearOptions.get("e")).doubleValue());
705                    } catch (NumberFormatException e) {
706                            throw new LiblinearException("The liblinear epsilon (-e) value is not numerical value. ", e);
707                    }
708                    return param;
709            }
710    
711            public void initLiblinearOptions() {
712                    liblinearOptions.put("s", "1"); // type = SolverType.L2LOSS_SVM_DUAL (default)
713                    liblinearOptions.put("c", "1"); // cost = 1 (default)
714                    liblinearOptions.put("e", "0.1"); // epsilon = 0.1 (default)
715                    liblinearOptions.put("B", "1"); // bias = 1 (default)
716            }
717    
718            public String[] getLibLinearParamStringArray() {
719                    final ArrayList<String> params = new ArrayList<String>();
720    
721                    for (String key : liblinearOptions.keySet()) {
722                            params.add("-"+key); params.add(liblinearOptions.get(key));
723                    }
724                    return params.toArray(new String[params.size()]);
725            }
726            
727            
728            public void liblinear_predict_with_kbestlist(Model model, FeatureNode[] x, KBestList kBestList) throws MaltChainedException {
729                    int i;
730                    final int nr_class = model.getNrClass();
731                    final double[] dec_values = new double[nr_class];
732    
733                    Linear.predictValues(model, x, dec_values);
734                    final int[] labels = model.getLabels();
735                    int[] predictionList = new int[nr_class];
736                    for(i=0;i<nr_class;i++) {
737                            predictionList[i] = labels[i];
738                    }
739    
740                    double tmpDec;
741                    int tmpObj;
742                    int lagest;
743                    for (i=0;i<nr_class-1;i++) {
744                            lagest = i;
745                            for (int j=i;j<nr_class;j++) {
746                                    if (dec_values[j] > dec_values[lagest]) {
747                                            lagest = j;
748                                    }
749                            }
750                            tmpDec = dec_values[lagest];
751                            dec_values[lagest] = dec_values[i];
752                            dec_values[i] = tmpDec;
753                            tmpObj = predictionList[lagest];
754                            predictionList[lagest] = predictionList[i];
755                            predictionList[i] = tmpObj;
756                    }
757                    
758                    int k = nr_class-1;
759                    if (kBestList.getK() != -1) {
760                            k = kBestList.getK() - 1;
761                    }
762                    
763                    for (i=0; i<nr_class && k >= 0; i++, k--) {
764                            if (kBestList instanceof ScoredKBestList) {
765                                    ((ScoredKBestList)kBestList).add(predictionList[i], (float)dec_values[i]);
766                            } else {
767                                    kBestList.add(predictionList[i]);
768                            }
769    
770                    }
771            }
772            
773            /**
774             * Converts the instance file (Malt's own SVM format) into the Liblinear (SVMLight) format. The input instance file is removed (replaced)
775             * by the instance file in the Liblinear (SVMLight) format. If a column contains -1, the value will be removed in destination file. 
776             * 
777             * @param isr the input stream reader for the source instance file
778             * @param osw   the output stream writer for the destination instance file
779             * @param cardinalities a vector containing the number of distinct values for a particular column
780             * @throws LiblinearException
781             */
782            public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
783                    try {
784                            final BufferedReader in = new BufferedReader(isr);
785                            final BufferedWriter out = new BufferedWriter(osw);
786    
787                            int c;
788                            int j = 0;
789                            int offset = 1;
790                            int code = 0;
791                            while(true) {
792                                    c = in.read();
793                                    if (c == -1) {
794                                            break;
795                                    }
796                                    
797                                    if (c == '\t' || c == '|') {
798                                            if (j == 0) {
799                                                    out.write(Integer.toString(code));
800                                                    j++;
801                                            } else {
802                                                    if (code != -1) {
803                                                            out.write(' ');
804                                                            out.write(Integer.toString(code+offset));
805                                                            out.write(":1");
806                                                    }
807                                                    if (c == '\t') {
808                                                            offset += cardinalities[j-1];
809                                                            j++;
810                                                    }
811                                            }
812                                            code = 0;
813                                    } else if (c == '\n') {
814                                            j = 0;
815                                            offset = 1;
816                                            out.write('\n');
817                                            code = 0;
818                                    } else if (c == '-') {
819                                            code = -1;
820                                    } else if (code != -1) {
821                                            if (c > 47 && c < 58) {
822                                                    code = code * 10 + (c-48);
823                                            } else {
824                                                    throw new LiblinearException("The instance file contain a non-integer value, when converting the Malt SVM format into Liblinear format.");
825                                            }
826                                    }       
827                            }                       
828                            in.close();     
829                            out.close();
830                    } catch (IOException e) {
831                            throw new LiblinearException("Cannot read from the instance file, when converting the Malt SVM format into Liblinear format. ", e);
832                    }
833            }
834            
835            protected void finalize() throws Throwable {
836                    try {
837                            closeInstanceWriter();
838                    } finally {
839                            super.finalize();
840                    }
841            }
842            
843            /* (non-Javadoc)
844             * @see java.lang.Object#toString()
845             */
846            public String toString() {
847                    final StringBuffer sb = new StringBuffer();
848                    sb.append("\nLiblinear INTERFACE\n");
849                    sb.append("  Liblinear version: "+LIBLINEAR_VERSION+"\n");
850                    sb.append("  Liblinear string: "+paramString+"\n");
851                    
852                    sb.append(getLibLinearOptions());
853                    return sb.toString();
854            }
855    }