001    package org.maltparser.ml.liblinear;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.File;
006    import java.io.FileNotFoundException;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.OutputStreamWriter;
011    import java.io.PrintStream;
012    import java.util.ArrayList;
013    import java.util.LinkedHashMap;
014    import java.util.Set;
015    import java.util.regex.Pattern;
016    import java.util.regex.PatternSyntaxException;
017    
018    import liblinear.FeatureNode;
019    import liblinear.Linear;
020    import liblinear.Model;
021    import liblinear.Parameter;
022    import liblinear.Problem;
023    import liblinear.SolverType;
024    
025    import org.maltparser.core.exception.MaltChainedException;
026    import org.maltparser.core.feature.FeatureVector;
027    import org.maltparser.core.feature.function.FeatureFunction;
028    import org.maltparser.core.feature.value.FeatureValue;
029    import org.maltparser.core.feature.value.MultipleFeatureValue;
030    import org.maltparser.core.feature.value.SingleFeatureValue;
031    import org.maltparser.core.helper.NoPrintStream;
032    import org.maltparser.core.syntaxgraph.DependencyStructure;
033    import org.maltparser.ml.LearningMethod;
034    import org.maltparser.parser.DependencyParserConfig;
035    import org.maltparser.parser.guide.instance.InstanceModel;
036    import org.maltparser.parser.history.action.SingleDecision;
037    import org.maltparser.parser.history.kbest.KBestList;
038    import org.maltparser.parser.history.kbest.ScoredKBestList;
039    
040    
041    public class Liblinear implements LearningMethod {
042            public final static String LIBLINEAR_VERSION = "1.33";
043            public enum Verbostity {
044                    SILENT, ERROR, ALL
045            }
046            private LinkedHashMap<String, String> liblinearOptions;
047            
048            protected InstanceModel owner;
049            protected int learnerMode;
050            protected String name;
051            protected int numberOfInstances;
052            protected boolean saveInstanceFiles;
053            protected boolean excludeNullValues;
054            protected String pathExternalLiblinearTrain = null;
055            private int[] cardinalities;
056            /**
057             * Instance output stream writer 
058             */
059            private BufferedWriter instanceOutput = null; 
060            /**
061             * Liblinear model object, only used during classification.
062             */
063            private Model model = null;
064            
065            /**
066             * Parameter string
067             */
068            private String paramString;
069    
070            private ArrayList<FeatureNode> xlist = null;
071    
072            private Verbostity verbosity;
073            /**
074             * Constructs a Liblinear learner.
075             * 
076             * @param owner the guide model owner
077             * @param learnerMode the mode of the learner TRAIN or CLASSIFY
078             */
079            public Liblinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
080                    setOwner(owner);
081                    setLearningMethodName("liblinear");
082                    setLearnerMode(learnerMode.intValue());
083                    setNumberOfInstances(0);
084                    verbosity = Verbostity.SILENT;
085    
086                    liblinearOptions = new LinkedHashMap<String, String>();
087                    initLiblinearOptions();
088                    parseParameters(getConfiguration().getOptionValue("liblinear", "liblinear_options").toString());
089                    initSpecialParameters();
090                    if (learnerMode == BATCH) {
091    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
092    //                              if (pathExternalLiblinearTrain != null) {
093    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : Liblinear external "+ getLibLinearOptions() + "\n");
094    //                              } else {
095    //                                      owner.getGuide().getConfiguration().getConfigLogger().info("  Learner              : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions() + "\n");
096    //                              }
097    //                      }
098                            instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
099                    } 
100    //              else {
101    //                      if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
102    //                              owner.getGuide().getConfiguration().getConfigLogger().info("  Classifier           : Liblinear "+LIBLINEAR_VERSION+" "+ getLibLinearOptions()+ "\n");
103    //                      }
104    //              }
105            }
106            
107            
108            public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
109                    if (featureVector == null) {
110                            throw new LiblinearException("The feature vector cannot be found");
111                    } else if (decision == null) {
112                            throw new LiblinearException("The decision cannot be found");
113                    }       
114                    try {
115                            instanceOutput.write(decision.getDecisionCode()+"\t");
116                            for (int i = 0; i < featureVector.size(); i++) {
117                                    FeatureValue featureValue = featureVector.get(i).getFeatureValue();
118                                    if (excludeNullValues == true && featureValue.isNullValue()) {
119                                            instanceOutput.write("-1");
120                                    } else {
121                                            if (featureValue instanceof SingleFeatureValue) {
122                                                    instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+"");
123                                            } else if (featureValue instanceof MultipleFeatureValue) {
124                                                    Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
125                                                    int j=0;
126                                                    for (Integer value : values) {
127                                                            instanceOutput.write(value.toString());
128                                                            if (j != values.size()-1) {
129                                                                    instanceOutput.write("|");
130                                                            }
131                                                            j++;
132                                                    }
133                                            }
134                                    }
135                                    if (i != featureVector.size()) {
136                                            instanceOutput.write('\t');
137                                    }
138                            }
139    
140                            instanceOutput.write('\n');
141                            instanceOutput.flush();
142                            increaseNumberOfInstances();
143                    } catch (IOException e) {
144                            throw new LiblinearException("The Liblinear learner cannot write to the instance file. ", e);
145                    }
146            }
147            
148            public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }
149            
150            /* (non-Javadoc)
151             * @see org.maltparser.ml.LearningMethod#noMoreInstances()
152             */
153            public void noMoreInstances() throws MaltChainedException {
154                    closeInstanceWriter();
155            }
156    
157    
158            /* (non-Javadoc)
159             * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector)
160             */
161            public void train(FeatureVector featureVector) throws MaltChainedException {
162                    if (featureVector == null) {
163                            throw new LiblinearException("The feature vector cannot be found. ");
164                    } else if (owner == null) {
165                            throw new LiblinearException("The parent guide model cannot be found. ");
166                    }
167                    cardinalities = getCardinalities(featureVector);
168                    if (pathExternalLiblinearTrain == null) {
169                            try {
170                                    final Problem problem = readLibLinearProblem(getInstanceInputStreamReader(".ins"), cardinalities);
171                                    if (owner.getGuide().getConfiguration().getConfigLogger().isInfoEnabled()) {
172                                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model "+getFile(".mod").getName()+"\n");
173                                    }
174                                    final PrintStream out = System.out;
175                                    final PrintStream err = System.err;
176                                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
177                                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
178                            Linear.saveModel(new File(getFile(".mod").getAbsolutePath()), Linear.train(problem, getLiblinearParameters()));
179                                    
180                                    System.setOut(err);
181                                    System.setOut(out);
182                                    if (!saveInstanceFiles) {
183                                            getFile(".ins").delete();
184                                    }
185                            } catch (OutOfMemoryError e) {
186                                    throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
187                            } catch (IllegalArgumentException e) {
188                                    throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
189                            } catch (SecurityException e) {
190                                    throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
191                            } catch (IOException e) {
192                                    throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
193                            }
194                    } else {
195                            trainExternal(featureVector);
196                    }
197                    saveCardinalities(getInstanceOutputStreamWriter(".car"), cardinalities);
198            }
199            
200            private void trainExternal(FeatureVector featureVector) throws MaltChainedException {
201                    try {           
202                            maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
203                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating Liblinear model (external) "+getFile(".mod").getName());
204    
205                            final String[] params = getLibLinearParamStringArray();
206                            String[] arrayCommands = new String[params.length+3];
207                            int i = 0;
208                            arrayCommands[i++] = pathExternalLiblinearTrain;
209                            for (; i <= params.length; i++) {
210                                    arrayCommands[i] = params[i-1];
211                            }
212                            arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath();
213                            arrayCommands[i++] = getFile(".mod").getAbsolutePath();
214                            
215                    if (verbosity == Verbostity.ALL) {
216                            owner.getGuide().getConfiguration().getConfigLogger().info('\n');
217                    }
218                            final Process child = Runtime.getRuntime().exec(arrayCommands);
219                    final InputStream in = child.getInputStream();
220                    final InputStream err = child.getErrorStream();
221                    int c;
222                    while ((c = in.read()) != -1){
223                            if (verbosity == Verbostity.ALL) {
224                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
225                            }
226                    }
227                    while ((c = err.read()) != -1){
228                            if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
229                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
230                            }
231                    }
232                if (child.waitFor() != 0) {
233                    owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
234                }
235                    in.close();
236                    err.close();
237                    if (!saveInstanceFiles) {
238                                    getFile(".ins").delete();
239                                    getFile(".ins.tmp").delete();
240                    }
241                    owner.getGuide().getConfiguration().getConfigLogger().info('\n');
242                    } catch (InterruptedException e) {
243                             throw new LiblinearException("Liblinear is interrupted. ", e);
244                    } catch (IllegalArgumentException e) {
245                            throw new LiblinearException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
246                    } catch (SecurityException e) {
247                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
248                    } catch (IOException e) {
249                            throw new LiblinearException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
250                    } catch (OutOfMemoryError e) {
251                            throw new LiblinearException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
252                    }
253            }
254            
255            private int[] getCardinalities(FeatureVector featureVector) {
256                    int[] cardinalities = new int[featureVector.size()];
257                    int i = 0;
258                    for (FeatureFunction feature : featureVector) {
259                            cardinalities[i++] = feature.getFeatureValue().getCardinality();
260                    }
261                    return cardinalities;
262            }
263            
264            private void saveCardinalities(OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
265                    final BufferedWriter out = new BufferedWriter(osw);
266                    try {
267                            for (int i = 0, n = cardinalities.length; i < n; i++) {
268                                    out.write(Integer.toString(cardinalities[i]));
269                                    if (i < n - 1) {
270                                            out.write(',');
271                                    }
272                            }
273                            out.write('\n');
274                            out.close();
275                    } catch (IOException e) {
276                            throw new LiblinearException("", e);
277                    }
278            }
279            
280            private int[] loadCardinalities(InputStreamReader isr) throws MaltChainedException {
281                    int[] cardinalities = null;
282                    try {
283                            final BufferedReader in = new BufferedReader(isr); 
284                            String line;
285                            if ((line = in.readLine()) != null) {
286                                    String[] items = line.split(",");
287                                    cardinalities = new int[items.length];
288                                    for (int i = 0; i < items.length; i++) {
289                                            cardinalities[i] = Integer.parseInt(items[i]);
290                                    }
291                            }
292                            in.close();
293                    } catch (IOException e) {
294                            throw new LiblinearException("", e);
295                    } catch (NumberFormatException e) {
296                            throw new LiblinearException("", e);
297                    }
298                    return cardinalities;
299            }
300            
301            /* (non-Javadoc)
302             * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList)
303             */
304            public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
305                    if (method == null) {
306                            throw new LiblinearException("The learning method cannot be found. ");
307                    } else if (divideFeature == null) {
308                            throw new LiblinearException("The divide feature cannot be found. ");
309                    } 
310                    try {
311                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
312                            final BufferedWriter out = method.getInstanceWriter();
313                            final StringBuilder sb = new StringBuilder(6);
314                            int l = in.read();
315                            char c;
316                            int j = 0;
317            
318                            while(true) {
319                                    if (l == -1) {
320                                            sb.setLength(0);
321                                            break;
322                                    }
323                                    
324                                    c = (char)l; 
325                                    l = in.read();
326                                    if (c == '\t') {
327                                            if (divideFeatureIndexVector.contains(j-1)) {
328                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
329                                                    out.write('\t');
330                                            }
331                                            out.write(sb.toString());
332                                            j++;
333                                            out.write('\t');
334                                            sb.setLength(0);
335                                    } else if (c == '\n') {
336                                            out.write(sb.toString());
337                                            if (divideFeatureIndexVector.contains(j-1)) {
338                                                    out.write('\t');
339                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
340                                            }
341                                            out.write('\n');
342                                            sb.setLength(0);
343                                            method.increaseNumberOfInstances();
344                                            this.decreaseNumberOfInstances();
345                                            j = 0;
346                                    } else {
347                                            sb.append(c);
348                                    }
349                            }       
350                            in.close();
351                            getFile(".ins").delete();
352                    } catch (SecurityException e) {
353                            throw new LiblinearException("The Liblinear learner cannot remove the instance file. ", e);
354                    } catch (NullPointerException  e) {
355                            throw new LiblinearException("The instance file cannot be found. ", e);
356                    } catch (FileNotFoundException e) {
357                            throw new LiblinearException("The instance file cannot be found. ", e);
358                    } catch (IOException e) {
359                            throw new LiblinearException("The Liblinear learner read from the instance file. ", e);
360                    }
361            }
362            
363            /* (non-Javadoc)
364             * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList)
365             */
366            public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
367                    if (model == null) {
368                            File modelFile = getFile(".mod");
369                            try {
370                                    model = Linear.loadModel(new File(modelFile.getAbsolutePath()));
371                            } catch (IOException e) {
372                                    throw new LiblinearException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e);
373                            }
374                    }
375    
376                    if (cardinalities == null) {
377                            if (getFile(".car").exists()) {
378                                    cardinalities = loadCardinalities(getInstanceInputStreamReader(".car"));
379                            } else {
380                                    cardinalities = getCardinalities(featureVector);
381                            }
382                    }
383                    if (xlist == null) {
384                            xlist = new ArrayList<FeatureNode>(featureVector.size()); 
385                    }
386                    if (model == null) { 
387                            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the learning model cannot be found. ");
388                    } else if (featureVector == null) {
389                            throw new LiblinearException("The Liblinear learner cannot predict the next class, because the feature vector cannot be found. ");
390                    }
391                    int j = 0;
392                    int offset = 1;
393                    int i = 0;
394                    for (FeatureFunction feature : featureVector) {
395                            final FeatureValue featureValue = feature.getFeatureValue();
396                            if (!(excludeNullValues == true && featureValue.isNullValue())) {
397                                    if (featureValue instanceof SingleFeatureValue) {
398                                            if (((SingleFeatureValue)featureValue).getCode() < cardinalities[i]) {
399                                                    xlist.add(j++, new FeatureNode(((SingleFeatureValue)featureValue).getCode() + offset, 1));
400                                            }
401                                    } else if (featureValue instanceof MultipleFeatureValue) {
402                                            for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
403                                                    if (value < cardinalities[i]) {
404                                                            xlist.add(j++, new FeatureNode(value + offset, 1));
405                                                    }
406                                            }
407                                    }
408                            }
409                            offset += cardinalities[i];
410                            i++;
411                    }
412                    
413                    FeatureNode[] xarray = new FeatureNode[j];
414                    for (int k = 0; k < j; k++) {
415                            xarray[k] = xlist.get(k);
416                    }
417    
418                    if (decision.getKBestList().getK() == 1) {
419                            decision.getKBestList().add(Linear.predict(model, xarray));
420                    } else {
421                            liblinear_predict_with_kbestlist(model, xarray, decision.getKBestList());
422                    }
423                    
424                    xlist.clear();
425    
426                    return true;
427            }
428            
429    
430            public void terminate() throws MaltChainedException { 
431                    closeInstanceWriter();
432                    model = null;
433                    xlist = null;
434                    owner = null;
435            }
436    
437            public BufferedWriter getInstanceWriter() {
438                    return instanceOutput;
439            }
440            
441            protected void closeInstanceWriter() throws MaltChainedException {
442                    try {
443                            if (instanceOutput != null) {
444                                    instanceOutput.flush();
445                                    instanceOutput.close();
446                                    instanceOutput = null;
447                            }
448                    } catch (IOException e) {
449                            throw new LiblinearException("The Liblinear learner cannot close the instance file. ", e);
450                    }
451            }
452            
453            
454            /**
455             * Returns the parameter string for used for configure Liblinear
456             * 
457             * @return the parameter string for used for configure Liblinear
458             */
459            public String getParamString() {
460                    return paramString;
461            }
462            
463            public InstanceModel getOwner() {
464                    return owner;
465            }
466    
467            protected void setOwner(InstanceModel owner) {
468                    this.owner = owner;
469            }
470            
471            public int getLearnerMode() {
472                    return learnerMode;
473            }
474    
475            public void setLearnerMode(int learnerMode) throws MaltChainedException {
476                    this.learnerMode = learnerMode;
477            }
478            
479            public String getLearningMethodName() {
480                    return name;
481            }
482            
483            /**
484             * Returns the current configuration
485             * 
486             * @return the current configuration
487             * @throws MaltChainedException
488             */
489            public DependencyParserConfig getConfiguration() throws MaltChainedException {
490                    return owner.getGuide().getConfiguration();
491            }
492            
493            public int getNumberOfInstances() {
494                    return numberOfInstances;
495            }
496    
497            public void increaseNumberOfInstances() {
498                    numberOfInstances++;
499                    owner.increaseFrequency();
500            }
501            
502            public void decreaseNumberOfInstances() {
503                    numberOfInstances--;
504                    owner.decreaseFrequency();
505            }
506            
507            protected void setNumberOfInstances(int numberOfInstances) {
508                    this.numberOfInstances = 0;
509            }
510    
511            protected void setLearningMethodName(String name) {
512                    this.name = name;
513            }
514            
515            protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
516                    return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
517            }
518            
519            protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
520                    return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
521            }
522            
523            protected File getFile(String suffix) throws MaltChainedException {
524                    return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix);
525            }
526            
527            /**
528             * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated).
529             * 
530             * @param isr   the instance stream reader for the instance file
531             * @param cardinalities a array containing the number of distinct values for a particular column.
532             * @throws LiblinearException
533             */
534            public Problem readLibLinearProblem(InputStreamReader isr, int[] cardinalities) throws MaltChainedException {
535                    Problem problem = new Problem();
536    
537                    try {
538                            final BufferedReader fp = new BufferedReader(isr);
539                            int max_index = 0;
540                            if (xlist == null) {
541                                    xlist = new ArrayList<FeatureNode>(); 
542                            }
543                            problem.bias = getBias();
544                            problem.l = getNumberOfInstances();
545                            problem.x = new FeatureNode[problem.l][];
546                            problem.y = new int[problem.l];
547                            int i = 0;
548                            final Pattern tabPattern = Pattern.compile("\t");
549                            final Pattern pipePattern = Pattern.compile("\\|");
550                            while(true) {
551                                    String line = fp.readLine();
552                                    if(line == null) break;
553                                    String[] columns = tabPattern.split(line);
554    
555                                    if (columns.length == 0) {
556                                            continue;
557                                    }
558                                    
559                                    int offset = 1; 
560                                    int j = 0;
561                                    try {
562                                            problem.y[i] = Integer.parseInt(columns[j]);
563                                            int p = 0;
564                                            for(j = 1; j < columns.length; j++) {
565                                                    final String[] items = pipePattern.split(columns[j]);   
566                                                    for (int k = 0; k < items.length; k++) {
567                                                            try {
568                                                                    if (Integer.parseInt(items[k]) != -1) {
569                                                                            xlist.add(p, new FeatureNode(Integer.parseInt(items[k])+offset, 1));
570                                                                            p++;
571                                                                    }
572                                                            } catch (NumberFormatException e) {
573                                                                    throw new LiblinearException("The instance file contain a non-integer value '"+items[k]+"'", e);
574                                                            }
575                                                    }
576                                                    offset += cardinalities[j-1];
577                                            }
578                                            problem.x[i] = xlist.subList(0, p).toArray(new FeatureNode[0]);
579                                            if(columns.length > 1) {
580                                                    max_index = Math.max(max_index, problem.x[i][p-1].index);
581                                            }
582                                            i++;
583                                            xlist.clear();
584                                    } catch (ArrayIndexOutOfBoundsException e) {
585                                            throw new LiblinearException("Cannot read from the instance file. ", e);
586                                    }
587                            }
588                            fp.close();     
589                            problem.n = max_index;
590                            if ( problem.bias >= 0 ) {
591                                    problem.n++;
592                            }
593                            xlist = null;
594                    } catch (IOException e) {
595                            throw new LiblinearException("Cannot read from the instance file. ", e);
596                    }
597                    return problem;
598            }
599            
600            protected void initSpecialParameters() throws MaltChainedException {
601                    if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) {
602                            excludeNullValues = true;
603                    } else {
604                            excludeNullValues = false;
605                    }
606                    saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("liblinear", "save_instance_files")).booleanValue();
607                            
608                    if (!getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().equals("")) {
609                            try {
610                                    if (!new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).exists()) {
611                                            throw new LiblinearException("The path to the external Liblinear trainer 'svm-train' is wrong.");
612                                    }
613                                    if (new File(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString()).isDirectory()) {
614                                            throw new LiblinearException("The option --liblinear-liblinear_external points to a directory, the path should point at the 'train' file or the 'train.exe' file");
615                                    }
616                                    if (!(getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train") || getConfiguration().getOptionValue("liblinear", "liblinear_external").toString().endsWith("train.exe"))) {
617                                            throw new LiblinearException("The option --liblinear-liblinear_external does not specify the path to 'train' file or the 'train.exe' file. ");
618                                    }
619                                    pathExternalLiblinearTrain = getConfiguration().getOptionValue("liblinear", "liblinear_external").toString();
620                            } catch (SecurityException e) {
621                                    throw new LiblinearException("Access denied to the file specified by the option --liblinear-liblinear_external. ", e);
622                            }
623                    }
624                    if (getConfiguration().getOptionValue("liblinear", "verbosity") != null) {
625                            verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("liblinear", "verbosity").toString().toUpperCase());
626                    }
627            }
628            
629            public String getLibLinearOptions() {
630                    StringBuilder sb = new StringBuilder();
631                    for (String key : liblinearOptions.keySet()) {
632                            sb.append('-');
633                            sb.append(key);
634                            sb.append(' ');
635                            sb.append(liblinearOptions.get(key));
636                            sb.append(' ');
637                    }
638                    return sb.toString();
639            }
640            
641            public void parseParameters(String paramstring) throws MaltChainedException {
642                    if (paramstring == null) {
643                            return;
644                    }
645                    final String[] argv;
646                    String allowedFlags = "sceB";
647                    try {
648                            argv = paramstring.split("[_\\p{Blank}]");
649                    } catch (PatternSyntaxException e) {
650                            throw new LiblinearException("Could not split the liblinear-parameter string '"+paramstring+"'. ", e);
651                    }
652                    for (int i=0; i < argv.length-1; i++) {
653                            if(argv[i].charAt(0) != '-') {
654                                    throw new LiblinearException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
655                            }
656                            if(++i>=argv.length) {
657                                    throw new LiblinearException("The last argument does not have any value. ");
658                            }
659                            try {
660                                    int index = allowedFlags.indexOf(argv[i-1].charAt(1));
661                                    if (index != -1) {
662                                            liblinearOptions.put(Character.toString(argv[i-1].charAt(1)), argv[i]);
663                                    } else {
664                                            throw new LiblinearException("Unknown liblinear parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");                
665                                    }
666                            } catch (ArrayIndexOutOfBoundsException e) {
667                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
668                            } catch (NumberFormatException e) {
669                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);     
670                            } catch (NullPointerException e) {
671                                    throw new LiblinearException("The liblinear parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);     
672                            }
673                    }
674            }
675            
676            public double getBias() throws MaltChainedException {
677                    try {
678                            return Double.valueOf(liblinearOptions.get("B")).doubleValue();
679                    } catch (NumberFormatException e) {
680                            throw new LiblinearException("The liblinear bias value is not numerical value. ", e);
681                    }
682            }
683    
684            public Parameter getLiblinearParameters() throws MaltChainedException {
685                    Parameter param = new Parameter(SolverType.L2LOSS_SVM_DUAL, 1, 0.1);
686                    String type = liblinearOptions.get("s");
687                    if (type.equals("0")) {
688                            param.setSolverType(SolverType.L2_LR);
689                    } else if (type.equals("1")) {
690                            param.setSolverType(SolverType.L2LOSS_SVM_DUAL);
691                    } else if (type.equals("2")) {
692                            param.setSolverType(SolverType.L2LOSS_SVM);
693                    } else if (type.equals("3")) {
694                            param.setSolverType(SolverType.L1LOSS_SVM_DUAL);
695                    } else if (type.equals("4")) {
696                            param.setSolverType(SolverType.MCSVM_CS);
697                    } else {
698                            throw new LiblinearException("The liblinear type (-s) is not an integer value between 0 and 4. ");
699                    }
700                    try {
701                            param.setC(Double.valueOf(liblinearOptions.get("c")).doubleValue());
702                    } catch (NumberFormatException e) {
703                            throw new LiblinearException("The liblinear cost (-c) value is not numerical value. ", e);
704                    }
705                    try {
706                            param.setEps(Double.valueOf(liblinearOptions.get("e")).doubleValue());
707                    } catch (NumberFormatException e) {
708                            throw new LiblinearException("The liblinear epsilon (-e) value is not numerical value. ", e);
709                    }
710                    return param;
711            }
712    
713            public void initLiblinearOptions() {
714                    liblinearOptions.put("s", "1"); // type = SolverType.L2LOSS_SVM_DUAL (default)
715                    liblinearOptions.put("c", "1"); // cost = 1 (default)
716                    liblinearOptions.put("e", "0.1"); // epsilon = 0.1 (default)
717                    liblinearOptions.put("B", "1"); // bias = 1 (default)
718            }
719    
720            public String[] getLibLinearParamStringArray() {
721                    final ArrayList<String> params = new ArrayList<String>();
722    
723                    for (String key : liblinearOptions.keySet()) {
724                            params.add("-"+key); params.add(liblinearOptions.get(key));
725                    }
726                    return params.toArray(new String[params.size()]);
727            }
728            
729            
730            public void liblinear_predict_with_kbestlist(Model model, FeatureNode[] x, KBestList kBestList) throws MaltChainedException {
731                    int i;
732                    final int nr_class = model.getNrClass();
733                    final double[] dec_values = new double[nr_class];
734    
735                    Linear.predictValues(model, x, dec_values);
736                    final int[] labels = model.getLabels();
737                    int[] predictionList = new int[nr_class];
738                    for(i=0;i<nr_class;i++) {
739                            predictionList[i] = labels[i];
740                    }
741    
742                    double tmpDec;
743                    int tmpObj;
744                    int lagest;
745                    for (i=0;i<nr_class-1;i++) {
746                            lagest = i;
747                            for (int j=i;j<nr_class;j++) {
748                                    if (dec_values[j] > dec_values[lagest]) {
749                                            lagest = j;
750                                    }
751                            }
752                            tmpDec = dec_values[lagest];
753                            dec_values[lagest] = dec_values[i];
754                            dec_values[i] = tmpDec;
755                            tmpObj = predictionList[lagest];
756                            predictionList[lagest] = predictionList[i];
757                            predictionList[i] = tmpObj;
758                    }
759                    
760                    int k = nr_class-1;
761                    if (kBestList.getK() != -1) {
762                            k = kBestList.getK() - 1;
763                    }
764                    
765                    for (i=0; i<nr_class && k >= 0; i++, k--) {
766                            if (kBestList instanceof ScoredKBestList) {
767                                    ((ScoredKBestList)kBestList).add(predictionList[i], (float)dec_values[i]);
768                            } else {
769                                    kBestList.add(predictionList[i]);
770                            }
771    
772                    }
773            }
774            
775            /**
776             * Converts the instance file (Malt's own SVM format) into the Liblinear (SVMLight) format. The input instance file is removed (replaced)
777             * by the instance file in the Liblinear (SVMLight) format. If a column contains -1, the value will be removed in destination file. 
778             * 
779             * @param isr the input stream reader for the source instance file
780             * @param osw   the output stream writer for the destination instance file
781             * @param cardinalities a vector containing the number of distinct values for a particular column
782             * @throws LiblinearException
783             */
784            public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, int[] cardinalities) throws MaltChainedException {
785                    try {
786                            final BufferedReader in = new BufferedReader(isr);
787                            final BufferedWriter out = new BufferedWriter(osw);
788    
789                            int c;
790                            int j = 0;
791                            int offset = 1;
792                            int code = 0;
793                            while(true) {
794                                    c = in.read();
795                                    if (c == -1) {
796                                            break;
797                                    }
798                                    
799                                    if (c == '\t' || c == '|') {
800                                            if (j == 0) {
801                                                    out.write(Integer.toString(code));
802                                                    j++;
803                                            } else {
804                                                    if (code != -1) {
805                                                            out.write(' ');
806                                                            out.write(Integer.toString(code+offset));
807                                                            out.write(":1");
808                                                    }
809                                                    if (c == '\t') {
810                                                            offset += cardinalities[j-1];
811                                                            j++;
812                                                    }
813                                            }
814                                            code = 0;
815                                    } else if (c == '\n') {
816                                            j = 0;
817                                            offset = 1;
818                                            out.write('\n');
819                                            code = 0;
820                                    } else if (c == '-') {
821                                            code = -1;
822                                    } else if (code != -1) {
823                                            if (c > 47 && c < 58) {
824                                                    code = code * 10 + (c-48);
825                                            } else {
826                                                    throw new LiblinearException("The instance file contain a non-integer value, when converting the Malt SVM format into Liblinear format.");
827                                            }
828                                    }       
829                            }                       
830                            in.close();     
831                            out.close();
832                    } catch (IOException e) {
833                            throw new LiblinearException("Cannot read from the instance file, when converting the Malt SVM format into Liblinear format. ", e);
834                    }
835            }
836            
837            protected void finalize() throws Throwable {
838                    try {
839                            closeInstanceWriter();
840                    } finally {
841                            super.finalize();
842                    }
843            }
844            
845            /* (non-Javadoc)
846             * @see java.lang.Object#toString()
847             */
848            public String toString() {
849                    final StringBuffer sb = new StringBuffer();
850                    sb.append("\nLiblinear INTERFACE\n");
851                    sb.append("  Liblinear version: "+LIBLINEAR_VERSION+"\n");
852                    sb.append("  Liblinear string: "+paramString+"\n");
853                    
854                    sb.append(getLibLinearOptions());
855                    return sb.toString();
856            }
857    }