001    package org.maltparser.ml.libsvm;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.File;
006    import java.io.FileNotFoundException;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.OutputStreamWriter;
011    import java.io.PrintStream;
012    import java.text.DecimalFormat;
013    import java.text.DecimalFormatSymbols;
014    import java.util.ArrayList;
015    import java.util.Set;
016    import java.util.regex.Pattern;
017    import java.util.regex.PatternSyntaxException;
018    
019    import libsvm.svm;
020    import libsvm.svm_model;
021    import libsvm.svm_node;
022    import libsvm.svm_parameter;
023    import libsvm.svm_problem;
024    
025    import org.maltparser.core.exception.MaltChainedException;
026    import org.maltparser.core.feature.FeatureVector;
027    import org.maltparser.core.feature.function.FeatureFunction;
028    import org.maltparser.core.feature.value.FeatureValue;
029    import org.maltparser.core.feature.value.MultipleFeatureValue;
030    import org.maltparser.core.feature.value.SingleFeatureValue;
031    import org.maltparser.core.helper.NoPrintStream;
032    import org.maltparser.core.syntaxgraph.DependencyStructure;
033    import org.maltparser.ml.LearningMethod;
034    import org.maltparser.ml.libsvm.LibsvmException;
035    import org.maltparser.parser.DependencyParserConfig;
036    import org.maltparser.parser.guide.instance.InstanceModel;
037    import org.maltparser.parser.history.action.SingleDecision;
038    import org.maltparser.parser.history.kbest.KBestList;
039    
040    /**
Implements an interface to the LIBSVM learner (LIBSVM 2.86 is currently used). More information
about LIBSVM can be found at
043    <a href="http://www.csie.ntu.edu.tw/~cjlin/libsvm/" target="_blank">LIBSVM -- A Library for Support Vector Machines</a>.
044    
045    @author Johan Hall
046    @since 1.0
047    */
048    public class Libsvm implements LearningMethod {
049            public final static String LIBSVM_VERSION = "2.86";
050            public enum Verbostity {
051                    SILENT, ERROR, ALL
052            }
053            protected InstanceModel owner;
054            protected int learnerMode;
055            protected String name;
056            protected int numberOfInstances;
057            protected boolean saveInstanceFiles;
058            protected boolean excludeNullValues;
059            protected String pathExternalSVMTrain = null;
060            private final StringBuilder sb;
061            /**
062             * Instance output stream writer 
063             */
064            private BufferedWriter instanceOutput = null; 
065            /**
066             * LIBSVM svm_model object, only used during classification.
067             */
068            private svm_model model = null;
069            
070            //private FastMulticlassModel fastModel = null;
071            /**
072             * LIBSVM svm_parameter object
073             */
074            private svm_parameter svmParam;
075            /**
076             * Parameter string
077             */
078            private String paramString;
079            /**
080             * An array of LIBSVM svm_node objects, only used during classification.
081             */
082            private ArrayList<svm_node> xlist = null;
083            
084            private Verbostity verbosity;
085            /**
086             * Constructs a LIBSVM learner.
087             * 
088             * @param owner the guide model owner
089             * @param learnerMode the mode of the learner TRAIN or CLASSIFY
090             */
091            public Libsvm(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
092                    setOwner(owner);
093                    setLearningMethodName("libsvm");
094                    setLearnerMode(learnerMode.intValue());
095                    setNumberOfInstances(0);
096                    verbosity = Verbostity.SILENT;
097                    initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString());
098                    initSpecialParameters();
099                    if (learnerMode == TRAIN) {
100                            instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
101                    } 
102                    sb = new StringBuilder(6);
103                    
104            }
105            
106            
107            public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
108                    if (featureVector == null) {
109                            throw new LibsvmException("The feature vector cannot be found");
110                    } else if (decision == null) {
111                            throw new LibsvmException("The decision cannot be found");
112                    }       
113                    try {
114                            instanceOutput.write(decision.getDecisionCode()+"\t");
115                            for (int i = 0; i < featureVector.size(); i++) {
116                                    FeatureValue featureValue = featureVector.get(i).getFeatureValue();
117                                    if (excludeNullValues == true && featureValue.isNullValue()) {
118                                            instanceOutput.write("-1");
119                                    } else {
120                                            if (featureValue instanceof SingleFeatureValue) {
121                                                    instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+"");
122                                            } else if (featureValue instanceof MultipleFeatureValue) {
123                                                    Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
124                                                    int j=0;
125                                                    for (Integer value : values) {
126                                                            instanceOutput.write(value.toString());
127                                                            if (j != values.size()-1) {
128                                                                    instanceOutput.write("|");
129                                                            }
130                                                            j++;
131                                                    }
132                                            }
133                                    }
134                                    if (i != featureVector.size()) {
135                                            instanceOutput.write('\t');
136                                    }
137                            }
138    
139                            instanceOutput.write('\n');
140                            increaseNumberOfInstances();
141                    } catch (IOException e) {
142                            throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e);
143                    }
144            }
145            
146    
147            public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException { }
148            
149            /* (non-Javadoc)
150             * @see org.maltparser.ml.LearningMethod#noMoreInstances()
151             */
152            public void noMoreInstances() throws MaltChainedException {
153                    closeInstanceWriter();
154            }
155    
156    
157            /* (non-Javadoc)
158             * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector)
159             */
160            public void train(FeatureVector featureVector) throws MaltChainedException {
161                    if (featureVector == null) {
162                            throw new LibsvmException("The feature vector cannot be found. ");
163                    } else if (owner == null) {
164                            throw new LibsvmException("The parent guide model cannot be found. ");
165                    }
166                    if (pathExternalSVMTrain == null) {
167                            final svm_problem prob = new svm_problem();
168                            try {                   
169                                    final ArrayList<Integer> cardinalities = new ArrayList<Integer>();
170            
171                                    for (FeatureFunction feature : featureVector) {
172                                            cardinalities.add(feature.getFeatureValue().getCardinality());
173                                    }
174                                    
175                                    readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), prob, cardinalities, svmParam);
176                                    final String errorMessage = svm.svm_check_parameter(prob, svmParam);
177                                    if(errorMessage != null) {
178                                            throw new LibsvmException(errorMessage);
179                                    }
180                                    owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model "+getFile(".mod").getName()+"\n");
181                                    final PrintStream out = System.out;
182                                    final PrintStream err = System.err;
183                                    System.setOut(NoPrintStream.NO_PRINTSTREAM);
184                                    //System.setErr(new PrintStream(new LoggingOutputStream(owner.getGuide().getConfiguration().getConfigLogger(), owner.getGuide().getConfiguration().getConfigLogger().getLevel()), true));
185                                    System.setErr(NoPrintStream.NO_PRINTSTREAM);
186                                    
187                                    svm.svm_save_model(getFile(".mod").getAbsolutePath(), svm.svm_train(prob, svmParam));
188                                    
189                                    System.setOut(err);
190                                    System.setOut(out);
191                                    if (!saveInstanceFiles) {
192                                            getFile(".ins").delete();
193                                    }
194                            } catch (OutOfMemoryError e) {
195                                    throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
196                            } catch (IllegalArgumentException e) {
197                                    throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
198                            } catch (SecurityException e) {
199                                    throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
200                            } catch (IOException e) {
201                                    throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
202                            }
203                    } else {
204                            trainExternal(featureVector);
205                    }
206            }
207    
208            private void trainExternal(FeatureVector featureVector) throws MaltChainedException {
209    
210                    try {           
211                            final ArrayList<Integer> cardinalities = new ArrayList<Integer>();
212                            for (FeatureFunction feature : featureVector) {
213                                    cardinalities.add(feature.getFeatureValue().getCardinality());
214                            }
215                            maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
216                            owner.getGuide().getConfiguration().getConfigLogger().info("Creating LIBSVM model (svm-train) "+getFile(".mod").getName());
217    
218                            final ArrayList<String> commands = new ArrayList<String>();
219                            commands.add(pathExternalSVMTrain);
220                            final String[] params = getSVMParamStringArray(svmParam);
221                            for (int i=0; i < params.length; i++) {
222                                    commands.add(params[i]);
223                            }
224                            commands.add(getFile(".ins.tmp").getAbsolutePath());
225                            commands.add(getFile(".mod").getAbsolutePath());
226                            String[] arrayCommands =  commands.toArray(new String[commands.size()]);
227                            
228                    if (verbosity == Verbostity.ALL) {
229                            owner.getGuide().getConfiguration().getConfigLogger().info('\n');
230                    }
231                            final Process child = Runtime.getRuntime().exec(arrayCommands);
232                    final InputStream in = child.getInputStream();
233                    final InputStream err = child.getErrorStream();
234                    int c;
235                    while ((c = in.read()) != -1){
236                            if (verbosity == Verbostity.ALL) {
237                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
238                            }
239                    }
240                    while ((c = err.read()) != -1){
241                            if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
242                                    owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
243                            }
244                    }
245                if (child.waitFor() != 0) {
246                    owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
247                }
248                    in.close();
249                    err.close();
250                    if (!saveInstanceFiles) {
251                                    getFile(".ins").delete();
252                                    getFile(".ins.tmp").delete();
253                    }
254                    owner.getGuide().getConfiguration().getConfigLogger().info('\n');
255                    } catch (InterruptedException e) {
256                             throw new LibsvmException("SVM-trainer is interrupted. ", e);
257                    } catch (IllegalArgumentException e) {
258                            throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
259                    } catch (SecurityException e) {
260                            throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
261                    } catch (IOException e) {
262                            throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
263                    } catch (OutOfMemoryError e) {
264                            throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
265                    }
266            }
267            
268            /* (non-Javadoc)
269             * @see org.maltparser.ml.LearningMethod#moveAllInstances(org.maltparser.ml.LearningMethod, org.maltparser.core.feature.function.FeatureFunction, java.util.ArrayList)
270             */
271            public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
272                    if (method == null) {
273                            throw new LibsvmException("The learning method cannot be found. ");
274                    } else if (divideFeature == null) {
275                            throw new LibsvmException("The divide feature cannot be found. ");
276                    } 
277                    try {
278                            final BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
279                            final BufferedWriter out = method.getInstanceWriter();
280                            int l = in.read();
281                            char c;
282                            int j = 0;
283                            while(true) {
284                                    if (l == -1) {
285                                            sb.setLength(0);
286                                            break;
287                                    }
288                                    
289                                    c = (char)l; 
290                                    l = in.read();
291                                    if (c == '\t') {
292                                            if (divideFeatureIndexVector.contains(j-1)) {
293                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
294                                                    out.write('\t');
295                                            }
296                                            out.write(sb.toString());
297                                            j++;
298                                            out.write('\t');
299                                            sb.setLength(0);
300                                    } else if (c == '\n') {
301                                            out.write(sb.toString());
302                                            if (divideFeatureIndexVector.contains(j-1)) {
303                                                    out.write('\t');
304                                                    out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
305                                            }
306                                            out.write('\n');
307                                            sb.setLength(0);
308                                            method.increaseNumberOfInstances();
309                                            this.decreaseNumberOfInstances();
310                                            j = 0;
311                                    } else {
312                                            sb.append(c);
313                                    }
314                            }       
315                            in.close();
316                            getFile(".ins").delete();
317                    } catch (SecurityException e) {
318                            throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
319                    } catch (NullPointerException  e) {
320                            throw new LibsvmException("The instance file cannot be found. ", e);
321                    } catch (FileNotFoundException e) {
322                            throw new LibsvmException("The instance file cannot be found. ", e);
323                    } catch (IOException e) {
324                            throw new LibsvmException("The LIBSVM learner read from the instance file. ", e);
325                    }
326            }
327            
328            /* (non-Javadoc)
329             * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList)
330             */
331            public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
332                    if (model == null) {
333                            File modelFile = getFile(".mod");
334                            try {
335                                    model = svm.svm_load_model(modelFile.getAbsolutePath());        
336                            } catch (IOException e) {
337                                    throw new LibsvmException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e);
338                            }
339                    }
340                    if (xlist == null) {
341                            xlist = new ArrayList<svm_node>(featureVector.size()); 
342                    }
343                    if (model == null) { 
344                            throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. ");
345                    } else if (featureVector == null) {
346                            throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. ");
347                    }
348                    int j = 0;
349                    int offset = 0;
350    
351                    for (FeatureFunction feature : featureVector) {
352                            final FeatureValue featureValue = feature.getFeatureValue();
353                            if (!(excludeNullValues == true && featureValue.isNullValue())) {
354                                    if (featureValue instanceof SingleFeatureValue) {
355                                            if (((SingleFeatureValue)featureValue).isKnown()) {
356                                                    if (j >= xlist.size()) {
357                                                            svm_node x =  new svm_node();
358                                                            x.value = 1;
359                                                            xlist.add(j,x);
360                                                    }
361                                                    xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset;
362                                            }
363                                    } else if (featureValue instanceof MultipleFeatureValue) {
364                                            for (Integer value : ((MultipleFeatureValue)featureValue).getCodes()) {
365                                                    if (((MultipleFeatureValue)featureValue).isKnown(value)) {
366                                                            if (j >= xlist.size()) {
367                                                                    svm_node x =  new svm_node();
368                                                                    x.value = 1;
369                                                                    xlist.add(j,x);
370                                                            }
371                                                            xlist.get(j++).index = value + offset;
372                                                    }
373                                            }
374                                    }
375                            }
376                            offset += featureValue.getCardinality();
377                    }
378    
379                    if (decision.getKBestList().getK() == 1 || svm.svm_get_svm_type(model) == svm_parameter.ONE_CLASS ||
380                                    svm.svm_get_svm_type(model) == svm_parameter.EPSILON_SVR ||
381                                    svm.svm_get_svm_type(model) == svm_parameter.NU_SVR) {
382                            decision.getKBestList().add((int)svm.svm_predict(model, xlist.subList(0, j).toArray(new svm_node[0])));
383                    } else {
384                            svm_predict_with_kbestlist(model, xlist.subList(0, j).toArray(new svm_node[0]), decision.getKBestList());
385                    }
386    
387                    return true;
388            }
389            
390    
391            public void terminate() throws MaltChainedException { 
392                    closeInstanceWriter();
393                    model = null;
394                    svmParam = null;
395                    xlist = null;
396                    owner = null;
397            }
398    
399            public BufferedWriter getInstanceWriter() {
400                    return instanceOutput;
401            }
402            
403            protected void closeInstanceWriter() throws MaltChainedException {
404                    try {
405                            if (instanceOutput != null) {
406                                    instanceOutput.flush();
407                                    instanceOutput.close();
408                                    instanceOutput = null;
409                            }
410                    } catch (IOException e) {
411                            throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e);
412                    }
413            }
414            
415            /**
416             * Initialize the LIBSVM according to the parameter string
417             * 
418             * @param paramString the parameter string to configure the LIBSVM learner.
419             * @throws MaltChainedException
420             */
421            protected void initSvmParam(String paramString) throws MaltChainedException {
422                    this.paramString = paramString;
423                    svmParam = new svm_parameter();
424                    initParameters(svmParam);
425                    parseParameters(paramString, svmParam);
426            }
427            
428            /**
429             * Returns the parameter string for used for configure LIBSVM
430             * 
431             * @return the parameter string for used for configure LIBSVM
432             */
433            public String getParamString() {
434                    return paramString;
435            }
436            
437            public InstanceModel getOwner() {
438                    return owner;
439            }
440    
441            protected void setOwner(InstanceModel owner) {
442                    this.owner = owner;
443            }
444            
445            public int getLearnerMode() {
446                    return learnerMode;
447            }
448    
449            public void setLearnerMode(int learnerMode) {
450                    this.learnerMode = learnerMode;
451            }
452            
453            public String getLearningMethodName() {
454                    return name;
455            }
456            
457            /**
458             * Returns the current configuration
459             * 
460             * @return the current configuration
461             * @throws MaltChainedException
462             */
463            public DependencyParserConfig getConfiguration() throws MaltChainedException {
464                    return owner.getGuide().getConfiguration();
465            }
466            
467            public int getNumberOfInstances() {
468                    return numberOfInstances;
469            }
470    
471            public void increaseNumberOfInstances() {
472                    numberOfInstances++;
473                    owner.increaseFrequency();
474            }
475            
476            public void decreaseNumberOfInstances() {
477                    numberOfInstances--;
478                    owner.decreaseFrequency();
479            }
480            
481            protected void setNumberOfInstances(int numberOfInstances) {
482                    this.numberOfInstances = 0;
483            }
484    
485            protected void setLearningMethodName(String name) {
486                    this.name = name;
487            }
488            
489            protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
490                    return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
491            }
492            
493            protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
494                    return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
495            }
496            
497            protected File getFile(String suffix) throws MaltChainedException {
498                    return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix);
499            }
500            
501            /**
502             * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated).
503             * 
504             * @param isr   the instance stream reader for the instance file
505             * @param prob  a svm_problem object
506             * @param cardinality   a vector containing the number of distinct values for a particular column.
507             * @param param a svm_parameter object
508             * @throws LibsvmException
509             */
510            public final void readProblemMaltSVMFormat(InputStreamReader isr, svm_problem prob, ArrayList<Integer> cardinality, svm_parameter param) throws MaltChainedException {
511                    try {
512                            final BufferedReader fp = new BufferedReader(isr);
513                            int max_index = 0;
514                            if (xlist == null) {
515                                    xlist = new ArrayList<svm_node>(); 
516                            }
517                            prob.l = getNumberOfInstances();
518                            prob.x = new svm_node[prob.l][];
519                            prob.y = new double[prob.l];
520                            int i = 0;
521                            final Pattern tabPattern = Pattern.compile("\t");
522                            final Pattern pipePattern = Pattern.compile("\\|");
523                            while(true) {
524                                    String line = fp.readLine();
525                                    if(line == null) break;
526                                    String[] columns = tabPattern.split(line);
527    
528                                    if (columns.length == 0) {
529                                            continue;
530                                    }
531                                    
532                                    int offset = 0; 
533                                    int j = 0;
534                                    try {
535                                            prob.y[i] = (double)Integer.parseInt(columns[j]);
536                                            int p = 0;
537                                            for(j = 1; j < columns.length; j++) {
538                                                    final String[] items = pipePattern.split(columns[j]);   
539                                                    for (int k = 0; k < items.length; k++) {
540                                                            try {
541                                                                    if (Integer.parseInt(items[k]) != -1) {
542                                                                            xlist.add(p, new svm_node());
543                                                                            xlist.get(p).value = 1;
544                                                                            xlist.get(p).index = Integer.parseInt(items[k])+offset;
545                                                                            p++;
546                                                                    }
547                                                            } catch (NumberFormatException e) {
548                                                                    throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e);
549                                                            }
550                                                    }
551                                                    offset += cardinality.get(j-1);
552                                            }
553                                            prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]);
554                                            if(columns.length > 1) {
555                                                    max_index = Math.max(max_index, xlist.get(p-1).index);
556                                            }
557                                            i++;
558                                            xlist.clear();
559                                    } catch (ArrayIndexOutOfBoundsException e) {
560                                            throw new LibsvmException("Cannot read from the instance file. ", e);
561                                    }
562                            }
563                            fp.close();     
564                            if (param.gamma == 0) {
565                                    param.gamma = 1.0/max_index;
566                            }
567                            xlist = null;
568                    } catch (IOException e) {
569                            throw new LibsvmException("Cannot read from the instance file. ", e);
570                    }
571            }
572            
573            protected void initSpecialParameters() throws MaltChainedException {
574                    if (getConfiguration().getOptionValue("singlemalt", "null_value") != null && getConfiguration().getOptionValue("singlemalt", "null_value").toString().equalsIgnoreCase("none")) {
575                            excludeNullValues = true;
576                    } else {
577                            excludeNullValues = false;
578                    }
579                    saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue();
580                            
581                    if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) {
582                            try {
583                                    if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) {
584                                            throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong.");
585                                    }
586                                    if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) {
587                                            throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file");
588                                    }
589                                    if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) {
590                                            throw new LibsvmException("The option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. ");
591                                    }
592                                    pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString();
593                            } catch (SecurityException e) {
594                                    throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e);
595                            }
596                    }
597                    if (getConfiguration().getOptionValue("libsvm", "verbosity") != null) {
598                            verbosity = Verbostity.valueOf(getConfiguration().getOptionValue("libsvm", "verbosity").toString().toUpperCase());
599                    }
600            }
601            
602            /**
603             * Assign a default value to all svm parameters
604             * 
605             * @param param a svm_parameter object
606             */
607            protected void initParameters(svm_parameter param) throws MaltChainedException {
608                    if (param == null) {
609                            throw new LibsvmException("Svm-parameters cannot be found. ");
610                    }
611                    param.svm_type = svm_parameter.C_SVC;
612                    param.kernel_type = svm_parameter.POLY;
613                    param.degree = 2;
614                    param.gamma = 0.2;      // 1/k
615                    param.coef0 = 0;
616                    param.nu = 0.5;
617                    param.cache_size = 100; 
618                    param.C = 1; 
619                    param.eps = 1.0; 
620                    param.p = 0.1;
621                    param.shrinking = 1;
622                    param.probability = 0;
623                    param.nr_weight = 0;
624                    param.weight_label = new int[0];
625                    param.weight = new double[0];
626            }
627            
628            /**
629             * Returns a string containing all svm-parameters of interest
630             * 
631             * @param param a svm_parameter object
632             * @return a string containing all svm-parameters of interest
633             */
634            public String toStringParameters(svm_parameter param)  {
635                    if (param == null) {
636                            throw new IllegalArgumentException("Svm-parameters cannot be found. ");
637                    }
638                    final StringBuffer sb = new StringBuffer();
639                    
640                    final String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"};
641                    final String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"};
642                    final DecimalFormat dform = new DecimalFormat("#0.0#"); 
643                    final DecimalFormatSymbols sym = new DecimalFormatSymbols();
644                    sym.setDecimalSeparator('.');
645                    dform.setDecimalFormatSymbols(sym);
646                    sb.append("LIBSVM SETTINGS\n");
647                    sb.append("  SVM type      : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n");
648                    sb.append("  Kernel        : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n");
649                    if (param.kernel_type == svm_parameter.POLY) {
650                            sb.append("  Degree        : " + param.degree + "\n");
651                    }
652                    if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) {
653                            sb.append("  Gamma         : " + dform.format(param.gamma) + "\n");
654                            if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) {
655                                    sb.append("  Coef0         : " + dform.format(param.coef0) + "\n");
656                            }
657                    }
658                    if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) {
659                            sb.append("  Nu            : " + dform.format(param.nu) + "\n");
660                    }
661                    sb.append("  Cache Size    : " + dform.format(param.cache_size) + " MB\n");
662                    if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) {
663                            sb.append("  C             : " + dform.format(param.C) + "\n");
664                    }
665                    sb.append("  Eps           : " + dform.format(param.eps) + "\n");
666                    if (param.svm_type == svm_parameter.EPSILON_SVR) {
667                            sb.append("  P             : " + dform.format(param.p) + "\n");
668                    }
669                    sb.append("  Shrinking     : " + param.shrinking + "\n");
670                    sb.append("  Probability   : " + param.probability + "\n");
671                    if (param.svm_type == svm_parameter.C_SVC) {
672                            sb.append("  #Weight       : " + param.nr_weight + "\n");
673                            if (param.nr_weight > 0) {
674                                    sb.append("  Weight labels : ");
675                                    for (int i = 0; i < param.nr_weight; i++) {
676                                            sb.append(param.weight_label[i]);
677                                            if (i != param.nr_weight-1) {
678                                                    sb.append(", ");
679                                            }
680                                    }
681                                    sb.append("\n");
682                                    for (int i = 0; i < param.nr_weight; i++) {
683                                            sb.append(dform.format(param.weight));
684                                            if (i != param.nr_weight-1) {
685                                                    sb.append(", ");
686                                            }
687                                    }
688                                    sb.append("\n");
689                            }
690                    }
691                    return sb.toString();
692            }
693            
694            public String[] getSVMParamStringArray(svm_parameter param) {
695                    final ArrayList<String> params = new ArrayList<String>();
696    
697                    if (param.svm_type != 0) {
698                            params.add("-s"); params.add(new Integer(param.svm_type).toString());
699                    }
700                    if (param.kernel_type != 2) {
701                            params.add("-t"); params.add(new Integer(param.kernel_type).toString());
702                    }
703                    if (param.degree != 3) {
704                            params.add("-d"); params.add(new Integer(param.degree).toString());
705                    }
706                    params.add("-g"); params.add(new Double(param.gamma).toString());
707                    if (param.coef0 != 0) {
708                            params.add("-r"); params.add(new Double(param.coef0).toString());
709                    }
710                    if (param.nu != 0.5) {
711                            params.add("-n"); params.add(new Double(param.nu).toString());
712                    }
713                    if (param.cache_size != 100) {
714                            params.add("-m"); params.add(new Double(param.cache_size).toString());
715                    }
716                    if (param.C != 1) {
717                            params.add("-c"); params.add(new Double(param.C).toString());
718                    }
719                    if (param.eps != 0.001) {
720                            params.add("-e"); params.add(new Double(param.eps).toString());
721                    }
722                    if (param.p != 0.1) {
723                            params.add("-p"); params.add(new Double(param.p).toString());
724                    }
725                    if (param.shrinking != 1) {
726                            params.add("-h"); params.add(new Integer(param.shrinking).toString());
727                    }
728                    if (param.probability != 0) {
729                            params.add("-b"); params.add(new Integer(param.probability).toString());
730                    }
731    
732                    return params.toArray(new String[params.size()]);
733            }
734            /**
735             * Parses the parameter string. The parameter string must contain parameter and value pairs, which are separated by a blank 
736             * or a underscore. The parameter begins with a character '-' followed by a one-character flag and the value must comply with
737             * the parameters data type. Some examples:
738             * 
739             * -s 0 -t 1 -d 2 -g 0.4 -e 0.1
740             * -s_0_-t_1_-d_2_-g_0.4_-e_0.1
741             * 
742             * @param paramstring   the parameter string 
743             * @param param a svm_parameter object
744             * @throws LibsvmException
745             */
746            public void parseParameters(String paramstring, svm_parameter param) throws MaltChainedException {
747                    if (param == null) {
748                            throw new LibsvmException("Svm-parameters cannot be found. ");
749                    }
750                    if (paramstring == null) {
751                            return;
752                    }
753                    final String[] argv;
754                    try {
755                            argv = paramstring.split("[_\\p{Blank}]");
756                    } catch (PatternSyntaxException e) {
757                            throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e);
758                    }
759                    for (int i=0; i < argv.length-1; i++) {
760                            if(argv[i].charAt(0) != '-') {
761                                    throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
762                            }
763                            if(++i>=argv.length) {
764                                    throw new LibsvmException("The last argument does not have any value. ");
765                            }
766                            try {
767                                    switch(argv[i-1].charAt(1)) {
768                                    case 's':
769                                            param.svm_type = Integer.parseInt(argv[i]);
770                                            break;
771                                    case 't':
772                                            param.kernel_type = Integer.parseInt(argv[i]);
773                                            break;
774                                    case 'd':
775                                            param.degree = Integer.parseInt(argv[i]);
776                                            break;
777                                    case 'g':
778                                            param.gamma = Double.valueOf(argv[i]).doubleValue();
779                                            break;
780                                    case 'r':
781                                            param.coef0 = Double.valueOf(argv[i]).doubleValue();
782                                            break;
783                                    case 'n':
784                                            param.nu = Double.valueOf(argv[i]).doubleValue();
785                                            break;
786                                    case 'm':
787                                            param.cache_size = Double.valueOf(argv[i]).doubleValue();
788                                            break;
789                                    case 'c':
790                                            param.C = Double.valueOf(argv[i]).doubleValue();
791                                            break;
792                                    case 'e':
793                                            param.eps = Double.valueOf(argv[i]).doubleValue();
794                                            break;
795                                    case 'p':
796                                            param.p = Double.valueOf(argv[i]).doubleValue();
797                                            break;
798                                    case 'h':
799                                            param.shrinking = Integer.parseInt(argv[i]);
800                                            break;
801                                case 'b':
802                                            param.probability = Integer.parseInt(argv[i]);
803                                            break;
804                                    case 'w':
805                                            ++param.nr_weight;
806                                            {
807                                                    int[] old = param.weight_label;
808                                                    param.weight_label = new int[param.nr_weight];
809                                                    System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1);
810                                            }
811            
812                                            {
813                                                    double[] old = param.weight;
814                                                    param.weight = new double[param.nr_weight];
815                                                    System.arraycopy(old,0,param.weight,0,param.nr_weight-1);
816                                            }
817            
818                                            param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2));
819                                            param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue();
820                                            break;
821                                    case 'Y':
822                                    case 'V':
823                                    case 'S':
824                                    case 'F':
825                                    case 'T':
826                                    case 'M':
827                                    case 'N':
828                                            break;
829                                    default:
830                                            throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");         
831                                    }
832                            } catch (ArrayIndexOutOfBoundsException e) {
833                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
834                            } catch (NumberFormatException e) {
835                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);      
836                            } catch (NullPointerException e) {
837                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);      
838                            }
839                    }
840            }
841            
842            public void svm_predict_with_kbestlist(svm_model model, svm_node[] x, KBestList kBestList) throws MaltChainedException {
843                    int i;
844                    final int nr_class = svm.svm_get_nr_class(model);
845                    final double[] dec_values = new double[nr_class*(nr_class-1)/2];
846                    svm.svm_predict_values(model, x, dec_values);
847    
848                    final int[] vote = new int[nr_class];
849                    final int[] voteindex = new int[nr_class];
850                    for(i=0;i<nr_class;i++) {
851                            vote[i] = 0;
852                            voteindex[i] = i;
853                    }
854                    int pos=0;
855                    for(i=0;i<nr_class;i++) {
856                            for(int j=i+1;j<nr_class;j++) {
857                                    if(dec_values[pos++] > 0) {
858                                            ++vote[i];
859                                    } else {
860                                            ++vote[j];
861                                    }
862                            }
863                    }
864    
865                    int small, temp;
866                    for (i=0;i<nr_class-1;i++) {
867                            small = i;
868                            for (int j=i;j<nr_class;j++) {
869                                    if (vote[j] > vote[small]) {
870                                            small = j;
871                                    }
872                            }
873                            temp = vote[small];
874                            vote[small] = vote[i];
875                            vote[i] = temp;
876                            temp = voteindex[small];
877                            voteindex[small] = voteindex[i];
878                            voteindex[i] = temp;
879                    }
880                    final int[] labels = new int[nr_class];
881                    svm.svm_get_labels(model, labels);
882                    int k = nr_class-1;
883                    if (kBestList.getK() != -1) {
884                            k = kBestList.getK() - 1;
885                    }
886                    
887                    for (i=0; i<nr_class && k >= 0; i++, k--) {
888                            if (vote[i] > 0 || i == 0) {
889                                    //kBestList.addKBestItem(labels[voteindex[i]], (double)vote[i]/(double)(nr_class*(nr_class-1)/2));
890                                    //kBestList.addKBestItem(labels[voteindex[i]]);
891                                    kBestList.add(labels[voteindex[i]]);
892                            }
893                    }
894            }
895            
896            /**
897             * Converts the instance file (Malt's own SVM format) into the LIBSVM (SVMLight) format. The input instance file is removed (replaced)
898             * by the instance file in the LIBSVM (SVMLight) format. If a column contains -1, the value will be removed in destination file. 
899             * 
900             * @param isr the input stream reader for the source instance file
901             * @param osw   the output stream writer for the destination instance file
902             * @param cardinality a vector containing the number of distinct values for a particular column
903             * @throws LibsvmException
904             */
905            public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, ArrayList<Integer> cardinality) throws MaltChainedException {
906                    try {
907                            final BufferedReader in = new BufferedReader(isr);
908                            final BufferedWriter out = new BufferedWriter(osw);
909    
910                            int c;
911                            int j = 0;
912                            int offset = 0; 
913                            int code = 0;
914                            while(true) {
915                                    c = in.read();
916                                    if (c == -1) {
917                                            break;
918                                    }
919                                    
920                                    if (c == '\t' || c == '|') {
921                                            if (j == 0) {
922                                                    out.write(Integer.toString(code));
923                                                    j++;
924                                            } else {
925                                                    if (code != -1) {
926                                                            out.write(' ');
927                                                            out.write(Integer.toString(code+offset));
928                                                            out.write(":1");
929                                                    }
930                                                    if (c == '\t') {
931                                                            offset += cardinality.get(j-1);
932                                                            j++;
933                                                    }
934                                            }
935                                            code = 0;
936                                    } else if (c == '\n') {
937                                            j = 0;
938                                            offset = 0;
939                                            out.write('\n');
940                                            code = 0;
941                                    } else if (c == '-') {
942                                            code = -1;
943                                    } else if (code != -1) {
944                                            if (c > 47 && c < 58) {
945                                                    code = code * 10 + (c-48);
946                                            } else {
947                                                    throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format.");
948                                            }
949                                    }       
950                            }                       
951                            in.close();     
952                            out.close();
953                    } catch (IOException e) {
954                            throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e);
955                    }
956            }
957            
958            protected void finalize() throws Throwable {
959                    try {
960                            closeInstanceWriter();
961                    } finally {
962                            super.finalize();
963                    }
964            }
965            
966            /* (non-Javadoc)
967             * @see java.lang.Object#toString()
968             */
969            public String toString() {
970                    final StringBuffer sb = new StringBuffer();
971                    sb.append("\nLIBSVM INTERFACE\n");
972                    sb.append("  LIBSVM version: "+LIBSVM_VERSION+"\n");
973                    sb.append("  SVM-param string: "+paramString+"\n");
974                    
975                    sb.append(toStringParameters(svmParam));
976                    return sb.toString();
977            }
978    }