001    package org.maltparser.ml.libsvm.malt04;
002    
003    import java.io.BufferedReader;
004    import java.io.BufferedWriter;
005    import java.io.File;
006    import java.io.FileNotFoundException;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.OutputStreamWriter;
011    import java.io.PrintStream;
012    import java.text.DecimalFormat;
013    import java.text.DecimalFormatSymbols;
014    import java.util.ArrayList;
015    import java.util.Set;
016    import java.util.StringTokenizer;
017    import java.util.Vector;
018    import java.util.regex.Pattern;
019    import java.util.regex.PatternSyntaxException;
020    
021    import org.maltparser.core.exception.MaltChainedException;
022    import org.maltparser.core.feature.FeatureException;
023    import org.maltparser.core.feature.FeatureVector;
024    import org.maltparser.core.feature.function.FeatureFunction;
025    import org.maltparser.core.feature.map.SplitFeature;
026    import org.maltparser.core.feature.value.FeatureValue;
027    import org.maltparser.core.feature.value.MultipleFeatureValue;
028    import org.maltparser.core.feature.value.SingleFeatureValue;
029    import org.maltparser.core.helper.NoPrintStream;
030    import org.maltparser.core.symbol.SymbolTable;
031    import org.maltparser.core.symbol.Table;
032    import org.maltparser.core.syntaxgraph.DependencyStructure;
033    import org.maltparser.core.syntaxgraph.feature.InputColumnFeature;
034    import org.maltparser.core.syntaxgraph.feature.OutputColumnFeature;
035    import org.maltparser.core.syntaxgraph.node.DependencyNode;
036    import org.maltparser.ml.LearningMethod;
037    import org.maltparser.ml.libsvm.LibsvmException;
038    import org.maltparser.parser.DependencyParserConfig;
039    import org.maltparser.parser.algorithm.nivre.malt04.NivreEagerMalt04;
040    import org.maltparser.parser.algorithm.nivre.malt04.NivreStandardMalt04;
041    import org.maltparser.parser.guide.instance.InstanceModel;
042    import org.maltparser.parser.history.action.SingleDecision;
043    
044    import libsvm28.svm;
045    import libsvm28.svm_model;
046    import libsvm28.svm_node;
047    import libsvm28.svm_parameter;
048    import libsvm28.svm_problem;
049    /**
050     * Implements an interface to the LIBSVM learner (LIBSVM 2.80 is used). More information about 
051     * LIBSVM can be found at <a href="http://www.csie.ntu.edu.tw/~cjlin/libsvm/" target="_blank">LIBSVM -- A Library for Support Vector Machines</a>.
052     * 
053     * This class tries to reproduce the same behavior as MaltParser 0.4. Unfortunately we have to introduce some strange behaviors and bugs to 
054     * be able to reproduce the results:
055     * 
056     * <ol>
057     * <li>RightArc{CLASSITEM_SEPARATOR}{ROOT_LABEL} is mapped to the Reduce transition for the Nivre Arc-eager and Nivre Arc-standard algorithms, where {ROOT_LABEL} is specified
058     * by the <code>--graph-root_label</code> option and the <code>--guide-classitem_separator</code> option (bug in MaltParser 0.4).
059     * <li>LeftArc{CLASSITEM_SEPARATOR}{ROOT_LABEL} is mapped to the Right Arc transition with last dependency type in the DEPREL tagset, where {ROOT_LABEL} is specified
060     * by the <code>--graph-root_label</code> option and the <code>--guide-classitem_separator</code> option (bug in MaltParser 0.4).
061     * <li>The mapping of RightArc{CLASSITEM_SEPARATOR}{ROOT_LABEL} into Reduce results in an illegal transition and therefore the default transition (Shift) is used during parsing (indirect bug in MaltParser 0.4).
062     * <li>Null-value of the LEMMA, FORM, FEATS columns in the CoNLL shared task format is not written into the instance file (this can be controlled 
063     * by the <code>--libsvm-libsvm_exclude_null</code> and  <code>--libsvm-libsvm_exclude_columns</code> options in the new MaltParser)
064     * <li>If feature is an output feature and <code>feature != "OutputColumn(DEPREL, Stack[0])"</code> and it points at a node which has the root as head it will not extract the dependency type of informative root label,
065     * instead it will extract the root label specified by the <code>--graph-root_label</code> option (bug in MaltParser 0.4).
066     * <li>If <code>feature = "Split(InputColumn(FEATS, X), \|")</code>, where <code>X</code> is arbitrary node. The set of syntactic and/or morphological features will not be ordered correctly
067     * according to the LIBSVM format (bug in MaltParser 0.4).
068     * <li>If <code>feature = "Split(InputColumn(FEATS, X), \|")</code>, where <code>X</code> is arbitrary node. It will not regard the set of syntactic and/or morphological features as a set. In some cases, there are treebanks that do not follow the
069     * CoNLL data format and have individual syntactic and/or morphological features twice in the FEATS column (bug in MaltParser 0.4). 
070     * <li>Unfortunately there is a minor difference between LIBSVM 2.80 (used by MaltParser 0.4) and the latest version of LIBSVM. Therefore we have to use
071     * the LIBSVM 2.80 to be able to reproduce the results.
072     * </ol>
073     * 
074     * @author Johan Hall
075     * @since 1.0
076    */
077    public class LibsvmMalt04 implements LearningMethod {
078            public final static String LIBSVM_VERSION = "2.80";
079            private StringBuilder sb;
080            /**
081             * The parent instance model
082             */
083            protected InstanceModel owner;
084            /**
085             * The learner/classifier mode
086             */
087            protected int learnerMode;
088            /**
089             * The name of the learner
090             */
091            protected String name;
092            /**
093             * Number of processed instances
094             */
095            protected int numberOfInstances;
096            /**
097             * Instance output stream writer 
098             */
099            private BufferedWriter instanceOutput = null; 
100            //private BufferedWriter debugTransOut = null; 
101            //private int sentenceCount = 1;
102            
103            protected String pathExternalSVMTrain = null;
104            /**
105             * LIBSVM svm_model object, only used during classification.
106             */
107            private svm_model model = null;
108            /**
109             * LIBSVM svm_parameter object
110             */
111            private svm_parameter svmParam;
112            /**
113             * Parameter string
114             */
115            private String paramString;
116            /**
117             * An array of LIBSVM svm_node objects, only used during classification.
118             */
119            private ArrayList<svm_node> xlist = null;
120            /**
121             * RA_ROOT is used for mapping RightArc_ROOT - REDUCE (bug in MaltParser 0.4)
122             */
123            private String RA_ROOT = "";
124            /**
125             * LA_ROOT is used for mapping RightArc_ROOT - RightArc_{Last dependency type in the DEPREL tagset} (bug in MaltParser 0.4)
126             */
127            private String LA_ROOT = "";
128            /**
129             * Root handling of the Nivre arc-eager and Nivre arc-standard algorithm. Used for introducing a bug in MaltParser 0.
130             */
131            private int rootHandling = -1;
132            /**
133             * true if Nivre arc-standard is the current parsing algorthm, otherwise false 
134             */
135            private boolean nivrestandard = false;
136            /**
137             * true if Nivre arc-eager/arc-standard is the current parsing algorthm, otherwise false
138             */
139            private boolean nivre = false;
140            
141            private boolean saveInstanceFiles;
142            /**
143             * Constructs a LIBSVM learner.
144             * 
145             * @param owner the guide model owner
146             * @param learnerMode the mode of the learner TRAIN or CLASSIFY
147             */
148            public LibsvmMalt04(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
149                    setOwner(owner);
150                    setLearningMethodName("libsvmmalt04");
151                    setLearnerMode(learnerMode.intValue());
152                    setNumberOfInstances(0);
153                    initSpecialParameters();
154                    initSvmParam(getConfiguration().getOptionValue("libsvm", "libsvm_options").toString());
155                    if (learnerMode == TRAIN) {
156                            instanceOutput = new BufferedWriter(getInstanceOutputStreamWriter(".ins"));
157                            //debugTransOut = new BufferedWriter(getInstanceOutputStreamWriter(".trans"));
158                    }
159                    sb = new StringBuilder(6);
160            }
161            
162            /* (non-Javadoc)
163             * @see org.maltparser.ml.LearningMethod#addInstance(org.maltparser.parser.guide.classtable.ClassTable, org.maltparser.parser.guide.feature.FeatureVector)
164             */
165            public void addInstance(SingleDecision decision, FeatureVector featureVector) throws MaltChainedException {
166                    if (featureVector == null) {
167                            throw new LibsvmException("The feature vector cannot be found");
168                    } else if (decision == null) {
169                            throw new LibsvmException("The decision cannot be found");
170                    }
171                    try {
172                            if (nivre == true && RA_ROOT.equals(decision.getDecisionSymbol()) == true) {
173                                    instanceOutput.write("2\t");
174                                    //debugTransOut.write(2+" "+classCodeTable.getCurrentClassString()+" "+sentenceCount+"\n");
175                            } else if (nivre == true && LA_ROOT.equals(decision.getDecisionSymbol()) == true) {
176                                    Table table = decision.getGuideHistory().getTableHandler("A").getSymbolTable("DEPREL");
177                                    int code = 2 + ((SymbolTable)table).getValueCounter() - 1;
178                                    //int code = 2 + classCodeTable.getParserAction().getOutputSymbolTables().get("DEPREL").getValueCounter() - 1;
179                                    instanceOutput.write(code+"\t");
180                                    //debugTransOut.write(code+" "+classCodeTable.getCurrentClassString()+" "+sentenceCount+"\n");
181                            } else {
182                                    instanceOutput.write(decision.getDecisionCode()+"\t");
183                                    //debugTransOut.write(classCodeTable.getCurrentClassCode()+" "+classCodeTable.getCurrentClassString()+" "+sentenceCount+"\n");
184                            }
185                            
186                            for (int i = 0; i < featureVector.size(); i++) {
187                                    FeatureValue featureValue = featureVector.get(i).getFeatureValue();
188                                    if (featureValue.isNullValue()) {
189                                            if (featureVector.get(i) instanceof InputColumnFeature) {
190                                                    if (((InputColumnFeature)featureVector.get(i)).getColumnName().equals("FORM") || 
191                                                            ((InputColumnFeature)featureVector.get(i)).getColumnName().equals("LEMMA") ||
192                                                            ((InputColumnFeature)featureVector.get(i)).getColumnName().equals("FEATS")) {
193                                                            instanceOutput.write("-1");
194                                                            if (i != featureVector.size()) {
195                                                                    instanceOutput.write('\t');
196                                                            }
197                                                            continue;
198                                                    }
199                                            } else if (featureVector.get(i) instanceof SplitFeature && ((SplitFeature)featureVector.get(i)).getParentFeature() instanceof InputColumnFeature) {
200                                                    if (((InputColumnFeature)((SplitFeature)featureVector.get(i)).getParentFeature()).getColumnName().equals("FEATS")) {
201                                                            instanceOutput.write("-1");
202                                                            if (i != featureVector.size()) {
203                                                                    instanceOutput.write('\t');
204                                                            }
205                                                            continue;                                               
206                                                    }
207                                            }
208                                    }
209                                    if (featureVector.get(i) instanceof OutputColumnFeature && !featureVector.get(i).toString().endsWith("DEPREL, Stack[0])")) {
210                                            OutputColumnFeature ocf = (OutputColumnFeature)featureVector.get(i);
211                                            DependencyNode node = null;
212                                            if (ocf.getAddressFunction().getAddressValue().getAddress() instanceof DependencyNode) {
213                                                    node = (DependencyNode)ocf.getAddressFunction().getAddressValue().getAddress();
214                                            }
215                                            if (node != null && node.getHead() != null && node.getHead().isRoot()) {
216                                                    instanceOutput.write("0");
217                                            } else {
218                                                    if (featureValue instanceof SingleFeatureValue) {
219                                                            instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+"");
220                                                    } else if (featureValue instanceof MultipleFeatureValue) {
221                                                            Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
222                                                            int j=0;
223                                                            for (Integer value : values) {
224                                                                    instanceOutput.write(value.toString());
225                                                                    if (j != values.size()-1) {
226                                                                            instanceOutput.write("|");
227                                                                    }
228                                                                    j++;
229                                                            }
230                                                    }
231                                            }
232                                    } else if (featureVector.get(i) instanceof SplitFeature && ((SplitFeature)featureVector.get(i)).getParentFeature() instanceof InputColumnFeature) {
233                                            if (((InputColumnFeature)((SplitFeature)featureVector.get(i)).getParentFeature()).getColumnName().equals("FEATS")) {
234                                                    SplitFeature sf = (SplitFeature)featureVector.get(i);
235                                                    String value = ((SingleFeatureValue)sf.getParentFeature().getFeatureValue()).getSymbol();
236                                                    if (sf.getFeatureValue().isNullValue()) {
237                                                            instanceOutput.write("-1");
238                                                    } else {
239                                                            int code;
240                                                            String items[];
241                                                            try {
242                                                                    items = value.split(sf.getSeparators());
243                                                            } catch (PatternSyntaxException e) {
244                                                                    throw new FeatureException("The split feature '"+featureVector.get(i).toString()+"' could not split the value using the following separators '"+sf.getSeparators()+"'",e);
245                                                            }
246                                                            for (int j = 0; j < items.length; j++) {
247                                                                    code = sf.getSymbolTable().addSymbol(items[j]);
248                                                                    instanceOutput.write(code+"");
249                                                                    if (j != items.length-1) {
250                                                                            instanceOutput.write("|");
251                                                                    }
252                                                            }
253                                                    }
254                                            }
255                                    } else {
256                                            if (featureValue instanceof SingleFeatureValue) {
257                                                    instanceOutput.write(((SingleFeatureValue)featureValue).getCode()+"");
258                                            } else if (featureValue instanceof MultipleFeatureValue) {
259                                                    Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
260                                                    int j=0;
261                                                    for (Integer value : values) {
262                                                            instanceOutput.write(value.toString());
263                                                            if (j != values.size()-1) {
264                                                                    instanceOutput.write("|");
265                                                            }
266                                                            j++;
267                                                    }
268                                            }
269                                    }
270                                    
271                                    if (i != featureVector.size()) {
272                                            instanceOutput.write('\t');
273                                    }
274                            }
275    
276                            instanceOutput.write('\n');
277                            increaseNumberOfInstances();
278                    } catch (IOException e) {
279                            throw new LibsvmException("The LIBSVM learner cannot write to the instance file. ", e);
280                    }
281    
282            }
283    
284            /* (non-Javadoc)
285             * @see org.maltparser.ml.LearningMethod#finalizeSentence(org.maltparser.core.sentence.Sentence, org.maltparser.core.graph.DependencyGraph)
286             */
287            public void finalizeSentence(DependencyStructure dependencyGraph) throws MaltChainedException {
288    //              sentenceCount++;
289            }
290            
291            /* (non-Javadoc)
292             * @see org.maltparser.ml.LearningMethod#noMoreInstances()
293             */
294            public void noMoreInstances() throws MaltChainedException {
295                    closeInstanceWriter();
296            }
297    
298    
299            /* (non-Javadoc)
300             * @see org.maltparser.ml.LearningMethod#train(org.maltparser.parser.guide.feature.FeatureVector)
301             */
302            public void train(FeatureVector featureVector) throws MaltChainedException {
303                    if (featureVector == null) {
304                            throw new LibsvmException("The feature vector cannot be found. ");
305                    } else if (owner == null) {
306                            throw new LibsvmException("The parent guide model cannot be found. ");
307                    }
308                    
309                    if (pathExternalSVMTrain != null) {
310                            trainExternal(featureVector);
311                            return;
312                    }
313                    svm_problem prob = new svm_problem();
314                    File modelFile = getFile(".mod");
315                    try {           
316                            
317                            ArrayList<Integer> cardinalities = new ArrayList<Integer>();
318                            for (FeatureFunction feature : featureVector) {
319                                    cardinalities.add(feature.getFeatureValue().getCardinality());
320                            }
321    
322                            readProblemMaltSVMFormat(getInstanceInputStreamReader(".ins"), prob, cardinalities, svmParam);
323                            
324                            String errorMessage = svm.svm_check_parameter(prob, svmParam);
325                            if(errorMessage != null) {
326                                    throw new LibsvmException(errorMessage);
327                            }
328                            getConfiguration().getConfigLogger().info("Creating LIBSVM model "+modelFile.getName()+"\n");
329                            PrintStream out = System.out;
330                            PrintStream err = System.err;
331                            System.setOut(NoPrintStream.NO_PRINTSTREAM);
332                            //System.setErr(new PrintStream(new LoggingOutputStream(owner.getConfiguration().getConfigLogger(), owner.getConfiguration().getConfigLogger().getLevel()), true));
333                            System.setErr(NoPrintStream.NO_PRINTSTREAM);
334                            
335                            svm.svm_save_model(modelFile.getAbsolutePath(), svm.svm_train(prob, svmParam));
336                            
337                            System.setOut(err);
338                            System.setOut(out); 
339                            if (!saveInstanceFiles) {
340                                    getFile(".ins").delete();
341                            }
342                    } catch (OutOfMemoryError e) {
343                            throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
344                    } catch (IllegalArgumentException e) {
345                            throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
346                    } catch (SecurityException e) {
347                            throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
348                    } catch (IOException e) {
349                            throw new LibsvmException("The LIBSVM learner cannot save the model file '"+modelFile.getAbsolutePath()+"'. ", e);
350                    }
351            }
352    
353            private void trainExternal(FeatureVector featureVector) throws MaltChainedException {
354    
355                    try {           
356                            ArrayList<Integer> cardinalities = new ArrayList<Integer>();
357                            for (FeatureFunction feature : featureVector) {
358                                    cardinalities.add(feature.getFeatureValue().getCardinality());
359                            }
360    
361                            maltSVMFormat2OriginalSVMFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"), cardinalities);
362                            getConfiguration().getConfigLogger().info("Creating LIBSVM model (svm-train) "+getFile(".mod").getName());
363    
364                            ArrayList<String> commands = new ArrayList<String>();
365                            commands.add(pathExternalSVMTrain);
366                            String[] params = getSVMParamStringArray(svmParam);
367                            for (int i=0; i < params.length; i++) {
368                                    commands.add(params[i]);
369                            }
370                            commands.add(getFile(".ins.tmp").getAbsolutePath());
371                            commands.add(getFile(".mod").getAbsolutePath());
372                            String[] arrayCommands =  commands.toArray(new String[commands.size()]);
373                            Process child = Runtime.getRuntime().exec(arrayCommands);
374                    InputStream in = child.getInputStream();
375                    while (in.read() != -1){}
376                if (child.waitFor() != 0) {
377                    owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
378                }
379                    in.close();
380                    if (!saveInstanceFiles) {
381                            getFile(".ins").delete();
382                            getFile(".ins.tmp").delete();
383                    }
384                    owner.getGuide().getConfiguration().getConfigLogger().info("\n");
385                    } catch (InterruptedException e) {
386                             throw new LibsvmException("SVM-trainer is interrupted. ", e);
387                    } catch (IllegalArgumentException e) {
388                            throw new LibsvmException("The LIBSVM learner was not able to redirect Standard Error stream. ", e);
389                    } catch (SecurityException e) {
390                            throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
391                    } catch (IOException e) {
392                            throw new LibsvmException("The LIBSVM learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
393                    } catch (OutOfMemoryError e) {
394                            throw new LibsvmException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
395                    }
396            }
397            
398            /* (non-Javadoc)
399             * @see org.maltparser.ml.LearningMethod#moveAllInstances(java.io.BufferedWriter, org.maltparser.parser.guide.feature.Feature, java.util.ArrayList)
400             */
401            public void moveAllInstances(LearningMethod method, FeatureFunction divideFeature, ArrayList<Integer> divideFeatureIndexVector) throws MaltChainedException {
402                    if (method == null) {
403                            throw new LibsvmException("The learning method cannot be found. ");
404                    } else if (divideFeature == null) {
405                            throw new LibsvmException("The divide feature cannot be found. ");
406                    } 
407                    try {
408                            BufferedReader in = new BufferedReader(getInstanceInputStreamReader(".ins"));
409                            BufferedWriter out = method.getInstanceWriter();
410                            int l = in.read();
411                            char c;
412                            int j = 0;
413                            while(true) {
414                                    if (l == -1) {
415                                            sb.setLength(0);
416                                            break;
417                                    }
418                                    c = (char)l; 
419                                    l = in.read();
420                                    if (c == '\t') {
421                                            out.write(sb.toString());
422                                            out.write('\t');
423                                            j++;
424                                            sb.setLength(0);
425                                    } else if (c == '\n') {
426                                            out.write(Integer.toString(((SingleFeatureValue)divideFeature.getFeatureValue()).getCode()));
427                                            out.write('\n');
428                                            sb.setLength(0);
429                                            method.increaseNumberOfInstances();
430                                            this.decreaseNumberOfInstances();
431                                            j = 0;
432                                    } else {
433                                            sb.append(c);
434                                    }
435                            }
436                            in.close();
437                            getFile(".ins").delete();
438                    } catch (SecurityException e) {
439                            throw new LibsvmException("The LIBSVM learner cannot remove the instance file. ", e);
440                    } catch (NullPointerException  e) {
441                            throw new LibsvmException("The instance file cannot be found. ", e);
442                    } catch (FileNotFoundException e) {
443                            throw new LibsvmException("The instance file cannot be found. ", e);
444                    } catch (IOException e) {
445                            throw new LibsvmException("The LIBSVM learner read from the instance file. ", e);
446                    }
447            }
448            
449            /* (non-Javadoc)
450             * @see org.maltparser.ml.LearningMethod#predict(org.maltparser.parser.guide.feature.FeatureVector, org.maltparser.ml.KBestList)
451             */
452            public boolean predict(FeatureVector featureVector, SingleDecision decision) throws MaltChainedException {
453                    if (model == null) {
454                            File modelFile = getFile(".mod");
455                            try {
456                                    model = svm.svm_load_model(modelFile.getAbsolutePath());        
457                            } catch (IOException e) {
458                                    throw new LibsvmException("The file '"+modelFile.getAbsolutePath()+"' cannot be loaded. ", e);
459                            }
460                    }
461                    if (xlist == null) {
462                            xlist = new ArrayList<svm_node>(featureVector.size()); 
463                    }
464                    if (model == null) {
465                            throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the learning model cannot be found. ");
466                    } else if (featureVector == null) {
467                            throw new LibsvmException("The LIBSVM learner cannot predict the next class, because the feature vector cannot be found. ");
468                    }
469                    int j = 0;
470                    int offset = 0;
471    
472                    for (FeatureFunction feature : featureVector) {
473                            if (feature instanceof SplitFeature && feature.toString().startsWith("Split(InputColumn(FEATS")) {
474                                    SplitFeature sf = (SplitFeature)feature;
475                                    String value = ((SingleFeatureValue)sf.getParentFeature().getFeatureValue()).getSymbol();
476    
477                                    SymbolTable table = sf.getSymbolTable();
478                                    String items[];
479                                    try {
480                                            items = value.split(sf.getSeparators());
481                                    } catch (PatternSyntaxException e) {
482                                            throw new FeatureException("The split feature '"+feature.toString()+"' could not split the value using the following separators '"+sf.getSeparators()+"'",e);
483                                    }
484                                    for (int k=0; k < items.length; k++) {
485                                            if (!(table.isNullValue(items[k]) && table.getSymbolStringToCode(items[k]) == 0)) {
486                                                    if (j >= xlist.size()) {
487                                                            svm_node x =  new svm_node();
488                                                            x.value = 1.0;
489                                                            xlist.add(j,x);
490                                                    }
491                                                    xlist.get(j++).index = table.addSymbol(items[k]) + offset;
492                                            }
493                                    }
494                            } else {
495                                    FeatureValue featureValue = feature.getFeatureValue();
496                                    if (featureValue instanceof SingleFeatureValue) {
497                                            if (((SingleFeatureValue)featureValue).isKnown()) {
498                                                    if (j >= xlist.size()) {
499                                                            svm_node x =  new svm_node();
500                                                            x.value = 1.0;
501                                                            xlist.add(j,x);
502                                                    }
503                                                    if (feature instanceof OutputColumnFeature && !feature.toString().endsWith("DEPREL, Stack[0])")) {
504                                                            OutputColumnFeature ocf = (OutputColumnFeature)feature;
505                                                            DependencyNode node = null;
506                                                            if (ocf.getAddressFunction().getAddressValue().getAddress() instanceof DependencyNode) {
507                                                                    node = (DependencyNode)ocf.getAddressFunction().getAddressValue().getAddress();
508                                                            }
509                                                            if (node != null && node.getHead() != null && node.getHead().isRoot()) {
510                                                                    xlist.get(j++).index = 0 + offset;
511                                                            } else {
512                                                                    xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset;
513                                                            }
514                                                    } else {
515                                                            xlist.get(j++).index = ((SingleFeatureValue)featureValue).getCode() + offset;
516                                                    }
517                                            }
518                                    } else if (featureValue instanceof MultipleFeatureValue) {
519                                            Set<Integer> values = ((MultipleFeatureValue)featureValue).getCodes();
520                                            for (Integer value : values) {
521                                                    if (((MultipleFeatureValue)featureValue).isKnown(value)) {
522                                                            if (j >= xlist.size()) {
523                                                                    svm_node x =  new svm_node();
524                                                                    x.value = 1.0;
525                                                                    xlist.add(j,x);
526                                                            }
527                                                            if (feature instanceof OutputColumnFeature && !feature.toString().endsWith("DEPREL, Stack[0])")) {
528                                                                    OutputColumnFeature ocf = (OutputColumnFeature)feature;
529                                                                    DependencyNode node = null;
530                                                                    if (ocf.getAddressFunction().getAddressValue().getAddress() instanceof DependencyNode) {
531                                                                            node = (DependencyNode)ocf.getAddressFunction().getAddressValue().getAddress();
532                                                                    }
533                                                                    if (node != null && node.getHead() != null && node.getHead().isRoot()) {
534                                                                            xlist.get(j++).index = 0 + offset;
535                                                                    } else {
536                                                                            xlist.get(j++).index = value + offset;
537                                                                    }
538                                                            } else {
539                                                                    xlist.get(j++).index = value + offset;
540                                                            }
541                                                    }
542                                            }
543                                    }
544                            }
545                            offset += feature.getFeatureValue().getCardinality();
546                    }
547                    int transition = (int)svm.svm_predict(model, xlist.subList(0, j).toArray(new svm_node[0]));
548                    if (nivrestandard == true && rootHandling == NivreStandardMalt04.NORMAL && transition == 2) {
549                            transition = 1;
550                    }
551                            
552                    decision.getKBestList().add(transition);
553    
554                    return true;
555            } 
556    
557            /* (non-Javadoc)
558             * @see org.maltparser.ml.LearningMethod#terminate()
559             */
560            public void terminate() throws MaltChainedException { 
561                    closeInstanceWriter();
562                    model = null;
563                    svmParam = null;
564                    xlist = null;
565                    owner = null;
566            }
567    
568            /* (non-Javadoc)
569             * @see org.maltparser.ml.LearningMethod#getInstanceWriter()
570             */
571            public BufferedWriter getInstanceWriter() {
572                    return instanceOutput;
573            }
574            
575            /**
576             * Close the instance writer
577             * 
578             * @throws MaltChainedException
579             */
580            protected void closeInstanceWriter() throws MaltChainedException {
581                    try {
582                            if (instanceOutput != null) {
583                                    instanceOutput.flush();
584                                    instanceOutput.close();
585                                    instanceOutput = null;
586                            }
587                            
588                            /*if (debugTransOut != null) {
589                                    debugTransOut.flush();
590                                    debugTransOut.close();
591                                    debugTransOut = null;
592                            }*/
593                    } catch (IOException e) {
594                            throw new LibsvmException("The LIBSVM learner cannot close the instance file. ", e);
595                    }
596            }
597            
598            /**
599             * Initialize the LIBSVM according to the parameter string
600             * 
601             * @param paramString the parameter string to configure the LIBSVM learner.
602             * @throws MaltChainedException
603             */
604            protected void initSvmParam(String paramString) throws MaltChainedException {
605                    this.paramString = paramString;
606                    svmParam = new svm_parameter();
607                    initParameters(svmParam);
608                    parseParameters(paramString, svmParam);
609            }
610            
611            /**
612             * Initialize the LIBSVM with a coding and a behavior strategy. This strategy parameter is
613             * used for reproduce the behavior of MaltParser 0.4 (C-impl). 
614             * 
615             * @throws MaltChainedException
616             */
617            protected void initSpecialParameters() throws MaltChainedException {
618                    if (getConfiguration().getParsingAlgorithm() instanceof NivreEagerMalt04 || getConfiguration().getParsingAlgorithm() instanceof NivreStandardMalt04) {
619                            nivre = true;
620                            RA_ROOT = "RA"+getConfiguration().getOptionValue("guide", "classitem_separator").toString()+getConfiguration().getOptionValue("graph", "root_label").toString();        
621                            LA_ROOT = "LA"+getConfiguration().getOptionValue("guide", "classitem_separator").toString()+getConfiguration().getOptionValue("graph", "root_label").toString();        
622                            if (getConfiguration().getParsingAlgorithm() instanceof NivreEagerMalt04) {
623                                    rootHandling = ((NivreEagerMalt04)getConfiguration().getParsingAlgorithm()).getRootHandling();
624                            } else if (getConfiguration().getParsingAlgorithm() instanceof NivreStandardMalt04) {
625                                    rootHandling = ((NivreStandardMalt04)getConfiguration().getParsingAlgorithm()).getRootHandling();
626                                    nivrestandard = true;
627                            }
628                    }
629    
630                    saveInstanceFiles = ((Boolean)getConfiguration().getOptionValue("libsvm", "save_instance_files")).booleanValue();
631                    if (!getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().equals("")) {
632                            try {
633                                    if (!new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).exists()) {
634                                            throw new LibsvmException("The path to the external LIBSVM trainer 'svm-train' is wrong.");
635                                    }
636                                    if (new File(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString()).isDirectory()) {
637                                            throw new LibsvmException("The option --libsvm-libsvm_external points to a directory, the path should point at the 'svm-train' file or the 'svm-train.exe' file");
638                                    }
639                                    if (!(getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train") || getConfiguration().getOptionValue("libsvm", "libsvm_external").toString().endsWith("svm-train.exe"))) {
640                                            throw new LibsvmException("The option --libsvm-libsvm_external does not specify the path to 'svm-train' file or the 'svm-train.exe' file. ");
641                                    }
642                                    pathExternalSVMTrain = getConfiguration().getOptionValue("libsvm", "libsvm_external").toString();
643                            } catch (SecurityException e) {
644                                    throw new LibsvmException("Access denied to the file specified by the option --libsvm-libsvm_external. ", e);
645                            }
646                    }
647            }
648            
649            /**
650             * Returns the parameter string for used for configure LIBSVM
651             * 
652             * @return the parameter string for used for configure LIBSVM
653             */
654            public String getParamString() {
655                    return paramString;
656            }
657            
658            /**
659             * Returns the parent instance model
660             * 
661             * @return the parent instance model
662             */
663            public InstanceModel getOwner() {
664                    return owner;
665            }
666    
667            /**
668             * Sets the parent instance model
669             * 
670             * @param owner a instance model
671             */
672            protected void setOwner(InstanceModel owner) {
673                    this.owner = owner;
674            }
675            
676            /**
677             * Returns the learner mode
678             * 
679             * @return the learner mode
680             */
681            public int getLearnerMode() {
682                    return learnerMode;
683            }
684    
685            /**
686             * Sets the learner mode
687             * 
688             * @param learnerMode the learner mode
689             */
690            public void setLearnerMode(int learnerMode) {
691                    this.learnerMode = learnerMode;
692            }
693            
694            /**
695             * Returns the name of the learning method
696             * 
697             * @return the name of the learning method
698             */
699            public String getLearningMethodName() {
700                    return name;
701            }
702            
703            /**
704             * Returns the current configuration
705             * 
706             * @return the current configuration
707             * @throws MaltChainedException
708             */
709            public DependencyParserConfig getConfiguration() throws MaltChainedException {
710                    return owner.getGuide().getConfiguration();
711            }
712            
713            /**
714             * Returns the number of processed instances
715             * 
716             * @return the number of processed instances
717             */
718            public int getNumberOfInstances() {
719                    return numberOfInstances;
720            }
721    
722            /* (non-Javadoc)
723             * @see org.maltparser.ml.LearningMethod#increaseNumberOfInstances()
724             */
725            public void increaseNumberOfInstances() {
726                    numberOfInstances++;
727                    owner.increaseFrequency();
728            }
729            
730            /* (non-Javadoc)
731             * @see org.maltparser.ml.LearningMethod#decreaseNumberOfInstances()
732             */
733            public void decreaseNumberOfInstances() {
734                    numberOfInstances--;
735                    owner.decreaseFrequency();
736            }
737            
738            /**
739             * Sets the number of instance
740             * 
741             * @param numberOfInstances the number of instance
742             */
743            protected void setNumberOfInstances(int numberOfInstances) {
744                    this.numberOfInstances = 0;
745            }
746    
747            /**
748             * Sets the learning method name
749             * 
750             * @param name the learning method name
751             */
752            protected void setLearningMethodName(String name) {
753                    this.name = name;
754            }
755            
756            /**
757             * Returns the instance output writer. The naming of the file is standardized according to the learning method name, but file suffix can vary. 
758             * 
759             * @param suffix the file suffix of the file name
760             * @return the instance output writer
761             * @throws MaltChainedException
762             */
763            protected OutputStreamWriter getInstanceOutputStreamWriter(String suffix) throws MaltChainedException {
764                    return getConfiguration().getConfigurationDir().getOutputStreamWriter(owner.getModelName()+getLearningMethodName()+suffix);
765            }
766            
767            /**
768             * Returns the instance input reader. The naming of the file is standardized according to the learning method name, but file suffix can vary.
769             * 
770             * @param suffix the file suffix of the file name
771             * @return the instance input reader
772             * @throws MaltChainedException
773             */
774            protected InputStreamReader getInstanceInputStreamReader(String suffix) throws MaltChainedException {
775                    return getConfiguration().getConfigurationDir().getInputStreamReader(owner.getModelName()+getLearningMethodName()+suffix);
776            }
777            
778            /**
779             * Returns a file object. The naming of the file is standardized according to the learning method name, but file suffix can vary.
780             * 
781             * @param suffix the file suffix of the file name
782             * @return Returns a file object
783             * @throws MaltChainedException
784             */
785            protected File getFile(String suffix) throws MaltChainedException {
786                    return getConfiguration().getConfigurationDir().getFile(owner.getModelName()+getLearningMethodName()+suffix);
787            }
788            
789            
790            /**
791             * Reads an instance file into a svm_problem object according to the Malt-SVM format, which is column fixed format (tab-separated).
792             * 
793             * @param isr   the instance stream reader for the instance file
794             * @param prob  a svm_problem object
795             * @param cardinality   a vector containing the number of distinct values for a particular column.
796             * @param param a svm_parameter object
797             * @throws LibsvmException
798             */
799            public void readProblemMaltSVMFormat(InputStreamReader isr, svm_problem prob, ArrayList<Integer> cardinality, svm_parameter param) throws LibsvmException {
800                    try {
801                            BufferedReader fp = new BufferedReader(isr);
802                            int max_index = 0;
803                            if (xlist == null) {
804                                    xlist = new ArrayList<svm_node>(); 
805                            }
806                            prob.l = getNumberOfInstances();
807                            prob.x = new svm_node[prob.l][];
808                            prob.y = new double[prob.l];
809                            int i = 0;
810                            Pattern tabPattern = Pattern.compile("\t");
811                            Pattern pipePattern = Pattern.compile("\\|");
812                            while(true) {
813                                    String line = fp.readLine();
814                                    if(line == null) break;
815                                    String[] columns = tabPattern.split(line);
816                                    if (columns.length == 0) {
817                                            continue;
818                                    }
819                                    
820                                    int offset = 0; 
821                                    int j = 0;
822                                    try {
823                                            prob.y[i] = (double)Integer.parseInt(columns[j]);
824                                            int p = 0;
825                                            for(j = 1; j < columns.length; j++) {
826                                                    String[] items = pipePattern.split(columns[j]); 
827                                                    for (int k = 0; k < items.length; k++) {
828                                                            try {
829                                                                    if (Integer.parseInt(items[k]) != -1) {
830                                                                            xlist.add(p, new svm_node());
831                                                                            xlist.get(p).value = 1.0;
832                                                                            xlist.get(p).index = Integer.parseInt(items[k])+offset;
833                                                                            p++;
834                                                                    }
835                                                            } catch (NumberFormatException e) {
836                                                                    throw new LibsvmException("The instance file contain a non-integer value '"+items[k]+"'", e);
837                                                            }
838                                                    }
839                                                    offset += cardinality.get(j-1);
840                                            }
841                                            prob.x[i] = xlist.subList(0, p).toArray(new svm_node[0]);
842                                            if(columns.length>0) {
843                                                    max_index = Math.max(max_index, xlist.get(p-1).index);
844                                            }
845                                            i++;
846                                            xlist.clear();
847                                    } catch (ArrayIndexOutOfBoundsException e) {
848                                            throw new LibsvmException("Cannot read from the instance file. ", e);
849                                    }
850                            }
851                            fp.close();     
852                            if (param.gamma == 0) {
853                                    param.gamma = 1.0/max_index;
854                            }
855                            xlist = null;
856                    } catch (IOException e) {
857                            throw new LibsvmException("Cannot read from the instance file. ", e);
858                    }
859            }
860            
861            
862            /**
863             * Assign a default value to all svm parameters
864             * 
865             * @param param a svm_parameter object
866             */
867            public void initParameters(svm_parameter param) throws LibsvmException {
868                    if (param == null) {
869                            throw new LibsvmException("Svm-parameters cannot be found. ");
870                    }
871                    param.svm_type = svm_parameter.C_SVC;
872                    param.kernel_type = svm_parameter.POLY;
873                    param.degree = 2.0; // libsvm 2.8
874                    param.gamma = 0.2;      // 1/k
875                    param.coef0 = 0;
876                    param.nu = 0.5;
877                    param.cache_size = 40; 
878                    param.C = 0.5; 
879                    param.eps = 1.0; 
880                    param.p = 0.1;
881                    param.shrinking = 1;
882                    param.probability = 0;
883                    param.nr_weight = 0;
884                    param.weight_label = new int[0];
885                    param.weight = new double[0];
886            }
887            
888            /**
889             * Returns a string containing all svm-parameters of interest
890             * 
891             * @param param a svm_parameter object
892             * @return a string containing all svm-parameters of interest
893             */
894            public String toStringParameters(svm_parameter param)  {
895                    if (param == null) {
896                            throw new IllegalArgumentException("Svm-parameters cannot be found. ");
897                    }
898                    StringBuffer sb = new StringBuffer();
899                    
900                    String[] svmtypes = {"C_SVC", "NU_SVC","ONE_CLASS","EPSILON_SVR","NU_SVR"};
901                    String[] kerneltypes = {"LINEAR", "POLY","RBF","SIGMOID","PRECOMPUTED"};
902                    DecimalFormat dform = new DecimalFormat("#0.0#"); 
903                    DecimalFormatSymbols sym = new DecimalFormatSymbols();
904                    sym.setDecimalSeparator('.');
905                    dform.setDecimalFormatSymbols(sym);
906                    sb.append("LIBSVM SETTINGS\n");
907                    sb.append("  SVM type      : " + svmtypes[param.svm_type] + " (" + param.svm_type + ")\n");
908                    sb.append("  Kernel        : " + kerneltypes[param.kernel_type] + " (" + param.kernel_type + ")\n");
909                    if (param.kernel_type == svm_parameter.POLY) {
910                            sb.append("  Degree        : " + param.degree + "\n");
911                    }
912                    if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.RBF || param.kernel_type == svm_parameter.SIGMOID) {
913                            sb.append("  Gamma         : " + dform.format(param.gamma) + "\n");
914                            if (param.kernel_type == svm_parameter.POLY || param.kernel_type == svm_parameter.SIGMOID) {
915                                    sb.append("  Coef0         : " + dform.format(param.coef0) + "\n");
916                            }
917                    }
918                    if (param.svm_type == svm_parameter.NU_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.ONE_CLASS) {
919                            sb.append("  Nu            : " + dform.format(param.nu) + "\n");
920                    }
921                    sb.append("  Cache Size    : " + dform.format(param.cache_size) + " MB\n");
922                    if (param.svm_type == svm_parameter.C_SVC || param.svm_type == svm_parameter.NU_SVR || param.svm_type == svm_parameter.EPSILON_SVR) {
923                            sb.append("  C             : " + dform.format(param.C) + "\n");
924                    }
925                    sb.append("  Eps           : " + dform.format(param.eps) + "\n");
926                    if (param.svm_type == svm_parameter.EPSILON_SVR) {
927                            sb.append("  P             : " + dform.format(param.p) + "\n");
928                    }
929                    sb.append("  Shrinking     : " + param.shrinking + "\n");
930                    sb.append("  Probability   : " + param.probability + "\n");
931                    if (param.svm_type == svm_parameter.C_SVC) {
932                            sb.append("  #Weight       : " + param.nr_weight + "\n");
933                            if (param.nr_weight > 0) {
934                                    sb.append("  Weight labels : ");
935                                    for (int i = 0; i < param.nr_weight; i++) {
936                                            sb.append(param.weight_label[i]);
937                                            if (i != param.nr_weight-1) {
938                                                    sb.append(", ");
939                                            }
940                                    }
941                                    sb.append("\n");
942                                    for (int i = 0; i < param.nr_weight; i++) {
943                                            sb.append(dform.format(param.weight));
944                                            if (i != param.nr_weight-1) {
945                                                    sb.append(", ");
946                                            }
947                                    }
948                                    sb.append("\n");
949                            }
950                    }
951                    return sb.toString();
952            }
953            
954            public String[] getSVMParamStringArray(svm_parameter param) {
955                    ArrayList<String> params = new ArrayList<String>();
956    
957                    if (param.svm_type != 0) {
958                            params.add("-s"); params.add(new Integer(param.svm_type).toString());
959                    }
960                    if (param.kernel_type != 2) {
961                            params.add("-t"); params.add(new Integer(param.kernel_type).toString());
962                    }
963                    if (param.degree != 3) {
964                            params.add("-d"); params.add(new Double(param.degree).toString());
965                    }
966                    params.add("-g"); params.add(new Double(param.gamma).toString());
967                    if (param.coef0 != 0) {
968                            params.add("-r"); params.add(new Double(param.coef0).toString());
969                    }
970                    if (param.nu != 0.5) {
971                            params.add("-n"); params.add(new Double(param.nu).toString());
972                    }
973                    if (param.cache_size != 100) {
974                            params.add("-m"); params.add(new Double(param.cache_size).toString());
975                    }
976                    if (param.C != 1) {
977                            params.add("-c"); params.add(new Double(param.C).toString());
978                    }
979                    if (param.eps != 0.001) {
980                            params.add("-e"); params.add(new Double(param.eps).toString());
981                    }
982                    if (param.p != 0.1) {
983                            params.add("-p"); params.add(new Double(param.p).toString());
984                    }
985                    if (param.shrinking != 1) {
986                            params.add("-h"); params.add(new Integer(param.shrinking).toString());
987                    }
988                    if (param.probability != 0) {
989                            params.add("-b"); params.add(new Integer(param.probability).toString());
990                    }
991    
992                    return params.toArray(new String[params.size()]);
993            }
994            
995            /**
996             * Parses the parameter string. The parameter string must contain parameter and value pairs, which are seperated by a blank 
997             * or a underscore. The parameter begins with a character '-' followed by a one-character flag and the value must comply with
998             * the parameters data type. Some examples:
999             * 
1000             * -s 0 -t 1 -d 2 -g 0.4 -e 0.1
1001             * -s_0_-t_1_-d_2_-g_0.4_-e_0.1
1002             * 
1003             * @param paramstring   the parameter string 
1004             * @param param a svm_parameter object
1005             * @throws LibsvmException
1006             */
1007            public void parseParameters(String paramstring, svm_parameter param) throws LibsvmException {
1008                    if (param == null) {
1009                            throw new LibsvmException("Svm-parameters cannot be found. ");
1010                    }
1011                    if (paramstring == null) {
1012                            return;
1013                    }
1014                    String[] argv;
1015                    try {
1016                            argv = paramstring.split("[_\\p{Blank}]");
1017                    } catch (PatternSyntaxException e) {
1018                            throw new LibsvmException("Could not split the svm-parameter string '"+paramstring+"'. ", e);
1019                    }
1020                    for (int i=0; i < argv.length-1; i++) {
1021                            if(argv[i].charAt(0) != '-') {
1022                                    throw new LibsvmException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
1023                            }
1024                            if(++i>=argv.length) {
1025                                    throw new LibsvmException("The last argument does not have any value. ");
1026                            }
1027                            try {
1028                                    switch(argv[i-1].charAt(1)) {
1029                                    case 's':
1030                                            param.svm_type = Integer.parseInt(argv[i]);
1031                                            break;
1032                                    case 't':
1033                                            param.kernel_type = Integer.parseInt(argv[i]);
1034                                            break;
1035                                    case 'd':
1036                                            param.degree = Double.valueOf(argv[i]).doubleValue(); //libsvm2.8
1037                                            break;
1038                                    case 'g':
1039                                            param.gamma = Double.valueOf(argv[i]).doubleValue();
1040                                            break;
1041                                    case 'r':
1042                                            param.coef0 = Double.valueOf(argv[i]).doubleValue();
1043                                            break;
1044                                    case 'n':
1045                                            param.nu = Double.valueOf(argv[i]).doubleValue();
1046                                            break;
1047                                    case 'm':
1048                                            param.cache_size = Double.valueOf(argv[i]).doubleValue();
1049                                            break;
1050                                    case 'c':
1051                                            param.C = Double.valueOf(argv[i]).doubleValue();
1052                                            break;
1053                                    case 'e':
1054                                            param.eps = Double.valueOf(argv[i]).doubleValue();
1055                                            break;
1056                                    case 'p':
1057                                            param.p = Double.valueOf(argv[i]).doubleValue();
1058                                            break;
1059                                    case 'h':
1060                                            param.shrinking = Integer.parseInt(argv[i]);
1061                                            break;
1062                                case 'b':
1063                                            param.probability = Integer.parseInt(argv[i]);
1064                                            break;
1065                                    case 'w':
1066                                            ++param.nr_weight;
1067                                            {
1068                                                    int[] old = param.weight_label;
1069                                                    param.weight_label = new int[param.nr_weight];
1070                                                    System.arraycopy(old,0,param.weight_label,0,param.nr_weight-1);
1071                                            }
1072            
1073                                            {
1074                                                    double[] old = param.weight;
1075                                                    param.weight = new double[param.nr_weight];
1076                                                    System.arraycopy(old,0,param.weight,0,param.nr_weight-1);
1077                                            }
1078            
1079                                            param.weight_label[param.nr_weight-1] = Integer.parseInt(argv[i].substring(2));
1080                                            param.weight[param.nr_weight-1] = Double.valueOf(argv[i]).doubleValue();
1081                                            break;
1082                                    case 'Y':
1083                                    case 'V':
1084                                    case 'S':
1085                                    case 'F':
1086                                    case 'T':
1087                                    case 'M':
1088                                    case 'N':
1089                                            break;
1090                                    default:
1091                                            throw new LibsvmException("Unknown svm parameter: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");         
1092                                    }
1093                            } catch (ArrayIndexOutOfBoundsException e) {
1094                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);
1095                            } catch (NumberFormatException e) {
1096                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);      
1097                            } catch (NullPointerException e) {
1098                                    throw new LibsvmException("The svm-parameter '"+argv[i-1]+"' could not convert the string value '"+argv[i]+"' into a correct numeric value. ", e);      
1099                            }
1100                    }
1101            }
1102            
1103            /**
1104             * Converts the instance file (Malt's own SVM format) into the LIBSVM (SVMLight) format. The input instance file is removed (replaced)
1105             * by the instance file in the LIBSVM (SVMLight) format. If a column contains -1, the value will be removed in destination file. 
1106             * 
1107             * @param isr the input stream reader for the source instance file
1108             * @param osw   the output stream writer for the destination instance file
1109             * @param cardinality a vector containing the number of distinct values for a particular column
1110             * @throws LibsvmException
1111             */
1112            public static void maltSVMFormat2OriginalSVMFormat(InputStreamReader isr, OutputStreamWriter osw, ArrayList<Integer> cardinality) throws LibsvmException {
1113                    try {
1114                            final BufferedReader in = new BufferedReader(isr);
1115                            final BufferedWriter out = new BufferedWriter(osw);
1116                            int c;
1117                            int j = 0;
1118                            int offset = 0; 
1119                            int code = 0;
1120                            while(true) {
1121                                    c = in.read();
1122                                    if (c == -1) {
1123                                            break;
1124                                    }
1125                                    
1126                                    if (c == '\t' || c == '|') {
1127                                            if (j == 0) {
1128                                                    out.write(Integer.toString(code));
1129                                                    j++;
1130                                            } else {
1131                                                    if (code != -1) {
1132                                                            out.write(' ');
1133                                                            out.write(Integer.toString(code+offset));
1134                                                            out.write(":1");
1135                                                    }
1136                                                    if (c == '\t') {
1137                                                            offset += cardinality.get(j-1);
1138                                                            j++;
1139                                                    }
1140                                            }
1141                                            code = 0;
1142                                    } else if (c == '\n') {
1143                                            j = 0;
1144                                            offset = 0;
1145                                            out.write('\n');
1146                                            code = 0;
1147                                    } else if (c == '-') {
1148                                            code = -1;
1149                                    } else if (code != -1) {
1150                                            if (c > 47 && c < 58) {
1151                                                    code = code * 10 + (c-48);
1152                                            } else {
1153                                                    throw new LibsvmException("The instance file contain a non-integer value, when converting the Malt SVM format into LIBSVM format.");
1154                                            }
1155                                    }       
1156                            }       
1157                            in.close();     
1158                            out.close();
1159                    } catch (IOException e) {
1160                            throw new LibsvmException("Cannot read from the instance file, when converting the Malt SVM format into LIBSVM format. ", e);
1161                    }
1162            }
1163            
1164            /**
1165             * Returns the double (floating-point) value of the string s
1166             * 
1167             * @param s string value that should be converted into a double.
1168             * @return the double (floating-point) value of the string s
1169             * @throws LibsvmException
1170             */
1171            public static double atof(String s) throws LibsvmException {
1172                    try {
1173                            return Double.valueOf(s).doubleValue();
1174                    } catch (NumberFormatException e) {
1175                            throw new LibsvmException("Could not convert the string value '"+s+"' into a correct numeric value. ", e);      
1176                    } catch (NullPointerException e) {
1177                            throw new LibsvmException("Could not convert the string value '"+s+"' into a correct numeric value. ", e);      
1178                    }
1179            }
1180    
1181            /**
1182             * Returns the integer value of the string s
1183             * 
1184             * @param s string value that should be converted into an integer
1185             * @return the integer value of the string s
1186             * @throws LibsvmException
1187             */
1188            public static int atoi(String s) throws LibsvmException {
1189                    try {
1190                            return Integer.parseInt(s);
1191                    } catch (NumberFormatException e) {
1192                            throw new LibsvmException("Could not convert the string value '"+s+"' into a correct integer value. ", e);      
1193                    } catch (NullPointerException e) {
1194                            throw new LibsvmException("Could not convert the string value '"+s+"' into a correct integer value. ", e);      
1195                    }
1196            }
1197            
1198            /**
1199             * Reads an instance file into a svm_problem object according to the LIBSVM (SVMLight) format.
1200             * 
1201             * @param isr the input stream reader for the source instance file
1202             * @param prob  a svm_problem object
1203             * @param param a svm_parameter object
1204             * @throws LibsvmException
1205             */
1206            public static void readProblemOriginalSVMFormat(InputStreamReader isr, svm_problem prob, svm_parameter param) throws LibsvmException {
1207                    BufferedReader fp = new BufferedReader(isr);
1208    
1209                    Vector<String> vy = new Vector<String>();
1210                    Vector<svm_node[]> vx = new Vector<svm_node[]>();
1211                    int max_index = 0;
1212    
1213                    while(true) {
1214                            String line;
1215                            try {
1216                                    line = fp.readLine();
1217                            } catch (IOException e) {
1218                                    throw new LibsvmException("", e);
1219                            }
1220                            if(line == null) break;
1221    
1222                            StringTokenizer st = new StringTokenizer(line," \t\n\r\f:");
1223    
1224                            vy.addElement(st.nextToken());
1225                            int m = st.countTokens()/2;
1226                            svm_node[] x = new svm_node[m];
1227                            for(int j=0;j<m;j++) {
1228                                    x[j] = new svm_node();
1229                                    x[j].index = atoi(st.nextToken());
1230                                    x[j].value = atof(st.nextToken());
1231                            }
1232                            if(m>0) max_index = Math.max(max_index, x[m-1].index);
1233                            vx.addElement(x);
1234                    }
1235    
1236                    prob.l = vy.size();
1237                    prob.x = new svm_node[prob.l][];
1238                    for(int i=0;i<prob.l;i++) {
1239                            prob.x[i] = (svm_node[])vx.elementAt(i);
1240                    }
1241                    prob.y = new double[prob.l];
1242                    for(int i=0;i<prob.l;i++) {
1243                            prob.y[i] = atof((String)vy.elementAt(i));
1244                    }
1245                    if(param.gamma == 0.0) {
1246                            param.gamma = 1.0/max_index;
1247                    }
1248                    
1249                    try {
1250                            fp.close();
1251                    } catch (IOException e) {
1252                            throw new LibsvmException("The instance file cannot be closed. ", e);
1253                    }
1254            }
1255            
1256            protected void finalize() throws Throwable {
1257                    try {
1258                            closeInstanceWriter();
1259                    } finally {
1260                            super.finalize();
1261                    }
1262            }
1263            /* (non-Javadoc)
1264             * @see java.lang.Object#toString()
1265             */
1266            public String toString() {
1267                    StringBuffer sb = new StringBuffer();
1268                    sb.append("\nLIBSVM INTERFACE\n");
1269                    sb.append("  LIBSVM version: "+LIBSVM_VERSION+"\n");
1270                    sb.append("  SVM-param string: "+paramString+"\n");
1271                    sb.append("  Coding and behavior strategy: MaltParser 0.4\n");
1272                    sb.append(toStringParameters(svmParam));
1273                    return sb.toString();
1274            }
1275    }