001 package org.maltparser.ml.lib;
002
003 import java.io.BufferedOutputStream;
004 import java.io.BufferedReader;
005 import java.io.FileOutputStream;
006 import java.io.IOException;
007 import java.io.InputStream;
008 import java.io.InputStreamReader;
009 import java.io.ObjectInputStream;
010 import java.io.ObjectOutputStream;
011 import java.io.PrintStream;
012 import java.util.LinkedHashMap;
013
014 import liblinear.FeatureNode;
015 import liblinear.Linear;
016 import liblinear.Model;
017 import liblinear.Parameter;
018 import liblinear.Problem;
019 import liblinear.SolverType;
020
021 import org.maltparser.core.exception.MaltChainedException;
022 import org.maltparser.core.feature.FeatureVector;
023 import org.maltparser.core.helper.NoPrintStream;
024 import org.maltparser.parser.guide.instance.InstanceModel;
025
026 public class LibLinear extends Lib {
027
028 public LibLinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
029 super(owner, learnerMode, "liblinear");
030 if (learnerMode == CLASSIFY) {
031 try {
032 ObjectInputStream input = new ObjectInputStream(getInputStreamFromConfigFileEntry(".moo"));
033 try {
034 model = (MaltLibModel)input.readObject();
035 } finally {
036 input.close();
037 }
038 } catch (ClassNotFoundException e) {
039 throw new LibException("Couldn't load the liblinear model", e);
040 } catch (Exception e) {
041 throw new LibException("Couldn't load the liblinear model", e);
042 }
043 }
044
045 }
046
047 protected void trainInternal(FeatureVector featureVector) throws MaltChainedException {
048 try {
049 if (configLogger.isInfoEnabled()) {
050 configLogger.info("Creating Liblinear model "+getFile(".moo").getName()+"\n");
051 }
052 Problem problem = readProblem(getInstanceInputStreamReader(".ins"));
053 final PrintStream out = System.out;
054 final PrintStream err = System.err;
055 System.setOut(NoPrintStream.NO_PRINTSTREAM);
056 System.setErr(NoPrintStream.NO_PRINTSTREAM);
057 Parameter parameter = getLiblinearParameters();
058 Model model = Linear.train(problem, parameter);
059 System.setOut(err);
060 System.setOut(out);
061 MaltLiblinearModel xmodel = new MaltLiblinearModel(model, parameter.getSolverType());
062 ObjectOutputStream output = new ObjectOutputStream (new BufferedOutputStream(new FileOutputStream(getFile(".moo").getAbsolutePath())));
063 try{
064 output.writeObject(xmodel);
065 } finally {
066 output.close();
067 }
068 if (!saveInstanceFiles) {
069 getFile(".ins").delete();
070 }
071 } catch (OutOfMemoryError e) {
072 throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
073 } catch (IllegalArgumentException e) {
074 throw new LibException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
075 } catch (SecurityException e) {
076 throw new LibException("The Liblinear learner cannot remove the instance file. ", e);
077 } catch (IOException e) {
078 throw new LibException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
079 }
080 }
081
082 protected void trainExternal(FeatureVector featureVector) throws MaltChainedException {
083 try {
084
085 if (configLogger.isInfoEnabled()) {
086 owner.getGuide().getConfiguration().getConfigLogger().info("Creating liblinear model (external) "+getFile(".mod").getName());
087 }
088 binariesInstances2SVMFileFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"));
089 final String[] params = getLibParamStringArray();
090 String[] arrayCommands = new String[params.length+3];
091 int i = 0;
092 arrayCommands[i++] = pathExternalTrain;
093 for (; i <= params.length; i++) {
094 arrayCommands[i] = params[i-1];
095 }
096 arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath();
097 arrayCommands[i++] = getFile(".mod").getAbsolutePath();
098
099 if (verbosity == Verbostity.ALL) {
100 owner.getGuide().getConfiguration().getConfigLogger().info('\n');
101 }
102 final Process child = Runtime.getRuntime().exec(arrayCommands);
103 final InputStream in = child.getInputStream();
104 final InputStream err = child.getErrorStream();
105 int c;
106 while ((c = in.read()) != -1){
107 if (verbosity == Verbostity.ALL) {
108 owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
109 }
110 }
111 while ((c = err.read()) != -1){
112 if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
113 owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
114 }
115 }
116 if (child.waitFor() != 0) {
117 owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
118 }
119 in.close();
120 err.close();
121 if (configLogger.isInfoEnabled()) {
122 configLogger.info("\nSaving Liblinear model "+getFile(".moo").getName()+"\n");
123 }
124 MaltLiblinearModel xmodel = new MaltLiblinearModel(getFile(".mod"));
125 ObjectOutputStream output = new ObjectOutputStream (new BufferedOutputStream(new FileOutputStream(getFile(".moo").getAbsolutePath())));
126 try{
127 output.writeObject(xmodel);
128 } finally {
129 output.close();
130 }
131 if (!saveInstanceFiles) {
132 getFile(".ins").delete();
133 getFile(".mod").delete();
134 getFile(".ins.tmp").delete();
135 }
136 if (configLogger.isInfoEnabled()) {
137 configLogger.info('\n');
138 }
139 } catch (InterruptedException e) {
140 throw new LibException("Learner is interrupted. ", e);
141 } catch (IllegalArgumentException e) {
142 throw new LibException("The learner was not able to redirect Standard Error stream. ", e);
143 } catch (SecurityException e) {
144 throw new LibException("The learner cannot remove the instance file. ", e);
145 } catch (IOException e) {
146 throw new LibException("The learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
147 } catch (OutOfMemoryError e) {
148 throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
149 }
150 }
151
152 public void terminate() throws MaltChainedException {
153 super.terminate();
154 }
155
156 public void initLibOptions() {
157 libOptions = new LinkedHashMap<String, String>();
158 libOptions.put("s", "4"); // type = SolverType.L2LOSS_SVM_DUAL (default)
159 libOptions.put("c", "0.1"); // cost = 1 (default)
160 libOptions.put("e", "0.1"); // epsilon = 0.1 (default)
161 libOptions.put("B", "-1"); // bias = -1 (default)
162 }
163
164 public void initAllowedLibOptionFlags() {
165 allowedLibOptionFlags = "sceB";
166 }
167
168 private Problem readProblem(InputStreamReader isr) throws MaltChainedException {
169 Problem problem = new Problem();
170 final FeatureList featureList = new FeatureList();
171
172 try {
173 final BufferedReader fp = new BufferedReader(isr);
174
175 problem.bias = -1;
176 problem.l = getNumberOfInstances();
177 problem.x = new FeatureNode[problem.l][];
178 problem.y = new int[problem.l];
179 int i = 0;
180
181 while(true) {
182 String line = fp.readLine();
183 if(line == null) break;
184 int y = binariesInstance(line, featureList);
185 if (y == -1) {
186 continue;
187 }
188 try {
189 problem.y[i] = y;
190 problem.x[i] = new FeatureNode[featureList.size()];
191 int p = 0;
192 for (int k=0; k < featureList.size(); k++) {
193 MaltFeatureNode x = featureList.get(k);
194 problem.x[i][p++] = new FeatureNode(x.getIndex(), x.getValue());
195 }
196 i++;
197 } catch (ArrayIndexOutOfBoundsException e) {
198 throw new LibException("Couldn't read liblinear problem from the instance file. ", e);
199 }
200
201 }
202 fp.close();
203 problem.n = featureMap.size();
204 } catch (IOException e) {
205 throw new LibException("Cannot read from the instance file. ", e);
206 }
207 return problem;
208 }
209
210 private Parameter getLiblinearParameters() throws MaltChainedException {
211 Parameter param = new Parameter(SolverType.MCSVM_CS, 0.1, 0.1);
212 String type = libOptions.get("s");
213
214 if (type.equals("0")) {
215 param.setSolverType(SolverType.L2R_LR);
216 } else if (type.equals("1")) {
217 param.setSolverType(SolverType.L2R_L2LOSS_SVC_DUAL);
218 } else if (type.equals("2")) {
219 param.setSolverType(SolverType.L2R_L2LOSS_SVC);
220 } else if (type.equals("3")) {
221 param.setSolverType(SolverType.L2R_L1LOSS_SVC_DUAL);
222 } else if (type.equals("4")) {
223 param.setSolverType(SolverType.MCSVM_CS);
224 } else if (type.equals("5")) {
225 param.setSolverType(SolverType.L1R_L2LOSS_SVC);
226 } else if (type.equals("6")) {
227 param.setSolverType(SolverType.L1R_LR);
228 } else if (type.equals("7")) {
229 param.setSolverType(SolverType.L2R_LR_DUAL);
230 } else {
231 throw new LibException("The liblinear type (-s) is not an integer value between 0 and 4. ");
232 }
233 try {
234 param.setC(Double.valueOf(libOptions.get("c")).doubleValue());
235 } catch (NumberFormatException e) {
236 throw new LibException("The liblinear cost (-c) value is not numerical value. ", e);
237 }
238 try {
239 param.setEps(Double.valueOf(libOptions.get("e")).doubleValue());
240 } catch (NumberFormatException e) {
241 throw new LibException("The liblinear epsilon (-e) value is not numerical value. ", e);
242 }
243 return param;
244 }
245 }