001 package org.maltparser.ml.lib;
002
003 import java.io.BufferedOutputStream;
004 import java.io.BufferedReader;
005 import java.io.FileOutputStream;
006 import java.io.IOException;
007 import java.io.InputStream;
008 import java.io.InputStreamReader;
009 import java.io.ObjectInputStream;
010 import java.io.ObjectOutputStream;
011 import java.io.PrintStream;
012 import java.util.LinkedHashMap;
013
014 import liblinear.FeatureNode;
015 import liblinear.Linear;
016 import liblinear.Model;
017 import liblinear.Parameter;
018 import liblinear.Problem;
019 import liblinear.SolverType;
020
021 import org.maltparser.core.exception.MaltChainedException;
022 import org.maltparser.core.feature.FeatureVector;
023 import org.maltparser.core.helper.NoPrintStream;
024 import org.maltparser.core.helper.Util;
025 import org.maltparser.parser.guide.instance.InstanceModel;
026
027 public class LibLinear extends Lib {
028
029 public LibLinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
030 super(owner, learnerMode, "liblinear");
031 if (learnerMode == CLASSIFY) {
032 try {
033 ObjectInputStream input = new ObjectInputStream(getInputStreamFromConfigFileEntry(".moo"));
034 try {
035 model = (MaltLibModel)input.readObject();
036 } finally {
037 input.close();
038 }
039 } catch (ClassNotFoundException e) {
040 throw new LibException("Couldn't load the liblinear model", e);
041 } catch (Exception e) {
042 throw new LibException("Couldn't load the liblinear model", e);
043 }
044 }
045
046 }
047
048 protected void trainInternal(FeatureVector featureVector) throws MaltChainedException {
049 try {
050 if (configLogger.isInfoEnabled()) {
051 configLogger.info("Creating Liblinear model "+getFile(".moo").getName()+"\n");
052 }
053 Problem problem = readProblem(getInstanceInputStreamReader(".ins"));
054 final PrintStream out = System.out;
055 final PrintStream err = System.err;
056 System.setOut(NoPrintStream.NO_PRINTSTREAM);
057 System.setErr(NoPrintStream.NO_PRINTSTREAM);
058 Parameter parameter = getLiblinearParameters();
059 Model model = Linear.train(problem, parameter);
060 System.setOut(err);
061 System.setOut(out);
062 // System.out.println(" model.getNrFeature():" + model.getNrFeature());
063 // System.out.println(" model.getFeatureWeights().length:" + model.getFeatureWeights().length);
064 double[][] wmatrix = convert(model.getFeatureWeights(), model.getNrClass(), model.getNrFeature());
065 MaltLiblinearModel xmodel = new MaltLiblinearModel(model.getLabels(), model.getNrClass(), wmatrix.length, wmatrix, parameter.getSolverType());
066 ObjectOutputStream output = new ObjectOutputStream (new BufferedOutputStream(new FileOutputStream(getFile(".moo").getAbsolutePath())));
067 try{
068 output.writeObject(xmodel);
069 } finally {
070 output.close();
071 }
072 if (!saveInstanceFiles) {
073 getFile(".ins").delete();
074 }
075 } catch (OutOfMemoryError e) {
076 throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
077 } catch (IllegalArgumentException e) {
078 throw new LibException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
079 } catch (SecurityException e) {
080 throw new LibException("The Liblinear learner cannot remove the instance file. ", e);
081 } catch (IOException e) {
082 throw new LibException("The Liblinear learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
083 }
084 }
085
086 private double[][] convert(double[] w, int nr_class, int nr_feature) {
087 double[][] wmatrix = new double[nr_feature][];
088 boolean reuse = false;
089 int ne = 0;
090 int nr = 0;
091 int no = 0;
092 int n = 0;
093
094 Long[] reverseMap = featureMap.reverseMap();
095 for (int i = 0; i < nr_feature; i++) {
096 reuse = false;
097 int k = nr_class;
098 for (int t = i * nr_class; (t + (k - 1)) >= t; k--) {
099 if (w[t + k - 1] != 0.0) {
100 break;
101 }
102 }
103 double[] copy = new double[k];
104 System.arraycopy(w, i * nr_class, copy, 0,k);
105 if (eliminate(copy)) {
106 ne++;
107 featureMap.removeIndex(reverseMap[i + 1]);
108 featureMap.decrementfeatureCounter();
109 reverseMap[i + 1] = null;
110 wmatrix[i] = null;
111 } else {
112 featureMap.setIndex(reverseMap[i + 1], i + 1 - ne);
113 for (int j = 0; j < i; j++) {
114 if (Util.equals(copy, wmatrix[j])) {
115 wmatrix[i] = wmatrix[j];
116 reuse = true;
117 nr++;
118 break;
119 }
120 }
121 if (reuse == false) {
122 no++;
123 wmatrix[i] = copy;
124 }
125 }
126 n++;
127 }
128 double[][] wmatrix_reduced = new double[nr_feature-ne][];
129 for (int i = 0, j = 0; i < wmatrix.length; i++) {
130 if (wmatrix[i] != null) {
131 wmatrix_reduced[j++] = wmatrix[i];
132 }
133 }
134 // System.out.println("NE:"+ne);
135 // System.out.println("NR:"+nr);
136 // System.out.println("NO:"+no);
137 // System.out.println("N :"+n);
138 return wmatrix_reduced;
139 }
140
141 public static boolean eliminate(double[] a) {
142 if (a.length == 0) {
143 return true;
144 }
145 for (int i = 1; i < a.length; i++) {
146 if (a[i] != a[i-1]) {
147 return false;
148 }
149 }
150 return true;
151 }
152
153 protected void trainExternal(FeatureVector featureVector) throws MaltChainedException {
154 try {
155
156 if (configLogger.isInfoEnabled()) {
157 owner.getGuide().getConfiguration().getConfigLogger().info("Creating liblinear model (external) "+getFile(".mod").getName());
158 }
159 binariesInstances2SVMFileFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"));
160 final String[] params = getLibParamStringArray();
161 String[] arrayCommands = new String[params.length+3];
162 int i = 0;
163 arrayCommands[i++] = pathExternalTrain;
164 for (; i <= params.length; i++) {
165 arrayCommands[i] = params[i-1];
166 }
167 arrayCommands[i++] = getFile(".ins.tmp").getAbsolutePath();
168 arrayCommands[i++] = getFile(".mod").getAbsolutePath();
169
170 if (verbosity == Verbostity.ALL) {
171 owner.getGuide().getConfiguration().getConfigLogger().info('\n');
172 }
173 final Process child = Runtime.getRuntime().exec(arrayCommands);
174 final InputStream in = child.getInputStream();
175 final InputStream err = child.getErrorStream();
176 int c;
177 while ((c = in.read()) != -1){
178 if (verbosity == Verbostity.ALL) {
179 owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
180 }
181 }
182 while ((c = err.read()) != -1){
183 if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
184 owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
185 }
186 }
187 if (child.waitFor() != 0) {
188 owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
189 }
190 in.close();
191 err.close();
192 if (configLogger.isInfoEnabled()) {
193 configLogger.info("\nSaving Liblinear model "+getFile(".moo").getName()+"\n");
194 }
195 MaltLiblinearModel xmodel = new MaltLiblinearModel(getFile(".mod"));
196 ObjectOutputStream output = new ObjectOutputStream (new BufferedOutputStream(new FileOutputStream(getFile(".moo").getAbsolutePath())));
197 try{
198 output.writeObject(xmodel);
199 } finally {
200 output.close();
201 }
202 if (!saveInstanceFiles) {
203 getFile(".ins").delete();
204 getFile(".mod").delete();
205 getFile(".ins.tmp").delete();
206 }
207 if (configLogger.isInfoEnabled()) {
208 configLogger.info('\n');
209 }
210 } catch (InterruptedException e) {
211 throw new LibException("Learner is interrupted. ", e);
212 } catch (IllegalArgumentException e) {
213 throw new LibException("The learner was not able to redirect Standard Error stream. ", e);
214 } catch (SecurityException e) {
215 throw new LibException("The learner cannot remove the instance file. ", e);
216 } catch (IOException e) {
217 throw new LibException("The learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
218 } catch (OutOfMemoryError e) {
219 throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
220 }
221 }
222
223 public void terminate() throws MaltChainedException {
224 super.terminate();
225 }
226
227 public void initLibOptions() {
228 libOptions = new LinkedHashMap<String, String>();
229 libOptions.put("s", "4"); // type = SolverType.L2LOSS_SVM_DUAL (default)
230 libOptions.put("c", "0.1"); // cost = 1 (default)
231 libOptions.put("e", "0.1"); // epsilon = 0.1 (default)
232 libOptions.put("B", "-1"); // bias = -1 (default)
233 }
234
235 public void initAllowedLibOptionFlags() {
236 allowedLibOptionFlags = "sceB";
237 }
238
239 private Problem readProblem(InputStreamReader isr) throws MaltChainedException {
240 Problem problem = new Problem();
241 final FeatureList featureList = new FeatureList();
242
243 try {
244 final BufferedReader fp = new BufferedReader(isr);
245
246 problem.bias = -1;
247 problem.l = getNumberOfInstances();
248 problem.x = new FeatureNode[problem.l][];
249 problem.y = new int[problem.l];
250 int i = 0;
251
252 while(true) {
253 String line = fp.readLine();
254 if(line == null) break;
255 int y = binariesInstance(line, featureList);
256 if (y == -1) {
257 continue;
258 }
259 try {
260 problem.y[i] = y;
261 problem.x[i] = new FeatureNode[featureList.size()];
262 int p = 0;
263 for (int k=0; k < featureList.size(); k++) {
264 MaltFeatureNode x = featureList.get(k);
265 problem.x[i][p++] = new FeatureNode(x.getIndex(), x.getValue());
266 }
267 i++;
268 } catch (ArrayIndexOutOfBoundsException e) {
269 throw new LibException("Couldn't read liblinear problem from the instance file. ", e);
270 }
271
272 }
273 fp.close();
274 problem.n = featureMap.size();
275 } catch (IOException e) {
276 throw new LibException("Cannot read from the instance file. ", e);
277 }
278 return problem;
279 }
280
281 private Parameter getLiblinearParameters() throws MaltChainedException {
282 Parameter param = new Parameter(SolverType.MCSVM_CS, 0.1, 0.1);
283 String type = libOptions.get("s");
284
285 if (type.equals("0")) {
286 param.setSolverType(SolverType.L2R_LR);
287 } else if (type.equals("1")) {
288 param.setSolverType(SolverType.L2R_L2LOSS_SVC_DUAL);
289 } else if (type.equals("2")) {
290 param.setSolverType(SolverType.L2R_L2LOSS_SVC);
291 } else if (type.equals("3")) {
292 param.setSolverType(SolverType.L2R_L1LOSS_SVC_DUAL);
293 } else if (type.equals("4")) {
294 param.setSolverType(SolverType.MCSVM_CS);
295 } else if (type.equals("5")) {
296 param.setSolverType(SolverType.L1R_L2LOSS_SVC);
297 } else if (type.equals("6")) {
298 param.setSolverType(SolverType.L1R_LR);
299 } else if (type.equals("7")) {
300 param.setSolverType(SolverType.L2R_LR_DUAL);
301 } else {
302 throw new LibException("The liblinear type (-s) is not an integer value between 0 and 4. ");
303 }
304 try {
305 param.setC(Double.valueOf(libOptions.get("c")).doubleValue());
306 } catch (NumberFormatException e) {
307 throw new LibException("The liblinear cost (-c) value is not numerical value. ", e);
308 }
309 try {
310 param.setEps(Double.valueOf(libOptions.get("e")).doubleValue());
311 } catch (NumberFormatException e) {
312 throw new LibException("The liblinear epsilon (-e) value is not numerical value. ", e);
313 }
314 return param;
315 }
316 }