package org.maltparser.ml.lib;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.PrintStream;
import java.util.LinkedHashMap;

import liblinear.FeatureNode;
import liblinear.Linear;
import liblinear.Model;
import liblinear.Parameter;
import liblinear.Problem;
import liblinear.SolverType;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureVector;
import org.maltparser.core.helper.NoPrintStream;
import org.maltparser.parser.guide.instance.InstanceModel;

/**
 * Learner that trains and loads liblinear models.
 *
 * <p>Training is done either in-process through {@link Linear#train} or by
 * launching an external trainer executable. In both cases the resulting model
 * is serialized to the {@code .moo} file, which is what the constructor reads
 * back when the learner is created in classify mode.
 */
public class LibLinear extends Lib {

    /**
     * Creates the learner and, in classify mode, loads the serialized model
     * from the {@code .moo} configuration entry.
     *
     * @param owner the instance model that owns this learner
     * @param learnerMode the mode of the learner; the model is only loaded when
     *        it equals {@code CLASSIFY}
     * @throws MaltChainedException if the model cannot be loaded
     */
    public LibLinear(InstanceModel owner, Integer learnerMode) throws MaltChainedException {
        super(owner, learnerMode, "liblinear");
        if (learnerMode == CLASSIFY) {
            try {
                // NOTE(review): native Java deserialization; the .moo entry must come from a trusted configuration.
                final ObjectInputStream input = new ObjectInputStream(getInputStreamFromConfigFileEntry(".moo"));
                try {
                    model = (MaltLibModel)input.readObject();
                } finally {
                    input.close();
                }
            } catch (Exception e) {
                // Covers ClassNotFoundException as well; every failure is reported the same way.
                throw new LibException("Couldn't load the liblinear model", e);
            }
        }
    }

    /**
     * Trains a liblinear model in-process from the {@code .ins} instance file
     * and serializes it to the {@code .moo} file.
     *
     * <p>{@code System.out} and {@code System.err} are silenced while liblinear
     * is training and are always restored afterwards, also when training fails.
     *
     * @param featureVector not used by this implementation
     * @throws MaltChainedException if the instances cannot be read, the options
     *         are invalid, or the model cannot be written
     */
    protected void trainInternal(FeatureVector featureVector) throws MaltChainedException {
        try {
            if (configLogger.isInfoEnabled()) {
                configLogger.info("Creating Liblinear model "+getFile(".moo").getName()+"\n");
            }
            final Problem problem = readProblem(getInstanceInputStreamReader(".ins"));
            // Resolve the options before touching the streams so that an invalid
            // option cannot leave the console silenced.
            final Parameter parameter = getLiblinearParameters();

            final PrintStream out = System.out;
            final PrintStream err = System.err;
            final Model trainedModel;
            try {
                System.setOut(NoPrintStream.NO_PRINTSTREAM);
                System.setErr(NoPrintStream.NO_PRINTSTREAM);
                trainedModel = Linear.train(problem, parameter);
            } finally {
                System.setErr(err);
                System.setOut(out);
            }

            writeLiblinearModelFile(new MaltLiblinearModel(trainedModel, parameter.getSolverType()));
            if (!saveInstanceFiles) {
                getFile(".ins").delete();
            }
        } catch (OutOfMemoryError e) {
            throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
        } catch (IllegalArgumentException e) {
            throw new LibException("The Liblinear learner was not able to redirect Standard Error stream. ", e);
        } catch (SecurityException e) {
            throw new LibException("The Liblinear learner cannot remove the instance file. ", e);
        } catch (IOException e) {
            // The file written by this method is the serialized .moo model.
            throw new LibException("The Liblinear learner cannot save the model file '"+getFile(".moo").getAbsolutePath()+"'. ", e);
        }
    }

    /**
     * Trains a liblinear model with the external trainer executable
     * ({@code pathExternalTrain}), then converts the produced {@code .mod} file
     * into the serialized {@code .moo} model.
     *
     * <p>The command line is: trainer path, the library parameters, the
     * temporary instance file ({@code .ins.tmp}) and the output model file
     * ({@code .mod}).
     *
     * @param featureVector not used by this implementation
     * @throws MaltChainedException if the trainer cannot be run or the model
     *         cannot be read or written
     */
    protected void trainExternal(FeatureVector featureVector) throws MaltChainedException {
        try {
            if (configLogger.isInfoEnabled()) {
                owner.getGuide().getConfiguration().getConfigLogger().info("Creating liblinear model (external) "+getFile(".mod").getName());
            }
            binariesInstances2SVMFileFormat(getInstanceInputStreamReader(".ins"), getInstanceOutputStreamWriter(".ins.tmp"));

            final String[] params = getLibParamStringArray();
            final String[] arrayCommands = new String[params.length+3];
            arrayCommands[0] = pathExternalTrain;
            System.arraycopy(params, 0, arrayCommands, 1, params.length);
            arrayCommands[params.length+1] = getFile(".ins.tmp").getAbsolutePath();
            arrayCommands[params.length+2] = getFile(".mod").getAbsolutePath();

            if (verbosity == Verbostity.ALL) {
                owner.getGuide().getConfiguration().getConfigLogger().info('\n');
            }
            final Process child = Runtime.getRuntime().exec(arrayCommands);
            final InputStream in = child.getInputStream();
            final InputStream err = child.getErrorStream();
            try {
                // NOTE(review): stdout is drained completely before stderr; a trainer that
                // writes a lot to stderr first could block on a full pipe.
                int c;
                while ((c = in.read()) != -1) {
                    if (verbosity == Verbostity.ALL) {
                        owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
                    }
                }
                while ((c = err.read()) != -1) {
                    if (verbosity == Verbostity.ALL || verbosity == Verbostity.ERROR) {
                        owner.getGuide().getConfiguration().getConfigLogger().info((char)c);
                    }
                }
                if (child.waitFor() != 0) {
                    owner.getGuide().getConfiguration().getConfigLogger().info(" FAILED ("+child.exitValue()+")");
                }
            } finally {
                try {
                    in.close();
                } finally {
                    err.close();
                }
            }

            if (configLogger.isInfoEnabled()) {
                configLogger.info("\nSaving Liblinear model "+getFile(".moo").getName()+"\n");
            }
            writeLiblinearModelFile(new MaltLiblinearModel(getFile(".mod")));
            if (!saveInstanceFiles) {
                getFile(".ins").delete();
                getFile(".mod").delete();
                getFile(".ins.tmp").delete();
            }
            if (configLogger.isInfoEnabled()) {
                configLogger.info('\n');
            }
        } catch (InterruptedException e) {
            // Keep the interrupt visible to callers further up the stack.
            Thread.currentThread().interrupt();
            throw new LibException("Learner is interrupted. ", e);
        } catch (IllegalArgumentException e) {
            throw new LibException("The learner was not able to redirect Standard Error stream. ", e);
        } catch (SecurityException e) {
            throw new LibException("The learner cannot remove the instance file. ", e);
        } catch (IOException e) {
            throw new LibException("The learner cannot save the model file '"+getFile(".mod").getAbsolutePath()+"'. ", e);
        } catch (OutOfMemoryError e) {
            throw new LibException("Out of memory. Please increase the Java heap size (-Xmx<size>). ", e);
        }
    }

    /**
     * Terminates the learner by delegating to the superclass.
     *
     * @throws MaltChainedException if the superclass fails to terminate
     */
    public void terminate() throws MaltChainedException {
        super.terminate();
    }

    /**
     * Installs the default values of the liblinear options.
     */
    public void initLibOptions() {
        libOptions = new LinkedHashMap<String, String>();
        libOptions.put("s", "4"); // solver type 4 = SolverType.MCSVM_CS (see getLiblinearParameters)
        libOptions.put("c", "0.1"); // cost = 0.1
        libOptions.put("e", "0.1"); // epsilon = 0.1
        libOptions.put("B", "-1"); // bias = -1
    }

    /**
     * Declares which single-letter option flags are accepted: s, c, e and B.
     */
    public void initAllowedLibOptionFlags() {
        allowedLibOptionFlags = "sceB";
    }

    /**
     * Serializes the given model to the {@code .moo} file.
     */
    private void writeLiblinearModelFile(MaltLiblinearModel xmodel) throws IOException, MaltChainedException {
        final ObjectOutputStream output = new ObjectOutputStream(new BufferedOutputStream(new FileOutputStream(getFile(".moo").getAbsolutePath())));
        try {
            output.writeObject(xmodel);
        } finally {
            output.close();
        }
    }

    /**
     * Builds a liblinear {@link Problem} from the instance file, one instance
     * per line. The reader is always closed before returning.
     *
     * @param isr reader over the instance file
     * @return the populated problem
     * @throws MaltChainedException if the file cannot be read or holds more
     *         instances than {@code getNumberOfInstances()} reports
     */
    private Problem readProblem(InputStreamReader isr) throws MaltChainedException {
        final Problem problem = new Problem();
        final FeatureList featureList = new FeatureList();

        try {
            final BufferedReader reader = new BufferedReader(isr);
            try {
                // NOTE(review): the bias is fixed to -1 here; the "B" option is not consulted.
                problem.bias = -1;
                problem.l = getNumberOfInstances();
                problem.x = new FeatureNode[problem.l][];
                problem.y = new int[problem.l];

                int i = 0;
                String line;
                while ((line = reader.readLine()) != null) {
                    final int y = binariesInstance(line, featureList);
                    if (y == -1) {
                        // NOTE(review): a skipped line leaves an unused slot at the end of
                        // problem.x/problem.y unless getNumberOfInstances() excludes it.
                        continue;
                    }
                    try {
                        final int size = featureList.size();
                        problem.y[i] = y;
                        problem.x[i] = new FeatureNode[size];
                        for (int k = 0; k < size; k++) {
                            final MaltFeatureNode x = featureList.get(k);
                            problem.x[i][k] = new FeatureNode(x.getIndex(), x.getValue());
                        }
                        i++;
                    } catch (ArrayIndexOutOfBoundsException e) {
                        throw new LibException("Couldn't read liblinear problem from the instance file. ", e);
                    }
                }
            } finally {
                reader.close();
            }
            problem.n = featureMap.size();
        } catch (IOException e) {
            throw new LibException("Cannot read from the instance file. ", e);
        }
        return problem;
    }

    /**
     * Translates the {@code s}, {@code c} and {@code e} options into a
     * liblinear {@link Parameter}.
     *
     * @return the parameter object used for training
     * @throws MaltChainedException if the solver type is not one of 0-7 or the
     *         cost or epsilon is not a number
     */
    private Parameter getLiblinearParameters() throws MaltChainedException {
        final Parameter param = new Parameter(SolverType.MCSVM_CS, 0.1, 0.1);
        final String type = libOptions.get("s");

        // Literal-first equals: a missing option falls through to the LibException below.
        if ("0".equals(type)) {
            param.setSolverType(SolverType.L2R_LR);
        } else if ("1".equals(type)) {
            param.setSolverType(SolverType.L2R_L2LOSS_SVC_DUAL);
        } else if ("2".equals(type)) {
            param.setSolverType(SolverType.L2R_L2LOSS_SVC);
        } else if ("3".equals(type)) {
            param.setSolverType(SolverType.L2R_L1LOSS_SVC_DUAL);
        } else if ("4".equals(type)) {
            param.setSolverType(SolverType.MCSVM_CS);
        } else if ("5".equals(type)) {
            param.setSolverType(SolverType.L1R_L2LOSS_SVC);
        } else if ("6".equals(type)) {
            param.setSolverType(SolverType.L1R_LR);
        } else if ("7".equals(type)) {
            param.setSolverType(SolverType.L2R_LR_DUAL);
        } else {
            throw new LibException("The liblinear type (-s) is not an integer value between 0 and 7. ");
        }
        try {
            param.setC(Double.parseDouble(libOptions.get("c")));
        } catch (NumberFormatException e) {
            throw new LibException("The liblinear cost (-c) value is not numerical value. ", e);
        }
        try {
            param.setEps(Double.parseDouble(libOptions.get("e")));
        } catch (NumberFormatException e) {
            throw new LibException("The liblinear epsilon (-e) value is not numerical value. ", e);
        }
        return param;
    }
}