001package org.maltparser.core.feature; 002 003 004import java.util.ArrayList; 005import java.util.Stack; 006import java.util.regex.Pattern; 007 008import org.maltparser.core.exception.MaltChainedException; 009import org.maltparser.core.feature.function.AddressFunction; 010import org.maltparser.core.feature.function.FeatureFunction; 011import org.maltparser.core.feature.function.Function; 012import org.maltparser.core.feature.spec.SpecificationModel; 013import org.maltparser.core.feature.spec.SpecificationSubModel; 014import org.maltparser.core.feature.system.FeatureEngine; 015import org.maltparser.core.helper.HashMap; 016 017 018/** 019* 020* 021* @author Johan Hall 022*/ 023public class FeatureModel extends HashMap<String, FeatureVector> { 024 public final static long serialVersionUID = 3256444702936019250L; 025 private final static Pattern splitPattern = Pattern.compile("\\(|\\)|\\[|\\]|,"); 026 private final SpecificationModel specModel; 027 private final ArrayList<AddressFunction> addressFunctionCache; 028 private final ArrayList<FeatureFunction> featureFunctionCache; 029 private final FeatureFunction divideFeatureFunction; 030 private final FeatureRegistry registry; 031 private final FeatureEngine featureEngine; 032 private final FeatureVector mainFeatureVector; 033 private final ArrayList<Integer> divideFeatureIndexVector; 034 035 public FeatureModel(SpecificationModel _specModel, FeatureRegistry _registry, FeatureEngine _engine, String dataSplitColumn, String dataSplitStructure) throws MaltChainedException { 036 this.specModel = _specModel; 037 this.registry = _registry; 038 this.featureEngine = _engine; 039 this.addressFunctionCache = new ArrayList<AddressFunction>(); 040 this.featureFunctionCache = new ArrayList<FeatureFunction>(); 041 FeatureVector tmpMainFeatureVector = null; 042 for (SpecificationSubModel subModel : specModel) { 043 FeatureVector fv = new FeatureVector(this, subModel); 044 if (tmpMainFeatureVector == null) { 045 if (subModel.getSubModelName().equals("MAIN")) { 046 tmpMainFeatureVector = fv; 047 } else { 048 tmpMainFeatureVector = fv; 049 put(subModel.getSubModelName(), fv); 050 } 051 } else { 052 put(subModel.getSubModelName(), fv); 053 } 054 } 055 this.mainFeatureVector = tmpMainFeatureVector; 056 if (dataSplitColumn != null && dataSplitColumn.length() > 0 && dataSplitStructure != null && dataSplitStructure.length() > 0) { 057 final StringBuilder sb = new StringBuilder(); 058 sb.append("InputColumn("); 059 sb.append(dataSplitColumn); 060 sb.append(", "); 061 sb.append(dataSplitStructure); 062 sb.append(')'); 063 this.divideFeatureFunction = identifyFeature(sb.toString()); 064// this.divideFeatureIndexVectorMap = new HashMap<String,ArrayList<Integer>>(); 065 this.divideFeatureIndexVector = new ArrayList<Integer>(); 066 067 for (int i = 0; i < mainFeatureVector.size(); i++) { 068 if (mainFeatureVector.get(i).equals(divideFeatureFunction)) { 069 divideFeatureIndexVector.add(i); 070 } 071 } 072 for (SpecificationSubModel subModel : specModel) { 073 FeatureVector featureVector = get(subModel.getSubModelName()); 074 if (featureVector == null) { 075 featureVector = mainFeatureVector; 076 } 077 String divideKeyName = "/"+subModel.getSubModelName(); 078// divideFeatureIndexVectorMap.put(divideKeyName, divideFeatureIndexVector); 079 080 FeatureVector divideFeatureVector = (FeatureVector)featureVector.clone(); 081 for (Integer i : divideFeatureIndexVector) { 082 divideFeatureVector.remove(divideFeatureVector.get(i)); 083 } 084 put(divideKeyName,divideFeatureVector); 085 } 086 } else { 087 this.divideFeatureFunction = null; 088// this.divideFeatureIndexVectorMap = null; 089 this.divideFeatureIndexVector = null; 090 } 091 } 092 093 public SpecificationModel getSpecModel() { 094 return specModel; 095 } 096 097 public FeatureRegistry getRegistry() { 098 return registry; 099 } 100 101 public FeatureEngine getFeatureEngine() { 102 return featureEngine; 103 } 104 105 public FeatureVector getMainFeatureVector() { 106 return mainFeatureVector; 107 } 108 109 public FeatureVector getFeatureVector(String subModelName) { 110 return get(subModelName); 111 } 112 113 public FeatureVector getFeatureVector(String decisionSymbol, String subModelName) { 114 final StringBuilder sb = new StringBuilder(); 115 if (decisionSymbol.length() > 0) { 116 sb.append(decisionSymbol); 117 sb.append('.'); 118 } 119 sb.append(subModelName); 120 if (containsKey(sb.toString())) { 121 return get(sb.toString()); 122 } else if (containsKey(subModelName)) { 123 return get(subModelName); 124 } 125 return mainFeatureVector; 126 } 127 128 public FeatureFunction getDivideFeatureFunction() { 129 return divideFeatureFunction; 130 } 131 132 public boolean hasDivideFeatureFunction() { 133 return divideFeatureFunction != null; 134 } 135 136// public ArrayList<Integer> getDivideFeatureIndexVectorMap(String divideSubModelName) { 137// return divideFeatureIndexVectorMap.get(divideSubModelName); 138// } 139// 140// public boolean hasDivideFeatureIndexVectorMap() { 141// return divideFeatureIndexVectorMap != null; 142// } 143 144 public ArrayList<Integer> getDivideFeatureIndexVector() { 145 return divideFeatureIndexVector; 146 } 147 148 public boolean hasDivideFeatureIndexVector() { 149 return divideFeatureIndexVector != null; 150 } 151 152 public void update() throws MaltChainedException { 153 for (int i = 0, n = addressFunctionCache.size(); i < n; i++) { 154 addressFunctionCache.get(i).update(); 155 } 156 157 for (int i = 0, n = featureFunctionCache.size(); i < n; i++) { 158 featureFunctionCache.get(i).update(); 159 } 160 } 161 162 public void update(Object[] arguments) throws MaltChainedException { 163 for (int i = 0, n = addressFunctionCache.size(); i < n; i++) { 164 addressFunctionCache.get(i).update(arguments); 165 } 166 167 for (int i = 0, n = featureFunctionCache.size(); i < n; i++) { 168 featureFunctionCache.get(i).update(); 169 } 170 } 171 172 public FeatureFunction identifyFeature(String spec) throws MaltChainedException { 173 String[] items =splitPattern.split(spec); 174 Stack<Object> objects = new Stack<Object>(); 175 for (int i = items.length-1; i >= 0; i--) { 176 if (items[i].trim().length() != 0) { 177 objects.push(items[i].trim()); 178 } 179 } 180 identifyFeatureFunction(objects); 181 if (objects.size() != 1 || !(objects.peek() instanceof FeatureFunction) || (objects.peek() instanceof AddressFunction)) { 182 throw new FeatureException("The feature specification '"+spec+"' were not recognized properly. "); 183 } 184 return (FeatureFunction)objects.pop(); 185 } 186 187 protected void identifyFeatureFunction(Stack<Object> objects) throws MaltChainedException { 188 Function function = featureEngine.newFunction(objects.peek().toString(), registry); 189 if (function != null) { 190 objects.pop(); 191 if (!objects.isEmpty()) { 192 identifyFeatureFunction(objects); 193 } 194 initializeFunction(function, objects); 195 } else { 196 if (!objects.isEmpty()) { 197 Object o = objects.pop(); 198 if (!objects.isEmpty()) { 199 identifyFeatureFunction(objects); 200 } 201 objects.push(o); 202 } 203 } 204 } 205 206 protected void initializeFunction(Function function, Stack<Object> objects) throws MaltChainedException { 207 Class<?>[] paramTypes = function.getParameterTypes(); 208 Object[] arguments = new Object[paramTypes.length]; 209 for (int i = 0; i < paramTypes.length; i++) { 210 if (paramTypes[i] == java.lang.Integer.class) { 211 if (objects.peek() instanceof String) { 212 String object = (String)objects.pop(); 213 try { 214 objects.push(Integer.parseInt(object)); 215 } catch (NumberFormatException e) { 216 throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+object+"'" + ", expect an integer value. ", e); 217 } 218 } else { 219 throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect an integer value. "); 220 } 221 } else if (paramTypes[i] == java.lang.Double.class) { 222 if (objects.peek() instanceof String) { 223 String object = (String)objects.pop(); 224 try { 225 objects.push(Double.parseDouble(object)); 226 } catch (NumberFormatException e) { 227 throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+object+"'" + ", expect a numeric value. ", e); 228 } 229 } else { 230 throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect a numeric value. "); 231 } 232 } else if (paramTypes[i] == java.lang.Boolean.class) { 233 if (objects.peek() instanceof String) { 234 objects.push(Boolean.parseBoolean(((String)objects.pop()))); 235 } else { 236 throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect a boolean value. "); 237 238 } 239 } 240 if (!paramTypes[i].isInstance(objects.peek())) { 241 throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'"); 242 } 243 arguments[i] = objects.pop(); 244 } 245 function.initialize(arguments); 246 if (function instanceof AddressFunction) { 247 int index = addressFunctionCache.indexOf(function); 248 if (index != -1) { 249 function = addressFunctionCache.get(index); 250 } else { 251 addressFunctionCache.add((AddressFunction)function); 252 } 253 } else if (function instanceof FeatureFunction) { 254 int index = featureFunctionCache.indexOf(function); 255 if (index != -1) { 256 function = featureFunctionCache.get(index); 257 } else { 258 featureFunctionCache.add((FeatureFunction)function); 259 } 260 } 261 objects.push(function); 262 } 263 264 public String toString() { 265 return specModel.toString(); 266 } 267}