001package org.maltparser.core.feature;
002
003
004import java.util.ArrayList;
005import java.util.Stack;
006import java.util.regex.Pattern;
007
008import org.maltparser.core.exception.MaltChainedException;
009import org.maltparser.core.feature.function.AddressFunction;
010import org.maltparser.core.feature.function.FeatureFunction;
011import org.maltparser.core.feature.function.Function;
012import org.maltparser.core.feature.spec.SpecificationModel;
013import org.maltparser.core.feature.spec.SpecificationSubModel;
014import org.maltparser.core.feature.system.FeatureEngine;
015import org.maltparser.core.helper.HashMap;
016
017
018/**
019*
020*
021* @author Johan Hall
022*/
023public class FeatureModel extends HashMap<String, FeatureVector> {
024        public final static long serialVersionUID = 3256444702936019250L;
025        private final static Pattern splitPattern = Pattern.compile("\\(|\\)|\\[|\\]|,");
026        private final SpecificationModel specModel;
027        private final ArrayList<AddressFunction> addressFunctionCache;
028        private final ArrayList<FeatureFunction> featureFunctionCache;
029        private final FeatureFunction divideFeatureFunction;
030        private final FeatureRegistry registry;
031        private final FeatureEngine featureEngine;
032        private final FeatureVector mainFeatureVector; 
033        private final ArrayList<Integer> divideFeatureIndexVector;
034        
035        public FeatureModel(SpecificationModel _specModel, FeatureRegistry _registry, FeatureEngine _engine, String dataSplitColumn, String dataSplitStructure) throws MaltChainedException {
036                this.specModel = _specModel;
037                this.registry = _registry;
038                this.featureEngine = _engine;
039                this.addressFunctionCache = new ArrayList<AddressFunction>();
040                this.featureFunctionCache = new ArrayList<FeatureFunction>();
041                FeatureVector tmpMainFeatureVector = null;
042                for (SpecificationSubModel subModel : specModel) {
043                        FeatureVector fv = new FeatureVector(this, subModel);
044                        if (tmpMainFeatureVector == null) {
045                                if (subModel.getSubModelName().equals("MAIN")) {
046                                        tmpMainFeatureVector = fv;
047                                } else {
048                                        tmpMainFeatureVector = fv;
049                                        put(subModel.getSubModelName(), fv);
050                                }
051                        } else {
052                                put(subModel.getSubModelName(), fv);
053                        }
054                }
055                this.mainFeatureVector = tmpMainFeatureVector;
056                if (dataSplitColumn != null && dataSplitColumn.length() > 0 && dataSplitStructure != null && dataSplitStructure.length() > 0) {
057                        final StringBuilder sb = new StringBuilder();
058                        sb.append("InputColumn(");
059                        sb.append(dataSplitColumn);
060                        sb.append(", ");
061                        sb.append(dataSplitStructure);
062                        sb.append(')');
063                        this.divideFeatureFunction = identifyFeature(sb.toString());
064//                      this.divideFeatureIndexVectorMap = new HashMap<String,ArrayList<Integer>>();
065                        this.divideFeatureIndexVector = new ArrayList<Integer>();
066
067                        for (int i = 0; i < mainFeatureVector.size(); i++) {
068                                if (mainFeatureVector.get(i).equals(divideFeatureFunction)) {
069                                        divideFeatureIndexVector.add(i);
070                                }
071                        }
072                        for (SpecificationSubModel subModel : specModel) {
073                                FeatureVector featureVector = get(subModel.getSubModelName());
074                                if (featureVector == null) {
075                                        featureVector = mainFeatureVector;      
076                                }
077                                String divideKeyName = "/"+subModel.getSubModelName();
078//                              divideFeatureIndexVectorMap.put(divideKeyName, divideFeatureIndexVector);
079                                
080                                FeatureVector divideFeatureVector = (FeatureVector)featureVector.clone();
081                                for (Integer i : divideFeatureIndexVector) {
082                                        divideFeatureVector.remove(divideFeatureVector.get(i));
083                                }
084                                put(divideKeyName,divideFeatureVector);
085                        }
086                } else {
087                        this.divideFeatureFunction = null;
088//                      this.divideFeatureIndexVectorMap = null;
089                        this.divideFeatureIndexVector = null;
090                }
091        }
092
093        public SpecificationModel getSpecModel() {
094                return specModel;
095        }
096        
097        public FeatureRegistry getRegistry() {
098                return registry;
099        }
100
101        public FeatureEngine getFeatureEngine() {
102                return featureEngine;
103        }
104        
105        public FeatureVector getMainFeatureVector() {
106                return mainFeatureVector;
107        }
108        
109        public FeatureVector getFeatureVector(String subModelName) {
110                return get(subModelName);
111        }
112        
113        public FeatureVector getFeatureVector(String decisionSymbol, String subModelName) {
114                final StringBuilder sb = new StringBuilder();
115                if (decisionSymbol.length() > 0) {
116                        sb.append(decisionSymbol);
117                        sb.append('.');
118                }
119                sb.append(subModelName);
120                if (containsKey(sb.toString())) {
121                        return get(sb.toString());
122                } else if (containsKey(subModelName)) {
123                        return get(subModelName);
124                }
125                return mainFeatureVector;
126        }
127        
128        public FeatureFunction getDivideFeatureFunction() {
129                return divideFeatureFunction;
130        }
131        
132        public boolean hasDivideFeatureFunction() {
133                return divideFeatureFunction != null;
134        }
135
136//      public ArrayList<Integer> getDivideFeatureIndexVectorMap(String divideSubModelName) {
137//              return divideFeatureIndexVectorMap.get(divideSubModelName);
138//      }
139//
140//      public boolean hasDivideFeatureIndexVectorMap() {
141//              return divideFeatureIndexVectorMap != null;
142//      }
143        
144        public ArrayList<Integer> getDivideFeatureIndexVector() {
145                return divideFeatureIndexVector;
146        }
147
148        public boolean hasDivideFeatureIndexVector() {
149                return divideFeatureIndexVector != null;
150        }
151        
152        public void update() throws MaltChainedException {
153                for (int i = 0, n = addressFunctionCache.size(); i < n; i++) {
154                        addressFunctionCache.get(i).update();
155                }
156                
157                for (int i = 0, n = featureFunctionCache.size(); i < n; i++) {
158                        featureFunctionCache.get(i).update();
159                }
160        }
161        
162        public void update(Object[] arguments) throws MaltChainedException {
163                for (int i = 0, n = addressFunctionCache.size(); i < n; i++) {
164                        addressFunctionCache.get(i).update(arguments);
165                }
166                
167                for (int i = 0, n = featureFunctionCache.size(); i < n; i++) {
168                        featureFunctionCache.get(i).update();
169                }
170        }
171        
172        public FeatureFunction identifyFeature(String spec) throws MaltChainedException {
173                String[] items =splitPattern.split(spec);
174                Stack<Object> objects = new Stack<Object>();
175                for (int i = items.length-1; i >= 0; i--) {
176                        if (items[i].trim().length() != 0) {
177                                objects.push(items[i].trim());
178                        }
179                }
180                identifyFeatureFunction(objects);
181                if (objects.size() != 1 || !(objects.peek() instanceof FeatureFunction) || (objects.peek() instanceof AddressFunction)) {
182                        throw new FeatureException("The feature specification '"+spec+"' were not recognized properly. ");
183                }
184                return (FeatureFunction)objects.pop();
185        }
186        
187        protected void identifyFeatureFunction(Stack<Object> objects) throws MaltChainedException {
188                Function function = featureEngine.newFunction(objects.peek().toString(), registry);
189                if (function != null) {
190                        objects.pop();
191                        if (!objects.isEmpty()) {
192                                identifyFeatureFunction(objects);
193                        }
194                        initializeFunction(function, objects);
195                } else {
196                        if (!objects.isEmpty()) {
197                                Object o = objects.pop();
198                                if (!objects.isEmpty()) {
199                                        identifyFeatureFunction(objects);
200                                }
201                                objects.push(o);
202                        }
203                }
204        }
205        
206        protected void initializeFunction(Function function, Stack<Object> objects) throws MaltChainedException {
207                Class<?>[] paramTypes = function.getParameterTypes();
208                Object[] arguments = new Object[paramTypes.length];
209                for (int i = 0; i < paramTypes.length; i++) {
210                        if (paramTypes[i] == java.lang.Integer.class) {
211                                if (objects.peek() instanceof String) {
212                                        String object = (String)objects.pop();
213                                        try {
214                                                objects.push(Integer.parseInt(object));
215                                        } catch (NumberFormatException e) {
216                                                throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+object+"'" + ", expect an integer value. ", e);
217                                        }
218                                } else {
219                                        throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect an integer value. ");
220                                }
221                        } else if (paramTypes[i] == java.lang.Double.class) {
222                                if (objects.peek() instanceof String) {
223                                        String object = (String)objects.pop();
224                                        try {
225                                                objects.push(Double.parseDouble(object));
226                                        } catch (NumberFormatException e) {
227                                                throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+object+"'" + ", expect a numeric value. ", e);
228                                        }
229                                } else {
230                                        throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect a numeric value. ");
231                                }
232                        } else if (paramTypes[i] == java.lang.Boolean.class) {
233                                if (objects.peek() instanceof String) {
234                                        objects.push(Boolean.parseBoolean(((String)objects.pop())));
235                                } else {
236                                        throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'" + ", expect a boolean value. ");
237                                        
238                                }
239                        }
240                        if (!paramTypes[i].isInstance(objects.peek())) {
241                                throw new FeatureException("The function '"+function.getClass()+"' cannot be initialized with argument '"+objects.peek()+"'");
242                        }
243                        arguments[i] = objects.pop();
244                }
245                function.initialize(arguments);
246                if (function instanceof AddressFunction) {
247                        int index = addressFunctionCache.indexOf(function);
248                        if (index != -1) {
249                                function = addressFunctionCache.get(index);
250                        } else {
251                                addressFunctionCache.add((AddressFunction)function);
252                        }
253                } else if (function instanceof FeatureFunction) {
254                        int index = featureFunctionCache.indexOf(function);
255                        if (index != -1) {
256                                function = featureFunctionCache.get(index);
257                        } else {
258                                featureFunctionCache.add((FeatureFunction)function);
259                        }
260                }
261                objects.push(function);
262        }
263        
264        public String toString() {
265                return specModel.toString();
266        }
267}