001    package org.maltparser.core.syntaxgraph.feature;
002    
003    import java.util.LinkedHashMap;
004    import java.util.Map;
005    import org.maltparser.core.exception.MaltChainedException;
006    import org.maltparser.core.feature.function.AddressFunction;
007    import org.maltparser.core.feature.function.FeatureFunction;
008    import org.maltparser.core.feature.value.AddressValue;
009    import org.maltparser.core.feature.value.FeatureValue;
010    import org.maltparser.core.feature.value.SingleFeatureValue;
011    import org.maltparser.core.io.dataformat.ColumnDescription;
012    import org.maltparser.core.symbol.SymbolTable;
013    import org.maltparser.core.symbol.SymbolTableHandler;
014    import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
015    import org.maltparser.core.syntaxgraph.SyntaxGraphException;
016    import org.maltparser.core.syntaxgraph.node.DependencyNode;
017    
018    public class DistanceFeature implements FeatureFunction {
019            protected AddressFunction addressFunction1;
020            protected AddressFunction addressFunction2;
021            protected SymbolTableHandler tableHandler;
022            protected SymbolTable table;
023            protected SingleFeatureValue featureValue;
024            protected String normalizationString;
025            protected Map<Integer,String> normalization;
026            
027            
028            public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
029                    super();
030                    featureValue = new SingleFeatureValue(this);
031                    setTableHandler(tableHandler);
032                    normalization = new LinkedHashMap<Integer,String>();
033            }
034            
035            /**
036             * Initialize the distance feature function
037             * 
038             * @param arguments an array of arguments with the type returned by getParameterTypes()
039             * @throws MaltChainedException
040             */
041            public void initialize(Object[] arguments) throws MaltChainedException {
042                    if (arguments.length != 3) {
043                            throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. ");
044                    }
045                    // Checks that the two arguments are address functions
046                    if (!(arguments[0] instanceof AddressFunction)) {
047                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. ");
048                    }
049                    if (!(arguments[1] instanceof AddressFunction)) {
050                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. ");
051                    }
052                    if (!(arguments[2] instanceof java.lang.String)) {
053                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. ");
054                    }
055                    setAddressFunction1((AddressFunction)arguments[0]);
056                    setAddressFunction2((AddressFunction)arguments[1]);
057                    
058                    normalizationString = (String)arguments[2];
059                    // Creates a symbol table called "DISTANCE" using one null value
060                    setSymbolTable(tableHandler.addSymbolTable("DISTANCE_"+normalizationString, ColumnDescription.INPUT, "one"));
061                    
062                    String[] items  = normalizationString.split("\\|");
063                    
064                    if (items.length <= 0 || !items[0].equals("0")) {
065                            throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0.");
066                    }
067                    int tmp = -1;
068                    for (int i = 0; i < items.length; i++) {
069                            int v;
070                            try {
071                                    v = Integer.parseInt(items[i]);
072                            } catch (NumberFormatException e) {
073                                    throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e);
074                            }
075                            normalization.put(v, ">="+v);
076                            table.addSymbol(">="+v);
077                            if (tmp != -1 && tmp >= v) {
078                                    throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |");
079                            }
080                            tmp = v;
081                    }
082            }
083            
084            /**
085             * Returns an array of class types used by the feature extraction system to invoke initialize with
086             * correct arguments.
087             * 
088             * @return an array of class types
089             */
090            public Class<?>[] getParameterTypes() {
091                    Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class, 
092                                                                      org.maltparser.core.feature.function.AddressFunction.class,
093                                                                      java.lang.String.class};
094                    return paramTypes; 
095            }
096            
097            /**
098             * Returns the string representation of the integer <code>code</code> according to the distance feature function. 
099             * 
100             * @param code the integer representation of the symbol
101             * @return the string representation of the integer <code>code</code> according to the distance feature function.
102             * @throws MaltChainedException
103             */
104            public String getSymbol(int code) throws MaltChainedException {
105                    return table.getSymbolCodeToString(code);
106            }
107            
108            /**
109             * Returns the integer representation of the string <code>symbol</code> according to the distance feature function.
110             * 
111             * @param symbol the string representation of the symbol
112             * @return the integer representation of the string <code>symbol</code> according to the distance feature function.
113             * @throws MaltChainedException
114             */
115            public int getCode(String symbol) throws MaltChainedException {
116                    return table.getSymbolStringToCode(symbol);
117            }
118            
119            /**
120             * Cause the distance feature function to update the cardinality of the feature value.
121             * 
122             * @throws MaltChainedException
123             */
124            public void updateCardinality() {
125    //              featureValue.setCardinality(table.getValueCounter()); 
126            }
127            
128            /**
129             * Cause the feature function to update the feature value.
130             * 
131             * @throws MaltChainedException
132             */
133            public void update() throws MaltChainedException {
134                    // Retrieve the address value 
135                    final AddressValue arg1 = addressFunction1.getAddressValue();
136                    final AddressValue arg2 = addressFunction2.getAddressValue();
137    //              featureValue.setKnown(true);
138                    // if arg1 or arg2 is null, then set a NO_NODE null value as feature value
139                    if (arg1.getAddress() == null || arg2.getAddress() == null) { 
140                            featureValue.setIndexCode(table.getNullValueCode(NullValueId.NO_NODE));
141                            featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE));
142                            featureValue.setValue(1);
143    
144                            featureValue.setNullValue(true);                        
145                    } else {
146                            // Unfortunately this method takes a lot of time  arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class);
147                            // Cast the address arguments to dependency nodes
148                            final DependencyNode node1 = (DependencyNode)arg1.getAddress();
149                            final DependencyNode node2 = (DependencyNode)arg2.getAddress();
150                            
151                            if (!node1.isRoot() && !node2.isRoot()) { 
152                                    // Calculates the distance
153                                    final int index1 = node1.getIndex();
154                                    final int index2 = node2.getIndex();
155                                    final int distance = Math.abs(index1-index2);
156                                    
157                                    
158                                    int lower = -1;
159                                    boolean f = false;
160                                    for (Integer upper : normalization.keySet()) {
161                                            if (distance >= lower && distance < upper) {
162                                                    featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower)));
163                                                    featureValue.setSymbol(normalization.get(lower));
164                                                    featureValue.setValue(1);
165                                                    f = true;
166                                                    break;
167                                            }
168                                            lower = upper;
169                                    }
170                                    if (f == false) {
171                                            featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower)));
172                                            featureValue.setSymbol(normalization.get(lower));
173                                            featureValue.setValue(1);
174                                    }
175                                    
176                                    // Tells the feature value that the feature is known and is not a null value
177                                    
178                                    featureValue.setNullValue(false);
179    
180                            } else { 
181                                    // if node1 or node2 is a root node, set a ROOT_NODE null value as feature value
182                                    featureValue.setIndexCode(table.getNullValueCode(NullValueId.ROOT_NODE));
183                                    featureValue.setSymbol(table.getNullValueSymbol(NullValueId.ROOT_NODE));
184                                    featureValue.setValue(1);
185                                    featureValue.setNullValue(true);
186                            }
187                    }
188            }
189            
190            /**
191             * Returns the feature value
192             * 
193             * @return the feature value
194             */
195            public FeatureValue getFeatureValue() {
196                    return featureValue;
197            }
198            
199            /**
200             * Returns the symbol table used by the distance feature function
201             * 
202             * @return the symbol table used by the distance feature function
203             */
204            public SymbolTable getSymbolTable() {
205                    return table;
206            }
207            
208            /**
209             * Returns the address function 1 (argument 1) 
210             * 
211             * @return the address function 1 (argument 1) 
212             */
213            public AddressFunction getAddressFunction1() {
214                    return addressFunction1;
215            }
216    
217    
218            /**
219             * Sets the address function 1 (argument 1) 
220             * 
221             * @param addressFunction1 a address function 1 (argument 1) 
222             */
223            public void setAddressFunction1(AddressFunction addressFunction1) {
224                    this.addressFunction1 = addressFunction1;
225            }
226            
227            /**
228             * Returns the address function 2 (argument 2) 
229             * 
230             * @return the address function 1 (argument 2) 
231             */
232            public AddressFunction getAddressFunction2() {
233                    return addressFunction2;
234            }
235    
236            /**
237             * Sets the address function 2 (argument 2) 
238             * 
239             * @param addressFunction2 a address function 2 (argument 2) 
240             */
241            public void setAddressFunction2(AddressFunction addressFunction2) {
242                    this.addressFunction2 = addressFunction2;
243            }
244            
245            /**
246             * Returns symbol table handler
247             * 
248             * @return a symbol table handler
249             */
250            public SymbolTableHandler getTableHandler() {
251                    return tableHandler;
252            }
253    
254            /**
255             * Sets the symbol table handler
256             * 
257             * @param tableHandler a symbol table handler
258             */
259            public void setTableHandler(SymbolTableHandler tableHandler) {
260                    this.tableHandler = tableHandler;
261            }
262    
263            /**
264             * Sets the symbol table used by the distance feature function
265             * 
266             * @param table
267             */
268            public void setSymbolTable(SymbolTable table) {
269                    this.table = table;
270            }
271            
272            public  int getType() {
273                    return ColumnDescription.STRING;
274            }
275            
276            public String getMapIdentifier() {
277                    return getSymbolTable().getName();
278            }
279            
280            public boolean equals(Object obj) {
281                    if (this == obj)
282                            return true;
283                    if (obj == null)
284                            return false;
285                    if (getClass() != obj.getClass())
286                            return false;
287                    return obj.toString().equals(this.toString());
288            }
289            
290            public int hashCode() {
291                    return 217 + (null == toString() ? 0 : toString().hashCode());
292            }
293            
294            public String toString() {
295                    final StringBuilder sb = new StringBuilder();
296                    sb.append("Distance(");
297                    sb.append(addressFunction1.toString());
298                    sb.append(", ");
299                    sb.append(addressFunction2.toString());
300                    sb.append(", ");
301                    sb.append(normalizationString);
302                    sb.append(')');
303                    return sb.toString();
304            }
305    }
306