001    package org.maltparser.core.syntaxgraph.feature;
002    
003    import java.util.LinkedHashMap;
004    import java.util.Map;
005    import org.maltparser.core.exception.MaltChainedException;
006    import org.maltparser.core.feature.function.AddressFunction;
007    import org.maltparser.core.feature.function.FeatureFunction;
008    import org.maltparser.core.feature.value.AddressValue;
009    import org.maltparser.core.feature.value.FeatureValue;
010    import org.maltparser.core.feature.value.SingleFeatureValue;
011    import org.maltparser.core.io.dataformat.ColumnDescription;
012    import org.maltparser.core.symbol.SymbolTable;
013    import org.maltparser.core.symbol.SymbolTableHandler;
014    import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
015    import org.maltparser.core.syntaxgraph.SyntaxGraphException;
016    import org.maltparser.core.syntaxgraph.node.DependencyNode;
017    
018    public class DistanceFeature implements FeatureFunction {
019            protected AddressFunction addressFunction1;
020            protected AddressFunction addressFunction2;
021            protected SymbolTableHandler tableHandler;
022            protected SymbolTable table;
023            protected SingleFeatureValue featureValue;
024            protected String normalizationString;
025            protected Map<Integer,String> normalization;
026            
027            
028            public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
029                    super();
030                    featureValue = new SingleFeatureValue(this);
031                    setTableHandler(tableHandler);
032                    normalization = new LinkedHashMap<Integer,String>();
033            }
034            
035            /**
036             * Initialize the distance feature function
037             * 
038             * @param arguments an array of arguments with the type returned by getParameterTypes()
039             * @throws MaltChainedException
040             */
041            public void initialize(Object[] arguments) throws MaltChainedException {
042                    if (arguments.length != 3) {
043                            throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. ");
044                    }
045                    // Checks that the two arguments are address functions
046                    if (!(arguments[0] instanceof AddressFunction)) {
047                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. ");
048                    }
049                    if (!(arguments[1] instanceof AddressFunction)) {
050                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. ");
051                    }
052                    if (!(arguments[2] instanceof java.lang.String)) {
053                            throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. ");
054                    }
055                    setAddressFunction1((AddressFunction)arguments[0]);
056                    setAddressFunction2((AddressFunction)arguments[1]);
057                    
058                    // Creates a symbol table called "DISTANCE" using one null value
059                    setSymbolTable(tableHandler.addSymbolTable("DISTANCE", ColumnDescription.INPUT, "one"));
060                    normalizationString = (String)arguments[2];
061                    String[] items  = normalizationString.split("\\|");
062                    
063                    if (items.length <= 0 || !items[0].equals("0")) {
064                            throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0.");
065                    }
066                    int tmp = -1;
067                    for (int i = 0; i < items.length; i++) {
068                            int v;
069                            try {
070                                    v = Integer.parseInt(items[i]);
071                            } catch (NumberFormatException e) {
072                                    throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e);
073                            }
074                            normalization.put(v, ">="+v);
075                            table.addSymbol(">="+v);
076                            if (tmp != -1 && tmp >= v) {
077                                    throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |");
078                            }
079                            tmp = v;
080                    }
081            }
082            
083            /**
084             * Returns an array of class types used by the feature extraction system to invoke initialize with
085             * correct arguments.
086             * 
087             * @return an array of class types
088             */
089            public Class<?>[] getParameterTypes() {
090                    Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class, 
091                                                                      org.maltparser.core.feature.function.AddressFunction.class,
092                                                                      java.lang.String.class};
093                    return paramTypes; 
094            }
095            
096            /**
097             * Returns the string representation of the integer <code>code</code> according to the distance feature function. 
098             * 
099             * @param code the integer representation of the symbol
100             * @return the string representation of the integer <code>code</code> according to the distance feature function.
101             * @throws MaltChainedException
102             */
103            public String getSymbol(int code) throws MaltChainedException {
104                    return table.getSymbolCodeToString(code);
105            }
106            
107            /**
108             * Returns the integer representation of the string <code>symbol</code> according to the distance feature function.
109             * 
110             * @param symbol the string representation of the symbol
111             * @return the integer representation of the string <code>symbol</code> according to the distance feature function.
112             * @throws MaltChainedException
113             */
114            public int getCode(String symbol) throws MaltChainedException {
115                    return table.getSymbolStringToCode(symbol);
116            }
117            
118            /**
119             * Cause the distance feature function to update the cardinality of the feature value.
120             * 
121             * @throws MaltChainedException
122             */
123            public void updateCardinality() {
124    //              featureValue.setCardinality(table.getValueCounter()); 
125            }
126            
127            /**
128             * Cause the feature function to update the feature value.
129             * 
130             * @throws MaltChainedException
131             */
132            public void update() throws MaltChainedException {
133                    // Retrieve the address value 
134                    final AddressValue arg1 = addressFunction1.getAddressValue();
135                    final AddressValue arg2 = addressFunction2.getAddressValue();
136    //              featureValue.setKnown(true);
137                    // if arg1 or arg2 is null, then set a NO_NODE null value as feature value
138                    if (arg1.getAddress() == null || arg2.getAddress() == null) { 
139                            featureValue.setIndexCode(table.getNullValueCode(NullValueId.NO_NODE));
140                            featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE));
141                            featureValue.setValue(1);
142    
143                            featureValue.setNullValue(true);                        
144                    } else {
145                            // Unfortunately this method takes a lot of time  arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class);
146                            // Cast the address arguments to dependency nodes
147                            final DependencyNode node1 = (DependencyNode)arg1.getAddress();
148                            final DependencyNode node2 = (DependencyNode)arg2.getAddress();
149                            
150                            if (!node1.isRoot() && !node2.isRoot()) { 
151                                    // Calculates the distance
152                                    final int index1 = node1.getIndex();
153                                    final int index2 = node2.getIndex();
154                                    final int distance = Math.abs(index1-index2);
155                                    
156                                    
157                                    int lower = -1;
158                                    boolean f = false;
159                                    for (Integer upper : normalization.keySet()) {
160                                            if (distance >= lower && distance < upper) {
161                                                    featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower)));
162                                                    featureValue.setSymbol(normalization.get(lower));
163                                                    featureValue.setValue(1);
164                                                    f = true;
165                                                    break;
166                                            }
167                                            lower = upper;
168                                    }
169                                    if (f == false) {
170                                            featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower)));
171                                            featureValue.setSymbol(normalization.get(lower));
172                                            featureValue.setValue(1);
173                                    }
174                                    
175                                    // Tells the feature value that the feature is known and is not a null value
176                                    
177                                    featureValue.setNullValue(false);
178    
179                            } else { 
180                                    // if node1 or node2 is a root node, set a ROOT_NODE null value as feature value
181                                    featureValue.setIndexCode(table.getNullValueCode(NullValueId.ROOT_NODE));
182                                    featureValue.setSymbol(table.getNullValueSymbol(NullValueId.ROOT_NODE));
183                                    featureValue.setValue(1);
184                                    featureValue.setNullValue(true);
185                            }
186                    }
187            }
188            
189            /**
190             * Returns the feature value
191             * 
192             * @return the feature value
193             */
194            public FeatureValue getFeatureValue() {
195                    return featureValue;
196            }
197            
198            /**
199             * Returns the symbol table used by the distance feature function
200             * 
201             * @return the symbol table used by the distance feature function
202             */
203            public SymbolTable getSymbolTable() {
204                    return table;
205            }
206            
207            /**
208             * Returns the address function 1 (argument 1) 
209             * 
210             * @return the address function 1 (argument 1) 
211             */
212            public AddressFunction getAddressFunction1() {
213                    return addressFunction1;
214            }
215    
216    
217            /**
218             * Sets the address function 1 (argument 1) 
219             * 
220             * @param addressFunction1 a address function 1 (argument 1) 
221             */
222            public void setAddressFunction1(AddressFunction addressFunction1) {
223                    this.addressFunction1 = addressFunction1;
224            }
225            
226            /**
227             * Returns the address function 2 (argument 2) 
228             * 
229             * @return the address function 1 (argument 2) 
230             */
231            public AddressFunction getAddressFunction2() {
232                    return addressFunction2;
233            }
234    
235            /**
236             * Sets the address function 2 (argument 2) 
237             * 
238             * @param addressFunction2 a address function 2 (argument 2) 
239             */
240            public void setAddressFunction2(AddressFunction addressFunction2) {
241                    this.addressFunction2 = addressFunction2;
242            }
243            
244            /**
245             * Returns symbol table handler
246             * 
247             * @return a symbol table handler
248             */
249            public SymbolTableHandler getTableHandler() {
250                    return tableHandler;
251            }
252    
253            /**
254             * Sets the symbol table handler
255             * 
256             * @param tableHandler a symbol table handler
257             */
258            public void setTableHandler(SymbolTableHandler tableHandler) {
259                    this.tableHandler = tableHandler;
260            }
261    
262            /**
263             * Sets the symbol table used by the distance feature function
264             * 
265             * @param table
266             */
267            public void setSymbolTable(SymbolTable table) {
268                    this.table = table;
269            }
270            
271            public boolean equals(Object obj) {
272                    if (this == obj)
273                            return true;
274                    if (obj == null)
275                            return false;
276                    if (getClass() != obj.getClass())
277                            return false;
278                    return obj.toString().equals(this.toString());
279            }
280            
281            public int hashCode() {
282                    return 217 + (null == toString() ? 0 : toString().hashCode());
283            }
284            
285            public String toString() {
286                    final StringBuilder sb = new StringBuilder();
287                    sb.append("Distance(");
288                    sb.append(addressFunction1.toString());
289                    sb.append(", ");
290                    sb.append(addressFunction2.toString());
291                    sb.append(", ");
292                    sb.append(normalizationString);
293                    sb.append(')');
294                    return sb.toString();
295            }
296    }
297