001 package org.maltparser.core.syntaxgraph.feature; 002 003 import java.util.LinkedHashMap; 004 import java.util.Map; 005 import org.maltparser.core.exception.MaltChainedException; 006 import org.maltparser.core.feature.function.AddressFunction; 007 import org.maltparser.core.feature.function.FeatureFunction; 008 import org.maltparser.core.feature.value.AddressValue; 009 import org.maltparser.core.feature.value.FeatureValue; 010 import org.maltparser.core.feature.value.SingleFeatureValue; 011 import org.maltparser.core.io.dataformat.ColumnDescription; 012 import org.maltparser.core.symbol.SymbolTable; 013 import org.maltparser.core.symbol.SymbolTableHandler; 014 import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId; 015 import org.maltparser.core.syntaxgraph.SyntaxGraphException; 016 import org.maltparser.core.syntaxgraph.node.DependencyNode; 017 018 public class DistanceFeature implements FeatureFunction { 019 protected AddressFunction addressFunction1; 020 protected AddressFunction addressFunction2; 021 protected SymbolTableHandler tableHandler; 022 protected SymbolTable table; 023 protected SingleFeatureValue featureValue; 024 protected String normalizationString; 025 protected Map<Integer,String> normalization; 026 027 028 public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException { 029 super(); 030 featureValue = new SingleFeatureValue(this); 031 setTableHandler(tableHandler); 032 normalization = new LinkedHashMap<Integer,String>(); 033 } 034 035 /** 036 * Initialize the distance feature function 037 * 038 * @param arguments an array of arguments with the type returned by getParameterTypes() 039 * @throws MaltChainedException 040 */ 041 public void initialize(Object[] arguments) throws MaltChainedException { 042 if (arguments.length != 3) { 043 throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. "); 044 } 045 // Checks that the two arguments are address functions 046 if (!(arguments[0] instanceof AddressFunction)) { 047 throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. "); 048 } 049 if (!(arguments[1] instanceof AddressFunction)) { 050 throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. "); 051 } 052 if (!(arguments[2] instanceof java.lang.String)) { 053 throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. "); 054 } 055 setAddressFunction1((AddressFunction)arguments[0]); 056 setAddressFunction2((AddressFunction)arguments[1]); 057 058 normalizationString = (String)arguments[2]; 059 // Creates a symbol table called "DISTANCE" using one null value 060 setSymbolTable(tableHandler.addSymbolTable("DISTANCE_"+normalizationString, ColumnDescription.INPUT, "one")); 061 062 String[] items = normalizationString.split("\\|"); 063 064 if (items.length <= 0 || !items[0].equals("0")) { 065 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0."); 066 } 067 int tmp = -1; 068 for (int i = 0; i < items.length; i++) { 069 int v; 070 try { 071 v = Integer.parseInt(items[i]); 072 } catch (NumberFormatException e) { 073 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e); 074 } 075 normalization.put(v, ">="+v); 076 table.addSymbol(">="+v); 077 if (tmp != -1 && tmp >= v) { 078 throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |"); 079 } 080 tmp = v; 081 } 082 } 083 084 /** 085 * Returns an array of class types used by the feature extraction system to invoke initialize with 086 * correct arguments. 087 * 088 * @return an array of class types 089 */ 090 public Class<?>[] getParameterTypes() { 091 Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class, 092 org.maltparser.core.feature.function.AddressFunction.class, 093 java.lang.String.class}; 094 return paramTypes; 095 } 096 097 /** 098 * Returns the string representation of the integer <code>code</code> according to the distance feature function. 099 * 100 * @param code the integer representation of the symbol 101 * @return the string representation of the integer <code>code</code> according to the distance feature function. 102 * @throws MaltChainedException 103 */ 104 public String getSymbol(int code) throws MaltChainedException { 105 return table.getSymbolCodeToString(code); 106 } 107 108 /** 109 * Returns the integer representation of the string <code>symbol</code> according to the distance feature function. 110 * 111 * @param symbol the string representation of the symbol 112 * @return the integer representation of the string <code>symbol</code> according to the distance feature function. 113 * @throws MaltChainedException 114 */ 115 public int getCode(String symbol) throws MaltChainedException { 116 return table.getSymbolStringToCode(symbol); 117 } 118 119 /** 120 * Cause the distance feature function to update the cardinality of the feature value. 121 * 122 * @throws MaltChainedException 123 */ 124 public void updateCardinality() { 125 // featureValue.setCardinality(table.getValueCounter()); 126 } 127 128 /** 129 * Cause the feature function to update the feature value. 130 * 131 * @throws MaltChainedException 132 */ 133 public void update() throws MaltChainedException { 134 // Retrieve the address value 135 final AddressValue arg1 = addressFunction1.getAddressValue(); 136 final AddressValue arg2 = addressFunction2.getAddressValue(); 137 // featureValue.setKnown(true); 138 // if arg1 or arg2 is null, then set a NO_NODE null value as feature value 139 if (arg1.getAddress() == null || arg2.getAddress() == null) { 140 featureValue.setIndexCode(table.getNullValueCode(NullValueId.NO_NODE)); 141 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE)); 142 featureValue.setValue(1); 143 144 featureValue.setNullValue(true); 145 } else { 146 // Unfortunately this method takes a lot of time arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class); 147 // Cast the address arguments to dependency nodes 148 final DependencyNode node1 = (DependencyNode)arg1.getAddress(); 149 final DependencyNode node2 = (DependencyNode)arg2.getAddress(); 150 151 if (!node1.isRoot() && !node2.isRoot()) { 152 // Calculates the distance 153 final int index1 = node1.getIndex(); 154 final int index2 = node2.getIndex(); 155 final int distance = Math.abs(index1-index2); 156 157 158 int lower = -1; 159 boolean f = false; 160 for (Integer upper : normalization.keySet()) { 161 if (distance >= lower && distance < upper) { 162 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower))); 163 featureValue.setSymbol(normalization.get(lower)); 164 featureValue.setValue(1); 165 f = true; 166 break; 167 } 168 lower = upper; 169 } 170 if (f == false) { 171 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower))); 172 featureValue.setSymbol(normalization.get(lower)); 173 featureValue.setValue(1); 174 } 175 176 // Tells the feature value that the feature is known and is not a null value 177 178 featureValue.setNullValue(false); 179 180 } else { 181 // if node1 or node2 is a root node, set a ROOT_NODE null value as feature value 182 featureValue.setIndexCode(table.getNullValueCode(NullValueId.ROOT_NODE)); 183 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.ROOT_NODE)); 184 featureValue.setValue(1); 185 featureValue.setNullValue(true); 186 } 187 } 188 } 189 190 /** 191 * Returns the feature value 192 * 193 * @return the feature value 194 */ 195 public FeatureValue getFeatureValue() { 196 return featureValue; 197 } 198 199 /** 200 * Returns the symbol table used by the distance feature function 201 * 202 * @return the symbol table used by the distance feature function 203 */ 204 public SymbolTable getSymbolTable() { 205 return table; 206 } 207 208 /** 209 * Returns the address function 1 (argument 1) 210 * 211 * @return the address function 1 (argument 1) 212 */ 213 public AddressFunction getAddressFunction1() { 214 return addressFunction1; 215 } 216 217 218 /** 219 * Sets the address function 1 (argument 1) 220 * 221 * @param addressFunction1 a address function 1 (argument 1) 222 */ 223 public void setAddressFunction1(AddressFunction addressFunction1) { 224 this.addressFunction1 = addressFunction1; 225 } 226 227 /** 228 * Returns the address function 2 (argument 2) 229 * 230 * @return the address function 1 (argument 2) 231 */ 232 public AddressFunction getAddressFunction2() { 233 return addressFunction2; 234 } 235 236 /** 237 * Sets the address function 2 (argument 2) 238 * 239 * @param addressFunction2 a address function 2 (argument 2) 240 */ 241 public void setAddressFunction2(AddressFunction addressFunction2) { 242 this.addressFunction2 = addressFunction2; 243 } 244 245 /** 246 * Returns symbol table handler 247 * 248 * @return a symbol table handler 249 */ 250 public SymbolTableHandler getTableHandler() { 251 return tableHandler; 252 } 253 254 /** 255 * Sets the symbol table handler 256 * 257 * @param tableHandler a symbol table handler 258 */ 259 public void setTableHandler(SymbolTableHandler tableHandler) { 260 this.tableHandler = tableHandler; 261 } 262 263 /** 264 * Sets the symbol table used by the distance feature function 265 * 266 * @param table 267 */ 268 public void setSymbolTable(SymbolTable table) { 269 this.table = table; 270 } 271 272 public int getType() { 273 return ColumnDescription.STRING; 274 } 275 276 public String getMapIdentifier() { 277 return getSymbolTable().getName(); 278 } 279 280 public boolean equals(Object obj) { 281 if (this == obj) 282 return true; 283 if (obj == null) 284 return false; 285 if (getClass() != obj.getClass()) 286 return false; 287 return obj.toString().equals(this.toString()); 288 } 289 290 public int hashCode() { 291 return 217 + (null == toString() ? 0 : toString().hashCode()); 292 } 293 294 public String toString() { 295 final StringBuilder sb = new StringBuilder(); 296 sb.append("Distance("); 297 sb.append(addressFunction1.toString()); 298 sb.append(", "); 299 sb.append(addressFunction2.toString()); 300 sb.append(", "); 301 sb.append(normalizationString); 302 sb.append(')'); 303 return sb.toString(); 304 } 305 } 306