package org.maltparser.core.syntaxgraph.feature;

import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.function.AddressFunction;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.value.AddressValue;
import org.maltparser.core.feature.value.FeatureValue;
import org.maltparser.core.feature.value.SingleFeatureValue;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId;
import org.maltparser.core.syntaxgraph.SyntaxGraphException;
import org.maltparser.core.syntaxgraph.node.DependencyNode;

/**
 * Feature function that reports the bucketed distance between two addressed nodes.
 *
 * <p>The distance is the absolute difference of the two node indices. It is mapped onto a
 * symbol of the form {@code ">=N"}, where {@code N} is the largest threshold of the
 * normalization list that does not exceed the distance. For example, with the normalization
 * {@code "0|1|2|5"} a distance of 3 yields {@code ">=2"} and a distance of 9 yields
 * {@code ">=5"}.
 *
 * <p>If either address is missing the feature value is the {@code NO_NODE} null value; if
 * either node is a root node it is the {@code ROOT_NODE} null value.
 */
public final class DistanceFeature implements FeatureFunction {
    /** Argument types expected by {@link #initialize(Object[])}: two address functions and a string. */
    public static final Class<?>[] paramTypes = {
        org.maltparser.core.feature.function.AddressFunction.class,
        org.maltparser.core.feature.function.AddressFunction.class,
        java.lang.String.class
    };

    /** Splits the normalization argument on the literal {@code |} character. */
    private static final Pattern splitPattern = Pattern.compile("\\|");

    private AddressFunction addressFunction1;
    private AddressFunction addressFunction2;
    private final SymbolTableHandler tableHandler;
    private SymbolTable table;
    private final SingleFeatureValue featureValue;
    private String normalizationString;

    /**
     * Threshold to symbol ({@code ">=threshold"}), kept in insertion order. initialize() only
     * ever inserts a strictly ascending list starting at 0, which update() relies on.
     */
    private final Map<Integer, String> normalization;

    /**
     * Creates an uninitialized distance feature; {@link #initialize(Object[])} must be called
     * before the feature is used.
     *
     * @param tableHandler the handler used to create the symbol table of this feature
     * @throws MaltChainedException declared for callers; not thrown by the code visible here
     */
    public DistanceFeature(SymbolTableHandler tableHandler) throws MaltChainedException {
        this.featureValue = new SingleFeatureValue(this);
        this.tableHandler = tableHandler;
        this.normalization = new LinkedHashMap<Integer, String>();
    }

    /**
     * Initializes the distance feature function.
     *
     * <p>The normalization list is validated completely before the symbol table is created
     * or any state derived from it is changed, so a rejected list leaves no partially
     * populated table behind. Thresholds from a previous initialization are discarded.
     *
     * @param arguments an array of arguments with the types returned by getParameterTypes():
     *        two address functions followed by the normalization string, a strictly
     *        ascending list of integers separated with {@code |} whose first element is 0
     * @throws MaltChainedException if the number or the types of the arguments are wrong,
     *         or if the normalization string is malformed
     */
    public void initialize(Object[] arguments) throws MaltChainedException {
        if (arguments.length != 3) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: number of arguments is not correct. ");
        }
        if (!(arguments[0] instanceof AddressFunction)) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: the first argument is not an address function. ");
        }
        if (!(arguments[1] instanceof AddressFunction)) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: the second argument is not an address function. ");
        }
        if (!(arguments[2] instanceof java.lang.String)) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature: the third argument is not a string. ");
        }
        // These three fields are set first because the error messages below embed toString().
        setAddressFunction1((AddressFunction) arguments[0]);
        setAddressFunction2((AddressFunction) arguments[1]);
        normalizationString = (String) arguments[2];

        final int[] thresholds = parseThresholds(normalizationString);

        // The symbol table is named "DISTANCE_" followed by the normalization string. The
        // trailing "one" is handed to the handler unchanged (the original comment describes
        // it as "using one null value").
        setSymbolTable(tableHandler.addSymbolTable("DISTANCE_" + normalizationString,
                ColumnDescription.INPUT, ColumnDescription.STRING, "one"));

        // Drop thresholds of an earlier initialization; merging them would break the
        // ascending iteration order that update() depends on.
        normalization.clear();
        for (int threshold : thresholds) {
            final String symbol = ">=" + threshold;
            normalization.put(threshold, symbol);
            table.addSymbol(symbol);
        }
    }

    /**
     * Parses and validates the normalization specification.
     *
     * @param spec the normalization string, e.g. {@code "0|1|2|5"}
     * @return the thresholds in the order given, strictly ascending and starting with 0
     * @throws MaltChainedException if the first element is not {@code "0"}, an element is
     *         not an integer, or the list is not strictly ascending
     */
    private int[] parseThresholds(String spec) throws MaltChainedException {
        final String[] items = splitPattern.split(spec);
        if (items.length == 0 || !items[0].equals("0")) {
            throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0.");
        }
        final int[] thresholds = new int[items.length];
        for (int i = 0; i < items.length; i++) {
            try {
                thresholds[i] = Integer.parseInt(items[i]);
            } catch (NumberFormatException e) {
                throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e);
            }
            if (i > 0 && thresholds[i - 1] >= thresholds[i]) {
                throw new SyntaxGraphException("Could not initialize DistanceFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |");
            }
        }
        return thresholds;
    }

    /**
     * Returns an array of class types used by the feature extraction system to invoke initialize with
     * correct arguments.
     *
     * @return an array of class types
     */
    public Class<?>[] getParameterTypes() {
        return paramTypes;
    }

    /**
     * Returns the string representation of the integer <code>code</code> according to the distance feature function.
     *
     * @param code the integer representation of the symbol
     * @return the string representation of the integer <code>code</code> according to the distance feature function.
     * @throws MaltChainedException if the symbol table lookup fails
     */
    public String getSymbol(int code) throws MaltChainedException {
        return table.getSymbolCodeToString(code);
    }

    /**
     * Returns the integer representation of the string <code>symbol</code> according to the distance feature function.
     *
     * @param symbol the string representation of the symbol
     * @return the integer representation of the string <code>symbol</code> according to the distance feature function.
     * @throws MaltChainedException if the symbol table lookup fails
     */
    public int getCode(String symbol) throws MaltChainedException {
        return table.getSymbolStringToCode(symbol);
    }

    /**
     * Causes the feature function to update the feature value from the current addresses.
     *
     * @throws MaltChainedException if an address or symbol table lookup fails
     */
    public void update() throws MaltChainedException {
        final AddressValue arg1 = addressFunction1.getAddressValue();
        final AddressValue arg2 = addressFunction2.getAddressValue();

        // A missing address on either side yields the NO_NODE null value.
        if (arg1.getAddress() == null || arg2.getAddress() == null) {
            setNullFeatureValue(NullValueId.NO_NODE);
            return;
        }

        // Plain casts are used on purpose: the original author notes that
        // arg1.getAddressClass().asSubclass(DependencyNode.class) takes a lot of time.
        final DependencyNode node1 = (DependencyNode) arg1.getAddress();
        final DependencyNode node2 = (DependencyNode) arg2.getAddress();

        // A root node on either side yields the ROOT_NODE null value.
        if (node1.isRoot() || node2.isRoot()) {
            setNullFeatureValue(NullValueId.ROOT_NODE);
            return;
        }

        final int distance = Math.abs(node1.getIndex() - node2.getIndex());
        final String symbol = bucketSymbol(distance);
        featureValue.setIndexCode(table.getSymbolStringToCode(symbol));
        featureValue.setSymbol(symbol);
        featureValue.setValue(1);
        featureValue.setNullValue(false);
    }

    /**
     * Returns the symbol of the largest threshold that does not exceed {@code distance}.
     *
     * @param distance the non-negative distance between the two nodes
     * @return the matching {@code ">=threshold"} symbol, or {@code null} if no threshold
     *         is less than or equal to {@code distance} (cannot happen for a non-negative
     *         distance once initialize() has succeeded, because the first threshold is 0)
     */
    private String bucketSymbol(int distance) {
        String symbol = null;
        // Thresholds are iterated in ascending order, so the scan can stop at the first
        // threshold that is larger than the distance.
        for (Map.Entry<Integer, String> entry : normalization.entrySet()) {
            if (distance < entry.getKey()) {
                break;
            }
            symbol = entry.getValue();
        }
        return symbol;
    }

    /**
     * Sets the feature value to the given null value of the symbol table.
     *
     * @param nullValueId the kind of null value to report
     * @throws MaltChainedException if the symbol table lookup fails
     */
    private void setNullFeatureValue(NullValueId nullValueId) throws MaltChainedException {
        featureValue.setIndexCode(table.getNullValueCode(nullValueId));
        featureValue.setSymbol(table.getNullValueSymbol(nullValueId));
        featureValue.setValue(1);
        featureValue.setNullValue(true);
    }

    /**
     * Returns the feature value
     *
     * @return the feature value
     */
    public FeatureValue getFeatureValue() {
        return featureValue;
    }

    /**
     * Returns the symbol table used by the distance feature function
     *
     * @return the symbol table used by the distance feature function
     */
    public SymbolTable getSymbolTable() {
        return table;
    }

    /**
     * Returns the address function 1 (argument 1)
     *
     * @return the address function 1 (argument 1)
     */
    public AddressFunction getAddressFunction1() {
        return addressFunction1;
    }

    /**
     * Sets the address function 1 (argument 1)
     *
     * @param addressFunction1 an address function 1 (argument 1)
     */
    public void setAddressFunction1(AddressFunction addressFunction1) {
        this.addressFunction1 = addressFunction1;
    }

    /**
     * Returns the address function 2 (argument 2)
     *
     * @return the address function 2 (argument 2)
     */
    public AddressFunction getAddressFunction2() {
        return addressFunction2;
    }

    /**
     * Sets the address function 2 (argument 2)
     *
     * @param addressFunction2 an address function 2 (argument 2)
     */
    public void setAddressFunction2(AddressFunction addressFunction2) {
        this.addressFunction2 = addressFunction2;
    }

    /**
     * Returns the symbol table handler
     *
     * @return a symbol table handler
     */
    public SymbolTableHandler getTableHandler() {
        return tableHandler;
    }

    /**
     * Sets the symbol table used by the distance feature function
     *
     * @param table the symbol table to use
     */
    public void setSymbolTable(SymbolTable table) {
        this.table = table;
    }

    /**
     * Returns the value type of this feature.
     *
     * @return {@code ColumnDescription.STRING}
     */
    public int getType() {
        return ColumnDescription.STRING;
    }

    /**
     * Returns the map identifier of this feature, which is the name of its symbol table.
     *
     * @return the name of the symbol table
     */
    public String getMapIdentifier() {
        return getSymbolTable().getName();
    }

    /**
     * Two distance features are equal when they are of the same class and have the same
     * string representation (see {@link #toString()}).
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        return obj.toString().equals(this.toString());
    }

    /** Hash code derived from the string representation, consistent with {@link #equals(Object)}. */
    @Override
    public int hashCode() {
        // toString() never returns null, so no null guard is needed here.
        return 217 + toString().hashCode();
    }

    /**
     * Returns {@code Distance(<address function 1>, <address function 2>, <normalization>)}.
     * Both address functions must have been set, otherwise this throws a NullPointerException.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Distance(");
        sb.append(addressFunction1.toString());
        sb.append(", ");
        sb.append(addressFunction2.toString());
        sb.append(", ");
        sb.append(normalizationString);
        sb.append(')');
        return sb.toString();
    }
}