001package org.maltparser.core.syntaxgraph.feature; 002 003import java.util.LinkedHashMap; 004import java.util.Map; 005import java.util.regex.Pattern; 006 007import org.maltparser.core.exception.MaltChainedException; 008import org.maltparser.core.feature.function.AddressFunction; 009import org.maltparser.core.feature.function.FeatureFunction; 010import org.maltparser.core.feature.value.AddressValue; 011import org.maltparser.core.feature.value.FeatureValue; 012import org.maltparser.core.feature.value.SingleFeatureValue; 013import org.maltparser.core.io.dataformat.ColumnDescription; 014import org.maltparser.core.symbol.SymbolTable; 015import org.maltparser.core.symbol.SymbolTableHandler; 016import org.maltparser.core.symbol.nullvalue.NullValues.NullValueId; 017import org.maltparser.core.syntaxgraph.SyntaxGraphException; 018import org.maltparser.core.syntaxgraph.node.DependencyNode; 019 020public final class NumOfFeature implements FeatureFunction { 021 public final static Class<?>[] paramTypes = { org.maltparser.core.feature.function.AddressFunction.class, 022 java.lang.String.class, 023 java.lang.String.class}; 024 private final static Pattern splitPattern = Pattern.compile("\\|"); 025 public enum NumOfRelation { 026 LDEPS, RDEPS, DEPS 027 }; 028 private AddressFunction addressFunction; 029 private final SymbolTableHandler tableHandler; 030 private SymbolTable table; 031 private final SingleFeatureValue featureValue; 032 private NumOfRelation numOfRelation; 033 private String numOfRelationName; 034 private String normalizationString; 035 private final Map<Integer,String> normalization; 036 037 public NumOfFeature(SymbolTableHandler tableHandler) throws MaltChainedException { 038 this.tableHandler = tableHandler; 039 this.featureValue = new SingleFeatureValue(this); 040 this.normalization = new LinkedHashMap<Integer,String>(); 041 } 042 043 /** 044 * Initialize the distance feature function 045 * 046 * @param arguments an array of arguments with the type returned by getParameterTypes() 047 * @throws MaltChainedException 048 */ 049 public void initialize(Object[] arguments) throws MaltChainedException { 050 if (arguments.length != 3) { 051 throw new SyntaxGraphException("Could not initialize NumOfFeature: number of arguments are not correct. "); 052 } 053 // Checks that the two arguments are address functions 054 if (!(arguments[0] instanceof AddressFunction)) { 055 throw new SyntaxGraphException("Could not initialize NumOfFeature: the first argument is not an address function. "); 056 } 057 if (!(arguments[1] instanceof java.lang.String)) { 058 throw new SyntaxGraphException("Could not initialize NumOfFeature: the second argument (relation) is not a string. "); 059 } 060 if (!(arguments[2] instanceof java.lang.String)) { 061 throw new SyntaxGraphException("Could not initialize NumOfFeature: the third argument (normalization) is not a string. "); 062 } 063 setAddressFunction((AddressFunction)arguments[0]); 064 setNumOfRelation((String)arguments[1]); 065 normalizationString = (String)arguments[2]; 066 // Creates a symbol table called "NUMOF" using one null value 067 setSymbolTable(tableHandler.addSymbolTable("NUMOF"+normalizationString, ColumnDescription.INPUT, ColumnDescription.STRING, "one")); 068 069 String[] items = splitPattern.split(normalizationString); 070 071 if (items.length <= 0 || !items[0].equals("0")) { 072 throw new SyntaxGraphException("Could not initialize NumOfFeature ("+this+"): the third argument (normalization) must contain a list of integer values separated with | and the first element must be 0."); 073 } 074 int tmp = -1; 075 for (int i = 0; i < items.length; i++) { 076 int v; 077 try { 078 v = Integer.parseInt(items[i]); 079 } catch (NumberFormatException e) { 080 throw new SyntaxGraphException("Could not initialize NumOfFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |", e); 081 } 082 normalization.put(v, ">="+v); 083 table.addSymbol(">="+v); 084 if (tmp != -1 && tmp >= v) { 085 throw new SyntaxGraphException("Could not initialize NumOfFeature ("+this+"): the third argument (normalization) must contain a sorted list of integer values separated with |"); 086 } 087 tmp = v; 088 } 089 } 090 091 /** 092 * Returns an array of class types used by the feature extraction system to invoke initialize with 093 * correct arguments. 094 * 095 * @return an array of class types 096 */ 097 public Class<?>[] getParameterTypes() { 098 return paramTypes; 099 } 100 101 /** 102 * Returns the string representation of the integer <code>code</code> according to the numof feature function. 103 * 104 * @param code the integer representation of the symbol 105 * @return the string representation of the integer <code>code</code> according to the numof feature function. 106 * @throws MaltChainedException 107 */ 108 public String getSymbol(int code) throws MaltChainedException { 109 return table.getSymbolCodeToString(code); 110 } 111 112 /** 113 * Returns the integer representation of the string <code>symbol</code> according to the numof feature function. 114 * 115 * @param symbol the string representation of the symbol 116 * @return the integer representation of the string <code>symbol</code> according to the numof feature function. 117 * @throws MaltChainedException 118 */ 119 public int getCode(String symbol) throws MaltChainedException { 120 return table.getSymbolStringToCode(symbol); 121 } 122 123 /** 124 * Cause the feature function to update the feature value. 125 * 126 * @throws MaltChainedException 127 */ 128 public void update() throws MaltChainedException { 129 // Retrieve the address value 130 final AddressValue arg1 = addressFunction.getAddressValue(); 131 // if arg1 or arg2 is null, then set a NO_NODE null value as feature value 132 if (arg1.getAddress() == null ) { 133 featureValue.setIndexCode(table.getNullValueCode(NullValueId.NO_NODE)); 134 featureValue.setSymbol(table.getNullValueSymbol(NullValueId.NO_NODE)); 135 featureValue.setNullValue(true); 136 } else { 137 // Unfortunately this method takes a lot of time arg1.getAddressClass().asSubclass(org.maltparser.core.syntaxgraph.node.DependencyNode.class); 138 // Cast the address arguments to dependency nodes 139 final DependencyNode node = (DependencyNode)arg1.getAddress(); 140 int numof = 0; 141 if (numOfRelation == NumOfRelation.DEPS) { 142 numof = node.getLeftDependentCount() + node.getRightDependentCount(); 143 } else if (numOfRelation == NumOfRelation.LDEPS) { 144 numof = node.getLeftDependentCount(); 145 } else if (numOfRelation == NumOfRelation.RDEPS) { 146 numof = node.getRightDependentCount(); 147 } 148 int lower = -1; 149 boolean f = false; 150 for (Integer upper : normalization.keySet()) { 151 if (numof >= lower && numof < upper) { 152 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower))); 153 featureValue.setSymbol(normalization.get(lower)); 154 f = true; 155 break; 156 } 157 lower = upper; 158 } 159 if (f == false) { 160 featureValue.setIndexCode(table.getSymbolStringToCode(normalization.get(lower))); 161 featureValue.setSymbol(normalization.get(lower)); 162 } 163 // Tells the feature value that the feature is known and is not a null value 164 featureValue.setNullValue(false); 165 } 166 featureValue.setValue(1); 167 } 168 169 public void setNumOfRelation(String numOfRelationName) { 170 this.numOfRelationName = numOfRelationName; 171 numOfRelation = NumOfRelation.valueOf(numOfRelationName.toUpperCase()); 172 } 173 174 public NumOfRelation getNumOfRelation() { 175 return numOfRelation; 176 } 177 178 /** 179 * Returns the feature value 180 * 181 * @return the feature value 182 */ 183 public FeatureValue getFeatureValue() { 184 return featureValue; 185 } 186 187 /** 188 * Returns the symbol table used by the numof feature function 189 * 190 * @return the symbol table used by the numof feature function 191 */ 192 public SymbolTable getSymbolTable() { 193 return table; 194 } 195 196 /** 197 * Returns the address function 198 * 199 * @return the address function 200 */ 201 public AddressFunction getAddressFunction() { 202 return addressFunction; 203 } 204 205 206 /** 207 * Sets the address function 208 * 209 * @param addressFunction a address function 210 */ 211 public void setAddressFunction(AddressFunction addressFunction) { 212 this.addressFunction = addressFunction; 213 } 214 215 /** 216 * Sets the symbol table used by the numof feature function 217 * 218 * @param table 219 */ 220 public void setSymbolTable(SymbolTable table) { 221 this.table = table; 222 } 223 224 public int getType() { 225 return ColumnDescription.STRING; 226 } 227 228 public String getMapIdentifier() { 229 return getSymbolTable().getName(); 230 } 231 232 public boolean equals(Object obj) { 233 if (this == obj) 234 return true; 235 if (obj == null) 236 return false; 237 if (getClass() != obj.getClass()) 238 return false; 239 return obj.toString().equals(this.toString()); 240 } 241 242 public int hashCode() { 243 return 217 + (null == toString() ? 0 : toString().hashCode()); 244 } 245 246 public String toString() { 247 final StringBuilder sb = new StringBuilder(); 248 sb.append("NumOf("); 249 sb.append(addressFunction.toString()); 250 sb.append(", "); 251 sb.append(numOfRelationName); 252 sb.append(", "); 253 sb.append(normalizationString); 254 sb.append(')'); 255 return sb.toString(); 256 } 257}