001 package org.maltparser.core.feature.map; 002 003 004 import org.maltparser.core.exception.MaltChainedException; 005 import org.maltparser.core.feature.FeatureException; 006 import org.maltparser.core.feature.function.FeatureFunction; 007 import org.maltparser.core.feature.function.FeatureMapFunction; 008 import org.maltparser.core.feature.value.FeatureValue; 009 import org.maltparser.core.feature.value.FunctionValue; 010 import org.maltparser.core.feature.value.SingleFeatureValue; 011 import org.maltparser.core.io.dataformat.ColumnDescription; 012 import org.maltparser.core.io.dataformat.DataFormatInstance; 013 import org.maltparser.core.symbol.SymbolTable; 014 import org.maltparser.core.symbol.SymbolTableHandler; 015 /** 016 * 017 * 018 * @author Johan Hall 019 */ 020 public class MergeFeature implements FeatureMapFunction { 021 protected FeatureFunction firstFeature; 022 protected FeatureFunction secondFeature; 023 protected DataFormatInstance dataFormatInstance; 024 protected SymbolTable table; 025 protected ColumnDescription column; 026 protected SingleFeatureValue singleFeatureValue; 027 028 public MergeFeature(DataFormatInstance dataFormatInstance) throws MaltChainedException { 029 super(); 030 setDataFormatInstance(dataFormatInstance); 031 singleFeatureValue = new SingleFeatureValue(this); 032 } 033 034 public void initialize(Object[] arguments) throws MaltChainedException { 035 if (arguments.length != 2) { 036 throw new FeatureException("Could not initialize MergeFeature: number of arguments are not correct. "); 037 } 038 if (!(arguments[0] instanceof FeatureFunction)) { 039 throw new FeatureException("Could not initialize MergeFeature: the first argument is not a feature. "); 040 } 041 if (!(arguments[1] instanceof FeatureFunction)) { 042 throw new FeatureException("Could not initialize MergeFeature: the second argument is not a feature. "); 043 } 044 setFirstFeature((FeatureFunction)arguments[0]); 045 setSecondFeature((FeatureFunction)arguments[1]); 046 ColumnDescription firstColumn = dataFormatInstance.getColumnDescriptionByName(firstFeature.getSymbolTable().getName()); 047 ColumnDescription secondColumn = dataFormatInstance.getColumnDescriptionByName(secondFeature.getSymbolTable().getName()); 048 if (firstColumn.getType() != secondColumn.getType()) { 049 throw new FeatureException("Could not initialize MergeFeature: the first and the second arguments are not of the same type."); 050 } 051 setColumn(dataFormatInstance.addInternalColumnDescription("MERGE2_"+firstFeature.getSymbolTable().getName()+"_"+secondFeature.getSymbolTable().getName(), firstColumn)); 052 setSymbolTable(column.getSymbolTable()); 053 } 054 055 public void update() throws MaltChainedException { 056 singleFeatureValue.reset(); 057 firstFeature.update(); 058 secondFeature.update(); 059 FunctionValue firstValue = firstFeature.getFeatureValue(); 060 FunctionValue secondValue = secondFeature.getFeatureValue(); 061 if (firstValue instanceof SingleFeatureValue && secondValue instanceof SingleFeatureValue) { 062 String firstSymbol = ((SingleFeatureValue)firstValue).getSymbol(); 063 if (((FeatureValue)firstValue).isNullValue() && ((FeatureValue)secondValue).isNullValue()) { 064 singleFeatureValue.setIndexCode(firstFeature.getSymbolTable().getSymbolStringToCode(firstSymbol)); 065 // singleFeatureValue.setKnown(firstFeature.getSymbolTable().getKnown(firstSymbol)); 066 singleFeatureValue.setSymbol(firstSymbol); 067 singleFeatureValue.setNullValue(true); 068 } else { 069 if (column.getType() == ColumnDescription.STRING) { 070 StringBuilder mergedValue = new StringBuilder(); 071 mergedValue.append(firstSymbol); 072 mergedValue.append('~'); 073 mergedValue.append(((SingleFeatureValue)secondValue).getSymbol()); 074 singleFeatureValue.setIndexCode(table.addSymbol(mergedValue.toString())); 075 // singleFeatureValue.setKnown(table.getKnown(mergedValue.toString())); 076 singleFeatureValue.setSymbol(mergedValue.toString()); 077 singleFeatureValue.setNullValue(false); 078 singleFeatureValue.setValue(1); 079 } else { 080 if (((FeatureValue)firstValue).isNullValue() || ((FeatureValue)secondValue).isNullValue()) { 081 singleFeatureValue.setValue(0); 082 table.addSymbol("#null#"); 083 singleFeatureValue.setSymbol("#null#"); 084 singleFeatureValue.setNullValue(true); 085 // singleFeatureValue.setKnown(true); 086 singleFeatureValue.setIndexCode(1); 087 } else { 088 if (column.getType() == ColumnDescription.BOOLEAN) { 089 boolean result = false; 090 int dotIndex = firstSymbol.indexOf('.'); 091 result = firstSymbol.equals("1") || firstSymbol.equals("true") || firstSymbol.equals("#true#") || (dotIndex != -1 && firstSymbol.substring(0,dotIndex).equals("1")); 092 if (result == true) { 093 String secondSymbol = ((SingleFeatureValue)secondValue).getSymbol(); 094 dotIndex = secondSymbol.indexOf('.'); 095 result = secondSymbol.equals("1") || secondSymbol.equals("true") || secondSymbol.equals("#true#") || (dotIndex != -1 && secondSymbol.substring(0,dotIndex).equals("1")); 096 } 097 if (result) { 098 singleFeatureValue.setValue(1); 099 table.addSymbol("true"); 100 singleFeatureValue.setSymbol("true"); 101 } else { 102 singleFeatureValue.setValue(0); 103 table.addSymbol("false"); 104 singleFeatureValue.setSymbol("false"); 105 } 106 } else if (column.getType() == ColumnDescription.INTEGER) { 107 Integer firstInt = 0; 108 Integer secondInt = 0; 109 110 int dotIndex = firstSymbol.indexOf('.'); 111 try { 112 if (dotIndex == -1) { 113 firstInt = Integer.parseInt(firstSymbol); 114 } else { 115 firstInt = Integer.parseInt(firstSymbol.substring(0,dotIndex)); 116 } 117 } catch (NumberFormatException e) { 118 throw new FeatureException("Could not cast the feature value '"+firstSymbol+"' to integer value.", e); 119 } 120 String secondSymbol = ((SingleFeatureValue)secondValue).getSymbol(); 121 dotIndex = secondSymbol.indexOf('.'); 122 try { 123 if (dotIndex == -1) { 124 secondInt = Integer.parseInt(secondSymbol); 125 } else { 126 secondInt = Integer.parseInt(secondSymbol.substring(0,dotIndex)); 127 } 128 } catch (NumberFormatException e) { 129 throw new FeatureException("Could not cast the feature value '"+secondSymbol+"' to integer value.", e); 130 } 131 Integer result = firstInt*secondInt; 132 singleFeatureValue.setValue(result); 133 table.addSymbol(result.toString()); 134 singleFeatureValue.setSymbol(result.toString()); 135 } else if (column.getType() == ColumnDescription.REAL) { 136 Double firstReal = 0.0; 137 Double secondReal = 0.0; 138 try { 139 firstReal = Double.parseDouble(firstSymbol); 140 } catch (NumberFormatException e) { 141 throw new FeatureException("Could not cast the feature value '"+firstSymbol+"' to real value.", e); 142 } 143 String secondSymbol = ((SingleFeatureValue)secondValue).getSymbol(); 144 try { 145 secondReal = Double.parseDouble(secondSymbol); 146 } catch (NumberFormatException e) { 147 throw new FeatureException("Could not cast the feature value '"+secondSymbol+"' to real value.", e); 148 } 149 Double result = firstReal*secondReal; 150 singleFeatureValue.setValue(result); 151 table.addSymbol(result.toString()); 152 singleFeatureValue.setSymbol(result.toString()); 153 } 154 singleFeatureValue.setNullValue(false); 155 // singleFeatureValue.setKnown(true); 156 singleFeatureValue.setIndexCode(1); 157 } 158 } 159 } 160 } else { 161 throw new FeatureException("It is not possible to merge Split-features. "); 162 } 163 } 164 165 public Class<?>[] getParameterTypes() { 166 Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, org.maltparser.core.feature.function.FeatureFunction.class }; 167 return paramTypes; 168 } 169 170 public FeatureValue getFeatureValue() { 171 return singleFeatureValue; 172 // return multipleFeatureValue; 173 } 174 175 public String getSymbol(int code) throws MaltChainedException { 176 return table.getSymbolCodeToString(code); 177 } 178 179 public int getCode(String symbol) throws MaltChainedException { 180 return table.getSymbolStringToCode(symbol); 181 } 182 183 public void updateCardinality() throws MaltChainedException { 184 // firstFeature.updateCardinality(); 185 // secondFeature.updateCardinality(); 186 // singleFeatureValue.setCardinality(table.getValueCounter()); 187 188 // multipleFeatureValue.setCardinality(table.getValueCounter()); 189 } 190 191 public FeatureFunction getFirstFeature() { 192 return firstFeature; 193 } 194 195 public void setFirstFeature(FeatureFunction firstFeature) { 196 this.firstFeature = firstFeature; 197 } 198 199 public FeatureFunction getSecondFeature() { 200 return secondFeature; 201 } 202 203 public void setSecondFeature(FeatureFunction secondFeature) { 204 this.secondFeature = secondFeature; 205 } 206 207 public SymbolTableHandler getTableHandler() { 208 return dataFormatInstance.getSymbolTables(); 209 } 210 211 public SymbolTable getSymbolTable() { 212 return table; 213 } 214 215 public void setSymbolTable(SymbolTable table) { 216 this.table = table; 217 } 218 219 public ColumnDescription getColumn() { 220 return column; 221 } 222 223 protected void setColumn(ColumnDescription column) { 224 this.column = column; 225 } 226 227 public DataFormatInstance getDataFormatInstance() { 228 return dataFormatInstance; 229 } 230 231 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) { 232 this.dataFormatInstance = dataFormatInstance; 233 } 234 235 public boolean equals(Object obj) { 236 if (this == obj) 237 return true; 238 if (obj == null) 239 return false; 240 if (getClass() != obj.getClass()) 241 return false; 242 return obj.toString().equals(this.toString()); 243 } 244 245 public String toString() { 246 final StringBuilder sb = new StringBuilder(); 247 sb.append("Merge("); 248 sb.append(firstFeature.toString()); 249 sb.append(", "); 250 sb.append(secondFeature.toString()); 251 sb.append(')'); 252 return sb.toString(); 253 } 254 255 }