001 package org.maltparser.core.feature.map; 002 003 004 import org.maltparser.core.exception.MaltChainedException; 005 import org.maltparser.core.feature.FeatureException; 006 import org.maltparser.core.feature.function.FeatureFunction; 007 import org.maltparser.core.feature.function.FeatureMapFunction; 008 import org.maltparser.core.feature.value.FeatureValue; 009 import org.maltparser.core.feature.value.FunctionValue; 010 import org.maltparser.core.feature.value.SingleFeatureValue; 011 import org.maltparser.core.io.dataformat.ColumnDescription; 012 import org.maltparser.core.io.dataformat.DataFormatInstance; 013 import org.maltparser.core.symbol.SymbolTable; 014 import org.maltparser.core.symbol.SymbolTableHandler; 015 /** 016 * 017 * 018 * @author Johan Hall 019 */ 020 public class MergeFeature implements FeatureMapFunction { 021 private FeatureFunction firstFeature; 022 private FeatureFunction secondFeature; 023 private DataFormatInstance dataFormatInstance; 024 private SymbolTable table; 025 private ColumnDescription column; 026 private final SingleFeatureValue singleFeatureValue; 027 // protected int type; 028 029 030 public MergeFeature(DataFormatInstance dataFormatInstance) throws MaltChainedException { 031 super(); 032 setDataFormatInstance(dataFormatInstance); 033 singleFeatureValue = new SingleFeatureValue(this); 034 } 035 036 public void initialize(Object[] arguments) throws MaltChainedException { 037 if (arguments.length != 2) { 038 throw new FeatureException("Could not initialize MergeFeature: number of arguments are not correct. "); 039 } 040 if (!(arguments[0] instanceof FeatureFunction)) { 041 throw new FeatureException("Could not initialize MergeFeature: the first argument is not a feature. "); 042 } 043 if (!(arguments[1] instanceof FeatureFunction)) { 044 throw new FeatureException("Could not initialize MergeFeature: the second argument is not a feature. "); 045 } 046 setFirstFeature((FeatureFunction)arguments[0]); 047 setSecondFeature((FeatureFunction)arguments[1]); 048 ColumnDescription firstColumn = (firstFeature.getSymbolTable() != null)?dataFormatInstance.getColumnDescriptionByName(firstFeature.getSymbolTable().getName()):null; 049 ColumnDescription secondColumn = (secondFeature.getSymbolTable() != null)?dataFormatInstance.getColumnDescriptionByName(secondFeature.getSymbolTable().getName()):null; 050 // if (firstColumn.getType() != secondColumn.getType()) { 051 // throw new FeatureException("Could not initialize MergeFeature: the first and the second arguments are not of the same type."); 052 // } 053 // setColumn(dataFormatInstance.addInternalColumnDescription("MERGE2_"+firstFeature.getSymbolTable().getName()+"_"+secondFeature.getSymbolTable().getName(), firstColumn)); 054 055 if (firstFeature.getType() != secondFeature.getType()) { 056 throw new FeatureException("Could not initialize MergeFeature: the first and the second arguments are not of the same type."); 057 } 058 if (firstColumn != null || secondColumn != null) { 059 setColumn(dataFormatInstance.addInternalColumnDescription("MERGE2_"+firstFeature.getMapIdentifier()+"_"+secondFeature.getMapIdentifier(), (firstColumn != null)?firstColumn:secondColumn)); 060 } else { 061 setColumn(dataFormatInstance.addInternalColumnDescription("MERGE2_"+firstFeature.getMapIdentifier()+"_"+secondFeature.getMapIdentifier(), ColumnDescription.INPUT, firstFeature.getType(), "", "One")); 062 } 063 setSymbolTable(column.getSymbolTable()); 064 } 065 066 public void update() throws MaltChainedException { 067 singleFeatureValue.reset(); 068 firstFeature.update(); 069 secondFeature.update(); 070 FeatureValue firstValue = firstFeature.getFeatureValue(); 071 FeatureValue secondValue = secondFeature.getFeatureValue(); 072 if (firstValue instanceof SingleFeatureValue && secondValue instanceof SingleFeatureValue) { 073 String firstSymbol = ((SingleFeatureValue)firstValue).getSymbol(); 074 if (firstValue.isNullValue() && secondValue.isNullValue()) { 075 singleFeatureValue.setIndexCode(firstFeature.getSymbolTable().getSymbolStringToCode(firstSymbol)); 076 singleFeatureValue.setSymbol(firstSymbol); 077 singleFeatureValue.setNullValue(true); 078 } else { 079 if (column.getType() == ColumnDescription.STRING) { 080 StringBuilder mergedValue = new StringBuilder(); 081 mergedValue.append(firstSymbol); 082 mergedValue.append('~'); 083 mergedValue.append(((SingleFeatureValue)secondValue).getSymbol()); 084 singleFeatureValue.setIndexCode(table.addSymbol(mergedValue.toString())); 085 singleFeatureValue.setSymbol(mergedValue.toString()); 086 singleFeatureValue.setNullValue(false); 087 singleFeatureValue.setValue(1); 088 } else { 089 if (firstValue.isNullValue() || secondValue.isNullValue()) { 090 singleFeatureValue.setValue(0); 091 table.addSymbol("#null#"); 092 singleFeatureValue.setSymbol("#null#"); 093 singleFeatureValue.setNullValue(true); 094 singleFeatureValue.setIndexCode(1); 095 } else { 096 if (column.getType() == ColumnDescription.BOOLEAN) { 097 boolean result = false; 098 int dotIndex = firstSymbol.indexOf('.'); 099 result = firstSymbol.equals("1") || firstSymbol.equals("true") || firstSymbol.equals("#true#") || (dotIndex != -1 && firstSymbol.substring(0,dotIndex).equals("1")); 100 if (result == true) { 101 String secondSymbol = ((SingleFeatureValue)secondValue).getSymbol(); 102 dotIndex = secondSymbol.indexOf('.'); 103 result = secondSymbol.equals("1") || secondSymbol.equals("true") || secondSymbol.equals("#true#") || (dotIndex != -1 && secondSymbol.substring(0,dotIndex).equals("1")); 104 } 105 if (result) { 106 singleFeatureValue.setValue(1); 107 table.addSymbol("true"); 108 singleFeatureValue.setSymbol("true"); 109 } else { 110 singleFeatureValue.setValue(0); 111 table.addSymbol("false"); 112 singleFeatureValue.setSymbol("false"); 113 } 114 } else if (column.getType() == ColumnDescription.INTEGER) { 115 Integer firstInt = 0; 116 Integer secondInt = 0; 117 118 int dotIndex = firstSymbol.indexOf('.'); 119 try { 120 if (dotIndex == -1) { 121 firstInt = Integer.parseInt(firstSymbol); 122 } else { 123 firstInt = Integer.parseInt(firstSymbol.substring(0,dotIndex)); 124 } 125 } catch (NumberFormatException e) { 126 throw new FeatureException("Could not cast the feature value '"+firstSymbol+"' to integer value.", e); 127 } 128 String secondSymbol = ((SingleFeatureValue)secondValue).getSymbol(); 129 dotIndex = secondSymbol.indexOf('.'); 130 try { 131 if (dotIndex == -1) { 132 secondInt = Integer.parseInt(secondSymbol); 133 } else { 134 secondInt = Integer.parseInt(secondSymbol.substring(0,dotIndex)); 135 } 136 } catch (NumberFormatException e) { 137 throw new FeatureException("Could not cast the feature value '"+secondSymbol+"' to integer value.", e); 138 } 139 Integer result = firstInt*secondInt; 140 singleFeatureValue.setValue(result); 141 table.addSymbol(result.toString()); 142 singleFeatureValue.setSymbol(result.toString()); 143 } else if (column.getType() == ColumnDescription.REAL) { 144 Double firstReal = 0.0; 145 Double secondReal = 0.0; 146 try { 147 firstReal = Double.parseDouble(firstSymbol); 148 } catch (NumberFormatException e) { 149 throw new FeatureException("Could not cast the feature value '"+firstSymbol+"' to real value.", e); 150 } 151 String secondSymbol = ((SingleFeatureValue)secondValue).getSymbol(); 152 try { 153 secondReal = Double.parseDouble(secondSymbol); 154 } catch (NumberFormatException e) { 155 throw new FeatureException("Could not cast the feature value '"+secondSymbol+"' to real value.", e); 156 } 157 Double result = firstReal*secondReal; 158 singleFeatureValue.setValue(result); 159 table.addSymbol(result.toString()); 160 singleFeatureValue.setSymbol(result.toString()); 161 } 162 singleFeatureValue.setNullValue(false); 163 singleFeatureValue.setIndexCode(1); 164 } 165 } 166 } 167 } else { 168 throw new FeatureException("It is not possible to merge Split-features. "); 169 } 170 } 171 172 public Class<?>[] getParameterTypes() { 173 Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, org.maltparser.core.feature.function.FeatureFunction.class }; 174 return paramTypes; 175 } 176 177 public FeatureValue getFeatureValue() { 178 return singleFeatureValue; 179 } 180 181 public String getSymbol(int code) throws MaltChainedException { 182 return table.getSymbolCodeToString(code); 183 } 184 185 public int getCode(String symbol) throws MaltChainedException { 186 return table.getSymbolStringToCode(symbol); 187 } 188 189 public FeatureFunction getFirstFeature() { 190 return firstFeature; 191 } 192 193 public void setFirstFeature(FeatureFunction firstFeature) { 194 this.firstFeature = firstFeature; 195 } 196 197 public FeatureFunction getSecondFeature() { 198 return secondFeature; 199 } 200 201 public void setSecondFeature(FeatureFunction secondFeature) { 202 this.secondFeature = secondFeature; 203 } 204 205 public SymbolTableHandler getTableHandler() { 206 return dataFormatInstance.getSymbolTables(); 207 } 208 209 public SymbolTable getSymbolTable() { 210 return table; 211 } 212 213 public void setSymbolTable(SymbolTable table) { 214 this.table = table; 215 } 216 217 public ColumnDescription getColumn() { 218 return column; 219 } 220 221 protected void setColumn(ColumnDescription column) { 222 this.column = column; 223 } 224 225 public DataFormatInstance getDataFormatInstance() { 226 return dataFormatInstance; 227 } 228 229 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) { 230 this.dataFormatInstance = dataFormatInstance; 231 } 232 233 public int getType() { 234 // return type; 235 return column.getType(); 236 } 237 238 // public void setType(int type) { 239 // this.type = type; 240 // } 241 242 public String getMapIdentifier() { 243 return getSymbolTable().getName(); 244 } 245 246 public boolean equals(Object obj) { 247 if (this == obj) 248 return true; 249 if (obj == null) 250 return false; 251 if (getClass() != obj.getClass()) 252 return false; 253 return obj.toString().equals(this.toString()); 254 } 255 256 public String toString() { 257 final StringBuilder sb = new StringBuilder(); 258 sb.append("Merge("); 259 sb.append(firstFeature.toString()); 260 sb.append(", "); 261 sb.append(secondFeature.toString()); 262 sb.append(')'); 263 return sb.toString(); 264 } 265 266 }