package org.maltparser.core.feature.map;

import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.feature.FeatureException;
import org.maltparser.core.feature.function.FeatureFunction;
import org.maltparser.core.feature.function.FeatureMapFunction;
import org.maltparser.core.feature.value.FeatureValue;
import org.maltparser.core.feature.value.FunctionValue;
import org.maltparser.core.feature.value.MultipleFeatureValue;
import org.maltparser.core.feature.value.SingleFeatureValue;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;

/**
 * Feature map function that splits the symbol(s) produced by a parent feature
 * function with a regular expression and exposes the resulting fragments as a
 * {@link MultipleFeatureValue}.
 *
 * <p>The fragments are added to the symbol table that is looked up under the name
 * of an internal column created during {@link #initialize(Object[])} as
 * {@code "SPLIT_" + <name of the parent feature's symbol table>}.
 *
 * <p>Two instances are considered equal when they have the same class and the same
 * {@link #toString()} representation.
 *
 * @author Johan Hall
 */
public final class SplitFeature implements FeatureMapFunction {
    /** Expected argument types of {@link #initialize(Object[])}: a feature function and a separator regex. */
    public final static Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class };

    private FeatureFunction parentFeature;
    private final MultipleFeatureValue multipleFeatureValue;
    private final DataFormatInstance dataFormatInstance;
    private final SymbolTableHandler tableHandler;
    private ColumnDescription column;
    private SymbolTable table;
    private String separators;
    private Pattern separatorsPattern;

    /**
     * Creates an uninitialized split feature; {@link #initialize(Object[])} must be
     * called before {@link #update()}.
     *
     * @param dataFormatInstance the data format instance used to look up and create column descriptions
     * @param tableHandler the handler used to obtain the symbol table of the split values
     * @throws MaltChainedException declared for callers; see the constructor of {@link MultipleFeatureValue}
     */
    public SplitFeature(DataFormatInstance dataFormatInstance, SymbolTableHandler tableHandler) throws MaltChainedException {
        this.dataFormatInstance = dataFormatInstance;
        this.tableHandler = tableHandler;
        this.multipleFeatureValue = new MultipleFeatureValue(this);
    }

    /**
     * Binds the parent feature and the separator expression, then creates the internal
     * column and symbol table that receive the split fragments.
     *
     * @param arguments exactly two elements: a {@link FeatureFunction} (the parent feature)
     *                  and a {@link String} (the separators, compiled as a regular expression)
     * @throws MaltChainedException (as {@link FeatureException}) if the arguments have the wrong
     *                  number or types, if the separators are not a valid regular expression,
     *                  if no column description is found for the parent feature, or if the
     *                  parent column is not of type {@link ColumnDescription#STRING}
     */
    public void initialize(Object[] arguments) throws MaltChainedException {
        if (arguments == null || arguments.length != 2) {
            throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. ");
        }
        if (!(arguments[0] instanceof FeatureFunction)) {
            throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. ");
        }
        if (!(arguments[1] instanceof String)) {
            throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. ");
        }
        setParentFeature((FeatureFunction)arguments[0]);
        try {
            // Pattern.compile (inside setSeparators) is the only place a malformed
            // expression can be detected, so the failure is reported here.
            setSeparators((String)arguments[1]);
        } catch (PatternSyntaxException e) {
            throw new FeatureException("Could not initialize SplitFeature: the separators '"+arguments[1]+"' are not a valid regular expression. ", e);
        }
        final String parentTableName = parentFeature.getSymbolTable().getName();
        ColumnDescription parentColumn = dataFormatInstance.getColumnDescriptionByName(parentTableName);
        // NOTE(review): assumes the lookup may return null for an unknown name - confirm
        // against DataFormatInstance; the guard is harmless if it never does.
        if (parentColumn == null) {
            throw new FeatureException("Could not initialize SplitFeature: no column description found for '"+parentTableName+"'. ");
        }
        if (parentColumn.getType() != ColumnDescription.STRING) {
            throw new FeatureException("Could not initialize SplitFeature: the first argument must be a string. ");
        }
        setColumn(dataFormatInstance.addInternalColumnDescription(tableHandler, "SPLIT_"+parentTableName, parentColumn));
        setSymbolTable(tableHandler.getSymbolTable(column.getName()));
//      setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable()));
    }

    /**
     * @return the shared {@link #paramTypes} array
     */
    public Class<?>[] getParameterTypes() {
        return paramTypes;
    }

    /**
     * @return the value object that {@link #update()} refills on every call
     */
    public FeatureValue getFeatureValue() {
        return multipleFeatureValue;
    }

    /**
     * Translates a code of this feature's symbol table into its symbol.
     *
     * @param code the symbol code
     * @return the result of the symbol table lookup
     * @throws MaltChainedException if the symbol table lookup fails
     */
    public String getSymbol(int code) throws MaltChainedException {
        return table.getSymbolCodeToString(code);
    }

    /**
     * Translates a symbol of this feature's symbol table into its code.
     *
     * @param symbol the symbol string
     * @return the result of the symbol table lookup
     * @throws MaltChainedException if the symbol table lookup fails
     */
    public int getCode(String symbol) throws MaltChainedException {
        return table.getSymbolStringToCode(symbol);
    }

    /**
     * Recomputes the feature value: updates the parent feature, then either forwards its
     * null value (encoded with the parent's symbol table) or splits each of its symbols
     * and adds the non-empty fragments to this feature's symbol table.
     *
     * <p>A parent value that is neither a {@link SingleFeatureValue} nor a
     * {@link MultipleFeatureValue} leaves the value empty after the reset.
     *
     * @throws MaltChainedException if the parent update or a symbol table operation fails
     */
    public void update() throws MaltChainedException {
        multipleFeatureValue.reset();
        parentFeature.update();
        FunctionValue value = parentFeature.getFeatureValue();
        if (value instanceof SingleFeatureValue) {
            String symbol = ((SingleFeatureValue)value).getSymbol();
            if (((FeatureValue)value).isNullValue()) {
                multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol);
                multipleFeatureValue.setNullValue(true);
            } else {
                addSplitItems(symbol);
                multipleFeatureValue.setNullValue(false);
            }
        } else if (value instanceof MultipleFeatureValue) {
            MultipleFeatureValue parentValue = (MultipleFeatureValue)value;
            if (parentValue.isNullValue()) {
                String nullSymbol = parentValue.getFirstSymbol();
                multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(nullSymbol), nullSymbol);
                multipleFeatureValue.setNullValue(true);
            } else {
                for (String symbol : parentValue.getSymbols()) {
                    addSplitItems(symbol);
                    multipleFeatureValue.setNullValue(false);
                }
            }
        }
    }

    /**
     * Splits one symbol with the compiled separators and adds every non-empty fragment
     * to the symbol table and to the current feature value.
     *
     * <p>Empty fragments (for example the leading one produced when the symbol starts
     * with a separator) are skipped for single and multiple parent values alike.
     */
    private void addSplitItems(String symbol) throws MaltChainedException {
        final String[] items = separatorsPattern.split(symbol);
        for (int i = 0; i < items.length; i++) {
            if (items[i].length() > 0) {
                multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
            }
        }
    }

    /**
     * Two split features are equal when they are of the same class and have the same
     * {@link #toString()} representation.
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        return obj.toString().equals(this.toString());
    }

    /**
     * Hash code derived from {@link #toString()}, so that it agrees with
     * {@link #equals(Object)}.
     */
    @Override
    public int hashCode() {
        return toString().hashCode();
    }

    public FeatureFunction getParentFeature() {
        return parentFeature;
    }

    public void setParentFeature(FeatureFunction parentFeature) {
        this.parentFeature = parentFeature;
    }

    public String getSeparators() {
        return separators;
    }

    /**
     * Stores the separators and compiles them as a regular expression.
     *
     * @param separators the separator regular expression
     * @throws PatternSyntaxException (unchecked) if the expression cannot be compiled
     */
    public void setSeparators(String separators) {
        this.separators = separators;
        separatorsPattern = Pattern.compile(separators);
    }

    public SymbolTable getSymbolTable() {
        return table;
    }

    public void setSymbolTable(SymbolTable table) {
        this.table = table;
    }

    public SymbolTableHandler getTableHandler() {
        return tableHandler;
    }

    public DataFormatInstance getDataFormatInstance() {
        return dataFormatInstance;
    }

    public ColumnDescription getColumn() {
        return column;
    }

    protected void setColumn(ColumnDescription column) {
        this.column = column;
    }

    /**
     * @return the type of the internal column set during initialization
     */
    public int getType() {
        return column.getType();
    }

    /**
     * @return the name of this feature's symbol table
     */
    public String getMapIdentifier() {
        return getSymbolTable().getName();
    }

    /**
     * @return {@code "Split(<parent feature>, <separators>)"}
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Split(");
        // append(Object) renders a not-yet-initialized parent as "null" instead of throwing.
        sb.append(parentFeature);
        sb.append(", ");
        sb.append(separators);
        sb.append(')');
        return sb.toString();
    }
}