001 package org.maltparser.core.feature.map; 002 003 import java.util.regex.Pattern; 004 import java.util.regex.PatternSyntaxException; 005 006 import org.maltparser.core.exception.MaltChainedException; 007 import org.maltparser.core.feature.FeatureException; 008 import org.maltparser.core.feature.function.FeatureFunction; 009 import org.maltparser.core.feature.function.FeatureMapFunction; 010 import org.maltparser.core.feature.value.FeatureValue; 011 import org.maltparser.core.feature.value.FunctionValue; 012 import org.maltparser.core.feature.value.MultipleFeatureValue; 013 import org.maltparser.core.feature.value.SingleFeatureValue; 014 import org.maltparser.core.io.dataformat.ColumnDescription; 015 import org.maltparser.core.io.dataformat.DataFormatInstance; 016 import org.maltparser.core.symbol.SymbolTable; 017 import org.maltparser.core.symbol.SymbolTableHandler; 018 019 /** 020 * 021 * 022 * @author Johan Hall 023 */ 024 public class SplitFeature implements FeatureMapFunction { 025 protected FeatureFunction parentFeature; 026 protected MultipleFeatureValue multipleFeatureValue; 027 protected DataFormatInstance dataFormatInstance; 028 protected ColumnDescription column; 029 protected SymbolTable table; 030 protected String separators; 031 protected Pattern separatorsPattern; 032 033 public SplitFeature(DataFormatInstance dataFormatInstance) throws MaltChainedException { 034 super(); 035 setDataFormatInstance(dataFormatInstance); 036 multipleFeatureValue = new MultipleFeatureValue(this); 037 } 038 039 public void initialize(Object[] arguments) throws MaltChainedException { 040 if (arguments.length != 2) { 041 throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. "); 042 } 043 if (!(arguments[0] instanceof FeatureFunction)) { 044 throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. "); 045 } 046 if (!(arguments[1] instanceof String)) { 047 throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. "); 048 } 049 setParentFeature((FeatureFunction)arguments[0]); 050 setSeparators((String)arguments[1]); 051 ColumnDescription parentColumn = dataFormatInstance.getColumnDescriptionByName(parentFeature.getSymbolTable().getName()); 052 if (parentColumn.getType() != ColumnDescription.STRING) { 053 throw new FeatureException("Could not initialize SplitFeature: the first argument must be a string. "); 054 } 055 setColumn(dataFormatInstance.addInternalColumnDescription("SPLIT_"+parentFeature.getSymbolTable().getName(), parentColumn)); 056 setSymbolTable(column.getSymbolTable()); 057 // setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable())); 058 } 059 060 public Class<?>[] getParameterTypes() { 061 Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class }; 062 return paramTypes; 063 } 064 065 public FeatureValue getFeatureValue() { 066 return multipleFeatureValue; 067 } 068 069 public String getSymbol(int code) throws MaltChainedException { 070 return table.getSymbolCodeToString(code); 071 } 072 073 public int getCode(String symbol) throws MaltChainedException { 074 return table.getSymbolStringToCode(symbol); 075 } 076 077 public void update() throws MaltChainedException { 078 multipleFeatureValue.reset(); 079 parentFeature.update(); 080 FunctionValue value = parentFeature.getFeatureValue(); 081 if (value instanceof SingleFeatureValue) { 082 String symbol = ((SingleFeatureValue)value).getSymbol(); 083 if (((FeatureValue)value).isNullValue()) { 084 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol); 085 multipleFeatureValue.setNullValue(true); 086 } else { 087 String items[]; 088 try { 089 items = separatorsPattern.split(symbol); 090 } catch (PatternSyntaxException e) { 091 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'",e); 092 } 093 for (int i = 0; i < items.length; i++) { 094 if (items[i].length() > 0) { 095 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]); 096 } 097 } 098 multipleFeatureValue.setNullValue(false); 099 } 100 } else if (value instanceof MultipleFeatureValue) { 101 if (((MultipleFeatureValue)value).isNullValue()) { 102 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(((MultipleFeatureValue)value).getFirstSymbol()), ((MultipleFeatureValue)value).getFirstSymbol()); 103 multipleFeatureValue.setNullValue(true); 104 } else { 105 for (String symbol : ((MultipleFeatureValue)value).getSymbols()) { 106 String items[]; 107 try { 108 items = separatorsPattern.split(symbol); 109 } catch (PatternSyntaxException e) { 110 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'", e); 111 } 112 for (int i = 0; i < items.length; i++) { 113 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]); 114 } 115 multipleFeatureValue.setNullValue(false); 116 } 117 } 118 } 119 } 120 121 public void updateCardinality() throws MaltChainedException { 122 // parentFeature.updateCardinality(); 123 // multipleFeatureValue.setCardinality(table.getValueCounter()); 124 } 125 126 public boolean equals(Object obj) { 127 if (this == obj) 128 return true; 129 if (obj == null) 130 return false; 131 if (getClass() != obj.getClass()) 132 return false; 133 return obj.toString().equals(this.toString()); 134 } 135 136 public FeatureFunction getParentFeature() { 137 return parentFeature; 138 } 139 140 public void setParentFeature(FeatureFunction parentFeature) { 141 this.parentFeature = parentFeature; 142 } 143 144 public String getSeparators() { 145 return separators; 146 } 147 148 public void setSeparators(String separators) { 149 this.separators = separators; 150 separatorsPattern = Pattern.compile(separators); 151 } 152 153 public SymbolTable getSymbolTable() { 154 return table; 155 } 156 157 public void setSymbolTable(SymbolTable table) { 158 this.table = table; 159 } 160 161 public SymbolTableHandler getTableHandler() { 162 return dataFormatInstance.getSymbolTables(); 163 } 164 165 public DataFormatInstance getDataFormatInstance() { 166 return dataFormatInstance; 167 } 168 169 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) { 170 this.dataFormatInstance = dataFormatInstance; 171 } 172 173 public ColumnDescription getColumn() { 174 return column; 175 } 176 177 protected void setColumn(ColumnDescription column) { 178 this.column = column; 179 } 180 181 public String toString() { 182 final StringBuilder sb = new StringBuilder(); 183 sb.append("Split("); 184 sb.append(parentFeature.toString()); 185 sb.append(", "); 186 sb.append(separators); 187 sb.append(')'); 188 return sb.toString(); 189 } 190 } 191