001 package org.maltparser.core.feature.map; 002 003 import java.util.regex.Pattern; 004 import java.util.regex.PatternSyntaxException; 005 006 import org.maltparser.core.exception.MaltChainedException; 007 import org.maltparser.core.feature.FeatureException; 008 import org.maltparser.core.feature.function.FeatureFunction; 009 import org.maltparser.core.feature.function.FeatureMapFunction; 010 import org.maltparser.core.feature.value.FeatureValue; 011 import org.maltparser.core.feature.value.FunctionValue; 012 import org.maltparser.core.feature.value.MultipleFeatureValue; 013 import org.maltparser.core.feature.value.SingleFeatureValue; 014 import org.maltparser.core.symbol.SymbolTable; 015 import org.maltparser.core.symbol.SymbolTableHandler; 016 017 /** 018 * 019 * 020 * @author Johan Hall 021 */ 022 public class SplitFeature implements FeatureMapFunction { 023 protected FeatureFunction parentFeature; 024 protected MultipleFeatureValue multipleFeatureValue; 025 protected SymbolTableHandler tableHandler; 026 protected SymbolTable table; 027 protected String separators; 028 protected Pattern separatorsPattern; 029 030 public SplitFeature(SymbolTableHandler tableHandler) throws MaltChainedException { 031 super(); 032 setTableHandler(tableHandler); 033 multipleFeatureValue = new MultipleFeatureValue(this); 034 } 035 036 public void initialize(Object[] arguments) throws MaltChainedException { 037 if (arguments.length != 2) { 038 throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. "); 039 } 040 if (!(arguments[0] instanceof FeatureFunction)) { 041 throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. "); 042 } 043 if (!(arguments[1] instanceof String)) { 044 throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. "); 045 } 046 setParentFeature((FeatureFunction)arguments[0]); 047 setSeparators((String)arguments[1]); 048 setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable())); 049 } 050 051 public Class<?>[] getParameterTypes() { 052 Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class }; 053 return paramTypes; 054 } 055 056 public FeatureValue getFeatureValue() { 057 return multipleFeatureValue; 058 } 059 060 public String getSymbol(int code) throws MaltChainedException { 061 return table.getSymbolCodeToString(code); 062 } 063 064 public int getCode(String symbol) throws MaltChainedException { 065 return table.getSymbolStringToCode(symbol); 066 } 067 068 public void update() throws MaltChainedException { 069 multipleFeatureValue.reset(); 070 parentFeature.update(); 071 FunctionValue value = parentFeature.getFeatureValue(); 072 if (value instanceof SingleFeatureValue) { 073 String symbol = ((SingleFeatureValue)value).getSymbol(); 074 if (((FeatureValue)value).isNullValue()) { 075 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol, true); 076 multipleFeatureValue.setNullValue(true); 077 } else { 078 String items[]; 079 try { 080 items = separatorsPattern.split(symbol); 081 } catch (PatternSyntaxException e) { 082 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'",e); 083 } 084 for (int i = 0; i < items.length; i++) { 085 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i], table.getKnown(items[i])); 086 } 087 multipleFeatureValue.setNullValue(false); 088 } 089 } else if (value instanceof MultipleFeatureValue) { 090 if (((MultipleFeatureValue)value).isNullValue()) { 091 multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(((MultipleFeatureValue)value).getFirstSymbol()), ((MultipleFeatureValue)value).getFirstSymbol(), true); 092 multipleFeatureValue.setNullValue(true); 093 } else { 094 for (String symbol : ((MultipleFeatureValue)value).getSymbols()) { 095 String items[]; 096 try { 097 items = separatorsPattern.split(symbol); 098 } catch (PatternSyntaxException e) { 099 throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'", e); 100 } 101 for (int i = 0; i < items.length; i++) { 102 multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i], table.getKnown(items[i])); 103 } 104 multipleFeatureValue.setNullValue(false); 105 } 106 } 107 } 108 } 109 110 public void updateCardinality() throws MaltChainedException { 111 parentFeature.updateCardinality(); 112 multipleFeatureValue.setCardinality(table.getValueCounter()); 113 } 114 115 public boolean equals(Object obj) { 116 if (this == obj) 117 return true; 118 if (obj == null) 119 return false; 120 if (getClass() != obj.getClass()) 121 return false; 122 return obj.toString().equals(this.toString()); 123 } 124 125 public FeatureFunction getParentFeature() { 126 return parentFeature; 127 } 128 129 public void setParentFeature(FeatureFunction parentFeature) { 130 this.parentFeature = parentFeature; 131 } 132 133 public String getSeparators() { 134 return separators; 135 } 136 137 public void setSeparators(String separators) { 138 this.separators = separators; 139 separatorsPattern = Pattern.compile(separators); 140 } 141 142 public SymbolTable getSymbolTable() { 143 return table; 144 } 145 146 public void setSymbolTable(SymbolTable table) { 147 this.table = table; 148 } 149 150 public SymbolTableHandler getTableHandler() { 151 return tableHandler; 152 } 153 154 public void setTableHandler(SymbolTableHandler tableHandler) { 155 this.tableHandler = tableHandler; 156 } 157 158 159 160 public String toString() { 161 final StringBuilder sb = new StringBuilder(); 162 sb.append("Split("); 163 sb.append(parentFeature.toString()); 164 sb.append(", "); 165 sb.append(separators); 166 sb.append(')'); 167 return sb.toString(); 168 } 169 } 170