001    package org.maltparser.core.feature.map;
002    
003    import java.util.regex.Pattern;
004    import java.util.regex.PatternSyntaxException;
005    
006    import org.maltparser.core.exception.MaltChainedException;
007    import org.maltparser.core.feature.FeatureException;
008    import org.maltparser.core.feature.function.FeatureFunction;
009    import org.maltparser.core.feature.function.FeatureMapFunction;
010    import org.maltparser.core.feature.value.FeatureValue;
011    import org.maltparser.core.feature.value.FunctionValue;
012    import org.maltparser.core.feature.value.MultipleFeatureValue;
013    import org.maltparser.core.feature.value.SingleFeatureValue;
014    import org.maltparser.core.io.dataformat.ColumnDescription;
015    import org.maltparser.core.io.dataformat.DataFormatInstance;
016    import org.maltparser.core.symbol.SymbolTable;
017    import org.maltparser.core.symbol.SymbolTableHandler;
018    
019    /**
020    *
021    *
022    * @author Johan Hall
023    */
024    public class SplitFeature implements FeatureMapFunction {
025            protected FeatureFunction parentFeature;
026            protected MultipleFeatureValue multipleFeatureValue;
027            protected DataFormatInstance dataFormatInstance;
028            protected ColumnDescription column;
029            protected SymbolTable table;
030            protected String separators;
031            protected Pattern separatorsPattern;
032            
033            public SplitFeature(DataFormatInstance dataFormatInstance) throws MaltChainedException {
034                    super();
035                    setDataFormatInstance(dataFormatInstance);
036                    multipleFeatureValue = new MultipleFeatureValue(this);
037            }
038            
039            public void initialize(Object[] arguments) throws MaltChainedException {
040                    if (arguments.length != 2) {
041                            throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. ");
042                    }
043                    if (!(arguments[0] instanceof FeatureFunction)) {
044                            throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. ");
045                    }
046                    if (!(arguments[1] instanceof String)) {
047                            throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. ");
048                    }
049                    setParentFeature((FeatureFunction)arguments[0]);
050                    setSeparators((String)arguments[1]);
051                    ColumnDescription parentColumn = dataFormatInstance.getColumnDescriptionByName(parentFeature.getSymbolTable().getName());
052                    if (parentColumn.getType() != ColumnDescription.STRING) {
053                            throw new FeatureException("Could not initialize SplitFeature: the first argument must be a string. ");
054                    }
055                    setColumn(dataFormatInstance.addInternalColumnDescription("SPLIT_"+parentFeature.getSymbolTable().getName(), parentColumn));
056                    setSymbolTable(column.getSymbolTable());
057    //              setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable()));
058            }
059            
060            public Class<?>[] getParameterTypes() {
061                    Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class };
062                    return paramTypes; 
063            }
064    
065            public FeatureValue getFeatureValue() {
066                    return multipleFeatureValue;
067            }
068    
069            public String getSymbol(int code) throws MaltChainedException {
070                    return table.getSymbolCodeToString(code);
071            }
072            
073            public int getCode(String symbol) throws MaltChainedException {
074                    return table.getSymbolStringToCode(symbol);
075            }
076    
077            public void update() throws MaltChainedException {
078                    multipleFeatureValue.reset();
079                    parentFeature.update();
080                    FunctionValue value = parentFeature.getFeatureValue();
081                    if (value instanceof SingleFeatureValue) {
082                            String symbol = ((SingleFeatureValue)value).getSymbol();
083                            if (((FeatureValue)value).isNullValue()) {
084                                    multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol);
085                                    multipleFeatureValue.setNullValue(true);
086                            } else {
087                                    String items[];
088                                    try {
089                                            items = separatorsPattern.split(symbol);
090                                    } catch (PatternSyntaxException e) {
091                                            throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'",e);
092                                    }
093                                    for (int i = 0; i < items.length; i++) {
094                                            if (items[i].length() > 0) {
095                                                    multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
096                                            }
097                                    }
098                                    multipleFeatureValue.setNullValue(false);
099                            }
100                    } else if (value instanceof MultipleFeatureValue) {
101                            if (((MultipleFeatureValue)value).isNullValue()) {
102                                    multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(((MultipleFeatureValue)value).getFirstSymbol()), ((MultipleFeatureValue)value).getFirstSymbol());
103                                    multipleFeatureValue.setNullValue(true);
104                            } else {
105                                    for (String symbol : ((MultipleFeatureValue)value).getSymbols()) {
106                                            String items[];
107                                            try {
108                                                    items = separatorsPattern.split(symbol);
109                                            } catch (PatternSyntaxException e) {
110                                                    throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'", e);
111                                            }
112                                            for (int i = 0; i < items.length; i++) {
113                                                    multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
114                                            }
115                                            multipleFeatureValue.setNullValue(false);
116                                    }
117                            }
118                    }
119            }
120    
121            public void updateCardinality() throws MaltChainedException {
122    //              parentFeature.updateCardinality();
123    //              multipleFeatureValue.setCardinality(table.getValueCounter()); 
124            }
125            
126            public boolean equals(Object obj) {
127                    if (this == obj)
128                            return true;
129                    if (obj == null)
130                            return false;
131                    if (getClass() != obj.getClass())
132                            return false;
133                    return obj.toString().equals(this.toString());
134            }
135            
136            public FeatureFunction getParentFeature() {
137                    return parentFeature;
138            }
139            
140            public void setParentFeature(FeatureFunction parentFeature) {
141                    this.parentFeature = parentFeature;
142            }
143    
144            public String getSeparators() {
145                    return separators;
146            }
147            
148            public void setSeparators(String separators) {
149                    this.separators = separators;
150                    separatorsPattern = Pattern.compile(separators);
151            }
152    
153            public SymbolTable getSymbolTable() {
154                    return table;
155            }
156    
157            public void setSymbolTable(SymbolTable table) {
158                    this.table = table;
159            }
160    
161            public SymbolTableHandler getTableHandler() {
162                    return dataFormatInstance.getSymbolTables();
163            }
164    
165            public DataFormatInstance getDataFormatInstance() {
166                    return dataFormatInstance;
167            }
168    
169            public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
170                    this.dataFormatInstance = dataFormatInstance;
171            }
172            
173            public ColumnDescription getColumn() {
174                    return column;
175            }
176            
177            protected void setColumn(ColumnDescription column) {
178                    this.column = column;
179            }
180    
181            public String toString() {
182                    final StringBuilder sb = new StringBuilder();
183                    sb.append("Split(");
184                    sb.append(parentFeature.toString());
185                    sb.append(", ");
186                    sb.append(separators);
187                    sb.append(')');
188                    return sb.toString();
189            }       
190    }
191