001package org.maltparser.core.feature.map;
002
003import java.util.regex.Pattern;
004import java.util.regex.PatternSyntaxException;
005
006import org.maltparser.core.exception.MaltChainedException;
007import org.maltparser.core.feature.FeatureException;
008import org.maltparser.core.feature.function.FeatureFunction;
009import org.maltparser.core.feature.function.FeatureMapFunction;
010import org.maltparser.core.feature.value.FeatureValue;
011import org.maltparser.core.feature.value.FunctionValue;
012import org.maltparser.core.feature.value.MultipleFeatureValue;
013import org.maltparser.core.feature.value.SingleFeatureValue;
014import org.maltparser.core.io.dataformat.ColumnDescription;
015import org.maltparser.core.io.dataformat.DataFormatInstance;
016import org.maltparser.core.symbol.SymbolTable;
017import org.maltparser.core.symbol.SymbolTableHandler;
018
019/**
020*
021*
022* @author Johan Hall
023*/
024public final class SplitFeature implements FeatureMapFunction {
025        public final static Class<?>[] paramTypes = { org.maltparser.core.feature.function.FeatureFunction.class, java.lang.String.class };
026        private FeatureFunction parentFeature;
027        private final MultipleFeatureValue multipleFeatureValue;
028        private final DataFormatInstance dataFormatInstance;
029        private final SymbolTableHandler tableHandler;
030        private ColumnDescription column;
031        private SymbolTable table;
032        private String separators;
033        private Pattern separatorsPattern;
034        
035        public SplitFeature(DataFormatInstance dataFormatInstance, SymbolTableHandler tableHandler) throws MaltChainedException {
036                this.dataFormatInstance = dataFormatInstance;
037                this.tableHandler = tableHandler;
038                this.multipleFeatureValue = new MultipleFeatureValue(this);
039        }
040        
041        public void initialize(Object[] arguments) throws MaltChainedException {
042                if (arguments.length != 2) {
043                        throw new FeatureException("Could not initialize SplitFeature: number of arguments are not correct. ");
044                }
045                if (!(arguments[0] instanceof FeatureFunction)) {
046                        throw new FeatureException("Could not initialize SplitFeature: the first argument is not a feature. ");
047                }
048                if (!(arguments[1] instanceof String)) {
049                        throw new FeatureException("Could not initialize SplitFeature: the second argument is not a string. ");
050                }
051                setParentFeature((FeatureFunction)arguments[0]);
052                setSeparators((String)arguments[1]);
053                ColumnDescription parentColumn = dataFormatInstance.getColumnDescriptionByName(parentFeature.getSymbolTable().getName());
054                if (parentColumn.getType() != ColumnDescription.STRING) {
055                        throw new FeatureException("Could not initialize SplitFeature: the first argument must be a string. ");
056                }
057                setColumn(dataFormatInstance.addInternalColumnDescription(tableHandler, "SPLIT_"+parentFeature.getSymbolTable().getName(), parentColumn));
058                setSymbolTable(tableHandler.getSymbolTable(column.getName()));
059//              setSymbolTable(tableHandler.addSymbolTable("SPLIT_"+parentFeature.getSymbolTable().getName(), parentFeature.getSymbolTable()));
060        }
061        
062        public Class<?>[] getParameterTypes() {
063                return paramTypes; 
064        }
065
066        public FeatureValue getFeatureValue() {
067                return multipleFeatureValue;
068        }
069
070        public String getSymbol(int code) throws MaltChainedException {
071                return table.getSymbolCodeToString(code);
072        }
073        
074        public int getCode(String symbol) throws MaltChainedException {
075                return table.getSymbolStringToCode(symbol);
076        }
077
078        public void update() throws MaltChainedException {
079                multipleFeatureValue.reset();
080                parentFeature.update();
081                FunctionValue value = parentFeature.getFeatureValue();
082                if (value instanceof SingleFeatureValue) {
083                        String symbol = ((SingleFeatureValue)value).getSymbol();
084                        if (((FeatureValue)value).isNullValue()) {
085                                multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(symbol), symbol);
086                                multipleFeatureValue.setNullValue(true);
087                        } else {
088                                String items[];
089                                try {
090                                        items = separatorsPattern.split(symbol);
091                                } catch (PatternSyntaxException e) {
092                                        throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'",e);
093                                }
094                                for (int i = 0; i < items.length; i++) {
095                                        if (items[i].length() > 0) {
096                                                multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
097                                        }
098                                }
099                                multipleFeatureValue.setNullValue(false);
100                        }
101                } else if (value instanceof MultipleFeatureValue) {
102                        if (((MultipleFeatureValue)value).isNullValue()) {
103                                multipleFeatureValue.addFeatureValue(parentFeature.getSymbolTable().getSymbolStringToCode(((MultipleFeatureValue)value).getFirstSymbol()), ((MultipleFeatureValue)value).getFirstSymbol());
104                                multipleFeatureValue.setNullValue(true);
105                        } else {
106                                for (String symbol : ((MultipleFeatureValue)value).getSymbols()) {
107                                        String items[];
108                                        try {
109                                                items = separatorsPattern.split(symbol);
110                                        } catch (PatternSyntaxException e) {
111                                                throw new FeatureException("The split feature '"+this.toString()+"' could not split the value using the following separators '"+separators+"'", e);
112                                        }
113                                        for (int i = 0; i < items.length; i++) {
114                                                multipleFeatureValue.addFeatureValue(table.addSymbol(items[i]), items[i]);
115                                        }
116                                        multipleFeatureValue.setNullValue(false);
117                                }
118                        }
119                }
120        }
121        
122        public boolean equals(Object obj) {
123                if (this == obj)
124                        return true;
125                if (obj == null)
126                        return false;
127                if (getClass() != obj.getClass())
128                        return false;
129                return obj.toString().equals(this.toString());
130        }
131        
132        public FeatureFunction getParentFeature() {
133                return parentFeature;
134        }
135        
136        public void setParentFeature(FeatureFunction parentFeature) {
137                this.parentFeature = parentFeature;
138        }
139
140        public String getSeparators() {
141                return separators;
142        }
143        
144        public void setSeparators(String separators) {
145                this.separators = separators;
146                separatorsPattern = Pattern.compile(separators);
147        }
148
149        public SymbolTable getSymbolTable() {
150                return table;
151        }
152
153        public void setSymbolTable(SymbolTable table) {
154                this.table = table;
155        }
156
157        public SymbolTableHandler getTableHandler() {
158                return tableHandler;
159        }
160
161        public DataFormatInstance getDataFormatInstance() {
162                return dataFormatInstance;
163        }
164        
165        public ColumnDescription getColumn() {
166                return column;
167        }
168        
169        protected void setColumn(ColumnDescription column) {
170                this.column = column;
171        }
172
173        public  int getType() {
174                return column.getType();
175        }
176        
177        public String getMapIdentifier() {
178                return getSymbolTable().getName();
179        }
180        
181        public String toString() {
182                final StringBuilder sb = new StringBuilder();
183                sb.append("Split(");
184                sb.append(parentFeature.toString());
185                sb.append(", ");
186                sb.append(separators);
187                sb.append(')');
188                return sb.toString();
189        }       
190}
191