001package org.maltparser.core.propagation;
002
003import java.util.SortedSet;
004import java.util.TreeSet;
005import java.util.regex.Pattern;
006
007import org.maltparser.core.exception.MaltChainedException;
008import org.maltparser.core.io.dataformat.ColumnDescription;
009import org.maltparser.core.io.dataformat.DataFormatInstance;
010import org.maltparser.core.propagation.spec.PropagationSpec;
011import org.maltparser.core.symbol.SymbolTable;
012import org.maltparser.core.symbol.SymbolTableHandler;
013import org.maltparser.core.syntaxgraph.edge.Edge;
014import org.maltparser.core.syntaxgraph.node.DependencyNode;
015
016/**
017 * A propagation object propagate a column value from one node to a column in another node based on the propagation specification. 
018 * 
019 * @author Johan Hall
020 *
021 */
022public class Propagation {
023        /**
024         * 
025         */
026        private SymbolTable fromTable;
027        private SymbolTable toTable;
028        private SymbolTable deprelTable;
029        private final SortedSet<String> forSet;
030        private final SortedSet<String> overSet;
031        
032        private final Pattern symbolSeparator;
033        
034        /**
035         * Creates a propagation object based on the propagation specification
036         * 
037         * @param spec a propagation specification
038         * @param dataFormatInstance a data format instance
039         * @throws MaltChainedException
040         */
041        public Propagation(PropagationSpec spec, DataFormatInstance dataFormatInstance, SymbolTableHandler tableHandler) throws MaltChainedException {
042                ColumnDescription fromColumn = dataFormatInstance.getColumnDescriptionByName(spec.getFrom());
043                if (fromColumn == null) {
044                        throw new PropagationException("The symbol table '"+spec.getFrom()+" does not exists.");
045                }
046                fromTable = tableHandler.getSymbolTable(spec.getFrom());
047
048                ColumnDescription toColumn = dataFormatInstance.getColumnDescriptionByName(spec.getTo());
049                if (toColumn == null) {
050                        toColumn = dataFormatInstance.addInternalColumnDescription(tableHandler, spec.getTo(), fromColumn);
051                        toTable = tableHandler.getSymbolTable(spec.getTo());
052                }
053
054                
055                forSet = new TreeSet<String>();
056                if (spec.getFor() != null && spec.getFor().length() > 0) {
057                        String[] items = spec.getFor().split("\\|");
058                        
059                        for (String item : items) {
060                                forSet.add(item);
061                        }
062                }
063                
064                overSet = new TreeSet<String>();
065                if (spec.getOver() != null && spec.getOver().length() > 0) {
066                        String[] items = spec.getOver().split("\\|");
067                        
068                        for (String item : items) {
069                                overSet.add(item);
070                        }
071                }
072                
073//              ColumnDescription deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
074                deprelTable = tableHandler.getSymbolTable("DEPREL");
075                symbolSeparator = Pattern.compile("\\|");
076        }
077
078        /**
079         * Propagate columns according to the propagation specification
080         * 
081         * @param e an edge 
082         * @throws MaltChainedException
083         */
084        public void propagate(Edge e) throws MaltChainedException {
085                if (e != null && e.hasLabel(deprelTable) && !e.getSource().isRoot()) {
086                        if (overSet.size() == 0 || overSet.contains(e.getLabelSymbol(deprelTable))) {
087                                DependencyNode to = (DependencyNode)e.getSource();
088                                DependencyNode from = (DependencyNode)e.getTarget();
089                                String fromSymbol = null;
090                                if (e.hasLabel(fromTable)) {
091                                        fromSymbol = e.getLabelSymbol(fromTable);
092                                } else if (from.hasLabel(fromTable)) {
093                                        fromSymbol = from.getLabelSymbol(fromTable);
094                                }
095                                
096                                String propSymbol = null;
097                                if (to.hasLabel(toTable)) {
098                                        propSymbol = union(fromSymbol, to.getLabelSymbol(toTable));
099                                } else {
100                                        if (forSet.size() == 0 || forSet.contains(fromSymbol)) {
101                                                propSymbol = fromSymbol;
102                                        }
103                                }
104                                if (propSymbol != null) {
105                                        to.addLabel(toTable, propSymbol);
106                                }
107                        }
108                }
109        }
110        
111        private String union(String fromSymbol, String toSymbol) {
112                SortedSet<String> symbolSet = new TreeSet<String>();
113                
114                if (fromSymbol != null && fromSymbol.length() != 0) {
115                        String[] fromSymbols = symbolSeparator.split(fromSymbol);
116                        for (int i = 0; i < fromSymbols.length; i++) {
117                                if (forSet.size() == 0 || forSet.contains(fromSymbols[i])) {
118                                        symbolSet.add(fromSymbols[i]);
119                                }
120                        }
121                }
122                if (toSymbol != null && toSymbol.length() != 0) {
123                        String[] toSymbols = symbolSeparator.split(toSymbol);
124                        for (int i = 0; i < toSymbols.length; i++) {
125                                symbolSet.add(toSymbols[i]);
126                        }
127                }
128                
129                if (symbolSet.size() > 0) {
130                        StringBuilder sb = new StringBuilder();
131                        for (String symbol : symbolSet) {
132                                sb.append(symbol);
133                                sb.append('|');
134                        }
135                        sb.setLength(sb.length()-1);
136                        return sb.toString();
137                }
138
139                
140                return "";
141        }
142        @Override
143        public String toString() {
144                return "Propagation [forSet=" + forSet + ", fromTable=" + fromTable
145                                + ", overSet=" + overSet + ", toTable=" + toTable + "]";
146        }
147}