001    package org.maltparser.core.propagation;
002    
003    import java.util.SortedSet;
004    import java.util.TreeSet;
005    import java.util.regex.Pattern;
006    
007    import org.maltparser.core.exception.MaltChainedException;
008    import org.maltparser.core.io.dataformat.ColumnDescription;
009    import org.maltparser.core.io.dataformat.DataFormatInstance;
010    import org.maltparser.core.propagation.spec.PropagationSpec;
011    import org.maltparser.core.symbol.SymbolTable;
012    import org.maltparser.core.syntaxgraph.edge.Edge;
013    import org.maltparser.core.syntaxgraph.node.DependencyNode;
014    
015    /**
016     * A propagation object propagates a column value from one node to a column in another node based on the propagation specification.
017     * 
018     * @author Johan Hall
019     *
020     */
021    public class Propagation {
022            /**
023             * 
024             */
025            private SymbolTable fromTable;
026            private SymbolTable toTable;
027            private SymbolTable deprelTable;
028            private SortedSet<String> forSet;
029            private SortedSet<String> overSet;
030            
031            private Pattern symbolSeparator;
032            
033            /**
034             * Creates a propagation object based on the propagation specification
035             * 
036             * @param spec a propagation specification
037             * @param dataFormatInstance a data format instance
038             * @throws MaltChainedException
039             */
040            public Propagation(PropagationSpec spec, DataFormatInstance dataFormatInstance) throws MaltChainedException {
041                    ColumnDescription fromColumn = dataFormatInstance.getColumnDescriptionByName(spec.getFrom());
042                    if (fromColumn == null) {
043                            throw new PropagationException("The symbol table '"+spec.getFrom()+" does not exists.");
044                    }
045                    fromTable = fromColumn.getSymbolTable();
046    
047                    ColumnDescription toColumn = dataFormatInstance.getColumnDescriptionByName(spec.getTo());
048                    if (toColumn == null) {
049                            toColumn = dataFormatInstance.addInternalColumnDescription(spec.getTo(), fromColumn);
050                            toTable = toColumn.getSymbolTable();
051                    }
052    
053                    
054                    forSet = new TreeSet<String>();
055                    if (spec.getFor() != null && spec.getFor().length() > 0) {
056                            String[] items = spec.getFor().split("\\|");
057                            
058                            for (String item : items) {
059                                    forSet.add(item);
060                            }
061                    }
062                    
063                    overSet = new TreeSet<String>();
064                    if (spec.getOver() != null && spec.getOver().length() > 0) {
065                            String[] items = spec.getOver().split("\\|");
066                            
067                            for (String item : items) {
068                                    overSet.add(item);
069                            }
070                    }
071                    
072                    ColumnDescription deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
073                    deprelTable = deprelColumn.getSymbolTable();
074                    symbolSeparator = Pattern.compile("\\|");
075            }
076    
077            /**
078             * Propagate columns according to the propagation specification
079             * 
080             * @param e an edge 
081             * @throws MaltChainedException
082             */
083            public void propagate(Edge e) throws MaltChainedException {
084                    if (e != null && e.hasLabel(deprelTable) && !e.getSource().isRoot()) {
085                            if (overSet.size() == 0 || overSet.contains(e.getLabelSymbol(deprelTable))) {
086                                    DependencyNode to = (DependencyNode)e.getSource();
087                                    DependencyNode from = (DependencyNode)e.getTarget();
088                                    String fromSymbol = null;
089                                    if (e.hasLabel(fromTable)) {
090                                            fromSymbol = e.getLabelSymbol(fromTable);
091                                    } else if (from.hasLabel(fromTable)) {
092                                            fromSymbol = from.getLabelSymbol(fromTable);
093                                    }
094                                    
095                                    String propSymbol = null;
096                                    if (to.hasLabel(toTable)) {
097                                            propSymbol = union(fromSymbol, to.getLabelSymbol(toTable));
098                                    } else {
099                                            if (forSet.size() == 0 || forSet.contains(fromSymbol)) {
100                                                    propSymbol = fromSymbol;
101                                            }
102                                    }
103                                    if (propSymbol != null) {
104                                            to.addLabel(toTable, propSymbol);
105                                    }
106                            }
107                    }
108            }
109            
110            private String union(String fromSymbol, String toSymbol) {
111                    SortedSet<String> symbolSet = new TreeSet<String>();
112                    
113                    if (fromSymbol != null && fromSymbol.length() != 0) {
114                            String[] fromSymbols = symbolSeparator.split(fromSymbol);
115                            for (int i = 0; i < fromSymbols.length; i++) {
116                                    if (forSet.size() == 0 || forSet.contains(fromSymbols[i])) {
117                                            symbolSet.add(fromSymbols[i]);
118                                    }
119                            }
120                    }
121                    if (toSymbol != null && toSymbol.length() != 0) {
122                            String[] toSymbols = symbolSeparator.split(toSymbol);
123                            for (int i = 0; i < toSymbols.length; i++) {
124                                    symbolSet.add(toSymbols[i]);
125                            }
126                    }
127                    
128                    if (symbolSet.size() > 0) {
129                            StringBuilder sb = new StringBuilder();
130                            for (String symbol : symbolSet) {
131                                    sb.append(symbol);
132                                    sb.append('|');
133                            }
134                            sb.setLength(sb.length()-1);
135                            return sb.toString();
136                    }
137    
138                    
139                    return "";
140            }
141            @Override
142            public String toString() {
143                    return "Propagation [forSet=" + forSet + ", fromTable=" + fromTable
144                                    + ", overSet=" + overSet + ", toTable=" + toTable + "]";
145            }
146    }