001 package org.maltparser.core.propagation;
002
003 import java.util.SortedSet;
004 import java.util.TreeSet;
005 import java.util.regex.Pattern;
006
007 import org.maltparser.core.exception.MaltChainedException;
008 import org.maltparser.core.io.dataformat.ColumnDescription;
009 import org.maltparser.core.io.dataformat.DataFormatInstance;
010 import org.maltparser.core.propagation.spec.PropagationSpec;
011 import org.maltparser.core.symbol.SymbolTable;
012 import org.maltparser.core.syntaxgraph.edge.Edge;
013 import org.maltparser.core.syntaxgraph.node.DependencyNode;
014
015 /**
016 * A propagation object propagate a column value from one node to a column in another node based on the propagation specification.
017 *
018 * @author Johan Hall
019 *
020 */
021 public class Propagation {
022 /**
023 *
024 */
025 private SymbolTable fromTable;
026 private SymbolTable toTable;
027 private SymbolTable deprelTable;
028 private SortedSet<String> forSet;
029 private SortedSet<String> overSet;
030
031 private Pattern symbolSeparator;
032
033 /**
034 * Creates a propagation object based on the propagation specification
035 *
036 * @param spec a propagation specification
037 * @param dataFormatInstance a data format instance
038 * @throws MaltChainedException
039 */
040 public Propagation(PropagationSpec spec, DataFormatInstance dataFormatInstance) throws MaltChainedException {
041 ColumnDescription fromColumn = dataFormatInstance.getColumnDescriptionByName(spec.getFrom());
042 if (fromColumn == null) {
043 throw new PropagationException("The symbol table '"+spec.getFrom()+" does not exists.");
044 }
045 fromTable = fromColumn.getSymbolTable();
046
047 ColumnDescription toColumn = dataFormatInstance.getColumnDescriptionByName(spec.getTo());
048 if (toColumn == null) {
049 toColumn = dataFormatInstance.addInternalColumnDescription(spec.getTo(), fromColumn);
050 toTable = toColumn.getSymbolTable();
051 }
052
053
054 forSet = new TreeSet<String>();
055 if (spec.getFor() != null && spec.getFor().length() > 0) {
056 String[] items = spec.getFor().split("\\|");
057
058 for (String item : items) {
059 forSet.add(item);
060 }
061 }
062
063 overSet = new TreeSet<String>();
064 if (spec.getOver() != null && spec.getOver().length() > 0) {
065 String[] items = spec.getOver().split("\\|");
066
067 for (String item : items) {
068 overSet.add(item);
069 }
070 }
071
072 ColumnDescription deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
073 deprelTable = deprelColumn.getSymbolTable();
074 symbolSeparator = Pattern.compile("\\|");
075 }
076
077 /**
078 * Propagate columns according to the propagation specification
079 *
080 * @param e an edge
081 * @throws MaltChainedException
082 */
083 public void propagate(Edge e) throws MaltChainedException {
084 if (e != null && e.hasLabel(deprelTable) && !e.getSource().isRoot()) {
085 if (overSet.size() == 0 || overSet.contains(e.getLabelSymbol(deprelTable))) {
086 DependencyNode to = (DependencyNode)e.getSource();
087 DependencyNode from = (DependencyNode)e.getTarget();
088 String fromSymbol = null;
089 if (e.hasLabel(fromTable)) {
090 fromSymbol = e.getLabelSymbol(fromTable);
091 } else if (from.hasLabel(fromTable)) {
092 fromSymbol = from.getLabelSymbol(fromTable);
093 }
094
095 String propSymbol = null;
096 if (to.hasLabel(toTable)) {
097 propSymbol = union(fromSymbol, to.getLabelSymbol(toTable));
098 } else {
099 if (forSet.size() == 0 || forSet.contains(fromSymbol)) {
100 propSymbol = fromSymbol;
101 }
102 }
103 if (propSymbol != null) {
104 to.addLabel(toTable, propSymbol);
105 }
106 }
107 }
108 }
109
110 private String union(String fromSymbol, String toSymbol) {
111 SortedSet<String> symbolSet = new TreeSet<String>();
112
113 if (fromSymbol != null && fromSymbol.length() != 0) {
114 String[] fromSymbols = symbolSeparator.split(fromSymbol);
115 for (int i = 0; i < fromSymbols.length; i++) {
116 if (forSet.size() == 0 || forSet.contains(fromSymbols[i])) {
117 symbolSet.add(fromSymbols[i]);
118 }
119 }
120 }
121 if (toSymbol != null && toSymbol.length() != 0) {
122 String[] toSymbols = symbolSeparator.split(toSymbol);
123 for (int i = 0; i < toSymbols.length; i++) {
124 symbolSet.add(toSymbols[i]);
125 }
126 }
127
128 if (symbolSet.size() > 0) {
129 StringBuilder sb = new StringBuilder();
130 for (String symbol : symbolSet) {
131 sb.append(symbol);
132 sb.append('|');
133 }
134 sb.setLength(sb.length()-1);
135 return sb.toString();
136 }
137
138
139 return "";
140 }
141 @Override
142 public String toString() {
143 return "Propagation [forSet=" + forSet + ", fromTable=" + fromTable
144 + ", overSet=" + overSet + ", toTable=" + toTable + "]";
145 }
146 }