package org.maltparser.core.propagation;

import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.propagation.spec.PropagationSpec;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.edge.Edge;
import org.maltparser.core.syntaxgraph.node.DependencyNode;

/**
 * A propagation object propagates a column value from one node to a column in another node,
 * based on a propagation specification.
 * <p>
 * Values are copied from the target node of an edge (or from the edge itself) to the source
 * node of that edge. Multi-valued symbols are represented as a '|'-separated string.
 *
 * @author Johan Hall
 *
 */
public class Propagation {
    /** Separator between the individual symbols of a multi-valued symbol string. */
    private static final Pattern SYMBOL_SEPARATOR = Pattern.compile("\\|");

    /** Symbol table of the column the value is read from. */
    private final SymbolTable fromTable;
    /** Symbol table of the column the value is written to. */
    private final SymbolTable toTable;
    /** Symbol table looked up under the name "DEPREL"; used to read the edge label. */
    private final SymbolTable deprelTable;
    /** Symbols that are allowed to be propagated; an empty set means "no restriction". */
    private final SortedSet<String> forSet;
    /** DEPREL labels over which propagation takes place; an empty set means "no restriction". */
    private final SortedSet<String> overSet;

    /**
     * Creates a propagation object based on the propagation specification
     *
     * @param spec a propagation specification
     * @param dataFormatInstance a data format instance
     * @param tableHandler the symbol table handler used to look up the symbol tables
     * @throws MaltChainedException if the column named by {@code spec.getFrom()} is not part of the
     *         data format instance, or if one of the called lookups fails
     */
    public Propagation(PropagationSpec spec, DataFormatInstance dataFormatInstance, SymbolTableHandler tableHandler) throws MaltChainedException {
        final ColumnDescription fromColumn = dataFormatInstance.getColumnDescriptionByName(spec.getFrom());
        if (fromColumn == null) {
            throw new PropagationException("The symbol table '" + spec.getFrom() + "' does not exist.");
        }
        fromTable = tableHandler.getSymbolTable(spec.getFrom());

        // The target column is not part of the data format yet: add it as an internal column,
        // passing the source column description along.
        if (dataFormatInstance.getColumnDescriptionByName(spec.getTo()) == null) {
            dataFormatInstance.addInternalColumnDescription(tableHandler, spec.getTo(), fromColumn);
        }
        // Look the target table up in both cases. It used to be assigned only when the column
        // had just been created, which left it null for an already existing target column.
        toTable = tableHandler.getSymbolTable(spec.getTo());

        forSet = parseSymbolSet(spec.getFor());
        overSet = parseSymbolSet(spec.getOver());

        deprelTable = tableHandler.getSymbolTable("DEPREL");
    }

    /**
     * Splits a '|'-separated specification string into a sorted set of symbols.
     *
     * @param symbols the specification string, may be null or empty
     * @return the set of symbols; empty if {@code symbols} is null or empty
     */
    private static SortedSet<String> parseSymbolSet(String symbols) {
        final SortedSet<String> result = new TreeSet<String>();
        if (symbols != null && symbols.length() > 0) {
            for (String item : SYMBOL_SEPARATOR.split(symbols)) {
                result.add(item);
            }
        }
        return result;
    }

    /**
     * Propagate columns according to the propagation specification
     * <p>
     * Nothing happens if the edge is null, has no label in the DEPREL table, has a root node as
     * its source, or has a DEPREL label that is not in the (non-empty) over set.
     *
     * @param e an edge
     * @throws MaltChainedException
     */
    public void propagate(Edge e) throws MaltChainedException {
        if (e == null || !e.hasLabel(deprelTable) || e.getSource().isRoot()) {
            return;
        }
        if (!overSet.isEmpty() && !overSet.contains(e.getLabelSymbol(deprelTable))) {
            return;
        }

        final DependencyNode to = (DependencyNode) e.getSource();
        final DependencyNode from = (DependencyNode) e.getTarget();

        // A label on the edge itself takes precedence over the label of the target node.
        String fromSymbol = null;
        if (e.hasLabel(fromTable)) {
            fromSymbol = e.getLabelSymbol(fromTable);
        } else if (from.hasLabel(fromTable)) {
            fromSymbol = from.getLabelSymbol(fromTable);
        }

        String propSymbol = null;
        if (to.hasLabel(toTable)) {
            // The receiving node already has a value: merge the new symbols into it.
            propSymbol = union(fromSymbol, to.getLabelSymbol(toTable));
        } else if (fromSymbol != null && (forSet.isEmpty() || forSet.contains(fromSymbol))) {
            // The null check matters: TreeSet.contains(null) throws a NullPointerException
            // on a non-empty set, and there is nothing to propagate without a symbol anyway.
            propSymbol = fromSymbol;
        }

        if (propSymbol != null) {
            to.addLabel(toTable, propSymbol);
        }
    }

    /**
     * Merges two '|'-separated symbol strings into one sorted, duplicate-free '|'-separated string.
     * Symbols of {@code fromSymbol} are only included if the for set is empty or contains them;
     * symbols of {@code toSymbol} are always kept.
     *
     * @param fromSymbol the symbols to propagate, may be null or empty
     * @param toSymbol the symbols already present, may be null or empty
     * @return the merged symbol string, or an empty string if no symbol remains
     */
    private String union(String fromSymbol, String toSymbol) {
        final SortedSet<String> symbolSet = new TreeSet<String>();

        if (fromSymbol != null && fromSymbol.length() != 0) {
            for (String symbol : SYMBOL_SEPARATOR.split(fromSymbol)) {
                if (forSet.isEmpty() || forSet.contains(symbol)) {
                    symbolSet.add(symbol);
                }
            }
        }
        if (toSymbol != null && toSymbol.length() != 0) {
            for (String symbol : SYMBOL_SEPARATOR.split(toSymbol)) {
                symbolSet.add(symbol);
            }
        }

        if (symbolSet.isEmpty()) {
            return "";
        }

        final StringBuilder sb = new StringBuilder();
        for (String symbol : symbolSet) {
            sb.append(symbol);
            sb.append('|');
        }
        // Drop the trailing separator.
        sb.setLength(sb.length() - 1);
        return sb.toString();
    }

    @Override
    public String toString() {
        return "Propagation [forSet=" + forSet + ", fromTable=" + fromTable
                + ", overSet=" + overSet + ", toTable=" + toTable + "]";
    }
}