package org.maltparser.core.propagation;

import java.util.SortedSet;
import java.util.TreeSet;
import java.util.regex.Pattern;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.propagation.spec.PropagationSpec;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.syntaxgraph.edge.Edge;
import org.maltparser.core.syntaxgraph.node.DependencyNode;

/**
 * A propagation object propagates a column value from one node to a column in another node
 * based on the propagation specification.
 *
 * @author Johan Hall
 *
 */
public class Propagation {
    /** Splits a composite value such as {@code A|B} into its individual symbols. */
    private static final Pattern SYMBOL_SEPARATOR = Pattern.compile("\\|");

    /** Name of the column whose symbol table is used to read the edge label checked against the OVER set. */
    private static final String DEPREL_COLUMN = "DEPREL";

    /** Symbol table of the column the value is read from. */
    private final SymbolTable fromTable;
    /** Symbol table of the column the value is written to. */
    private final SymbolTable toTable;
    /** Symbol table of the {@code DEPREL} column. */
    private final SymbolTable deprelTable;
    /** Symbols that may be propagated; an empty set means "no restriction". */
    private final SortedSet<String> forSet;
    /** Edge labels over which propagation is allowed; an empty set means "no restriction". */
    private final SortedSet<String> overSet;

    /**
     * Creates a propagation object based on the propagation specification
     *
     * @param spec a propagation specification
     * @param dataFormatInstance a data format instance
     * @throws MaltChainedException if the FROM column or the DEPREL column is not
     *         defined in the data format instance
     */
    public Propagation(PropagationSpec spec, DataFormatInstance dataFormatInstance) throws MaltChainedException {
        ColumnDescription fromColumn = dataFormatInstance.getColumnDescriptionByName(spec.getFrom());
        if (fromColumn == null) {
            throw new PropagationException("The symbol table '" + spec.getFrom() + "' does not exist.");
        }
        fromTable = fromColumn.getSymbolTable();

        ColumnDescription toColumn = dataFormatInstance.getColumnDescriptionByName(spec.getTo());
        if (toColumn == null) {
            // The destination column is not part of the data format yet: create it as an
            // internal column modelled on the source column.
            toColumn = dataFormatInstance.addInternalColumnDescription(spec.getTo(), fromColumn);
        }
        // Resolve the destination table whether the column already existed or was just
        // created; otherwise toTable would stay null for a pre-existing column.
        toTable = toColumn.getSymbolTable();

        forSet = parseSymbolSet(spec.getFor());
        overSet = parseSymbolSet(spec.getOver());

        ColumnDescription deprelColumn = dataFormatInstance.getColumnDescriptionByName(DEPREL_COLUMN);
        if (deprelColumn == null) {
            throw new PropagationException("The symbol table '" + DEPREL_COLUMN + "' does not exist.");
        }
        deprelTable = deprelColumn.getSymbolTable();
    }

    /**
     * Splits a {@code |}-separated specification value into a sorted set of symbols.
     *
     * @param value the raw specification value, may be {@code null} or empty
     * @return the symbols contained in {@code value}; empty if {@code value} is {@code null} or empty
     */
    private static SortedSet<String> parseSymbolSet(String value) {
        SortedSet<String> symbols = new TreeSet<String>();
        if (value != null && value.length() > 0) {
            for (String item : SYMBOL_SEPARATOR.split(value)) {
                symbols.add(item);
            }
        }
        return symbols;
    }

    /**
     * Propagate columns according to the propagation specification.
     *
     * <p>Nothing happens if the edge is {@code null}, has no DEPREL label, starts at the root,
     * or carries a DEPREL label that is not in a non-empty OVER set. Otherwise the FROM value is
     * read from the edge itself or, failing that, from the edge's target node, and is written to
     * the TO column of the edge's source node. If the source node already has a TO value, the two
     * values are merged (see {@link #union(String, String)}).</p>
     *
     * @param e an edge
     * @throws MaltChainedException
     */
    public void propagate(Edge e) throws MaltChainedException {
        if (e == null || !e.hasLabel(deprelTable) || e.getSource().isRoot()) {
            return;
        }
        if (!overSet.isEmpty() && !overSet.contains(e.getLabelSymbol(deprelTable))) {
            return;
        }

        DependencyNode to = (DependencyNode) e.getSource();
        DependencyNode from = (DependencyNode) e.getTarget();

        // A label on the edge takes precedence over a label on the target node.
        String fromSymbol = null;
        if (e.hasLabel(fromTable)) {
            fromSymbol = e.getLabelSymbol(fromTable);
        } else if (from.hasLabel(fromTable)) {
            fromSymbol = from.getLabelSymbol(fromTable);
        }

        String propSymbol = null;
        if (to.hasLabel(toTable)) {
            propSymbol = union(fromSymbol, to.getLabelSymbol(toTable));
        } else if (fromSymbol != null && (forSet.isEmpty() || forSet.contains(fromSymbol))) {
            // The null guard matters: TreeSet.contains(null) throws NullPointerException
            // when the set uses natural ordering.
            propSymbol = fromSymbol;
        }

        if (propSymbol != null) {
            to.addLabel(toTable, propSymbol);
        }
    }

    /**
     * Merges two {@code |}-separated symbol lists into one sorted, duplicate-free list.
     * Symbols coming from {@code fromSymbol} are kept only if the FOR set is empty or contains
     * them; symbols from {@code toSymbol} are always kept.
     *
     * @param fromSymbol the value to propagate, may be {@code null} or empty
     * @param toSymbol the value already present at the destination, may be {@code null} or empty
     * @return the merged symbols joined by {@code |}, or the empty string if no symbol remains
     */
    private String union(String fromSymbol, String toSymbol) {
        SortedSet<String> symbolSet = new TreeSet<String>();

        if (fromSymbol != null && fromSymbol.length() != 0) {
            for (String symbol : SYMBOL_SEPARATOR.split(fromSymbol)) {
                if (forSet.isEmpty() || forSet.contains(symbol)) {
                    symbolSet.add(symbol);
                }
            }
        }
        if (toSymbol != null && toSymbol.length() != 0) {
            for (String symbol : SYMBOL_SEPARATOR.split(toSymbol)) {
                symbolSet.add(symbol);
            }
        }

        if (symbolSet.isEmpty()) {
            return "";
        }

        StringBuilder sb = new StringBuilder();
        for (String symbol : symbolSet) {
            sb.append(symbol);
            sb.append('|');
        }
        // Drop the trailing separator appended after the last symbol.
        sb.setLength(sb.length() - 1);
        return sb.toString();
    }

    @Override
    public String toString() {
        return "Propagation [forSet=" + forSet + ", fromTable=" + fromTable
                + ", overSet=" + overSet + ", toTable=" + toTable + "]";
    }
}