001package org.maltparser.core.lw.graph; 002 003import java.util.ArrayList; 004import java.util.Arrays; 005import java.util.List; 006 007import org.maltparser.core.exception.MaltChainedException; 008import org.maltparser.core.symbol.SymbolTable; 009import org.maltparser.core.syntaxgraph.DependencyStructure; 010import org.maltparser.core.syntaxgraph.edge.Edge; 011import org.maltparser.core.syntaxgraph.node.DependencyNode; 012 013/** 014* A lightweight version of pseudo projectivity and this class can only perform deprojectivizing. The class is based on 015* the more complex class org.maltparser.transform.pseudo.PseudoProjectivity. 016* 017* 018* @author Johan Hall 019*/ 020public final class LWDeprojectivizer { 021 public static final int NONE = 0; 022 public static final int BASELINE = 1; 023 public static final int HEAD = 1; 024 public static final int PATH = 1; 025 public static final int HEADPATH = 1; 026 public static final int TRACE = 1; 027 028 public LWDeprojectivizer() { } 029 030 public static int getMarkingStrategyInt(String markingStrategyString) { 031 if (markingStrategyString.equalsIgnoreCase("none")) { 032 return LWDeprojectivizer.NONE; 033 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 034 return LWDeprojectivizer.BASELINE; 035 } else if (markingStrategyString.equalsIgnoreCase("head")) { 036 return LWDeprojectivizer.HEAD; 037 } else if (markingStrategyString.equalsIgnoreCase("path")) { 038 return LWDeprojectivizer.PATH; 039 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 040 return LWDeprojectivizer.HEADPATH; 041 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 042 return LWDeprojectivizer.TRACE; 043 } 044 return LWDeprojectivizer.NONE; 045 } 046 047 public void deprojectivize(DependencyStructure pdg, int markingStrategy) throws MaltChainedException { 048 SymbolTable deprelSymbolTable = pdg.getSymbolTables().getSymbolTable("DEPREL"); 049 SymbolTable ppliftedSymbolTable = pdg.getSymbolTables().getSymbolTable("PPLIFTED"); 050 SymbolTable pppathSymbolTable = pdg.getSymbolTables().getSymbolTable("PPPATH"); 051 052 boolean[] nodeLifted = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodeLifted, false); 053 boolean[] nodePath = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodePath, false); 054 String[] synacticHeadDeprel = new String[pdg.nDependencyNode()]; Arrays.fill(synacticHeadDeprel, null); 055 056 for (int index : pdg.getTokenIndices()) { 057 Edge e = pdg.getDependencyNode(index).getHeadEdge(); 058 if (e.hasLabel(deprelSymbolTable)) { 059 if (e.hasLabel(pppathSymbolTable) && pppathSymbolTable.getSymbolCodeToString(e.getLabelCode(pppathSymbolTable)).equals("#true#")) { 060 nodePath[pdg.getDependencyNode(index).getIndex()] = true; 061 } 062 if (e.hasLabel(ppliftedSymbolTable) && !ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 063 nodeLifted[index] = true; 064 065 if (!ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 066 synacticHeadDeprel[index] = ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)); 067 } 068 } 069 } 070 } 071 deattachCoveredRootsForDeprojectivization(pdg, deprelSymbolTable); 072 if (markingStrategy == LWDeprojectivizer.HEAD && needsDeprojectivizeWithHead(pdg, nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) { 073 deprojectivizeWithHead(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable); 074 } else if (markingStrategy == LWDeprojectivizer.PATH) { 075 deprojectivizeWithPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath); 076 } else if (markingStrategy == LWDeprojectivizer.HEADPATH) { 077 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable); 078 } 079 } 080 081 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg, SymbolTable deprelSymbolTable) throws MaltChainedException { 082 SymbolTable ppcoveredRootSymbolTable = pdg.getSymbolTables().getSymbolTable("PPCOVERED"); 083 for (int index : pdg.getTokenIndices()) { 084 Edge e = pdg.getDependencyNode(index).getHeadEdge(); 085 if (e.hasLabel(deprelSymbolTable)) { 086 if (e.hasLabel(ppcoveredRootSymbolTable) && ppcoveredRootSymbolTable.getSymbolCodeToString(e.getLabelCode(ppcoveredRootSymbolTable)).equals("#true#")) { 087 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 088 } 089 } 090 } 091 } 092 093 // Check whether there is at least one node in the specified dependency structure that can be lifted. 094 // If this is not the case, there is no need to call deprojectivizeWithHead. 095 096 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException { 097 for (int index : pdg.getDependencyIndices()) { 098 if (nodeLifted[index]) { 099 DependencyNode node = pdg.getDependencyNode(index); 100 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel[index], nodePath, deprelSymbolTable) != null) { 101 return true; 102 } 103 } 104 } 105 return false; 106 } 107 108 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException { 109 boolean success = true, childSuccess = false; 110 int i, childAttempts = 2; 111 DependencyNode possibleSyntacticHead; 112 String syntacticHeadDeprel; 113 if (nodeLifted[node.getIndex()]) { 114 syntacticHeadDeprel = synacticHeadDeprel[node.getIndex()]; 115 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel, nodePath, deprelSymbolTable); 116 if (possibleSyntacticHead != null) { 117 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 118 nodeLifted[node.getIndex()] = false; 119 } else { 120 success = false; 121 } 122 } 123 while (!childSuccess && childAttempts > 0) { 124 childSuccess = true; 125 126 List<DependencyNode> children = node.getListOfDependents(); 127 for (i = 0; i < children.size(); i++) { 128 if (!deprojectivizeWithHead(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) { 129 childSuccess = false; 130 } 131 } 132 childAttempts--; 133 } 134 return childSuccess && success; 135 } 136 137 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel, boolean[] nodePath, SymbolTable deprelSymbolTable) 138 throws MaltChainedException { 139 DependencyNode dependent; 140 String dependentDeprel; 141 List<DependencyNode> nodes = new ArrayList<DependencyNode>(); 142 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false, nodePath)); 143 while (nodes.size() > 0) { 144 dependent = nodes.remove(0); 145 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 146 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 147 if (dependentDeprel.equals(syntacticHeadDeprel)) { 148 return dependent; 149 } 150 } 151 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false, nodePath)); 152 } 153 return null; 154 } 155 156 157 private List<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 158 boolean percentOnly, boolean[] nodePath) { 159 List<DependencyNode> output = new ArrayList<DependencyNode>(); 160 List<DependencyNode> dependents = governor.getListOfDependents(); 161// SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>(); 162// dependents.addAll(governor.getLeftDependents()); 163// dependents.addAll(governor.getRightDependents()); 164 165 166 DependencyNode[] deps = new DependencyNode[dependents.size()]; 167 int[] distances = new int[dependents.size()]; 168 for (int i = 0; i < dependents.size(); i++) { 169 distances[i] = Math.abs(dependents.get(i).getIndex() - avoid.getIndex()); 170 deps[i] = dependents.get(i); 171 } 172 if (distances.length > 1) { 173 int smallest; 174 int n = distances.length; 175 int tmpDist; 176 DependencyNode tmpDep; 177 for (int i=0; i < n; i++) { 178 smallest = i; 179 for (int j=i; j < n; j++) { 180 if (distances[j] < distances[smallest]) { 181 smallest = j; 182 } 183 } 184 if (smallest != i) { 185 tmpDist = distances[smallest]; 186 distances[smallest] = distances[i]; 187 distances[i] = tmpDist; 188 tmpDep = deps[smallest]; 189 deps[smallest] = deps[i]; 190 deps[i] = tmpDep; 191 } 192 } 193 } 194 for (int i=0; i<distances.length;i++) { 195 if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath[deps[i].getIndex()]))) { 196 output.add(deps[i]); 197 } 198 } 199 return output; 200 } 201 202 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath) throws MaltChainedException { 203 boolean success = true, childSuccess = false; 204 int i, childAttempts = 2; 205 DependencyNode possibleSyntacticHead; 206 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) { 207 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath); 208 if (possibleSyntacticHead != null) { 209 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 210 nodeLifted[node.getIndex()] = false; 211 } else { 212 success = false; 213 } 214 } 215 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) { 216 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath); 217 if (possibleSyntacticHead != null) { 218 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 219 nodeLifted[node.getIndex()] = false; 220 } else { 221 success = false; 222 } 223 } 224 while (!childSuccess && childAttempts > 0) { 225 childSuccess = true; 226 List<DependencyNode> children = node.getListOfDependents(); 227 for (i = 0; i < children.size(); i++) { 228 if (!deprojectivizeWithPath(pdg, children.get(i), nodeLifted, nodePath)) { 229 childSuccess = false; 230 } 231 } 232 childAttempts--; 233 } 234 return childSuccess && success; 235 } 236 237 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, boolean[] nodePath) { 238 DependencyNode dependent; 239 List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes; 240 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath)); 241 while (nodes.size() > 0) { 242 dependent = nodes.remove(0); 243 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0) { 244 return dependent; 245 } 246 nodes.addAll(newNodes); 247 } 248 return null; 249 } 250 251 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException { 252 boolean success = true, childSuccess = false; 253 int i, childAttempts = 2; 254 DependencyNode possibleSyntacticHead; 255 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) { 256 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable); 257 if (possibleSyntacticHead != null) { 258 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 259 nodeLifted[node.getIndex()] = false; 260 } else { 261 success = false; 262 } 263 } 264 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) { 265 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable); 266 if (possibleSyntacticHead != null) { 267 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 268 nodeLifted[node.getIndex()] = false; 269 } else { 270 success = false; 271 } 272 } 273 while (!childSuccess && childAttempts > 0) { 274 childSuccess = true; 275 List<DependencyNode> children = node.getListOfDependents(); 276 for (i = 0; i < children.size(); i++) { 277 if (!deprojectivizeWithHeadAndPath(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) { 278 childSuccess = false; 279 } 280 } 281 childAttempts--; 282 } 283 return childSuccess && success; 284 } 285 286 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode, boolean[] nodePath, SymbolTable deprelSymbolTable) 287 throws MaltChainedException { 288 DependencyNode dependent; 289 List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes = null, secondChance = new ArrayList<DependencyNode>(); 290 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath)); 291 while (nodes.size() > 0) { 292 dependent = nodes.remove(0); 293 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0 294 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 295 return dependent; 296 } 297 nodes.addAll(newNodes); 298 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 299 && newNodes.size() != 0) { 300 secondChance.add(dependent); 301 } 302 } 303 if (secondChance.size() > 0) { 304 return secondChance.get(0); 305 } 306 return null; 307 } 308}