001package org.maltparser.core.lw.graph; 002 003import java.util.ArrayList; 004import java.util.Arrays; 005import java.util.List; 006 007import org.maltparser.core.exception.MaltChainedException; 008import org.maltparser.core.symbol.SymbolTable; 009import org.maltparser.core.syntaxgraph.DependencyStructure; 010import org.maltparser.core.syntaxgraph.edge.Edge; 011import org.maltparser.core.syntaxgraph.node.DependencyNode; 012 013/** 014* A lightweight version of pseudo projectivity and this class can only perform deprojectivizing. The class is based on 015* the more complex class org.maltparser.transform.pseudo.PseudoProjectivity. 016* 017* 018* @author Johan Hall 019*/ 020public final class LWDeprojectivizer { 021 public static final int NONE = 0; 022 public static final int BASELINE = 1; 023 public static final int HEAD = 2; 024 public static final int PATH = 3; 025 public static final int HEADPATH = 4; 026 public static final int TRACE = 5; 027 028 public int counter = 0; 029 public LWDeprojectivizer() { } 030 031 public static int getMarkingStrategyInt(String markingStrategyString) { 032 if (markingStrategyString.equalsIgnoreCase("none")) { 033 return LWDeprojectivizer.NONE; 034 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 035 return LWDeprojectivizer.BASELINE; 036 } else if (markingStrategyString.equalsIgnoreCase("head")) { 037 return LWDeprojectivizer.HEAD; 038 } else if (markingStrategyString.equalsIgnoreCase("path")) { 039 return LWDeprojectivizer.PATH; 040 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 041 return LWDeprojectivizer.HEADPATH; 042 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 043 return LWDeprojectivizer.TRACE; 044 } 045 return LWDeprojectivizer.NONE; 046 } 047 048 public void deprojectivize(DependencyStructure pdg, int markingStrategy) throws MaltChainedException { 049 SymbolTable deprelSymbolTable = pdg.getSymbolTables().getSymbolTable("DEPREL"); 050 SymbolTable ppliftedSymbolTable = pdg.getSymbolTables().getSymbolTable("PPLIFTED"); 051 SymbolTable pppathSymbolTable = pdg.getSymbolTables().getSymbolTable("PPPATH"); 052 boolean[] nodeLifted = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodeLifted, false); 053 boolean[] nodePath = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodePath, false); 054 String[] synacticHeadDeprel = new String[pdg.nDependencyNode()]; Arrays.fill(synacticHeadDeprel, null); 055 056 for (int index : pdg.getTokenIndices()) { 057 Edge e = pdg.getDependencyNode(index).getHeadEdge(); 058 if (e.hasLabel(deprelSymbolTable)) { 059 if (e.hasLabel(pppathSymbolTable) && pppathSymbolTable.getSymbolCodeToString(e.getLabelCode(pppathSymbolTable)).equals("#true#")) { 060 nodePath[pdg.getDependencyNode(index).getIndex()] = true; 061 } 062 if (e.hasLabel(ppliftedSymbolTable) && !ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 063 nodeLifted[index] = true; 064 065 if (!ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 066 synacticHeadDeprel[index] = ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)); 067 } 068 } 069 } 070 } 071 deattachCoveredRootsForDeprojectivization(pdg, deprelSymbolTable); 072 if (markingStrategy == LWDeprojectivizer.HEAD && needsDeprojectivizeWithHead(pdg, nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) { 073 deprojectivizeWithHead(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable); 074 } else if (markingStrategy == LWDeprojectivizer.PATH) { 075 deprojectivizeWithPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath); 076 } else if (markingStrategy == LWDeprojectivizer.HEADPATH) { 077 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable); 078 } 079 } 080 081 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg, SymbolTable deprelSymbolTable) throws MaltChainedException { 082 SymbolTable ppcoveredRootSymbolTable = pdg.getSymbolTables().getSymbolTable("PPCOVERED"); 083 for (int index : pdg.getTokenIndices()) { 084 Edge e = pdg.getDependencyNode(index).getHeadEdge(); 085 if (e.hasLabel(deprelSymbolTable)) { 086 if (e.hasLabel(ppcoveredRootSymbolTable) && ppcoveredRootSymbolTable.getSymbolCodeToString(e.getLabelCode(ppcoveredRootSymbolTable)).equals("#true#")) { 087 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 088 } 089 } 090 } 091 } 092 093 // Check whether there is at least one node in the specified dependency structure that can be lifted. 094 // If this is not the case, there is no need to call deprojectivizeWithHead. 095 096 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException { 097 for (int index : pdg.getDependencyIndices()) { 098 if (nodeLifted[index]) { 099 DependencyNode node = pdg.getDependencyNode(index); 100 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel[index], nodePath, deprelSymbolTable) != null) { 101 return true; 102 } 103 } 104 } 105 return false; 106 } 107 108 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException { 109 boolean success = true, childSuccess = false; 110 int i, childAttempts = (counter < 10000 ? 2 : 1); 111 DependencyNode possibleSyntacticHead; 112 String syntacticHeadDeprel; 113 114 counter++; 115 if (nodeLifted[node.getIndex()]) { 116 syntacticHeadDeprel = synacticHeadDeprel[node.getIndex()]; 117 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel, nodePath, deprelSymbolTable); 118 if (possibleSyntacticHead != null) { 119 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 120 nodeLifted[node.getIndex()] = false; 121 } else { 122 success = false; 123 } 124 } 125 while (!childSuccess && childAttempts > 0) { 126 childSuccess = true; 127 128 List<DependencyNode> children = node.getListOfDependents(); 129 for (i = 0; i < children.size(); i++) { 130 if (!deprojectivizeWithHead(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) { 131 childSuccess = false; 132 } 133 } 134 childAttempts--; 135 } 136 return childSuccess && success; 137 } 138 139 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel, boolean[] nodePath, SymbolTable deprelSymbolTable) 140 throws MaltChainedException { 141 DependencyNode dependent; 142 String dependentDeprel; 143 List<DependencyNode> nodes = new ArrayList<DependencyNode>(); 144 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false, nodePath)); 145 while (nodes.size() > 0) { 146 dependent = nodes.remove(0); 147 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 148 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 149 if (dependentDeprel.equals(syntacticHeadDeprel)) { 150 return dependent; 151 } 152 } 153 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false, nodePath)); 154 } 155 return null; 156 } 157 158 159 private List<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 160 boolean percentOnly, boolean[] nodePath) { 161 List<DependencyNode> output = new ArrayList<DependencyNode>(); 162 List<DependencyNode> dependents = governor.getListOfDependents(); 163// SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>(); 164// dependents.addAll(governor.getLeftDependents()); 165// dependents.addAll(governor.getRightDependents()); 166 167 168 DependencyNode[] deps = new DependencyNode[dependents.size()]; 169 int[] distances = new int[dependents.size()]; 170 for (int i = 0; i < dependents.size(); i++) { 171 distances[i] = Math.abs(dependents.get(i).getIndex() - avoid.getIndex()); 172 deps[i] = dependents.get(i); 173 } 174 if (distances.length > 1) { 175 int smallest; 176 int n = distances.length; 177 int tmpDist; 178 DependencyNode tmpDep; 179 for (int i=0; i < n; i++) { 180 smallest = i; 181 for (int j=i; j < n; j++) { 182 if (distances[j] < distances[smallest]) { 183 smallest = j; 184 } 185 } 186 if (smallest != i) { 187 tmpDist = distances[smallest]; 188 distances[smallest] = distances[i]; 189 distances[i] = tmpDist; 190 tmpDep = deps[smallest]; 191 deps[smallest] = deps[i]; 192 deps[i] = tmpDep; 193 } 194 } 195 } 196 for (int i=0; i<distances.length;i++) { 197 if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath[deps[i].getIndex()]))) { 198 output.add(deps[i]); 199 } 200 } 201 return output; 202 } 203 204 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath) throws MaltChainedException { 205 boolean success = true, childSuccess = false; 206 int i, childAttempts = (counter < 10000 ? 2 : 1); 207 DependencyNode possibleSyntacticHead; 208 209 counter++; 210 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) { 211 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath); 212 if (possibleSyntacticHead != null) { 213 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 214 nodeLifted[node.getIndex()] = false; 215 } else { 216 success = false; 217 } 218 } 219 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) { 220 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath); 221 if (possibleSyntacticHead != null) { 222 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 223 nodeLifted[node.getIndex()] = false; 224 } else { 225 success = false; 226 } 227 } 228 while (!childSuccess && childAttempts > 0) { 229 childSuccess = true; 230 List<DependencyNode> children = node.getListOfDependents(); 231 for (i = 0; i < children.size(); i++) { 232 if (!deprojectivizeWithPath(pdg, children.get(i), nodeLifted, nodePath)) { 233 childSuccess = false; 234 } 235 } 236 childAttempts--; 237 } 238 return childSuccess && success; 239 } 240 241 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, boolean[] nodePath) { 242 DependencyNode dependent; 243 List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes; 244 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath)); 245 while (nodes.size() > 0) { 246 dependent = nodes.remove(0); 247 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0) { 248 return dependent; 249 } 250 nodes.addAll(newNodes); 251 } 252 return null; 253 } 254 255 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException { 256 boolean success = true, childSuccess = false; 257 int i, childAttempts = (counter < 10000 ? 2 : 1); 258 DependencyNode possibleSyntacticHead; 259 260 counter++; 261 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) { 262 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable); 263 if (possibleSyntacticHead != null) { 264 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 265 nodeLifted[node.getIndex()] = false; 266 } else { 267 success = false; 268 } 269 } 270 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) { 271 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable); 272 if (possibleSyntacticHead != null) { 273 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 274 nodeLifted[node.getIndex()] = false; 275 } else { 276 success = false; 277 } 278 } 279 while (!childSuccess && childAttempts > 0) { 280 childSuccess = true; 281 List<DependencyNode> children = node.getListOfDependents(); 282 for (i = 0; i < children.size(); i++) { 283 if (!deprojectivizeWithHeadAndPath(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) { 284 childSuccess = false; 285 } 286 } 287 childAttempts--; 288 } 289 return childSuccess && success; 290 } 291 292 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode, boolean[] nodePath, SymbolTable deprelSymbolTable) 293 throws MaltChainedException { 294 DependencyNode dependent; 295 List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes = null, secondChance = new ArrayList<DependencyNode>(); 296 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath)); 297 while (nodes.size() > 0) { 298 dependent = nodes.remove(0); 299 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0 300 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 301 return dependent; 302 } 303 nodes.addAll(newNodes); 304 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 305 && newNodes.size() != 0) { 306 secondChance.add(dependent); 307 } 308 } 309 if (secondChance.size() > 0) { 310 return secondChance.get(0); 311 } 312 return null; 313 } 314}