001 package org.maltparser.transform.pseudo; 002 003 import java.util.Vector; 004 005 import org.apache.log4j.Logger; 006 import org.maltparser.core.exception.MaltChainedException; 007 import org.maltparser.core.symbol.SymbolTable; 008 import org.maltparser.core.symbol.SymbolTableHandler; 009 import org.maltparser.core.syntaxgraph.DependencyStructure; 010 import org.maltparser.core.syntaxgraph.node.DependencyNode; 011 012 /** 013 * This class contains methods for projectivizing and deprojectivizing 014 * 015 * @author Jens Nilsson 016 * @since 1.0 017 */ 018 public class PseudoProjectivity { 019 static int id = 0; 020 021 private enum PseudoProjectiveEncoding { 022 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE 023 }; 024 025 private enum CoveredRootAttachment { 026 NONE, LEFT, RIGHT, HEAD 027 }; 028 029 private enum LiftingOrder { 030 SHORTEST, DEEPEST 031 }; 032 033 private PseudoProjectiveEncoding markingStrategy; 034 private CoveredRootAttachment rootAttachment; 035 private LiftingOrder liftingOrder; 036 private Logger configLogger; 037 038 private SymbolTable deprelSymbolTable; 039 private SymbolTable pppathSymbolTable; 040 private SymbolTable ppliftedSymbolTable; 041 private SymbolTable ppcoveredRootSymbolTable; 042 private Vector<Boolean> nodeLifted; 043 private Vector<Vector<DependencyNode>> nodeTrace; 044 private Vector<DependencyNode> headDeprel; 045 private Vector<Boolean> nodePath; 046 private Vector<Boolean> isCoveredRoot; 047 private Vector<Integer> nodeRelationLength; 048 private Vector<String> synacticHeadDeprel; 049 050 051 public PseudoProjectivity() { } 052 053 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger, 054 SymbolTableHandler symbolTables) throws MaltChainedException { 055 nodeLifted = new Vector<Boolean>(); 056 nodeTrace = new Vector<Vector<DependencyNode>>(); 057 headDeprel = new Vector<DependencyNode>(); 058 nodePath = new Vector<Boolean>(); 059 isCoveredRoot = new Vector<Boolean>(); 060 nodeRelationLength = new Vector<Integer>(); 061 synacticHeadDeprel = new Vector<String>(); 062 063 this.configLogger = configLogger; 064 if (markingStrategyString.equalsIgnoreCase("none")) { 065 markingStrategy = PseudoProjectiveEncoding.NONE; 066 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 067 markingStrategy = PseudoProjectiveEncoding.BASELINE; 068 } else if (markingStrategyString.equalsIgnoreCase("head")) { 069 markingStrategy = PseudoProjectiveEncoding.HEAD; 070 } else if (markingStrategyString.equalsIgnoreCase("path")) { 071 markingStrategy = PseudoProjectiveEncoding.PATH; 072 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 073 markingStrategy = PseudoProjectiveEncoding.HEADPATH; 074 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 075 markingStrategy = PseudoProjectiveEncoding.TRACE; 076 } 077 078 this.deprelSymbolTable = symbolTables.getSymbolTable("DEPREL"); 079 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH 080 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 081 this.ppliftedSymbolTable = symbolTables.addSymbolTable("PPLIFTED", deprelSymbolTable); 082 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 083 ppliftedSymbolTable.addSymbol("#true#"); 084 ppliftedSymbolTable.addSymbol("#false#"); 085 } else { 086 ppliftedSymbolTable.addSymbol("#false#"); 087 } 088 } 089 090 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 091 pppathSymbolTable = symbolTables.addSymbolTable("PPPATH", deprelSymbolTable); 092 pppathSymbolTable.addSymbol("#true#"); 093 pppathSymbolTable.addSymbol("#false#"); 094 } 095 096 if (coveredRoot.equalsIgnoreCase("none")) { 097 this.rootAttachment = CoveredRootAttachment.NONE; 098 } else if (coveredRoot.equalsIgnoreCase("left")) { 099 this.rootAttachment = CoveredRootAttachment.LEFT; 100 } else if (coveredRoot.equalsIgnoreCase("right")) { 101 this.rootAttachment = CoveredRootAttachment.RIGHT; 102 } else if (coveredRoot.equalsIgnoreCase("head")) { 103 this.rootAttachment = CoveredRootAttachment.HEAD; 104 } 105 106 if (this.rootAttachment != CoveredRootAttachment.NONE) { 107 this.ppcoveredRootSymbolTable = symbolTables.addSymbolTable("PPCOVERED", deprelSymbolTable); 108 ppcoveredRootSymbolTable.addSymbol("#true#"); 109 ppcoveredRootSymbolTable.addSymbol("#false#"); 110 } 111 if (liftingOrder.equalsIgnoreCase("shortest")) { 112 this.liftingOrder = LiftingOrder.SHORTEST; 113 } else if (liftingOrder.equalsIgnoreCase("deepest")) { 114 this.liftingOrder = LiftingOrder.DEEPEST; 115 } 116 } 117 118 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException { 119 nodeLifted.clear(); 120 nodeTrace.clear(); 121 headDeprel.clear(); 122 nodePath.clear(); 123 isCoveredRoot.clear(); 124 nodeRelationLength.clear(); 125 126 for (int index : pdg.getDependencyIndices()) { 127 nodeLifted.add(false); 128 nodeTrace.add(new Vector<DependencyNode>()); 129 headDeprel.add(null); 130 nodePath.add(false); 131 isCoveredRoot.add(false); 132 if (ppliftedSymbolTable != null && index != 0) { 133 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#")); 134 } 135 if (pppathSymbolTable != null && index != 0) { 136 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#")); 137 } 138 if (ppcoveredRootSymbolTable != null && index != 0) { 139 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#")); 140 } 141 } 142 computeRelationLength(pdg); 143 } 144 145 public void projectivize(DependencyStructure pdg) throws MaltChainedException { 146 id++; 147 if (!pdg.isTree()) { 148 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n"); 149 return; 150 } 151 DependencyNode deepestNonProjectiveNode; 152 initProjectivization(pdg); 153 154 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 155 while (!pdg.isProjective()) { 156 if (liftingOrder == LiftingOrder.DEEPEST) { 157 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 158 } else { 159 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 160 } 161 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) { 162 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 163 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 164 setPath(deepestNonProjectiveNode.getHead()); 165 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 166 } 167 } 168 if (rootAttachment == CoveredRootAttachment.NONE) { 169 deattachCoveredRootsForProjectivization(pdg); 170 } 171 } else if (rootAttachment != CoveredRootAttachment.NONE) { 172 for (int index : pdg.getTokenIndices()) { 173 attachCoveredRoots(pdg, pdg.getTokenNode(index)); 174 } 175 } 176 // collectTraceStatistics(pdg); 177 assignPseudoProjectiveDeprels(pdg); 178 } 179 180 181 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException { 182 assignPseudoProjectiveDeprelsForMerge(pdg); 183 } 184 185 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException { 186 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex; 187 String label; 188 initDeprojeciviztion(pdg); 189 for (int index : pdg.getTokenIndices()) { 190 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 191 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 192 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) { 193 label = label.substring(0, pathLabelIndex); 194 setLabel(pdg.getTokenNode(index), label); 195 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#")); 196 } 197 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) { 198 if (movedLabelIndex + 1 < label.length()) { 199 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1))); 200 } else { 201 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#")); 202 } 203 label = label.substring(0, movedLabelIndex); 204 setLabel(pdg.getTokenNode(index), label); 205 } 206 } 207 } 208 for (int index : pdg.getTokenIndices()) { 209 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 210 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 211 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) { 212 label = label.substring(0, coveredArcLabelIndex); 213 setLabel(pdg.getTokenNode(index), label); 214 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 215 } 216 } 217 } 218 } 219 220 private void setHeadDeprel(DependencyNode node, DependencyNode parent) { 221 if (headDeprel.get(node.getIndex()) == null) { 222 headDeprel.set(node.getIndex(), parent); 223 } 224 nodeTrace.set(node.getIndex(), headDeprel); 225 } 226 227 private void setPath(DependencyNode node) { 228 nodePath.set(node.getIndex(), true); 229 } 230 231 private boolean isCoveredRoot(DependencyNode node) { 232 return isCoveredRoot.get(node.getIndex()); 233 } 234 235 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException { 236 for (int index : pdg.getTokenIndices()) { 237 if (isCoveredRoot(pdg.getTokenNode(index))) { 238 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex()); 239 } 240 } 241 } 242 243 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException { 244 int i; 245 boolean foundCoveredRoot = false; 246 DependencyNode coveredRootHead; 247 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead() 248 .getIndex()); i++) { 249 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex(); 250 if (leftMostIndex == -1) { 251 leftMostIndex = i; 252 } 253 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex(); 254 if (rightMostIndex == -1) { 255 rightMostIndex = i; 256 } 257 // if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 258 // && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < pdg.getDependencyNode(i).getLeftmostDescendantIndex() 259 // && pdg.getDependencyNode(i).getRightmostDescendantIndex() < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 260 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 261 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex 262 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 263 if (rootAttachment == CoveredRootAttachment.LEFT) { 264 if (deepest.getHead().getIndex() < deepest.getIndex()) { 265 coveredRootHead = deepest.getHead(); 266 } else { 267 coveredRootHead = deepest; 268 } 269 } else if (rootAttachment == CoveredRootAttachment.RIGHT) { 270 if (deepest.getIndex() < deepest.getHead().getIndex()) { 271 coveredRootHead = deepest.getHead(); 272 } else { 273 coveredRootHead = deepest; 274 } 275 } else { 276 coveredRootHead = deepest.getHead(); 277 } 278 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex()); 279 setCoveredRoot(pdg.getDependencyNode(i)); 280 foundCoveredRoot = true; 281 } 282 } 283 return foundCoveredRoot; 284 } 285 286 private void setCoveredRoot(DependencyNode node) { 287 isCoveredRoot.set(node.getIndex(), true); 288 } 289 290 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 291 DependencyNode deepestNonProjectiveNode = null; 292 for (int index : pdg.getDependencyIndices()) { 293 if (!pdg.getDependencyNode(index).isProjective() 294 && (deepestNonProjectiveNode == null 295 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) { 296 deepestNonProjectiveNode = pdg.getDependencyNode(index); 297 } 298 } 299 300 return deepestNonProjectiveNode; 301 } 302 303 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 304 DependencyNode shortestNonProjectiveNode = null; 305 for (int index : pdg.getDependencyIndices()) { 306 if (!pdg.getDependencyNode(index).isProjective() 307 && (shortestNonProjectiveNode == null 308 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 309 || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) { 310 shortestNonProjectiveNode = pdg.getDependencyNode(index); 311 } 312 } 313 return shortestNonProjectiveNode; 314 } 315 316 317 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException { 318 nodeRelationLength.add(0); 319 for (int index : pdg.getTokenIndices()) { 320 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex())); 321 } 322 } 323 324 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException { 325 int newLabelCode; 326 for (int index : pdg.getTokenIndices()) { 327 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 328 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH 329 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 330 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 331 if (nodeLifted.get(index)) { 332 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#"); 333 } else { 334 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 335 } 336 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 337 } else { 338 if (nodeLifted.get(index)) { 339 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode( 340 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable))); 341 } else { 342 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 343 } 344 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 345 } 346 } 347 348 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 349 if (nodePath.get(index)) { 350 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#"); 351 } else { 352 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#"); 353 } 354 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode); 355 } 356 357 // if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 358 // if (nodeLifted.get(i)) { 359 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getNode(i).getLabelCode(deprelSymbolTable)) + "|"; 360 // addArc(pdg, pdg.getNode(i), pdg.getNode(i).getHead(), newLabel); 361 // } 362 // } 363 } else if (rootAttachment != CoveredRootAttachment.NONE) { 364 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 365 } 366 } 367 } 368 369 private void setLabel(DependencyNode node, String label) throws MaltChainedException { 370 // node.getLabelCode().clear(); 371 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label)); 372 } 373 374 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException { 375 Vector<String> originalDeprel = new Vector<String>(); 376 String newLabel; 377 originalDeprel.add(null); 378 for (int index : pdg.getTokenIndices()) { 379 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable))); 380 } 381 for (int index : pdg.getTokenIndices()) { 382 newLabel = null; 383 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 384 if (markingStrategy == PseudoProjectiveEncoding.HEAD) { 385 if (nodeLifted.get(index)) { 386 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 387 + originalDeprel.get(headDeprel.get(index).getIndex()); 388 // } else { 389 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 390 } 391 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 392 if (nodeLifted.get(index) && nodePath.get(index)) { 393 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%"; 394 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 395 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 396 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 397 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%"; 398 } 399 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 400 if (nodeLifted.get(index) && nodePath.get(index)) { 401 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 402 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%"; 403 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 404 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 405 + originalDeprel.get(headDeprel.get(index).getIndex()); 406 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 407 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%"; 408 } 409 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 410 if (nodeLifted.get(index)) { 411 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 412 } 413 } 414 } else if (rootAttachment != CoveredRootAttachment.NONE) { 415 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null"; 416 } 417 if (newLabel != null) { 418 setLabel(pdg.getDependencyNode(index), newLabel); 419 } 420 } 421 } 422 423 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException { 424 initDeprojeciviztion(pdg); 425 426 for (int index : pdg.getTokenIndices()) { 427 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 428 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable) 429 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) { 430 setPath(pdg.getDependencyNode(index)); 431 } 432 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable) 433 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 434 nodeLifted.set(index, true); 435 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 436 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge() 437 .getLabelCode(ppliftedSymbolTable))); 438 } 439 } 440 } 441 } 442 deattachCoveredRootsForDeprojectivization(pdg); 443 if (markingStrategy == PseudoProjectiveEncoding.HEAD) { 444 deprojectivizeWithHead(pdg, pdg.getDependencyRoot()); 445 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 446 deprojectivizeWithPath(pdg, pdg.getDependencyRoot()); 447 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 448 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot()); 449 // } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 450 // deprojectivizeWithTrace(pdg, pdg.getRoot()); 451 } 452 } 453 454 private void initDeprojeciviztion(DependencyStructure pdg) { 455 nodeLifted.clear(); 456 nodePath.clear(); 457 synacticHeadDeprel.clear(); 458 for (int index : pdg.getDependencyIndices()) { 459 nodeLifted.add(false); 460 nodePath.add(false); 461 synacticHeadDeprel.add(null); 462 } 463 } 464 465 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException { 466 for (int index : pdg.getTokenIndices()) { 467 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 468 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable) 469 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals( 470 "#true#")) { 471 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 472 } 473 } 474 } 475 } 476 477 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 478 boolean success = true, childSuccess = false; 479 int i, childAttempts = 2; 480 DependencyNode child, possibleSyntacticHead; 481 String syntacticHeadDeprel; 482 if (nodeLifted.get(node.getIndex())) { 483 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex()); 484 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel); 485 if (possibleSyntacticHead != null) { 486 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 487 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 488 nodeLifted.set(node.getIndex(), false); 489 } else { 490 success = false; 491 } 492 } 493 while (!childSuccess && childAttempts > 0) { 494 childSuccess = true; 495 Vector<DependencyNode> children = new Vector<DependencyNode>(); 496 i = 0; 497 while ((child = node.getLeftDependent(i)) != null) { 498 children.add(child); 499 i++; 500 } 501 i = 0; 502 while ((child = node.getRightDependent(i)) != null) { 503 children.add(child); 504 i++; 505 } 506 for (i = 0; i < children.size(); i++) { 507 child = children.get(i); 508 if (!deprojectivizeWithHead(pdg, child)) { 509 childSuccess = false; 510 } 511 } 512 childAttempts--; 513 } 514 return childSuccess && success; 515 } 516 517 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel) 518 throws MaltChainedException { 519 DependencyNode dependent; 520 String dependentDeprel; 521 Vector<DependencyNode> nodes = new Vector<DependencyNode>(); 522 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false)); 523 while (nodes.size() > 0) { 524 dependent = nodes.remove(0); 525 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 526 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 527 if (dependentDeprel.equals(syntacticHeadDeprel)) { 528 return dependent; 529 } 530 } 531 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false)); 532 } 533 return null; 534 } 535 536 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 537 boolean percentOnly) { 538 int i, j; 539 Vector<DependencyNode> dependents = new Vector<DependencyNode>(); 540 DependencyNode leftChild, rightChild; 541 542 i = governor.getLeftDependentCount() - 1; 543 j = 0; 544 leftChild = governor.getLeftDependent(i--); 545 rightChild = governor.getRightDependent(j++); 546 547 while (leftChild != null && rightChild != null) { 548 if (leftChild == avoid) { 549 leftChild = governor.getLeftDependent(i--); 550 } else if (rightChild == avoid) { 551 rightChild = governor.getRightDependent(j++); 552 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) { 553 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) { 554 dependents.add(leftChild); 555 } 556 leftChild = governor.getLeftDependent(i--); 557 } else { 558 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) { 559 dependents.add(rightChild); 560 } 561 rightChild = governor.getRightDependent(j++); 562 } 563 } 564 while (leftChild != null) { 565 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) { 566 dependents.add(leftChild); 567 } 568 leftChild = governor.getLeftDependent(i--); 569 } 570 while (rightChild != null) { 571 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) { 572 dependents.add(rightChild); 573 } 574 rightChild = governor.getRightDependent(j++); 575 } 576 return dependents; 577 } 578 579 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 580 boolean success = true, childSuccess = false; 581 int i, childAttempts = 2; 582 DependencyNode child, possibleSyntacticHead; 583 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 584 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 585 if (possibleSyntacticHead != null) { 586 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 587 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 588 nodeLifted.set(node.getIndex(), false); 589 } else { 590 success = false; 591 } 592 } 593 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 594 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 595 if (possibleSyntacticHead != null) { 596 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 597 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 598 nodeLifted.set(node.getIndex(), false); 599 } else { 600 success = false; 601 } 602 } 603 while (!childSuccess && childAttempts > 0) { 604 childSuccess = true; 605 Vector<DependencyNode> children = new Vector<DependencyNode>(); 606 i = 0; 607 while ((child = node.getLeftDependent(i)) != null) { 608 children.add(child); 609 i++; 610 } 611 i = 0; 612 while ((child = node.getRightDependent(i)) != null) { 613 children.add(child); 614 i++; 615 } 616 for (i = 0; i < children.size(); i++) { 617 child = children.get(i); 618 if (!deprojectivizeWithPath(pdg, child)) { 619 childSuccess = false; 620 } 621 } 622 childAttempts--; 623 } 624 return childSuccess && success; 625 } 626 627 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) { 628 DependencyNode dependent; 629 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes; 630 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 631 while (nodes.size() > 0) { 632 dependent = nodes.remove(0); 633 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) { 634 return dependent; 635 } 636 nodes.addAll(newNodes); 637 } 638 return null; 639 } 640 641 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 642 boolean success = true, childSuccess = false; 643 int i, childAttempts = 2; 644 DependencyNode child, possibleSyntacticHead; 645 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 646 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 647 .getIndex())); 648 if (possibleSyntacticHead != null) { 649 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 650 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 651 nodeLifted.set(node.getIndex(), false); 652 } else { 653 success = false; 654 } 655 } 656 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 657 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 658 .getIndex())); 659 if (possibleSyntacticHead != null) { 660 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 661 // addEdge(pdg, possibleSyntacticHead, node, node.getHeadEdge().getLabelSet()); 662 nodeLifted.set(node.getIndex(), false); 663 } else { 664 success = false; 665 } 666 } 667 while (!childSuccess && childAttempts > 0) { 668 childSuccess = true; 669 Vector<DependencyNode> children = new Vector<DependencyNode>(); 670 i = 0; 671 while ((child = node.getLeftDependent(i)) != null) { 672 children.add(child); 673 i++; 674 } 675 i = 0; 676 while ((child = node.getRightDependent(i)) != null) { 677 children.add(child); 678 i++; 679 } 680 for (i = 0; i < children.size(); i++) { 681 child = children.get(i); 682 if (!deprojectivizeWithHeadAndPath(pdg, child)) { 683 childSuccess = false; 684 } 685 } 686 childAttempts--; 687 } 688 return childSuccess && success; 689 } 690 691 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode) 692 throws MaltChainedException { 693 DependencyNode dependent; 694 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>(); 695 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 696 while (nodes.size() > 0) { 697 dependent = nodes.remove(0); 698 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0 699 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 700 return dependent; 701 } 702 nodes.addAll(newNodes); 703 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 704 && newNodes.size() != 0) { 705 secondChance.add(dependent); 706 } 707 } 708 if (secondChance.size() > 0) { 709 return secondChance.firstElement(); 710 } 711 return null; 712 } 713 }