001 package org.maltparser.transform.pseudo; 002 003 import java.util.Vector; 004 005 import org.apache.log4j.Logger; 006 import org.maltparser.core.exception.MaltChainedException; 007 import org.maltparser.core.io.dataformat.ColumnDescription; 008 import org.maltparser.core.io.dataformat.DataFormatInstance; 009 import org.maltparser.core.symbol.SymbolTable; 010 import org.maltparser.core.syntaxgraph.DependencyStructure; 011 import org.maltparser.core.syntaxgraph.node.DependencyNode; 012 013 /** 014 * This class contains methods for projectivizing and deprojectivizing 015 * 016 * @author Jens Nilsson 017 */ 018 public class PseudoProjectivity { 019 static int id = 0; 020 021 private enum PseudoProjectiveEncoding { 022 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE 023 }; 024 025 private enum CoveredRootAttachment { 026 NONE, IGNORE, LEFT, RIGHT, HEAD 027 }; 028 029 private enum LiftingOrder { 030 SHORTEST, DEEPEST 031 }; 032 033 private PseudoProjectiveEncoding markingStrategy; 034 private CoveredRootAttachment rootAttachment; 035 private LiftingOrder liftingOrder; 036 private Logger configLogger; 037 038 private SymbolTable deprelSymbolTable; 039 private SymbolTable pppathSymbolTable; 040 private SymbolTable ppliftedSymbolTable; 041 private SymbolTable ppcoveredRootSymbolTable; 042 043 private ColumnDescription deprelColumn; 044 private ColumnDescription pppathColumn; 045 private ColumnDescription ppliftedColumn; 046 private ColumnDescription ppcoveredRootColumn; 047 048 private Vector<Boolean> nodeLifted; 049 private Vector<Vector<DependencyNode>> nodeTrace; 050 private Vector<DependencyNode> headDeprel; 051 private Vector<Boolean> nodePath; 052 private Vector<Boolean> isCoveredRoot; 053 private Vector<Integer> nodeRelationLength; 054 private Vector<String> synacticHeadDeprel; 055 056 057 public PseudoProjectivity() { } 058 059 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger, 060 DataFormatInstance dataFormatInstance) throws MaltChainedException { 061 nodeLifted = new Vector<Boolean>(); 062 nodeTrace = new Vector<Vector<DependencyNode>>(); 063 headDeprel = new Vector<DependencyNode>(); 064 nodePath = new Vector<Boolean>(); 065 isCoveredRoot = new Vector<Boolean>(); 066 nodeRelationLength = new Vector<Integer>(); 067 synacticHeadDeprel = new Vector<String>(); 068 069 this.configLogger = configLogger; 070 if (markingStrategyString.equalsIgnoreCase("none")) { 071 markingStrategy = PseudoProjectiveEncoding.NONE; 072 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 073 markingStrategy = PseudoProjectiveEncoding.BASELINE; 074 } else if (markingStrategyString.equalsIgnoreCase("head")) { 075 markingStrategy = PseudoProjectiveEncoding.HEAD; 076 } else if (markingStrategyString.equalsIgnoreCase("path")) { 077 markingStrategy = PseudoProjectiveEncoding.PATH; 078 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 079 markingStrategy = PseudoProjectiveEncoding.HEADPATH; 080 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 081 markingStrategy = PseudoProjectiveEncoding.TRACE; 082 } 083 this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL"); 084 this.deprelSymbolTable = deprelColumn.getSymbolTable(); 085 // this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL"); 086 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH 087 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 088 this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 089 this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable(); 090 // this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable); 091 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 092 ppliftedSymbolTable.addSymbol("#true#"); 093 ppliftedSymbolTable.addSymbol("#false#"); 094 } else { 095 ppliftedSymbolTable.addSymbol("#false#"); 096 } 097 } 098 099 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 100 this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 101 this.pppathSymbolTable = pppathColumn.getSymbolTable(); 102 pppathSymbolTable.addSymbol("#true#"); 103 pppathSymbolTable.addSymbol("#false#"); 104 } 105 106 if (coveredRoot.equalsIgnoreCase("none")) { 107 this.rootAttachment = CoveredRootAttachment.NONE; 108 } else if (coveredRoot.equalsIgnoreCase("ignore")) { 109 this.rootAttachment = CoveredRootAttachment.IGNORE; 110 } else if (coveredRoot.equalsIgnoreCase("left")) { 111 this.rootAttachment = CoveredRootAttachment.LEFT; 112 } else if (coveredRoot.equalsIgnoreCase("right")) { 113 this.rootAttachment = CoveredRootAttachment.RIGHT; 114 } else if (coveredRoot.equalsIgnoreCase("head")) { 115 this.rootAttachment = CoveredRootAttachment.HEAD; 116 } 117 118 if (this.rootAttachment != CoveredRootAttachment.NONE) { 119 this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 120 this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable(); 121 ppcoveredRootSymbolTable.addSymbol("#true#"); 122 ppcoveredRootSymbolTable.addSymbol("#false#"); 123 } 124 if (liftingOrder.equalsIgnoreCase("shortest")) { 125 this.liftingOrder = LiftingOrder.SHORTEST; 126 } else if (liftingOrder.equalsIgnoreCase("deepest")) { 127 this.liftingOrder = LiftingOrder.DEEPEST; 128 } 129 } 130 131 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException { 132 nodeLifted.clear(); 133 nodeTrace.clear(); 134 headDeprel.clear(); 135 nodePath.clear(); 136 isCoveredRoot.clear(); 137 nodeRelationLength.clear(); 138 139 for (int index : pdg.getDependencyIndices()) { 140 nodeLifted.add(false); 141 nodeTrace.add(new Vector<DependencyNode>()); 142 headDeprel.add(null); 143 nodePath.add(false); 144 isCoveredRoot.add(false); 145 if (ppliftedSymbolTable != null && index != 0) { 146 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#")); 147 } 148 if (pppathSymbolTable != null && index != 0) { 149 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#")); 150 } 151 if (ppcoveredRootSymbolTable != null && index != 0) { 152 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#")); 153 } 154 } 155 computeRelationLength(pdg); 156 } 157 158 public void projectivize(DependencyStructure pdg) throws MaltChainedException { 159 id++; 160 if (!pdg.isTree()) { 161 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n"); 162 return; 163 } 164 DependencyNode deepestNonProjectiveNode; 165 initProjectivization(pdg); 166 if (rootAttachment == CoveredRootAttachment.IGNORE) { 167 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 168 while (!pdg.isProjective()) { 169 if (liftingOrder == LiftingOrder.DEEPEST) { 170 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 171 } else { 172 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 173 } 174 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) { 175 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 176 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 177 setPath(deepestNonProjectiveNode.getHead()); 178 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 179 } 180 } 181 deattachCoveredRootsForProjectivization(pdg); 182 } 183 } else { 184 if (rootAttachment != CoveredRootAttachment.NONE) { 185 for (int index : pdg.getTokenIndices()) { 186 attachCoveredRoots(pdg, pdg.getTokenNode(index)); 187 } 188 } 189 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 190 while (!pdg.isProjective()) { 191 if (liftingOrder == LiftingOrder.DEEPEST) { 192 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 193 } else { 194 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 195 } 196 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 197 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 198 setPath(deepestNonProjectiveNode.getHead()); 199 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 200 } 201 } 202 } 203 // collectTraceStatistics(pdg); 204 assignPseudoProjectiveDeprels(pdg); 205 } 206 207 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException { 208 assignPseudoProjectiveDeprelsForMerge(pdg); 209 } 210 211 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException { 212 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex; 213 String label; 214 initDeprojeciviztion(pdg); 215 for (int index : pdg.getTokenIndices()) { 216 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 217 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 218 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) { 219 label = label.substring(0, pathLabelIndex); 220 setLabel(pdg.getTokenNode(index), label); 221 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#")); 222 } 223 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) { 224 if (movedLabelIndex + 1 < label.length()) { 225 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1))); 226 } else { 227 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#")); 228 } 229 label = label.substring(0, movedLabelIndex); 230 setLabel(pdg.getTokenNode(index), label); 231 } 232 } 233 } 234 for (int index : pdg.getTokenIndices()) { 235 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 236 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 237 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) { 238 label = label.substring(0, coveredArcLabelIndex); 239 setLabel(pdg.getTokenNode(index), label); 240 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 241 } 242 } 243 } 244 } 245 246 private void setHeadDeprel(DependencyNode node, DependencyNode parent) { 247 if (headDeprel.get(node.getIndex()) == null) { 248 headDeprel.set(node.getIndex(), parent); 249 } 250 nodeTrace.set(node.getIndex(), headDeprel); 251 } 252 253 private void setPath(DependencyNode node) { 254 nodePath.set(node.getIndex(), true); 255 } 256 257 private boolean isCoveredRoot(DependencyNode node) { 258 return isCoveredRoot.get(node.getIndex()); 259 } 260 261 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException { 262 for (int index : pdg.getTokenIndices()) { 263 if (isCoveredRoot(pdg.getTokenNode(index))) { 264 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex()); 265 } 266 } 267 } 268 269 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException { 270 int i; 271 boolean foundCoveredRoot = false; 272 DependencyNode coveredRootHead; 273 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead() 274 .getIndex()); i++) { 275 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex(); 276 if (leftMostIndex == -1) { 277 leftMostIndex = i; 278 } 279 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex(); 280 if (rightMostIndex == -1) { 281 rightMostIndex = i; 282 } 283 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 284 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex 285 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 286 if (rootAttachment == CoveredRootAttachment.LEFT) { 287 if (deepest.getHead().getIndex() < deepest.getIndex()) { 288 coveredRootHead = deepest.getHead(); 289 } else { 290 coveredRootHead = deepest; 291 } 292 } else if (rootAttachment == CoveredRootAttachment.RIGHT) { 293 if (deepest.getIndex() < deepest.getHead().getIndex()) { 294 coveredRootHead = deepest.getHead(); 295 } else { 296 coveredRootHead = deepest; 297 } 298 } else { 299 coveredRootHead = deepest.getHead(); 300 } 301 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex()); 302 setCoveredRoot(pdg.getDependencyNode(i)); 303 foundCoveredRoot = true; 304 } 305 } 306 return foundCoveredRoot; 307 } 308 309 private void setCoveredRoot(DependencyNode node) { 310 isCoveredRoot.set(node.getIndex(), true); 311 } 312 313 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 314 DependencyNode deepestNonProjectiveNode = null; 315 for (int index : pdg.getDependencyIndices()) { 316 if (!pdg.getDependencyNode(index).isProjective() 317 && (deepestNonProjectiveNode == null 318 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) { 319 deepestNonProjectiveNode = pdg.getDependencyNode(index); 320 } 321 } 322 323 return deepestNonProjectiveNode; 324 } 325 326 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 327 DependencyNode shortestNonProjectiveNode = null; 328 for (int index : pdg.getDependencyIndices()) { 329 if (!pdg.getDependencyNode(index).isProjective() 330 && (shortestNonProjectiveNode == null 331 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 332 || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) { 333 shortestNonProjectiveNode = pdg.getDependencyNode(index); 334 } 335 } 336 return shortestNonProjectiveNode; 337 } 338 339 340 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException { 341 nodeRelationLength.add(0); 342 for (int index : pdg.getTokenIndices()) { 343 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex())); 344 } 345 } 346 347 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException { 348 int newLabelCode; 349 for (int index : pdg.getTokenIndices()) { 350 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 351 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH 352 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 353 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 354 if (nodeLifted.get(index)) { 355 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#"); 356 } else { 357 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 358 } 359 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 360 } else { 361 if (nodeLifted.get(index)) { 362 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode( 363 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable))); 364 } else { 365 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 366 } 367 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 368 } 369 } 370 371 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 372 if (nodePath.get(index)) { 373 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#"); 374 } else { 375 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#"); 376 } 377 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode); 378 } 379 380 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 381 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 382 } 383 } 384 } 385 386 private void setLabel(DependencyNode node, String label) throws MaltChainedException { 387 // node.getLabelCode().clear(); 388 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label)); 389 } 390 391 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException { 392 Vector<String> originalDeprel = new Vector<String>(); 393 String newLabel; 394 originalDeprel.add(null); 395 for (int index : pdg.getTokenIndices()) { 396 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable))); 397 } 398 for (int index : pdg.getTokenIndices()) { 399 newLabel = null; 400 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 401 if (markingStrategy == PseudoProjectiveEncoding.HEAD) { 402 if (nodeLifted.get(index)) { 403 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 404 + originalDeprel.get(headDeprel.get(index).getIndex()); 405 // } else { 406 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 407 } 408 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 409 if (nodeLifted.get(index) && nodePath.get(index)) { 410 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%"; 411 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 412 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 413 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 414 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%"; 415 } 416 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 417 if (nodeLifted.get(index) && nodePath.get(index)) { 418 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 419 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%"; 420 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 421 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 422 + originalDeprel.get(headDeprel.get(index).getIndex()); 423 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 424 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%"; 425 } 426 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 427 if (nodeLifted.get(index)) { 428 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 429 } 430 } 431 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 432 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null"; 433 } 434 if (newLabel != null) { 435 setLabel(pdg.getDependencyNode(index), newLabel); 436 } 437 } 438 } 439 440 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException { 441 initDeprojeciviztion(pdg); 442 443 for (int index : pdg.getTokenIndices()) { 444 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 445 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable) 446 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) { 447 setPath(pdg.getDependencyNode(index)); 448 } 449 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable) 450 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 451 nodeLifted.set(index, true); 452 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 453 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge() 454 .getLabelCode(ppliftedSymbolTable))); 455 } 456 } 457 } 458 } 459 deattachCoveredRootsForDeprojectivization(pdg); 460 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) { 461 deprojectivizeWithHead(pdg, pdg.getDependencyRoot()); 462 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 463 deprojectivizeWithPath(pdg, pdg.getDependencyRoot()); 464 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 465 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot()); 466 } 467 } 468 469 private void initDeprojeciviztion(DependencyStructure pdg) { 470 nodeLifted.clear(); 471 nodePath.clear(); 472 synacticHeadDeprel.clear(); 473 for (int index : pdg.getDependencyIndices()) { 474 nodeLifted.add(false); 475 nodePath.add(false); 476 synacticHeadDeprel.add(null); 477 } 478 } 479 480 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException { 481 for (int index : pdg.getTokenIndices()) { 482 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 483 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable) 484 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals( 485 "#true#")) { 486 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 487 } 488 } 489 } 490 } 491 492 // Check whether there is at least one node in the specified dependency structure that can be lifted. 493 // If this is not the case, there is no need to call deprojectivizeWithHead. 494 495 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException { 496 for (int index : pdg.getDependencyIndices()) { 497 if (nodeLifted.get(index)) { 498 DependencyNode node = pdg.getDependencyNode(index); 499 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) { 500 return true; 501 } 502 } 503 } 504 return false; 505 } 506 507 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 508 boolean success = true, childSuccess = false; 509 int i, childAttempts = 2; 510 DependencyNode child, possibleSyntacticHead; 511 String syntacticHeadDeprel; 512 if (nodeLifted.get(node.getIndex())) { 513 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex()); 514 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel); 515 if (possibleSyntacticHead != null) { 516 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 517 nodeLifted.set(node.getIndex(), false); 518 } else { 519 success = false; 520 } 521 } 522 while (!childSuccess && childAttempts > 0) { 523 childSuccess = true; 524 Vector<DependencyNode> children = new Vector<DependencyNode>(); 525 i = 0; 526 while ((child = node.getLeftDependent(i)) != null) { 527 children.add(child); 528 i++; 529 } 530 i = 0; 531 while ((child = node.getRightDependent(i)) != null) { 532 children.add(child); 533 i++; 534 } 535 for (i = 0; i < children.size(); i++) { 536 child = children.get(i); 537 if (!deprojectivizeWithHead(pdg, child)) { 538 childSuccess = false; 539 } 540 } 541 childAttempts--; 542 } 543 return childSuccess && success; 544 } 545 546 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel) 547 throws MaltChainedException { 548 DependencyNode dependent; 549 String dependentDeprel; 550 Vector<DependencyNode> nodes = new Vector<DependencyNode>(); 551 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false)); 552 while (nodes.size() > 0) { 553 dependent = nodes.remove(0); 554 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 555 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 556 if (dependentDeprel.equals(syntacticHeadDeprel)) { 557 return dependent; 558 } 559 } 560 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false)); 561 } 562 return null; 563 } 564 565 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 566 boolean percentOnly) { 567 int i, j; 568 Vector<DependencyNode> dependents = new Vector<DependencyNode>(); 569 DependencyNode leftChild, rightChild; 570 571 i = governor.getLeftDependentCount() - 1; 572 j = 0; 573 leftChild = governor.getLeftDependent(i--); 574 rightChild = governor.getRightDependent(j++); 575 576 while (leftChild != null && rightChild != null) { 577 if (leftChild == avoid) { 578 leftChild = governor.getLeftDependent(i--); 579 } else if (rightChild == avoid) { 580 rightChild = governor.getRightDependent(j++); 581 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) { 582 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) { 583 dependents.add(leftChild); 584 } 585 leftChild = governor.getLeftDependent(i--); 586 } else { 587 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) { 588 dependents.add(rightChild); 589 } 590 rightChild = governor.getRightDependent(j++); 591 } 592 } 593 while (leftChild != null) { 594 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) { 595 dependents.add(leftChild); 596 } 597 leftChild = governor.getLeftDependent(i--); 598 } 599 while (rightChild != null) { 600 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) { 601 dependents.add(rightChild); 602 } 603 rightChild = governor.getRightDependent(j++); 604 } 605 return dependents; 606 } 607 608 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 609 boolean success = true, childSuccess = false; 610 int i, childAttempts = 2; 611 DependencyNode child, possibleSyntacticHead; 612 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 613 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 614 if (possibleSyntacticHead != null) { 615 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 616 nodeLifted.set(node.getIndex(), false); 617 } else { 618 success = false; 619 } 620 } 621 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 622 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 623 if (possibleSyntacticHead != null) { 624 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 625 nodeLifted.set(node.getIndex(), false); 626 } else { 627 success = false; 628 } 629 } 630 while (!childSuccess && childAttempts > 0) { 631 childSuccess = true; 632 Vector<DependencyNode> children = new Vector<DependencyNode>(); 633 i = 0; 634 while ((child = node.getLeftDependent(i)) != null) { 635 children.add(child); 636 i++; 637 } 638 i = 0; 639 while ((child = node.getRightDependent(i)) != null) { 640 children.add(child); 641 i++; 642 } 643 for (i = 0; i < children.size(); i++) { 644 child = children.get(i); 645 if (!deprojectivizeWithPath(pdg, child)) { 646 childSuccess = false; 647 } 648 } 649 childAttempts--; 650 } 651 return childSuccess && success; 652 } 653 654 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) { 655 DependencyNode dependent; 656 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes; 657 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 658 while (nodes.size() > 0) { 659 dependent = nodes.remove(0); 660 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) { 661 return dependent; 662 } 663 nodes.addAll(newNodes); 664 } 665 return null; 666 } 667 668 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 669 boolean success = true, childSuccess = false; 670 int i, childAttempts = 2; 671 DependencyNode child, possibleSyntacticHead; 672 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 673 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 674 .getIndex())); 675 if (possibleSyntacticHead != null) { 676 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 677 nodeLifted.set(node.getIndex(), false); 678 } else { 679 success = false; 680 } 681 } 682 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 683 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 684 .getIndex())); 685 if (possibleSyntacticHead != null) { 686 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 687 nodeLifted.set(node.getIndex(), false); 688 } else { 689 success = false; 690 } 691 } 692 while (!childSuccess && childAttempts > 0) { 693 childSuccess = true; 694 Vector<DependencyNode> children = new Vector<DependencyNode>(); 695 i = 0; 696 while ((child = node.getLeftDependent(i)) != null) { 697 children.add(child); 698 i++; 699 } 700 i = 0; 701 while ((child = node.getRightDependent(i)) != null) { 702 children.add(child); 703 i++; 704 } 705 for (i = 0; i < children.size(); i++) { 706 child = children.get(i); 707 if (!deprojectivizeWithHeadAndPath(pdg, child)) { 708 childSuccess = false; 709 } 710 } 711 childAttempts--; 712 } 713 return childSuccess && success; 714 } 715 716 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode) 717 throws MaltChainedException { 718 DependencyNode dependent; 719 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>(); 720 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 721 while (nodes.size() > 0) { 722 dependent = nodes.remove(0); 723 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0 724 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 725 return dependent; 726 } 727 nodes.addAll(newNodes); 728 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 729 && newNodes.size() != 0) { 730 secondChance.add(dependent); 731 } 732 } 733 if (secondChance.size() > 0) { 734 return secondChance.firstElement(); 735 } 736 return null; 737 } 738 }