001 package org.maltparser.transform.pseudo; 002 003 import java.util.Vector; 004 005 import org.apache.log4j.Logger; 006 import org.maltparser.core.exception.MaltChainedException; 007 import org.maltparser.core.io.dataformat.ColumnDescription; 008 import org.maltparser.core.io.dataformat.DataFormatInstance; 009 import org.maltparser.core.symbol.SymbolTable; 010 import org.maltparser.core.syntaxgraph.DependencyStructure; 011 import org.maltparser.core.syntaxgraph.node.DependencyNode; 012 013 /** 014 * This class contains methods for projectivizing and deprojectivizing 015 * 016 * @author Jens Nilsson 017 * @since 1.0 018 */ 019 public class PseudoProjectivity { 020 static int id = 0; 021 022 private enum PseudoProjectiveEncoding { 023 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE 024 }; 025 026 private enum CoveredRootAttachment { 027 NONE, IGNORE, LEFT, RIGHT, HEAD 028 }; 029 030 private enum LiftingOrder { 031 SHORTEST, DEEPEST 032 }; 033 034 private PseudoProjectiveEncoding markingStrategy; 035 private CoveredRootAttachment rootAttachment; 036 private LiftingOrder liftingOrder; 037 private Logger configLogger; 038 039 private SymbolTable deprelSymbolTable; 040 private SymbolTable pppathSymbolTable; 041 private SymbolTable ppliftedSymbolTable; 042 private SymbolTable ppcoveredRootSymbolTable; 043 044 private ColumnDescription deprelColumn; 045 private ColumnDescription pppathColumn; 046 private ColumnDescription ppliftedColumn; 047 private ColumnDescription ppcoveredRootColumn; 048 049 private Vector<Boolean> nodeLifted; 050 private Vector<Vector<DependencyNode>> nodeTrace; 051 private Vector<DependencyNode> headDeprel; 052 private Vector<Boolean> nodePath; 053 private Vector<Boolean> isCoveredRoot; 054 private Vector<Integer> nodeRelationLength; 055 private Vector<String> synacticHeadDeprel; 056 057 058 public PseudoProjectivity() { } 059 060 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger, 061 DataFormatInstance dataFormatInstance) throws MaltChainedException { 062 nodeLifted = new Vector<Boolean>(); 063 nodeTrace = new Vector<Vector<DependencyNode>>(); 064 headDeprel = new Vector<DependencyNode>(); 065 nodePath = new Vector<Boolean>(); 066 isCoveredRoot = new Vector<Boolean>(); 067 nodeRelationLength = new Vector<Integer>(); 068 synacticHeadDeprel = new Vector<String>(); 069 070 this.configLogger = configLogger; 071 if (markingStrategyString.equalsIgnoreCase("none")) { 072 markingStrategy = PseudoProjectiveEncoding.NONE; 073 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 074 markingStrategy = PseudoProjectiveEncoding.BASELINE; 075 } else if (markingStrategyString.equalsIgnoreCase("head")) { 076 markingStrategy = PseudoProjectiveEncoding.HEAD; 077 } else if (markingStrategyString.equalsIgnoreCase("path")) { 078 markingStrategy = PseudoProjectiveEncoding.PATH; 079 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 080 markingStrategy = PseudoProjectiveEncoding.HEADPATH; 081 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 082 markingStrategy = PseudoProjectiveEncoding.TRACE; 083 } 084 this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL"); 085 this.deprelSymbolTable = deprelColumn.getSymbolTable(); 086 // this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL"); 087 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH 088 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 089 this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy()); 090 this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable(); 091 // this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable); 092 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 093 ppliftedSymbolTable.addSymbol("#true#"); 094 ppliftedSymbolTable.addSymbol("#false#"); 095 } else { 096 ppliftedSymbolTable.addSymbol("#false#"); 097 } 098 } 099 100 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 101 this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy()); 102 this.pppathSymbolTable = pppathColumn.getSymbolTable(); 103 pppathSymbolTable.addSymbol("#true#"); 104 pppathSymbolTable.addSymbol("#false#"); 105 } 106 107 if (coveredRoot.equalsIgnoreCase("none")) { 108 this.rootAttachment = CoveredRootAttachment.NONE; 109 } else if (coveredRoot.equalsIgnoreCase("ignore")) { 110 this.rootAttachment = CoveredRootAttachment.IGNORE; 111 } else if (coveredRoot.equalsIgnoreCase("left")) { 112 this.rootAttachment = CoveredRootAttachment.LEFT; 113 } else if (coveredRoot.equalsIgnoreCase("right")) { 114 this.rootAttachment = CoveredRootAttachment.RIGHT; 115 } else if (coveredRoot.equalsIgnoreCase("head")) { 116 this.rootAttachment = CoveredRootAttachment.HEAD; 117 } 118 119 if (this.rootAttachment != CoveredRootAttachment.NONE) { 120 this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy()); 121 this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable(); 122 ppcoveredRootSymbolTable.addSymbol("#true#"); 123 ppcoveredRootSymbolTable.addSymbol("#false#"); 124 } 125 if (liftingOrder.equalsIgnoreCase("shortest")) { 126 this.liftingOrder = LiftingOrder.SHORTEST; 127 } else if (liftingOrder.equalsIgnoreCase("deepest")) { 128 this.liftingOrder = LiftingOrder.DEEPEST; 129 } 130 } 131 132 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException { 133 nodeLifted.clear(); 134 nodeTrace.clear(); 135 headDeprel.clear(); 136 nodePath.clear(); 137 isCoveredRoot.clear(); 138 nodeRelationLength.clear(); 139 140 for (int index : pdg.getDependencyIndices()) { 141 nodeLifted.add(false); 142 nodeTrace.add(new Vector<DependencyNode>()); 143 headDeprel.add(null); 144 nodePath.add(false); 145 isCoveredRoot.add(false); 146 if (ppliftedSymbolTable != null && index != 0) { 147 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#")); 148 } 149 if (pppathSymbolTable != null && index != 0) { 150 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#")); 151 } 152 if (ppcoveredRootSymbolTable != null && index != 0) { 153 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#")); 154 } 155 } 156 computeRelationLength(pdg); 157 } 158 159 public void projectivize(DependencyStructure pdg) throws MaltChainedException { 160 id++; 161 if (!pdg.isTree()) { 162 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n"); 163 return; 164 } 165 DependencyNode deepestNonProjectiveNode; 166 initProjectivization(pdg); 167 if (rootAttachment == CoveredRootAttachment.IGNORE) { 168 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 169 while (!pdg.isProjective()) { 170 if (liftingOrder == LiftingOrder.DEEPEST) { 171 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 172 } else { 173 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 174 } 175 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) { 176 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 177 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 178 setPath(deepestNonProjectiveNode.getHead()); 179 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 180 } 181 } 182 deattachCoveredRootsForProjectivization(pdg); 183 } 184 } else { 185 if (rootAttachment != CoveredRootAttachment.NONE) { 186 for (int index : pdg.getTokenIndices()) { 187 attachCoveredRoots(pdg, pdg.getTokenNode(index)); 188 } 189 } 190 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 191 while (!pdg.isProjective()) { 192 if (liftingOrder == LiftingOrder.DEEPEST) { 193 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 194 } else { 195 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 196 } 197 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 198 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 199 setPath(deepestNonProjectiveNode.getHead()); 200 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 201 } 202 } 203 } 204 // collectTraceStatistics(pdg); 205 assignPseudoProjectiveDeprels(pdg); 206 } 207 208 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException { 209 assignPseudoProjectiveDeprelsForMerge(pdg); 210 } 211 212 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException { 213 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex; 214 String label; 215 initDeprojeciviztion(pdg); 216 for (int index : pdg.getTokenIndices()) { 217 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 218 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 219 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) { 220 label = label.substring(0, pathLabelIndex); 221 setLabel(pdg.getTokenNode(index), label); 222 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#")); 223 } 224 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) { 225 if (movedLabelIndex + 1 < label.length()) { 226 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1))); 227 } else { 228 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#")); 229 } 230 label = label.substring(0, movedLabelIndex); 231 setLabel(pdg.getTokenNode(index), label); 232 } 233 } 234 } 235 for (int index : pdg.getTokenIndices()) { 236 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 237 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 238 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) { 239 label = label.substring(0, coveredArcLabelIndex); 240 setLabel(pdg.getTokenNode(index), label); 241 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 242 } 243 } 244 } 245 } 246 247 private void setHeadDeprel(DependencyNode node, DependencyNode parent) { 248 if (headDeprel.get(node.getIndex()) == null) { 249 headDeprel.set(node.getIndex(), parent); 250 } 251 nodeTrace.set(node.getIndex(), headDeprel); 252 } 253 254 private void setPath(DependencyNode node) { 255 nodePath.set(node.getIndex(), true); 256 } 257 258 private boolean isCoveredRoot(DependencyNode node) { 259 return isCoveredRoot.get(node.getIndex()); 260 } 261 262 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException { 263 for (int index : pdg.getTokenIndices()) { 264 if (isCoveredRoot(pdg.getTokenNode(index))) { 265 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex()); 266 } 267 } 268 } 269 270 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException { 271 int i; 272 boolean foundCoveredRoot = false; 273 DependencyNode coveredRootHead; 274 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead() 275 .getIndex()); i++) { 276 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex(); 277 if (leftMostIndex == -1) { 278 leftMostIndex = i; 279 } 280 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex(); 281 if (rightMostIndex == -1) { 282 rightMostIndex = i; 283 } 284 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 285 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex 286 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 287 if (rootAttachment == CoveredRootAttachment.LEFT) { 288 if (deepest.getHead().getIndex() < deepest.getIndex()) { 289 coveredRootHead = deepest.getHead(); 290 } else { 291 coveredRootHead = deepest; 292 } 293 } else if (rootAttachment == CoveredRootAttachment.RIGHT) { 294 if (deepest.getIndex() < deepest.getHead().getIndex()) { 295 coveredRootHead = deepest.getHead(); 296 } else { 297 coveredRootHead = deepest; 298 } 299 } else { 300 coveredRootHead = deepest.getHead(); 301 } 302 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex()); 303 setCoveredRoot(pdg.getDependencyNode(i)); 304 foundCoveredRoot = true; 305 } 306 } 307 return foundCoveredRoot; 308 } 309 310 private void setCoveredRoot(DependencyNode node) { 311 isCoveredRoot.set(node.getIndex(), true); 312 } 313 314 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 315 DependencyNode deepestNonProjectiveNode = null; 316 for (int index : pdg.getDependencyIndices()) { 317 if (!pdg.getDependencyNode(index).isProjective() 318 && (deepestNonProjectiveNode == null 319 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) { 320 deepestNonProjectiveNode = pdg.getDependencyNode(index); 321 } 322 } 323 324 return deepestNonProjectiveNode; 325 } 326 327 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 328 DependencyNode shortestNonProjectiveNode = null; 329 for (int index : pdg.getDependencyIndices()) { 330 if (!pdg.getDependencyNode(index).isProjective() 331 && (shortestNonProjectiveNode == null 332 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 333 || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) { 334 shortestNonProjectiveNode = pdg.getDependencyNode(index); 335 } 336 } 337 return shortestNonProjectiveNode; 338 } 339 340 341 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException { 342 nodeRelationLength.add(0); 343 for (int index : pdg.getTokenIndices()) { 344 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex())); 345 } 346 } 347 348 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException { 349 int newLabelCode; 350 for (int index : pdg.getTokenIndices()) { 351 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 352 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH 353 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 354 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 355 if (nodeLifted.get(index)) { 356 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#"); 357 } else { 358 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 359 } 360 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 361 } else { 362 if (nodeLifted.get(index)) { 363 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode( 364 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable))); 365 } else { 366 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 367 } 368 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 369 } 370 } 371 372 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 373 if (nodePath.get(index)) { 374 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#"); 375 } else { 376 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#"); 377 } 378 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode); 379 } 380 381 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 382 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 383 } 384 } 385 } 386 387 private void setLabel(DependencyNode node, String label) throws MaltChainedException { 388 // node.getLabelCode().clear(); 389 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label)); 390 } 391 392 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException { 393 Vector<String> originalDeprel = new Vector<String>(); 394 String newLabel; 395 originalDeprel.add(null); 396 for (int index : pdg.getTokenIndices()) { 397 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable))); 398 } 399 for (int index : pdg.getTokenIndices()) { 400 newLabel = null; 401 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 402 if (markingStrategy == PseudoProjectiveEncoding.HEAD) { 403 if (nodeLifted.get(index)) { 404 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 405 + originalDeprel.get(headDeprel.get(index).getIndex()); 406 // } else { 407 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 408 } 409 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 410 if (nodeLifted.get(index) && nodePath.get(index)) { 411 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%"; 412 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 413 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 414 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 415 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%"; 416 } 417 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 418 if (nodeLifted.get(index) && nodePath.get(index)) { 419 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 420 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%"; 421 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 422 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 423 + originalDeprel.get(headDeprel.get(index).getIndex()); 424 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 425 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%"; 426 } 427 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 428 if (nodeLifted.get(index)) { 429 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 430 } 431 } 432 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 433 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null"; 434 } 435 if (newLabel != null) { 436 setLabel(pdg.getDependencyNode(index), newLabel); 437 } 438 } 439 } 440 441 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException { 442 initDeprojeciviztion(pdg); 443 444 for (int index : pdg.getTokenIndices()) { 445 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 446 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable) 447 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) { 448 setPath(pdg.getDependencyNode(index)); 449 } 450 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable) 451 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 452 nodeLifted.set(index, true); 453 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 454 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge() 455 .getLabelCode(ppliftedSymbolTable))); 456 } 457 } 458 } 459 } 460 deattachCoveredRootsForDeprojectivization(pdg); 461 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) { 462 deprojectivizeWithHead(pdg, pdg.getDependencyRoot()); 463 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 464 deprojectivizeWithPath(pdg, pdg.getDependencyRoot()); 465 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 466 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot()); 467 } 468 } 469 470 private void initDeprojeciviztion(DependencyStructure pdg) { 471 nodeLifted.clear(); 472 nodePath.clear(); 473 synacticHeadDeprel.clear(); 474 for (int index : pdg.getDependencyIndices()) { 475 nodeLifted.add(false); 476 nodePath.add(false); 477 synacticHeadDeprel.add(null); 478 } 479 } 480 481 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException { 482 for (int index : pdg.getTokenIndices()) { 483 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 484 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable) 485 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals( 486 "#true#")) { 487 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 488 } 489 } 490 } 491 } 492 493 // Check whether there is at least one node in the specified dependency structure that can be lifted. 494 // If this is not the case, there is no need to call deprojectivizeWithHead. 495 496 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException { 497 for (int index : pdg.getDependencyIndices()) { 498 if (nodeLifted.get(index)) { 499 DependencyNode node = pdg.getDependencyNode(index); 500 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) { 501 return true; 502 } 503 } 504 } 505 return false; 506 } 507 508 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 509 boolean success = true, childSuccess = false; 510 int i, childAttempts = 2; 511 DependencyNode child, possibleSyntacticHead; 512 String syntacticHeadDeprel; 513 if (nodeLifted.get(node.getIndex())) { 514 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex()); 515 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel); 516 if (possibleSyntacticHead != null) { 517 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 518 nodeLifted.set(node.getIndex(), false); 519 } else { 520 success = false; 521 } 522 } 523 while (!childSuccess && childAttempts > 0) { 524 childSuccess = true; 525 Vector<DependencyNode> children = new Vector<DependencyNode>(); 526 i = 0; 527 while ((child = node.getLeftDependent(i)) != null) { 528 children.add(child); 529 i++; 530 } 531 i = 0; 532 while ((child = node.getRightDependent(i)) != null) { 533 children.add(child); 534 i++; 535 } 536 for (i = 0; i < children.size(); i++) { 537 child = children.get(i); 538 if (!deprojectivizeWithHead(pdg, child)) { 539 childSuccess = false; 540 } 541 } 542 childAttempts--; 543 } 544 return childSuccess && success; 545 } 546 547 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel) 548 throws MaltChainedException { 549 DependencyNode dependent; 550 String dependentDeprel; 551 Vector<DependencyNode> nodes = new Vector<DependencyNode>(); 552 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false)); 553 while (nodes.size() > 0) { 554 dependent = nodes.remove(0); 555 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 556 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 557 if (dependentDeprel.equals(syntacticHeadDeprel)) { 558 return dependent; 559 } 560 } 561 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false)); 562 } 563 return null; 564 } 565 566 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 567 boolean percentOnly) { 568 int i, j; 569 Vector<DependencyNode> dependents = new Vector<DependencyNode>(); 570 DependencyNode leftChild, rightChild; 571 572 i = governor.getLeftDependentCount() - 1; 573 j = 0; 574 leftChild = governor.getLeftDependent(i--); 575 rightChild = governor.getRightDependent(j++); 576 577 while (leftChild != null && rightChild != null) { 578 if (leftChild == avoid) { 579 leftChild = governor.getLeftDependent(i--); 580 } else if (rightChild == avoid) { 581 rightChild = governor.getRightDependent(j++); 582 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) { 583 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) { 584 dependents.add(leftChild); 585 } 586 leftChild = governor.getLeftDependent(i--); 587 } else { 588 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) { 589 dependents.add(rightChild); 590 } 591 rightChild = governor.getRightDependent(j++); 592 } 593 } 594 while (leftChild != null) { 595 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) { 596 dependents.add(leftChild); 597 } 598 leftChild = governor.getLeftDependent(i--); 599 } 600 while (rightChild != null) { 601 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) { 602 dependents.add(rightChild); 603 } 604 rightChild = governor.getRightDependent(j++); 605 } 606 return dependents; 607 } 608 609 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 610 boolean success = true, childSuccess = false; 611 int i, childAttempts = 2; 612 DependencyNode child, possibleSyntacticHead; 613 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 614 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 615 if (possibleSyntacticHead != null) { 616 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 617 nodeLifted.set(node.getIndex(), false); 618 } else { 619 success = false; 620 } 621 } 622 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 623 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 624 if (possibleSyntacticHead != null) { 625 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 626 nodeLifted.set(node.getIndex(), false); 627 } else { 628 success = false; 629 } 630 } 631 while (!childSuccess && childAttempts > 0) { 632 childSuccess = true; 633 Vector<DependencyNode> children = new Vector<DependencyNode>(); 634 i = 0; 635 while ((child = node.getLeftDependent(i)) != null) { 636 children.add(child); 637 i++; 638 } 639 i = 0; 640 while ((child = node.getRightDependent(i)) != null) { 641 children.add(child); 642 i++; 643 } 644 for (i = 0; i < children.size(); i++) { 645 child = children.get(i); 646 if (!deprojectivizeWithPath(pdg, child)) { 647 childSuccess = false; 648 } 649 } 650 childAttempts--; 651 } 652 return childSuccess && success; 653 } 654 655 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) { 656 DependencyNode dependent; 657 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes; 658 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 659 while (nodes.size() > 0) { 660 dependent = nodes.remove(0); 661 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) { 662 return dependent; 663 } 664 nodes.addAll(newNodes); 665 } 666 return null; 667 } 668 669 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 670 boolean success = true, childSuccess = false; 671 int i, childAttempts = 2; 672 DependencyNode child, possibleSyntacticHead; 673 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 674 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 675 .getIndex())); 676 if (possibleSyntacticHead != null) { 677 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 678 nodeLifted.set(node.getIndex(), false); 679 } else { 680 success = false; 681 } 682 } 683 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 684 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 685 .getIndex())); 686 if (possibleSyntacticHead != null) { 687 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 688 nodeLifted.set(node.getIndex(), false); 689 } else { 690 success = false; 691 } 692 } 693 while (!childSuccess && childAttempts > 0) { 694 childSuccess = true; 695 Vector<DependencyNode> children = new Vector<DependencyNode>(); 696 i = 0; 697 while ((child = node.getLeftDependent(i)) != null) { 698 children.add(child); 699 i++; 700 } 701 i = 0; 702 while ((child = node.getRightDependent(i)) != null) { 703 children.add(child); 704 i++; 705 } 706 for (i = 0; i < children.size(); i++) { 707 child = children.get(i); 708 if (!deprojectivizeWithHeadAndPath(pdg, child)) { 709 childSuccess = false; 710 } 711 } 712 childAttempts--; 713 } 714 return childSuccess && success; 715 } 716 717 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode) 718 throws MaltChainedException { 719 DependencyNode dependent; 720 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>(); 721 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 722 while (nodes.size() > 0) { 723 dependent = nodes.remove(0); 724 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0 725 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 726 return dependent; 727 } 728 nodes.addAll(newNodes); 729 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 730 && newNodes.size() != 0) { 731 secondChance.add(dependent); 732 } 733 } 734 if (secondChance.size() > 0) { 735 return secondChance.firstElement(); 736 } 737 return null; 738 } 739 }