001package org.maltparser.transform.pseudo; 002 003import java.util.SortedSet; 004import java.util.TreeSet; 005import java.util.Vector; 006 007import org.apache.log4j.Logger; 008import org.maltparser.core.exception.MaltChainedException; 009import org.maltparser.core.io.dataformat.ColumnDescription; 010import org.maltparser.core.io.dataformat.DataFormatInstance; 011import org.maltparser.core.symbol.SymbolTable; 012import org.maltparser.core.symbol.SymbolTableHandler; 013import org.maltparser.core.syntaxgraph.DependencyStructure; 014import org.maltparser.core.syntaxgraph.node.DependencyNode; 015 016/** 017 * This class contains methods for projectivizing and deprojectivizing 018 * 019 * @author Jens Nilsson 020 */ 021public class PseudoProjectivity { 022 static int id = 0; 023 024 private enum PseudoProjectiveEncoding { 025 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE 026 }; 027 028 private enum CoveredRootAttachment { 029 NONE, IGNORE, LEFT, RIGHT, HEAD 030 }; 031 032 private enum LiftingOrder { 033 SHORTEST, DEEPEST 034 }; 035 036 private PseudoProjectiveEncoding markingStrategy; 037 private CoveredRootAttachment rootAttachment; 038 private LiftingOrder liftingOrder; 039 private Logger configLogger; 040 041 private SymbolTable deprelSymbolTable; 042 private SymbolTable pppathSymbolTable; 043 private SymbolTable ppliftedSymbolTable; 044 private SymbolTable ppcoveredRootSymbolTable; 045 046 private ColumnDescription deprelColumn; 047 private ColumnDescription pppathColumn; 048 private ColumnDescription ppliftedColumn; 049 private ColumnDescription ppcoveredRootColumn; 050 051 private Vector<Boolean> nodeLifted; 052 private Vector<Vector<DependencyNode>> nodeTrace; 053 private Vector<DependencyNode> headDeprel; 054 private Vector<Boolean> nodePath; 055 private Vector<Boolean> isCoveredRoot; 056 private Vector<Integer> nodeRelationLength; 057 private Vector<String> synacticHeadDeprel; 058 private int counter; 059 060 public PseudoProjectivity() { } 061 062 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger, 063 DataFormatInstance dataFormatInstance, SymbolTableHandler symbolTables) throws MaltChainedException { 064 nodeLifted = new Vector<Boolean>(); 065 nodeTrace = new Vector<Vector<DependencyNode>>(); 066 headDeprel = new Vector<DependencyNode>(); 067 nodePath = new Vector<Boolean>(); 068 isCoveredRoot = new Vector<Boolean>(); 069 nodeRelationLength = new Vector<Integer>(); 070 synacticHeadDeprel = new Vector<String>(); 071 counter = 0; 072 073 this.configLogger = configLogger; 074 if (markingStrategyString.equalsIgnoreCase("none")) { 075 markingStrategy = PseudoProjectiveEncoding.NONE; 076 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 077 markingStrategy = PseudoProjectiveEncoding.BASELINE; 078 } else if (markingStrategyString.equalsIgnoreCase("head")) { 079 markingStrategy = PseudoProjectiveEncoding.HEAD; 080 } else if (markingStrategyString.equalsIgnoreCase("path")) { 081 markingStrategy = PseudoProjectiveEncoding.PATH; 082 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 083 markingStrategy = PseudoProjectiveEncoding.HEADPATH; 084 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 085 markingStrategy = PseudoProjectiveEncoding.TRACE; 086 } 087 this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL"); 088 this.deprelSymbolTable = symbolTables.getSymbolTable(deprelColumn.getName()); 089 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH 090 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 091 this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 092 this.ppliftedSymbolTable = symbolTables.getSymbolTable(ppliftedColumn.getName()); 093 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 094 ppliftedSymbolTable.addSymbol("#true#"); 095 ppliftedSymbolTable.addSymbol("#false#"); 096 } else { 097 ppliftedSymbolTable.addSymbol("#false#"); 098 } 099 } 100 101 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 102 this.pppathColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 103 this.pppathSymbolTable = symbolTables.getSymbolTable(pppathColumn.getName()); 104 pppathSymbolTable.addSymbol("#true#"); 105 pppathSymbolTable.addSymbol("#false#"); 106 } 107 108 if (coveredRoot.equalsIgnoreCase("none")) { 109 this.rootAttachment = CoveredRootAttachment.NONE; 110 } else if (coveredRoot.equalsIgnoreCase("ignore")) { 111 this.rootAttachment = CoveredRootAttachment.IGNORE; 112 } else if (coveredRoot.equalsIgnoreCase("left")) { 113 this.rootAttachment = CoveredRootAttachment.LEFT; 114 } else if (coveredRoot.equalsIgnoreCase("right")) { 115 this.rootAttachment = CoveredRootAttachment.RIGHT; 116 } else if (coveredRoot.equalsIgnoreCase("head")) { 117 this.rootAttachment = CoveredRootAttachment.HEAD; 118 } 119 120 if (this.rootAttachment != CoveredRootAttachment.NONE) { 121 this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 122 this.ppcoveredRootSymbolTable = symbolTables.getSymbolTable(ppcoveredRootColumn.getName()); 123 ppcoveredRootSymbolTable.addSymbol("#true#"); 124 ppcoveredRootSymbolTable.addSymbol("#false#"); 125 } 126 if (liftingOrder.equalsIgnoreCase("shortest")) { 127 this.liftingOrder = LiftingOrder.SHORTEST; 128 } else if (liftingOrder.equalsIgnoreCase("deepest")) { 129 this.liftingOrder = LiftingOrder.DEEPEST; 130 } 131 } 132 133 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException { 134 nodeLifted.clear(); 135 nodeTrace.clear(); 136 headDeprel.clear(); 137 nodePath.clear(); 138 isCoveredRoot.clear(); 139 nodeRelationLength.clear(); 140 141 for (int index : pdg.getDependencyIndices()) { 142 nodeLifted.add(false); 143 nodeTrace.add(new Vector<DependencyNode>()); 144 headDeprel.add(null); 145 nodePath.add(false); 146 isCoveredRoot.add(false); 147 if (ppliftedSymbolTable != null && index != 0) { 148 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#")); 149 } 150 if (pppathSymbolTable != null && index != 0) { 151 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#")); 152 } 153 if (ppcoveredRootSymbolTable != null && index != 0) { 154 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#")); 155 } 156 } 157 computeRelationLength(pdg); 158 } 159 160 public void projectivize(DependencyStructure pdg) throws MaltChainedException { 161 id++; 162 if (!pdg.isTree()) { 163 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n"); 164 return; 165 } 166 DependencyNode deepestNonProjectiveNode; 167 initProjectivization(pdg); 168 if (rootAttachment == CoveredRootAttachment.IGNORE) { 169 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 170 while (!pdg.isProjective()) { 171 if (liftingOrder == LiftingOrder.DEEPEST) { 172 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 173 } else { 174 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 175 } 176 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) { 177 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 178 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 179 setPath(deepestNonProjectiveNode.getHead()); 180 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 181 } 182 } 183 deattachCoveredRootsForProjectivization(pdg); 184 } 185 } else { 186 if (rootAttachment != CoveredRootAttachment.NONE) { 187 for (int index : pdg.getTokenIndices()) { 188 attachCoveredRoots(pdg, pdg.getTokenNode(index)); 189 } 190 } 191 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 192 while (!pdg.isProjective()) { 193 if (liftingOrder == LiftingOrder.DEEPEST) { 194 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 195 } else { 196 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 197 } 198 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 199 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 200 setPath(deepestNonProjectiveNode.getHead()); 201 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 202 } 203 } 204 } 205 // collectTraceStatistics(pdg); 206 assignPseudoProjectiveDeprels(pdg); 207 } 208 209 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException { 210 assignPseudoProjectiveDeprelsForMerge(pdg); 211 } 212 213 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException { 214 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex; 215 String label; 216 initDeprojeciviztion(pdg); 217 for (int index : pdg.getTokenIndices()) { 218 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 219 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 220 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) { 221 label = label.substring(0, pathLabelIndex); 222 setLabel(pdg.getTokenNode(index), label); 223 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#")); 224 } 225 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) { 226 if (movedLabelIndex + 1 < label.length()) { 227 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1))); 228 } else { 229 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#")); 230 } 231 label = label.substring(0, movedLabelIndex); 232 setLabel(pdg.getTokenNode(index), label); 233 } 234 } 235 } 236 for (int index : pdg.getTokenIndices()) { 237 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 238 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 239 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) { 240 label = label.substring(0, coveredArcLabelIndex); 241 setLabel(pdg.getTokenNode(index), label); 242 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 243 } 244 } 245 } 246 } 247 248 private void setHeadDeprel(DependencyNode node, DependencyNode parent) { 249 if (headDeprel.get(node.getIndex()) == null) { 250 headDeprel.set(node.getIndex(), parent); 251 } 252 nodeTrace.set(node.getIndex(), headDeprel); 253 } 254 255 private void setPath(DependencyNode node) { 256 nodePath.set(node.getIndex(), true); 257 } 258 259 private boolean isCoveredRoot(DependencyNode node) { 260 return isCoveredRoot.get(node.getIndex()); 261 } 262 263 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException { 264 for (int index : pdg.getTokenIndices()) { 265 if (isCoveredRoot(pdg.getTokenNode(index))) { 266 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex()); 267 } 268 } 269 } 270 271 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException { 272 int i; 273 boolean foundCoveredRoot = false; 274 DependencyNode coveredRootHead; 275 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead() 276 .getIndex()); i++) { 277 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex(); 278 if (leftMostIndex == -1) { 279 leftMostIndex = i; 280 } 281 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex(); 282 if (rightMostIndex == -1) { 283 rightMostIndex = i; 284 } 285 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 286 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex 287 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 288 if (rootAttachment == CoveredRootAttachment.LEFT) { 289 if (deepest.getHead().getIndex() < deepest.getIndex()) { 290 coveredRootHead = deepest.getHead(); 291 } else { 292 coveredRootHead = deepest; 293 } 294 } else if (rootAttachment == CoveredRootAttachment.RIGHT) { 295 if (deepest.getIndex() < deepest.getHead().getIndex()) { 296 coveredRootHead = deepest.getHead(); 297 } else { 298 coveredRootHead = deepest; 299 } 300 } else { 301 coveredRootHead = deepest.getHead(); 302 } 303 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex()); 304 setCoveredRoot(pdg.getDependencyNode(i)); 305 foundCoveredRoot = true; 306 } 307 } 308 return foundCoveredRoot; 309 } 310 311 private void setCoveredRoot(DependencyNode node) { 312 isCoveredRoot.set(node.getIndex(), true); 313 } 314 315 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 316 DependencyNode deepestNonProjectiveNode = null; 317 for (int index : pdg.getDependencyIndices()) { 318 if (!pdg.getDependencyNode(index).isProjective() 319 && (deepestNonProjectiveNode == null 320 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) { 321 deepestNonProjectiveNode = pdg.getDependencyNode(index); 322 } 323 } 324 325 return deepestNonProjectiveNode; 326 } 327 328 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 329 DependencyNode shortestNonProjectiveNode = null; 330 for (int index : pdg.getDependencyIndices()) { 331 if (!pdg.getDependencyNode(index).isProjective() 332 && (shortestNonProjectiveNode == null 333 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 334 )) { 335// || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) { 336 shortestNonProjectiveNode = pdg.getDependencyNode(index); 337 } 338 } 339 return shortestNonProjectiveNode; 340 } 341 342 343 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException { 344 nodeRelationLength.add(0); 345 for (int index : pdg.getTokenIndices()) { 346 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex())); 347 } 348 } 349 350 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException { 351 int newLabelCode; 352 for (int index : pdg.getTokenIndices()) { 353 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 354 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH 355 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 356 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 357 if (nodeLifted.get(index)) { 358 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#"); 359 } else { 360 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 361 } 362 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 363 } else { 364 if (nodeLifted.get(index)) { 365 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode( 366 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable))); 367 } else { 368 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 369 } 370 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 371 } 372 } 373 374 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 375 if (nodePath.get(index)) { 376 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#"); 377 } else { 378 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#"); 379 } 380 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode); 381 } 382 383 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 384 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 385 } 386 } 387 } 388 389 private void setLabel(DependencyNode node, String label) throws MaltChainedException { 390 // node.getLabelCode().clear(); 391 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label)); 392 } 393 394 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException { 395 Vector<String> originalDeprel = new Vector<String>(); 396 String newLabel; 397 originalDeprel.add(null); 398 for (int index : pdg.getTokenIndices()) { 399 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable))); 400 } 401 for (int index : pdg.getTokenIndices()) { 402 newLabel = null; 403 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 404 if (markingStrategy == PseudoProjectiveEncoding.HEAD) { 405 if (nodeLifted.get(index)) { 406 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 407 + originalDeprel.get(headDeprel.get(index).getIndex()); 408 // } else { 409 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 410 } 411 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 412 if (nodeLifted.get(index) && nodePath.get(index)) { 413 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%"; 414 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 415 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 416 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 417 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%"; 418 } 419 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 420 if (nodeLifted.get(index) && nodePath.get(index)) { 421 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 422 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%"; 423 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 424 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 425 + originalDeprel.get(headDeprel.get(index).getIndex()); 426 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 427 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%"; 428 } 429 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 430 if (nodeLifted.get(index)) { 431 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 432 } 433 } 434 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 435 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null"; 436 } 437 if (newLabel != null) { 438 setLabel(pdg.getDependencyNode(index), newLabel); 439 } 440 } 441 } 442 443 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException { 444 initDeprojeciviztion(pdg); 445 446 for (int index : pdg.getTokenIndices()) { 447 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 448 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable) 449 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) { 450 setPath(pdg.getDependencyNode(index)); 451 } 452 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable) 453 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 454 nodeLifted.set(index, true); 455 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 456 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge() 457 .getLabelCode(ppliftedSymbolTable))); 458 } 459 } 460 } 461 } 462 deattachCoveredRootsForDeprojectivization(pdg); 463 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) { 464 deprojectivizeWithHead(pdg, pdg.getDependencyRoot()); 465 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 466 deprojectivizeWithPath(pdg, pdg.getDependencyRoot()); 467 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 468 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot()); 469 } 470 } 471 472 private void initDeprojeciviztion(DependencyStructure pdg) { 473 nodeLifted.clear(); 474 nodePath.clear(); 475 synacticHeadDeprel.clear(); 476 counter = 0; 477 for (int index : pdg.getDependencyIndices()) { 478 nodeLifted.add(false); 479 nodePath.add(false); 480 synacticHeadDeprel.add(null); 481 } 482 } 483 484 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException { 485 for (int index : pdg.getTokenIndices()) { 486 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 487 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable) 488 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals( 489 "#true#")) { 490 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 491 } 492 } 493 } 494 } 495 496 // Check whether there is at least one node in the specified dependency structure that can be lifted. 497 // If this is not the case, there is no need to call deprojectivizeWithHead. 498 499 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException { 500 for (int index : pdg.getDependencyIndices()) { 501 if (nodeLifted.get(index)) { 502 DependencyNode node = pdg.getDependencyNode(index); 503 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) { 504 return true; 505 } 506 } 507 } 508 return false; 509 } 510 511 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 512 boolean success = true, childSuccess = false; 513 int i, childAttempts = (counter < 10000 ? 2 : 1); 514 DependencyNode child, possibleSyntacticHead; 515 String syntacticHeadDeprel; 516 counter++; 517 if (nodeLifted.get(node.getIndex())) { 518 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex()); 519 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel); 520 if (possibleSyntacticHead != null) { 521 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 522 nodeLifted.set(node.getIndex(), false); 523 } else { 524 success = false; 525 } 526 } 527 while (!childSuccess && childAttempts > 0) { 528 childSuccess = true; 529 Vector<DependencyNode> children = new Vector<DependencyNode>(); 530 i = 0; 531 while ((child = node.getLeftDependent(i)) != null) { 532 children.add(child); 533 i++; 534 } 535 i = 0; 536 while ((child = node.getRightDependent(i)) != null) { 537 children.add(child); 538 i++; 539 } 540 for (i = 0; i < children.size(); i++) { 541 child = children.get(i); 542 if (!deprojectivizeWithHead(pdg, child)) { 543 childSuccess = false; 544 } 545 } 546 childAttempts--; 547 } 548 return childSuccess && success; 549 } 550 551 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel) 552 throws MaltChainedException { 553 DependencyNode dependent; 554 String dependentDeprel; 555 Vector<DependencyNode> nodes = new Vector<DependencyNode>(); 556 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false)); 557 while (nodes.size() > 0) { 558 dependent = nodes.remove(0); 559 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 560 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 561 if (dependentDeprel.equals(syntacticHeadDeprel)) { 562 return dependent; 563 } 564 } 565 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false)); 566 } 567 return null; 568 } 569 570 571 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 572 boolean percentOnly) { 573 Vector<DependencyNode> output = new Vector<DependencyNode>(); 574 SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>(); 575 dependents.addAll(governor.getLeftDependents()); 576 dependents.addAll(governor.getRightDependents()); 577 578 579 DependencyNode[] deps = new DependencyNode[dependents.size()]; 580 int[] distances = new int[dependents.size()]; 581 int i = 0; 582 for (DependencyNode dep : dependents) { 583 distances[i] = Math.abs(dep.getIndex() - avoid.getIndex()); 584 deps[i] = dep; 585 i++; 586 } 587 if (distances.length > 1) { 588 int smallest; 589 int n = distances.length; 590 int tmpDist; 591 DependencyNode tmpDep; 592 for (i=0; i < n; i++) { 593 smallest = i; 594 for (int j=i; j < n; j++) { 595 if (distances[j] < distances[smallest]) { 596 smallest = j; 597 } 598 } 599 if (smallest != i) { 600 tmpDist = distances[smallest]; 601 distances[smallest] = distances[i]; 602 distances[i] = tmpDist; 603 tmpDep = deps[smallest]; 604 deps[smallest] = deps[i]; 605 deps[i] = tmpDep; 606 } 607 } 608 } 609 for (i=0; i<distances.length;i++) { 610 if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath.get(deps[i].getIndex())))) { 611 output.add(deps[i]); 612 } 613 } 614 return output; 615 } 616 617 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode2(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 618 boolean percentOnly) { 619 int i, j; 620 Vector<DependencyNode> dependents = new Vector<DependencyNode>(); 621 DependencyNode leftChild, rightChild; 622 623 i = governor.getLeftDependentCount() - 1; 624 j = 0; 625 leftChild = governor.getLeftDependent(i--); 626 rightChild = governor.getRightDependent(j++); 627 628 while (leftChild != null && rightChild != null) { 629 if (leftChild == avoid) { 630 leftChild = governor.getLeftDependent(i--); 631 } else if (rightChild == avoid) { 632 rightChild = governor.getRightDependent(j++); 633 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) { 634 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) { 635 dependents.add(leftChild); 636 } 637 leftChild = governor.getLeftDependent(i--); 638 } else { 639 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) { 640 dependents.add(rightChild); 641 } 642 rightChild = governor.getRightDependent(j++); 643 } 644 } 645 while (leftChild != null) { 646 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) { 647 dependents.add(leftChild); 648 } 649 leftChild = governor.getLeftDependent(i--); 650 } 651 while (rightChild != null) { 652 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) { 653 dependents.add(rightChild); 654 } 655 rightChild = governor.getRightDependent(j++); 656 } 657 return dependents; 658 } 659 660 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 661 boolean success = true, childSuccess = false; 662 int i, childAttempts = (counter < 10000 ? 2 : 1); 663 DependencyNode child, possibleSyntacticHead; 664 counter++; 665 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 666 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 667 if (possibleSyntacticHead != null) { 668 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 669 nodeLifted.set(node.getIndex(), false); 670 } else { 671 success = false; 672 } 673 } 674 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 675 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 676 if (possibleSyntacticHead != null) { 677 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 678 nodeLifted.set(node.getIndex(), false); 679 } else { 680 success = false; 681 } 682 } 683 while (!childSuccess && childAttempts > 0) { 684 childSuccess = true; 685 Vector<DependencyNode> children = new Vector<DependencyNode>(); 686 i = 0; 687 while ((child = node.getLeftDependent(i)) != null) { 688 children.add(child); 689 i++; 690 } 691 i = 0; 692 while ((child = node.getRightDependent(i)) != null) { 693 children.add(child); 694 i++; 695 } 696 for (i = 0; i < children.size(); i++) { 697 child = children.get(i); 698 if (!deprojectivizeWithPath(pdg, child)) { 699 childSuccess = false; 700 } 701 } 702 childAttempts--; 703 } 704 return childSuccess && success; 705 } 706 707 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) { 708 DependencyNode dependent; 709 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes; 710 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 711 while (nodes.size() > 0) { 712 dependent = nodes.remove(0); 713 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) { 714 return dependent; 715 } 716 nodes.addAll(newNodes); 717 } 718 return null; 719 } 720 721 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 722 boolean success = true, childSuccess = false; 723 int i, childAttempts = (counter < 10000 ? 2 : 1); 724 DependencyNode child, possibleSyntacticHead; 725 counter++; 726 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 727 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 728 .getIndex())); 729 if (possibleSyntacticHead != null) { 730 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 731 nodeLifted.set(node.getIndex(), false); 732 } else { 733 success = false; 734 } 735 } 736 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 737 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 738 .getIndex())); 739 if (possibleSyntacticHead != null) { 740 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 741 nodeLifted.set(node.getIndex(), false); 742 } else { 743 success = false; 744 } 745 } 746 while (!childSuccess && childAttempts > 0) { 747 childSuccess = true; 748 Vector<DependencyNode> children = new Vector<DependencyNode>(); 749 i = 0; 750 while ((child = node.getLeftDependent(i)) != null) { 751 children.add(child); 752 i++; 753 } 754 i = 0; 755 while ((child = node.getRightDependent(i)) != null) { 756 children.add(child); 757 i++; 758 } 759 for (i = 0; i < children.size(); i++) { 760 child = children.get(i); 761 if (!deprojectivizeWithHeadAndPath(pdg, child)) { 762 childSuccess = false; 763 } 764 } 765 childAttempts--; 766 } 767 return childSuccess && success; 768 } 769 770 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode) 771 throws MaltChainedException { 772 DependencyNode dependent; 773 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>(); 774 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 775 while (nodes.size() > 0) { 776 dependent = nodes.remove(0); 777 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0 778 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 779 return dependent; 780 } 781 nodes.addAll(newNodes); 782 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 783 && newNodes.size() != 0) { 784 secondChance.add(dependent); 785 } 786 } 787 if (secondChance.size() > 0) { 788 return secondChance.firstElement(); 789 } 790 return null; 791 } 792}