001package org.maltparser.transform.pseudo; 002 003import java.util.SortedSet; 004import java.util.TreeSet; 005import java.util.Vector; 006 007import org.apache.log4j.Logger; 008import org.maltparser.core.exception.MaltChainedException; 009import org.maltparser.core.io.dataformat.ColumnDescription; 010import org.maltparser.core.io.dataformat.DataFormatInstance; 011import org.maltparser.core.symbol.SymbolTable; 012import org.maltparser.core.symbol.SymbolTableHandler; 013import org.maltparser.core.syntaxgraph.DependencyStructure; 014import org.maltparser.core.syntaxgraph.node.DependencyNode; 015 016/** 017 * This class contains methods for projectivizing and deprojectivizing 018 * 019 * @author Jens Nilsson 020 */ 021public class PseudoProjectivity { 022 static int id = 0; 023 024 private enum PseudoProjectiveEncoding { 025 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE 026 }; 027 028 private enum CoveredRootAttachment { 029 NONE, IGNORE, LEFT, RIGHT, HEAD 030 }; 031 032 private enum LiftingOrder { 033 SHORTEST, DEEPEST 034 }; 035 036 private PseudoProjectiveEncoding markingStrategy; 037 private CoveredRootAttachment rootAttachment; 038 private LiftingOrder liftingOrder; 039 private Logger configLogger; 040 041 private SymbolTable deprelSymbolTable; 042 private SymbolTable pppathSymbolTable; 043 private SymbolTable ppliftedSymbolTable; 044 private SymbolTable ppcoveredRootSymbolTable; 045 046 private ColumnDescription deprelColumn; 047 private ColumnDescription pppathColumn; 048 private ColumnDescription ppliftedColumn; 049 private ColumnDescription ppcoveredRootColumn; 050 051 private Vector<Boolean> nodeLifted; 052 private Vector<Vector<DependencyNode>> nodeTrace; 053 private Vector<DependencyNode> headDeprel; 054 private Vector<Boolean> nodePath; 055 private Vector<Boolean> isCoveredRoot; 056 private Vector<Integer> nodeRelationLength; 057 private Vector<String> synacticHeadDeprel; 058 059 060 public PseudoProjectivity() { } 061 062 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger, 063 DataFormatInstance dataFormatInstance, SymbolTableHandler symbolTables) throws MaltChainedException { 064 nodeLifted = new Vector<Boolean>(); 065 nodeTrace = new Vector<Vector<DependencyNode>>(); 066 headDeprel = new Vector<DependencyNode>(); 067 nodePath = new Vector<Boolean>(); 068 isCoveredRoot = new Vector<Boolean>(); 069 nodeRelationLength = new Vector<Integer>(); 070 synacticHeadDeprel = new Vector<String>(); 071 072 this.configLogger = configLogger; 073 if (markingStrategyString.equalsIgnoreCase("none")) { 074 markingStrategy = PseudoProjectiveEncoding.NONE; 075 } else if (markingStrategyString.equalsIgnoreCase("baseline")) { 076 markingStrategy = PseudoProjectiveEncoding.BASELINE; 077 } else if (markingStrategyString.equalsIgnoreCase("head")) { 078 markingStrategy = PseudoProjectiveEncoding.HEAD; 079 } else if (markingStrategyString.equalsIgnoreCase("path")) { 080 markingStrategy = PseudoProjectiveEncoding.PATH; 081 } else if (markingStrategyString.equalsIgnoreCase("head+path")) { 082 markingStrategy = PseudoProjectiveEncoding.HEADPATH; 083 } else if (markingStrategyString.equalsIgnoreCase("trace")) { 084 markingStrategy = PseudoProjectiveEncoding.TRACE; 085 } 086 this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL"); 087 this.deprelSymbolTable = symbolTables.getSymbolTable(deprelColumn.getName()); 088 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH 089 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 090 this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 091 this.ppliftedSymbolTable = symbolTables.getSymbolTable(ppliftedColumn.getName()); 092 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 093 ppliftedSymbolTable.addSymbol("#true#"); 094 ppliftedSymbolTable.addSymbol("#false#"); 095 } else { 096 ppliftedSymbolTable.addSymbol("#false#"); 097 } 098 } 099 100 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 101 this.pppathColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 102 this.pppathSymbolTable = symbolTables.getSymbolTable(pppathColumn.getName()); 103 pppathSymbolTable.addSymbol("#true#"); 104 pppathSymbolTable.addSymbol("#false#"); 105 } 106 107 if (coveredRoot.equalsIgnoreCase("none")) { 108 this.rootAttachment = CoveredRootAttachment.NONE; 109 } else if (coveredRoot.equalsIgnoreCase("ignore")) { 110 this.rootAttachment = CoveredRootAttachment.IGNORE; 111 } else if (coveredRoot.equalsIgnoreCase("left")) { 112 this.rootAttachment = CoveredRootAttachment.LEFT; 113 } else if (coveredRoot.equalsIgnoreCase("right")) { 114 this.rootAttachment = CoveredRootAttachment.RIGHT; 115 } else if (coveredRoot.equalsIgnoreCase("head")) { 116 this.rootAttachment = CoveredRootAttachment.HEAD; 117 } 118 119 if (this.rootAttachment != CoveredRootAttachment.NONE) { 120 this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy()); 121 this.ppcoveredRootSymbolTable = symbolTables.getSymbolTable(ppcoveredRootColumn.getName()); 122 ppcoveredRootSymbolTable.addSymbol("#true#"); 123 ppcoveredRootSymbolTable.addSymbol("#false#"); 124 } 125 if (liftingOrder.equalsIgnoreCase("shortest")) { 126 this.liftingOrder = LiftingOrder.SHORTEST; 127 } else if (liftingOrder.equalsIgnoreCase("deepest")) { 128 this.liftingOrder = LiftingOrder.DEEPEST; 129 } 130 } 131 132 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException { 133 nodeLifted.clear(); 134 nodeTrace.clear(); 135 headDeprel.clear(); 136 nodePath.clear(); 137 isCoveredRoot.clear(); 138 nodeRelationLength.clear(); 139 140 for (int index : pdg.getDependencyIndices()) { 141 nodeLifted.add(false); 142 nodeTrace.add(new Vector<DependencyNode>()); 143 headDeprel.add(null); 144 nodePath.add(false); 145 isCoveredRoot.add(false); 146 if (ppliftedSymbolTable != null && index != 0) { 147 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#")); 148 } 149 if (pppathSymbolTable != null && index != 0) { 150 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#")); 151 } 152 if (ppcoveredRootSymbolTable != null && index != 0) { 153 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#")); 154 } 155 } 156 computeRelationLength(pdg); 157 } 158 159 public void projectivize(DependencyStructure pdg) throws MaltChainedException { 160 id++; 161 if (!pdg.isTree()) { 162 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n"); 163 return; 164 } 165 DependencyNode deepestNonProjectiveNode; 166 initProjectivization(pdg); 167 if (rootAttachment == CoveredRootAttachment.IGNORE) { 168 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 169 while (!pdg.isProjective()) { 170 if (liftingOrder == LiftingOrder.DEEPEST) { 171 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 172 } else { 173 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 174 } 175 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) { 176 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 177 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 178 setPath(deepestNonProjectiveNode.getHead()); 179 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 180 } 181 } 182 deattachCoveredRootsForProjectivization(pdg); 183 } 184 } else { 185 if (rootAttachment != CoveredRootAttachment.NONE) { 186 for (int index : pdg.getTokenIndices()) { 187 attachCoveredRoots(pdg, pdg.getTokenNode(index)); 188 } 189 } 190 if (markingStrategy != PseudoProjectiveEncoding.NONE) { 191 while (!pdg.isProjective()) { 192 if (liftingOrder == LiftingOrder.DEEPEST) { 193 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg); 194 } else { 195 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg); 196 } 197 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true); 198 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead()); 199 setPath(deepestNonProjectiveNode.getHead()); 200 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex()); 201 } 202 } 203 } 204 // collectTraceStatistics(pdg); 205 assignPseudoProjectiveDeprels(pdg); 206 } 207 208 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException { 209 assignPseudoProjectiveDeprelsForMerge(pdg); 210 } 211 212 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException { 213 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex; 214 String label; 215 initDeprojeciviztion(pdg); 216 for (int index : pdg.getTokenIndices()) { 217 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 218 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 219 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) { 220 label = label.substring(0, pathLabelIndex); 221 setLabel(pdg.getTokenNode(index), label); 222 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#")); 223 } 224 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) { 225 if (movedLabelIndex + 1 < label.length()) { 226 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1))); 227 } else { 228 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#")); 229 } 230 label = label.substring(0, movedLabelIndex); 231 setLabel(pdg.getTokenNode(index), label); 232 } 233 } 234 } 235 for (int index : pdg.getTokenIndices()) { 236 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 237 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 238 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) { 239 label = label.substring(0, coveredArcLabelIndex); 240 setLabel(pdg.getTokenNode(index), label); 241 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 242 } 243 } 244 } 245 } 246 247 private void setHeadDeprel(DependencyNode node, DependencyNode parent) { 248 if (headDeprel.get(node.getIndex()) == null) { 249 headDeprel.set(node.getIndex(), parent); 250 } 251 nodeTrace.set(node.getIndex(), headDeprel); 252 } 253 254 private void setPath(DependencyNode node) { 255 nodePath.set(node.getIndex(), true); 256 } 257 258 private boolean isCoveredRoot(DependencyNode node) { 259 return isCoveredRoot.get(node.getIndex()); 260 } 261 262 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException { 263 for (int index : pdg.getTokenIndices()) { 264 if (isCoveredRoot(pdg.getTokenNode(index))) { 265 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex()); 266 } 267 } 268 } 269 270 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException { 271 int i; 272 boolean foundCoveredRoot = false; 273 DependencyNode coveredRootHead; 274 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead() 275 .getIndex()); i++) { 276 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex(); 277 if (leftMostIndex == -1) { 278 leftMostIndex = i; 279 } 280 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex(); 281 if (rightMostIndex == -1) { 282 rightMostIndex = i; 283 } 284 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot() 285 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex 286 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) { 287 if (rootAttachment == CoveredRootAttachment.LEFT) { 288 if (deepest.getHead().getIndex() < deepest.getIndex()) { 289 coveredRootHead = deepest.getHead(); 290 } else { 291 coveredRootHead = deepest; 292 } 293 } else if (rootAttachment == CoveredRootAttachment.RIGHT) { 294 if (deepest.getIndex() < deepest.getHead().getIndex()) { 295 coveredRootHead = deepest.getHead(); 296 } else { 297 coveredRootHead = deepest; 298 } 299 } else { 300 coveredRootHead = deepest.getHead(); 301 } 302 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex()); 303 setCoveredRoot(pdg.getDependencyNode(i)); 304 foundCoveredRoot = true; 305 } 306 } 307 return foundCoveredRoot; 308 } 309 310 private void setCoveredRoot(DependencyNode node) { 311 isCoveredRoot.set(node.getIndex(), true); 312 } 313 314 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 315 DependencyNode deepestNonProjectiveNode = null; 316 for (int index : pdg.getDependencyIndices()) { 317 if (!pdg.getDependencyNode(index).isProjective() 318 && (deepestNonProjectiveNode == null 319 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) { 320 deepestNonProjectiveNode = pdg.getDependencyNode(index); 321 } 322 } 323 324 return deepestNonProjectiveNode; 325 } 326 327 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException { 328 DependencyNode shortestNonProjectiveNode = null; 329 for (int index : pdg.getDependencyIndices()) { 330 if (!pdg.getDependencyNode(index).isProjective() 331 && (shortestNonProjectiveNode == null 332 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 333 )) { 334// || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) { 335 shortestNonProjectiveNode = pdg.getDependencyNode(index); 336 } 337 } 338 return shortestNonProjectiveNode; 339 } 340 341 342 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException { 343 nodeRelationLength.add(0); 344 for (int index : pdg.getTokenIndices()) { 345 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex())); 346 } 347 } 348 349 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException { 350 int newLabelCode; 351 for (int index : pdg.getTokenIndices()) { 352 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 353 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH 354 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 355 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) { 356 if (nodeLifted.get(index)) { 357 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#"); 358 } else { 359 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 360 } 361 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 362 } else { 363 if (nodeLifted.get(index)) { 364 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode( 365 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable))); 366 } else { 367 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#"); 368 } 369 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode); 370 } 371 } 372 373 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 374 if (nodePath.get(index)) { 375 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#"); 376 } else { 377 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#"); 378 } 379 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode); 380 } 381 382 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 383 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#")); 384 } 385 } 386 } 387 388 private void setLabel(DependencyNode node, String label) throws MaltChainedException { 389 // node.getLabelCode().clear(); 390 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label)); 391 } 392 393 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException { 394 Vector<String> originalDeprel = new Vector<String>(); 395 String newLabel; 396 originalDeprel.add(null); 397 for (int index : pdg.getTokenIndices()) { 398 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable))); 399 } 400 for (int index : pdg.getTokenIndices()) { 401 newLabel = null; 402 if (!isCoveredRoot(pdg.getDependencyNode(index))) { 403 if (markingStrategy == PseudoProjectiveEncoding.HEAD) { 404 if (nodeLifted.get(index)) { 405 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 406 + originalDeprel.get(headDeprel.get(index).getIndex()); 407 // } else { 408 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)); 409 } 410 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 411 if (nodeLifted.get(index) && nodePath.get(index)) { 412 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%"; 413 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 414 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 415 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 416 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%"; 417 } 418 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 419 if (nodeLifted.get(index) && nodePath.get(index)) { 420 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 421 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%"; 422 } else if (nodeLifted.get(index) && !nodePath.get(index)) { 423 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|" 424 + originalDeprel.get(headDeprel.get(index).getIndex()); 425 } else if (!nodeLifted.get(index) && nodePath.get(index)) { 426 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%"; 427 } 428 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) { 429 if (nodeLifted.get(index)) { 430 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"; 431 } 432 } 433 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) { 434 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null"; 435 } 436 if (newLabel != null) { 437 setLabel(pdg.getDependencyNode(index), newLabel); 438 } 439 } 440 } 441 442 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException { 443 initDeprojeciviztion(pdg); 444 445 for (int index : pdg.getTokenIndices()) { 446 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 447 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable) 448 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) { 449 setPath(pdg.getDependencyNode(index)); 450 } 451 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable) 452 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) { 453 nodeLifted.set(index, true); 454 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) { 455 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge() 456 .getLabelCode(ppliftedSymbolTable))); 457 } 458 } 459 } 460 } 461 deattachCoveredRootsForDeprojectivization(pdg); 462 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) { 463 deprojectivizeWithHead(pdg, pdg.getDependencyRoot()); 464 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) { 465 deprojectivizeWithPath(pdg, pdg.getDependencyRoot()); 466 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) { 467 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot()); 468 } 469 } 470 471 private void initDeprojeciviztion(DependencyStructure pdg) { 472 nodeLifted.clear(); 473 nodePath.clear(); 474 synacticHeadDeprel.clear(); 475 for (int index : pdg.getDependencyIndices()) { 476 nodeLifted.add(false); 477 nodePath.add(false); 478 synacticHeadDeprel.add(null); 479 } 480 } 481 482 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException { 483 for (int index : pdg.getTokenIndices()) { 484 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) { 485 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable) 486 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals( 487 "#true#")) { 488 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex()); 489 } 490 } 491 } 492 } 493 494 // Check whether there is at least one node in the specified dependency structure that can be lifted. 495 // If this is not the case, there is no need to call deprojectivizeWithHead. 496 497 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException { 498 for (int index : pdg.getDependencyIndices()) { 499 if (nodeLifted.get(index)) { 500 DependencyNode node = pdg.getDependencyNode(index); 501 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) { 502 return true; 503 } 504 } 505 } 506 return false; 507 } 508 509 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 510 boolean success = true, childSuccess = false; 511 int i, childAttempts = 2; 512 DependencyNode child, possibleSyntacticHead; 513 String syntacticHeadDeprel; 514 if (nodeLifted.get(node.getIndex())) { 515 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex()); 516 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel); 517 if (possibleSyntacticHead != null) { 518 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 519 nodeLifted.set(node.getIndex(), false); 520 } else { 521 success = false; 522 } 523 } 524 while (!childSuccess && childAttempts > 0) { 525 childSuccess = true; 526 Vector<DependencyNode> children = new Vector<DependencyNode>(); 527 i = 0; 528 while ((child = node.getLeftDependent(i)) != null) { 529 children.add(child); 530 i++; 531 } 532 i = 0; 533 while ((child = node.getRightDependent(i)) != null) { 534 children.add(child); 535 i++; 536 } 537 for (i = 0; i < children.size(); i++) { 538 child = children.get(i); 539 if (!deprojectivizeWithHead(pdg, child)) { 540 childSuccess = false; 541 } 542 } 543 childAttempts--; 544 } 545 return childSuccess && success; 546 } 547 548 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel) 549 throws MaltChainedException { 550 DependencyNode dependent; 551 String dependentDeprel; 552 Vector<DependencyNode> nodes = new Vector<DependencyNode>(); 553 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false)); 554 while (nodes.size() > 0) { 555 dependent = nodes.remove(0); 556 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) { 557 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)); 558 if (dependentDeprel.equals(syntacticHeadDeprel)) { 559 return dependent; 560 } 561 } 562 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false)); 563 } 564 return null; 565 } 566 567 568 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 569 boolean percentOnly) { 570 Vector<DependencyNode> output = new Vector<DependencyNode>(); 571 SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>(); 572 dependents.addAll(governor.getLeftDependents()); 573 dependents.addAll(governor.getRightDependents()); 574 575 576 DependencyNode[] deps = new DependencyNode[dependents.size()]; 577 int[] distances = new int[dependents.size()]; 578 int i = 0; 579 for (DependencyNode dep : dependents) { 580 distances[i] = Math.abs(dep.getIndex() - avoid.getIndex()); 581 deps[i] = dep; 582 i++; 583 } 584 if (distances.length > 1) { 585 int smallest; 586 int n = distances.length; 587 int tmpDist; 588 DependencyNode tmpDep; 589 for (i=0; i < n; i++) { 590 smallest = i; 591 for (int j=i; j < n; j++) { 592 if (distances[j] < distances[smallest]) { 593 smallest = j; 594 } 595 } 596 if (smallest != i) { 597 tmpDist = distances[smallest]; 598 distances[smallest] = distances[i]; 599 distances[i] = tmpDist; 600 tmpDep = deps[smallest]; 601 deps[smallest] = deps[i]; 602 deps[i] = tmpDep; 603 } 604 } 605 } 606 for (i=0; i<distances.length;i++) { 607 if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath.get(deps[i].getIndex())))) { 608 output.add(deps[i]); 609 } 610 } 611 return output; 612 } 613 614 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode2(DependencyStructure dg, DependencyNode governor, DependencyNode avoid, 615 boolean percentOnly) { 616 int i, j; 617 Vector<DependencyNode> dependents = new Vector<DependencyNode>(); 618 DependencyNode leftChild, rightChild; 619 620 i = governor.getLeftDependentCount() - 1; 621 j = 0; 622 leftChild = governor.getLeftDependent(i--); 623 rightChild = governor.getRightDependent(j++); 624 625 while (leftChild != null && rightChild != null) { 626 if (leftChild == avoid) { 627 leftChild = governor.getLeftDependent(i--); 628 } else if (rightChild == avoid) { 629 rightChild = governor.getRightDependent(j++); 630 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) { 631 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) { 632 dependents.add(leftChild); 633 } 634 leftChild = governor.getLeftDependent(i--); 635 } else { 636 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) { 637 dependents.add(rightChild); 638 } 639 rightChild = governor.getRightDependent(j++); 640 } 641 } 642 while (leftChild != null) { 643 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) { 644 dependents.add(leftChild); 645 } 646 leftChild = governor.getLeftDependent(i--); 647 } 648 while (rightChild != null) { 649 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) { 650 dependents.add(rightChild); 651 } 652 rightChild = governor.getRightDependent(j++); 653 } 654 return dependents; 655 } 656 657 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 658 boolean success = true, childSuccess = false; 659 int i, childAttempts = 2; 660 DependencyNode child, possibleSyntacticHead; 661 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 662 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 663 if (possibleSyntacticHead != null) { 664 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 665 nodeLifted.set(node.getIndex(), false); 666 } else { 667 success = false; 668 } 669 } 670 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 671 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node); 672 if (possibleSyntacticHead != null) { 673 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 674 nodeLifted.set(node.getIndex(), false); 675 } else { 676 success = false; 677 } 678 } 679 while (!childSuccess && childAttempts > 0) { 680 childSuccess = true; 681 Vector<DependencyNode> children = new Vector<DependencyNode>(); 682 i = 0; 683 while ((child = node.getLeftDependent(i)) != null) { 684 children.add(child); 685 i++; 686 } 687 i = 0; 688 while ((child = node.getRightDependent(i)) != null) { 689 children.add(child); 690 i++; 691 } 692 for (i = 0; i < children.size(); i++) { 693 child = children.get(i); 694 if (!deprojectivizeWithPath(pdg, child)) { 695 childSuccess = false; 696 } 697 } 698 childAttempts--; 699 } 700 return childSuccess && success; 701 } 702 703 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) { 704 DependencyNode dependent; 705 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes; 706 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 707 while (nodes.size() > 0) { 708 dependent = nodes.remove(0); 709 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) { 710 return dependent; 711 } 712 nodes.addAll(newNodes); 713 } 714 return null; 715 } 716 717 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException { 718 boolean success = true, childSuccess = false; 719 int i, childAttempts = 2; 720 DependencyNode child, possibleSyntacticHead; 721 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) { 722 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 723 .getIndex())); 724 if (possibleSyntacticHead != null) { 725 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 726 nodeLifted.set(node.getIndex(), false); 727 } else { 728 success = false; 729 } 730 } 731 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) { 732 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node 733 .getIndex())); 734 if (possibleSyntacticHead != null) { 735 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex()); 736 nodeLifted.set(node.getIndex(), false); 737 } else { 738 success = false; 739 } 740 } 741 while (!childSuccess && childAttempts > 0) { 742 childSuccess = true; 743 Vector<DependencyNode> children = new Vector<DependencyNode>(); 744 i = 0; 745 while ((child = node.getLeftDependent(i)) != null) { 746 children.add(child); 747 i++; 748 } 749 i = 0; 750 while ((child = node.getRightDependent(i)) != null) { 751 children.add(child); 752 i++; 753 } 754 for (i = 0; i < children.size(); i++) { 755 child = children.get(i); 756 if (!deprojectivizeWithHeadAndPath(pdg, child)) { 757 childSuccess = false; 758 } 759 } 760 childAttempts--; 761 } 762 return childSuccess && success; 763 } 764 765 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode) 766 throws MaltChainedException { 767 DependencyNode dependent; 768 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>(); 769 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true)); 770 while (nodes.size() > 0) { 771 dependent = nodes.remove(0); 772 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0 773 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) { 774 return dependent; 775 } 776 nodes.addAll(newNodes); 777 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode) 778 && newNodes.size() != 0) { 779 secondChance.add(dependent); 780 } 781 } 782 if (secondChance.size() > 0) { 783 return secondChance.firstElement(); 784 } 785 return null; 786 } 787}