package org.maltparser.transform.pseudo;

import java.util.SortedSet;
import java.util.TreeSet;
import java.util.Vector;

import org.apache.log4j.Logger;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.symbol.SymbolTable;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.node.DependencyNode;

/**
 * This class contains methods for projectivizing and deprojectivizing
 * dependency structures.
 *
 * <p>Projectivization repeatedly re-attaches a non-projective node to the head
 * of its head until the structure is projective, and records on the edges
 * (internal columns {@code PPLIFTED}, {@code PPPATH}, {@code PPCOVERED}) or in
 * the {@code DEPREL} label itself (suffixes {@code |...}, {@code %},
 * {@code |null}) what was done. Deprojectivization reads those marks back and
 * searches for the node to re-attach each lifted node to.
 *
 * <p>Instances keep per-sentence state in fields, and the sentence counter
 * {@link #id} is a mutable static; nothing in this class synchronizes access.
 *
 * @author Jens Nilsson
 */
public class PseudoProjectivity {
    /** Incremented on every call to {@link #projectivize}; only used in the warning message. */
    static int id = 0;

    /** Symbol stored in the boolean-valued internal columns for "yes". */
    private static final String TRUE_SYMBOL = "#true#";
    /** Symbol stored in the boolean-valued internal columns for "no". */
    private static final String FALSE_SYMBOL = "#false#";

    /** Marking strategies selectable through {@link #initialize}. */
    private enum PseudoProjectiveEncoding {
        NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
    }

    /** Covered-root handling selectable through {@link #initialize}. */
    private enum CoveredRootAttachment {
        NONE, IGNORE, LEFT, RIGHT, HEAD
    }

    /** Order in which non-projective nodes are picked for lifting. */
    private enum LiftingOrder {
        SHORTEST, DEEPEST
    }

    private PseudoProjectiveEncoding markingStrategy;
    private CoveredRootAttachment rootAttachment;
    private LiftingOrder liftingOrder;
    private Logger configLogger;

    private SymbolTable deprelSymbolTable;
    private SymbolTable pppathSymbolTable;
    private SymbolTable ppliftedSymbolTable;
    private SymbolTable ppcoveredRootSymbolTable;

    private ColumnDescription deprelColumn;
    private ColumnDescription pppathColumn;
    private ColumnDescription ppliftedColumn;
    private ColumnDescription ppcoveredRootColumn;

    // Per-sentence state; every vector is indexed by node index (0 is the root).
    private Vector<Boolean> nodeLifted;
    private Vector<Vector<DependencyNode>> nodeTrace;
    private Vector<DependencyNode> headDeprel;
    private Vector<Boolean> nodePath;
    private Vector<Boolean> isCoveredRoot;
    private Vector<Integer> nodeRelationLength;
    private Vector<String> synacticHeadDeprel;

    public PseudoProjectivity() { }

    /**
     * Configures the transformation and registers the internal columns it needs.
     *
     * <p>Option strings are matched case-insensitively. A string that matches none
     * of the known values leaves the corresponding setting {@code null}; note that
     * a {@code null} root attachment is treated like "not NONE" below, so the
     * {@code PPCOVERED} column is still registered in that case.
     *
     * @param markingStrategyString one of {@code none, baseline, head, path, head+path, trace}
     * @param coveredRoot one of {@code none, ignore, left, right, head}
     * @param liftingOrder one of {@code shortest, deepest}
     * @param configLogger logger used for the "not a tree" warning in {@link #projectivize}
     * @param dataFormatInstance data format providing the {@code DEPREL} column and
     *        receiving the internal columns
     * @throws MaltChainedException propagated from the data-format and symbol-table calls
     */
    public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
            DataFormatInstance dataFormatInstance) throws MaltChainedException {
        nodeLifted = new Vector<Boolean>();
        nodeTrace = new Vector<Vector<DependencyNode>>();
        headDeprel = new Vector<DependencyNode>();
        nodePath = new Vector<Boolean>();
        isCoveredRoot = new Vector<Boolean>();
        nodeRelationLength = new Vector<Integer>();
        synacticHeadDeprel = new Vector<String>();

        this.configLogger = configLogger;
        if (markingStrategyString.equalsIgnoreCase("none")) {
            markingStrategy = PseudoProjectiveEncoding.NONE;
        } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
            markingStrategy = PseudoProjectiveEncoding.BASELINE;
        } else if (markingStrategyString.equalsIgnoreCase("head")) {
            markingStrategy = PseudoProjectiveEncoding.HEAD;
        } else if (markingStrategyString.equalsIgnoreCase("path")) {
            markingStrategy = PseudoProjectiveEncoding.PATH;
        } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
            markingStrategy = PseudoProjectiveEncoding.HEADPATH;
        } else if (markingStrategyString.equalsIgnoreCase("trace")) {
            markingStrategy = PseudoProjectiveEncoding.TRACE;
        }

        this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
        this.deprelSymbolTable = deprelColumn.getSymbolTable();

        if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
                || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
            this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "",
                    deprelColumn.getNullValueStrategy());
            this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable();
            if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
                // Only PATH stores a plain boolean; HEAD and HEADPATH store a deprel instead of "#true#".
                ppliftedSymbolTable.addSymbol(TRUE_SYMBOL);
                ppliftedSymbolTable.addSymbol(FALSE_SYMBOL);
            } else {
                ppliftedSymbolTable.addSymbol(FALSE_SYMBOL);
            }
        }

        if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
            this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "",
                    deprelColumn.getNullValueStrategy());
            this.pppathSymbolTable = pppathColumn.getSymbolTable();
            pppathSymbolTable.addSymbol(TRUE_SYMBOL);
            pppathSymbolTable.addSymbol(FALSE_SYMBOL);
        }

        if (coveredRoot.equalsIgnoreCase("none")) {
            this.rootAttachment = CoveredRootAttachment.NONE;
        } else if (coveredRoot.equalsIgnoreCase("ignore")) {
            this.rootAttachment = CoveredRootAttachment.IGNORE;
        } else if (coveredRoot.equalsIgnoreCase("left")) {
            this.rootAttachment = CoveredRootAttachment.LEFT;
        } else if (coveredRoot.equalsIgnoreCase("right")) {
            this.rootAttachment = CoveredRootAttachment.RIGHT;
        } else if (coveredRoot.equalsIgnoreCase("head")) {
            this.rootAttachment = CoveredRootAttachment.HEAD;
        }

        if (this.rootAttachment != CoveredRootAttachment.NONE) {
            this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "",
                    deprelColumn.getNullValueStrategy());
            this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable();
            ppcoveredRootSymbolTable.addSymbol(TRUE_SYMBOL);
            ppcoveredRootSymbolTable.addSymbol(FALSE_SYMBOL);
        }

        if (liftingOrder.equalsIgnoreCase("shortest")) {
            this.liftingOrder = LiftingOrder.SHORTEST;
        } else if (liftingOrder.equalsIgnoreCase("deepest")) {
            this.liftingOrder = LiftingOrder.DEEPEST;
        }
    }

    /**
     * Resets the per-sentence state for projectivization and presets every
     * registered internal column to {@code #false#} on all non-root head edges.
     */
    private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
        nodeLifted.clear();
        nodeTrace.clear();
        headDeprel.clear();
        nodePath.clear();
        isCoveredRoot.clear();
        nodeRelationLength.clear();

        for (int index : pdg.getDependencyIndices()) {
            nodeLifted.add(false);
            nodeTrace.add(new Vector<DependencyNode>());
            headDeprel.add(null);
            nodePath.add(false);
            isCoveredRoot.add(false);
            markFalse(pdg, index, ppliftedSymbolTable);
            markFalse(pdg, index, pppathSymbolTable);
            markFalse(pdg, index, ppcoveredRootSymbolTable);
        }
        computeRelationLength(pdg);
    }

    /**
     * Puts {@code #false#} for {@code table} on the head edge of node {@code index}.
     * Does nothing for the root (index 0) or when the table was not registered.
     */
    private void markFalse(DependencyStructure pdg, int index, SymbolTable table) throws MaltChainedException {
        if (table != null && index != 0) {
            pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(table, table.getSymbolStringToCode(FALSE_SYMBOL));
        }
    }

    /**
     * Projectivizes {@code pdg} in place and labels its edges with the internal
     * columns describing the lifts.
     *
     * <p>If {@code pdg} is not a tree, a warning is logged and the structure is
     * left untouched. With marking strategy {@code NONE} no node is lifted.
     *
     * @param pdg the dependency structure to transform
     * @throws MaltChainedException propagated from the graph and symbol-table calls
     */
    public void projectivize(DependencyStructure pdg) throws MaltChainedException {
        id++;
        if (!pdg.isTree()) {
            configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
            return;
        }
        initProjectivization(pdg);
        if (rootAttachment == CoveredRootAttachment.IGNORE) {
            if (markingStrategy != PseudoProjectiveEncoding.NONE) {
                while (!pdg.isProjective()) {
                    DependencyNode candidate = selectNodeToLift(pdg);
                    // A round that re-attaches covered roots does not lift; the loop re-evaluates afterwards.
                    if (!attachCoveredRoots(pdg, candidate)) {
                        liftOneStep(pdg, candidate);
                    }
                }
                deattachCoveredRootsForProjectivization(pdg);
            }
        } else {
            if (rootAttachment != CoveredRootAttachment.NONE) {
                for (int index : pdg.getTokenIndices()) {
                    attachCoveredRoots(pdg, pdg.getTokenNode(index));
                }
            }
            if (markingStrategy != PseudoProjectiveEncoding.NONE) {
                while (!pdg.isProjective()) {
                    liftOneStep(pdg, selectNodeToLift(pdg));
                }
            }
        }
        assignPseudoProjectiveDeprels(pdg);
    }

    /** Picks the next non-projective node according to the configured lifting order. */
    private DependencyNode selectNodeToLift(DependencyStructure pdg) throws MaltChainedException {
        if (liftingOrder == LiftingOrder.DEEPEST) {
            return getDeepestNonProjectiveNode(pdg);
        }
        return getShortestNonProjectiveNode(pdg);
    }

    /**
     * Lifts {@code node} one level: marks it lifted, remembers its head the first
     * time it is lifted, marks that head as being on a path, and re-attaches the
     * node to the head of its head.
     */
    private void liftOneStep(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
        nodeLifted.set(node.getIndex(), true);
        setHeadDeprel(node, node.getHead());
        setPath(node.getHead());
        pdg.moveDependencyEdge(pdg.getDependencyNode(node.getHead().getHead().getIndex()).getIndex(), node.getIndex());
    }

    /**
     * Encodes the lift marks computed by {@link #projectivize} into the
     * {@code DEPREL} labels of {@code pdg}.
     *
     * @param pdg a structure previously passed to {@link #projectivize}
     * @throws MaltChainedException propagated from the graph and symbol-table calls
     */
    public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
        assignPseudoProjectiveDeprelsForMerge(pdg);
    }

    /**
     * Splits merged {@code DEPREL} labels back into a plain deprel plus internal
     * column labels: a {@code %} suffix becomes {@code PPPATH=#true#}, a
     * {@code |x} suffix becomes {@code PPLIFTED=x} (or {@code #true#} when nothing
     * follows the bar), and a {@code |null} suffix becomes {@code PPCOVERED=#true#}.
     *
     * @param pdg the dependency structure whose labels are rewritten in place
     * @throws MaltChainedException propagated from the graph and symbol-table calls
     */
    public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
        initDeprojeciviztion(pdg);
        for (int index : pdg.getTokenIndices()) {
            DependencyNode token = pdg.getTokenNode(index);
            if (!token.getHeadEdge().hasLabel(deprelSymbolTable)) {
                continue;
            }
            String label = deprelOf(token);
            int pathLabelIndex;
            if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
                label = label.substring(0, pathLabelIndex);
                setLabel(token, label);
                token.getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode(TRUE_SYMBOL));
            }
            int movedLabelIndex;
            // "|null" marks a covered root and is handled by the second pass below.
            if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
                if (movedLabelIndex + 1 < label.length()) {
                    token.getHeadEdge().addLabel(ppliftedSymbolTable,
                            ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
                } else {
                    token.getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(TRUE_SYMBOL));
                }
                label = label.substring(0, movedLabelIndex);
                setLabel(token, label);
            }
        }
        for (int index : pdg.getTokenIndices()) {
            DependencyNode token = pdg.getTokenNode(index);
            if (!token.getHeadEdge().hasLabel(deprelSymbolTable)) {
                continue;
            }
            String label = deprelOf(token);
            int coveredArcLabelIndex;
            // Null guard mirrors the first pass; without it a null symbol string would throw here.
            if (label != null && (coveredArcLabelIndex = label.indexOf("|null")) != -1) {
                label = label.substring(0, coveredArcLabelIndex);
                setLabel(token, label);
                token.getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode(TRUE_SYMBOL));
            }
        }
    }

    /** Returns the {@code DEPREL} symbol string on the head edge of {@code node}. */
    private String deprelOf(DependencyNode node) throws MaltChainedException {
        return deprelSymbolTable.getSymbolCodeToString(node.getHeadEdge().getLabelCode(deprelSymbolTable));
    }

    /**
     * Remembers {@code parent} as the head of {@code node} unless one is already
     * remembered, so repeated lifts keep the first head.
     */
    private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
        if (headDeprel.get(node.getIndex()) == null) {
            headDeprel.set(node.getIndex(), parent);
        }
        // NOTE(review): this stores the whole headDeprel vector as the node's trace, not a
        // per-node trace. nodeTrace is never read in this class, so it is kept as is - confirm intent.
        nodeTrace.set(node.getIndex(), headDeprel);
    }

    /** Marks {@code node} as lying on a lifting path. */
    private void setPath(DependencyNode node) {
        nodePath.set(node.getIndex(), true);
    }

    /** Tells whether {@code node} was re-attached by {@link #attachCoveredRoots}. */
    private boolean isCoveredRoot(DependencyNode node) {
        return isCoveredRoot.get(node.getIndex());
    }

    /** Re-attaches every node flagged as covered root to the dependency root. */
    private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
        for (int index : pdg.getTokenIndices()) {
            DependencyNode token = pdg.getTokenNode(index);
            if (isCoveredRoot(token)) {
                pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), token.getIndex());
            }
        }
    }

    /**
     * Re-attaches root-attached nodes that lie strictly inside the arc between
     * {@code deepest} and its head, together with all their descendants.
     *
     * <p>The new head is chosen by the root-attachment setting: {@code LEFT} picks
     * the left end of the arc, {@code RIGHT} the right end, anything else the head
     * of {@code deepest}. Nodes that are lifted, and arcs whose head is the root,
     * are skipped.
     *
     * @return {@code true} if at least one node was re-attached
     */
    private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
        boolean foundCoveredRoot = false;
        // Only edges of nodes strictly inside the arc are moved, so its end points stay fixed.
        final int arcStart = Math.min(deepest.getIndex(), deepest.getHead().getIndex());
        final int arcEnd = Math.max(deepest.getIndex(), deepest.getHead().getIndex());
        for (int i = arcStart + 1; i < arcEnd; i++) {
            int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
            if (leftMostIndex == -1) {
                leftMostIndex = i;
            }
            int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
            if (rightMostIndex == -1) {
                rightMostIndex = i;
            }
            if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
                    && arcStart < leftMostIndex && rightMostIndex < arcEnd) {
                DependencyNode coveredRootHead;
                if (rootAttachment == CoveredRootAttachment.LEFT) {
                    coveredRootHead = deepest.getHead().getIndex() < deepest.getIndex() ? deepest.getHead() : deepest;
                } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
                    coveredRootHead = deepest.getIndex() < deepest.getHead().getIndex() ? deepest.getHead() : deepest;
                } else {
                    coveredRootHead = deepest.getHead();
                }
                pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
                setCoveredRoot(pdg.getDependencyNode(i));
                foundCoveredRoot = true;
            }
        }
        return foundCoveredRoot;
    }

    /** Flags {@code node} as a covered root. */
    private void setCoveredRoot(DependencyNode node) {
        isCoveredRoot.set(node.getIndex(), true);
    }

    /**
     * Returns the non-projective node with the greatest depth, or {@code null} if
     * every node is projective. Among equally deep nodes the first one in index
     * iteration order wins.
     */
    private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
        DependencyNode deepest = null;
        for (int index : pdg.getDependencyIndices()) {
            DependencyNode candidate = pdg.getDependencyNode(index);
            if (!candidate.isProjective()
                    && (deepest == null || candidate.getDependencyNodeDepth() > deepest.getDependencyNodeDepth())) {
                deepest = candidate;
            }
        }
        return deepest;
    }

    /**
     * Returns the non-projective node with the smallest original arc length, or
     * {@code null} if every node is projective. The comparison is strict, so among
     * equally short arcs the first one in index iteration order wins.
     */
    private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
        DependencyNode shortest = null;
        for (int index : pdg.getDependencyIndices()) {
            if (!pdg.getDependencyNode(index).isProjective()
                    && (shortest == null || nodeRelationLength.get(index) < nodeRelationLength.get(shortest.getIndex()))) {
                shortest = pdg.getDependencyNode(index);
            }
        }
        return shortest;
    }

    /**
     * Fills {@code nodeRelationLength} with the distance between each token and its
     * head. Slot 0 (the root) is 0. The values are computed once, before any lift.
     */
    private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
        nodeRelationLength.add(0);
        for (int index : pdg.getTokenIndices()) {
            nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
        }
    }

    /**
     * Writes the lift marks into the internal columns of every token's head edge.
     * {@code PPLIFTED} holds a boolean for {@code PATH} and the deprel of the
     * remembered head for {@code HEAD}/{@code HEADPATH}; {@code PPPATH} holds a
     * boolean. Covered roots only get {@code PPCOVERED=#true#}, and only when the
     * root attachment is neither {@code NONE} nor {@code IGNORE}.
     */
    private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
        for (int index : pdg.getTokenIndices()) {
            DependencyNode node = pdg.getDependencyNode(index);
            if (!isCoveredRoot(node)) {
                if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
                        || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
                    int liftedCode;
                    if (!nodeLifted.get(index)) {
                        liftedCode = ppliftedSymbolTable.getSymbolStringToCode(FALSE_SYMBOL);
                    } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
                        liftedCode = ppliftedSymbolTable.getSymbolStringToCode(TRUE_SYMBOL);
                    } else {
                        liftedCode = ppliftedSymbolTable.addSymbol(deprelOf(pdg.getDependencyNode(headDeprel.get(index).getIndex())));
                    }
                    node.getHeadEdge().addLabel(ppliftedSymbolTable, liftedCode);
                }

                if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
                    int pathCode = pppathSymbolTable.getSymbolStringToCode(nodePath.get(index) ? TRUE_SYMBOL : FALSE_SYMBOL);
                    node.getHeadEdge().addLabel(pppathSymbolTable, pathCode);
                }
            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
                node.getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode(TRUE_SYMBOL));
            }
        }
    }

    /** Sets the {@code DEPREL} label of {@code node}'s head edge, adding the symbol if needed. */
    private void setLabel(DependencyNode node, String label) throws MaltChainedException {
        node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
    }

    /**
     * Rewrites {@code DEPREL} labels so that they carry the lift marks as suffixes:
     * {@code |headDeprel} and/or {@code %} depending on the marking strategy,
     * {@code |} for a lifted node under {@code PATH}/{@code TRACE}, and
     * {@code |null} for covered roots. Labels of unmarked nodes are left alone.
     */
    private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
        // Snapshot first: labels are rewritten below, but suffixes must use the original head deprels.
        Vector<String> originalDeprel = new Vector<String>();
        originalDeprel.add(null);
        for (int index : pdg.getTokenIndices()) {
            originalDeprel.add(deprelOf(pdg.getDependencyNode(index)));
        }
        for (int index : pdg.getTokenIndices()) {
            DependencyNode node = pdg.getDependencyNode(index);
            String newLabel = null;
            if (!isCoveredRoot(node)) {
                if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
                    if (nodeLifted.get(index)) {
                        newLabel = deprelOf(node) + "|" + originalDeprel.get(headDeprel.get(index).getIndex());
                    }
                } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
                    if (nodeLifted.get(index) && nodePath.get(index)) {
                        newLabel = deprelOf(node) + "|%";
                    } else if (nodeLifted.get(index) && !nodePath.get(index)) {
                        newLabel = deprelOf(node) + "|";
                    } else if (!nodeLifted.get(index) && nodePath.get(index)) {
                        newLabel = deprelOf(node) + "%";
                    }
                } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
                    if (nodeLifted.get(index) && nodePath.get(index)) {
                        newLabel = deprelOf(node) + "|" + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
                    } else if (nodeLifted.get(index) && !nodePath.get(index)) {
                        newLabel = deprelOf(node) + "|" + originalDeprel.get(headDeprel.get(index).getIndex());
                    } else if (!nodeLifted.get(index) && nodePath.get(index)) {
                        newLabel = originalDeprel.get(node.getIndex()) + "%";
                    }
                } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
                    if (nodeLifted.get(index)) {
                        newLabel = deprelOf(node) + "|";
                    }
                }
            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
                newLabel = deprelOf(node) + "|null";
            }
            if (newLabel != null) {
                setLabel(node, newLabel);
            }
        }
    }

    /**
     * Reverses the projectivization of {@code pdg} in place using the internal
     * column labels on its edges. Covered roots are moved back to the root first;
     * then lifted nodes are re-attached with the search that matches the marking
     * strategy ({@code HEAD}, {@code PATH} or {@code HEADPATH}). Other strategies
     * perform no re-attachment.
     *
     * @param pdg the dependency structure to transform
     * @throws MaltChainedException propagated from the graph and symbol-table calls
     */
    public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
        initDeprojeciviztion(pdg);

        for (int index : pdg.getTokenIndices()) {
            DependencyNode node = pdg.getDependencyNode(index);
            if (!node.getHeadEdge().hasLabel(deprelSymbolTable)) {
                continue;
            }
            if (node.getHeadEdge().hasLabel(pppathSymbolTable)
                    && pppathSymbolTable.getSymbolCodeToString(node.getHeadEdge().getLabelCode(pppathSymbolTable)).equals(TRUE_SYMBOL)) {
                setPath(node);
            }
            if (node.getHeadEdge().hasLabel(ppliftedSymbolTable)) {
                String liftedLabel = ppliftedSymbolTable.getSymbolCodeToString(node.getHeadEdge().getLabelCode(ppliftedSymbolTable));
                if (!liftedLabel.equals(FALSE_SYMBOL)) {
                    nodeLifted.set(index, true);
                    // Anything other than the two boolean symbols is the deprel of the syntactic head.
                    if (!liftedLabel.equals(TRUE_SYMBOL)) {
                        synacticHeadDeprel.set(index, liftedLabel);
                    }
                }
            }
        }
        deattachCoveredRootsForDeprojectivization(pdg);
        if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
            deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
        } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
            deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
        } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
            deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
        }
    }

    /** Resets the per-sentence state used by deprojectivization and label splitting. */
    private void initDeprojeciviztion(DependencyStructure pdg) {
        nodeLifted.clear();
        nodePath.clear();
        synacticHeadDeprel.clear();
        for (int index : pdg.getDependencyIndices()) {
            nodeLifted.add(false);
            nodePath.add(false);
            synacticHeadDeprel.add(null);
        }
    }

    /** Moves every node whose edge carries {@code PPCOVERED=#true#} back under the root. */
    private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
        for (int index : pdg.getTokenIndices()) {
            DependencyNode node = pdg.getDependencyNode(index);
            if (node.getHeadEdge().hasLabel(deprelSymbolTable)
                    && node.getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
                    && ppcoveredRootSymbolTable.getSymbolCodeToString(node.getHeadEdge().getLabelCode(ppcoveredRootSymbolTable))
                            .equals(TRUE_SYMBOL)) {
                pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), node.getIndex());
            }
        }
    }

    /**
     * Checks whether there is at least one node in the specified dependency
     * structure that can be re-attached. If this is not the case, there is no need
     * to call {@link #deprojectivizeWithHead}.
     */
    private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
        for (int index : pdg.getDependencyIndices()) {
            if (nodeLifted.get(index)) {
                DependencyNode node = pdg.getDependencyNode(index);
                if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Returns the dependents of {@code node}, left dependents first, then right
     * dependents, each in the order the node reports them. The list is a snapshot,
     * so the caller may move edges while iterating over it.
     */
    private Vector<DependencyNode> childrenOf(DependencyNode node) {
        Vector<DependencyNode> children = new Vector<DependencyNode>();
        DependencyNode child;
        for (int i = 0; (child = node.getLeftDependent(i)) != null; i++) {
            children.add(child);
        }
        for (int i = 0; (child = node.getRightDependent(i)) != null; i++) {
            children.add(child);
        }
        return children;
    }

    /**
     * Re-attaches {@code node} to {@code newHead} and clears its lifted flag.
     *
     * @return {@code false}, without changing anything, when {@code newHead} is {@code null}
     */
    private boolean reattach(DependencyStructure pdg, DependencyNode node, DependencyNode newHead) throws MaltChainedException {
        if (newHead == null) {
            return false;
        }
        pdg.moveDependencyEdge(newHead.getIndex(), node.getIndex());
        nodeLifted.set(node.getIndex(), false);
        return true;
    }

    /** Tells whether {@code node} has a labeled head edge and is still marked as lifted. */
    private boolean isLiftedWithLabeledHead(DependencyNode node) throws MaltChainedException {
        return node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex());
    }

    /**
     * Recursively re-attaches lifted nodes in the subtree of {@code node} using the
     * {@code HEAD} encoding. The children are processed at most twice: a second
     * pass runs only if some child failed in the first.
     *
     * @return {@code true} if {@code node} and, in the last pass, all its children succeeded
     */
    private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
        boolean success = true;
        if (nodeLifted.get(node.getIndex())) {
            String syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
            success = reattach(pdg, node, breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel));
        }
        boolean childSuccess = false;
        for (int attemptsLeft = 2; !childSuccess && attemptsLeft > 0; attemptsLeft--) {
            childSuccess = true;
            for (DependencyNode child : childrenOf(node)) {
                if (!deprojectivizeWithHead(pdg, child)) {
                    childSuccess = false;
                }
            }
        }
        return childSuccess && success;
    }

    /**
     * Breadth-first search below {@code start} (never entering {@code avoid}) for
     * the first node whose deprel equals {@code syntacticHeadDeprel}. Siblings are
     * visited in order of distance to {@code avoid}.
     *
     * @return the matching node, or {@code null} if there is none
     */
    private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid,
            String syntacticHeadDeprel) throws MaltChainedException {
        Vector<DependencyNode> nodes = new Vector<DependencyNode>();
        nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
        while (!nodes.isEmpty()) {
            DependencyNode dependent = nodes.remove(0);
            if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
                String dependentDeprel = deprelOf(dependent);
                if (dependentDeprel.equals(syntacticHeadDeprel)) {
                    return dependent;
                }
            }
            nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
        }
        return null;
    }

    /**
     * Returns the dependents of {@code governor}, excluding {@code avoid}, ordered
     * by their index distance to {@code avoid}.
     *
     * @param dg unused; kept for signature compatibility
     * @param percentOnly when {@code true}, only dependents marked as lying on a
     *        lifting path are returned
     */
    private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor,
            DependencyNode avoid, boolean percentOnly) {
        Vector<DependencyNode> output = new Vector<DependencyNode>();
        SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>();
        dependents.addAll(governor.getLeftDependents());
        dependents.addAll(governor.getRightDependents());

        DependencyNode[] deps = new DependencyNode[dependents.size()];
        int[] distances = new int[dependents.size()];
        int i = 0;
        for (DependencyNode dep : dependents) {
            distances[i] = Math.abs(dep.getIndex() - avoid.getIndex());
            deps[i] = dep;
            i++;
        }
        // Selection sort, kept on purpose: the order of equally distant nodes decides which
        // candidate the searches find first, and a library sort would order ties differently.
        int n = distances.length;
        if (n > 1) {
            for (i = 0; i < n; i++) {
                int smallest = i;
                for (int j = i; j < n; j++) {
                    if (distances[j] < distances[smallest]) {
                        smallest = j;
                    }
                }
                if (smallest != i) {
                    int tmpDist = distances[smallest];
                    distances[smallest] = distances[i];
                    distances[i] = tmpDist;
                    DependencyNode tmpDep = deps[smallest];
                    deps[smallest] = deps[i];
                    deps[i] = tmpDep;
                }
            }
        }
        for (i = 0; i < n; i++) {
            if (deps[i] != avoid && (!percentOnly || nodePath.get(deps[i].getIndex()))) {
                output.add(deps[i]);
            }
        }
        return output;
    }

    /**
     * Alternative to {@link #findAllDependentsVectorSortedByDistanceToPProjNode}
     * that merges the left dependents (walked from the last index downwards) with
     * the right dependents (walked from index 0 upwards) by distance to
     * {@code avoid}. Not referenced anywhere in this class.
     *
     * @param dg unused
     * @param percentOnly when {@code true}, only dependents marked as lying on a
     *        lifting path are returned
     */
    private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode2(DependencyStructure dg, DependencyNode governor,
            DependencyNode avoid, boolean percentOnly) {
        Vector<DependencyNode> dependents = new Vector<DependencyNode>();
        int i = governor.getLeftDependentCount() - 1;
        int j = 0;
        DependencyNode leftChild = governor.getLeftDependent(i--);
        DependencyNode rightChild = governor.getRightDependent(j++);

        while (leftChild != null && rightChild != null) {
            if (leftChild == avoid) {
                leftChild = governor.getLeftDependent(i--);
            } else if (rightChild == avoid) {
                rightChild = governor.getRightDependent(j++);
            } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
                if (!percentOnly || nodePath.get(leftChild.getIndex())) {
                    dependents.add(leftChild);
                }
                leftChild = governor.getLeftDependent(i--);
            } else {
                if (!percentOnly || nodePath.get(rightChild.getIndex())) {
                    dependents.add(rightChild);
                }
                rightChild = governor.getRightDependent(j++);
            }
        }
        while (leftChild != null) {
            if (leftChild != avoid && (!percentOnly || nodePath.get(leftChild.getIndex()))) {
                dependents.add(leftChild);
            }
            leftChild = governor.getLeftDependent(i--);
        }
        while (rightChild != null) {
            if (rightChild != avoid && (!percentOnly || nodePath.get(rightChild.getIndex()))) {
                dependents.add(rightChild);
            }
            rightChild = governor.getRightDependent(j++);
        }
        return dependents;
    }

    /**
     * Recursively re-attaches lifted nodes in the subtree of {@code node} using the
     * {@code PATH} encoding. A lifted node that is itself on a path is tried first;
     * if it is still lifted afterwards the same search is tried once more. The
     * children are processed at most twice, the second time only after a failure.
     *
     * @return {@code true} if {@code node} and, in the last pass, all its children succeeded
     */
    private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
        boolean success = true;
        if (isLiftedWithLabeledHead(node) && nodePath.get(node.getIndex())) {
            if (!reattach(pdg, node, breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node))) {
                success = false;
            }
        }
        if (isLiftedWithLabeledHead(node)) {
            if (!reattach(pdg, node, breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node))) {
                success = false;
            }
        }
        boolean childSuccess = false;
        for (int attemptsLeft = 2; !childSuccess && attemptsLeft > 0; attemptsLeft--) {
            childSuccess = true;
            for (DependencyNode child : childrenOf(node)) {
                if (!deprojectivizeWithPath(pdg, child)) {
                    childSuccess = false;
                }
            }
        }
        return childSuccess && success;
    }

    /**
     * Breadth-first search below {@code start}, following only path-marked nodes
     * and never entering {@code avoid}, for the first node that has no path-marked
     * dependents of its own.
     *
     * @return that node, or {@code null} if {@code start} has no path-marked dependents
     */
    private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
        Vector<DependencyNode> nodes = new Vector<DependencyNode>();
        nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
        while (!nodes.isEmpty()) {
            DependencyNode dependent = nodes.remove(0);
            Vector<DependencyNode> newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true);
            if (newNodes.isEmpty()) {
                return dependent;
            }
            nodes.addAll(newNodes);
        }
        return null;
    }

    /**
     * Recursively re-attaches lifted nodes in the subtree of {@code node} using the
     * {@code HEADPATH} encoding. Same control flow as
     * {@link #deprojectivizeWithPath}, but the search also has to match the deprel
     * of the syntactic head.
     *
     * @return {@code true} if {@code node} and, in the last pass, all its children succeeded
     */
    private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
        boolean success = true;
        if (isLiftedWithLabeledHead(node) && nodePath.get(node.getIndex())) {
            if (!reattach(pdg, node, breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node,
                    synacticHeadDeprel.get(node.getIndex())))) {
                success = false;
            }
        }
        if (isLiftedWithLabeledHead(node)) {
            if (!reattach(pdg, node, breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node,
                    synacticHeadDeprel.get(node.getIndex())))) {
                success = false;
            }
        }
        boolean childSuccess = false;
        for (int attemptsLeft = 2; !childSuccess && attemptsLeft > 0; attemptsLeft--) {
            childSuccess = true;
            for (DependencyNode child : childrenOf(node)) {
                if (!deprojectivizeWithHeadAndPath(pdg, child)) {
                    childSuccess = false;
                }
            }
        }
        return childSuccess && success;
    }

    /**
     * Breadth-first search below {@code start}, following only path-marked nodes
     * and never entering {@code avoid}, for a node whose deprel equals
     * {@code syntacticHeadDeprelCode}. A match at the end of a path (no path-marked
     * dependents) is returned immediately; otherwise the first match found in the
     * middle of a path is returned as a fallback.
     *
     * @return the selected node, or {@code null} if no node matches
     */
    private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start,
            DependencyNode avoid, String syntacticHeadDeprelCode) throws MaltChainedException {
        Vector<DependencyNode> nodes = new Vector<DependencyNode>();
        Vector<DependencyNode> secondChance = new Vector<DependencyNode>();
        nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
        while (!nodes.isEmpty()) {
            DependencyNode dependent = nodes.remove(0);
            Vector<DependencyNode> newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true);
            boolean deprelMatches = deprelOf(dependent).equals(syntacticHeadDeprelCode);
            if (deprelMatches && newNodes.isEmpty()) {
                return dependent;
            }
            nodes.addAll(newNodes);
            if (deprelMatches) {
                // Reaching here with a match means the path continues below this node.
                secondChance.add(dependent);
            }
        }
        if (!secondChance.isEmpty()) {
            return secondChance.firstElement();
        }
        return null;
    }
}