001 package org.maltparser.transform.pseudo;
002
003 import java.util.Vector;
004
005 import org.apache.log4j.Logger;
006 import org.maltparser.core.exception.MaltChainedException;
007 import org.maltparser.core.io.dataformat.ColumnDescription;
008 import org.maltparser.core.io.dataformat.DataFormatInstance;
009 import org.maltparser.core.symbol.SymbolTable;
010 import org.maltparser.core.syntaxgraph.DependencyStructure;
011 import org.maltparser.core.syntaxgraph.node.DependencyNode;
012
013 /**
014 * This class contains methods for projectivizing and deprojectivizing
015 *
016 * @author Jens Nilsson
017 */
018 public class PseudoProjectivity {
019 static int id = 0;
020
021 private enum PseudoProjectiveEncoding {
022 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
023 };
024
025 private enum CoveredRootAttachment {
026 NONE, IGNORE, LEFT, RIGHT, HEAD
027 };
028
029 private enum LiftingOrder {
030 SHORTEST, DEEPEST
031 };
032
033 private PseudoProjectiveEncoding markingStrategy;
034 private CoveredRootAttachment rootAttachment;
035 private LiftingOrder liftingOrder;
036 private Logger configLogger;
037
038 private SymbolTable deprelSymbolTable;
039 private SymbolTable pppathSymbolTable;
040 private SymbolTable ppliftedSymbolTable;
041 private SymbolTable ppcoveredRootSymbolTable;
042
043 private ColumnDescription deprelColumn;
044 private ColumnDescription pppathColumn;
045 private ColumnDescription ppliftedColumn;
046 private ColumnDescription ppcoveredRootColumn;
047
048 private Vector<Boolean> nodeLifted;
049 private Vector<Vector<DependencyNode>> nodeTrace;
050 private Vector<DependencyNode> headDeprel;
051 private Vector<Boolean> nodePath;
052 private Vector<Boolean> isCoveredRoot;
053 private Vector<Integer> nodeRelationLength;
054 private Vector<String> synacticHeadDeprel;
055
056
057 public PseudoProjectivity() { }
058
059 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
060 DataFormatInstance dataFormatInstance) throws MaltChainedException {
061 nodeLifted = new Vector<Boolean>();
062 nodeTrace = new Vector<Vector<DependencyNode>>();
063 headDeprel = new Vector<DependencyNode>();
064 nodePath = new Vector<Boolean>();
065 isCoveredRoot = new Vector<Boolean>();
066 nodeRelationLength = new Vector<Integer>();
067 synacticHeadDeprel = new Vector<String>();
068
069 this.configLogger = configLogger;
070 if (markingStrategyString.equalsIgnoreCase("none")) {
071 markingStrategy = PseudoProjectiveEncoding.NONE;
072 } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
073 markingStrategy = PseudoProjectiveEncoding.BASELINE;
074 } else if (markingStrategyString.equalsIgnoreCase("head")) {
075 markingStrategy = PseudoProjectiveEncoding.HEAD;
076 } else if (markingStrategyString.equalsIgnoreCase("path")) {
077 markingStrategy = PseudoProjectiveEncoding.PATH;
078 } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
079 markingStrategy = PseudoProjectiveEncoding.HEADPATH;
080 } else if (markingStrategyString.equalsIgnoreCase("trace")) {
081 markingStrategy = PseudoProjectiveEncoding.TRACE;
082 }
083 this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
084 this.deprelSymbolTable = deprelColumn.getSymbolTable();
085 // this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL");
086 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
087 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
088 this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
089 this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable();
090 // this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable);
091 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
092 ppliftedSymbolTable.addSymbol("#true#");
093 ppliftedSymbolTable.addSymbol("#false#");
094 } else {
095 ppliftedSymbolTable.addSymbol("#false#");
096 }
097 }
098
099 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
100 this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
101 this.pppathSymbolTable = pppathColumn.getSymbolTable();
102 pppathSymbolTable.addSymbol("#true#");
103 pppathSymbolTable.addSymbol("#false#");
104 }
105
106 if (coveredRoot.equalsIgnoreCase("none")) {
107 this.rootAttachment = CoveredRootAttachment.NONE;
108 } else if (coveredRoot.equalsIgnoreCase("ignore")) {
109 this.rootAttachment = CoveredRootAttachment.IGNORE;
110 } else if (coveredRoot.equalsIgnoreCase("left")) {
111 this.rootAttachment = CoveredRootAttachment.LEFT;
112 } else if (coveredRoot.equalsIgnoreCase("right")) {
113 this.rootAttachment = CoveredRootAttachment.RIGHT;
114 } else if (coveredRoot.equalsIgnoreCase("head")) {
115 this.rootAttachment = CoveredRootAttachment.HEAD;
116 }
117
118 if (this.rootAttachment != CoveredRootAttachment.NONE) {
119 this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
120 this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable();
121 ppcoveredRootSymbolTable.addSymbol("#true#");
122 ppcoveredRootSymbolTable.addSymbol("#false#");
123 }
124 if (liftingOrder.equalsIgnoreCase("shortest")) {
125 this.liftingOrder = LiftingOrder.SHORTEST;
126 } else if (liftingOrder.equalsIgnoreCase("deepest")) {
127 this.liftingOrder = LiftingOrder.DEEPEST;
128 }
129 }
130
131 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
132 nodeLifted.clear();
133 nodeTrace.clear();
134 headDeprel.clear();
135 nodePath.clear();
136 isCoveredRoot.clear();
137 nodeRelationLength.clear();
138
139 for (int index : pdg.getDependencyIndices()) {
140 nodeLifted.add(false);
141 nodeTrace.add(new Vector<DependencyNode>());
142 headDeprel.add(null);
143 nodePath.add(false);
144 isCoveredRoot.add(false);
145 if (ppliftedSymbolTable != null && index != 0) {
146 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
147 }
148 if (pppathSymbolTable != null && index != 0) {
149 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
150 }
151 if (ppcoveredRootSymbolTable != null && index != 0) {
152 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
153 }
154 }
155 computeRelationLength(pdg);
156 }
157
158 public void projectivize(DependencyStructure pdg) throws MaltChainedException {
159 id++;
160 if (!pdg.isTree()) {
161 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
162 return;
163 }
164 DependencyNode deepestNonProjectiveNode;
165 initProjectivization(pdg);
166 if (rootAttachment == CoveredRootAttachment.IGNORE) {
167 if (markingStrategy != PseudoProjectiveEncoding.NONE) {
168 while (!pdg.isProjective()) {
169 if (liftingOrder == LiftingOrder.DEEPEST) {
170 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
171 } else {
172 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
173 }
174 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
175 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
176 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
177 setPath(deepestNonProjectiveNode.getHead());
178 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
179 }
180 }
181 deattachCoveredRootsForProjectivization(pdg);
182 }
183 } else {
184 if (rootAttachment != CoveredRootAttachment.NONE) {
185 for (int index : pdg.getTokenIndices()) {
186 attachCoveredRoots(pdg, pdg.getTokenNode(index));
187 }
188 }
189 if (markingStrategy != PseudoProjectiveEncoding.NONE) {
190 while (!pdg.isProjective()) {
191 if (liftingOrder == LiftingOrder.DEEPEST) {
192 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
193 } else {
194 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
195 }
196 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
197 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
198 setPath(deepestNonProjectiveNode.getHead());
199 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
200 }
201 }
202 }
203 // collectTraceStatistics(pdg);
204 assignPseudoProjectiveDeprels(pdg);
205 }
206
207 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
208 assignPseudoProjectiveDeprelsForMerge(pdg);
209 }
210
211 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
212 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
213 String label;
214 initDeprojeciviztion(pdg);
215 for (int index : pdg.getTokenIndices()) {
216 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
217 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
218 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
219 label = label.substring(0, pathLabelIndex);
220 setLabel(pdg.getTokenNode(index), label);
221 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
222 }
223 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
224 if (movedLabelIndex + 1 < label.length()) {
225 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
226 } else {
227 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
228 }
229 label = label.substring(0, movedLabelIndex);
230 setLabel(pdg.getTokenNode(index), label);
231 }
232 }
233 }
234 for (int index : pdg.getTokenIndices()) {
235 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
236 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
237 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
238 label = label.substring(0, coveredArcLabelIndex);
239 setLabel(pdg.getTokenNode(index), label);
240 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
241 }
242 }
243 }
244 }
245
246 private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
247 if (headDeprel.get(node.getIndex()) == null) {
248 headDeprel.set(node.getIndex(), parent);
249 }
250 nodeTrace.set(node.getIndex(), headDeprel);
251 }
252
253 private void setPath(DependencyNode node) {
254 nodePath.set(node.getIndex(), true);
255 }
256
257 private boolean isCoveredRoot(DependencyNode node) {
258 return isCoveredRoot.get(node.getIndex());
259 }
260
261 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
262 for (int index : pdg.getTokenIndices()) {
263 if (isCoveredRoot(pdg.getTokenNode(index))) {
264 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
265 }
266 }
267 }
268
269 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
270 int i;
271 boolean foundCoveredRoot = false;
272 DependencyNode coveredRootHead;
273 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
274 .getIndex()); i++) {
275 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
276 if (leftMostIndex == -1) {
277 leftMostIndex = i;
278 }
279 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
280 if (rightMostIndex == -1) {
281 rightMostIndex = i;
282 }
283 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
284 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
285 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
286 if (rootAttachment == CoveredRootAttachment.LEFT) {
287 if (deepest.getHead().getIndex() < deepest.getIndex()) {
288 coveredRootHead = deepest.getHead();
289 } else {
290 coveredRootHead = deepest;
291 }
292 } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
293 if (deepest.getIndex() < deepest.getHead().getIndex()) {
294 coveredRootHead = deepest.getHead();
295 } else {
296 coveredRootHead = deepest;
297 }
298 } else {
299 coveredRootHead = deepest.getHead();
300 }
301 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
302 setCoveredRoot(pdg.getDependencyNode(i));
303 foundCoveredRoot = true;
304 }
305 }
306 return foundCoveredRoot;
307 }
308
309 private void setCoveredRoot(DependencyNode node) {
310 isCoveredRoot.set(node.getIndex(), true);
311 }
312
313 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
314 DependencyNode deepestNonProjectiveNode = null;
315 for (int index : pdg.getDependencyIndices()) {
316 if (!pdg.getDependencyNode(index).isProjective()
317 && (deepestNonProjectiveNode == null
318 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
319 deepestNonProjectiveNode = pdg.getDependencyNode(index);
320 }
321 }
322
323 return deepestNonProjectiveNode;
324 }
325
326 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
327 DependencyNode shortestNonProjectiveNode = null;
328 for (int index : pdg.getDependencyIndices()) {
329 if (!pdg.getDependencyNode(index).isProjective()
330 && (shortestNonProjectiveNode == null
331 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex())
332 || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
333 shortestNonProjectiveNode = pdg.getDependencyNode(index);
334 }
335 }
336 return shortestNonProjectiveNode;
337 }
338
339
340 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
341 nodeRelationLength.add(0);
342 for (int index : pdg.getTokenIndices()) {
343 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
344 }
345 }
346
347 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
348 int newLabelCode;
349 for (int index : pdg.getTokenIndices()) {
350 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
351 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
352 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
353 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
354 if (nodeLifted.get(index)) {
355 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
356 } else {
357 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
358 }
359 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
360 } else {
361 if (nodeLifted.get(index)) {
362 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
363 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
364 } else {
365 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
366 }
367 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
368 }
369 }
370
371 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
372 if (nodePath.get(index)) {
373 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
374 } else {
375 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
376 }
377 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
378 }
379
380 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
381 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
382 }
383 }
384 }
385
386 private void setLabel(DependencyNode node, String label) throws MaltChainedException {
387 // node.getLabelCode().clear();
388 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
389 }
390
391 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
392 Vector<String> originalDeprel = new Vector<String>();
393 String newLabel;
394 originalDeprel.add(null);
395 for (int index : pdg.getTokenIndices()) {
396 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
397 }
398 for (int index : pdg.getTokenIndices()) {
399 newLabel = null;
400 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
401 if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
402 if (nodeLifted.get(index)) {
403 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
404 + originalDeprel.get(headDeprel.get(index).getIndex());
405 // } else {
406 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
407 }
408 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
409 if (nodeLifted.get(index) && nodePath.get(index)) {
410 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
411 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
412 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
413 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
414 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
415 }
416 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
417 if (nodeLifted.get(index) && nodePath.get(index)) {
418 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
419 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
420 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
421 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
422 + originalDeprel.get(headDeprel.get(index).getIndex());
423 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
424 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
425 }
426 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
427 if (nodeLifted.get(index)) {
428 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
429 }
430 }
431 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
432 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
433 }
434 if (newLabel != null) {
435 setLabel(pdg.getDependencyNode(index), newLabel);
436 }
437 }
438 }
439
440 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
441 initDeprojeciviztion(pdg);
442
443 for (int index : pdg.getTokenIndices()) {
444 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
445 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
446 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
447 setPath(pdg.getDependencyNode(index));
448 }
449 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
450 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
451 nodeLifted.set(index, true);
452 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
453 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
454 .getLabelCode(ppliftedSymbolTable)));
455 }
456 }
457 }
458 }
459 deattachCoveredRootsForDeprojectivization(pdg);
460 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
461 deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
462 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
463 deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
464 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
465 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
466 }
467 }
468
469 private void initDeprojeciviztion(DependencyStructure pdg) {
470 nodeLifted.clear();
471 nodePath.clear();
472 synacticHeadDeprel.clear();
473 for (int index : pdg.getDependencyIndices()) {
474 nodeLifted.add(false);
475 nodePath.add(false);
476 synacticHeadDeprel.add(null);
477 }
478 }
479
480 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
481 for (int index : pdg.getTokenIndices()) {
482 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
483 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
484 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
485 "#true#")) {
486 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
487 }
488 }
489 }
490 }
491
492 // Check whether there is at least one node in the specified dependency structure that can be lifted.
493 // If this is not the case, there is no need to call deprojectivizeWithHead.
494
495 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
496 for (int index : pdg.getDependencyIndices()) {
497 if (nodeLifted.get(index)) {
498 DependencyNode node = pdg.getDependencyNode(index);
499 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
500 return true;
501 }
502 }
503 }
504 return false;
505 }
506
507 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
508 boolean success = true, childSuccess = false;
509 int i, childAttempts = 2;
510 DependencyNode child, possibleSyntacticHead;
511 String syntacticHeadDeprel;
512 if (nodeLifted.get(node.getIndex())) {
513 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
514 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
515 if (possibleSyntacticHead != null) {
516 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
517 nodeLifted.set(node.getIndex(), false);
518 } else {
519 success = false;
520 }
521 }
522 while (!childSuccess && childAttempts > 0) {
523 childSuccess = true;
524 Vector<DependencyNode> children = new Vector<DependencyNode>();
525 i = 0;
526 while ((child = node.getLeftDependent(i)) != null) {
527 children.add(child);
528 i++;
529 }
530 i = 0;
531 while ((child = node.getRightDependent(i)) != null) {
532 children.add(child);
533 i++;
534 }
535 for (i = 0; i < children.size(); i++) {
536 child = children.get(i);
537 if (!deprojectivizeWithHead(pdg, child)) {
538 childSuccess = false;
539 }
540 }
541 childAttempts--;
542 }
543 return childSuccess && success;
544 }
545
546 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
547 throws MaltChainedException {
548 DependencyNode dependent;
549 String dependentDeprel;
550 Vector<DependencyNode> nodes = new Vector<DependencyNode>();
551 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
552 while (nodes.size() > 0) {
553 dependent = nodes.remove(0);
554 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
555 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
556 if (dependentDeprel.equals(syntacticHeadDeprel)) {
557 return dependent;
558 }
559 }
560 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
561 }
562 return null;
563 }
564
565 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
566 boolean percentOnly) {
567 int i, j;
568 Vector<DependencyNode> dependents = new Vector<DependencyNode>();
569 DependencyNode leftChild, rightChild;
570
571 i = governor.getLeftDependentCount() - 1;
572 j = 0;
573 leftChild = governor.getLeftDependent(i--);
574 rightChild = governor.getRightDependent(j++);
575
576 while (leftChild != null && rightChild != null) {
577 if (leftChild == avoid) {
578 leftChild = governor.getLeftDependent(i--);
579 } else if (rightChild == avoid) {
580 rightChild = governor.getRightDependent(j++);
581 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
582 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
583 dependents.add(leftChild);
584 }
585 leftChild = governor.getLeftDependent(i--);
586 } else {
587 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
588 dependents.add(rightChild);
589 }
590 rightChild = governor.getRightDependent(j++);
591 }
592 }
593 while (leftChild != null) {
594 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
595 dependents.add(leftChild);
596 }
597 leftChild = governor.getLeftDependent(i--);
598 }
599 while (rightChild != null) {
600 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
601 dependents.add(rightChild);
602 }
603 rightChild = governor.getRightDependent(j++);
604 }
605 return dependents;
606 }
607
608 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
609 boolean success = true, childSuccess = false;
610 int i, childAttempts = 2;
611 DependencyNode child, possibleSyntacticHead;
612 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
613 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
614 if (possibleSyntacticHead != null) {
615 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
616 nodeLifted.set(node.getIndex(), false);
617 } else {
618 success = false;
619 }
620 }
621 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
622 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
623 if (possibleSyntacticHead != null) {
624 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
625 nodeLifted.set(node.getIndex(), false);
626 } else {
627 success = false;
628 }
629 }
630 while (!childSuccess && childAttempts > 0) {
631 childSuccess = true;
632 Vector<DependencyNode> children = new Vector<DependencyNode>();
633 i = 0;
634 while ((child = node.getLeftDependent(i)) != null) {
635 children.add(child);
636 i++;
637 }
638 i = 0;
639 while ((child = node.getRightDependent(i)) != null) {
640 children.add(child);
641 i++;
642 }
643 for (i = 0; i < children.size(); i++) {
644 child = children.get(i);
645 if (!deprojectivizeWithPath(pdg, child)) {
646 childSuccess = false;
647 }
648 }
649 childAttempts--;
650 }
651 return childSuccess && success;
652 }
653
654 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
655 DependencyNode dependent;
656 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
657 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
658 while (nodes.size() > 0) {
659 dependent = nodes.remove(0);
660 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
661 return dependent;
662 }
663 nodes.addAll(newNodes);
664 }
665 return null;
666 }
667
668 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
669 boolean success = true, childSuccess = false;
670 int i, childAttempts = 2;
671 DependencyNode child, possibleSyntacticHead;
672 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
673 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
674 .getIndex()));
675 if (possibleSyntacticHead != null) {
676 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
677 nodeLifted.set(node.getIndex(), false);
678 } else {
679 success = false;
680 }
681 }
682 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
683 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
684 .getIndex()));
685 if (possibleSyntacticHead != null) {
686 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
687 nodeLifted.set(node.getIndex(), false);
688 } else {
689 success = false;
690 }
691 }
692 while (!childSuccess && childAttempts > 0) {
693 childSuccess = true;
694 Vector<DependencyNode> children = new Vector<DependencyNode>();
695 i = 0;
696 while ((child = node.getLeftDependent(i)) != null) {
697 children.add(child);
698 i++;
699 }
700 i = 0;
701 while ((child = node.getRightDependent(i)) != null) {
702 children.add(child);
703 i++;
704 }
705 for (i = 0; i < children.size(); i++) {
706 child = children.get(i);
707 if (!deprojectivizeWithHeadAndPath(pdg, child)) {
708 childSuccess = false;
709 }
710 }
711 childAttempts--;
712 }
713 return childSuccess && success;
714 }
715
716 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
717 throws MaltChainedException {
718 DependencyNode dependent;
719 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
720 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
721 while (nodes.size() > 0) {
722 dependent = nodes.remove(0);
723 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
724 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
725 return dependent;
726 }
727 nodes.addAll(newNodes);
728 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
729 && newNodes.size() != 0) {
730 secondChance.add(dependent);
731 }
732 }
733 if (secondChance.size() > 0) {
734 return secondChance.firstElement();
735 }
736 return null;
737 }
738 }