001    package org.maltparser.transform.pseudo;
002    
003    import java.util.SortedSet;
004    import java.util.TreeSet;
005    import java.util.Vector;
006    
007    import org.apache.log4j.Logger;
008    import org.maltparser.core.exception.MaltChainedException;
009    import org.maltparser.core.io.dataformat.ColumnDescription;
010    import org.maltparser.core.io.dataformat.DataFormatInstance;
011    import org.maltparser.core.symbol.SymbolTable;
012    import org.maltparser.core.syntaxgraph.DependencyStructure;
013    import org.maltparser.core.syntaxgraph.node.DependencyNode;
014    
015    /**
016     * This class contains methods for projectivizing and deprojectivizing
017     * 
018     * @author Jens Nilsson
019     */
020    public class PseudoProjectivity {
021            static int id = 0;
022    
023            private enum PseudoProjectiveEncoding {
024                    NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
025            };
026    
027            private enum CoveredRootAttachment {
028                    NONE, IGNORE, LEFT, RIGHT, HEAD
029            };
030    
031            private enum LiftingOrder {
032                    SHORTEST, DEEPEST
033            };
034    
035            private PseudoProjectiveEncoding markingStrategy;
036            private CoveredRootAttachment rootAttachment;
037            private LiftingOrder liftingOrder;
038            private Logger configLogger;
039    
040            private SymbolTable deprelSymbolTable;
041            private SymbolTable pppathSymbolTable;
042            private SymbolTable ppliftedSymbolTable;
043            private SymbolTable ppcoveredRootSymbolTable;
044            
045            private ColumnDescription deprelColumn;
046            private ColumnDescription pppathColumn;
047            private ColumnDescription ppliftedColumn;
048            private ColumnDescription ppcoveredRootColumn;
049            
050            private Vector<Boolean> nodeLifted;
051            private Vector<Vector<DependencyNode>> nodeTrace;
052            private Vector<DependencyNode> headDeprel;
053            private Vector<Boolean> nodePath;
054            private Vector<Boolean> isCoveredRoot;
055            private Vector<Integer> nodeRelationLength;
056            private Vector<String> synacticHeadDeprel;
057    
058    
059            public PseudoProjectivity() { }
060    
061            public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
062                            DataFormatInstance dataFormatInstance) throws MaltChainedException {
063                    nodeLifted = new Vector<Boolean>();
064                    nodeTrace = new Vector<Vector<DependencyNode>>();
065                    headDeprel = new Vector<DependencyNode>();
066                    nodePath = new Vector<Boolean>();
067                    isCoveredRoot = new Vector<Boolean>();
068                    nodeRelationLength = new Vector<Integer>();
069                    synacticHeadDeprel = new Vector<String>();
070    
071                    this.configLogger = configLogger;
072                    if (markingStrategyString.equalsIgnoreCase("none")) {
073                            markingStrategy = PseudoProjectiveEncoding.NONE;
074                    } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
075                            markingStrategy = PseudoProjectiveEncoding.BASELINE;
076                    } else if (markingStrategyString.equalsIgnoreCase("head")) {
077                            markingStrategy = PseudoProjectiveEncoding.HEAD;
078                    } else if (markingStrategyString.equalsIgnoreCase("path")) {
079                            markingStrategy = PseudoProjectiveEncoding.PATH;
080                    } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
081                            markingStrategy = PseudoProjectiveEncoding.HEADPATH;
082                    } else if (markingStrategyString.equalsIgnoreCase("trace")) {
083                            markingStrategy = PseudoProjectiveEncoding.TRACE;
084                    }
085                    this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
086                    this.deprelSymbolTable = deprelColumn.getSymbolTable();
087    //              this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL");
088                    if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
089                                    || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
090                            this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
091                            this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable();
092    //                      this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable);
093                            if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
094                                    ppliftedSymbolTable.addSymbol("#true#");
095                                    ppliftedSymbolTable.addSymbol("#false#");
096                            } else {
097                                    ppliftedSymbolTable.addSymbol("#false#");
098                            }
099                    }
100    
101                    if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
102                            this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
103                            this.pppathSymbolTable = pppathColumn.getSymbolTable();
104                            pppathSymbolTable.addSymbol("#true#");
105                            pppathSymbolTable.addSymbol("#false#");
106                    }
107    
108                    if (coveredRoot.equalsIgnoreCase("none")) {
109                            this.rootAttachment = CoveredRootAttachment.NONE;
110                    } else if (coveredRoot.equalsIgnoreCase("ignore")) {
111                            this.rootAttachment = CoveredRootAttachment.IGNORE;
112                    } else if (coveredRoot.equalsIgnoreCase("left")) {
113                            this.rootAttachment = CoveredRootAttachment.LEFT;
114                    } else if (coveredRoot.equalsIgnoreCase("right")) {
115                            this.rootAttachment = CoveredRootAttachment.RIGHT;
116                    } else if (coveredRoot.equalsIgnoreCase("head")) {
117                            this.rootAttachment = CoveredRootAttachment.HEAD;
118                    }
119    
120                    if (this.rootAttachment != CoveredRootAttachment.NONE) {
121                            this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
122                            this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable();
123                            ppcoveredRootSymbolTable.addSymbol("#true#");
124                            ppcoveredRootSymbolTable.addSymbol("#false#");
125                    }
126                    if (liftingOrder.equalsIgnoreCase("shortest")) {
127                            this.liftingOrder = LiftingOrder.SHORTEST;
128                    } else if (liftingOrder.equalsIgnoreCase("deepest")) {
129                            this.liftingOrder = LiftingOrder.DEEPEST;
130                    }
131            }
132            
133            private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
134                    nodeLifted.clear();
135                    nodeTrace.clear();
136                    headDeprel.clear();
137                    nodePath.clear();
138                    isCoveredRoot.clear();
139                    nodeRelationLength.clear();
140    
141                    for (int index : pdg.getDependencyIndices()) {
142                            nodeLifted.add(false);
143                            nodeTrace.add(new Vector<DependencyNode>());
144                            headDeprel.add(null);
145                            nodePath.add(false);
146                            isCoveredRoot.add(false);
147                            if (ppliftedSymbolTable != null && index != 0) {
148                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
149                            }
150                            if (pppathSymbolTable != null && index != 0) {
151                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
152                            }
153                            if (ppcoveredRootSymbolTable != null && index != 0) {
154                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
155                            }
156                    }
157                    computeRelationLength(pdg);
158            }
159            
160        public void projectivize(DependencyStructure pdg) throws MaltChainedException {
161            id++;
162            if (!pdg.isTree()) {
163                configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
164                return;
165            }
166            DependencyNode deepestNonProjectiveNode;
167            initProjectivization(pdg);
168            if (rootAttachment == CoveredRootAttachment.IGNORE) {
169                    if (markingStrategy != PseudoProjectiveEncoding.NONE) {
170                            while (!pdg.isProjective()) {
171                                    if (liftingOrder == LiftingOrder.DEEPEST) {
172                                            deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
173                                    } else {
174                                            deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
175                                    }
176                                    if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
177                                            nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
178                                            setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
179                                            setPath(deepestNonProjectiveNode.getHead());
180                                            pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
181                                    }
182                            }
183                            deattachCoveredRootsForProjectivization(pdg);
184                    }
185            } else {
186                    if (rootAttachment != CoveredRootAttachment.NONE) {
187                        for (int index : pdg.getTokenIndices()) {
188                            attachCoveredRoots(pdg, pdg.getTokenNode(index));
189                        }
190                    }
191                    if (markingStrategy != PseudoProjectiveEncoding.NONE) {
192                        while (!pdg.isProjective()) {
193                            if (liftingOrder == LiftingOrder.DEEPEST) {
194                                deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
195                            } else {
196                                deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
197                            }
198                            nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
199                            setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
200                            setPath(deepestNonProjectiveNode.getHead());
201                            pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
202                        }
203                    }
204            }
205            // collectTraceStatistics(pdg);
206            assignPseudoProjectiveDeprels(pdg);
207        }
208    
209            public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
210                    assignPseudoProjectiveDeprelsForMerge(pdg);
211            }
212    
213            public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
214                    int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
215                    String label;
216                    initDeprojeciviztion(pdg);
217                    for (int index : pdg.getTokenIndices()) {
218                            if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
219                                    label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
220                                    if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
221                                            label = label.substring(0, pathLabelIndex);
222                                            setLabel(pdg.getTokenNode(index), label);
223                                            pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
224                                    }
225                                    if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
226                                            if (movedLabelIndex + 1 < label.length()) {
227                                                    pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
228                                            } else {
229                                                    pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
230                                            }
231                                            label = label.substring(0, movedLabelIndex);
232                                            setLabel(pdg.getTokenNode(index), label);
233                                    }
234                            }
235                    }
236                    for (int index : pdg.getTokenIndices()) {
237                            if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
238                                    label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
239                                    if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
240                                            label = label.substring(0, coveredArcLabelIndex);
241                                            setLabel(pdg.getTokenNode(index), label);
242                                            pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
243                                    }
244                            }
245                    }
246            }
247    
248            private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
249                    if (headDeprel.get(node.getIndex()) == null) {
250                            headDeprel.set(node.getIndex(), parent);
251                    }
252                    nodeTrace.set(node.getIndex(), headDeprel);
253            }
254    
255            private void setPath(DependencyNode node) {
256                    nodePath.set(node.getIndex(), true);
257            }
258    
259            private boolean isCoveredRoot(DependencyNode node) {
260                    return isCoveredRoot.get(node.getIndex());
261            }
262    
263            private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
264                    for (int index : pdg.getTokenIndices()) {
265                            if (isCoveredRoot(pdg.getTokenNode(index))) {
266                                    pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
267                            }
268                    }
269            }
270    
271            private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
272                    int i;
273                    boolean foundCoveredRoot = false;
274                    DependencyNode coveredRootHead;
275                    for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
276                                    .getIndex()); i++) {
277                            int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
278                            if (leftMostIndex == -1) {
279                                    leftMostIndex = i;
280                            }
281                            int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
282                            if (rightMostIndex == -1) {
283                                    rightMostIndex = i;
284                            }
285                            if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
286                                            && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
287                                            && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
288                                    if (rootAttachment == CoveredRootAttachment.LEFT) {
289                                            if (deepest.getHead().getIndex() < deepest.getIndex()) {
290                                                    coveredRootHead = deepest.getHead();
291                                            } else {
292                                                    coveredRootHead = deepest;
293                                            }
294                                    } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
295                                            if (deepest.getIndex() < deepest.getHead().getIndex()) {
296                                                    coveredRootHead = deepest.getHead();
297                                            } else {
298                                                    coveredRootHead = deepest;
299                                            }
300                                    } else {
301                                            coveredRootHead = deepest.getHead();
302                                    }
303                                    pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
304                                    setCoveredRoot(pdg.getDependencyNode(i));
305                                    foundCoveredRoot = true;
306                            }
307                    }
308                    return foundCoveredRoot;
309            }
310    
311            private void setCoveredRoot(DependencyNode node) {
312                    isCoveredRoot.set(node.getIndex(), true);
313            }
314    
315            private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
316                    DependencyNode deepestNonProjectiveNode = null;
317                    for (int index : pdg.getDependencyIndices()) {
318                            if (!pdg.getDependencyNode(index).isProjective()
319                                            && (deepestNonProjectiveNode == null 
320                                            || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
321                                    deepestNonProjectiveNode = pdg.getDependencyNode(index);
322                            }
323                    }
324                    
325                    return deepestNonProjectiveNode;
326            }
327    
328            private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
329                    DependencyNode shortestNonProjectiveNode = null;
330                    for (int index : pdg.getDependencyIndices()) {
331                            if (!pdg.getDependencyNode(index).isProjective()
332                                            && (shortestNonProjectiveNode == null
333                                            || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 
334                                            )) {
335    //                                      || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
336                                    shortestNonProjectiveNode = pdg.getDependencyNode(index);
337                            }
338                    }
339                    return shortestNonProjectiveNode;
340            }
341    
342    
343            private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
344                    nodeRelationLength.add(0);
345                    for (int index : pdg.getTokenIndices()) {
346                            nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
347                    }
348            }
349    
350            private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
351                    int newLabelCode;
352                    for (int index : pdg.getTokenIndices()) {
353                            if (!isCoveredRoot(pdg.getDependencyNode(index))) {
354                                    if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
355                                                    || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
356                                            if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
357                                                    if (nodeLifted.get(index)) {
358                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
359                                                    } else {
360                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
361                                                    }
362                                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
363                                            } else {
364                                                    if (nodeLifted.get(index)) {
365                                                            newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
366                                                                            headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
367                                                    } else {
368                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
369                                                    }
370                                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
371                                            }
372                                    }
373    
374                                    if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
375                                            if (nodePath.get(index)) {
376                                                    newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
377                                            } else {
378                                                    newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
379                                            }
380                                            pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
381                                    }
382    
383                            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
384                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
385                            }
386                    }
387            }
388    
389            private void setLabel(DependencyNode node, String label) throws MaltChainedException {
390                    // node.getLabelCode().clear();
391                    node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
392            }
393    
394            private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
395                    Vector<String> originalDeprel = new Vector<String>();
396                    String newLabel;
397                    originalDeprel.add(null);
398                    for (int index : pdg.getTokenIndices()) {
399                            originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
400                    }
401                    for (int index : pdg.getTokenIndices()) {
402                            newLabel = null;
403                            if (!isCoveredRoot(pdg.getDependencyNode(index))) {
404                                    if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
405                                            if (nodeLifted.get(index)) {
406                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
407                                                                    + originalDeprel.get(headDeprel.get(index).getIndex());
408                                                    // } else {
409                                                    // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
410                                            }
411                                    } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
412                                            if (nodeLifted.get(index) && nodePath.get(index)) {
413                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
414                                            } else if (nodeLifted.get(index) && !nodePath.get(index)) {
415                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
416                                            } else if (!nodeLifted.get(index) && nodePath.get(index)) {
417                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
418                                            }
419                                    } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
420                                            if (nodeLifted.get(index) && nodePath.get(index)) {
421                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
422                                                                    + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
423                                            } else if (nodeLifted.get(index) && !nodePath.get(index)) {
424                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
425                                                                    + originalDeprel.get(headDeprel.get(index).getIndex());
426                                            } else if (!nodeLifted.get(index) && nodePath.get(index)) {
427                                                    newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
428                                            }
429                                    } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
430                                            if (nodeLifted.get(index)) {
431                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
432                                            }
433                                    }
434                            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
435                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
436                            }
437                            if (newLabel != null) {
438                                    setLabel(pdg.getDependencyNode(index), newLabel);
439                            }
440                    }
441            }
442    
443            public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
444                    initDeprojeciviztion(pdg);
445    
446                    for (int index : pdg.getTokenIndices()) {
447                            if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
448                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
449                                                    && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
450                                            setPath(pdg.getDependencyNode(index));
451                                    }
452                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
453                                                    && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
454                                            nodeLifted.set(index, true);
455                                            if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
456                                                    synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
457                                                                    .getLabelCode(ppliftedSymbolTable)));
458                                            }
459                                    }
460                            }
461                    }
462                    deattachCoveredRootsForDeprojectivization(pdg);
463                    if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
464                            deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
465                    } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
466                            deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
467                    } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
468                            deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
469                    }
470            }
471    
472            private void initDeprojeciviztion(DependencyStructure pdg) {
473                    nodeLifted.clear();
474                    nodePath.clear();
475                    synacticHeadDeprel.clear();
476                    for (int index : pdg.getDependencyIndices()) {
477                            nodeLifted.add(false);
478                            nodePath.add(false);
479                            synacticHeadDeprel.add(null);
480                    }
481            }
482    
483            private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
484                    for (int index : pdg.getTokenIndices()) {
485                            if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
486                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
487                                                    && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
488                                                                    "#true#")) {
489                                            pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
490                                    }
491                            }
492                    }
493            }
494    
495            // Check whether there is at least one node in the specified dependency structure that can be lifted.
496            // If this is not the case, there is no need to call deprojectivizeWithHead.
497    
498            private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
499                    for (int index : pdg.getDependencyIndices()) {
500                            if (nodeLifted.get(index)) {
501                                    DependencyNode node = pdg.getDependencyNode(index);
502                                    if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
503                                            return true;
504                                    }
505                        }
506                    }
507                    return false;
508            }
509    
510            private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
511                    boolean success = true, childSuccess = false;
512                    int i, childAttempts = 2;
513                    DependencyNode child, possibleSyntacticHead;
514                    String syntacticHeadDeprel;
515                    if (nodeLifted.get(node.getIndex())) {
516                            syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
517                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
518                            if (possibleSyntacticHead != null) {
519                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
520                                    nodeLifted.set(node.getIndex(), false);
521                            } else {
522                                    success = false;
523                            }
524                    }
525                    while (!childSuccess && childAttempts > 0) {
526                            childSuccess = true;
527                            Vector<DependencyNode> children = new Vector<DependencyNode>();
528                            i = 0;
529                            while ((child = node.getLeftDependent(i)) != null) {
530                                    children.add(child);
531                                    i++;
532                            }
533                            i = 0;
534                            while ((child = node.getRightDependent(i)) != null) {
535                                    children.add(child);
536                                    i++;
537                            }
538                            for (i = 0; i < children.size(); i++) {
539                                    child = children.get(i);
540                                    if (!deprojectivizeWithHead(pdg, child)) {
541                                            childSuccess = false;
542                                    }
543                            }
544                            childAttempts--;
545                    }
546                    return childSuccess && success;
547            }
548    
549            private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
550                            throws MaltChainedException {
551                    DependencyNode dependent;
552                    String dependentDeprel;
553                    Vector<DependencyNode> nodes = new Vector<DependencyNode>();
554                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
555                    while (nodes.size() > 0) {
556                            dependent = nodes.remove(0);
557                            if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
558                                    dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
559                                    if (dependentDeprel.equals(syntacticHeadDeprel)) {
560                                            return dependent;
561                                    }
562                            }
563                            nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
564                    }
565                    return null;
566            }
567    
568            
569            private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
570                            boolean percentOnly) {
571                    Vector<DependencyNode> output = new Vector<DependencyNode>();
572                    SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>();
573                    dependents.addAll(governor.getLeftDependents());
574                    dependents.addAll(governor.getRightDependents());
575    
576    
577                    DependencyNode[] deps = new DependencyNode[dependents.size()];
578                    int[] distances = new int[dependents.size()];
579                    int i = 0;
580                    for (DependencyNode dep : dependents) {
581                            distances[i] = Math.abs(dep.getIndex() - avoid.getIndex());
582                            deps[i] = dep;
583                            i++;
584                    }
585                    if (distances.length > 1) {
586                            int smallest;
587                            int n = distances.length;
588                            int tmpDist;
589                            DependencyNode tmpDep;
590                            for (i=0; i < n; i++) {
591                                    smallest = i;
592                                    for (int j=i; j < n; j++) {
593                                            if (distances[j] < distances[smallest]) {
594                                                    smallest = j;
595                                            }
596                                    }
597                                    if (smallest != i) {
598                                            tmpDist = distances[smallest];
599                                            distances[smallest] = distances[i];
600                                            distances[i] = tmpDist;
601                                            tmpDep = deps[smallest];
602                                            deps[smallest] = deps[i];
603                                            deps[i] = tmpDep;
604                                    }
605                            }
606                    }
607                    for (i=0; i<distances.length;i++) {
608                            if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath.get(deps[i].getIndex())))) {
609                                    output.add(deps[i]);
610                            }
611                    }
612                    return output;
613            }
614            
615            private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode2(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
616                            boolean percentOnly) {
617                    int i, j;
618                    Vector<DependencyNode> dependents = new Vector<DependencyNode>();
619                    DependencyNode leftChild, rightChild;
620    
621                    i = governor.getLeftDependentCount() - 1;
622                    j = 0;
623                    leftChild = governor.getLeftDependent(i--);
624                    rightChild = governor.getRightDependent(j++);
625    
626                    while (leftChild != null && rightChild != null) {
627                            if (leftChild == avoid) {
628                                    leftChild = governor.getLeftDependent(i--);
629                            } else if (rightChild == avoid) {
630                                    rightChild = governor.getRightDependent(j++);
631                            } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
632                                    if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
633                                            dependents.add(leftChild);
634                                    }
635                                    leftChild = governor.getLeftDependent(i--);
636                            } else {
637                                    if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
638                                            dependents.add(rightChild);
639                                    }
640                                    rightChild = governor.getRightDependent(j++);
641                            }
642                    }
643                    while (leftChild != null) {
644                            if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
645                                    dependents.add(leftChild);
646                            }
647                            leftChild = governor.getLeftDependent(i--);
648                    }
649                    while (rightChild != null) {
650                            if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
651                                    dependents.add(rightChild);
652                            }
653                            rightChild = governor.getRightDependent(j++);
654                    }
655                    return dependents;
656            }
657    
658            private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
659                    boolean success = true, childSuccess = false;
660                    int i, childAttempts = 2;
661                    DependencyNode child, possibleSyntacticHead;
662                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
663                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
664                            if (possibleSyntacticHead != null) {
665                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
666                                    nodeLifted.set(node.getIndex(), false);
667                            } else {
668                                    success = false;
669                            }
670                    }
671                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
672                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
673                            if (possibleSyntacticHead != null) {
674                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
675                                    nodeLifted.set(node.getIndex(), false);
676                            } else {
677                                    success = false;
678                            }
679                    }
680                    while (!childSuccess && childAttempts > 0) {
681                            childSuccess = true;
682                            Vector<DependencyNode> children = new Vector<DependencyNode>();
683                            i = 0;
684                            while ((child = node.getLeftDependent(i)) != null) {
685                                    children.add(child);
686                                    i++;
687                            }
688                            i = 0;
689                            while ((child = node.getRightDependent(i)) != null) {
690                                    children.add(child);
691                                    i++;
692                            }
693                            for (i = 0; i < children.size(); i++) {
694                                    child = children.get(i);
695                                    if (!deprojectivizeWithPath(pdg, child)) {
696                                            childSuccess = false;
697                                    }
698                            }
699                            childAttempts--;
700                    }
701                    return childSuccess && success;
702            }
703    
704            private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
705                    DependencyNode dependent;
706                    Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
707                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
708                    while (nodes.size() > 0) {
709                            dependent = nodes.remove(0);
710                            if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
711                                    return dependent;
712                            }
713                            nodes.addAll(newNodes);
714                    }
715                    return null;
716            }
717    
718            private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
719                    boolean success = true, childSuccess = false;
720                    int i, childAttempts = 2;
721                    DependencyNode child, possibleSyntacticHead;
722                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
723                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
724                                            .getIndex()));
725                            if (possibleSyntacticHead != null) {
726                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
727                                    nodeLifted.set(node.getIndex(), false);
728                            } else {
729                                    success = false;
730                            }
731                    }
732                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
733                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
734                                            .getIndex()));
735                            if (possibleSyntacticHead != null) {
736                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
737                                    nodeLifted.set(node.getIndex(), false);
738                            } else {
739                                    success = false;
740                            }
741                    }
742                    while (!childSuccess && childAttempts > 0) {
743                            childSuccess = true;
744                            Vector<DependencyNode> children = new Vector<DependencyNode>();
745                            i = 0;
746                            while ((child = node.getLeftDependent(i)) != null) {
747                                    children.add(child);
748                                    i++;
749                            }
750                            i = 0;
751                            while ((child = node.getRightDependent(i)) != null) {
752                                    children.add(child);
753                                    i++;
754                            }
755                            for (i = 0; i < children.size(); i++) {
756                                    child = children.get(i);
757                                    if (!deprojectivizeWithHeadAndPath(pdg, child)) {
758                                            childSuccess = false;
759                                    }
760                            }
761                            childAttempts--;
762                    }
763                    return childSuccess && success;
764            }
765    
766            private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
767                            throws MaltChainedException {
768                    DependencyNode dependent;
769                    Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
770                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
771                    while (nodes.size() > 0) {
772                            dependent = nodes.remove(0);
773                            if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
774                                            && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
775                                    return dependent;
776                            }
777                            nodes.addAll(newNodes);
778                            if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
779                                            && newNodes.size() != 0) {
780                                    secondChance.add(dependent);
781                            }
782                    }
783                    if (secondChance.size() > 0) {
784                            return secondChance.firstElement();
785                    }
786                    return null;
787            }
788    }