001    package org.maltparser.transform.pseudo;
002    
003    import java.util.Vector;
004    
005    import org.apache.log4j.Logger;
006    import org.maltparser.core.exception.MaltChainedException;
007    import org.maltparser.core.io.dataformat.ColumnDescription;
008    import org.maltparser.core.io.dataformat.DataFormatInstance;
009    import org.maltparser.core.symbol.SymbolTable;
010    import org.maltparser.core.syntaxgraph.DependencyStructure;
011    import org.maltparser.core.syntaxgraph.node.DependencyNode;
012    
013    /**
014     * This class contains methods for projectivizing and deprojectivizing
015     * 
016     * @author Jens Nilsson
017     * @since 1.0
018     */
019    public class PseudoProjectivity {
020            static int id = 0;
021    
022            private enum PseudoProjectiveEncoding {
023                    NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
024            };
025    
026            private enum CoveredRootAttachment {
027                    NONE, IGNORE, LEFT, RIGHT, HEAD
028            };
029    
030            private enum LiftingOrder {
031                    SHORTEST, DEEPEST
032            };
033    
034            private PseudoProjectiveEncoding markingStrategy;
035            private CoveredRootAttachment rootAttachment;
036            private LiftingOrder liftingOrder;
037            private Logger configLogger;
038    
039            private SymbolTable deprelSymbolTable;
040            private SymbolTable pppathSymbolTable;
041            private SymbolTable ppliftedSymbolTable;
042            private SymbolTable ppcoveredRootSymbolTable;
043            
044            private ColumnDescription deprelColumn;
045            private ColumnDescription pppathColumn;
046            private ColumnDescription ppliftedColumn;
047            private ColumnDescription ppcoveredRootColumn;
048            
049            private Vector<Boolean> nodeLifted;
050            private Vector<Vector<DependencyNode>> nodeTrace;
051            private Vector<DependencyNode> headDeprel;
052            private Vector<Boolean> nodePath;
053            private Vector<Boolean> isCoveredRoot;
054            private Vector<Integer> nodeRelationLength;
055            private Vector<String> synacticHeadDeprel;
056    
057    
058            public PseudoProjectivity() { }
059    
060            public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
061                            DataFormatInstance dataFormatInstance) throws MaltChainedException {
062                    nodeLifted = new Vector<Boolean>();
063                    nodeTrace = new Vector<Vector<DependencyNode>>();
064                    headDeprel = new Vector<DependencyNode>();
065                    nodePath = new Vector<Boolean>();
066                    isCoveredRoot = new Vector<Boolean>();
067                    nodeRelationLength = new Vector<Integer>();
068                    synacticHeadDeprel = new Vector<String>();
069    
070                    this.configLogger = configLogger;
071                    if (markingStrategyString.equalsIgnoreCase("none")) {
072                            markingStrategy = PseudoProjectiveEncoding.NONE;
073                    } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
074                            markingStrategy = PseudoProjectiveEncoding.BASELINE;
075                    } else if (markingStrategyString.equalsIgnoreCase("head")) {
076                            markingStrategy = PseudoProjectiveEncoding.HEAD;
077                    } else if (markingStrategyString.equalsIgnoreCase("path")) {
078                            markingStrategy = PseudoProjectiveEncoding.PATH;
079                    } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
080                            markingStrategy = PseudoProjectiveEncoding.HEADPATH;
081                    } else if (markingStrategyString.equalsIgnoreCase("trace")) {
082                            markingStrategy = PseudoProjectiveEncoding.TRACE;
083                    }
084                    this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
085                    this.deprelSymbolTable = deprelColumn.getSymbolTable();
086    //              this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL");
087                    if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
088                                    || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
089                            this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy());
090                            this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable();
091    //                      this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable);
092                            if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
093                                    ppliftedSymbolTable.addSymbol("#true#");
094                                    ppliftedSymbolTable.addSymbol("#false#");
095                            } else {
096                                    ppliftedSymbolTable.addSymbol("#false#");
097                            }
098                    }
099    
100                    if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
101                            this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy());
102                            this.pppathSymbolTable = pppathColumn.getSymbolTable();
103                            pppathSymbolTable.addSymbol("#true#");
104                            pppathSymbolTable.addSymbol("#false#");
105                    }
106    
107                    if (coveredRoot.equalsIgnoreCase("none")) {
108                            this.rootAttachment = CoveredRootAttachment.NONE;
109                    } else if (coveredRoot.equalsIgnoreCase("ignore")) {
110                            this.rootAttachment = CoveredRootAttachment.IGNORE;
111                    } else if (coveredRoot.equalsIgnoreCase("left")) {
112                            this.rootAttachment = CoveredRootAttachment.LEFT;
113                    } else if (coveredRoot.equalsIgnoreCase("right")) {
114                            this.rootAttachment = CoveredRootAttachment.RIGHT;
115                    } else if (coveredRoot.equalsIgnoreCase("head")) {
116                            this.rootAttachment = CoveredRootAttachment.HEAD;
117                    }
118    
119                    if (this.rootAttachment != CoveredRootAttachment.NONE) {
120                            this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy());
121                            this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable();
122                            ppcoveredRootSymbolTable.addSymbol("#true#");
123                            ppcoveredRootSymbolTable.addSymbol("#false#");
124                    }
125                    if (liftingOrder.equalsIgnoreCase("shortest")) {
126                            this.liftingOrder = LiftingOrder.SHORTEST;
127                    } else if (liftingOrder.equalsIgnoreCase("deepest")) {
128                            this.liftingOrder = LiftingOrder.DEEPEST;
129                    }
130            }
131            
132            private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
133                    nodeLifted.clear();
134                    nodeTrace.clear();
135                    headDeprel.clear();
136                    nodePath.clear();
137                    isCoveredRoot.clear();
138                    nodeRelationLength.clear();
139    
140                    for (int index : pdg.getDependencyIndices()) {
141                            nodeLifted.add(false);
142                            nodeTrace.add(new Vector<DependencyNode>());
143                            headDeprel.add(null);
144                            nodePath.add(false);
145                            isCoveredRoot.add(false);
146                            if (ppliftedSymbolTable != null && index != 0) {
147                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
148                            }
149                            if (pppathSymbolTable != null && index != 0) {
150                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
151                            }
152                            if (ppcoveredRootSymbolTable != null && index != 0) {
153                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
154                            }
155                    }
156                    computeRelationLength(pdg);
157            }
158            
159        public void projectivize(DependencyStructure pdg) throws MaltChainedException {
160            id++;
161            if (!pdg.isTree()) {
162                configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
163                return;
164            }
165            DependencyNode deepestNonProjectiveNode;
166            initProjectivization(pdg);
167            if (rootAttachment == CoveredRootAttachment.IGNORE) {
168                    if (markingStrategy != PseudoProjectiveEncoding.NONE) {
169                            while (!pdg.isProjective()) {
170                                    if (liftingOrder == LiftingOrder.DEEPEST) {
171                                            deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
172                                    } else {
173                                            deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
174                                    }
175                                    if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
176                                            nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
177                                            setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
178                                            setPath(deepestNonProjectiveNode.getHead());
179                                            pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
180                                    }
181                            }
182                            deattachCoveredRootsForProjectivization(pdg);
183                    }
184            } else {
185                    if (rootAttachment != CoveredRootAttachment.NONE) {
186                        for (int index : pdg.getTokenIndices()) {
187                            attachCoveredRoots(pdg, pdg.getTokenNode(index));
188                        }
189                    }
190                    if (markingStrategy != PseudoProjectiveEncoding.NONE) {
191                        while (!pdg.isProjective()) {
192                            if (liftingOrder == LiftingOrder.DEEPEST) {
193                                deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
194                            } else {
195                                deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
196                            }
197                            nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
198                            setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
199                            setPath(deepestNonProjectiveNode.getHead());
200                            pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
201                        }
202                    }
203            }
204            // collectTraceStatistics(pdg);
205            assignPseudoProjectiveDeprels(pdg);
206        }
207    
208            public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
209                    assignPseudoProjectiveDeprelsForMerge(pdg);
210            }
211    
212            public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
213                    int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
214                    String label;
215                    initDeprojeciviztion(pdg);
216                    for (int index : pdg.getTokenIndices()) {
217                            if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
218                                    label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
219                                    if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
220                                            label = label.substring(0, pathLabelIndex);
221                                            setLabel(pdg.getTokenNode(index), label);
222                                            pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
223                                    }
224                                    if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
225                                            if (movedLabelIndex + 1 < label.length()) {
226                                                    pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
227                                            } else {
228                                                    pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
229                                            }
230                                            label = label.substring(0, movedLabelIndex);
231                                            setLabel(pdg.getTokenNode(index), label);
232                                    }
233                            }
234                    }
235                    for (int index : pdg.getTokenIndices()) {
236                            if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
237                                    label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
238                                    if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
239                                            label = label.substring(0, coveredArcLabelIndex);
240                                            setLabel(pdg.getTokenNode(index), label);
241                                            pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
242                                    }
243                            }
244                    }
245            }
246    
247            private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
248                    if (headDeprel.get(node.getIndex()) == null) {
249                            headDeprel.set(node.getIndex(), parent);
250                    }
251                    nodeTrace.set(node.getIndex(), headDeprel);
252            }
253    
254            private void setPath(DependencyNode node) {
255                    nodePath.set(node.getIndex(), true);
256            }
257    
258            private boolean isCoveredRoot(DependencyNode node) {
259                    return isCoveredRoot.get(node.getIndex());
260            }
261    
262            private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
263                    for (int index : pdg.getTokenIndices()) {
264                            if (isCoveredRoot(pdg.getTokenNode(index))) {
265                                    pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
266                            }
267                    }
268            }
269    
270            private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
271                    int i;
272                    boolean foundCoveredRoot = false;
273                    DependencyNode coveredRootHead;
274                    for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
275                                    .getIndex()); i++) {
276                            int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
277                            if (leftMostIndex == -1) {
278                                    leftMostIndex = i;
279                            }
280                            int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
281                            if (rightMostIndex == -1) {
282                                    rightMostIndex = i;
283                            }
284                            if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
285                                            && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
286                                            && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
287                                    if (rootAttachment == CoveredRootAttachment.LEFT) {
288                                            if (deepest.getHead().getIndex() < deepest.getIndex()) {
289                                                    coveredRootHead = deepest.getHead();
290                                            } else {
291                                                    coveredRootHead = deepest;
292                                            }
293                                    } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
294                                            if (deepest.getIndex() < deepest.getHead().getIndex()) {
295                                                    coveredRootHead = deepest.getHead();
296                                            } else {
297                                                    coveredRootHead = deepest;
298                                            }
299                                    } else {
300                                            coveredRootHead = deepest.getHead();
301                                    }
302                                    pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
303                                    setCoveredRoot(pdg.getDependencyNode(i));
304                                    foundCoveredRoot = true;
305                            }
306                    }
307                    return foundCoveredRoot;
308            }
309    
310            private void setCoveredRoot(DependencyNode node) {
311                    isCoveredRoot.set(node.getIndex(), true);
312            }
313    
314            private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
315                    DependencyNode deepestNonProjectiveNode = null;
316                    for (int index : pdg.getDependencyIndices()) {
317                            if (!pdg.getDependencyNode(index).isProjective()
318                                            && (deepestNonProjectiveNode == null 
319                                            || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
320                                    deepestNonProjectiveNode = pdg.getDependencyNode(index);
321                            }
322                    }
323                    
324                    return deepestNonProjectiveNode;
325            }
326    
327            private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
328                    DependencyNode shortestNonProjectiveNode = null;
329                    for (int index : pdg.getDependencyIndices()) {
330                            if (!pdg.getDependencyNode(index).isProjective()
331                                            && (shortestNonProjectiveNode == null
332                                            || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 
333                                            || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
334                                    shortestNonProjectiveNode = pdg.getDependencyNode(index);
335                            }
336                    }
337                    return shortestNonProjectiveNode;
338            }
339    
340    
341            private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
342                    nodeRelationLength.add(0);
343                    for (int index : pdg.getTokenIndices()) {
344                            nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
345                    }
346            }
347    
348            private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
349                    int newLabelCode;
350                    for (int index : pdg.getTokenIndices()) {
351                            if (!isCoveredRoot(pdg.getDependencyNode(index))) {
352                                    if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
353                                                    || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
354                                            if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
355                                                    if (nodeLifted.get(index)) {
356                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
357                                                    } else {
358                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
359                                                    }
360                                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
361                                            } else {
362                                                    if (nodeLifted.get(index)) {
363                                                            newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
364                                                                            headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
365                                                    } else {
366                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
367                                                    }
368                                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
369                                            }
370                                    }
371    
372                                    if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
373                                            if (nodePath.get(index)) {
374                                                    newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
375                                            } else {
376                                                    newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
377                                            }
378                                            pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
379                                    }
380    
381                            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
382                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
383                            }
384                    }
385            }
386    
387            private void setLabel(DependencyNode node, String label) throws MaltChainedException {
388                    // node.getLabelCode().clear();
389                    node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
390            }
391    
392            private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
393                    Vector<String> originalDeprel = new Vector<String>();
394                    String newLabel;
395                    originalDeprel.add(null);
396                    for (int index : pdg.getTokenIndices()) {
397                            originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
398                    }
399                    for (int index : pdg.getTokenIndices()) {
400                            newLabel = null;
401                            if (!isCoveredRoot(pdg.getDependencyNode(index))) {
402                                    if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
403                                            if (nodeLifted.get(index)) {
404                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
405                                                                    + originalDeprel.get(headDeprel.get(index).getIndex());
406                                                    // } else {
407                                                    // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
408                                            }
409                                    } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
410                                            if (nodeLifted.get(index) && nodePath.get(index)) {
411                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
412                                            } else if (nodeLifted.get(index) && !nodePath.get(index)) {
413                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
414                                            } else if (!nodeLifted.get(index) && nodePath.get(index)) {
415                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
416                                            }
417                                    } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
418                                            if (nodeLifted.get(index) && nodePath.get(index)) {
419                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
420                                                                    + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
421                                            } else if (nodeLifted.get(index) && !nodePath.get(index)) {
422                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
423                                                                    + originalDeprel.get(headDeprel.get(index).getIndex());
424                                            } else if (!nodeLifted.get(index) && nodePath.get(index)) {
425                                                    newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
426                                            }
427                                    } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
428                                            if (nodeLifted.get(index)) {
429                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
430                                            }
431                                    }
432                            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
433                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
434                            }
435                            if (newLabel != null) {
436                                    setLabel(pdg.getDependencyNode(index), newLabel);
437                            }
438                    }
439            }
440    
441            public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
442                    initDeprojeciviztion(pdg);
443    
444                    for (int index : pdg.getTokenIndices()) {
445                            if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
446                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
447                                                    && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
448                                            setPath(pdg.getDependencyNode(index));
449                                    }
450                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
451                                                    && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
452                                            nodeLifted.set(index, true);
453                                            if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
454                                                    synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
455                                                                    .getLabelCode(ppliftedSymbolTable)));
456                                            }
457                                    }
458                            }
459                    }
460                    deattachCoveredRootsForDeprojectivization(pdg);
461                    if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
462                            deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
463                    } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
464                            deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
465                    } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
466                            deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
467                    }
468            }
469    
470            private void initDeprojeciviztion(DependencyStructure pdg) {
471                    nodeLifted.clear();
472                    nodePath.clear();
473                    synacticHeadDeprel.clear();
474                    for (int index : pdg.getDependencyIndices()) {
475                            nodeLifted.add(false);
476                            nodePath.add(false);
477                            synacticHeadDeprel.add(null);
478                    }
479            }
480    
481            private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
482                    for (int index : pdg.getTokenIndices()) {
483                            if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
484                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
485                                                    && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
486                                                                    "#true#")) {
487                                            pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
488                                    }
489                            }
490                    }
491            }
492    
493            // Check whether there is at least one node in the specified dependency structure that can be lifted.
494            // If this is not the case, there is no need to call deprojectivizeWithHead.
495    
496            private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
497                    for (int index : pdg.getDependencyIndices()) {
498                            if (nodeLifted.get(index)) {
499                                    DependencyNode node = pdg.getDependencyNode(index);
500                                    if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
501                                            return true;
502                                    }
503                        }
504                    }
505                    return false;
506            }
507    
508            private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
509                    boolean success = true, childSuccess = false;
510                    int i, childAttempts = 2;
511                    DependencyNode child, possibleSyntacticHead;
512                    String syntacticHeadDeprel;
513                    if (nodeLifted.get(node.getIndex())) {
514                            syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
515                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
516                            if (possibleSyntacticHead != null) {
517                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
518                                    nodeLifted.set(node.getIndex(), false);
519                            } else {
520                                    success = false;
521                            }
522                    }
523                    while (!childSuccess && childAttempts > 0) {
524                            childSuccess = true;
525                            Vector<DependencyNode> children = new Vector<DependencyNode>();
526                            i = 0;
527                            while ((child = node.getLeftDependent(i)) != null) {
528                                    children.add(child);
529                                    i++;
530                            }
531                            i = 0;
532                            while ((child = node.getRightDependent(i)) != null) {
533                                    children.add(child);
534                                    i++;
535                            }
536                            for (i = 0; i < children.size(); i++) {
537                                    child = children.get(i);
538                                    if (!deprojectivizeWithHead(pdg, child)) {
539                                            childSuccess = false;
540                                    }
541                            }
542                            childAttempts--;
543                    }
544                    return childSuccess && success;
545            }
546    
547            private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
548                            throws MaltChainedException {
549                    DependencyNode dependent;
550                    String dependentDeprel;
551                    Vector<DependencyNode> nodes = new Vector<DependencyNode>();
552                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
553                    while (nodes.size() > 0) {
554                            dependent = nodes.remove(0);
555                            if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
556                                    dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
557                                    if (dependentDeprel.equals(syntacticHeadDeprel)) {
558                                            return dependent;
559                                    }
560                            }
561                            nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
562                    }
563                    return null;
564            }
565    
566            private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
567                            boolean percentOnly) {
568                    int i, j;
569                    Vector<DependencyNode> dependents = new Vector<DependencyNode>();
570                    DependencyNode leftChild, rightChild;
571    
572                    i = governor.getLeftDependentCount() - 1;
573                    j = 0;
574                    leftChild = governor.getLeftDependent(i--);
575                    rightChild = governor.getRightDependent(j++);
576    
577                    while (leftChild != null && rightChild != null) {
578                            if (leftChild == avoid) {
579                                    leftChild = governor.getLeftDependent(i--);
580                            } else if (rightChild == avoid) {
581                                    rightChild = governor.getRightDependent(j++);
582                            } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
583                                    if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
584                                            dependents.add(leftChild);
585                                    }
586                                    leftChild = governor.getLeftDependent(i--);
587                            } else {
588                                    if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
589                                            dependents.add(rightChild);
590                                    }
591                                    rightChild = governor.getRightDependent(j++);
592                            }
593                    }
594                    while (leftChild != null) {
595                            if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
596                                    dependents.add(leftChild);
597                            }
598                            leftChild = governor.getLeftDependent(i--);
599                    }
600                    while (rightChild != null) {
601                            if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
602                                    dependents.add(rightChild);
603                            }
604                            rightChild = governor.getRightDependent(j++);
605                    }
606                    return dependents;
607            }
608    
609            private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
610                    boolean success = true, childSuccess = false;
611                    int i, childAttempts = 2;
612                    DependencyNode child, possibleSyntacticHead;
613                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
614                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
615                            if (possibleSyntacticHead != null) {
616                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
617                                    nodeLifted.set(node.getIndex(), false);
618                            } else {
619                                    success = false;
620                            }
621                    }
622                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
623                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
624                            if (possibleSyntacticHead != null) {
625                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
626                                    nodeLifted.set(node.getIndex(), false);
627                            } else {
628                                    success = false;
629                            }
630                    }
631                    while (!childSuccess && childAttempts > 0) {
632                            childSuccess = true;
633                            Vector<DependencyNode> children = new Vector<DependencyNode>();
634                            i = 0;
635                            while ((child = node.getLeftDependent(i)) != null) {
636                                    children.add(child);
637                                    i++;
638                            }
639                            i = 0;
640                            while ((child = node.getRightDependent(i)) != null) {
641                                    children.add(child);
642                                    i++;
643                            }
644                            for (i = 0; i < children.size(); i++) {
645                                    child = children.get(i);
646                                    if (!deprojectivizeWithPath(pdg, child)) {
647                                            childSuccess = false;
648                                    }
649                            }
650                            childAttempts--;
651                    }
652                    return childSuccess && success;
653            }
654    
655            private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
656                    DependencyNode dependent;
657                    Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
658                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
659                    while (nodes.size() > 0) {
660                            dependent = nodes.remove(0);
661                            if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
662                                    return dependent;
663                            }
664                            nodes.addAll(newNodes);
665                    }
666                    return null;
667            }
668    
669            private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
670                    boolean success = true, childSuccess = false;
671                    int i, childAttempts = 2;
672                    DependencyNode child, possibleSyntacticHead;
673                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
674                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
675                                            .getIndex()));
676                            if (possibleSyntacticHead != null) {
677                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
678                                    nodeLifted.set(node.getIndex(), false);
679                            } else {
680                                    success = false;
681                            }
682                    }
683                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
684                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
685                                            .getIndex()));
686                            if (possibleSyntacticHead != null) {
687                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
688                                    nodeLifted.set(node.getIndex(), false);
689                            } else {
690                                    success = false;
691                            }
692                    }
693                    while (!childSuccess && childAttempts > 0) {
694                            childSuccess = true;
695                            Vector<DependencyNode> children = new Vector<DependencyNode>();
696                            i = 0;
697                            while ((child = node.getLeftDependent(i)) != null) {
698                                    children.add(child);
699                                    i++;
700                            }
701                            i = 0;
702                            while ((child = node.getRightDependent(i)) != null) {
703                                    children.add(child);
704                                    i++;
705                            }
706                            for (i = 0; i < children.size(); i++) {
707                                    child = children.get(i);
708                                    if (!deprojectivizeWithHeadAndPath(pdg, child)) {
709                                            childSuccess = false;
710                                    }
711                            }
712                            childAttempts--;
713                    }
714                    return childSuccess && success;
715            }
716    
717            private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
718                            throws MaltChainedException {
719                    DependencyNode dependent;
720                    Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
721                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
722                    while (nodes.size() > 0) {
723                            dependent = nodes.remove(0);
724                            if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
725                                            && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
726                                    return dependent;
727                            }
728                            nodes.addAll(newNodes);
729                            if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
730                                            && newNodes.size() != 0) {
731                                    secondChance.add(dependent);
732                            }
733                    }
734                    if (secondChance.size() > 0) {
735                            return secondChance.firstElement();
736                    }
737                    return null;
738            }
739    }