001    package org.maltparser.transform.pseudo;
002    
003    import java.util.Vector;
004    
005    import org.apache.log4j.Logger;
006    import org.maltparser.core.exception.MaltChainedException;
007    import org.maltparser.core.io.dataformat.ColumnDescription;
008    import org.maltparser.core.io.dataformat.DataFormatInstance;
009    import org.maltparser.core.symbol.SymbolTable;
010    import org.maltparser.core.syntaxgraph.DependencyStructure;
011    import org.maltparser.core.syntaxgraph.node.DependencyNode;
012    
013    /**
014     * This class contains methods for projectivizing and deprojectivizing
015     * 
016     * @author Jens Nilsson
017     */
018    public class PseudoProjectivity {
019            static int id = 0;
020    
021            private enum PseudoProjectiveEncoding {
022                    NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
023            };
024    
025            private enum CoveredRootAttachment {
026                    NONE, IGNORE, LEFT, RIGHT, HEAD
027            };
028    
029            private enum LiftingOrder {
030                    SHORTEST, DEEPEST
031            };
032    
033            private PseudoProjectiveEncoding markingStrategy;
034            private CoveredRootAttachment rootAttachment;
035            private LiftingOrder liftingOrder;
036            private Logger configLogger;
037    
038            private SymbolTable deprelSymbolTable;
039            private SymbolTable pppathSymbolTable;
040            private SymbolTable ppliftedSymbolTable;
041            private SymbolTable ppcoveredRootSymbolTable;
042            
043            private ColumnDescription deprelColumn;
044            private ColumnDescription pppathColumn;
045            private ColumnDescription ppliftedColumn;
046            private ColumnDescription ppcoveredRootColumn;
047            
048            private Vector<Boolean> nodeLifted;
049            private Vector<Vector<DependencyNode>> nodeTrace;
050            private Vector<DependencyNode> headDeprel;
051            private Vector<Boolean> nodePath;
052            private Vector<Boolean> isCoveredRoot;
053            private Vector<Integer> nodeRelationLength;
054            private Vector<String> synacticHeadDeprel;
055    
056    
057            public PseudoProjectivity() { }
058    
059            public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
060                            DataFormatInstance dataFormatInstance) throws MaltChainedException {
061                    nodeLifted = new Vector<Boolean>();
062                    nodeTrace = new Vector<Vector<DependencyNode>>();
063                    headDeprel = new Vector<DependencyNode>();
064                    nodePath = new Vector<Boolean>();
065                    isCoveredRoot = new Vector<Boolean>();
066                    nodeRelationLength = new Vector<Integer>();
067                    synacticHeadDeprel = new Vector<String>();
068    
069                    this.configLogger = configLogger;
070                    if (markingStrategyString.equalsIgnoreCase("none")) {
071                            markingStrategy = PseudoProjectiveEncoding.NONE;
072                    } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
073                            markingStrategy = PseudoProjectiveEncoding.BASELINE;
074                    } else if (markingStrategyString.equalsIgnoreCase("head")) {
075                            markingStrategy = PseudoProjectiveEncoding.HEAD;
076                    } else if (markingStrategyString.equalsIgnoreCase("path")) {
077                            markingStrategy = PseudoProjectiveEncoding.PATH;
078                    } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
079                            markingStrategy = PseudoProjectiveEncoding.HEADPATH;
080                    } else if (markingStrategyString.equalsIgnoreCase("trace")) {
081                            markingStrategy = PseudoProjectiveEncoding.TRACE;
082                    }
083                    this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
084                    this.deprelSymbolTable = deprelColumn.getSymbolTable();
085    //              this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL");
086                    if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
087                                    || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
088                            this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
089                            this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable();
090    //                      this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable);
091                            if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
092                                    ppliftedSymbolTable.addSymbol("#true#");
093                                    ppliftedSymbolTable.addSymbol("#false#");
094                            } else {
095                                    ppliftedSymbolTable.addSymbol("#false#");
096                            }
097                    }
098    
099                    if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
100                            this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
101                            this.pppathSymbolTable = pppathColumn.getSymbolTable();
102                            pppathSymbolTable.addSymbol("#true#");
103                            pppathSymbolTable.addSymbol("#false#");
104                    }
105    
106                    if (coveredRoot.equalsIgnoreCase("none")) {
107                            this.rootAttachment = CoveredRootAttachment.NONE;
108                    } else if (coveredRoot.equalsIgnoreCase("ignore")) {
109                            this.rootAttachment = CoveredRootAttachment.IGNORE;
110                    } else if (coveredRoot.equalsIgnoreCase("left")) {
111                            this.rootAttachment = CoveredRootAttachment.LEFT;
112                    } else if (coveredRoot.equalsIgnoreCase("right")) {
113                            this.rootAttachment = CoveredRootAttachment.RIGHT;
114                    } else if (coveredRoot.equalsIgnoreCase("head")) {
115                            this.rootAttachment = CoveredRootAttachment.HEAD;
116                    }
117    
118                    if (this.rootAttachment != CoveredRootAttachment.NONE) {
119                            this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
120                            this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable();
121                            ppcoveredRootSymbolTable.addSymbol("#true#");
122                            ppcoveredRootSymbolTable.addSymbol("#false#");
123                    }
124                    if (liftingOrder.equalsIgnoreCase("shortest")) {
125                            this.liftingOrder = LiftingOrder.SHORTEST;
126                    } else if (liftingOrder.equalsIgnoreCase("deepest")) {
127                            this.liftingOrder = LiftingOrder.DEEPEST;
128                    }
129            }
130            
131            private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
132                    nodeLifted.clear();
133                    nodeTrace.clear();
134                    headDeprel.clear();
135                    nodePath.clear();
136                    isCoveredRoot.clear();
137                    nodeRelationLength.clear();
138    
139                    for (int index : pdg.getDependencyIndices()) {
140                            nodeLifted.add(false);
141                            nodeTrace.add(new Vector<DependencyNode>());
142                            headDeprel.add(null);
143                            nodePath.add(false);
144                            isCoveredRoot.add(false);
145                            if (ppliftedSymbolTable != null && index != 0) {
146                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
147                            }
148                            if (pppathSymbolTable != null && index != 0) {
149                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
150                            }
151                            if (ppcoveredRootSymbolTable != null && index != 0) {
152                                    pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
153                            }
154                    }
155                    computeRelationLength(pdg);
156            }
157            
158        public void projectivize(DependencyStructure pdg) throws MaltChainedException {
159            id++;
160            if (!pdg.isTree()) {
161                configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
162                return;
163            }
164            DependencyNode deepestNonProjectiveNode;
165            initProjectivization(pdg);
166            if (rootAttachment == CoveredRootAttachment.IGNORE) {
167                    if (markingStrategy != PseudoProjectiveEncoding.NONE) {
168                            while (!pdg.isProjective()) {
169                                    if (liftingOrder == LiftingOrder.DEEPEST) {
170                                            deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
171                                    } else {
172                                            deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
173                                    }
174                                    if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
175                                            nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
176                                            setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
177                                            setPath(deepestNonProjectiveNode.getHead());
178                                            pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
179                                    }
180                            }
181                            deattachCoveredRootsForProjectivization(pdg);
182                    }
183            } else {
184                    if (rootAttachment != CoveredRootAttachment.NONE) {
185                        for (int index : pdg.getTokenIndices()) {
186                            attachCoveredRoots(pdg, pdg.getTokenNode(index));
187                        }
188                    }
189                    if (markingStrategy != PseudoProjectiveEncoding.NONE) {
190                        while (!pdg.isProjective()) {
191                            if (liftingOrder == LiftingOrder.DEEPEST) {
192                                deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
193                            } else {
194                                deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
195                            }
196                            nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
197                            setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
198                            setPath(deepestNonProjectiveNode.getHead());
199                            pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
200                        }
201                    }
202            }
203            // collectTraceStatistics(pdg);
204            assignPseudoProjectiveDeprels(pdg);
205        }
206    
207            public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
208                    assignPseudoProjectiveDeprelsForMerge(pdg);
209            }
210    
211            public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
212                    int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
213                    String label;
214                    initDeprojeciviztion(pdg);
215                    for (int index : pdg.getTokenIndices()) {
216                            if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
217                                    label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
218                                    if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
219                                            label = label.substring(0, pathLabelIndex);
220                                            setLabel(pdg.getTokenNode(index), label);
221                                            pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
222                                    }
223                                    if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
224                                            if (movedLabelIndex + 1 < label.length()) {
225                                                    pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
226                                            } else {
227                                                    pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
228                                            }
229                                            label = label.substring(0, movedLabelIndex);
230                                            setLabel(pdg.getTokenNode(index), label);
231                                    }
232                            }
233                    }
234                    for (int index : pdg.getTokenIndices()) {
235                            if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
236                                    label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
237                                    if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
238                                            label = label.substring(0, coveredArcLabelIndex);
239                                            setLabel(pdg.getTokenNode(index), label);
240                                            pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
241                                    }
242                            }
243                    }
244            }
245    
246            private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
247                    if (headDeprel.get(node.getIndex()) == null) {
248                            headDeprel.set(node.getIndex(), parent);
249                    }
250                    nodeTrace.set(node.getIndex(), headDeprel);
251            }
252    
253            private void setPath(DependencyNode node) {
254                    nodePath.set(node.getIndex(), true);
255            }
256    
257            private boolean isCoveredRoot(DependencyNode node) {
258                    return isCoveredRoot.get(node.getIndex());
259            }
260    
261            private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
262                    for (int index : pdg.getTokenIndices()) {
263                            if (isCoveredRoot(pdg.getTokenNode(index))) {
264                                    pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
265                            }
266                    }
267            }
268    
269            private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
270                    int i;
271                    boolean foundCoveredRoot = false;
272                    DependencyNode coveredRootHead;
273                    for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
274                                    .getIndex()); i++) {
275                            int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
276                            if (leftMostIndex == -1) {
277                                    leftMostIndex = i;
278                            }
279                            int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
280                            if (rightMostIndex == -1) {
281                                    rightMostIndex = i;
282                            }
283                            if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
284                                            && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
285                                            && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
286                                    if (rootAttachment == CoveredRootAttachment.LEFT) {
287                                            if (deepest.getHead().getIndex() < deepest.getIndex()) {
288                                                    coveredRootHead = deepest.getHead();
289                                            } else {
290                                                    coveredRootHead = deepest;
291                                            }
292                                    } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
293                                            if (deepest.getIndex() < deepest.getHead().getIndex()) {
294                                                    coveredRootHead = deepest.getHead();
295                                            } else {
296                                                    coveredRootHead = deepest;
297                                            }
298                                    } else {
299                                            coveredRootHead = deepest.getHead();
300                                    }
301                                    pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
302                                    setCoveredRoot(pdg.getDependencyNode(i));
303                                    foundCoveredRoot = true;
304                            }
305                    }
306                    return foundCoveredRoot;
307            }
308    
309            private void setCoveredRoot(DependencyNode node) {
310                    isCoveredRoot.set(node.getIndex(), true);
311            }
312    
313            private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
314                    DependencyNode deepestNonProjectiveNode = null;
315                    for (int index : pdg.getDependencyIndices()) {
316                            if (!pdg.getDependencyNode(index).isProjective()
317                                            && (deepestNonProjectiveNode == null 
318                                            || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
319                                    deepestNonProjectiveNode = pdg.getDependencyNode(index);
320                            }
321                    }
322                    
323                    return deepestNonProjectiveNode;
324            }
325    
326            private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
327                    DependencyNode shortestNonProjectiveNode = null;
328                    for (int index : pdg.getDependencyIndices()) {
329                            if (!pdg.getDependencyNode(index).isProjective()
330                                            && (shortestNonProjectiveNode == null
331                                            || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 
332                                            || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
333                                    shortestNonProjectiveNode = pdg.getDependencyNode(index);
334                            }
335                    }
336                    return shortestNonProjectiveNode;
337            }
338    
339    
340            private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
341                    nodeRelationLength.add(0);
342                    for (int index : pdg.getTokenIndices()) {
343                            nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
344                    }
345            }
346    
347            private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
348                    int newLabelCode;
349                    for (int index : pdg.getTokenIndices()) {
350                            if (!isCoveredRoot(pdg.getDependencyNode(index))) {
351                                    if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
352                                                    || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
353                                            if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
354                                                    if (nodeLifted.get(index)) {
355                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
356                                                    } else {
357                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
358                                                    }
359                                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
360                                            } else {
361                                                    if (nodeLifted.get(index)) {
362                                                            newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
363                                                                            headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
364                                                    } else {
365                                                            newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
366                                                    }
367                                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
368                                            }
369                                    }
370    
371                                    if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
372                                            if (nodePath.get(index)) {
373                                                    newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
374                                            } else {
375                                                    newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
376                                            }
377                                            pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
378                                    }
379    
380                            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
381                                    pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
382                            }
383                    }
384            }
385    
386            private void setLabel(DependencyNode node, String label) throws MaltChainedException {
387                    // node.getLabelCode().clear();
388                    node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
389            }
390    
391            private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
392                    Vector<String> originalDeprel = new Vector<String>();
393                    String newLabel;
394                    originalDeprel.add(null);
395                    for (int index : pdg.getTokenIndices()) {
396                            originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
397                    }
398                    for (int index : pdg.getTokenIndices()) {
399                            newLabel = null;
400                            if (!isCoveredRoot(pdg.getDependencyNode(index))) {
401                                    if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
402                                            if (nodeLifted.get(index)) {
403                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
404                                                                    + originalDeprel.get(headDeprel.get(index).getIndex());
405                                                    // } else {
406                                                    // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
407                                            }
408                                    } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
409                                            if (nodeLifted.get(index) && nodePath.get(index)) {
410                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
411                                            } else if (nodeLifted.get(index) && !nodePath.get(index)) {
412                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
413                                            } else if (!nodeLifted.get(index) && nodePath.get(index)) {
414                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
415                                            }
416                                    } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
417                                            if (nodeLifted.get(index) && nodePath.get(index)) {
418                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
419                                                                    + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
420                                            } else if (nodeLifted.get(index) && !nodePath.get(index)) {
421                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
422                                                                    + originalDeprel.get(headDeprel.get(index).getIndex());
423                                            } else if (!nodeLifted.get(index) && nodePath.get(index)) {
424                                                    newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
425                                            }
426                                    } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
427                                            if (nodeLifted.get(index)) {
428                                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
429                                            }
430                                    }
431                            } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
432                                    newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
433                            }
434                            if (newLabel != null) {
435                                    setLabel(pdg.getDependencyNode(index), newLabel);
436                            }
437                    }
438            }
439    
440            public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
441                    initDeprojeciviztion(pdg);
442    
443                    for (int index : pdg.getTokenIndices()) {
444                            if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
445                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
446                                                    && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
447                                            setPath(pdg.getDependencyNode(index));
448                                    }
449                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
450                                                    && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
451                                            nodeLifted.set(index, true);
452                                            if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
453                                                    synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
454                                                                    .getLabelCode(ppliftedSymbolTable)));
455                                            }
456                                    }
457                            }
458                    }
459                    deattachCoveredRootsForDeprojectivization(pdg);
460                    if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
461                            deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
462                    } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
463                            deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
464                    } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
465                            deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
466                    }
467            }
468    
469            private void initDeprojeciviztion(DependencyStructure pdg) {
470                    nodeLifted.clear();
471                    nodePath.clear();
472                    synacticHeadDeprel.clear();
473                    for (int index : pdg.getDependencyIndices()) {
474                            nodeLifted.add(false);
475                            nodePath.add(false);
476                            synacticHeadDeprel.add(null);
477                    }
478            }
479    
480            private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
481                    for (int index : pdg.getTokenIndices()) {
482                            if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
483                                    if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
484                                                    && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
485                                                                    "#true#")) {
486                                            pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
487                                    }
488                            }
489                    }
490            }
491    
492            // Check whether there is at least one node in the specified dependency structure that can be lifted.
493            // If this is not the case, there is no need to call deprojectivizeWithHead.
494    
495            private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
496                    for (int index : pdg.getDependencyIndices()) {
497                            if (nodeLifted.get(index)) {
498                                    DependencyNode node = pdg.getDependencyNode(index);
499                                    if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
500                                            return true;
501                                    }
502                        }
503                    }
504                    return false;
505            }
506    
507            private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
508                    boolean success = true, childSuccess = false;
509                    int i, childAttempts = 2;
510                    DependencyNode child, possibleSyntacticHead;
511                    String syntacticHeadDeprel;
512                    if (nodeLifted.get(node.getIndex())) {
513                            syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
514                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
515                            if (possibleSyntacticHead != null) {
516                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
517                                    nodeLifted.set(node.getIndex(), false);
518                            } else {
519                                    success = false;
520                            }
521                    }
522                    while (!childSuccess && childAttempts > 0) {
523                            childSuccess = true;
524                            Vector<DependencyNode> children = new Vector<DependencyNode>();
525                            i = 0;
526                            while ((child = node.getLeftDependent(i)) != null) {
527                                    children.add(child);
528                                    i++;
529                            }
530                            i = 0;
531                            while ((child = node.getRightDependent(i)) != null) {
532                                    children.add(child);
533                                    i++;
534                            }
535                            for (i = 0; i < children.size(); i++) {
536                                    child = children.get(i);
537                                    if (!deprojectivizeWithHead(pdg, child)) {
538                                            childSuccess = false;
539                                    }
540                            }
541                            childAttempts--;
542                    }
543                    return childSuccess && success;
544            }
545    
546            private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
547                            throws MaltChainedException {
548                    DependencyNode dependent;
549                    String dependentDeprel;
550                    Vector<DependencyNode> nodes = new Vector<DependencyNode>();
551                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
552                    while (nodes.size() > 0) {
553                            dependent = nodes.remove(0);
554                            if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
555                                    dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
556                                    if (dependentDeprel.equals(syntacticHeadDeprel)) {
557                                            return dependent;
558                                    }
559                            }
560                            nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
561                    }
562                    return null;
563            }
564    
565            private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
566                            boolean percentOnly) {
567                    int i, j;
568                    Vector<DependencyNode> dependents = new Vector<DependencyNode>();
569                    DependencyNode leftChild, rightChild;
570    
571                    i = governor.getLeftDependentCount() - 1;
572                    j = 0;
573                    leftChild = governor.getLeftDependent(i--);
574                    rightChild = governor.getRightDependent(j++);
575    
576                    while (leftChild != null && rightChild != null) {
577                            if (leftChild == avoid) {
578                                    leftChild = governor.getLeftDependent(i--);
579                            } else if (rightChild == avoid) {
580                                    rightChild = governor.getRightDependent(j++);
581                            } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
582                                    if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
583                                            dependents.add(leftChild);
584                                    }
585                                    leftChild = governor.getLeftDependent(i--);
586                            } else {
587                                    if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
588                                            dependents.add(rightChild);
589                                    }
590                                    rightChild = governor.getRightDependent(j++);
591                            }
592                    }
593                    while (leftChild != null) {
594                            if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
595                                    dependents.add(leftChild);
596                            }
597                            leftChild = governor.getLeftDependent(i--);
598                    }
599                    while (rightChild != null) {
600                            if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
601                                    dependents.add(rightChild);
602                            }
603                            rightChild = governor.getRightDependent(j++);
604                    }
605                    return dependents;
606            }
607    
608            private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
609                    boolean success = true, childSuccess = false;
610                    int i, childAttempts = 2;
611                    DependencyNode child, possibleSyntacticHead;
612                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
613                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
614                            if (possibleSyntacticHead != null) {
615                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
616                                    nodeLifted.set(node.getIndex(), false);
617                            } else {
618                                    success = false;
619                            }
620                    }
621                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
622                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
623                            if (possibleSyntacticHead != null) {
624                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
625                                    nodeLifted.set(node.getIndex(), false);
626                            } else {
627                                    success = false;
628                            }
629                    }
630                    while (!childSuccess && childAttempts > 0) {
631                            childSuccess = true;
632                            Vector<DependencyNode> children = new Vector<DependencyNode>();
633                            i = 0;
634                            while ((child = node.getLeftDependent(i)) != null) {
635                                    children.add(child);
636                                    i++;
637                            }
638                            i = 0;
639                            while ((child = node.getRightDependent(i)) != null) {
640                                    children.add(child);
641                                    i++;
642                            }
643                            for (i = 0; i < children.size(); i++) {
644                                    child = children.get(i);
645                                    if (!deprojectivizeWithPath(pdg, child)) {
646                                            childSuccess = false;
647                                    }
648                            }
649                            childAttempts--;
650                    }
651                    return childSuccess && success;
652            }
653    
654            private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
655                    DependencyNode dependent;
656                    Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
657                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
658                    while (nodes.size() > 0) {
659                            dependent = nodes.remove(0);
660                            if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
661                                    return dependent;
662                            }
663                            nodes.addAll(newNodes);
664                    }
665                    return null;
666            }
667    
668            private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
669                    boolean success = true, childSuccess = false;
670                    int i, childAttempts = 2;
671                    DependencyNode child, possibleSyntacticHead;
672                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
673                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
674                                            .getIndex()));
675                            if (possibleSyntacticHead != null) {
676                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
677                                    nodeLifted.set(node.getIndex(), false);
678                            } else {
679                                    success = false;
680                            }
681                    }
682                    if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
683                            possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
684                                            .getIndex()));
685                            if (possibleSyntacticHead != null) {
686                                    pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
687                                    nodeLifted.set(node.getIndex(), false);
688                            } else {
689                                    success = false;
690                            }
691                    }
692                    while (!childSuccess && childAttempts > 0) {
693                            childSuccess = true;
694                            Vector<DependencyNode> children = new Vector<DependencyNode>();
695                            i = 0;
696                            while ((child = node.getLeftDependent(i)) != null) {
697                                    children.add(child);
698                                    i++;
699                            }
700                            i = 0;
701                            while ((child = node.getRightDependent(i)) != null) {
702                                    children.add(child);
703                                    i++;
704                            }
705                            for (i = 0; i < children.size(); i++) {
706                                    child = children.get(i);
707                                    if (!deprojectivizeWithHeadAndPath(pdg, child)) {
708                                            childSuccess = false;
709                                    }
710                            }
711                            childAttempts--;
712                    }
713                    return childSuccess && success;
714            }
715    
716            private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
717                            throws MaltChainedException {
718                    DependencyNode dependent;
719                    Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
720                    nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
721                    while (nodes.size() > 0) {
722                            dependent = nodes.remove(0);
723                            if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
724                                            && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
725                                    return dependent;
726                            }
727                            nodes.addAll(newNodes);
728                            if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
729                                            && newNodes.size() != 0) {
730                                    secondChance.add(dependent);
731                            }
732                    }
733                    if (secondChance.size() > 0) {
734                            return secondChance.firstElement();
735                    }
736                    return null;
737            }
738    }