001package org.maltparser.transform.pseudo;
002
003import java.util.SortedSet;
004import java.util.TreeSet;
005import java.util.Vector;
006
007import org.apache.log4j.Logger;
008import org.maltparser.core.exception.MaltChainedException;
009import org.maltparser.core.io.dataformat.ColumnDescription;
010import org.maltparser.core.io.dataformat.DataFormatInstance;
011import org.maltparser.core.symbol.SymbolTable;
012import org.maltparser.core.symbol.SymbolTableHandler;
013import org.maltparser.core.syntaxgraph.DependencyStructure;
014import org.maltparser.core.syntaxgraph.node.DependencyNode;
015
016/**
017 * This class contains methods for projectivizing and deprojectivizing
018 * 
019 * @author Jens Nilsson
020 */
021public class PseudoProjectivity {
022        static int id = 0;
023
024        private enum PseudoProjectiveEncoding {
025                NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
026        };
027
028        private enum CoveredRootAttachment {
029                NONE, IGNORE, LEFT, RIGHT, HEAD
030        };
031
032        private enum LiftingOrder {
033                SHORTEST, DEEPEST
034        };
035
036        private PseudoProjectiveEncoding markingStrategy;
037        private CoveredRootAttachment rootAttachment;
038        private LiftingOrder liftingOrder;
039        private Logger configLogger;
040
041        private SymbolTable deprelSymbolTable;
042        private SymbolTable pppathSymbolTable;
043        private SymbolTable ppliftedSymbolTable;
044        private SymbolTable ppcoveredRootSymbolTable;
045        
046        private ColumnDescription deprelColumn;
047        private ColumnDescription pppathColumn;
048        private ColumnDescription ppliftedColumn;
049        private ColumnDescription ppcoveredRootColumn;
050        
051        private Vector<Boolean> nodeLifted;
052        private Vector<Vector<DependencyNode>> nodeTrace;
053        private Vector<DependencyNode> headDeprel;
054        private Vector<Boolean> nodePath;
055        private Vector<Boolean> isCoveredRoot;
056        private Vector<Integer> nodeRelationLength;
057        private Vector<String> synacticHeadDeprel;
058        private int counter;
059
060        public PseudoProjectivity() { }
061
062        public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
063                        DataFormatInstance dataFormatInstance, SymbolTableHandler symbolTables) throws MaltChainedException {
064                nodeLifted = new Vector<Boolean>();
065                nodeTrace = new Vector<Vector<DependencyNode>>();
066                headDeprel = new Vector<DependencyNode>();
067                nodePath = new Vector<Boolean>();
068                isCoveredRoot = new Vector<Boolean>();
069                nodeRelationLength = new Vector<Integer>();
070                synacticHeadDeprel = new Vector<String>();
071                counter = 0;
072                                
073                this.configLogger = configLogger;
074                if (markingStrategyString.equalsIgnoreCase("none")) {
075                        markingStrategy = PseudoProjectiveEncoding.NONE;
076                } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
077                        markingStrategy = PseudoProjectiveEncoding.BASELINE;
078                } else if (markingStrategyString.equalsIgnoreCase("head")) {
079                        markingStrategy = PseudoProjectiveEncoding.HEAD;
080                } else if (markingStrategyString.equalsIgnoreCase("path")) {
081                        markingStrategy = PseudoProjectiveEncoding.PATH;
082                } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
083                        markingStrategy = PseudoProjectiveEncoding.HEADPATH;
084                } else if (markingStrategyString.equalsIgnoreCase("trace")) {
085                        markingStrategy = PseudoProjectiveEncoding.TRACE;
086                }
087                this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
088                this.deprelSymbolTable = symbolTables.getSymbolTable(deprelColumn.getName());
089                if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
090                                || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
091                        this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
092                        this.ppliftedSymbolTable = symbolTables.getSymbolTable(ppliftedColumn.getName()); 
093                        if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
094                                ppliftedSymbolTable.addSymbol("#true#");
095                                ppliftedSymbolTable.addSymbol("#false#");
096                        } else {
097                                ppliftedSymbolTable.addSymbol("#false#");
098                        }
099                }
100
101                if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
102                        this.pppathColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
103                        this.pppathSymbolTable = symbolTables.getSymbolTable(pppathColumn.getName());
104                        pppathSymbolTable.addSymbol("#true#");
105                        pppathSymbolTable.addSymbol("#false#");
106                }
107
108                if (coveredRoot.equalsIgnoreCase("none")) {
109                        this.rootAttachment = CoveredRootAttachment.NONE;
110                } else if (coveredRoot.equalsIgnoreCase("ignore")) {
111                        this.rootAttachment = CoveredRootAttachment.IGNORE;
112                } else if (coveredRoot.equalsIgnoreCase("left")) {
113                        this.rootAttachment = CoveredRootAttachment.LEFT;
114                } else if (coveredRoot.equalsIgnoreCase("right")) {
115                        this.rootAttachment = CoveredRootAttachment.RIGHT;
116                } else if (coveredRoot.equalsIgnoreCase("head")) {
117                        this.rootAttachment = CoveredRootAttachment.HEAD;
118                }
119
120                if (this.rootAttachment != CoveredRootAttachment.NONE) {
121                        this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
122                        this.ppcoveredRootSymbolTable = symbolTables.getSymbolTable(ppcoveredRootColumn.getName());
123                        ppcoveredRootSymbolTable.addSymbol("#true#");
124                        ppcoveredRootSymbolTable.addSymbol("#false#");
125                }
126                if (liftingOrder.equalsIgnoreCase("shortest")) {
127                        this.liftingOrder = LiftingOrder.SHORTEST;
128                } else if (liftingOrder.equalsIgnoreCase("deepest")) {
129                        this.liftingOrder = LiftingOrder.DEEPEST;
130                }
131        }
132        
133        private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
134                nodeLifted.clear();
135                nodeTrace.clear();
136                headDeprel.clear();
137                nodePath.clear();
138                isCoveredRoot.clear();
139                nodeRelationLength.clear();
140
141                for (int index : pdg.getDependencyIndices()) {
142                        nodeLifted.add(false);
143                        nodeTrace.add(new Vector<DependencyNode>());
144                        headDeprel.add(null);
145                        nodePath.add(false);
146                        isCoveredRoot.add(false);
147                        if (ppliftedSymbolTable != null && index != 0) {
148                                pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
149                        }
150                        if (pppathSymbolTable != null && index != 0) {
151                                pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
152                        }
153                        if (ppcoveredRootSymbolTable != null && index != 0) {
154                                pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
155                        }
156                }
157                computeRelationLength(pdg);
158        }
159        
160    public void projectivize(DependencyStructure pdg) throws MaltChainedException {
161        id++;
162        if (!pdg.isTree()) {
163            configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
164            return;
165        }
166        DependencyNode deepestNonProjectiveNode;
167        initProjectivization(pdg);
168        if (rootAttachment == CoveredRootAttachment.IGNORE) {
169                if (markingStrategy != PseudoProjectiveEncoding.NONE) {
170                        while (!pdg.isProjective()) {
171                                if (liftingOrder == LiftingOrder.DEEPEST) {
172                                        deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
173                                } else {
174                                        deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
175                                }
176                                if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
177                                        nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
178                                        setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
179                                        setPath(deepestNonProjectiveNode.getHead());
180                                        pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
181                                }
182                        }
183                        deattachCoveredRootsForProjectivization(pdg);
184                }
185        } else {
186                if (rootAttachment != CoveredRootAttachment.NONE) {
187                    for (int index : pdg.getTokenIndices()) {
188                        attachCoveredRoots(pdg, pdg.getTokenNode(index));
189                    }
190                }
191                if (markingStrategy != PseudoProjectiveEncoding.NONE) {
192                    while (!pdg.isProjective()) {
193                        if (liftingOrder == LiftingOrder.DEEPEST) {
194                            deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
195                        } else {
196                            deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
197                        }
198                        nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
199                        setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
200                        setPath(deepestNonProjectiveNode.getHead());
201                        pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
202                    }
203                }
204        }
205        // collectTraceStatistics(pdg);
206        assignPseudoProjectiveDeprels(pdg);
207    }
208
209        public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
210                assignPseudoProjectiveDeprelsForMerge(pdg);
211        }
212
213        public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
214                int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
215                String label;
216                initDeprojeciviztion(pdg);
217                for (int index : pdg.getTokenIndices()) {
218                        if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
219                                label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
220                                if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
221                                        label = label.substring(0, pathLabelIndex);
222                                        setLabel(pdg.getTokenNode(index), label);
223                                        pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
224                                }
225                                if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
226                                        if (movedLabelIndex + 1 < label.length()) {
227                                                pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
228                                        } else {
229                                                pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
230                                        }
231                                        label = label.substring(0, movedLabelIndex);
232                                        setLabel(pdg.getTokenNode(index), label);
233                                }
234                        }
235                }
236                for (int index : pdg.getTokenIndices()) {
237                        if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
238                                label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
239                                if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
240                                        label = label.substring(0, coveredArcLabelIndex);
241                                        setLabel(pdg.getTokenNode(index), label);
242                                        pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
243                                }
244                        }
245                }
246        }
247
248        private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
249                if (headDeprel.get(node.getIndex()) == null) {
250                        headDeprel.set(node.getIndex(), parent);
251                }
252                nodeTrace.set(node.getIndex(), headDeprel);
253        }
254
255        private void setPath(DependencyNode node) {
256                nodePath.set(node.getIndex(), true);
257        }
258
259        private boolean isCoveredRoot(DependencyNode node) {
260                return isCoveredRoot.get(node.getIndex());
261        }
262
263        private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
264                for (int index : pdg.getTokenIndices()) {
265                        if (isCoveredRoot(pdg.getTokenNode(index))) {
266                                pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
267                        }
268                }
269        }
270
271        private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
272                int i;
273                boolean foundCoveredRoot = false;
274                DependencyNode coveredRootHead;
275                for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
276                                .getIndex()); i++) {
277                        int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
278                        if (leftMostIndex == -1) {
279                                leftMostIndex = i;
280                        }
281                        int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
282                        if (rightMostIndex == -1) {
283                                rightMostIndex = i;
284                        }
285                        if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
286                                        && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
287                                        && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
288                                if (rootAttachment == CoveredRootAttachment.LEFT) {
289                                        if (deepest.getHead().getIndex() < deepest.getIndex()) {
290                                                coveredRootHead = deepest.getHead();
291                                        } else {
292                                                coveredRootHead = deepest;
293                                        }
294                                } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
295                                        if (deepest.getIndex() < deepest.getHead().getIndex()) {
296                                                coveredRootHead = deepest.getHead();
297                                        } else {
298                                                coveredRootHead = deepest;
299                                        }
300                                } else {
301                                        coveredRootHead = deepest.getHead();
302                                }
303                                pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
304                                setCoveredRoot(pdg.getDependencyNode(i));
305                                foundCoveredRoot = true;
306                        }
307                }
308                return foundCoveredRoot;
309        }
310
311        private void setCoveredRoot(DependencyNode node) {
312                isCoveredRoot.set(node.getIndex(), true);
313        }
314
315        private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
316                DependencyNode deepestNonProjectiveNode = null;
317                for (int index : pdg.getDependencyIndices()) {
318                        if (!pdg.getDependencyNode(index).isProjective()
319                                        && (deepestNonProjectiveNode == null 
320                                        || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
321                                deepestNonProjectiveNode = pdg.getDependencyNode(index);
322                        }
323                }
324                
325                return deepestNonProjectiveNode;
326        }
327
328        private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
329                DependencyNode shortestNonProjectiveNode = null;
330                for (int index : pdg.getDependencyIndices()) {
331                        if (!pdg.getDependencyNode(index).isProjective()
332                                        && (shortestNonProjectiveNode == null
333                                        || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 
334                                        )) {
335//                                      || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
336                                shortestNonProjectiveNode = pdg.getDependencyNode(index);
337                        }
338                }
339                return shortestNonProjectiveNode;
340        }
341
342
343        private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
344                nodeRelationLength.add(0);
345                for (int index : pdg.getTokenIndices()) {
346                        nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
347                }
348        }
349
350        private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
351                int newLabelCode;
352                for (int index : pdg.getTokenIndices()) {
353                        if (!isCoveredRoot(pdg.getDependencyNode(index))) {
354                                if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
355                                                || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
356                                        if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
357                                                if (nodeLifted.get(index)) {
358                                                        newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
359                                                } else {
360                                                        newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
361                                                }
362                                                pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
363                                        } else {
364                                                if (nodeLifted.get(index)) {
365                                                        newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
366                                                                        headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
367                                                } else {
368                                                        newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
369                                                }
370                                                pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
371                                        }
372                                }
373
374                                if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
375                                        if (nodePath.get(index)) {
376                                                newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
377                                        } else {
378                                                newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
379                                        }
380                                        pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
381                                }
382
383                        } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
384                                pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
385                        }
386                }
387        }
388
389        private void setLabel(DependencyNode node, String label) throws MaltChainedException {
390                // node.getLabelCode().clear();
391                node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
392        }
393
394        private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
395                Vector<String> originalDeprel = new Vector<String>();
396                String newLabel;
397                originalDeprel.add(null);
398                for (int index : pdg.getTokenIndices()) {
399                        originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
400                }
401                for (int index : pdg.getTokenIndices()) {
402                        newLabel = null;
403                        if (!isCoveredRoot(pdg.getDependencyNode(index))) {
404                                if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
405                                        if (nodeLifted.get(index)) {
406                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
407                                                                + originalDeprel.get(headDeprel.get(index).getIndex());
408                                                // } else {
409                                                // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
410                                        }
411                                } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
412                                        if (nodeLifted.get(index) && nodePath.get(index)) {
413                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
414                                        } else if (nodeLifted.get(index) && !nodePath.get(index)) {
415                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
416                                        } else if (!nodeLifted.get(index) && nodePath.get(index)) {
417                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
418                                        }
419                                } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
420                                        if (nodeLifted.get(index) && nodePath.get(index)) {
421                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
422                                                                + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
423                                        } else if (nodeLifted.get(index) && !nodePath.get(index)) {
424                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
425                                                                + originalDeprel.get(headDeprel.get(index).getIndex());
426                                        } else if (!nodeLifted.get(index) && nodePath.get(index)) {
427                                                newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
428                                        }
429                                } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
430                                        if (nodeLifted.get(index)) {
431                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
432                                        }
433                                }
434                        } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
435                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
436                        }
437                        if (newLabel != null) {
438                                setLabel(pdg.getDependencyNode(index), newLabel);
439                        }
440                }
441        }
442
443        public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
444                initDeprojeciviztion(pdg);
445
446                for (int index : pdg.getTokenIndices()) {
447                        if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
448                                if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
449                                                && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
450                                        setPath(pdg.getDependencyNode(index));
451                                }
452                                if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
453                                                && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
454                                        nodeLifted.set(index, true);
455                                        if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
456                                                synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
457                                                                .getLabelCode(ppliftedSymbolTable)));
458                                        }
459                                }
460                        }
461                }
462                deattachCoveredRootsForDeprojectivization(pdg);
463                if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
464                        deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
465                } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
466                        deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
467                } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
468                        deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
469                }
470        }
471
472        private void initDeprojeciviztion(DependencyStructure pdg) {
473                nodeLifted.clear();
474                nodePath.clear();
475                synacticHeadDeprel.clear();
476                counter = 0;
477                for (int index : pdg.getDependencyIndices()) {
478                        nodeLifted.add(false);
479                        nodePath.add(false);
480                        synacticHeadDeprel.add(null);
481                }
482        }
483
484        private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
485                for (int index : pdg.getTokenIndices()) {
486                        if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
487                                if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
488                                                && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
489                                                                "#true#")) {
490                                        pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
491                                }
492                        }
493                }
494        }
495
496        // Check whether there is at least one node in the specified dependency structure that can be lifted.
497        // If this is not the case, there is no need to call deprojectivizeWithHead.
498
499        private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
500                for (int index : pdg.getDependencyIndices()) {
501                        if (nodeLifted.get(index)) {
502                                DependencyNode node = pdg.getDependencyNode(index);
503                                if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
504                                        return true;
505                                }
506                    }
507                }
508                return false;
509        }
510
511        private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
512                boolean success = true, childSuccess = false;
513                int i, childAttempts = (counter < 10000 ? 2 : 1);
514                DependencyNode child, possibleSyntacticHead;
515                String syntacticHeadDeprel;
516                counter++;
517                if (nodeLifted.get(node.getIndex())) {
518                        syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
519                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
520                        if (possibleSyntacticHead != null) {
521                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
522                                nodeLifted.set(node.getIndex(), false);
523                        } else {
524                                success = false;
525                        }
526                }
527                while (!childSuccess && childAttempts > 0) {
528                        childSuccess = true;
529                        Vector<DependencyNode> children = new Vector<DependencyNode>();
530                        i = 0;
531                        while ((child = node.getLeftDependent(i)) != null) {
532                                children.add(child);
533                                i++;
534                        }
535                        i = 0;
536                        while ((child = node.getRightDependent(i)) != null) {
537                                children.add(child);
538                                i++;
539                        }
540                        for (i = 0; i < children.size(); i++) {
541                                child = children.get(i);
542                                if (!deprojectivizeWithHead(pdg, child)) {
543                                        childSuccess = false;
544                                }
545                        }
546                        childAttempts--;
547                }
548                return childSuccess && success;
549        }
550
551        private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
552                        throws MaltChainedException {
553                DependencyNode dependent;
554                String dependentDeprel;
555                Vector<DependencyNode> nodes = new Vector<DependencyNode>();
556                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
557                while (nodes.size() > 0) {
558                        dependent = nodes.remove(0);
559                        if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
560                                dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
561                                if (dependentDeprel.equals(syntacticHeadDeprel)) {
562                                        return dependent;
563                                }
564                        }
565                        nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
566                }
567                return null;
568        }
569
570        
571        private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
572                        boolean percentOnly) {
573                Vector<DependencyNode> output = new Vector<DependencyNode>();
574                SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>();
575                dependents.addAll(governor.getLeftDependents());
576                dependents.addAll(governor.getRightDependents());
577
578
579                DependencyNode[] deps = new DependencyNode[dependents.size()];
580                int[] distances = new int[dependents.size()];
581                int i = 0;
582                for (DependencyNode dep : dependents) {
583                        distances[i] = Math.abs(dep.getIndex() - avoid.getIndex());
584                        deps[i] = dep;
585                        i++;
586                }
587                if (distances.length > 1) {
588                        int smallest;
589                        int n = distances.length;
590                        int tmpDist;
591                        DependencyNode tmpDep;
592                        for (i=0; i < n; i++) {
593                                smallest = i;
594                                for (int j=i; j < n; j++) {
595                                        if (distances[j] < distances[smallest]) {
596                                                smallest = j;
597                                        }
598                                }
599                                if (smallest != i) {
600                                        tmpDist = distances[smallest];
601                                        distances[smallest] = distances[i];
602                                        distances[i] = tmpDist;
603                                        tmpDep = deps[smallest];
604                                        deps[smallest] = deps[i];
605                                        deps[i] = tmpDep;
606                                }
607                        }
608                }
609                for (i=0; i<distances.length;i++) {
610                        if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath.get(deps[i].getIndex())))) {
611                                output.add(deps[i]);
612                        }
613                }
614                return output;
615        }
616        
617        private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode2(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
618                        boolean percentOnly) {
619                int i, j;
620                Vector<DependencyNode> dependents = new Vector<DependencyNode>();
621                DependencyNode leftChild, rightChild;
622
623                i = governor.getLeftDependentCount() - 1;
624                j = 0;
625                leftChild = governor.getLeftDependent(i--);
626                rightChild = governor.getRightDependent(j++);
627
628                while (leftChild != null && rightChild != null) {
629                        if (leftChild == avoid) {
630                                leftChild = governor.getLeftDependent(i--);
631                        } else if (rightChild == avoid) {
632                                rightChild = governor.getRightDependent(j++);
633                        } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
634                                if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
635                                        dependents.add(leftChild);
636                                }
637                                leftChild = governor.getLeftDependent(i--);
638                        } else {
639                                if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
640                                        dependents.add(rightChild);
641                                }
642                                rightChild = governor.getRightDependent(j++);
643                        }
644                }
645                while (leftChild != null) {
646                        if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
647                                dependents.add(leftChild);
648                        }
649                        leftChild = governor.getLeftDependent(i--);
650                }
651                while (rightChild != null) {
652                        if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
653                                dependents.add(rightChild);
654                        }
655                        rightChild = governor.getRightDependent(j++);
656                }
657                return dependents;
658        }
659
660        private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
661                boolean success = true, childSuccess = false;
662                int i, childAttempts = (counter < 10000 ? 2 : 1);
663                DependencyNode child, possibleSyntacticHead;
664                counter++;
665                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
666                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
667                        if (possibleSyntacticHead != null) {
668                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
669                                nodeLifted.set(node.getIndex(), false);
670                        } else {
671                                success = false;
672                        }
673                }
674                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
675                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
676                        if (possibleSyntacticHead != null) {
677                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
678                                nodeLifted.set(node.getIndex(), false);
679                        } else {
680                                success = false;
681                        }
682                }
683                while (!childSuccess && childAttempts > 0) {
684                        childSuccess = true;
685                        Vector<DependencyNode> children = new Vector<DependencyNode>();
686                        i = 0;
687                        while ((child = node.getLeftDependent(i)) != null) {
688                                children.add(child);
689                                i++;
690                        }
691                        i = 0;
692                        while ((child = node.getRightDependent(i)) != null) {
693                                children.add(child);
694                                i++;
695                        }
696                        for (i = 0; i < children.size(); i++) {
697                                child = children.get(i);
698                                if (!deprojectivizeWithPath(pdg, child)) {
699                                        childSuccess = false;
700                                }
701                        }
702                        childAttempts--;
703                }
704                return childSuccess && success;
705        }
706
707        private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
708                DependencyNode dependent;
709                Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
710                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
711                while (nodes.size() > 0) {
712                        dependent = nodes.remove(0);
713                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
714                                return dependent;
715                        }
716                        nodes.addAll(newNodes);
717                }
718                return null;
719        }
720
721        private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
722                boolean success = true, childSuccess = false;
723                int i, childAttempts = (counter < 10000 ? 2 : 1);
724                DependencyNode child, possibleSyntacticHead;
725                counter++;
726                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
727                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
728                                        .getIndex()));
729                        if (possibleSyntacticHead != null) {
730                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
731                                nodeLifted.set(node.getIndex(), false);
732                        } else {
733                                success = false;
734                        }
735                }
736                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
737                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
738                                        .getIndex()));
739                        if (possibleSyntacticHead != null) {
740                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
741                                nodeLifted.set(node.getIndex(), false);
742                        } else {
743                                success = false;
744                        }
745                }
746                while (!childSuccess && childAttempts > 0) {
747                        childSuccess = true;
748                        Vector<DependencyNode> children = new Vector<DependencyNode>();
749                        i = 0;
750                        while ((child = node.getLeftDependent(i)) != null) {
751                                children.add(child);
752                                i++;
753                        }
754                        i = 0;
755                        while ((child = node.getRightDependent(i)) != null) {
756                                children.add(child);
757                                i++;
758                        }
759                        for (i = 0; i < children.size(); i++) {
760                                child = children.get(i);
761                                if (!deprojectivizeWithHeadAndPath(pdg, child)) {
762                                        childSuccess = false;
763                                }
764                        }
765                        childAttempts--;
766                }
767                return childSuccess && success;
768        }
769
770        private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
771                        throws MaltChainedException {
772                DependencyNode dependent;
773                Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
774                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
775                while (nodes.size() > 0) {
776                        dependent = nodes.remove(0);
777                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
778                                        && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
779                                return dependent;
780                        }
781                        nodes.addAll(newNodes);
782                        if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
783                                        && newNodes.size() != 0) {
784                                secondChance.add(dependent);
785                        }
786                }
787                if (secondChance.size() > 0) {
788                        return secondChance.firstElement();
789                }
790                return null;
791        }
792}