001package org.maltparser.transform.pseudo;
002
003import java.util.SortedSet;
004import java.util.TreeSet;
005import java.util.Vector;
006
007import org.apache.log4j.Logger;
008import org.maltparser.core.exception.MaltChainedException;
009import org.maltparser.core.io.dataformat.ColumnDescription;
010import org.maltparser.core.io.dataformat.DataFormatInstance;
011import org.maltparser.core.symbol.SymbolTable;
012import org.maltparser.core.symbol.SymbolTableHandler;
013import org.maltparser.core.syntaxgraph.DependencyStructure;
014import org.maltparser.core.syntaxgraph.node.DependencyNode;
015
016/**
017 * This class contains methods for projectivizing and deprojectivizing
018 * 
019 * @author Jens Nilsson
020 */
021public class PseudoProjectivity {
022        static int id = 0;
023
024        private enum PseudoProjectiveEncoding {
025                NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
026        };
027
028        private enum CoveredRootAttachment {
029                NONE, IGNORE, LEFT, RIGHT, HEAD
030        };
031
032        private enum LiftingOrder {
033                SHORTEST, DEEPEST
034        };
035
036        private PseudoProjectiveEncoding markingStrategy;
037        private CoveredRootAttachment rootAttachment;
038        private LiftingOrder liftingOrder;
039        private Logger configLogger;
040
041        private SymbolTable deprelSymbolTable;
042        private SymbolTable pppathSymbolTable;
043        private SymbolTable ppliftedSymbolTable;
044        private SymbolTable ppcoveredRootSymbolTable;
045        
046        private ColumnDescription deprelColumn;
047        private ColumnDescription pppathColumn;
048        private ColumnDescription ppliftedColumn;
049        private ColumnDescription ppcoveredRootColumn;
050        
051        private Vector<Boolean> nodeLifted;
052        private Vector<Vector<DependencyNode>> nodeTrace;
053        private Vector<DependencyNode> headDeprel;
054        private Vector<Boolean> nodePath;
055        private Vector<Boolean> isCoveredRoot;
056        private Vector<Integer> nodeRelationLength;
057        private Vector<String> synacticHeadDeprel;
058
059
060        public PseudoProjectivity() { }
061
062        public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
063                        DataFormatInstance dataFormatInstance, SymbolTableHandler symbolTables) throws MaltChainedException {
064                nodeLifted = new Vector<Boolean>();
065                nodeTrace = new Vector<Vector<DependencyNode>>();
066                headDeprel = new Vector<DependencyNode>();
067                nodePath = new Vector<Boolean>();
068                isCoveredRoot = new Vector<Boolean>();
069                nodeRelationLength = new Vector<Integer>();
070                synacticHeadDeprel = new Vector<String>();
071
072                this.configLogger = configLogger;
073                if (markingStrategyString.equalsIgnoreCase("none")) {
074                        markingStrategy = PseudoProjectiveEncoding.NONE;
075                } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
076                        markingStrategy = PseudoProjectiveEncoding.BASELINE;
077                } else if (markingStrategyString.equalsIgnoreCase("head")) {
078                        markingStrategy = PseudoProjectiveEncoding.HEAD;
079                } else if (markingStrategyString.equalsIgnoreCase("path")) {
080                        markingStrategy = PseudoProjectiveEncoding.PATH;
081                } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
082                        markingStrategy = PseudoProjectiveEncoding.HEADPATH;
083                } else if (markingStrategyString.equalsIgnoreCase("trace")) {
084                        markingStrategy = PseudoProjectiveEncoding.TRACE;
085                }
086                this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
087                this.deprelSymbolTable = symbolTables.getSymbolTable(deprelColumn.getName());
088                if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
089                                || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
090                        this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
091                        this.ppliftedSymbolTable = symbolTables.getSymbolTable(ppliftedColumn.getName()); 
092                        if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
093                                ppliftedSymbolTable.addSymbol("#true#");
094                                ppliftedSymbolTable.addSymbol("#false#");
095                        } else {
096                                ppliftedSymbolTable.addSymbol("#false#");
097                        }
098                }
099
100                if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
101                        this.pppathColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
102                        this.pppathSymbolTable = symbolTables.getSymbolTable(pppathColumn.getName());
103                        pppathSymbolTable.addSymbol("#true#");
104                        pppathSymbolTable.addSymbol("#false#");
105                }
106
107                if (coveredRoot.equalsIgnoreCase("none")) {
108                        this.rootAttachment = CoveredRootAttachment.NONE;
109                } else if (coveredRoot.equalsIgnoreCase("ignore")) {
110                        this.rootAttachment = CoveredRootAttachment.IGNORE;
111                } else if (coveredRoot.equalsIgnoreCase("left")) {
112                        this.rootAttachment = CoveredRootAttachment.LEFT;
113                } else if (coveredRoot.equalsIgnoreCase("right")) {
114                        this.rootAttachment = CoveredRootAttachment.RIGHT;
115                } else if (coveredRoot.equalsIgnoreCase("head")) {
116                        this.rootAttachment = CoveredRootAttachment.HEAD;
117                }
118
119                if (this.rootAttachment != CoveredRootAttachment.NONE) {
120                        this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription(symbolTables, "PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", "", deprelColumn.getNullValueStrategy());
121                        this.ppcoveredRootSymbolTable = symbolTables.getSymbolTable(ppcoveredRootColumn.getName());
122                        ppcoveredRootSymbolTable.addSymbol("#true#");
123                        ppcoveredRootSymbolTable.addSymbol("#false#");
124                }
125                if (liftingOrder.equalsIgnoreCase("shortest")) {
126                        this.liftingOrder = LiftingOrder.SHORTEST;
127                } else if (liftingOrder.equalsIgnoreCase("deepest")) {
128                        this.liftingOrder = LiftingOrder.DEEPEST;
129                }
130        }
131        
132        private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
133                nodeLifted.clear();
134                nodeTrace.clear();
135                headDeprel.clear();
136                nodePath.clear();
137                isCoveredRoot.clear();
138                nodeRelationLength.clear();
139
140                for (int index : pdg.getDependencyIndices()) {
141                        nodeLifted.add(false);
142                        nodeTrace.add(new Vector<DependencyNode>());
143                        headDeprel.add(null);
144                        nodePath.add(false);
145                        isCoveredRoot.add(false);
146                        if (ppliftedSymbolTable != null && index != 0) {
147                                pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
148                        }
149                        if (pppathSymbolTable != null && index != 0) {
150                                pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
151                        }
152                        if (ppcoveredRootSymbolTable != null && index != 0) {
153                                pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
154                        }
155                }
156                computeRelationLength(pdg);
157        }
158        
159    public void projectivize(DependencyStructure pdg) throws MaltChainedException {
160        id++;
161        if (!pdg.isTree()) {
162            configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
163            return;
164        }
165        DependencyNode deepestNonProjectiveNode;
166        initProjectivization(pdg);
167        if (rootAttachment == CoveredRootAttachment.IGNORE) {
168                if (markingStrategy != PseudoProjectiveEncoding.NONE) {
169                        while (!pdg.isProjective()) {
170                                if (liftingOrder == LiftingOrder.DEEPEST) {
171                                        deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
172                                } else {
173                                        deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
174                                }
175                                if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
176                                        nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
177                                        setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
178                                        setPath(deepestNonProjectiveNode.getHead());
179                                        pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
180                                }
181                        }
182                        deattachCoveredRootsForProjectivization(pdg);
183                }
184        } else {
185                if (rootAttachment != CoveredRootAttachment.NONE) {
186                    for (int index : pdg.getTokenIndices()) {
187                        attachCoveredRoots(pdg, pdg.getTokenNode(index));
188                    }
189                }
190                if (markingStrategy != PseudoProjectiveEncoding.NONE) {
191                    while (!pdg.isProjective()) {
192                        if (liftingOrder == LiftingOrder.DEEPEST) {
193                            deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
194                        } else {
195                            deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
196                        }
197                        nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
198                        setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
199                        setPath(deepestNonProjectiveNode.getHead());
200                        pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
201                    }
202                }
203        }
204        // collectTraceStatistics(pdg);
205        assignPseudoProjectiveDeprels(pdg);
206    }
207
208        public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
209                assignPseudoProjectiveDeprelsForMerge(pdg);
210        }
211
212        public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
213                int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
214                String label;
215                initDeprojeciviztion(pdg);
216                for (int index : pdg.getTokenIndices()) {
217                        if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
218                                label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
219                                if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
220                                        label = label.substring(0, pathLabelIndex);
221                                        setLabel(pdg.getTokenNode(index), label);
222                                        pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
223                                }
224                                if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
225                                        if (movedLabelIndex + 1 < label.length()) {
226                                                pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
227                                        } else {
228                                                pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
229                                        }
230                                        label = label.substring(0, movedLabelIndex);
231                                        setLabel(pdg.getTokenNode(index), label);
232                                }
233                        }
234                }
235                for (int index : pdg.getTokenIndices()) {
236                        if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
237                                label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
238                                if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
239                                        label = label.substring(0, coveredArcLabelIndex);
240                                        setLabel(pdg.getTokenNode(index), label);
241                                        pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
242                                }
243                        }
244                }
245        }
246
247        private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
248                if (headDeprel.get(node.getIndex()) == null) {
249                        headDeprel.set(node.getIndex(), parent);
250                }
251                nodeTrace.set(node.getIndex(), headDeprel);
252        }
253
254        private void setPath(DependencyNode node) {
255                nodePath.set(node.getIndex(), true);
256        }
257
258        private boolean isCoveredRoot(DependencyNode node) {
259                return isCoveredRoot.get(node.getIndex());
260        }
261
262        private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
263                for (int index : pdg.getTokenIndices()) {
264                        if (isCoveredRoot(pdg.getTokenNode(index))) {
265                                pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
266                        }
267                }
268        }
269
270        private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
271                int i;
272                boolean foundCoveredRoot = false;
273                DependencyNode coveredRootHead;
274                for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
275                                .getIndex()); i++) {
276                        int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
277                        if (leftMostIndex == -1) {
278                                leftMostIndex = i;
279                        }
280                        int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
281                        if (rightMostIndex == -1) {
282                                rightMostIndex = i;
283                        }
284                        if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
285                                        && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
286                                        && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
287                                if (rootAttachment == CoveredRootAttachment.LEFT) {
288                                        if (deepest.getHead().getIndex() < deepest.getIndex()) {
289                                                coveredRootHead = deepest.getHead();
290                                        } else {
291                                                coveredRootHead = deepest;
292                                        }
293                                } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
294                                        if (deepest.getIndex() < deepest.getHead().getIndex()) {
295                                                coveredRootHead = deepest.getHead();
296                                        } else {
297                                                coveredRootHead = deepest;
298                                        }
299                                } else {
300                                        coveredRootHead = deepest.getHead();
301                                }
302                                pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
303                                setCoveredRoot(pdg.getDependencyNode(i));
304                                foundCoveredRoot = true;
305                        }
306                }
307                return foundCoveredRoot;
308        }
309
310        private void setCoveredRoot(DependencyNode node) {
311                isCoveredRoot.set(node.getIndex(), true);
312        }
313
314        private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
315                DependencyNode deepestNonProjectiveNode = null;
316                for (int index : pdg.getDependencyIndices()) {
317                        if (!pdg.getDependencyNode(index).isProjective()
318                                        && (deepestNonProjectiveNode == null 
319                                        || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
320                                deepestNonProjectiveNode = pdg.getDependencyNode(index);
321                        }
322                }
323                
324                return deepestNonProjectiveNode;
325        }
326
327        private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
328                DependencyNode shortestNonProjectiveNode = null;
329                for (int index : pdg.getDependencyIndices()) {
330                        if (!pdg.getDependencyNode(index).isProjective()
331                                        && (shortestNonProjectiveNode == null
332                                        || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex()) 
333                                        )) {
334//                                      || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
335                                shortestNonProjectiveNode = pdg.getDependencyNode(index);
336                        }
337                }
338                return shortestNonProjectiveNode;
339        }
340
341
342        private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
343                nodeRelationLength.add(0);
344                for (int index : pdg.getTokenIndices()) {
345                        nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
346                }
347        }
348
349        private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
350                int newLabelCode;
351                for (int index : pdg.getTokenIndices()) {
352                        if (!isCoveredRoot(pdg.getDependencyNode(index))) {
353                                if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
354                                                || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
355                                        if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
356                                                if (nodeLifted.get(index)) {
357                                                        newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
358                                                } else {
359                                                        newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
360                                                }
361                                                pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
362                                        } else {
363                                                if (nodeLifted.get(index)) {
364                                                        newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
365                                                                        headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
366                                                } else {
367                                                        newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
368                                                }
369                                                pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
370                                        }
371                                }
372
373                                if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
374                                        if (nodePath.get(index)) {
375                                                newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
376                                        } else {
377                                                newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
378                                        }
379                                        pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
380                                }
381
382                        } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
383                                pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
384                        }
385                }
386        }
387
388        private void setLabel(DependencyNode node, String label) throws MaltChainedException {
389                // node.getLabelCode().clear();
390                node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
391        }
392
393        private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
394                Vector<String> originalDeprel = new Vector<String>();
395                String newLabel;
396                originalDeprel.add(null);
397                for (int index : pdg.getTokenIndices()) {
398                        originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
399                }
400                for (int index : pdg.getTokenIndices()) {
401                        newLabel = null;
402                        if (!isCoveredRoot(pdg.getDependencyNode(index))) {
403                                if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
404                                        if (nodeLifted.get(index)) {
405                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
406                                                                + originalDeprel.get(headDeprel.get(index).getIndex());
407                                                // } else {
408                                                // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
409                                        }
410                                } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
411                                        if (nodeLifted.get(index) && nodePath.get(index)) {
412                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
413                                        } else if (nodeLifted.get(index) && !nodePath.get(index)) {
414                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
415                                        } else if (!nodeLifted.get(index) && nodePath.get(index)) {
416                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
417                                        }
418                                } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
419                                        if (nodeLifted.get(index) && nodePath.get(index)) {
420                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
421                                                                + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
422                                        } else if (nodeLifted.get(index) && !nodePath.get(index)) {
423                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
424                                                                + originalDeprel.get(headDeprel.get(index).getIndex());
425                                        } else if (!nodeLifted.get(index) && nodePath.get(index)) {
426                                                newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
427                                        }
428                                } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
429                                        if (nodeLifted.get(index)) {
430                                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
431                                        }
432                                }
433                        } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
434                                newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
435                        }
436                        if (newLabel != null) {
437                                setLabel(pdg.getDependencyNode(index), newLabel);
438                        }
439                }
440        }
441
442        public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
443                initDeprojeciviztion(pdg);
444
445                for (int index : pdg.getTokenIndices()) {
446                        if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
447                                if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
448                                                && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
449                                        setPath(pdg.getDependencyNode(index));
450                                }
451                                if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
452                                                && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
453                                        nodeLifted.set(index, true);
454                                        if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
455                                                synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
456                                                                .getLabelCode(ppliftedSymbolTable)));
457                                        }
458                                }
459                        }
460                }
461                deattachCoveredRootsForDeprojectivization(pdg);
462                if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
463                        deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
464                } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
465                        deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
466                } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
467                        deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
468                }
469        }
470
471        private void initDeprojeciviztion(DependencyStructure pdg) {
472                nodeLifted.clear();
473                nodePath.clear();
474                synacticHeadDeprel.clear();
475                for (int index : pdg.getDependencyIndices()) {
476                        nodeLifted.add(false);
477                        nodePath.add(false);
478                        synacticHeadDeprel.add(null);
479                }
480        }
481
482        private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
483                for (int index : pdg.getTokenIndices()) {
484                        if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
485                                if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
486                                                && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
487                                                                "#true#")) {
488                                        pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
489                                }
490                        }
491                }
492        }
493
494        // Check whether there is at least one node in the specified dependency structure that can be lifted.
495        // If this is not the case, there is no need to call deprojectivizeWithHead.
496
497        private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
498                for (int index : pdg.getDependencyIndices()) {
499                        if (nodeLifted.get(index)) {
500                                DependencyNode node = pdg.getDependencyNode(index);
501                                if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
502                                        return true;
503                                }
504                    }
505                }
506                return false;
507        }
508
509        private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
510                boolean success = true, childSuccess = false;
511                int i, childAttempts = 2;
512                DependencyNode child, possibleSyntacticHead;
513                String syntacticHeadDeprel;
514                if (nodeLifted.get(node.getIndex())) {
515                        syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
516                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
517                        if (possibleSyntacticHead != null) {
518                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
519                                nodeLifted.set(node.getIndex(), false);
520                        } else {
521                                success = false;
522                        }
523                }
524                while (!childSuccess && childAttempts > 0) {
525                        childSuccess = true;
526                        Vector<DependencyNode> children = new Vector<DependencyNode>();
527                        i = 0;
528                        while ((child = node.getLeftDependent(i)) != null) {
529                                children.add(child);
530                                i++;
531                        }
532                        i = 0;
533                        while ((child = node.getRightDependent(i)) != null) {
534                                children.add(child);
535                                i++;
536                        }
537                        for (i = 0; i < children.size(); i++) {
538                                child = children.get(i);
539                                if (!deprojectivizeWithHead(pdg, child)) {
540                                        childSuccess = false;
541                                }
542                        }
543                        childAttempts--;
544                }
545                return childSuccess && success;
546        }
547
548        private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
549                        throws MaltChainedException {
550                DependencyNode dependent;
551                String dependentDeprel;
552                Vector<DependencyNode> nodes = new Vector<DependencyNode>();
553                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
554                while (nodes.size() > 0) {
555                        dependent = nodes.remove(0);
556                        if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
557                                dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
558                                if (dependentDeprel.equals(syntacticHeadDeprel)) {
559                                        return dependent;
560                                }
561                        }
562                        nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
563                }
564                return null;
565        }
566
567        
568        private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
569                        boolean percentOnly) {
570                Vector<DependencyNode> output = new Vector<DependencyNode>();
571                SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>();
572                dependents.addAll(governor.getLeftDependents());
573                dependents.addAll(governor.getRightDependents());
574
575
576                DependencyNode[] deps = new DependencyNode[dependents.size()];
577                int[] distances = new int[dependents.size()];
578                int i = 0;
579                for (DependencyNode dep : dependents) {
580                        distances[i] = Math.abs(dep.getIndex() - avoid.getIndex());
581                        deps[i] = dep;
582                        i++;
583                }
584                if (distances.length > 1) {
585                        int smallest;
586                        int n = distances.length;
587                        int tmpDist;
588                        DependencyNode tmpDep;
589                        for (i=0; i < n; i++) {
590                                smallest = i;
591                                for (int j=i; j < n; j++) {
592                                        if (distances[j] < distances[smallest]) {
593                                                smallest = j;
594                                        }
595                                }
596                                if (smallest != i) {
597                                        tmpDist = distances[smallest];
598                                        distances[smallest] = distances[i];
599                                        distances[i] = tmpDist;
600                                        tmpDep = deps[smallest];
601                                        deps[smallest] = deps[i];
602                                        deps[i] = tmpDep;
603                                }
604                        }
605                }
606                for (i=0; i<distances.length;i++) {
607                        if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath.get(deps[i].getIndex())))) {
608                                output.add(deps[i]);
609                        }
610                }
611                return output;
612        }
613        
614        private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode2(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
615                        boolean percentOnly) {
616                int i, j;
617                Vector<DependencyNode> dependents = new Vector<DependencyNode>();
618                DependencyNode leftChild, rightChild;
619
620                i = governor.getLeftDependentCount() - 1;
621                j = 0;
622                leftChild = governor.getLeftDependent(i--);
623                rightChild = governor.getRightDependent(j++);
624
625                while (leftChild != null && rightChild != null) {
626                        if (leftChild == avoid) {
627                                leftChild = governor.getLeftDependent(i--);
628                        } else if (rightChild == avoid) {
629                                rightChild = governor.getRightDependent(j++);
630                        } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
631                                if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
632                                        dependents.add(leftChild);
633                                }
634                                leftChild = governor.getLeftDependent(i--);
635                        } else {
636                                if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
637                                        dependents.add(rightChild);
638                                }
639                                rightChild = governor.getRightDependent(j++);
640                        }
641                }
642                while (leftChild != null) {
643                        if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
644                                dependents.add(leftChild);
645                        }
646                        leftChild = governor.getLeftDependent(i--);
647                }
648                while (rightChild != null) {
649                        if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
650                                dependents.add(rightChild);
651                        }
652                        rightChild = governor.getRightDependent(j++);
653                }
654                return dependents;
655        }
656
657        private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
658                boolean success = true, childSuccess = false;
659                int i, childAttempts = 2;
660                DependencyNode child, possibleSyntacticHead;
661                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
662                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
663                        if (possibleSyntacticHead != null) {
664                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
665                                nodeLifted.set(node.getIndex(), false);
666                        } else {
667                                success = false;
668                        }
669                }
670                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
671                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
672                        if (possibleSyntacticHead != null) {
673                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
674                                nodeLifted.set(node.getIndex(), false);
675                        } else {
676                                success = false;
677                        }
678                }
679                while (!childSuccess && childAttempts > 0) {
680                        childSuccess = true;
681                        Vector<DependencyNode> children = new Vector<DependencyNode>();
682                        i = 0;
683                        while ((child = node.getLeftDependent(i)) != null) {
684                                children.add(child);
685                                i++;
686                        }
687                        i = 0;
688                        while ((child = node.getRightDependent(i)) != null) {
689                                children.add(child);
690                                i++;
691                        }
692                        for (i = 0; i < children.size(); i++) {
693                                child = children.get(i);
694                                if (!deprojectivizeWithPath(pdg, child)) {
695                                        childSuccess = false;
696                                }
697                        }
698                        childAttempts--;
699                }
700                return childSuccess && success;
701        }
702
703        private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
704                DependencyNode dependent;
705                Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
706                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
707                while (nodes.size() > 0) {
708                        dependent = nodes.remove(0);
709                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
710                                return dependent;
711                        }
712                        nodes.addAll(newNodes);
713                }
714                return null;
715        }
716
717        private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
718                boolean success = true, childSuccess = false;
719                int i, childAttempts = 2;
720                DependencyNode child, possibleSyntacticHead;
721                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
722                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
723                                        .getIndex()));
724                        if (possibleSyntacticHead != null) {
725                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
726                                nodeLifted.set(node.getIndex(), false);
727                        } else {
728                                success = false;
729                        }
730                }
731                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
732                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
733                                        .getIndex()));
734                        if (possibleSyntacticHead != null) {
735                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
736                                nodeLifted.set(node.getIndex(), false);
737                        } else {
738                                success = false;
739                        }
740                }
741                while (!childSuccess && childAttempts > 0) {
742                        childSuccess = true;
743                        Vector<DependencyNode> children = new Vector<DependencyNode>();
744                        i = 0;
745                        while ((child = node.getLeftDependent(i)) != null) {
746                                children.add(child);
747                                i++;
748                        }
749                        i = 0;
750                        while ((child = node.getRightDependent(i)) != null) {
751                                children.add(child);
752                                i++;
753                        }
754                        for (i = 0; i < children.size(); i++) {
755                                child = children.get(i);
756                                if (!deprojectivizeWithHeadAndPath(pdg, child)) {
757                                        childSuccess = false;
758                                }
759                        }
760                        childAttempts--;
761                }
762                return childSuccess && success;
763        }
764
765        private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
766                        throws MaltChainedException {
767                DependencyNode dependent;
768                Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
769                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
770                while (nodes.size() > 0) {
771                        dependent = nodes.remove(0);
772                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
773                                        && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
774                                return dependent;
775                        }
776                        nodes.addAll(newNodes);
777                        if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
778                                        && newNodes.size() != 0) {
779                                secondChance.add(dependent);
780                        }
781                }
782                if (secondChance.size() > 0) {
783                        return secondChance.firstElement();
784                }
785                return null;
786        }
787}