001package org.maltparser.core.lw.graph;
002
003import java.util.ArrayList;
004import java.util.Arrays;
005import java.util.List;
006
007import org.maltparser.core.exception.MaltChainedException;
008import org.maltparser.core.symbol.SymbolTable;
009import org.maltparser.core.syntaxgraph.DependencyStructure;
010import org.maltparser.core.syntaxgraph.edge.Edge;
011import org.maltparser.core.syntaxgraph.node.DependencyNode;
012
013/**
014* A lightweight version of pseudo projectivity and this class can only perform deprojectivizing. The class is based on 
015* the more complex class org.maltparser.transform.pseudo.PseudoProjectivity.
016* 
017* 
018* @author Johan Hall
019*/
020public final class LWDeprojectivizer {
021        public static final int NONE = 0;
022        public static final int BASELINE = 1;
023        public static final int HEAD = 1;
024        public static final int PATH = 1;
025        public static final int HEADPATH = 1;
026        public static final int TRACE = 1;
027
028        public LWDeprojectivizer() { }
029        
030        public static int getMarkingStrategyInt(String markingStrategyString) {
031                if (markingStrategyString.equalsIgnoreCase("none")) {
032                        return LWDeprojectivizer.NONE;
033                } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
034                        return LWDeprojectivizer.BASELINE;
035                } else if (markingStrategyString.equalsIgnoreCase("head")) {
036                        return LWDeprojectivizer.HEAD;
037                } else if (markingStrategyString.equalsIgnoreCase("path")) {
038                        return LWDeprojectivizer.PATH;
039                } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
040                        return LWDeprojectivizer.HEADPATH;
041                } else if (markingStrategyString.equalsIgnoreCase("trace")) {
042                        return LWDeprojectivizer.TRACE;
043                } 
044                return LWDeprojectivizer.NONE;
045        }
046        
047        public void deprojectivize(DependencyStructure pdg, int markingStrategy) throws MaltChainedException {
048                SymbolTable deprelSymbolTable = pdg.getSymbolTables().getSymbolTable("DEPREL");
049                SymbolTable ppliftedSymbolTable =  pdg.getSymbolTables().getSymbolTable("PPLIFTED"); 
050                SymbolTable pppathSymbolTable =  pdg.getSymbolTables().getSymbolTable("PPPATH");
051                 
052                boolean[] nodeLifted = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodeLifted, false);
053                boolean[] nodePath = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodePath, false);
054                String[] synacticHeadDeprel = new String[pdg.nDependencyNode()]; Arrays.fill(synacticHeadDeprel, null);
055                
056                for (int index : pdg.getTokenIndices()) {
057                        Edge e = pdg.getDependencyNode(index).getHeadEdge();
058                        if (e.hasLabel(deprelSymbolTable)) {
059                                if (e.hasLabel(pppathSymbolTable) && pppathSymbolTable.getSymbolCodeToString(e.getLabelCode(pppathSymbolTable)).equals("#true#")) {
060                                        nodePath[pdg.getDependencyNode(index).getIndex()] = true;
061                                }
062                                if (e.hasLabel(ppliftedSymbolTable) && !ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
063                                        nodeLifted[index] = true;
064                                        
065                                        if (!ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
066                                                synacticHeadDeprel[index] = ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable));
067                                        }
068                                }
069                        }
070                }
071                deattachCoveredRootsForDeprojectivization(pdg, deprelSymbolTable);
072                if (markingStrategy == LWDeprojectivizer.HEAD && needsDeprojectivizeWithHead(pdg, nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) {
073                        deprojectivizeWithHead(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable);
074                } else if (markingStrategy == LWDeprojectivizer.PATH) {
075                        deprojectivizeWithPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath);
076                } else if (markingStrategy == LWDeprojectivizer.HEADPATH) {
077                        deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable);
078                }
079        }
080
081        private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg, SymbolTable deprelSymbolTable) throws MaltChainedException {
082                SymbolTable ppcoveredRootSymbolTable =  pdg.getSymbolTables().getSymbolTable("PPCOVERED");
083                for (int index : pdg.getTokenIndices()) {
084                        Edge e = pdg.getDependencyNode(index).getHeadEdge();
085                        if (e.hasLabel(deprelSymbolTable)) {
086                                if (e.hasLabel(ppcoveredRootSymbolTable) && ppcoveredRootSymbolTable.getSymbolCodeToString(e.getLabelCode(ppcoveredRootSymbolTable)).equals("#true#")) {
087                                        pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
088                                }
089                        }
090                }
091        }
092
093        // Check whether there is at least one node in the specified dependency structure that can be lifted.
094        // If this is not the case, there is no need to call deprojectivizeWithHead.
095
096        private boolean needsDeprojectivizeWithHead(DependencyStructure pdg, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException {
097                for (int index : pdg.getDependencyIndices()) {
098                        if (nodeLifted[index]) {
099                                DependencyNode node = pdg.getDependencyNode(index);
100                                if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel[index], nodePath, deprelSymbolTable) != null) {
101                                        return true;
102                                }
103                    }
104                }
105                return false;
106        }
107
108        private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException {
109                boolean success = true, childSuccess = false;
110                int i, childAttempts = 2;
111                DependencyNode possibleSyntacticHead;
112                String syntacticHeadDeprel;
113                if (nodeLifted[node.getIndex()]) {
114                        syntacticHeadDeprel = synacticHeadDeprel[node.getIndex()];
115                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel, nodePath, deprelSymbolTable);
116                        if (possibleSyntacticHead != null) {
117                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
118                                nodeLifted[node.getIndex()] = false;
119                        } else {
120                                success = false;
121                        }
122                }
123                while (!childSuccess && childAttempts > 0) {
124                        childSuccess = true;
125                        
126                        List<DependencyNode> children = node.getListOfDependents();
127                        for (i = 0; i < children.size(); i++) {
128                                if (!deprojectivizeWithHead(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) {
129                                        childSuccess = false;
130                                }
131                        }
132                        childAttempts--;
133                }
134                return childSuccess && success;
135        }
136
137        private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel, boolean[] nodePath, SymbolTable deprelSymbolTable)
138                        throws MaltChainedException {
139                DependencyNode dependent;
140                String dependentDeprel;
141                List<DependencyNode> nodes = new ArrayList<DependencyNode>();
142                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false, nodePath));
143                while (nodes.size() > 0) {
144                        dependent = nodes.remove(0);
145                        if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
146                                dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
147                                if (dependentDeprel.equals(syntacticHeadDeprel)) {
148                                        return dependent;
149                                }
150                        }
151                        nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false, nodePath));
152                }
153                return null;
154        }
155
156        
157        private List<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
158                        boolean percentOnly, boolean[] nodePath) {
159                List<DependencyNode> output = new ArrayList<DependencyNode>();
160                List<DependencyNode> dependents = governor.getListOfDependents();
161//              SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>();
162//              dependents.addAll(governor.getLeftDependents());
163//              dependents.addAll(governor.getRightDependents());
164
165
166                DependencyNode[] deps = new DependencyNode[dependents.size()];
167                int[] distances = new int[dependents.size()];
168                for (int i = 0; i < dependents.size(); i++) {
169                        distances[i] = Math.abs(dependents.get(i).getIndex() - avoid.getIndex());
170                        deps[i] = dependents.get(i);
171                }
172                if (distances.length > 1) {
173                        int smallest;
174                        int n = distances.length;
175                        int tmpDist;
176                        DependencyNode tmpDep;
177                        for (int i=0; i < n; i++) {
178                                smallest = i;
179                                for (int j=i; j < n; j++) {
180                                        if (distances[j] < distances[smallest]) {
181                                                smallest = j;
182                                        }
183                                }
184                                if (smallest != i) {
185                                        tmpDist = distances[smallest];
186                                        distances[smallest] = distances[i];
187                                        distances[i] = tmpDist;
188                                        tmpDep = deps[smallest];
189                                        deps[smallest] = deps[i];
190                                        deps[i] = tmpDep;
191                                }
192                        }
193                }
194                for (int i=0; i<distances.length;i++) {
195                        if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath[deps[i].getIndex()]))) {
196                                output.add(deps[i]);
197                        }
198                }
199                return output;
200        }
201        
202        private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath) throws MaltChainedException {
203                boolean success = true, childSuccess = false;
204                int i, childAttempts = 2;
205                DependencyNode possibleSyntacticHead;
206                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) {
207                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath);
208                        if (possibleSyntacticHead != null) {
209                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
210                                nodeLifted[node.getIndex()] = false;
211                        } else {
212                                success = false;
213                        }
214                }
215                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) {
216                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath);
217                        if (possibleSyntacticHead != null) {
218                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
219                                nodeLifted[node.getIndex()] = false;
220                        } else {
221                                success = false;
222                        }
223                }
224                while (!childSuccess && childAttempts > 0) {
225                        childSuccess = true;
226                        List<DependencyNode> children = node.getListOfDependents();
227                        for (i = 0; i < children.size(); i++) {
228                                if (!deprojectivizeWithPath(pdg, children.get(i), nodeLifted, nodePath)) {
229                                        childSuccess = false;
230                                }
231                        }
232                        childAttempts--;
233                }
234                return childSuccess && success;
235        }
236
237        private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, boolean[] nodePath) {
238                DependencyNode dependent;
239                List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes;
240                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath));
241                while (nodes.size() > 0) {
242                        dependent = nodes.remove(0);
243                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0) {
244                                return dependent;
245                        }
246                        nodes.addAll(newNodes);
247                }
248                return null;
249        }
250
251        private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException {
252                boolean success = true, childSuccess = false;
253                int i, childAttempts = 2;
254                DependencyNode possibleSyntacticHead;
255                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) {
256                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable);
257                        if (possibleSyntacticHead != null) {
258                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
259                                nodeLifted[node.getIndex()] = false;
260                        } else {
261                                success = false;
262                        }
263                }
264                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) {
265                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable);
266                        if (possibleSyntacticHead != null) {
267                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
268                                nodeLifted[node.getIndex()] = false;
269                        } else {
270                                success = false;
271                        }
272                }
273                while (!childSuccess && childAttempts > 0) {
274                        childSuccess = true;
275                        List<DependencyNode> children = node.getListOfDependents();
276                        for (i = 0; i < children.size(); i++) {
277                                if (!deprojectivizeWithHeadAndPath(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) {
278                                        childSuccess = false;
279                                }
280                        }
281                        childAttempts--;
282                }
283                return childSuccess && success;
284        }
285
286        private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode, boolean[] nodePath, SymbolTable deprelSymbolTable)
287                        throws MaltChainedException {
288                DependencyNode dependent;
289                List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes = null, secondChance = new ArrayList<DependencyNode>();
290                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath));
291                while (nodes.size() > 0) {
292                        dependent = nodes.remove(0);
293                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0
294                                        && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
295                                return dependent;
296                        }
297                        nodes.addAll(newNodes);
298                        if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
299                                        && newNodes.size() != 0) {
300                                secondChance.add(dependent);
301                        }
302                }
303                if (secondChance.size() > 0) {
304                        return secondChance.get(0);
305                }
306                return null;
307        }
308}