001package org.maltparser.core.lw.graph;
002
003import java.util.ArrayList;
004import java.util.Arrays;
005import java.util.List;
006
007import org.maltparser.core.exception.MaltChainedException;
008import org.maltparser.core.symbol.SymbolTable;
009import org.maltparser.core.syntaxgraph.DependencyStructure;
010import org.maltparser.core.syntaxgraph.edge.Edge;
011import org.maltparser.core.syntaxgraph.node.DependencyNode;
012
013/**
014* A lightweight version of pseudo projectivity and this class can only perform deprojectivizing. The class is based on 
015* the more complex class org.maltparser.transform.pseudo.PseudoProjectivity.
016* 
017* 
018* @author Johan Hall
019*/
020public final class LWDeprojectivizer {
021        public static final int NONE = 0;
022        public static final int BASELINE = 1;
023        public static final int HEAD = 2;
024        public static final int PATH = 3;
025        public static final int HEADPATH = 4;
026        public static final int TRACE = 5;
027
028        public int counter = 0;
029        public LWDeprojectivizer() { }
030        
031        public static int getMarkingStrategyInt(String markingStrategyString) {
032                if (markingStrategyString.equalsIgnoreCase("none")) {
033                        return LWDeprojectivizer.NONE;
034                } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
035                        return LWDeprojectivizer.BASELINE;
036                } else if (markingStrategyString.equalsIgnoreCase("head")) {
037                        return LWDeprojectivizer.HEAD;
038                } else if (markingStrategyString.equalsIgnoreCase("path")) {
039                        return LWDeprojectivizer.PATH;
040                } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
041                        return LWDeprojectivizer.HEADPATH;
042                } else if (markingStrategyString.equalsIgnoreCase("trace")) {
043                        return LWDeprojectivizer.TRACE;
044                } 
045                return LWDeprojectivizer.NONE;
046        }
047        
048        public void deprojectivize(DependencyStructure pdg, int markingStrategy) throws MaltChainedException {
049                SymbolTable deprelSymbolTable = pdg.getSymbolTables().getSymbolTable("DEPREL");
050                SymbolTable ppliftedSymbolTable =  pdg.getSymbolTables().getSymbolTable("PPLIFTED"); 
051                SymbolTable pppathSymbolTable =  pdg.getSymbolTables().getSymbolTable("PPPATH");
052                boolean[] nodeLifted = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodeLifted, false);
053                boolean[] nodePath = new boolean[pdg.nDependencyNode()]; Arrays.fill(nodePath, false);
054                String[] synacticHeadDeprel = new String[pdg.nDependencyNode()]; Arrays.fill(synacticHeadDeprel, null);
055                
056                for (int index : pdg.getTokenIndices()) {
057                        Edge e = pdg.getDependencyNode(index).getHeadEdge();
058                        if (e.hasLabel(deprelSymbolTable)) {
059                                if (e.hasLabel(pppathSymbolTable) && pppathSymbolTable.getSymbolCodeToString(e.getLabelCode(pppathSymbolTable)).equals("#true#")) {
060                                        nodePath[pdg.getDependencyNode(index).getIndex()] = true;
061                                }
062                                if (e.hasLabel(ppliftedSymbolTable) && !ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
063                                        nodeLifted[index] = true;
064                                        
065                                        if (!ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
066                                                synacticHeadDeprel[index] = ppliftedSymbolTable.getSymbolCodeToString(e.getLabelCode(ppliftedSymbolTable));
067                                        }
068                                }
069                        }
070                }
071                deattachCoveredRootsForDeprojectivization(pdg, deprelSymbolTable);
072                if (markingStrategy == LWDeprojectivizer.HEAD && needsDeprojectivizeWithHead(pdg, nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) {
073                        deprojectivizeWithHead(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable);
074                } else if (markingStrategy == LWDeprojectivizer.PATH) {
075                        deprojectivizeWithPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath);
076                } else if (markingStrategy == LWDeprojectivizer.HEADPATH) {
077                        deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot(), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable);
078                }
079        }
080
081        private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg, SymbolTable deprelSymbolTable) throws MaltChainedException {
082                SymbolTable ppcoveredRootSymbolTable =  pdg.getSymbolTables().getSymbolTable("PPCOVERED");
083                for (int index : pdg.getTokenIndices()) {
084                        Edge e = pdg.getDependencyNode(index).getHeadEdge();
085                        if (e.hasLabel(deprelSymbolTable)) {
086                                if (e.hasLabel(ppcoveredRootSymbolTable) && ppcoveredRootSymbolTable.getSymbolCodeToString(e.getLabelCode(ppcoveredRootSymbolTable)).equals("#true#")) {
087                                        pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
088                                }
089                        }
090                }
091        }
092
093        // Check whether there is at least one node in the specified dependency structure that can be lifted.
094        // If this is not the case, there is no need to call deprojectivizeWithHead.
095
096        private boolean needsDeprojectivizeWithHead(DependencyStructure pdg, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException {
097                for (int index : pdg.getDependencyIndices()) {
098                        if (nodeLifted[index]) {
099                                DependencyNode node = pdg.getDependencyNode(index);
100                                if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel[index], nodePath, deprelSymbolTable) != null) {
101                                        return true;
102                                }
103                    }
104                }
105                return false;
106        }
107
108        private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException {
109                boolean success = true, childSuccess = false;
110                int i, childAttempts = (counter < 10000 ? 2 : 1);
111                DependencyNode possibleSyntacticHead;
112                String syntacticHeadDeprel;
113                
114                counter++;
115                if (nodeLifted[node.getIndex()]) {
116                        syntacticHeadDeprel = synacticHeadDeprel[node.getIndex()];
117                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel, nodePath, deprelSymbolTable);
118                        if (possibleSyntacticHead != null) {
119                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
120                                nodeLifted[node.getIndex()] = false;
121                        } else {
122                                success = false;
123                        }
124                }
125                while (!childSuccess && childAttempts > 0) {
126                        childSuccess = true;
127
128                        List<DependencyNode> children = node.getListOfDependents();
129                        for (i = 0; i < children.size(); i++) {
130                                if (!deprojectivizeWithHead(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) {
131                                        childSuccess = false;
132                                }
133                        }
134                        childAttempts--;
135                }
136                return childSuccess && success;
137        }
138
139        private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel, boolean[] nodePath, SymbolTable deprelSymbolTable)
140                        throws MaltChainedException {
141                DependencyNode dependent;
142                String dependentDeprel;
143                List<DependencyNode> nodes = new ArrayList<DependencyNode>();
144                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false, nodePath));
145                while (nodes.size() > 0) {
146                        dependent = nodes.remove(0);
147                        if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
148                                dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
149                                if (dependentDeprel.equals(syntacticHeadDeprel)) {
150                                        return dependent;
151                                }
152                        }
153                        nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false, nodePath));
154                }
155                return null;
156        }
157
158        
159        private List<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
160                        boolean percentOnly, boolean[] nodePath) {
161                List<DependencyNode> output = new ArrayList<DependencyNode>();
162                List<DependencyNode> dependents = governor.getListOfDependents();
163//              SortedSet<DependencyNode> dependents = new TreeSet<DependencyNode>();
164//              dependents.addAll(governor.getLeftDependents());
165//              dependents.addAll(governor.getRightDependents());
166
167
168                DependencyNode[] deps = new DependencyNode[dependents.size()];
169                int[] distances = new int[dependents.size()];
170                for (int i = 0; i < dependents.size(); i++) {
171                        distances[i] = Math.abs(dependents.get(i).getIndex() - avoid.getIndex());
172                        deps[i] = dependents.get(i);
173                }
174                if (distances.length > 1) {
175                        int smallest;
176                        int n = distances.length;
177                        int tmpDist;
178                        DependencyNode tmpDep;
179                        for (int i=0; i < n; i++) {
180                                smallest = i;
181                                for (int j=i; j < n; j++) {
182                                        if (distances[j] < distances[smallest]) {
183                                                smallest = j;
184                                        }
185                                }
186                                if (smallest != i) {
187                                        tmpDist = distances[smallest];
188                                        distances[smallest] = distances[i];
189                                        distances[i] = tmpDist;
190                                        tmpDep = deps[smallest];
191                                        deps[smallest] = deps[i];
192                                        deps[i] = tmpDep;
193                                }
194                        }
195                }
196                for (int i=0; i<distances.length;i++) {
197                        if (deps[i] != avoid && (!percentOnly || (percentOnly && nodePath[deps[i].getIndex()]))) {
198                                output.add(deps[i]);
199                        }
200                }
201                return output;
202        }
203        
204        private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath) throws MaltChainedException {
205                boolean success = true, childSuccess = false;
206                int i, childAttempts = (counter < 10000 ? 2 : 1);
207                DependencyNode possibleSyntacticHead;
208                
209                counter++;
210                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) {
211                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath);
212                        if (possibleSyntacticHead != null) {
213                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
214                                nodeLifted[node.getIndex()] = false;
215                        } else {
216                                success = false;
217                        }
218                }
219                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) {
220                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node, nodePath);
221                        if (possibleSyntacticHead != null) {
222                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
223                                nodeLifted[node.getIndex()] = false;
224                        } else {
225                                success = false;
226                        }
227                }
228                while (!childSuccess && childAttempts > 0) {
229                        childSuccess = true;
230                        List<DependencyNode> children = node.getListOfDependents();
231                        for (i = 0; i < children.size(); i++) {
232                                if (!deprojectivizeWithPath(pdg, children.get(i), nodeLifted, nodePath)) {
233                                        childSuccess = false;
234                                }
235                        }
236                        childAttempts--;
237                }
238                return childSuccess && success;
239        }
240
241        private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, boolean[] nodePath) {
242                DependencyNode dependent;
243                List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes;
244                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath));
245                while (nodes.size() > 0) {
246                        dependent = nodes.remove(0);
247                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0) {
248                                return dependent;
249                        }
250                        nodes.addAll(newNodes);
251                }
252                return null;
253        }
254
255        private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node, boolean[] nodeLifted, boolean[] nodePath, String[] synacticHeadDeprel, SymbolTable deprelSymbolTable) throws MaltChainedException {
256                boolean success = true, childSuccess = false;
257                int i, childAttempts = (counter < 10000 ? 2 : 1);
258                DependencyNode possibleSyntacticHead;
259                
260                counter++;
261                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()] && nodePath[node.getIndex()]) {
262                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable);
263                        if (possibleSyntacticHead != null) {
264                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
265                                nodeLifted[node.getIndex()] = false;
266                        } else {
267                                success = false;
268                        }
269                }
270                if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted[node.getIndex()]) {
271                        possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel[node.getIndex()], nodePath, deprelSymbolTable);
272                        if (possibleSyntacticHead != null) {
273                                pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
274                                nodeLifted[node.getIndex()] = false;
275                        } else {
276                                success = false;
277                        }
278                }
279                while (!childSuccess && childAttempts > 0) {
280                        childSuccess = true;
281                        List<DependencyNode> children = node.getListOfDependents();
282                        for (i = 0; i < children.size(); i++) {
283                                if (!deprojectivizeWithHeadAndPath(pdg, children.get(i), nodeLifted, nodePath, synacticHeadDeprel, deprelSymbolTable)) {
284                                        childSuccess = false;
285                                }
286                        }
287                        childAttempts--;
288                }
289                return childSuccess && success;
290        }
291
292        private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode, boolean[] nodePath, SymbolTable deprelSymbolTable)
293                        throws MaltChainedException {
294                DependencyNode dependent;
295                List<DependencyNode> nodes = new ArrayList<DependencyNode>(), newNodes = null, secondChance = new ArrayList<DependencyNode>();
296                nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true, nodePath));
297                while (nodes.size() > 0) {
298                        dependent = nodes.remove(0);
299                        if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true, nodePath)).size()) == 0
300                                        && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
301                                return dependent;
302                        }
303                        nodes.addAll(newNodes);
304                        if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
305                                        && newNodes.size() != 0) {
306                                secondChance.add(dependent);
307                        }
308                }
309                if (secondChance.size() > 0) {
310                        return secondChance.get(0);
311                }
312                return null;
313        }
314}