001 package org.maltparser.transform.pseudo;
002
003 import java.util.Vector;
004
005 import org.apache.log4j.Logger;
006 import org.maltparser.core.exception.MaltChainedException;
007 import org.maltparser.core.io.dataformat.ColumnDescription;
008 import org.maltparser.core.io.dataformat.DataFormatInstance;
009 import org.maltparser.core.symbol.SymbolTable;
010 import org.maltparser.core.syntaxgraph.DependencyStructure;
011 import org.maltparser.core.syntaxgraph.node.DependencyNode;
012
013 /**
014 * This class contains methods for projectivizing and deprojectivizing
015 *
016 * @author Jens Nilsson
017 * @since 1.0
018 */
019 public class PseudoProjectivity {
020 static int id = 0;
021
022 private enum PseudoProjectiveEncoding {
023 NONE, BASELINE, HEAD, PATH, HEADPATH, TRACE
024 };
025
026 private enum CoveredRootAttachment {
027 NONE, IGNORE, LEFT, RIGHT, HEAD
028 };
029
030 private enum LiftingOrder {
031 SHORTEST, DEEPEST
032 };
033
034 private PseudoProjectiveEncoding markingStrategy;
035 private CoveredRootAttachment rootAttachment;
036 private LiftingOrder liftingOrder;
037 private Logger configLogger;
038
039 private SymbolTable deprelSymbolTable;
040 private SymbolTable pppathSymbolTable;
041 private SymbolTable ppliftedSymbolTable;
042 private SymbolTable ppcoveredRootSymbolTable;
043
044 private ColumnDescription deprelColumn;
045 private ColumnDescription pppathColumn;
046 private ColumnDescription ppliftedColumn;
047 private ColumnDescription ppcoveredRootColumn;
048
049 private Vector<Boolean> nodeLifted;
050 private Vector<Vector<DependencyNode>> nodeTrace;
051 private Vector<DependencyNode> headDeprel;
052 private Vector<Boolean> nodePath;
053 private Vector<Boolean> isCoveredRoot;
054 private Vector<Integer> nodeRelationLength;
055 private Vector<String> synacticHeadDeprel;
056
057
058 public PseudoProjectivity() { }
059
060 public void initialize(String markingStrategyString, String coveredRoot, String liftingOrder, Logger configLogger,
061 DataFormatInstance dataFormatInstance) throws MaltChainedException {
062 nodeLifted = new Vector<Boolean>();
063 nodeTrace = new Vector<Vector<DependencyNode>>();
064 headDeprel = new Vector<DependencyNode>();
065 nodePath = new Vector<Boolean>();
066 isCoveredRoot = new Vector<Boolean>();
067 nodeRelationLength = new Vector<Integer>();
068 synacticHeadDeprel = new Vector<String>();
069
070 this.configLogger = configLogger;
071 if (markingStrategyString.equalsIgnoreCase("none")) {
072 markingStrategy = PseudoProjectiveEncoding.NONE;
073 } else if (markingStrategyString.equalsIgnoreCase("baseline")) {
074 markingStrategy = PseudoProjectiveEncoding.BASELINE;
075 } else if (markingStrategyString.equalsIgnoreCase("head")) {
076 markingStrategy = PseudoProjectiveEncoding.HEAD;
077 } else if (markingStrategyString.equalsIgnoreCase("path")) {
078 markingStrategy = PseudoProjectiveEncoding.PATH;
079 } else if (markingStrategyString.equalsIgnoreCase("head+path")) {
080 markingStrategy = PseudoProjectiveEncoding.HEADPATH;
081 } else if (markingStrategyString.equalsIgnoreCase("trace")) {
082 markingStrategy = PseudoProjectiveEncoding.TRACE;
083 }
084 this.deprelColumn = dataFormatInstance.getColumnDescriptionByName("DEPREL");
085 this.deprelSymbolTable = deprelColumn.getSymbolTable();
086 // this.deprelSymbolTable = dataFormatInstance.getSymbolTables().getSymbolTable("DEPREL");
087 if (markingStrategy == PseudoProjectiveEncoding.HEAD || markingStrategy == PseudoProjectiveEncoding.PATH
088 || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
089 this.ppliftedColumn = dataFormatInstance.addInternalColumnDescription("PPLIFTED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy());
090 this.ppliftedSymbolTable = ppliftedColumn.getSymbolTable();
091 // this.ppliftedSymbolTable = dataFormatInstance.getSymbolTables().addSymbolTable("PPLIFTED", deprelSymbolTable);
092 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
093 ppliftedSymbolTable.addSymbol("#true#");
094 ppliftedSymbolTable.addSymbol("#false#");
095 } else {
096 ppliftedSymbolTable.addSymbol("#false#");
097 }
098 }
099
100 if (markingStrategy == PseudoProjectiveEncoding.PATH || markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
101 this.pppathColumn = dataFormatInstance.addInternalColumnDescription("PPPATH", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy());
102 this.pppathSymbolTable = pppathColumn.getSymbolTable();
103 pppathSymbolTable.addSymbol("#true#");
104 pppathSymbolTable.addSymbol("#false#");
105 }
106
107 if (coveredRoot.equalsIgnoreCase("none")) {
108 this.rootAttachment = CoveredRootAttachment.NONE;
109 } else if (coveredRoot.equalsIgnoreCase("ignore")) {
110 this.rootAttachment = CoveredRootAttachment.IGNORE;
111 } else if (coveredRoot.equalsIgnoreCase("left")) {
112 this.rootAttachment = CoveredRootAttachment.LEFT;
113 } else if (coveredRoot.equalsIgnoreCase("right")) {
114 this.rootAttachment = CoveredRootAttachment.RIGHT;
115 } else if (coveredRoot.equalsIgnoreCase("head")) {
116 this.rootAttachment = CoveredRootAttachment.HEAD;
117 }
118
119 if (this.rootAttachment != CoveredRootAttachment.NONE) {
120 this.ppcoveredRootColumn = dataFormatInstance.addInternalColumnDescription("PPCOVERED", "DEPENDENCY_EDGE_LABEL", "BOOLEAN", deprelColumn.getNullValueStrategy());
121 this.ppcoveredRootSymbolTable = ppcoveredRootColumn.getSymbolTable();
122 ppcoveredRootSymbolTable.addSymbol("#true#");
123 ppcoveredRootSymbolTable.addSymbol("#false#");
124 }
125 if (liftingOrder.equalsIgnoreCase("shortest")) {
126 this.liftingOrder = LiftingOrder.SHORTEST;
127 } else if (liftingOrder.equalsIgnoreCase("deepest")) {
128 this.liftingOrder = LiftingOrder.DEEPEST;
129 }
130 }
131
132 private void initProjectivization(DependencyStructure pdg) throws MaltChainedException {
133 nodeLifted.clear();
134 nodeTrace.clear();
135 headDeprel.clear();
136 nodePath.clear();
137 isCoveredRoot.clear();
138 nodeRelationLength.clear();
139
140 for (int index : pdg.getDependencyIndices()) {
141 nodeLifted.add(false);
142 nodeTrace.add(new Vector<DependencyNode>());
143 headDeprel.add(null);
144 nodePath.add(false);
145 isCoveredRoot.add(false);
146 if (ppliftedSymbolTable != null && index != 0) {
147 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#false#"));
148 }
149 if (pppathSymbolTable != null && index != 0) {
150 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#false#"));
151 }
152 if (ppcoveredRootSymbolTable != null && index != 0) {
153 pdg.getDependencyNode(index).getHeadEdge().getLabelSet().put(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#false#"));
154 }
155 }
156 computeRelationLength(pdg);
157 }
158
159 public void projectivize(DependencyStructure pdg) throws MaltChainedException {
160 id++;
161 if (!pdg.isTree()) {
162 configLogger.info("\n[Warning: Sentence '" + id + "' cannot projectivize, because the dependency graph is not a tree]\n");
163 return;
164 }
165 DependencyNode deepestNonProjectiveNode;
166 initProjectivization(pdg);
167 if (rootAttachment == CoveredRootAttachment.IGNORE) {
168 if (markingStrategy != PseudoProjectiveEncoding.NONE) {
169 while (!pdg.isProjective()) {
170 if (liftingOrder == LiftingOrder.DEEPEST) {
171 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
172 } else {
173 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
174 }
175 if (!attachCoveredRoots(pdg, deepestNonProjectiveNode)) {
176 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
177 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
178 setPath(deepestNonProjectiveNode.getHead());
179 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
180 }
181 }
182 deattachCoveredRootsForProjectivization(pdg);
183 }
184 } else {
185 if (rootAttachment != CoveredRootAttachment.NONE) {
186 for (int index : pdg.getTokenIndices()) {
187 attachCoveredRoots(pdg, pdg.getTokenNode(index));
188 }
189 }
190 if (markingStrategy != PseudoProjectiveEncoding.NONE) {
191 while (!pdg.isProjective()) {
192 if (liftingOrder == LiftingOrder.DEEPEST) {
193 deepestNonProjectiveNode = getDeepestNonProjectiveNode(pdg);
194 } else {
195 deepestNonProjectiveNode = getShortestNonProjectiveNode(pdg);
196 }
197 nodeLifted.set(deepestNonProjectiveNode.getIndex(), true);
198 setHeadDeprel(deepestNonProjectiveNode, deepestNonProjectiveNode.getHead());
199 setPath(deepestNonProjectiveNode.getHead());
200 pdg.moveDependencyEdge(pdg.getDependencyNode(deepestNonProjectiveNode.getHead().getHead().getIndex()).getIndex(), deepestNonProjectiveNode.getIndex());
201 }
202 }
203 }
204 // collectTraceStatistics(pdg);
205 assignPseudoProjectiveDeprels(pdg);
206 }
207
208 public void mergeArclabels(DependencyStructure pdg) throws MaltChainedException {
209 assignPseudoProjectiveDeprelsForMerge(pdg);
210 }
211
212 public void splitArclabels(DependencyStructure pdg) throws MaltChainedException {
213 int pathLabelIndex = -1, movedLabelIndex = -1, coveredArcLabelIndex;
214 String label;
215 initDeprojeciviztion(pdg);
216 for (int index : pdg.getTokenIndices()) {
217 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
218 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
219 if (label != null && (pathLabelIndex = label.indexOf("%")) != -1) {
220 label = label.substring(0, pathLabelIndex);
221 setLabel(pdg.getTokenNode(index), label);
222 pdg.getTokenNode(index).getHeadEdge().addLabel(pppathSymbolTable, pppathSymbolTable.getSymbolStringToCode("#true#"));
223 }
224 if (label != null && (movedLabelIndex = label.indexOf("|")) != -1 && label.indexOf("|null") == -1) {
225 if (movedLabelIndex + 1 < label.length()) {
226 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode(label.substring(movedLabelIndex + 1)));
227 } else {
228 pdg.getTokenNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, ppliftedSymbolTable.getSymbolStringToCode("#true#"));
229 }
230 label = label.substring(0, movedLabelIndex);
231 setLabel(pdg.getTokenNode(index), label);
232 }
233 }
234 }
235 for (int index : pdg.getTokenIndices()) {
236 if (pdg.getTokenNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
237 label = deprelSymbolTable.getSymbolCodeToString(pdg.getTokenNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
238 if ((coveredArcLabelIndex = label.indexOf("|null")) != -1) {
239 label = label.substring(0, coveredArcLabelIndex);
240 setLabel(pdg.getTokenNode(index), label);
241 pdg.getTokenNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
242 }
243 }
244 }
245 }
246
247 private void setHeadDeprel(DependencyNode node, DependencyNode parent) {
248 if (headDeprel.get(node.getIndex()) == null) {
249 headDeprel.set(node.getIndex(), parent);
250 }
251 nodeTrace.set(node.getIndex(), headDeprel);
252 }
253
254 private void setPath(DependencyNode node) {
255 nodePath.set(node.getIndex(), true);
256 }
257
258 private boolean isCoveredRoot(DependencyNode node) {
259 return isCoveredRoot.get(node.getIndex());
260 }
261
262 private void deattachCoveredRootsForProjectivization(DependencyStructure pdg) throws MaltChainedException {
263 for (int index : pdg.getTokenIndices()) {
264 if (isCoveredRoot(pdg.getTokenNode(index))) {
265 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getTokenNode(index).getIndex());
266 }
267 }
268 }
269
270 private boolean attachCoveredRoots(DependencyStructure pdg, DependencyNode deepest) throws MaltChainedException {
271 int i;
272 boolean foundCoveredRoot = false;
273 DependencyNode coveredRootHead;
274 for (i = Math.min(deepest.getIndex(), deepest.getHead().getIndex()) + 1; i < Math.max(deepest.getIndex(), deepest.getHead()
275 .getIndex()); i++) {
276 int leftMostIndex = pdg.getDependencyNode(i).getLeftmostProperDescendantIndex();
277 if (leftMostIndex == -1) {
278 leftMostIndex = i;
279 }
280 int rightMostIndex = pdg.getDependencyNode(i).getRightmostProperDescendantIndex();
281 if (rightMostIndex == -1) {
282 rightMostIndex = i;
283 }
284 if (!nodeLifted.get(i) && pdg.getDependencyNode(i).getHead().isRoot() && !deepest.getHead().isRoot()
285 && Math.min(deepest.getIndex(), deepest.getHead().getIndex()) < leftMostIndex
286 && rightMostIndex < Math.max(deepest.getIndex(), deepest.getHead().getIndex())) {
287 if (rootAttachment == CoveredRootAttachment.LEFT) {
288 if (deepest.getHead().getIndex() < deepest.getIndex()) {
289 coveredRootHead = deepest.getHead();
290 } else {
291 coveredRootHead = deepest;
292 }
293 } else if (rootAttachment == CoveredRootAttachment.RIGHT) {
294 if (deepest.getIndex() < deepest.getHead().getIndex()) {
295 coveredRootHead = deepest.getHead();
296 } else {
297 coveredRootHead = deepest;
298 }
299 } else {
300 coveredRootHead = deepest.getHead();
301 }
302 pdg.moveDependencyEdge(coveredRootHead.getIndex(), pdg.getDependencyNode(i).getIndex());
303 setCoveredRoot(pdg.getDependencyNode(i));
304 foundCoveredRoot = true;
305 }
306 }
307 return foundCoveredRoot;
308 }
309
310 private void setCoveredRoot(DependencyNode node) {
311 isCoveredRoot.set(node.getIndex(), true);
312 }
313
314 private DependencyNode getDeepestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
315 DependencyNode deepestNonProjectiveNode = null;
316 for (int index : pdg.getDependencyIndices()) {
317 if (!pdg.getDependencyNode(index).isProjective()
318 && (deepestNonProjectiveNode == null
319 || pdg.getDependencyNode(index).getDependencyNodeDepth() > pdg.getDependencyNode(deepestNonProjectiveNode.getIndex()).getDependencyNodeDepth())) {
320 deepestNonProjectiveNode = pdg.getDependencyNode(index);
321 }
322 }
323
324 return deepestNonProjectiveNode;
325 }
326
327 private DependencyNode getShortestNonProjectiveNode(DependencyStructure pdg) throws MaltChainedException {
328 DependencyNode shortestNonProjectiveNode = null;
329 for (int index : pdg.getDependencyIndices()) {
330 if (!pdg.getDependencyNode(index).isProjective()
331 && (shortestNonProjectiveNode == null
332 || nodeRelationLength.get(index) < nodeRelationLength.get(shortestNonProjectiveNode.getIndex())
333 || (nodeRelationLength.get(index) == nodeRelationLength.get(shortestNonProjectiveNode.getIndex())))) {
334 shortestNonProjectiveNode = pdg.getDependencyNode(index);
335 }
336 }
337 return shortestNonProjectiveNode;
338 }
339
340
341 private void computeRelationLength(DependencyStructure pdg) throws MaltChainedException {
342 nodeRelationLength.add(0);
343 for (int index : pdg.getTokenIndices()) {
344 nodeRelationLength.add(Math.abs(pdg.getDependencyNode(index).getIndex() - pdg.getDependencyNode(index).getHead().getIndex()));
345 }
346 }
347
348 private void assignPseudoProjectiveDeprels(DependencyStructure pdg) throws MaltChainedException {
349 int newLabelCode;
350 for (int index : pdg.getTokenIndices()) {
351 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
352 if (this.markingStrategy == PseudoProjectiveEncoding.HEAD || this.markingStrategy == PseudoProjectiveEncoding.PATH
353 || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
354 if (this.markingStrategy == PseudoProjectiveEncoding.PATH) {
355 if (nodeLifted.get(index)) {
356 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#true#");
357 } else {
358 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
359 }
360 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
361 } else {
362 if (nodeLifted.get(index)) {
363 newLabelCode = ppliftedSymbolTable.addSymbol(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(
364 headDeprel.get(index).getIndex()).getHeadEdge().getLabelCode(deprelSymbolTable)));
365 } else {
366 newLabelCode = ppliftedSymbolTable.getSymbolStringToCode("#false#");
367 }
368 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppliftedSymbolTable, newLabelCode);
369 }
370 }
371
372 if (this.markingStrategy == PseudoProjectiveEncoding.PATH || this.markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
373 if (nodePath.get(index)) {
374 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#true#");
375 } else {
376 newLabelCode = pppathSymbolTable.getSymbolStringToCode("#false#");
377 }
378 pdg.getDependencyNode(index).getHeadEdge().addLabel(pppathSymbolTable, newLabelCode);
379 }
380
381 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
382 pdg.getDependencyNode(index).getHeadEdge().addLabel(ppcoveredRootSymbolTable, ppcoveredRootSymbolTable.getSymbolStringToCode("#true#"));
383 }
384 }
385 }
386
387 private void setLabel(DependencyNode node, String label) throws MaltChainedException {
388 // node.getLabelCode().clear();
389 node.getHeadEdge().getLabelSet().put(deprelSymbolTable, deprelSymbolTable.addSymbol(label));
390 }
391
392 private void assignPseudoProjectiveDeprelsForMerge(DependencyStructure pdg) throws MaltChainedException {
393 Vector<String> originalDeprel = new Vector<String>();
394 String newLabel;
395 originalDeprel.add(null);
396 for (int index : pdg.getTokenIndices()) {
397 originalDeprel.add(deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)));
398 }
399 for (int index : pdg.getTokenIndices()) {
400 newLabel = null;
401 if (!isCoveredRoot(pdg.getDependencyNode(index))) {
402 if (markingStrategy == PseudoProjectiveEncoding.HEAD) {
403 if (nodeLifted.get(index)) {
404 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
405 + originalDeprel.get(headDeprel.get(index).getIndex());
406 // } else {
407 // newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable));
408 }
409 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
410 if (nodeLifted.get(index) && nodePath.get(index)) {
411 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|%";
412 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
413 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
414 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
415 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "%";
416 }
417 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
418 if (nodeLifted.get(index) && nodePath.get(index)) {
419 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
420 + originalDeprel.get(headDeprel.get(index).getIndex()) + "%";
421 } else if (nodeLifted.get(index) && !nodePath.get(index)) {
422 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|"
423 + originalDeprel.get(headDeprel.get(index).getIndex());
424 } else if (!nodeLifted.get(index) && nodePath.get(index)) {
425 newLabel = originalDeprel.get(pdg.getDependencyNode(index).getIndex()) + "%";
426 }
427 } else if (markingStrategy == PseudoProjectiveEncoding.TRACE) {
428 if (nodeLifted.get(index)) {
429 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|";
430 }
431 }
432 } else if (!(rootAttachment == CoveredRootAttachment.NONE || rootAttachment == CoveredRootAttachment.IGNORE)) {
433 newLabel = deprelSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(deprelSymbolTable)) + "|null";
434 }
435 if (newLabel != null) {
436 setLabel(pdg.getDependencyNode(index), newLabel);
437 }
438 }
439 }
440
441 public void deprojectivize(DependencyStructure pdg) throws MaltChainedException {
442 initDeprojeciviztion(pdg);
443
444 for (int index : pdg.getTokenIndices()) {
445 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
446 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(pppathSymbolTable)
447 && pppathSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(pppathSymbolTable)).equals("#true#")) {
448 setPath(pdg.getDependencyNode(index));
449 }
450 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppliftedSymbolTable)
451 && !ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#false#")) {
452 nodeLifted.set(index, true);
453 if (!ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppliftedSymbolTable)).equals("#true#")) {
454 synacticHeadDeprel.set(index, ppliftedSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge()
455 .getLabelCode(ppliftedSymbolTable)));
456 }
457 }
458 }
459 }
460 deattachCoveredRootsForDeprojectivization(pdg);
461 if (markingStrategy == PseudoProjectiveEncoding.HEAD && needsDeprojectivizeWithHead(pdg)) {
462 deprojectivizeWithHead(pdg, pdg.getDependencyRoot());
463 } else if (markingStrategy == PseudoProjectiveEncoding.PATH) {
464 deprojectivizeWithPath(pdg, pdg.getDependencyRoot());
465 } else if (markingStrategy == PseudoProjectiveEncoding.HEADPATH) {
466 deprojectivizeWithHeadAndPath(pdg, pdg.getDependencyRoot());
467 }
468 }
469
470 private void initDeprojeciviztion(DependencyStructure pdg) {
471 nodeLifted.clear();
472 nodePath.clear();
473 synacticHeadDeprel.clear();
474 for (int index : pdg.getDependencyIndices()) {
475 nodeLifted.add(false);
476 nodePath.add(false);
477 synacticHeadDeprel.add(null);
478 }
479 }
480
481 private void deattachCoveredRootsForDeprojectivization(DependencyStructure pdg) throws MaltChainedException {
482 for (int index : pdg.getTokenIndices()) {
483 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(deprelSymbolTable)) {
484 if (pdg.getDependencyNode(index).getHeadEdge().hasLabel(ppcoveredRootSymbolTable)
485 && ppcoveredRootSymbolTable.getSymbolCodeToString(pdg.getDependencyNode(index).getHeadEdge().getLabelCode(ppcoveredRootSymbolTable)).equals(
486 "#true#")) {
487 pdg.moveDependencyEdge(pdg.getDependencyRoot().getIndex(), pdg.getDependencyNode(index).getIndex());
488 }
489 }
490 }
491 }
492
493 // Check whether there is at least one node in the specified dependency structure that can be lifted.
494 // If this is not the case, there is no need to call deprojectivizeWithHead.
495
496 private boolean needsDeprojectivizeWithHead(DependencyStructure pdg) throws MaltChainedException {
497 for (int index : pdg.getDependencyIndices()) {
498 if (nodeLifted.get(index)) {
499 DependencyNode node = pdg.getDependencyNode(index);
500 if (breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, synacticHeadDeprel.get(index)) != null) {
501 return true;
502 }
503 }
504 }
505 return false;
506 }
507
508 private boolean deprojectivizeWithHead(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
509 boolean success = true, childSuccess = false;
510 int i, childAttempts = 2;
511 DependencyNode child, possibleSyntacticHead;
512 String syntacticHeadDeprel;
513 if (nodeLifted.get(node.getIndex())) {
514 syntacticHeadDeprel = synacticHeadDeprel.get(node.getIndex());
515 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHead(pdg, node.getHead(), node, syntacticHeadDeprel);
516 if (possibleSyntacticHead != null) {
517 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
518 nodeLifted.set(node.getIndex(), false);
519 } else {
520 success = false;
521 }
522 }
523 while (!childSuccess && childAttempts > 0) {
524 childSuccess = true;
525 Vector<DependencyNode> children = new Vector<DependencyNode>();
526 i = 0;
527 while ((child = node.getLeftDependent(i)) != null) {
528 children.add(child);
529 i++;
530 }
531 i = 0;
532 while ((child = node.getRightDependent(i)) != null) {
533 children.add(child);
534 i++;
535 }
536 for (i = 0; i < children.size(); i++) {
537 child = children.get(i);
538 if (!deprojectivizeWithHead(pdg, child)) {
539 childSuccess = false;
540 }
541 }
542 childAttempts--;
543 }
544 return childSuccess && success;
545 }
546
547 private DependencyNode breadthFirstSearchSortedByDistanceForHead(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprel)
548 throws MaltChainedException {
549 DependencyNode dependent;
550 String dependentDeprel;
551 Vector<DependencyNode> nodes = new Vector<DependencyNode>();
552 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, false));
553 while (nodes.size() > 0) {
554 dependent = nodes.remove(0);
555 if (dependent.getHeadEdge().hasLabel(deprelSymbolTable)) {
556 dependentDeprel = deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable));
557 if (dependentDeprel.equals(syntacticHeadDeprel)) {
558 return dependent;
559 }
560 }
561 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, false));
562 }
563 return null;
564 }
565
566 private Vector<DependencyNode> findAllDependentsVectorSortedByDistanceToPProjNode(DependencyStructure dg, DependencyNode governor, DependencyNode avoid,
567 boolean percentOnly) {
568 int i, j;
569 Vector<DependencyNode> dependents = new Vector<DependencyNode>();
570 DependencyNode leftChild, rightChild;
571
572 i = governor.getLeftDependentCount() - 1;
573 j = 0;
574 leftChild = governor.getLeftDependent(i--);
575 rightChild = governor.getRightDependent(j++);
576
577 while (leftChild != null && rightChild != null) {
578 if (leftChild == avoid) {
579 leftChild = governor.getLeftDependent(i--);
580 } else if (rightChild == avoid) {
581 rightChild = governor.getRightDependent(j++);
582 } else if (Math.abs(leftChild.getIndex() - avoid.getIndex()) < Math.abs(rightChild.getIndex() - avoid.getIndex())) {
583 if (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex()))) {
584 dependents.add(leftChild);
585 }
586 leftChild = governor.getLeftDependent(i--);
587 } else {
588 if (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex()))) {
589 dependents.add(rightChild);
590 }
591 rightChild = governor.getRightDependent(j++);
592 }
593 }
594 while (leftChild != null) {
595 if (leftChild != avoid && (!percentOnly || (percentOnly && nodePath.get(leftChild.getIndex())))) {
596 dependents.add(leftChild);
597 }
598 leftChild = governor.getLeftDependent(i--);
599 }
600 while (rightChild != null) {
601 if (rightChild != avoid && (!percentOnly || (percentOnly && nodePath.get(rightChild.getIndex())))) {
602 dependents.add(rightChild);
603 }
604 rightChild = governor.getRightDependent(j++);
605 }
606 return dependents;
607 }
608
609 private boolean deprojectivizeWithPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
610 boolean success = true, childSuccess = false;
611 int i, childAttempts = 2;
612 DependencyNode child, possibleSyntacticHead;
613 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
614 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
615 if (possibleSyntacticHead != null) {
616 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
617 nodeLifted.set(node.getIndex(), false);
618 } else {
619 success = false;
620 }
621 }
622 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
623 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForPath(pdg, node.getHead(), node);
624 if (possibleSyntacticHead != null) {
625 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
626 nodeLifted.set(node.getIndex(), false);
627 } else {
628 success = false;
629 }
630 }
631 while (!childSuccess && childAttempts > 0) {
632 childSuccess = true;
633 Vector<DependencyNode> children = new Vector<DependencyNode>();
634 i = 0;
635 while ((child = node.getLeftDependent(i)) != null) {
636 children.add(child);
637 i++;
638 }
639 i = 0;
640 while ((child = node.getRightDependent(i)) != null) {
641 children.add(child);
642 i++;
643 }
644 for (i = 0; i < children.size(); i++) {
645 child = children.get(i);
646 if (!deprojectivizeWithPath(pdg, child)) {
647 childSuccess = false;
648 }
649 }
650 childAttempts--;
651 }
652 return childSuccess && success;
653 }
654
655 private DependencyNode breadthFirstSearchSortedByDistanceForPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid) {
656 DependencyNode dependent;
657 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes;
658 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
659 while (nodes.size() > 0) {
660 dependent = nodes.remove(0);
661 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0) {
662 return dependent;
663 }
664 nodes.addAll(newNodes);
665 }
666 return null;
667 }
668
669 private boolean deprojectivizeWithHeadAndPath(DependencyStructure pdg, DependencyNode node) throws MaltChainedException {
670 boolean success = true, childSuccess = false;
671 int i, childAttempts = 2;
672 DependencyNode child, possibleSyntacticHead;
673 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex()) && nodePath.get(node.getIndex())) {
674 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
675 .getIndex()));
676 if (possibleSyntacticHead != null) {
677 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
678 nodeLifted.set(node.getIndex(), false);
679 } else {
680 success = false;
681 }
682 }
683 if (node.hasHead() && node.getHeadEdge().isLabeled() && nodeLifted.get(node.getIndex())) {
684 possibleSyntacticHead = breadthFirstSearchSortedByDistanceForHeadAndPath(pdg, node.getHead(), node, synacticHeadDeprel.get(node
685 .getIndex()));
686 if (possibleSyntacticHead != null) {
687 pdg.moveDependencyEdge(possibleSyntacticHead.getIndex(), node.getIndex());
688 nodeLifted.set(node.getIndex(), false);
689 } else {
690 success = false;
691 }
692 }
693 while (!childSuccess && childAttempts > 0) {
694 childSuccess = true;
695 Vector<DependencyNode> children = new Vector<DependencyNode>();
696 i = 0;
697 while ((child = node.getLeftDependent(i)) != null) {
698 children.add(child);
699 i++;
700 }
701 i = 0;
702 while ((child = node.getRightDependent(i)) != null) {
703 children.add(child);
704 i++;
705 }
706 for (i = 0; i < children.size(); i++) {
707 child = children.get(i);
708 if (!deprojectivizeWithHeadAndPath(pdg, child)) {
709 childSuccess = false;
710 }
711 }
712 childAttempts--;
713 }
714 return childSuccess && success;
715 }
716
717 private DependencyNode breadthFirstSearchSortedByDistanceForHeadAndPath(DependencyStructure dg, DependencyNode start, DependencyNode avoid, String syntacticHeadDeprelCode)
718 throws MaltChainedException {
719 DependencyNode dependent;
720 Vector<DependencyNode> nodes = new Vector<DependencyNode>(), newNodes = null, secondChance = new Vector<DependencyNode>();
721 nodes.addAll(findAllDependentsVectorSortedByDistanceToPProjNode(dg, start, avoid, true));
722 while (nodes.size() > 0) {
723 dependent = nodes.remove(0);
724 if (((newNodes = findAllDependentsVectorSortedByDistanceToPProjNode(dg, dependent, avoid, true)).size()) == 0
725 && deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)) {
726 return dependent;
727 }
728 nodes.addAll(newNodes);
729 if (deprelSymbolTable.getSymbolCodeToString(dependent.getHeadEdge().getLabelCode(deprelSymbolTable)).equals(syntacticHeadDeprelCode)
730 && newNodes.size() != 0) {
731 secondChance.add(dependent);
732 }
733 }
734 if (secondChance.size() > 0) {
735 return secondChance.firstElement();
736 }
737 return null;
738 }
739 }