001package org.maltparser.core.syntaxgraph.ds2ps; 002 003 004import java.util.SortedMap; 005 006import org.maltparser.core.exception.MaltChainedException; 007import org.maltparser.core.helper.SystemLogger; 008import org.maltparser.core.io.dataformat.ColumnDescription; 009import org.maltparser.core.io.dataformat.DataFormatInstance; 010import org.maltparser.core.symbol.SymbolTable; 011import org.maltparser.core.symbol.SymbolTableHandler; 012import org.maltparser.core.syntaxgraph.MappablePhraseStructureGraph; 013import org.maltparser.core.syntaxgraph.edge.Edge; 014import org.maltparser.core.syntaxgraph.headrules.HeadRules; 015import org.maltparser.core.syntaxgraph.node.DependencyNode; 016import org.maltparser.core.syntaxgraph.node.NonTerminalNode; 017import org.maltparser.core.syntaxgraph.node.PhraseStructureNode; 018/** 019* 020* 021* @author Johan Hall 022*/ 023public class LosslessMapping implements Dependency2PhraseStructure { 024 private String DEPREL = "DEPREL"; 025 private String PHRASE = "PHRASE"; 026 private String HEADREL = "HEADREL"; 027 private String ATTACH = "ATTACH"; 028 private String CAT = "CAT"; 029 private String EDGELABEL; 030 private final char EMPTY_SPINE = '*'; 031 private final String EMPTY_LABEL = "??"; 032 private final char SPINE_ELEMENT_SEPARATOR = '|'; 033 private final char LABEL_ELEMENT_SEPARATOR = '~'; 034 private final char QUESTIONMARK = '?'; 035 private String optionString; 036 private HeadRules headRules; 037 private DataFormatInstance dependencyDataFormatInstance; 038 private DataFormatInstance phraseStructuretDataFormatInstance; 039 private SymbolTableHandler symbolTableHandler; 040 private boolean lockUpdate = false; 041 private int nonTerminalCounter; 042 private StringBuilder deprel; 043 private StringBuilder headrel; 044 private StringBuilder phrase; 045 046 public LosslessMapping(DataFormatInstance dependencyDataFormatInstance, DataFormatInstance phraseStructuretDataFormatInstance, SymbolTableHandler symbolTableHandler) { 047 this.symbolTableHandler = symbolTableHandler; 048 setDependencyDataFormatInstance(dependencyDataFormatInstance); 049 setPhraseStructuretDataFormatInstance(phraseStructuretDataFormatInstance); 050 deprel = new StringBuilder(); 051 headrel = new StringBuilder(); 052 phrase = new StringBuilder(); 053 054 if (phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptionSet().size() == 1) { 055 for (ColumnDescription column : phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptionSet()) { 056 EDGELABEL = column.getName(); 057 } 058 } 059 060 clear(); 061 } 062 063 public void clear() { 064 nonTerminalCounter = 0; 065 } 066 067 public String getOptionString() { 068 return optionString; 069 } 070 071 public void setOptionString(String optionString) { 072 this.optionString = optionString; 073 } 074 075 public DataFormatInstance getDependencyDataFormatInstance() { 076 return dependencyDataFormatInstance; 077 } 078 079 public void setDependencyDataFormatInstance( 080 DataFormatInstance dependencyDataFormatInstance) { 081 this.dependencyDataFormatInstance = dependencyDataFormatInstance; 082 } 083 084 public DataFormatInstance getPhraseStructuretDataFormatInstance() { 085 return phraseStructuretDataFormatInstance; 086 } 087 088 public void setPhraseStructuretDataFormatInstance( 089 DataFormatInstance phraseStructuretDataFormatInstance) { 090 this.phraseStructuretDataFormatInstance = phraseStructuretDataFormatInstance; 091 } 092 093 public void update(MappablePhraseStructureGraph graph, Edge e, Object arg) throws MaltChainedException { 094 if (lockUpdate == false) { 095// if (e.getType() == Edge.PHRASE_STRUCTURE_EDGE && e.getSource() instanceof NonTerminalNode && lockUpdate == false) { 096// if(e.getTarget() instanceof TerminalNode) { 097// PhraseStructureNode top = (PhraseStructureNode)e.getTarget(); 098// while (top.getParent() != null && ((NonTerminalNode)top.getParent()).getLexicalHead() == (PhraseStructureNode)e.getTarget()) { 099// top = top.getParent(); 100// } 101// updateDependenyGraph(graph, top); 102// } 103// else if (e.getSource().isRoot()) { 104// updateDependenyGraph(graph, graph.getPhraseStructureRoot()); 105// } 106// } 107 if (e.getType() == Edge.DEPENDENCY_EDGE && e.getSource() instanceof DependencyNode && e.getTarget() instanceof DependencyNode) { 108 if (e.isLabeled() && e.getLabelSet().size() == 4) { 109 updatePhraseStructureGraph(graph, (Edge)e, false); 110 } 111 } 112 } 113 } 114 115 public void updateDependenyGraph(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException { 116 if (graph.nTokenNode() == 1 && graph.nNonTerminals() == 0) { 117 // Special case when the root dominates direct a single terminal node 118 Edge e = graph.addDependencyEdge(graph.getDependencyRoot(), graph.getDependencyNode(1)); 119 e.addLabel(graph.getSymbolTables().getSymbolTable(DEPREL), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL))); 120 e.addLabel(graph.getSymbolTables().getSymbolTable(HEADREL), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(HEADREL))); 121 e.addLabel(graph.getSymbolTables().getSymbolTable(PHRASE), "*"); 122// e.addLabel(graph.getSymbolTables().getSymbolTable(PHRASE), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(PHRASE))); 123 e.addLabel(graph.getSymbolTables().getSymbolTable(ATTACH), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(ATTACH))); 124 } else { 125 updateDependencyEdges(graph, top); 126 updateDependenyLabels(graph); 127 } 128 } 129 130 131 132 private void updateDependencyEdges(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException { 133 if (top == null) { 134 return; 135 } 136 DependencyNode head = null; 137 DependencyNode dependent = null; 138 if (top instanceof NonTerminalNode) { 139 for (PhraseStructureNode node : ((NonTerminalNode)top).getChildren()) { 140 if (node instanceof NonTerminalNode) { 141 updateDependencyEdges(graph,node); 142 } else { 143 head = ((NonTerminalNode)top).getLexicalHead(headRules); 144 dependent = (DependencyNode)node; 145 if (head != null && dependent != null && head != dependent) { 146 lockUpdate = true; 147 if (!dependent.hasHead()) { 148 graph.addDependencyEdge(head, dependent); 149 } 150 else if (head != dependent.getHead()) { 151 graph.moveDependencyEdge(head, dependent); 152 } 153 lockUpdate = false; 154 } 155 } 156 } 157 } 158 159 head = null; 160 if (top.getParent() != null) { 161 head = ((NonTerminalNode)top.getParent()).getLexicalHead(headRules); 162 } else if (top.isRoot()) { 163 head = (DependencyNode)top; 164 } 165 166 if (top instanceof NonTerminalNode) { 167 dependent = ((NonTerminalNode)top).getLexicalHead(headRules); 168 } else if (!top.isRoot()) { 169 dependent = (DependencyNode)top; 170 } 171 if (head != null && dependent != null && head != dependent) { 172 lockUpdate = true; 173 if (!dependent.hasHead()) { 174 graph.addDependencyEdge(head, dependent); 175 } 176 else if (head != dependent.getHead()) { 177 graph.moveDependencyEdge(head, dependent); 178 } 179 lockUpdate = false; 180 } 181 } 182 183 private void updateDependenyLabels(MappablePhraseStructureGraph graph) throws MaltChainedException { 184 for (int index :graph.getTokenIndices()) { 185 PhraseStructureNode top = (PhraseStructureNode)graph.getTokenNode(index); 186 187 while (top != null && top.getParent() != null &&graph.getTokenNode(index) == ((NonTerminalNode)top.getParent()).getLexicalHead(headRules)) { 188 top = top.getParent(); 189 } 190 lockUpdate = true; 191 labelDependencyEdge(graph, graph.getTokenNode(index).getHeadEdge(), top); 192 lockUpdate = false; 193 } 194 } 195 196 197// private void updateDependenyLabels(MappablePhraseStructureGraph graph, PhraseStructureNode top) throws MaltChainedException { 198// if (top == null) { 199// return; 200// } 201// DependencyNode head = null; 202// DependencyNode dependent = null; 203// if (top instanceof NonTerminalNode) { 204// for (PhraseStructureNode node : ((NonTerminalNode)top).getChildren()) { 205// if (node instanceof NonTerminalNode) { 206// updateDependenyLabels(graph, node); 207// } else { 208// head = ((NonTerminalNode)top).getLexicalHead(headRules); 209// dependent = (DependencyNode)node; 210// if (head != null && dependent != null && head != dependent) { 211// lockUpdate = true; 212// if (dependent.hasHead()) { 213// Edge e = dependent.getHeadEdge(); 214// labelDependencyEdge(graph, e, node); 215// } 216// lockUpdate = false; 217// } 218// } 219// } 220// } 221// 222// dependent = null; 223// if (top instanceof NonTerminalNode) { 224// dependent = ((NonTerminalNode)top).getLexicalHead(headRules); 225// } 226// 227// if (dependent != null) { 228// lockUpdate = true; 229// if (dependent.hasHead()) { 230// Edge e = dependent.getHeadEdge(); 231// labelDependencyEdge(graph, e, top); 232// } 233// lockUpdate = false; 234// } 235// } 236 237 private void labelDependencyEdge(MappablePhraseStructureGraph graph, Edge e, PhraseStructureNode top) throws MaltChainedException { 238 if (e == null) { 239 return; 240 } 241 SymbolTableHandler symbolTables = graph.getSymbolTables(); 242 deprel.setLength(0); 243 phrase.setLength(0); 244 headrel.setLength(0); 245 246 e.removeLabel(symbolTables.getSymbolTable(DEPREL)); 247 e.removeLabel(symbolTables.getSymbolTable(HEADREL)); 248 e.removeLabel(symbolTables.getSymbolTable(PHRASE)); 249 e.removeLabel(symbolTables.getSymbolTable(ATTACH)); 250 251 int i = 0; 252 SortedMap<String, SymbolTable> edgeLabelSymbolTables = phraseStructuretDataFormatInstance.getPhraseStructureEdgeLabelSymbolTables(symbolTableHandler); 253 SortedMap<String, SymbolTable> nodeLabelSymbolTables = phraseStructuretDataFormatInstance.getPhraseStructureNodeLabelSymbolTables(symbolTableHandler); 254 if (!top.isRoot()) { 255 for (String name : edgeLabelSymbolTables.keySet()) { 256 if (top.hasParentEdgeLabel(symbolTables.getSymbolTable(name))) { 257 deprel.append(top.getParentEdgeLabelSymbol(symbolTables.getSymbolTable(name))); 258 } else { 259 deprel.append(EMPTY_LABEL); 260 } 261 i++; 262 if (i < edgeLabelSymbolTables.size()) { 263 deprel.append(LABEL_ELEMENT_SEPARATOR); 264 } 265 } 266 if (deprel.length() != 0) { 267 e.addLabel(symbolTables.getSymbolTable(DEPREL), deprel.toString()); 268 } 269 } else { 270 String deprelDefaultRootLabel = graph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(DEPREL)); 271 if (deprelDefaultRootLabel != null) { 272 e.addLabel(symbolTables.getSymbolTable(DEPREL), deprelDefaultRootLabel); 273 } else { 274 e.addLabel(symbolTables.getSymbolTable(DEPREL), EMPTY_LABEL); 275 } 276 } 277 PhraseStructureNode tmp = (PhraseStructureNode)e.getTarget(); 278 while (tmp != top && tmp.getParent() != null) { // && !tmp.getParent().isRoot()) { 279 i=0; 280 for (String name : edgeLabelSymbolTables.keySet()) { 281 if (tmp.hasParentEdgeLabel(symbolTables.getSymbolTable(name))) { 282 headrel.append(tmp.getParentEdgeLabelSymbol(symbolTables.getSymbolTable(name))); 283 } else { 284 headrel.append(EMPTY_LABEL); 285 } 286 i++; 287 if (i < edgeLabelSymbolTables.size()) { 288 headrel.append(LABEL_ELEMENT_SEPARATOR); 289 } 290 } 291 i=0; 292 headrel.append(SPINE_ELEMENT_SEPARATOR); 293 for (String name : nodeLabelSymbolTables.keySet()) { 294 if (tmp.getParent().hasLabel(symbolTables.getSymbolTable(name))) { 295 phrase.append(tmp.getParent().getLabelSymbol(symbolTables.getSymbolTable(name))); 296 } else { 297 if (tmp.getParent().isRoot()) { 298 String deprelDefaultRootLabel = graph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(PHRASE)); 299 if (deprelDefaultRootLabel != null) { 300 phrase.append(deprelDefaultRootLabel); 301 } else { 302 phrase.append(EMPTY_LABEL); 303 } 304 } else { 305 phrase.append(EMPTY_LABEL); 306 } 307 } 308 i++; 309 if (i < nodeLabelSymbolTables.size()) { 310 phrase.append(LABEL_ELEMENT_SEPARATOR); 311 } 312 } 313 phrase.append(SPINE_ELEMENT_SEPARATOR); 314 tmp = tmp.getParent(); 315 } 316 if (phrase.length() == 0) { 317 headrel.append(EMPTY_SPINE); 318 phrase.append(EMPTY_SPINE); 319 } else { 320 headrel.setLength(headrel.length()-1); 321 phrase.setLength(phrase.length()-1); 322 } 323 e.addLabel(symbolTables.getSymbolTable(HEADREL), headrel.toString()); 324 e.addLabel(symbolTables.getSymbolTable(PHRASE), phrase.toString()); 325 int a = 0; 326 tmp = (PhraseStructureNode)e.getSource(); 327 while (top.getParent() != null && tmp.getParent() != null && tmp.getParent() != top.getParent()) { 328 a++; 329 tmp = tmp.getParent(); 330 } 331 e.addLabel(symbolTables.getSymbolTable(ATTACH), Integer.toString(a)); 332 } 333 334 public void connectUnattachedSpines(MappablePhraseStructureGraph graph) throws MaltChainedException { 335 connectUnattachedSpines(graph, graph.getDependencyRoot()); 336 337 if (!graph.getPhraseStructureRoot().isLabeled()) { 338 graph.getPhraseStructureRoot().addLabel(graph.getSymbolTables().addSymbolTable(CAT), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(PHRASE))); 339 340 } 341 } 342 343 private void connectUnattachedSpines(MappablePhraseStructureGraph graph, DependencyNode depNode) throws MaltChainedException { 344 if (!depNode.isRoot()) { 345 PhraseStructureNode dependentSpine = (PhraseStructureNode)depNode; 346 while (dependentSpine.getParent() != null) { 347 dependentSpine = dependentSpine.getParent(); 348 } 349 if (!dependentSpine.isRoot()) { 350 updatePhraseStructureGraph(graph,depNode.getHeadEdge(),true); 351 } 352 } 353 for (int i = 0; i < depNode.getLeftDependentCount(); i++) { 354 connectUnattachedSpines(graph, depNode.getLeftDependent(i)); 355 } 356 for (int i = depNode.getRightDependentCount()-1; i >= 0 ; i--) { 357 connectUnattachedSpines(graph, depNode.getRightDependent(i)); 358 } 359 } 360 361 public void updatePhraseStructureGraph(MappablePhraseStructureGraph graph, Edge depEdge, boolean attachHeadSpineToRoot) throws MaltChainedException { 362 PhraseStructureNode dependentSpine = (PhraseStructureNode)depEdge.getTarget(); 363 364 if (((PhraseStructureNode)depEdge.getTarget()).getParent() == null) { 365 // Restore dependent spine 366 String phraseSpineLabel = null; 367 String edgeSpineLabel = null; 368 int empty_label = 0; 369 370 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(PHRASE))) { 371 phraseSpineLabel = depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(PHRASE)); 372 } 373 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(HEADREL))) { 374 edgeSpineLabel = depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(HEADREL)); 375 } 376 if (phraseSpineLabel != null && phraseSpineLabel.length() > 0 && phraseSpineLabel.charAt(0) != EMPTY_SPINE) { 377 int ps = 0, es = 0, i = 0, j = 0, n = phraseSpineLabel.length()-1, m = edgeSpineLabel.length()-1; 378 PhraseStructureNode child = (PhraseStructureNode)depEdge.getTarget(); 379 while (true) { 380 while (i <= n && phraseSpineLabel.charAt(i) != SPINE_ELEMENT_SEPARATOR) { 381 if (phraseSpineLabel.charAt(i) == QUESTIONMARK) { 382 empty_label++; 383 } else { 384 empty_label = 0; 385 } 386 i++; 387 } 388 if (depEdge.getSource().isRoot() && i >= n) { 389 dependentSpine = graph.getPhraseStructureRoot(); 390 } else { 391 dependentSpine = graph.addNonTerminalNode(++nonTerminalCounter); 392 } 393 394 if (empty_label != 2 && ps != i) { 395 dependentSpine.addLabel(graph.getSymbolTables().addSymbolTable(CAT), phraseSpineLabel.substring(ps,i)); 396 } 397 398 empty_label = 0; 399 if (edgeSpineLabel != null) { 400 while (j <= m && edgeSpineLabel.charAt(j) != SPINE_ELEMENT_SEPARATOR) { 401 if (edgeSpineLabel.charAt(j) == QUESTIONMARK) { 402 empty_label++; 403 } else { 404 empty_label = 0; 405 } 406 j++; 407 } 408 } 409 lockUpdate = true; 410 Edge e = graph.addPhraseStructureEdge(dependentSpine, child); 411 if (empty_label != 2 && es != j && edgeSpineLabel != null && e != null) { 412 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), edgeSpineLabel.substring(es,j)); 413 } else if (es == j) { 414 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), EMPTY_LABEL); 415 } 416 417 lockUpdate = false; 418 child = dependentSpine; 419 if (i >= n) { break; } 420 empty_label = 0; 421 ps = i = i + 1; 422 es = j = j + 1; 423 } 424 } 425 426 // Recursively attach the dependent spines to target node. 427 DependencyNode target = (DependencyNode)depEdge.getTarget(); 428 for (int i = 0; i < target.getLeftDependentCount(); i++) { 429 updatePhraseStructureGraph(graph, target.getLeftDependent(i).getHeadEdge(), attachHeadSpineToRoot); 430 } 431 for (int i = target.getRightDependentCount()-1; i >= 0 ; i--) { 432 updatePhraseStructureGraph(graph, target.getRightDependent(i).getHeadEdge(), attachHeadSpineToRoot); 433 } 434 } else { 435 // If dependent spine already exist, then set dependentSpine to the highest nonterminal 436 // of the dependent spine. 437 while (dependentSpine.getParent() != null && !dependentSpine.getParent().isRoot()) { 438 dependentSpine = dependentSpine.getParent(); 439 } 440 } 441 442 443 PhraseStructureNode headSpine = null; 444 if (((PhraseStructureNode)depEdge.getSource()).getParent() != null) { 445 // If head spine exist, then attach dependent spine to the head spine at the attachment level a. 446 int a = 0; 447 headSpine = ((PhraseStructureNode)depEdge.getSource()).getParent(); 448 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(ATTACH))) { 449 try { 450 a = Integer.parseInt((depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(ATTACH)))); 451 } catch (NumberFormatException e) { 452 throw new MaltChainedException(e.getMessage()); 453 } 454 } 455 for (int i = 0; i < a && headSpine != null; i++) { 456 headSpine = headSpine.getParent(); 457 } 458 459 if ((headSpine == null || headSpine == dependentSpine) && attachHeadSpineToRoot) { 460 headSpine = graph.getPhraseStructureRoot(); 461 } 462 if (headSpine != null) { 463 lockUpdate = true; 464 Edge e = graph.addPhraseStructureEdge(headSpine, dependentSpine); 465 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(DEPREL)) && !depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)).equals(EMPTY_LABEL) & e != null) { 466 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL))); 467 } 468 lockUpdate = false; 469 } 470 } 471 else if (depEdge.getSource().isRoot() && !depEdge.isLabeled()) { 472 headSpine = graph.getPhraseStructureRoot(); 473 lockUpdate = true; 474 Edge e = graph.addPhraseStructureEdge(headSpine, dependentSpine); 475 if (depEdge.hasLabel(graph.getSymbolTables().getSymbolTable(DEPREL)) && !depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL)).equals(EMPTY_LABEL) & e != null) { 476 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), depEdge.getLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL))); 477 } else { 478 e.addLabel(graph.getSymbolTables().addSymbolTable(EDGELABEL), graph.getDefaultRootEdgeLabelSymbol(graph.getSymbolTables().getSymbolTable(DEPREL))); 479 } 480 lockUpdate = false; 481 // Recursively attach the dependent spines to target node. 482 DependencyNode target = (DependencyNode)depEdge.getTarget(); 483 for (int i = 0; i < target.getLeftDependentCount(); i++) { 484 updatePhraseStructureGraph(graph, target.getLeftDependent(i).getHeadEdge(), attachHeadSpineToRoot); 485 } 486 for (int i = target.getRightDependentCount()-1; i >= 0 ; i--) { 487 updatePhraseStructureGraph(graph, target.getRightDependent(i).getHeadEdge(), attachHeadSpineToRoot); 488 } 489 } 490 } 491 492 public HeadRules getHeadRules() { 493 return headRules; 494 } 495 496 public void setHeadRules(HeadRules headRules) { 497 this.headRules = headRules; 498 } 499 500 public void setHeadRules(String headRulesURL) throws MaltChainedException { 501 if (headRulesURL != null && headRulesURL.length() > 0 && !headRulesURL.equals("*")) { 502 headRules = new HeadRules(SystemLogger.logger(), phraseStructuretDataFormatInstance, symbolTableHandler); 503 headRules.parseHeadRules(headRulesURL); 504 } 505 } 506}