001 package org.maltparser.parser.algorithm.covington; 002 003 004 import java.util.ArrayList; 005 import java.util.HashMap; 006 import java.util.HashSet; 007 008 import org.maltparser.core.exception.MaltChainedException; 009 import org.maltparser.core.symbol.SymbolTable; 010 import org.maltparser.core.symbol.TableHandler; 011 import org.maltparser.core.syntaxgraph.DependencyStructure; 012 import org.maltparser.core.syntaxgraph.LabelSet; 013 import org.maltparser.core.syntaxgraph.edge.Edge; 014 import org.maltparser.core.syntaxgraph.node.DependencyNode; 015 import org.maltparser.parser.SingleMalt; 016 import org.maltparser.parser.algorithm.ParsingAlgorithm; 017 import org.maltparser.parser.algorithm.ParsingException; 018 import org.maltparser.parser.algorithm.helper.TransitionTable; 019 import org.maltparser.parser.algorithm.helper.TransitionTableHandler; 020 import org.maltparser.parser.history.History; 021 import org.maltparser.parser.history.GuideUserHistory; 022 import org.maltparser.parser.history.action.GuideUserAction; 023 import org.maltparser.parser.history.container.ActionContainer; 024 025 026 /** 027 * 028 * @author Joakim Nivre 029 * @author Johan Hall 030 * @since 1.0 031 */ 032 public abstract class Covington implements ParsingAlgorithm { 033 // Behavior 034 public final static int MALT_0_4 = 1; 035 public final static int MALT_1_0 = 2; 036 037 // Transitions 038 protected static final int SHIFT = 1; 039 protected static final int NOARC = 2; 040 protected static final int RIGHTARC = 3; 041 protected static final int LEFTARC = 4; 042 043 044 protected GuideUserHistory history; 045 protected ArrayList<ActionContainer> actionContainers; 046 protected ActionContainer transActionContainer; 047 protected ActionContainer pushActionContainer; 048 protected TransitionTable pushTable; 049 protected HashSet<ActionContainer> arcLabelActionContainers; 050 protected SingleMalt configuration; 051 protected GuideUserAction currentAction; 052 protected HashMap<String, TableHandler> tableHandlers; 053 protected TransitionTableHandler transitionTableHandler; 054 protected boolean allowShift; 055 protected boolean complexTransition = false; 056 protected int behavior; 057 protected ArrayList<DependencyNode> input; 058 protected int right; 059 protected int left; 060 protected int leftstop; 061 protected int rightstop; 062 063 public Covington(SingleMalt configuration) throws MaltChainedException { 064 this.configuration = configuration; 065 initAllowRoot(); 066 initAllowShift(); 067 initBehavior(); 068 input = new ArrayList<DependencyNode>(); 069 initHistory(); 070 } 071 072 073 public DependencyStructure parse(DependencyStructure parseDependencyGraph) throws MaltChainedException { 074 clear(parseDependencyGraph); 075 right = 1; 076 while (right <= rightstop) { 077 left = right -1; 078 while (left >= leftstop) { 079 currentAction = history.getEmptyGuideUserAction(); 080 configuration.predict(currentAction); 081 transition(parseDependencyGraph); 082 } 083 right++; 084 } 085 086 parseDependencyGraph.linkAllTreesToRoot(); 087 return parseDependencyGraph; 088 } 089 090 public DependencyStructure oracleParse(DependencyStructure goldDependencyGraph, DependencyStructure parseDependencyGraph) throws MaltChainedException { 091 // if (!(goldDependencyGraph instanceof SingleHeadedDependencyGraph)) { 092 // throw new ParsingException("The gold standard graph must be a single headed graph. "); 093 // } 094 095 clear(parseDependencyGraph); 096 right = 1; 097 while (right <= rightstop) { 098 left = right - 1; 099 while (left >= leftstop) { 100 currentAction = history.getEmptyGuideUserAction(); 101 oraclePredict(goldDependencyGraph); 102 configuration.setInstance(currentAction); 103 transition(parseDependencyGraph); 104 } 105 right++; 106 } 107 parseDependencyGraph.linkAllTreesToRoot(); 108 return parseDependencyGraph; 109 } 110 111 protected void transition(DependencyStructure parseDependencyGraph) throws MaltChainedException { 112 currentAction.getAction(actionContainers); 113 while (checkParserAction(parseDependencyGraph) == false) { 114 if (configuration.getMode() == SingleMalt.LEARN || configuration.predictFromKBestList(currentAction) == false) { 115 updateActionContainers(NOARC, null); // default parser action 116 break; 117 } 118 currentAction.getAction(actionContainers); 119 } 120 Edge e = null; 121 switch (transActionContainer.getActionCode()) { 122 case LEFTARC: 123 e = parseDependencyGraph.addDependencyEdge(input.get(right).getIndex(), input.get(left).getIndex()); 124 addEdgeLabels(e); 125 break; 126 case RIGHTARC: 127 e = parseDependencyGraph.addDependencyEdge(input.get(left).getIndex(), input.get(right).getIndex()); 128 addEdgeLabels(e); 129 break; 130 default: 131 break; 132 } 133 updateLeft(parseDependencyGraph, actionContainers.get(0).getActionCode()); 134 } 135 136 protected boolean checkParserAction(DependencyStructure dg) throws MaltChainedException { 137 int trans = transActionContainer.getActionCode(); 138 139 if (trans == SHIFT && allowShift == false) { 140 return false; 141 } 142 if ((trans == LEFTARC || trans == RIGHTARC) && !isActionContainersLabeled()) { 143 // LabelCode cannot be found for transition LEFTARC and RIGHTARC 144 return false; 145 } 146 /* if ((trans == SHIFT || trans == REDUCE) && parserAction.getLastLabelCode() != null) { 147 // LabelCode can be found for transition SHIFT and REDUCE 148 return false; 149 }*/ 150 if (trans == LEFTARC && input.get(left).isRoot()) { 151 // The token on top of the stack is the root for LEFTARC 152 return false; 153 } 154 if (trans == LEFTARC && dg.hasLabeledDependency(input.get(left).getIndex())) { 155 156 157 // The token on top of the stack has already a head for transition LEFTARC 158 return false; 159 } 160 if (trans == RIGHTARC && dg.hasLabeledDependency(input.get(right).getIndex())) { 161 // The token on top of the stack has already a head for transition LEFTARC 162 return false; 163 } 164 return true; 165 } 166 167 protected void oraclePredict(DependencyStructure gold) throws MaltChainedException { 168 if (!input.get(left).isRoot() && gold.getTokenNode(input.get(left).getIndex()).getHead().getIndex() == input.get(right).getIndex()) { 169 updateActionContainers(LEFTARC, gold.getTokenNode(input.get(left).getIndex()).getHeadEdge().getLabelSet()); 170 } else if (gold.getTokenNode(input.get(right).getIndex()).getHead().getIndex() == input.get(left).getIndex()) { 171 updateActionContainers(RIGHTARC, gold.getTokenNode(input.get(right).getIndex()).getHeadEdge().getLabelSet()); 172 } else if (allowShift == true && (!(gold.getTokenNode(input.get(right).getIndex()).hasLeftDependent() 173 && gold.getTokenNode(input.get(right).getIndex()).getLeftmostDependent().getIndex() < input.get(left).getIndex()) 174 && !(gold.getTokenNode(input.get(right).getIndex()).getHead().getIndex() < input.get(left).getIndex() 175 && (!gold.getTokenNode(input.get(right).getIndex()).getHead().isRoot() || leftstop == 0)))) { 176 updateActionContainers(SHIFT, null); 177 } else { 178 updateActionContainers(NOARC, null); 179 } 180 } 181 182 protected int getTransition() { 183 return transActionContainer.getActionCode(); 184 } 185 186 protected void updateActionContainers(int transition, LabelSet arcLabels) throws MaltChainedException { 187 if (complexTransition) { 188 switch (transition) { 189 case SHIFT: 190 pushActionContainer.setAction(1); 191 transActionContainer.setAction(transition); 192 break; 193 case NOARC: 194 pushActionContainer.setAction(2); 195 transActionContainer.setAction(transition); 196 break; 197 case RIGHTARC: 198 pushActionContainer.setAction(1); 199 transActionContainer.setAction(transition); 200 break; 201 case LEFTARC: 202 pushActionContainer.setAction(2); 203 transActionContainer.setAction(transition); 204 break; 205 default: 206 throw new ParsingException("Unknown transition "+transition+". "); 207 } 208 } else { 209 transActionContainer.setAction(transition); 210 } 211 if (arcLabels == null) { 212 for (ActionContainer container : arcLabelActionContainers) { 213 container.setAction(-1); 214 } 215 } else { 216 for (ActionContainer container : arcLabelActionContainers) { 217 container.setAction(arcLabels.get(container.getTable()).intValue()); 218 } 219 } 220 currentAction.addAction(actionContainers); 221 } 222 223 protected boolean isActionContainersLabeled() { 224 for (int i = 1; i < actionContainers.size(); i++) { 225 if (actionContainers.get(i).getActionCode() < 0) { 226 return false; 227 } 228 } 229 return true; 230 } 231 232 // protected LabelSet getArcLabels(DependencyGraph parseDependencyGraph) throws MaltChainedException { 233 // LabelSet arcLabels = parseDependencyGraph.checkOutNewLabelSet(); 234 // for (ActionContainer container : arcLabelActionContainers) { 235 // arcLabels.put((SymbolTable)container.getTable(), container.getActionCode()); 236 // } 237 // return arcLabels; 238 // } 239 240 protected void addEdgeLabels(Edge e) throws MaltChainedException { 241 if (e != null) { 242 for (ActionContainer container : arcLabelActionContainers) { 243 e.addLabel((SymbolTable)container.getTable(), container.getActionCode()); 244 } 245 } 246 } 247 248 public DependencyNode getLeftNode(int index) throws MaltChainedException { 249 if (index < 0) { 250 throw new ParsingException("Left index must be non-negative in feature specification. "); 251 } 252 if (behavior == Covington.MALT_0_4) { 253 int tmpindex = 0; 254 int tmpleft = left; 255 while (tmpindex < index && tmpleft >= 0) { 256 if (input.get(tmpleft).hasHead() && input.get(tmpleft).getHead().getIndex() < tmpleft) { 257 tmpleft = input.get(tmpleft).getHead().getIndex(); 258 tmpindex++; 259 } else if (input.get(tmpleft).hasHead()) { 260 tmpleft--; 261 } else { 262 tmpleft--; 263 tmpindex++; 264 } 265 } 266 if (tmpleft >= 0) { 267 return input.get(tmpleft); 268 } 269 } else { 270 if (left-index >= 0) { 271 return input.get(left-index); 272 } 273 } 274 return null; 275 } 276 277 public DependencyNode getRightNode(int index) throws MaltChainedException { 278 if (index < 0) { 279 throw new ParsingException("Right index must be non-negative in feature specification. "); 280 } 281 if (right+index < input.size()) { 282 return input.get(right+index); 283 } 284 return null; 285 } 286 287 public DependencyNode getLeftContextNode(int index) throws MaltChainedException { 288 if (index < 0) { 289 throw new ParsingException("LeftContext index must be non-negative in feature specification. "); 290 } 291 292 int tmpindex = 0; 293 for (int i = left+1; i < right; i++) { 294 if (!input.get(i).hasAncestorInside(left, right)) { 295 if (tmpindex == index) { 296 return input.get(i); 297 } else { 298 tmpindex++; 299 } 300 } 301 } 302 return null; 303 } 304 305 public DependencyNode getRightContextNode(int index) throws MaltChainedException { 306 if (index < 0) { 307 throw new ParsingException("RightContext index must be non-negative in feature specification. "); 308 } 309 int tmpindex = 0; 310 for (int i = right-1; i > left; i--) { 311 if (!input.get(i).hasAncestorInside(left, right)) { 312 if (tmpindex == index) { 313 return input.get(i); 314 } else { 315 tmpindex++; 316 } 317 } 318 } 319 return null; 320 } 321 322 public DependencyNode getLeftTarget() { 323 return input.get(left); 324 } 325 326 public DependencyNode getRightTarget() { 327 return input.get(right); 328 } 329 330 public DependencyNode getNode(String dataStructure, int index) throws MaltChainedException { 331 if (dataStructure.equals("Left")) { 332 if (index < 0) { 333 throw new ParsingException("Left index must be non-negative in feature specification. "); 334 } 335 if (behavior == Covington.MALT_0_4) { 336 int tmpindex = 0; 337 int tmpleft = left; 338 while (tmpindex < index && tmpleft >= 0) { 339 if (input.get(tmpleft).hasHead() && input.get(tmpleft).getHead().getIndex() < tmpleft) { 340 tmpleft = input.get(tmpleft).getHead().getIndex(); 341 tmpindex++; 342 } else if (input.get(tmpleft).hasHead()) { 343 tmpleft--; 344 } else { 345 tmpleft--; 346 tmpindex++; 347 } 348 } 349 if (tmpleft >= 0) { 350 return input.get(tmpleft); 351 } 352 } else { 353 if (left-index >= 0) { 354 return input.get(left-index); 355 } 356 } 357 } else if (dataStructure.equals("Right")) { 358 if (index < 0) { 359 throw new ParsingException("Right index must be non-negative in feature specification. "); 360 } 361 if (right+index < input.size()) { 362 return input.get(right+index); 363 } 364 } else if (dataStructure.equals("LeftContext")) { 365 if (index < 0) { 366 throw new ParsingException("LeftContext index must be non-negative in feature specification. "); 367 } 368 369 int tmpindex = 0; 370 for (int i = left+1; i < right; i++) { 371 if (!input.get(i).hasAncestorInside(left, right)) { 372 if (tmpindex == index) { 373 return input.get(i); 374 } else { 375 tmpindex++; 376 } 377 } 378 } 379 } else if (dataStructure.equals("RightContext")) { 380 if (index < 0) { 381 throw new ParsingException("RightContext index must be non-negative in feature specification. "); 382 } 383 int tmpindex = 0; 384 for (int i = right-1; i > left; i--) { 385 if (!input.get(i).hasAncestorInside(left, right)) { 386 if (tmpindex == index) { 387 return input.get(i); 388 } else { 389 tmpindex++; 390 } 391 } 392 } 393 } else { 394 throw new ParsingException("Undefined data structure in feature specification. "); 395 } 396 return null; 397 } 398 399 private void clear(DependencyStructure dg) throws MaltChainedException { 400 // dg.clear(); 401 402 input.clear(); 403 history.clear(); //parserAction.clear(); 404 405 for (int i = 0; i <= dg.getHighestTokenIndex(); i++) { 406 DependencyNode node = dg.getDependencyNode(i); 407 if (node != null) { 408 input.add(node); 409 } 410 } 411 412 rightstop = dg.getHighestTokenIndex(); 413 } 414 415 private void initAllowRoot() throws MaltChainedException { 416 if ((Boolean)configuration.getOptionValue("covington", "allow_root") == true) { 417 leftstop = 0; 418 } else { 419 leftstop = 1; 420 } 421 } 422 423 private void initAllowShift() throws MaltChainedException { 424 allowShift = (Boolean)configuration.getOptionValue("covington", "allow_shift"); 425 } 426 427 private void initBehavior() throws MaltChainedException { 428 if ((Boolean)configuration.getOptionValue("malt0.4", "behavior") == true) { 429 behavior = MALT_0_4; 430 } else { 431 behavior = MALT_1_0; 432 } 433 } 434 435 protected void addTransition(ActionContainer transitionContainer, GuideUserAction action, int value) throws MaltChainedException { 436 transitionContainer.setAction(value); 437 for (ActionContainer container : arcLabelActionContainers) { 438 container.setAction(-1); 439 } 440 currentAction.addAction(actionContainers); 441 } 442 443 protected void initHistory() throws MaltChainedException { 444 String decisionSettings = configuration.getOptionValue("guide", "decision_settings").toString().trim(); 445 transitionTableHandler = new TransitionTableHandler(); 446 tableHandlers = new HashMap<String, TableHandler>(); 447 448 String[] decisionElements = decisionSettings.split(",|#|;|\\+"); 449 int nTrans = 0; 450 int nArc = 0; 451 for (int i = 0; i < decisionElements.length; i++) { 452 int index = decisionElements[i].indexOf('.'); 453 if (index == -1) { 454 throw new ParsingException("Decision settings '"+decisionSettings+"' contain an item '"+decisionElements[i]+"' that does not follow the format {TableHandler}.{Table}. "); 455 } 456 if (decisionElements[i].substring(0,index).equals("T")) { 457 if (!tableHandlers.containsKey("T")) { 458 tableHandlers.put("T", transitionTableHandler); 459 } 460 if (decisionElements[i].substring(index+1).equals("TRANS")) { 461 if (nTrans == 0) { 462 TransitionTable ttable = (TransitionTable)transitionTableHandler.addSymbolTable("TRANS"); 463 ttable.addTransition(SHIFT, "SH", false, null); 464 ttable.addTransition(NOARC, "NA", false, null); 465 ttable.addTransition(RIGHTARC, "RA", true, null); 466 ttable.addTransition(LEFTARC, "LA", true, null); 467 } else { 468 throw new ParsingException("Illegal decision settings '"+decisionSettings+"'"); 469 } 470 nTrans++; 471 } else if (decisionElements[i].substring(index+1).equals("PUSH")) { 472 if (nArc == 0) { 473 complexTransition = true; 474 pushTable = (TransitionTable)transitionTableHandler.addSymbolTable("PUSH"); 475 pushTable.addTransition(1, "YES", true, null); 476 pushTable.addTransition(2, "NO", true, null); 477 } else { 478 throw new ParsingException("Illegal decision settings '"+decisionSettings+"'"); 479 } 480 nArc++; 481 } 482 } else if (decisionElements[i].substring(0,index).equals("A")) { 483 if (!tableHandlers.containsKey("A")) { 484 //tableHandlers.put("A", sentence.getDataFormatInstance().getSymbolTables()); 485 tableHandlers.put("A", configuration.getSymbolTables()); 486 } 487 } else { 488 throw new ParsingException("The decision settings '"+decisionSettings+"' contains an unknown table handler '"+decisionElements[i].substring(0,index)+"'. " + 489 "Only T (Transition table handler) and A (ArcLabel table handler) is allowed for '"+getName()+"' parsing algorithm. "); 490 } 491 } 492 history = new History(decisionSettings, getConfiguration().getOptionValue("guide", "classitem_separator").toString(), tableHandlers); 493 actionContainers = history.getActionContainers(); 494 if (actionContainers.size() < 1) { 495 throw new ParsingException("Problem when initialize the history (sequence of actions). There are no action containers. "); 496 } 497 498 for (int i = 0; i < actionContainers.size(); i++) { 499 if (actionContainers.get(i).getTableContainerName().equals("T.TRANS")) { 500 transActionContainer = actionContainers.get(i); 501 } else if (actionContainers.get(i).getTableContainerName().equals("T.PUSH")) { 502 pushActionContainer = actionContainers.get(i); 503 } else if (actionContainers.get(i).getTableContainerName().startsWith("A.")) { 504 if (arcLabelActionContainers == null) { 505 arcLabelActionContainers = new HashSet<ActionContainer>(); 506 } 507 arcLabelActionContainers.add(actionContainers.get(i)); 508 } 509 } 510 currentAction = history.getEmptyGuideUserAction(); 511 if (transActionContainer == null) { 512 throw new ParsingException("The decision settings does not contain T.TRANS or T.PUSH;T.TRANS"); 513 } else { 514 if (complexTransition && pushActionContainer == null) { 515 throw new ParsingException("The decision settings does not contain T.TRANS or T.PUSH;T.TRANS"); 516 } 517 } 518 519 addTransition(transActionContainer, currentAction, SHIFT); 520 addTransition(transActionContainer, currentAction, NOARC); 521 522 } 523 524 public GuideUserHistory getHistory() { 525 return history; 526 } 527 528 public SingleMalt getConfiguration() { 529 return configuration; 530 } 531 532 public abstract String getName(); 533 protected abstract void updateLeft(DependencyStructure dg, int trans) throws MaltChainedException; 534 } 535 536