001    package org.maltparser.parser.algorithm.covington;
002    
003    
004    import java.util.ArrayList;
005    import java.util.HashMap;
006    import java.util.HashSet;
007    
008    import org.maltparser.core.exception.MaltChainedException;
009    import org.maltparser.core.symbol.SymbolTable;
010    import org.maltparser.core.symbol.TableHandler;
011    import org.maltparser.core.syntaxgraph.DependencyStructure;
012    import org.maltparser.core.syntaxgraph.LabelSet;
013    import org.maltparser.core.syntaxgraph.edge.Edge;
014    import org.maltparser.core.syntaxgraph.node.DependencyNode;
015    import org.maltparser.parser.SingleMalt;
016    import org.maltparser.parser.algorithm.ParsingAlgorithm;
017    import org.maltparser.parser.algorithm.ParsingException;
018    import org.maltparser.parser.algorithm.helper.TransitionTable;
019    import org.maltparser.parser.algorithm.helper.TransitionTableHandler;
020    import org.maltparser.parser.history.History;
021    import org.maltparser.parser.history.GuideUserHistory;
022    import org.maltparser.parser.history.action.GuideUserAction;
023    import org.maltparser.parser.history.container.ActionContainer;
024    
025    
026    /**
027     * 
028     * @author Joakim Nivre
029     * @author Johan Hall
030     * @since 1.0
031    */
032    public abstract class Covington implements ParsingAlgorithm {
033            // Behavior
034            public final static int MALT_0_4  = 1;
035            public final static int MALT_1_0  = 2;
036            
037            // Transitions
038            protected static final int SHIFT = 1;
039            protected static final int NOARC = 2;
040            protected static final int RIGHTARC = 3;
041            protected static final int LEFTARC = 4;
042            
043    
044            protected GuideUserHistory history;
045            protected ArrayList<ActionContainer> actionContainers;
046            protected ActionContainer transActionContainer;
047            protected ActionContainer pushActionContainer;
048            protected TransitionTable pushTable;
049            protected HashSet<ActionContainer> arcLabelActionContainers;
050            protected SingleMalt configuration;
051            protected GuideUserAction currentAction;
052            protected HashMap<String, TableHandler> tableHandlers;
053            protected TransitionTableHandler transitionTableHandler;
054            protected boolean allowShift;
055            protected boolean complexTransition = false;
056            protected int behavior;
057            protected ArrayList<DependencyNode> input;
058            protected int right;
059            protected int left;
060            protected int leftstop;
061            protected int rightstop;
062            
063            public Covington(SingleMalt configuration) throws MaltChainedException {
064                    this.configuration = configuration;
065                    initAllowRoot();
066                    initAllowShift();
067                    initBehavior();
068                    input = new ArrayList<DependencyNode>();
069                    initHistory();
070            }
071            
072            
073            public DependencyStructure parse(DependencyStructure parseDependencyGraph) throws MaltChainedException {
074                    clear(parseDependencyGraph);
075                    right = 1;
076                    while (right <= rightstop) {
077                            left = right -1;
078                            while (left >= leftstop) {
079                                    currentAction = history.getEmptyGuideUserAction();
080                                    configuration.predict(currentAction);
081                                    transition(parseDependencyGraph);
082                            }
083                            right++;
084                    }
085                    
086                    parseDependencyGraph.linkAllTreesToRoot();
087                    return parseDependencyGraph;
088            }
089            
090            public DependencyStructure oracleParse(DependencyStructure goldDependencyGraph, DependencyStructure parseDependencyGraph) throws MaltChainedException {
091    //              if (!(goldDependencyGraph instanceof SingleHeadedDependencyGraph)) {
092    //                      throw new ParsingException("The gold standard graph must be a single headed graph. ");
093    //              }
094                    
095                    clear(parseDependencyGraph);
096                    right = 1;
097                    while (right <= rightstop) {
098                            left = right - 1;
099                            while (left >= leftstop) { 
100                                    currentAction = history.getEmptyGuideUserAction();
101                                    oraclePredict(goldDependencyGraph);
102                                    configuration.setInstance(currentAction);
103                                    transition(parseDependencyGraph);
104                            }
105                            right++;
106                    }
107                    parseDependencyGraph.linkAllTreesToRoot();
108                    return parseDependencyGraph;
109            }
110            
111            protected void transition(DependencyStructure parseDependencyGraph) throws MaltChainedException {
112                    currentAction.getAction(actionContainers);
113                    while (checkParserAction(parseDependencyGraph) == false) {
114                            if (configuration.getMode() == SingleMalt.LEARN || configuration.predictFromKBestList(currentAction) == false) {
115                                    updateActionContainers(NOARC, null); // default parser action
116                                    break;
117                            }
118                            currentAction.getAction(actionContainers);
119                    }
120                    Edge e = null;
121                    switch (transActionContainer.getActionCode()) {
122                    case LEFTARC:
123                            e = parseDependencyGraph.addDependencyEdge(input.get(right).getIndex(), input.get(left).getIndex());
124                            addEdgeLabels(e);
125                            break;
126                    case RIGHTARC:
127                            e = parseDependencyGraph.addDependencyEdge(input.get(left).getIndex(), input.get(right).getIndex());
128                            addEdgeLabels(e);
129                            break;
130                    default:
131                            break;
132                    }
133                    updateLeft(parseDependencyGraph, actionContainers.get(0).getActionCode());      
134            }
135            
136            protected boolean checkParserAction(DependencyStructure dg) throws MaltChainedException {
137                    int trans = transActionContainer.getActionCode();
138                    
139                    if (trans == SHIFT && allowShift == false) {
140                            return false;
141                    }
142                    if ((trans == LEFTARC || trans == RIGHTARC) && !isActionContainersLabeled()) {
143                            // LabelCode cannot be found for transition LEFTARC and RIGHTARC
144                            return false;
145                    }
146                    /* if ((trans == SHIFT || trans == REDUCE) && parserAction.getLastLabelCode() != null) {
147                        // LabelCode can be found for transition SHIFT and REDUCE 
148                            return false;
149                    }*/
150                    if (trans == LEFTARC && input.get(left).isRoot()) { 
151                            // The token on top of the stack is the root for LEFTARC
152                            return false;
153                    }
154                    if (trans == LEFTARC && dg.hasLabeledDependency(input.get(left).getIndex())) { 
155                            
156                            
157                            // The token on top of the stack has already a head for transition LEFTARC
158                            return false;
159                    }
160                    if (trans == RIGHTARC && dg.hasLabeledDependency(input.get(right).getIndex())) { 
161                            // The token on top of the stack has already a head for transition LEFTARC
162                            return false;
163                    }
164                    return true;
165            }
166            
167            protected void oraclePredict(DependencyStructure gold) throws MaltChainedException {
168                    if (!input.get(left).isRoot() && gold.getTokenNode(input.get(left).getIndex()).getHead().getIndex() == input.get(right).getIndex()) {
169                            updateActionContainers(LEFTARC, gold.getTokenNode(input.get(left).getIndex()).getHeadEdge().getLabelSet());
170                    } else if (gold.getTokenNode(input.get(right).getIndex()).getHead().getIndex() == input.get(left).getIndex()) {
171                            updateActionContainers(RIGHTARC, gold.getTokenNode(input.get(right).getIndex()).getHeadEdge().getLabelSet());
172                    } else if (allowShift == true && (!(gold.getTokenNode(input.get(right).getIndex()).hasLeftDependent() 
173                                    && gold.getTokenNode(input.get(right).getIndex()).getLeftmostDependent().getIndex() < input.get(left).getIndex())
174                                    && !(gold.getTokenNode(input.get(right).getIndex()).getHead().getIndex() < input.get(left).getIndex() 
175                                                    && (!gold.getTokenNode(input.get(right).getIndex()).getHead().isRoot() || leftstop == 0)))) {
176                            updateActionContainers(SHIFT, null);
177                    } else {
178                            updateActionContainers(NOARC, null);
179                    }
180            }
181            
182            protected int getTransition() {
183                    return transActionContainer.getActionCode();
184            }
185            
186            protected void updateActionContainers(int transition, LabelSet arcLabels) throws MaltChainedException {
187                    if (complexTransition) {
188                            switch (transition) {
189                            case SHIFT:
190                                    pushActionContainer.setAction(1);
191                                    transActionContainer.setAction(transition);
192                                    break;
193                            case NOARC:
194                                    pushActionContainer.setAction(2);
195                                    transActionContainer.setAction(transition);
196                                    break;
197                            case RIGHTARC:
198                                    pushActionContainer.setAction(1);
199                                    transActionContainer.setAction(transition);
200                                    break;
201                            case LEFTARC:
202                                    pushActionContainer.setAction(2);
203                                    transActionContainer.setAction(transition);
204                                    break;
205                            default:
206                                    throw new ParsingException("Unknown transition "+transition+". ");
207                            }
208                    } else { 
209                            transActionContainer.setAction(transition);
210                    }
211                    if (arcLabels == null) {
212                            for (ActionContainer container : arcLabelActionContainers) {
213                                    container.setAction(-1);
214                            }
215                    } else {
216                            for (ActionContainer container : arcLabelActionContainers) {
217                                    container.setAction(arcLabels.get(container.getTable()).intValue());
218                            }               
219                    }
220                    currentAction.addAction(actionContainers);
221            }
222            
223            protected boolean isActionContainersLabeled() {
224                    for (int i = 1; i < actionContainers.size(); i++) {
225                            if (actionContainers.get(i).getActionCode() < 0) {
226                                    return false;
227                            }
228                    }
229                    return true;
230            }
231            
232    //      protected LabelSet getArcLabels(DependencyGraph parseDependencyGraph) throws MaltChainedException {
233    //              LabelSet arcLabels = parseDependencyGraph.checkOutNewLabelSet();
234    //              for (ActionContainer container : arcLabelActionContainers) {
235    //                      arcLabels.put((SymbolTable)container.getTable(), container.getActionCode());
236    //              }
237    //              return arcLabels;
238    //      }
239            
240            protected void addEdgeLabels(Edge e) throws MaltChainedException {
241                    if (e != null) { 
242                            for (ActionContainer container : arcLabelActionContainers) {
243                                    e.addLabel((SymbolTable)container.getTable(), container.getActionCode());
244                            }
245                    }
246            }
247            
248            public DependencyNode getLeftNode(int index) throws MaltChainedException {
249                    if (index < 0) {
250                            throw new ParsingException("Left index must be non-negative in feature specification. ");
251                    }
252                    if (behavior == Covington.MALT_0_4) {
253                            int tmpindex = 0;
254                            int tmpleft = left;
255                            while (tmpindex < index && tmpleft >= 0) {
256                                    if (input.get(tmpleft).hasHead() && input.get(tmpleft).getHead().getIndex() < tmpleft) {
257                                            tmpleft = input.get(tmpleft).getHead().getIndex();
258                                            tmpindex++;
259                                    } else if (input.get(tmpleft).hasHead()) {
260                                            tmpleft--;
261                                    } else {
262                                            tmpleft--;
263                                            tmpindex++;
264                                    }
265                            }
266                            if (tmpleft >= 0) {
267                                    return input.get(tmpleft);
268                            }
269                    } else {
270                            if (left-index >= 0) {
271                                    return input.get(left-index);
272                            }
273                    }
274                    return null;
275            }
276            
277            public DependencyNode getRightNode(int index) throws MaltChainedException {
278                    if (index < 0) {
279                            throw new ParsingException("Right index must be non-negative in feature specification. ");
280                    }
281                    if (right+index < input.size()) {
282                            return input.get(right+index);
283                    }
284                    return null;
285            }
286            
287            public DependencyNode getLeftContextNode(int index) throws MaltChainedException {
288                    if (index < 0) {
289                            throw new ParsingException("LeftContext index must be non-negative in feature specification. ");
290                    }
291                    
292                    int tmpindex = 0;
293                    for (int i = left+1; i < right; i++) {
294                            if (!input.get(i).hasAncestorInside(left, right)) {
295                                    if (tmpindex == index) {
296                                            return input.get(i);
297                                    } else {
298                                            tmpindex++;
299                                    }
300                            }
301                    }
302                    return null;
303            }
304            
305            public DependencyNode getRightContextNode(int index) throws MaltChainedException {
306                    if (index < 0) {
307                            throw new ParsingException("RightContext index must be non-negative in feature specification. ");
308                    }
309                    int tmpindex = 0;
310                    for (int i = right-1; i > left; i--) {
311                            if (!input.get(i).hasAncestorInside(left, right)) {
312                                    if (tmpindex == index) {
313                                            return input.get(i);
314                                    } else {
315                                            tmpindex++;
316                                    }
317                            }
318                    }
319                    return null;
320            }
321            
322            public DependencyNode getLeftTarget() {
323                    return input.get(left);
324            }
325            
326            public DependencyNode getRightTarget() {
327                    return input.get(right);
328            }
329            
330            public DependencyNode getNode(String dataStructure, int index) throws MaltChainedException {
331                    if (dataStructure.equals("Left")) {
332                            if (index < 0) {
333                                    throw new ParsingException("Left index must be non-negative in feature specification. ");
334                            }
335                            if (behavior == Covington.MALT_0_4) {
336                                    int tmpindex = 0;
337                                    int tmpleft = left;
338                                    while (tmpindex < index && tmpleft >= 0) {
339                                            if (input.get(tmpleft).hasHead() && input.get(tmpleft).getHead().getIndex() < tmpleft) {
340                                                    tmpleft = input.get(tmpleft).getHead().getIndex();
341                                                    tmpindex++;
342                                            } else if (input.get(tmpleft).hasHead()) {
343                                                    tmpleft--;
344                                            } else {
345                                                    tmpleft--;
346                                                    tmpindex++;
347                                            }
348                                    }
349                                    if (tmpleft >= 0) {
350                                            return input.get(tmpleft);
351                                    }
352                            } else {
353                                    if (left-index >= 0) {
354                                            return input.get(left-index);
355                                    }
356                            }
357                    } else if (dataStructure.equals("Right")) {
358                            if (index < 0) {
359                                    throw new ParsingException("Right index must be non-negative in feature specification. ");
360                            }
361                            if (right+index < input.size()) {
362                                    return input.get(right+index);
363                            }
364                    } else if (dataStructure.equals("LeftContext")) {
365                            if (index < 0) {
366                                    throw new ParsingException("LeftContext index must be non-negative in feature specification. ");
367                            }
368                            
369                            int tmpindex = 0;
370                            for (int i = left+1; i < right; i++) {
371                                    if (!input.get(i).hasAncestorInside(left, right)) {
372                                            if (tmpindex == index) {
373                                                    return input.get(i);
374                                            } else {
375                                                    tmpindex++;
376                                            }
377                                    }
378                            }
379                    } else if (dataStructure.equals("RightContext")) {
380                            if (index < 0) {
381                                    throw new ParsingException("RightContext index must be non-negative in feature specification. ");
382                            }
383                            int tmpindex = 0;
384                            for (int i = right-1; i > left; i--) {
385                                    if (!input.get(i).hasAncestorInside(left, right)) {
386                                            if (tmpindex == index) {
387                                                    return input.get(i);
388                                            } else {
389                                                    tmpindex++;
390                                            }
391                                    }
392                            }
393                    } else {
394                            throw new ParsingException("Undefined data structure in feature specification. ");
395                    }
396                    return null;
397            }
398    
399            private void clear(DependencyStructure dg) throws MaltChainedException {
400    //              dg.clear();
401    
402                    input.clear();
403                    history.clear(); //parserAction.clear();
404                    
405                    for (int i = 0; i <= dg.getHighestTokenIndex(); i++) {
406                            DependencyNode node = dg.getDependencyNode(i);
407                            if (node != null) { 
408                                    input.add(node);
409                            }
410                    }       
411    
412                    rightstop = dg.getHighestTokenIndex();
413            }
414            
415            private void initAllowRoot() throws MaltChainedException {
416                    if ((Boolean)configuration.getOptionValue("covington", "allow_root") == true) {
417                            leftstop = 0;
418                    } else {
419                            leftstop = 1;
420                    }
421            }
422            
423            private void initAllowShift() throws MaltChainedException {
424                    allowShift = (Boolean)configuration.getOptionValue("covington", "allow_shift");
425            }
426            
427            private void initBehavior() throws MaltChainedException {
428                    if ((Boolean)configuration.getOptionValue("malt0.4", "behavior") == true) {
429                            behavior = MALT_0_4;
430                    } else {
431                            behavior = MALT_1_0;
432                    }               
433            }
434            
435            protected void addTransition(ActionContainer transitionContainer, GuideUserAction action, int value) throws MaltChainedException {
436                    transitionContainer.setAction(value);
437                    for (ActionContainer container : arcLabelActionContainers) {
438                            container.setAction(-1);
439                    }
440                    currentAction.addAction(actionContainers);
441            }
442            
443            protected void initHistory() throws MaltChainedException {
444                    String decisionSettings = configuration.getOptionValue("guide", "decision_settings").toString().trim();
445                    transitionTableHandler = new TransitionTableHandler();
446                    tableHandlers = new HashMap<String, TableHandler>();
447    
448                    String[] decisionElements =  decisionSettings.split(",|#|;|\\+");
449                    int nTrans = 0;
450                    int nArc = 0;
451                    for (int i = 0; i < decisionElements.length; i++) {
452                            int index = decisionElements[i].indexOf('.');
453                            if (index == -1) {
454                                    throw new ParsingException("Decision settings '"+decisionSettings+"' contain an item '"+decisionElements[i]+"' that does not follow the format {TableHandler}.{Table}. ");
455                            }
456                            if (decisionElements[i].substring(0,index).equals("T")) {
457                                    if (!tableHandlers.containsKey("T")) {
458                                            tableHandlers.put("T", transitionTableHandler);
459                                    }
460                                    if (decisionElements[i].substring(index+1).equals("TRANS")) {
461                                            if (nTrans == 0) {
462                                                    TransitionTable ttable = (TransitionTable)transitionTableHandler.addSymbolTable("TRANS");
463                                                    ttable.addTransition(SHIFT, "SH", false, null);
464                                                    ttable.addTransition(NOARC, "NA", false, null);
465                                                    ttable.addTransition(RIGHTARC, "RA", true, null);
466                                                    ttable.addTransition(LEFTARC, "LA", true, null);
467                                            } else {
468                                                    throw new ParsingException("Illegal decision settings '"+decisionSettings+"'");
469                                            }
470                                            nTrans++;
471                                    }  else if (decisionElements[i].substring(index+1).equals("PUSH")) {
472                                            if (nArc == 0) {
473                                                    complexTransition = true;
474                                                    pushTable = (TransitionTable)transitionTableHandler.addSymbolTable("PUSH");
475                                                    pushTable.addTransition(1, "YES", true, null);
476                                                    pushTable.addTransition(2, "NO", true, null);
477                                            } else {
478                                                    throw new ParsingException("Illegal decision settings '"+decisionSettings+"'");
479                                            }
480                                            nArc++;
481                                    }
482                            } else if (decisionElements[i].substring(0,index).equals("A")) {
483                                    if (!tableHandlers.containsKey("A")) {
484                                            //tableHandlers.put("A", sentence.getDataFormatInstance().getSymbolTables());
485                                            tableHandlers.put("A", configuration.getSymbolTables());
486                                    }
487                            } else {
488                                    throw new ParsingException("The decision settings '"+decisionSettings+"' contains an unknown table handler '"+decisionElements[i].substring(0,index)+"'. " +
489                                                    "Only T (Transition table handler) and A (ArcLabel table handler) is allowed for '"+getName()+"' parsing algorithm. ");
490                            }
491                    }
492                    history = new History(decisionSettings, getConfiguration().getOptionValue("guide", "classitem_separator").toString(), tableHandlers);
493                    actionContainers = history.getActionContainers();
494                    if (actionContainers.size() < 1) {
495                            throw new ParsingException("Problem when initialize the history (sequence of actions). There are no action containers. ");
496                    }
497                    
498                    for (int i = 0; i < actionContainers.size(); i++) {
499                            if (actionContainers.get(i).getTableContainerName().equals("T.TRANS")) {
500                                    transActionContainer = actionContainers.get(i);
501                            } else if (actionContainers.get(i).getTableContainerName().equals("T.PUSH")) {
502                                    pushActionContainer = actionContainers.get(i); 
503                            } else if (actionContainers.get(i).getTableContainerName().startsWith("A.")) {
504                                    if (arcLabelActionContainers == null) {
505                                            arcLabelActionContainers = new HashSet<ActionContainer>();
506                                    }
507                                    arcLabelActionContainers.add(actionContainers.get(i));
508                            }
509                    }
510                    currentAction = history.getEmptyGuideUserAction();
511                    if (transActionContainer == null) {
512                            throw new ParsingException("The decision settings does not contain T.TRANS or T.PUSH;T.TRANS");
513                    } else {
514                            if (complexTransition && pushActionContainer == null) {
515                                    throw new ParsingException("The decision settings does not contain T.TRANS or T.PUSH;T.TRANS");
516                            }
517                    }
518    
519                    addTransition(transActionContainer, currentAction, SHIFT);
520                    addTransition(transActionContainer, currentAction, NOARC);
521    
522            }
523            
524            public GuideUserHistory getHistory() {
525                    return history;
526            }
527            
528            public SingleMalt getConfiguration() {
529                    return configuration;
530            }
531            
532            public abstract String getName();
533            protected abstract void updateLeft(DependencyStructure dg, int trans) throws MaltChainedException;
534    }
535    
536