001package org.maltparser.core.lw.graph;
002
003import java.io.BufferedReader;
004import java.io.FileInputStream;
005import java.io.IOException;
006import java.io.InputStreamReader;
007import java.util.ArrayList;
008import java.util.Formatter;
009import java.util.SortedSet;
010
011import org.maltparser.concurrent.graph.dataformat.ColumnDescription;
012import org.maltparser.concurrent.graph.dataformat.DataFormat;
013import org.maltparser.core.exception.MaltChainedException;
014import org.maltparser.core.symbol.SymbolTableHandler;
015import org.maltparser.core.symbol.hash.HashSymbolTableHandler;
016import org.maltparser.core.syntaxgraph.DependencyStructure;
017import org.maltparser.core.syntaxgraph.edge.Edge;
018import org.maltparser.core.syntaxgraph.node.DependencyNode;
019
020
021public class LWTest {
022        private static final String IGNORE_COLUMN_SIGN = "_";
023    public static String[] readSentences(BufferedReader reader) throws IOException {
024        ArrayList<String> tokens = new ArrayList<String>();
025        String line;
026                while ((line = reader.readLine()) != null) {
027                        if (line.trim().length() == 0) {
028                                break;
029                        } else {
030                                tokens.add(line.trim());
031                        }
032
033                }
034        return tokens.toArray(new String[tokens.size()]);
035    }
036    
037        public static DependencyStructure getOldDependencyGraph(DataFormat dataFormat, SymbolTableHandler symbolTableHandlers, String[] tokens) throws MaltChainedException {
038                DependencyStructure oldGraph = new org.maltparser.core.syntaxgraph.DependencyGraph(symbolTableHandlers);
039                for (int i = 0; i < tokens.length; i++) {
040                    oldGraph.addDependencyNode(i+1);
041                }
042                for (int i = 0; i < tokens.length; i++) {
043                    DependencyNode node = oldGraph.getDependencyNode(i+1);
044                    String[] items = tokens[i].split("\t");
045                    Edge edge = null;
046                    for (int j = 0; j < items.length; j++) {
047                        ColumnDescription column = dataFormat.getColumnDescription(j);
048
049                            if (column.getCategory() == ColumnDescription.INPUT && node != null) {
050                                oldGraph.addLabel(node, column.getName(), items[j]);
051                            } else if (column.getCategory() == ColumnDescription.HEAD) {
052                                if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals(IGNORE_COLUMN_SIGN)) {
053                                        edge = oldGraph.addDependencyEdge(Integer.parseInt(items[j]), i+1);
054                                }
055                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
056                                oldGraph.addLabel(edge, column.getName(), items[j]);
057                                }
058                    }
059                }
060
061                oldGraph.setDefaultRootEdgeLabel(oldGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT");
062                return oldGraph;
063        }
064        
065        public static void main(String[] args) {
066                long startTime = System.currentTimeMillis();
067                String inFile = args[0];
068                String charSet = "UTF-8";
069
070        BufferedReader reader = null;
071        
072        try {
073                DataFormat dataFormat = DataFormat.parseDataFormatXMLfile("/appdata/dataformat/conllx.xml");
074                reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), charSet));
075                int sentenceCounter = 0;
076                while (true) {
077                        String[] goldTokens = readSentences(reader);
078                        if (goldTokens.length == 0) {
079                                break;
080                        }
081                        sentenceCounter++;
082                        SymbolTableHandler newTable = new HashSymbolTableHandler();
083                        DependencyStructure newGraph = new LWDependencyGraph(dataFormat, newTable, goldTokens, "ROOT");
084//                      SymbolTableHandler oldTable = new HashSymbolTableHandler();
085//                      DependencyStructure oldGraph = getOldDependencyGraph(dataFormat, oldTable, goldTokens);
086                        int newGraphINT;
087                        int oldGraphINT;
088                        boolean newGraphBOOL;
089                        boolean oldGraphBOOL;
090                        SortedSet<LWNode> newGraphSortedSet;
091                        SortedSet<DependencyNode> oldGraphSortedSet;
092                        
093//                      for (int i = 0; i < newGraph.nDependencyNode(); i++) {
094//                              newGraphINT = newGraph.getDependencyNode(i).getIndex();
095//                              oldGraphINT = oldGraph.getDependencyNode(i).getIndex();
096
097                                
098//                              newGraphINT = newGraph.getNode(i).getHeadIndex();
099//                              newGraphINT = newGraph.getDependencyNode(i).getHead() != null ? newGraph.getDependencyNode(i).getHead().getIndex() : -1;
100//                              oldGraphINT = oldGraph.getDependencyNode(i).getHead() != null ? oldGraph.getDependencyNode(i).getHead().getIndex() : -1;
101                                
102
103//                              newGraphINT = newGraph.getDependencyNode(i).getPredecessor() != null ? newGraph.getDependencyNode(i).getPredecessor().getIndex() : -1;
104//                              oldGraphINT = oldGraph.getDependencyNode(i).getPredecessor() != null ? oldGraph.getDependencyNode(i).getPredecessor().getIndex() : -1;
105            
106//                              newGraphINT = newGraph.getTokenNode(i).getSuccessor() != null ? newGraph.getTokenNode(i).getSuccessor().getIndex() : -1;
107//                              oldGraphINT = oldGraph.getTokenNode(i).getSuccessor() != null ? oldGraph.getTokenNode(i).getSuccessor().getIndex() : -1;
108        
109//                              newGraphINT = newGraph.getDependencyNode(i).getLeftDependentCount();
110//                              oldGraphINT = oldGraph.getDependencyNode(i).getLeftDependentCount();
111//
112//                              newGraphINT = newGraph.getDependencyNode(i).getRightDependentCount();
113//                              oldGraphINT = oldGraph.getDependencyNode(i).getRightDependentCount();
114                                
115//                              newGraphINT = newGraph.getDependencyNode(i).getRightmostDependent() != null ? newGraph.getNode(i).getRightmostDependent().getIndex() : -1;
116//                              oldGraphINT = oldGraph.getDependencyNode(i).getRightmostDependent() != null ? oldGraph.getDependencyNode(i).getRightmostDependent       ().getIndex() : -1;
117//                              newGraphINT = newGraph.getDependencyNode(i).findComponent().getIndex();
118//                              oldGraphINT = oldGraph.getDependencyNode(i).findComponent().getIndex();
119//
120//                              newGraphINT = newGraph.getDependencyNode(i).getRank();
121//                              oldGraphINT = oldGraph.getDependencyNode(i).getRank();
122
123                                
124//                              newGraphBOOL = newGraph.getDependencyNode(i).isRoot();
125//                              oldGraphBOOL = oldGraph.getDependencyNode(i).isRoot();
126                                
127//                              newGraphBOOL = newGraph.getDependencyNode(i).hasRightDependent();
128//                              oldGraphBOOL = oldGraph.getDependencyNode(i).hasRightDependent();
129                                
130//                              newGraphBOOL = newGraph.getDependencyNode(i).hasHead();
131//                              oldGraphBOOL = oldGraph.getDependencyNode(i).hasHead();
132//                      if (newGraphBOOL != oldGraphBOOL) {
133//                              System.out.println(newGraphBOOL + "\t" + oldGraphBOOL);
134//                      }
135                                
136//                              newGraphSortedSet = newGraph.getNode(i).getRightDependents();
137//                              oldGraphSortedSet = oldGraph.getDependencyNode(i).getLeftDependents();
138//                              if (newGraphSortedSet.size() != oldGraphSortedSet.size()) {
139//                                      System.out.println(newGraphSortedSet + "\t" + oldGraphSortedSet);
140//                              } else {
141//                                      Iterator<DependencyNode> it = oldGraphSortedSet.iterator();
142//                                      for (Node n : newGraphSortedSet) {
143//                                              DependencyNode o = it.next();
144//                                              if (n.getIndex() != o.getIndex()) {
145//                                                      System.out.println(n.getIndex() + "\t" + o.getIndex());
146//                                              }
147//                                      }
148//                              }
149//                              if (newGraphINT != oldGraphINT) {
150//                                      System.out.println(newGraphINT + "\t" + oldGraphINT);
151//                              }
152//                      }
153                        
154                        
155//                      System.out.println(oldGraph);
156                }
157        } catch (IOException e) {
158                        e.printStackTrace();
159        } catch (LWGraphException e) {
160                        e.printStackTrace();
161        } catch (MaltChainedException e) {
162                        e.printStackTrace();
163        } finally {
164                if (reader != null) {
165                        try {
166                                reader.close();
167                } catch (IOException e) {
168                                e.printStackTrace();
169                }
170                }
171        }
172        long elapsed = System.currentTimeMillis() - startTime;
173        System.out.println("Finished init basic   : " + new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)");
174        }
175
176
177}