001package org.maltparser.core.lw.graph; 002 003import java.io.BufferedReader; 004import java.io.FileInputStream; 005import java.io.IOException; 006import java.io.InputStreamReader; 007import java.util.ArrayList; 008import java.util.Formatter; 009import java.util.SortedSet; 010 011import org.maltparser.concurrent.graph.dataformat.ColumnDescription; 012import org.maltparser.concurrent.graph.dataformat.DataFormat; 013import org.maltparser.core.exception.MaltChainedException; 014import org.maltparser.core.symbol.SymbolTableHandler; 015import org.maltparser.core.symbol.hash.HashSymbolTableHandler; 016import org.maltparser.core.syntaxgraph.DependencyStructure; 017import org.maltparser.core.syntaxgraph.edge.Edge; 018import org.maltparser.core.syntaxgraph.node.DependencyNode; 019 020 021public class LWTest { 022 private static final String IGNORE_COLUMN_SIGN = "_"; 023 public static String[] readSentences(BufferedReader reader) throws IOException { 024 ArrayList<String> tokens = new ArrayList<String>(); 025 String line; 026 while ((line = reader.readLine()) != null) { 027 if (line.trim().length() == 0) { 028 break; 029 } else { 030 tokens.add(line.trim()); 031 } 032 033 } 034 return tokens.toArray(new String[tokens.size()]); 035 } 036 037 public static DependencyStructure getOldDependencyGraph(DataFormat dataFormat, SymbolTableHandler symbolTableHandlers, String[] tokens) throws MaltChainedException { 038 DependencyStructure oldGraph = new org.maltparser.core.syntaxgraph.DependencyGraph(symbolTableHandlers); 039 for (int i = 0; i < tokens.length; i++) { 040 oldGraph.addDependencyNode(i+1); 041 } 042 for (int i = 0; i < tokens.length; i++) { 043 DependencyNode node = oldGraph.getDependencyNode(i+1); 044 String[] items = tokens[i].split("\t"); 045 Edge edge = null; 046 for (int j = 0; j < items.length; j++) { 047 ColumnDescription column = dataFormat.getColumnDescription(j); 048 049 if (column.getCategory() == ColumnDescription.INPUT && node != null) { 050 oldGraph.addLabel(node, column.getName(), items[j]); 051 } else if (column.getCategory() == ColumnDescription.HEAD) { 052 if (column.getCategory() != ColumnDescription.IGNORE && !items[j].equals(IGNORE_COLUMN_SIGN)) { 053 edge = oldGraph.addDependencyEdge(Integer.parseInt(items[j]), i+1); 054 } 055 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) { 056 oldGraph.addLabel(edge, column.getName(), items[j]); 057 } 058 } 059 } 060 061 oldGraph.setDefaultRootEdgeLabel(oldGraph.getSymbolTables().getSymbolTable("DEPREL"), "ROOT"); 062 return oldGraph; 063 } 064 065 public static void main(String[] args) { 066 long startTime = System.currentTimeMillis(); 067 String inFile = args[0]; 068 String charSet = "UTF-8"; 069 070 BufferedReader reader = null; 071 072 try { 073 DataFormat dataFormat = DataFormat.parseDataFormatXMLfile("/appdata/dataformat/conllx.xml"); 074 reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), charSet)); 075 int sentenceCounter = 0; 076 while (true) { 077 String[] goldTokens = readSentences(reader); 078 if (goldTokens.length == 0) { 079 break; 080 } 081 sentenceCounter++; 082 SymbolTableHandler newTable = new HashSymbolTableHandler(); 083 DependencyStructure newGraph = new LWDependencyGraph(dataFormat, newTable, goldTokens, "ROOT"); 084// SymbolTableHandler oldTable = new HashSymbolTableHandler(); 085// DependencyStructure oldGraph = getOldDependencyGraph(dataFormat, oldTable, goldTokens); 086 int newGraphINT; 087 int oldGraphINT; 088 boolean newGraphBOOL; 089 boolean oldGraphBOOL; 090 SortedSet<LWNode> newGraphSortedSet; 091 SortedSet<DependencyNode> oldGraphSortedSet; 092 093// for (int i = 0; i < newGraph.nDependencyNode(); i++) { 094// newGraphINT = newGraph.getDependencyNode(i).getIndex(); 095// oldGraphINT = oldGraph.getDependencyNode(i).getIndex(); 096 097 098// newGraphINT = newGraph.getNode(i).getHeadIndex(); 099// newGraphINT = newGraph.getDependencyNode(i).getHead() != null ? newGraph.getDependencyNode(i).getHead().getIndex() : -1; 100// oldGraphINT = oldGraph.getDependencyNode(i).getHead() != null ? oldGraph.getDependencyNode(i).getHead().getIndex() : -1; 101 102 103// newGraphINT = newGraph.getDependencyNode(i).getPredecessor() != null ? newGraph.getDependencyNode(i).getPredecessor().getIndex() : -1; 104// oldGraphINT = oldGraph.getDependencyNode(i).getPredecessor() != null ? oldGraph.getDependencyNode(i).getPredecessor().getIndex() : -1; 105 106// newGraphINT = newGraph.getTokenNode(i).getSuccessor() != null ? newGraph.getTokenNode(i).getSuccessor().getIndex() : -1; 107// oldGraphINT = oldGraph.getTokenNode(i).getSuccessor() != null ? oldGraph.getTokenNode(i).getSuccessor().getIndex() : -1; 108 109// newGraphINT = newGraph.getDependencyNode(i).getLeftDependentCount(); 110// oldGraphINT = oldGraph.getDependencyNode(i).getLeftDependentCount(); 111// 112// newGraphINT = newGraph.getDependencyNode(i).getRightDependentCount(); 113// oldGraphINT = oldGraph.getDependencyNode(i).getRightDependentCount(); 114 115// newGraphINT = newGraph.getDependencyNode(i).getRightmostDependent() != null ? newGraph.getNode(i).getRightmostDependent().getIndex() : -1; 116// oldGraphINT = oldGraph.getDependencyNode(i).getRightmostDependent() != null ? oldGraph.getDependencyNode(i).getRightmostDependent ().getIndex() : -1; 117// newGraphINT = newGraph.getDependencyNode(i).findComponent().getIndex(); 118// oldGraphINT = oldGraph.getDependencyNode(i).findComponent().getIndex(); 119// 120// newGraphINT = newGraph.getDependencyNode(i).getRank(); 121// oldGraphINT = oldGraph.getDependencyNode(i).getRank(); 122 123 124// newGraphBOOL = newGraph.getDependencyNode(i).isRoot(); 125// oldGraphBOOL = oldGraph.getDependencyNode(i).isRoot(); 126 127// newGraphBOOL = newGraph.getDependencyNode(i).hasRightDependent(); 128// oldGraphBOOL = oldGraph.getDependencyNode(i).hasRightDependent(); 129 130// newGraphBOOL = newGraph.getDependencyNode(i).hasHead(); 131// oldGraphBOOL = oldGraph.getDependencyNode(i).hasHead(); 132// if (newGraphBOOL != oldGraphBOOL) { 133// System.out.println(newGraphBOOL + "\t" + oldGraphBOOL); 134// } 135 136// newGraphSortedSet = newGraph.getNode(i).getRightDependents(); 137// oldGraphSortedSet = oldGraph.getDependencyNode(i).getLeftDependents(); 138// if (newGraphSortedSet.size() != oldGraphSortedSet.size()) { 139// System.out.println(newGraphSortedSet + "\t" + oldGraphSortedSet); 140// } else { 141// Iterator<DependencyNode> it = oldGraphSortedSet.iterator(); 142// for (Node n : newGraphSortedSet) { 143// DependencyNode o = it.next(); 144// if (n.getIndex() != o.getIndex()) { 145// System.out.println(n.getIndex() + "\t" + o.getIndex()); 146// } 147// } 148// } 149// if (newGraphINT != oldGraphINT) { 150// System.out.println(newGraphINT + "\t" + oldGraphINT); 151// } 152// } 153 154 155// System.out.println(oldGraph); 156 } 157 } catch (IOException e) { 158 e.printStackTrace(); 159 } catch (LWGraphException e) { 160 e.printStackTrace(); 161 } catch (MaltChainedException e) { 162 e.printStackTrace(); 163 } finally { 164 if (reader != null) { 165 try { 166 reader.close(); 167 } catch (IOException e) { 168 e.printStackTrace(); 169 } 170 } 171 } 172 long elapsed = System.currentTimeMillis() - startTime; 173 System.out.println("Finished init basic : " + new Formatter().format("%02d:%02d:%02d", elapsed/3600000, elapsed%3600000/60000, elapsed%60000/1000)+" ("+elapsed+" ms)"); 174 } 175 176 177}