001package org.maltparser.core.syntaxgraph.writer; 002 003import java.io.BufferedWriter; 004import java.io.FileNotFoundException; 005import java.io.FileOutputStream; 006import java.io.IOException; 007import java.io.OutputStream; 008import java.io.OutputStreamWriter; 009import java.io.UnsupportedEncodingException; 010import java.util.ArrayList; 011import java.util.Iterator; 012 013import org.maltparser.core.exception.MaltChainedException; 014import org.maltparser.core.io.dataformat.ColumnDescription; 015import org.maltparser.core.io.dataformat.DataFormatException; 016import org.maltparser.core.io.dataformat.DataFormatInstance; 017import org.maltparser.core.symbol.SymbolTableHandler; 018import org.maltparser.core.syntaxgraph.DependencyStructure; 019import org.maltparser.core.syntaxgraph.TokenStructure; 020import org.maltparser.core.syntaxgraph.node.TokenNode; 021/** 022* 023* 024* @author Johan Hall 025*/ 026public class TabWriter implements SyntaxGraphWriter { 027 private BufferedWriter writer; 028 private DataFormatInstance dataFormatInstance; 029 private final StringBuilder output; 030 private boolean closeStream = true; 031// private String ID = "ID"; 032// private String IGNORE_COLUMN_SIGN = "_"; 033 private final char TAB = '\t'; 034 private final char NEWLINE = '\n'; 035 036 037 public TabWriter() { 038 output = new StringBuilder(); 039 } 040 041 public void open(String fileName, String charsetName) throws MaltChainedException { 042 try { 043 open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName)); 044 } catch (FileNotFoundException e) { 045 throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e); 046 } catch (UnsupportedEncodingException e) { 047 throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e); 048 } 049 } 050 051 public void open(OutputStream os, String charsetName) throws MaltChainedException { 052 try { 053 if (os == System.out || os == System.err) { 054 closeStream = false; 055 } 056 open(new OutputStreamWriter(os, charsetName)); 057 } catch (UnsupportedEncodingException e) { 058 throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e); 059 } 060 } 061 062 private void open(OutputStreamWriter osw) throws MaltChainedException { 063 setWriter(new BufferedWriter(osw)); 064 } 065 066 public void writeProlog() throws MaltChainedException { 067 068 } 069 070 public void writeComments(TokenStructure syntaxGraph, int at_index) throws MaltChainedException { 071 ArrayList<String> commentList = syntaxGraph.getComment(at_index); 072 if (commentList != null) { 073 try { 074 for (int i = 0; i < commentList.size(); i++) { 075 writer.write(commentList.get(i)); 076 writer.write(NEWLINE); 077 } 078 } catch (IOException e) { 079 close(); 080 throw new DataFormatException("Could not write to the output file. ", e); 081 } 082 } 083 } 084 085 public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException { 086 if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) { 087 return; 088 } 089 Iterator<ColumnDescription> columns = dataFormatInstance.iterator(); 090 final SymbolTableHandler symbolTables = syntaxGraph.getSymbolTables(); 091 092 for (int i : syntaxGraph.getTokenIndices()) { 093 writeComments(syntaxGraph, i); 094 try { 095 ColumnDescription column = null; 096 while (columns.hasNext()) { 097 column = columns.next(); 098 099 if (column.getCategory() == ColumnDescription.INPUT) { // && column.getType() != ColumnDescription.IGNORE) { 100 TokenNode node = syntaxGraph.getTokenNode(i); 101 if (!column.getName().equals("ID")) { 102 if (node.hasLabel(symbolTables.getSymbolTable(column.getName()))) { 103 output.append(node.getLabelSymbol(symbolTables.getSymbolTable(column.getName()))); 104 if (output.length() != 0) { 105 writer.write(output.toString()); 106 } else { 107 writer.write('_'); 108 } 109 } else { 110 writer.write('_'); 111 } 112 } else { 113 writer.write(Integer.toString(i)); 114 } 115 } else if (column.getCategory() == ColumnDescription.HEAD /* && column.getType() != ColumnDescription.IGNORE */&& syntaxGraph instanceof DependencyStructure) { 116 if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead()) { 117 writer.write(Integer.toString(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHead().getIndex())); 118 } else { 119 writer.write(Integer.toString(0)); 120 } 121 122 } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL /* && column.getType() != ColumnDescription.IGNORE */ && syntaxGraph instanceof DependencyStructure) { 123 if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead() && ((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHeadEdgeLabel(symbolTables.getSymbolTable(column.getName()))) { 124 output.append(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHeadEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName()))); 125 } else { 126 output.append(((DependencyStructure)syntaxGraph).getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName()))); 127 } 128 129 if (output.length() != 0) { 130 writer.write(output.toString()); 131 } 132 } else { 133 writer.write(column.getDefaultOutput()); 134 } 135 if (columns.hasNext()) { 136 writer.write(TAB); 137 } 138 output.setLength(0); 139 } 140 writer.write(NEWLINE); 141 columns = dataFormatInstance.iterator(); 142 } catch (IOException e) { 143 close(); 144 throw new DataFormatException("Could not write to the output file. ", e); 145 } 146 } 147 writeComments(syntaxGraph, syntaxGraph.nTokenNode() + 1); 148 try { 149 writer.write('\n'); 150 writer.flush(); 151 } catch (IOException e) { 152 close(); 153 throw new DataFormatException("Could not write to the output file. ", e); 154 } 155 } 156 157 public void writeEpilog() throws MaltChainedException { 158 159 } 160 161 public BufferedWriter getWriter() { 162 return writer; 163 } 164 165 public void setWriter(BufferedWriter writer) throws MaltChainedException { 166 close(); 167 this.writer = writer; 168 } 169 170 public DataFormatInstance getDataFormatInstance() { 171 return dataFormatInstance; 172 } 173 174 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) { 175 this.dataFormatInstance = dataFormatInstance; 176 } 177 178 public String getOptions() { 179 return null; 180 } 181 182 public void setOptions(String optionString) throws MaltChainedException { 183 184 } 185 186 public void close() throws MaltChainedException { 187 try { 188 if (writer != null) { 189 writer.flush(); 190 if (closeStream) { 191 writer.close(); 192 } 193 writer = null; 194 } 195 } catch (IOException e) { 196 throw new DataFormatException("Could not close the output file. ", e); 197 } 198 199 } 200}