package org.maltparser.core.syntaxgraph.writer;

import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatException;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.TokenStructure;
import org.maltparser.core.syntaxgraph.node.TokenNode;

/**
 * Writes a syntax graph as tab-separated text: one line per token with one
 * field per column of the configured {@link DataFormatInstance}, followed by
 * an empty line after each sentence.
 *
 * @author Johan Hall
 */
public class TabWriter implements SyntaxGraphWriter {
    /** Name of the input column that holds the token index instead of a label. */
    private static final String ID_COLUMN = "ID";
    /** Placeholder written for an input column that has no value for a token. */
    private static final char IGNORE_COLUMN_SIGN = '_';
    private static final char TAB = '\t';
    private static final char NEWLINE = '\n';

    private BufferedWriter writer;
    private DataFormatInstance dataFormatInstance;
    /** Scratch buffer reused for every label field. */
    private final StringBuilder output;
    /** Whether {@link #close()} closes the current writer or only flushes it. */
    private boolean closeStream = true;

    public TabWriter() {
        output = new StringBuilder();
    }

    /**
     * Opens a file for writing. Any previously opened writer is closed first.
     *
     * @param fileName    path of the output file
     * @param charsetName name of the character encoding to use
     * @throws MaltChainedException if the file cannot be opened, the encoding
     *                              is not supported, or the previous writer
     *                              cannot be closed
     */
    public void open(String fileName, String charsetName) throws MaltChainedException {
        FileOutputStream fileStream = null;
        boolean opened = false;
        try {
            fileStream = new FileOutputStream(fileName);
            open(new OutputStreamWriter(fileStream, charsetName), true);
            opened = true;
        } catch (FileNotFoundException e) {
            throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
        } catch (UnsupportedEncodingException e) {
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        } finally {
            // Do not leak the file handle when the writer could not be installed
            // (unsupported charset, or the previous writer failed to close).
            if (!opened && fileStream != null) {
                try {
                    fileStream.close();
                } catch (IOException ignored) {
                    // The failure that got us here is already propagating.
                }
            }
        }
    }

    /**
     * Starts writing to an existing stream. Any previously opened writer is
     * closed first. {@code System.out} and {@code System.err} are only flushed,
     * never closed, when this writer is closed.
     *
     * @param os          the stream to write to
     * @param charsetName name of the character encoding to use
     * @throws MaltChainedException if the encoding is not supported or the
     *                              previous writer cannot be closed
     */
    public void open(OutputStream os, String charsetName) throws MaltChainedException {
        try {
            final boolean standardStream = (os == System.out || os == System.err);
            open(new OutputStreamWriter(os, charsetName), !standardStream);
        } catch (UnsupportedEncodingException e) {
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        }
    }

    /**
     * Installs a buffered writer around {@code osw}. The close policy is updated
     * only after the previous writer has been closed, so the previous writer is
     * released according to its own policy and not the new one.
     */
    private void open(OutputStreamWriter osw, boolean closeOnClose) throws MaltChainedException {
        setWriter(new BufferedWriter(osw));
        closeStream = closeOnClose;
    }

    /** Does nothing; this format has no prolog. */
    public void writeProlog() throws MaltChainedException {

    }

    /**
     * Writes one sentence: a line per token followed by an empty line, then
     * flushes the writer. Nothing is written when {@code syntaxGraph} is
     * {@code null}, has no tokens, or no data format instance has been set.
     *
     * @param syntaxGraph the sentence to write
     * @throws MaltChainedException if writing fails; the writer is closed in that case
     */
    public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
        if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
            return;
        }
        final SymbolTableHandler symbolTables = syntaxGraph.getSymbolTables();
        // Head and edge-label columns can only be filled from a dependency structure.
        final DependencyStructure dependencyGraph =
                (syntaxGraph instanceof DependencyStructure) ? (DependencyStructure) syntaxGraph : null;

        try {
            for (int index : syntaxGraph.getTokenIndices()) {
                final Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
                while (columns.hasNext()) {
                    writeColumn(columns.next(), index, syntaxGraph, dependencyGraph, symbolTables);
                    if (columns.hasNext()) {
                        writer.write(TAB);
                    }
                }
                writer.write(NEWLINE);
            }
            // Empty line terminates the sentence.
            writer.write(NEWLINE);
            writer.flush();
        } catch (IOException e) {
            throw writeFailure(e);
        }
    }

    /** Writes the field of a single column for the token at {@code index}. */
    private void writeColumn(ColumnDescription column, int index, TokenStructure syntaxGraph,
            DependencyStructure dependencyGraph, SymbolTableHandler symbolTables)
            throws IOException, MaltChainedException {
        // Start from a clean buffer even if an earlier field failed half-way.
        output.setLength(0);

        if (column.getCategory() == ColumnDescription.INPUT) {
            final TokenNode node = syntaxGraph.getTokenNode(index);
            if (column.getName().equals(ID_COLUMN)) {
                writer.write(Integer.toString(index));
            } else if (node.hasLabel(symbolTables.getSymbolTable(column.getName()))) {
                output.append(node.getLabelSymbol(symbolTables.getSymbolTable(column.getName())));
                if (output.length() != 0) {
                    writer.write(output.toString());
                } else {
                    writer.write(IGNORE_COLUMN_SIGN);
                }
            } else {
                writer.write(IGNORE_COLUMN_SIGN);
            }
        } else if (column.getCategory() == ColumnDescription.HEAD && dependencyGraph != null) {
            if (dependencyGraph.getDependencyNode(index).hasHead()) {
                writer.write(Integer.toString(dependencyGraph.getDependencyNode(index).getHead().getIndex()));
            } else {
                writer.write(Integer.toString(0));
            }
        } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && dependencyGraph != null) {
            if (dependencyGraph.getDependencyNode(index).hasHead()
                    && dependencyGraph.getDependencyNode(index).hasHeadEdgeLabel(symbolTables.getSymbolTable(column.getName()))) {
                output.append(dependencyGraph.getDependencyNode(index).getHeadEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName())));
            } else {
                output.append(dependencyGraph.getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName())));
            }
            if (output.length() != 0) {
                writer.write(output.toString());
            }
        } else {
            writer.write(column.getDefaultOutput());
        }
        output.setLength(0);
    }

    /**
     * Releases the writer after a failed write and builds the exception to throw.
     * A secondary failure while closing is dropped so that the original write
     * error is the one reported to the caller.
     */
    private DataFormatException writeFailure(IOException cause) {
        try {
            close();
        } catch (MaltChainedException ignored) {
            // The write failure is the primary error.
        }
        return new DataFormatException("Could not write to the output file. ", cause);
    }

    /** Does nothing; this format has no epilog. */
    public void writeEpilog() throws MaltChainedException {

    }

    public BufferedWriter getWriter() {
        return writer;
    }

    /**
     * Replaces the current writer, closing the previous one first. The current
     * close policy is left unchanged.
     *
     * @param writer the writer to use from now on
     * @throws MaltChainedException if the previous writer cannot be closed
     */
    public void setWriter(BufferedWriter writer) throws MaltChainedException {
        close();
        this.writer = writer;
    }

    public DataFormatInstance getDataFormatInstance() {
        return dataFormatInstance;
    }

    public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
        this.dataFormatInstance = dataFormatInstance;
    }

    /** Always returns {@code null}; this writer has no options. */
    public String getOptions() {
        return null;
    }

    /** Does nothing; this writer has no options. */
    public void setOptions(String optionString) throws MaltChainedException {

    }

    /**
     * Flushes the current writer and, unless it wraps a standard stream, closes
     * it. The writer reference is dropped even when flushing or closing fails,
     * so a broken writer is never reused. Does nothing if no writer is open.
     *
     * @throws MaltChainedException if flushing or closing fails
     */
    public void close() throws MaltChainedException {
        if (writer == null) {
            return;
        }
        final BufferedWriter current = writer;
        writer = null;
        try {
            try {
                current.flush();
            } finally {
                // Release the underlying handle even if the flush failed.
                if (closeStream) {
                    current.close();
                }
            }
        } catch (IOException e) {
            throw new DataFormatException("Could not close the output file. ", e);
        }
    }
}