package org.maltparser.core.syntaxgraph.writer;

import java.io.BufferedWriter;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.io.dataformat.ColumnDescription;
import org.maltparser.core.io.dataformat.DataFormatException;
import org.maltparser.core.io.dataformat.DataFormatInstance;
import org.maltparser.core.syntaxgraph.DependencyStructure;
import org.maltparser.core.syntaxgraph.TokenStructure;
import org.maltparser.core.syntaxgraph.node.TokenNode;

/**
 * Writes token structures in a tab-separated column format: one line per
 * token, one tab-separated field per column of the configured
 * {@link DataFormatInstance}, and one empty line after every sentence.
 *
 * <p>A writer must be installed through one of the {@code open} methods (or
 * {@link #setWriter(BufferedWriter)}) and a data format instance must be set
 * before sentences can be written.
 *
 * @author Johan Hall
 */
public class TabWriter implements SyntaxGraphWriter {
    /** Name of the column that holds the token index instead of a label. */
    private static final String ID_COLUMN = "ID";
    /** Written for an input column when the token has no (non-empty) label. */
    private static final char IGNORE_COLUMN_SIGN = '_';
    private static final char TAB = '\t';
    private static final char NEWLINE = '\n';

    private BufferedWriter writer;
    private DataFormatInstance dataFormatInstance;
    /** Scratch buffer reused for every column value; always emptied before use. */
    private final StringBuilder output;

    public TabWriter() {
        output = new StringBuilder();
    }

    /**
     * Opens the named file for writing with the given character encoding.
     *
     * @param fileName    path of the output file
     * @param charsetName name of the character encoding to write with
     * @throws MaltChainedException if the file cannot be opened, the encoding
     *         is not supported, or a previously installed writer cannot be closed
     */
    public void open(String fileName, String charsetName) throws MaltChainedException {
        final FileOutputStream fileStream;
        try {
            fileStream = new FileOutputStream(fileName);
        } catch (FileNotFoundException e) {
            throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
        }

        // The file stream is created by this method, so it must not be left
        // open when wrapping it or installing the writer fails.
        boolean opened = false;
        try {
            open(new OutputStreamWriter(fileStream, charsetName));
            opened = true;
        } catch (UnsupportedEncodingException e) {
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        } finally {
            if (!opened) {
                closeQuietly(fileStream);
            }
        }
    }

    /**
     * Writes to the given stream with the given character encoding. The
     * stream is not closed by this method if the encoding is rejected.
     *
     * @param os          stream to write to
     * @param charsetName name of the character encoding to write with
     * @throws MaltChainedException if the encoding is not supported or a
     *         previously installed writer cannot be closed
     */
    public void open(OutputStream os, String charsetName) throws MaltChainedException {
        try {
            open(new OutputStreamWriter(os, charsetName));
        } catch (UnsupportedEncodingException e) {
            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
        }
    }

    /**
     * Wraps the given stream writer in a {@link BufferedWriter} and installs it.
     *
     * @param osw stream writer to write to
     * @throws MaltChainedException if a previously installed writer cannot be closed
     */
    public void open(OutputStreamWriter osw) throws MaltChainedException {
        setWriter(new BufferedWriter(osw));
    }

    /** Writes nothing; this format has no prolog. */
    public void writeProlog() throws MaltChainedException {

    }

    /**
     * Writes one sentence: a line per token followed by an empty line, then
     * flushes the writer. Nothing is written when {@code syntaxGraph} is
     * {@code null}, has no tokens, or no data format instance has been set.
     *
     * <p>If writing fails the writer is closed and discarded before the
     * failure is reported.
     *
     * @param syntaxGraph the sentence to write
     * @throws MaltChainedException if the output cannot be written
     */
    public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
        if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
            return;
        }

        for (int index : syntaxGraph.getTokenIndices()) {
            try {
                writeToken(syntaxGraph, index);
            } catch (IOException e) {
                throw writeFailure(e);
            }
        }

        try {
            // Empty line terminating the sentence.
            writer.write(NEWLINE);
            writer.flush();
        } catch (IOException e) {
            throw writeFailure(e);
        }
    }

    /**
     * Writes the line for a single token: every column of the data format in
     * order, separated by tabs and terminated by a newline.
     */
    private void writeToken(TokenStructure syntaxGraph, int index) throws IOException, MaltChainedException {
        final Iterator<ColumnDescription> columns = dataFormatInstance.iterator();

        while (columns.hasNext()) {
            final ColumnDescription column = columns.next();
            // Start from an empty buffer even if an earlier column failed
            // after appending to it.
            output.setLength(0);

            if (column.getCategory() == ColumnDescription.INPUT && column.getType() != ColumnDescription.IGNORE) {
                final TokenNode node = syntaxGraph.getTokenNode(index);
                if (column.getName().equals(ID_COLUMN)) {
                    writer.write(Integer.toString(index));
                } else if (node.hasLabel(column.getSymbolTable())) {
                    output.append(node.getLabelSymbol(column.getSymbolTable()));
                    if (output.length() != 0) {
                        writer.write(output.toString());
                    } else {
                        writer.write(IGNORE_COLUMN_SIGN);
                    }
                } else {
                    writer.write(IGNORE_COLUMN_SIGN);
                }
            } else if (column.getCategory() == ColumnDescription.HEAD && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
                final DependencyStructure dependencyGraph = (DependencyStructure) syntaxGraph;
                if (dependencyGraph.getDependencyNode(index).hasHead()) {
                    writer.write(Integer.toString(dependencyGraph.getDependencyNode(index).getHead().getIndex()));
                } else {
                    // A token without a head is written with head index 0.
                    writer.write(Integer.toString(0));
                }
            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
                final DependencyStructure dependencyGraph = (DependencyStructure) syntaxGraph;
                if (dependencyGraph.getDependencyNode(index).hasHead() && dependencyGraph.getDependencyNode(index).hasHeadEdgeLabel(column.getSymbolTable())) {
                    output.append(dependencyGraph.getDependencyNode(index).getHeadEdgeLabelSymbol(column.getSymbolTable()));
                } else {
                    output.append(dependencyGraph.getDefaultRootEdgeLabelSymbol(column.getSymbolTable()));
                }
                // Unlike input columns, an empty edge label leaves the field empty.
                if (output.length() != 0) {
                    writer.write(output.toString());
                }
            } else {
                writer.write(column.getDefaultOutput());
            }

            if (columns.hasNext()) {
                writer.write(TAB);
            }
        }
        output.setLength(0);
        writer.write(NEWLINE);
    }

    /**
     * Closes the writer after a failed write and builds the exception that
     * reports the failure.
     */
    private DataFormatException writeFailure(IOException cause) {
        try {
            close();
        } catch (MaltChainedException ignored) {
            // The write failure is the root cause; a follow-up failure while
            // closing the already broken writer must not replace it.
        }
        return new DataFormatException("Could not write to the output file. ", cause);
    }

    /** Closes a stream whose failure to close would only hide an earlier error. */
    private static void closeQuietly(OutputStream stream) {
        try {
            stream.close();
        } catch (IOException ignored) {
            // Best effort: the caller is already reporting the original error.
        }
    }

    /** Writes nothing; this format has no epilog. */
    public void writeEpilog() throws MaltChainedException {

    }

    /**
     * @return the currently installed writer, or {@code null} if none is open
     */
    public BufferedWriter getWriter() {
        return writer;
    }

    /**
     * Closes the currently installed writer, if any, and installs the given
     * one. The new writer is not installed if closing the old one fails.
     *
     * @param writer the writer to use from now on
     * @throws MaltChainedException if the previous writer cannot be closed
     */
    public void setWriter(BufferedWriter writer) throws MaltChainedException {
        close();
        this.writer = writer;
    }

    public DataFormatInstance getDataFormatInstance() {
        return dataFormatInstance;
    }

    public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
        this.dataFormatInstance = dataFormatInstance;
    }

    /**
     * @return always {@code null}; this writer has no options
     */
    public String getOptions() {
        return null;
    }

    /** Ignores the option string; this writer has no options. */
    public void setOptions(String optionString) throws MaltChainedException {

    }

    /**
     * Flushes and closes the installed writer, if any. The writer is
     * discarded even when flushing or closing fails, so a later call does
     * not operate on a broken writer.
     *
     * @throws MaltChainedException if the writer cannot be flushed or closed
     */
    public void close() throws MaltChainedException {
        final BufferedWriter current = writer;
        if (current == null) {
            return;
        }
        writer = null;
        try {
            try {
                current.flush();
            } finally {
                // Attempt the close even if the flush failed.
                current.close();
            }
        } catch (IOException e) {
            throw new DataFormatException("Could not close the output file. ", e);
        }
    }
}