001 package org.maltparser.core.syntaxgraph.writer; 002 003 import java.io.BufferedWriter; 004 import java.io.FileNotFoundException; 005 import java.io.FileOutputStream; 006 import java.io.IOException; 007 import java.io.OutputStream; 008 import java.io.OutputStreamWriter; 009 import java.io.UnsupportedEncodingException; 010 import java.util.SortedMap; 011 012 import org.maltparser.core.exception.MaltChainedException; 013 import org.maltparser.core.io.dataformat.ColumnDescription; 014 import org.maltparser.core.io.dataformat.DataFormatException; 015 import org.maltparser.core.io.dataformat.DataFormatInstance; 016 import org.maltparser.core.symbol.SymbolTable; 017 import org.maltparser.core.syntaxgraph.PhraseStructure; 018 import org.maltparser.core.syntaxgraph.TokenStructure; 019 import org.maltparser.core.syntaxgraph.node.NonTerminalNode; 020 import org.maltparser.core.syntaxgraph.node.PhraseStructureNode; 021 import org.maltparser.core.syntaxgraph.node.TokenNode; 022 /** 023 * 024 * 025 * @author Johan Hall 026 */ 027 public class BracketWriter implements SyntaxGraphWriter { 028 private BufferedWriter writer; 029 private DataFormatInstance dataFormatInstance; 030 private SortedMap<String,ColumnDescription> inputColumns; 031 private SortedMap<String,ColumnDescription> edgeLabelColumns; 032 private SortedMap<String,ColumnDescription> phraseLabelColumns; 033 private char STARTING_BRACKET = '('; 034 private String EMPTY_EDGELABEL = "??"; 035 private char CLOSING_BRACKET = ')'; 036 private char INPUT_SEPARATOR = ' '; 037 private char EDGELABEL_SEPARATOR = '-'; 038 private char SENTENCE_SEPARATOR = '\n'; 039 private String optionString; 040 041 public BracketWriter() { 042 } 043 044 public void open(String fileName, String charsetName) throws MaltChainedException { 045 try { 046 open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName)); 047 } catch (FileNotFoundException e) { 048 throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e); 049 } catch (UnsupportedEncodingException e) { 050 throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e); 051 } 052 } 053 054 public void open(OutputStream os, String charsetName) throws MaltChainedException { 055 try { 056 open(new OutputStreamWriter(os, charsetName)); 057 } catch (UnsupportedEncodingException e) { 058 throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e); 059 } 060 } 061 062 public void open(OutputStreamWriter osw) throws MaltChainedException { 063 setWriter(new BufferedWriter(osw)); 064 } 065 066 public void writeEpilog() throws MaltChainedException { 067 068 } 069 070 public void writeProlog() throws MaltChainedException { 071 072 } 073 074 public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException { 075 if (syntaxGraph == null || dataFormatInstance == null) { 076 return; 077 } 078 if (syntaxGraph instanceof PhraseStructure && syntaxGraph.hasTokens()) { 079 PhraseStructure phraseStructure = ((PhraseStructure) syntaxGraph); 080 writeElement(((PhraseStructure) syntaxGraph).getPhraseStructureRoot()); 081 try { 082 writer.write(SENTENCE_SEPARATOR); 083 writer.flush(); 084 } catch (IOException e) { 085 close(); 086 throw new DataFormatException("Could not write to the output file. ", e); 087 } 088 } 089 } 090 091 092 private void writeElement(PhraseStructureNode element) throws MaltChainedException { 093 try { 094 if (element instanceof TokenNode) { 095 PhraseStructureNode t = (PhraseStructureNode)element; 096 SymbolTable table = null; 097 writer.write(STARTING_BRACKET); 098 int i = 0; 099 for (String inputColumn : inputColumns.keySet()) { 100 if (i != 0) { 101 writer.write(INPUT_SEPARATOR); 102 } 103 table = inputColumns.get(inputColumn).getSymbolTable(); 104 if (t.hasLabel(table)) { 105 writer.write(t.getLabelSymbol(table)); 106 } 107 if (i == 0) { 108 for (String edgeLabelColumn : edgeLabelColumns.keySet()) { 109 table = edgeLabelColumns.get(edgeLabelColumn).getSymbolTable(); 110 if (t.hasParentEdgeLabel(table) && !t.getParent().isRoot() && !t.getParentEdgeLabelSymbol(table).equals(EMPTY_EDGELABEL)) { 111 writer.write(EDGELABEL_SEPARATOR); 112 writer.write(t.getParentEdgeLabelSymbol(table)); 113 } 114 } 115 } 116 i++; 117 } 118 writer.write(CLOSING_BRACKET); 119 } else { 120 NonTerminalNode nt = (NonTerminalNode)element; 121 writer.write(STARTING_BRACKET); 122 SymbolTable table = null; 123 int i = 0; 124 for (String phraseLabelColumn : phraseLabelColumns.keySet()) { 125 if (i != 0) { 126 writer.write(INPUT_SEPARATOR); 127 } 128 table = phraseLabelColumns.get(phraseLabelColumn).getSymbolTable(); 129 if (nt.hasLabel(table)) { 130 writer.write(nt.getLabelSymbol(table)); 131 } 132 if (i == 0) { 133 for (String edgeLabelColumn : edgeLabelColumns.keySet()) { 134 table = edgeLabelColumns.get(edgeLabelColumn).getSymbolTable(); 135 if (nt.hasParentEdgeLabel(table) && !nt.getParent().isRoot() && !nt.getParentEdgeLabelSymbol(table).equals(EMPTY_EDGELABEL)) { 136 writer.write(EDGELABEL_SEPARATOR); 137 writer.write(nt.getParentEdgeLabelSymbol(table)); 138 } 139 } 140 } 141 i++; 142 } 143 for (PhraseStructureNode node : ((NonTerminalNode)element).getChildren()) { 144 writeElement(node); 145 } 146 writer.write(CLOSING_BRACKET); 147 } 148 } catch (IOException e) { 149 throw new DataFormatException("Could not write to the output file. ", e); 150 } 151 } 152 153 public BufferedWriter getWriter() { 154 return writer; 155 } 156 157 public void setWriter(BufferedWriter writer) throws MaltChainedException { 158 close(); 159 this.writer = writer; 160 } 161 162 public DataFormatInstance getDataFormatInstance() { 163 return dataFormatInstance; 164 } 165 166 public void setDataFormatInstance(DataFormatInstance dataFormatInstance) { 167 this.dataFormatInstance = dataFormatInstance; 168 inputColumns = dataFormatInstance.getInputColumnDescriptions(); 169 edgeLabelColumns = dataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptions(); 170 phraseLabelColumns = dataFormatInstance.getPhraseStructureNodeLabelColumnDescriptions(); 171 } 172 173 public String getOptions() { 174 return optionString; 175 } 176 177 public void setOptions(String optionString) throws MaltChainedException { 178 this.optionString = optionString; 179 } 180 181 public void close() throws MaltChainedException { 182 try { 183 if (writer != null) { 184 writer.flush(); 185 writer.close(); 186 writer = null; 187 } 188 } catch (IOException e) { 189 throw new DataFormatException("Could not close the output file. ", e); 190 } 191 192 } 193 }