001package org.maltparser.core.syntaxgraph.writer;
002
003import java.io.BufferedWriter;
004import java.io.FileNotFoundException;
005import java.io.FileOutputStream;
006import java.io.IOException;
007import java.io.OutputStream;
008import java.io.OutputStreamWriter;
009import java.io.UnsupportedEncodingException;
010import java.util.ArrayList;
011import java.util.Iterator;
012
013import org.maltparser.core.exception.MaltChainedException;
014import org.maltparser.core.io.dataformat.ColumnDescription;
015import org.maltparser.core.io.dataformat.DataFormatException;
016import org.maltparser.core.io.dataformat.DataFormatInstance;
017import org.maltparser.core.symbol.SymbolTableHandler;
018import org.maltparser.core.syntaxgraph.DependencyStructure;
019import org.maltparser.core.syntaxgraph.TokenStructure;
020import org.maltparser.core.syntaxgraph.node.TokenNode;
021/**
022*
023*
024* @author Johan Hall
025*/
026public class TabWriter implements SyntaxGraphWriter {
027        private BufferedWriter writer;
028        private DataFormatInstance dataFormatInstance;
029        private final StringBuilder output;
030        private boolean closeStream = true;
031//      private String ID = "ID";
032//      private String IGNORE_COLUMN_SIGN = "_";
033        private final char TAB = '\t';
034        private final char NEWLINE = '\n';
035
036        
037        public TabWriter() { 
038                output = new StringBuilder();
039        }
040        
041        public void open(String fileName, String charsetName) throws MaltChainedException {
042                try {
043                        open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName));
044                } catch (FileNotFoundException e) {
045                        throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
046                } catch (UnsupportedEncodingException e) {
047                        throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
048                }       
049        }
050        
051        public void open(OutputStream os, String charsetName) throws MaltChainedException {
052                try {
053                        if (os == System.out || os == System.err) {
054                                closeStream = false;
055                        }
056                        open(new OutputStreamWriter(os, charsetName));
057                } catch (UnsupportedEncodingException e) {
058                        throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
059                }
060        }
061        
062        private void open(OutputStreamWriter osw) throws MaltChainedException {
063                setWriter(new BufferedWriter(osw));
064        }
065        
066        public void writeProlog() throws MaltChainedException {
067                
068        }
069        
070        public void writeComments(TokenStructure syntaxGraph, int at_index) throws MaltChainedException {
071                ArrayList<String> commentList = syntaxGraph.getComment(at_index);
072                if (commentList != null) {
073                        try {
074                                for (int i = 0; i < commentList.size(); i++) {
075                                        writer.write(commentList.get(i));
076                                        writer.write(NEWLINE);
077                                }
078                        } catch (IOException e) {
079                                close();
080                                throw new DataFormatException("Could not write to the output file. ", e);
081                        }
082                }
083        }
084        
085        public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
086                if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
087                        return;
088                }
089                Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
090                final SymbolTableHandler symbolTables = syntaxGraph.getSymbolTables();
091                
092                for (int i : syntaxGraph.getTokenIndices()) {
093                        writeComments(syntaxGraph, i);
094                        try {
095                                ColumnDescription column = null;
096                                while (columns.hasNext()) {
097                                        column = columns.next();
098
099                                        if (column.getCategory() == ColumnDescription.INPUT) { // && column.getType() != ColumnDescription.IGNORE) {
100                                                TokenNode node = syntaxGraph.getTokenNode(i); 
101                                                if (!column.getName().equals("ID")) {
102                                                        if (node.hasLabel(symbolTables.getSymbolTable(column.getName()))) {
103                                                                output.append(node.getLabelSymbol(symbolTables.getSymbolTable(column.getName())));
104                                                                if (output.length() != 0) {
105                                                                        writer.write(output.toString());
106                                                                } else {
107                                                                        writer.write('_');
108                                                                }
109                                                        } else {
110                                                                writer.write('_');
111                                                        }
112                                                } else {
113                                                        writer.write(Integer.toString(i));
114                                                }
115                                        } else if (column.getCategory() == ColumnDescription.HEAD /* && column.getType() != ColumnDescription.IGNORE */&& syntaxGraph instanceof DependencyStructure) {
116                                                if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead()) {
117                                                        writer.write(Integer.toString(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHead().getIndex()));
118                                                } else {
119                                                        writer.write(Integer.toString(0));
120                                                }
121                                                
122                                        } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL /* && column.getType() != ColumnDescription.IGNORE */ && syntaxGraph instanceof DependencyStructure) {
123                                                if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead() && ((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHeadEdgeLabel(symbolTables.getSymbolTable(column.getName()))) {
124                                                        output.append(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHeadEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName())));
125                                                } else {
126                                                        output.append(((DependencyStructure)syntaxGraph).getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName())));
127                                                }
128                                                
129                                                if (output.length() != 0) {
130                                                        writer.write(output.toString());
131                                                }
132                                        } else {
133                                                writer.write(column.getDefaultOutput());
134                                        }
135                                        if (columns.hasNext()) {
136                                                writer.write(TAB);
137                                        }
138                                        output.setLength(0);
139                                }
140                                writer.write(NEWLINE);
141                                columns = dataFormatInstance.iterator();
142                        } catch (IOException e) {
143                                close();
144                                throw new DataFormatException("Could not write to the output file. ", e);
145                        }
146                }
147                writeComments(syntaxGraph, syntaxGraph.nTokenNode() + 1);
148                try {
149                        writer.write('\n');
150                        writer.flush();
151                } catch (IOException e) {
152                        close();
153                        throw new DataFormatException("Could not write to the output file. ", e);
154                }
155        }
156        
157        public void writeEpilog() throws MaltChainedException  {
158                
159        }
160        
161        public BufferedWriter getWriter() {
162                return writer;
163        }
164
165        public void setWriter(BufferedWriter writer) throws MaltChainedException  {
166                close();
167                this.writer = writer;
168        }
169        
170        public DataFormatInstance getDataFormatInstance() {
171                return dataFormatInstance;
172        }
173
174        public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
175                this.dataFormatInstance = dataFormatInstance;
176        }
177
178        public String getOptions() {
179                return null;
180        }
181        
182        public void setOptions(String optionString) throws MaltChainedException {
183                
184        }
185        
186        public void close() throws MaltChainedException {
187                try {
188                        if (writer != null) {
189                                writer.flush();
190                                if (closeStream) {
191                                        writer.close();
192                                }
193                                writer = null;
194                        }
195                }   catch (IOException e) {
196                        throw new DataFormatException("Could not close the output file. ", e);
197                } 
198
199        }
200}