001package org.maltparser.core.syntaxgraph.writer;
002
003import java.io.BufferedWriter;
004import java.io.FileNotFoundException;
005import java.io.FileOutputStream;
006import java.io.IOException;
007import java.io.OutputStream;
008import java.io.OutputStreamWriter;
009import java.io.UnsupportedEncodingException;
010import java.util.Iterator;
011
012import org.maltparser.core.exception.MaltChainedException;
013import org.maltparser.core.io.dataformat.ColumnDescription;
014import org.maltparser.core.io.dataformat.DataFormatException;
015import org.maltparser.core.io.dataformat.DataFormatInstance;
016import org.maltparser.core.symbol.SymbolTableHandler;
017import org.maltparser.core.syntaxgraph.DependencyStructure;
018import org.maltparser.core.syntaxgraph.TokenStructure;
019import org.maltparser.core.syntaxgraph.node.TokenNode;
020/**
021*
022*
023* @author Johan Hall
024*/
025public class TabWriter implements SyntaxGraphWriter {
026        private BufferedWriter writer;
027        private DataFormatInstance dataFormatInstance;
028        private final StringBuilder output;
029        private boolean closeStream = true;
030//      private String ID = "ID";
031//      private String IGNORE_COLUMN_SIGN = "_";
032        private final char TAB = '\t';
033        private final char NEWLINE = '\n';
034
035        
036        public TabWriter() { 
037                output = new StringBuilder();
038        }
039        
040        public void open(String fileName, String charsetName) throws MaltChainedException {
041                try {
042                        open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName));
043                } catch (FileNotFoundException e) {
044                        throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
045                } catch (UnsupportedEncodingException e) {
046                        throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
047                }       
048        }
049        
050        public void open(OutputStream os, String charsetName) throws MaltChainedException {
051                try {
052                        if (os == System.out || os == System.err) {
053                                closeStream = false;
054                        }
055                        open(new OutputStreamWriter(os, charsetName));
056                } catch (UnsupportedEncodingException e) {
057                        throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
058                }
059        }
060        
061        private void open(OutputStreamWriter osw) throws MaltChainedException {
062                setWriter(new BufferedWriter(osw));
063        }
064        
065        public void writeProlog() throws MaltChainedException {
066                
067        }
068        
069        public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
070                if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
071                        return;
072                }
073                Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
074                final SymbolTableHandler symbolTables = syntaxGraph.getSymbolTables();
075                
076                for (int i : syntaxGraph.getTokenIndices()) {
077                        try {
078                                ColumnDescription column = null;
079                                while (columns.hasNext()) {
080                                        column = columns.next();
081
082                                        if (column.getCategory() == ColumnDescription.INPUT) { // && column.getType() != ColumnDescription.IGNORE) {
083                                                TokenNode node = syntaxGraph.getTokenNode(i); 
084                                                if (!column.getName().equals("ID")) {
085                                                        if (node.hasLabel(symbolTables.getSymbolTable(column.getName()))) {
086                                                                output.append(node.getLabelSymbol(symbolTables.getSymbolTable(column.getName())));
087                                                                if (output.length() != 0) {
088                                                                        writer.write(output.toString());
089                                                                } else {
090                                                                        writer.write('_');
091                                                                }
092                                                        } else {
093                                                                writer.write('_');
094                                                        }
095                                                } else {
096                                                        writer.write(Integer.toString(i));
097                                                }
098                                        } else if (column.getCategory() == ColumnDescription.HEAD /* && column.getType() != ColumnDescription.IGNORE */&& syntaxGraph instanceof DependencyStructure) {
099                                                if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead()) {
100                                                        writer.write(Integer.toString(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHead().getIndex()));
101                                                } else {
102                                                        writer.write(Integer.toString(0));
103                                                }
104                                                
105                                        } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL /* && column.getType() != ColumnDescription.IGNORE */ && syntaxGraph instanceof DependencyStructure) {
106                                                if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead() && ((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHeadEdgeLabel(symbolTables.getSymbolTable(column.getName()))) {
107                                                        output.append(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHeadEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName())));
108                                                } else {
109                                                        output.append(((DependencyStructure)syntaxGraph).getDefaultRootEdgeLabelSymbol(symbolTables.getSymbolTable(column.getName())));
110                                                }
111                                                
112                                                if (output.length() != 0) {
113                                                        writer.write(output.toString());
114                                                }
115                                        } else {
116                                                writer.write(column.getDefaultOutput());
117                                        }
118                                        if (columns.hasNext()) {
119                                                writer.write(TAB);
120                                        }
121                                        output.setLength(0);
122                                }
123                                writer.write(NEWLINE);
124                                columns = dataFormatInstance.iterator();
125                        } catch (IOException e) {
126                                close();
127                                throw new DataFormatException("Could not write to the output file. ", e);
128                        }
129                }
130                
131                try {
132                        writer.write('\n');
133                        writer.flush();
134                } catch (IOException e) {
135                        close();
136                        throw new DataFormatException("Could not write to the output file. ", e);
137                }
138        }
139        
140        public void writeEpilog() throws MaltChainedException  {
141                
142        }
143        
144        public BufferedWriter getWriter() {
145                return writer;
146        }
147
148        public void setWriter(BufferedWriter writer) throws MaltChainedException  {
149                close();
150                this.writer = writer;
151        }
152        
153        public DataFormatInstance getDataFormatInstance() {
154                return dataFormatInstance;
155        }
156
157        public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
158                this.dataFormatInstance = dataFormatInstance;
159        }
160
161        public String getOptions() {
162                return null;
163        }
164        
165        public void setOptions(String optionString) throws MaltChainedException {
166                
167        }
168        
169        public void close() throws MaltChainedException {
170                try {
171                        if (writer != null) {
172                                writer.flush();
173                                if (closeStream) {
174                                        writer.close();
175                                }
176                                writer = null;
177                        }
178                }   catch (IOException e) {
179                        throw new DataFormatException("Could not close the output file. ", e);
180                } 
181
182        }
183}