001    package org.maltparser.core.syntaxgraph.writer;
002    
003    import java.io.BufferedWriter;
004    import java.io.FileNotFoundException;
005    import java.io.FileOutputStream;
006    import java.io.IOException;
007    import java.io.OutputStream;
008    import java.io.OutputStreamWriter;
009    import java.io.UnsupportedEncodingException;
010    import java.util.Iterator;
011    
012    import org.maltparser.core.exception.MaltChainedException;
013    import org.maltparser.core.io.dataformat.ColumnDescription;
014    import org.maltparser.core.io.dataformat.DataFormatException;
015    import org.maltparser.core.io.dataformat.DataFormatInstance;
016    import org.maltparser.core.syntaxgraph.DependencyStructure;
017    import org.maltparser.core.syntaxgraph.TokenStructure;
018    import org.maltparser.core.syntaxgraph.node.TokenNode;
019    /**
020    *
021    *
022    * @author Johan Hall
023    */
024    public class TabWriter implements SyntaxGraphWriter {
025            private BufferedWriter writer;
026            private DataFormatInstance dataFormatInstance;
027            private final StringBuilder output;
028            
029    //      private String ID = "ID";
030    //      private String IGNORE_COLUMN_SIGN = "_";
031            private final char TAB = '\t';
032            private final char NEWLINE = '\n';
033    
034            
035            public TabWriter() { 
036                    output = new StringBuilder();
037            }
038            
039            public void open(String fileName, String charsetName) throws MaltChainedException {
040                    try {
041                            open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName));
042                    } catch (FileNotFoundException e) {
043                            throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
044                    } catch (UnsupportedEncodingException e) {
045                            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
046                    }       
047            }
048            
049            public void open(OutputStream os, String charsetName) throws MaltChainedException {
050                    try {
051                            open(new OutputStreamWriter(os, charsetName));
052                    } catch (UnsupportedEncodingException e) {
053                            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
054                    }
055            }
056            
057            public void open(OutputStreamWriter osw) throws MaltChainedException {
058                    setWriter(new BufferedWriter(osw));
059            }
060            
061            public void writeProlog() throws MaltChainedException {
062                    
063            }
064            
065            public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
066                    if (syntaxGraph == null || dataFormatInstance == null || !syntaxGraph.hasTokens()) {
067                            return;
068                    }
069                    Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
070                    
071                    for (int i : syntaxGraph.getTokenIndices()) {
072                            try {
073                                    ColumnDescription column = null;
074                                    while (columns.hasNext()) {
075                                            column = columns.next();
076    
077                                            if (column.getCategory() == ColumnDescription.INPUT && column.getType() != ColumnDescription.IGNORE) {
078                                                    TokenNode node = syntaxGraph.getTokenNode(i); 
079                                                    if (!column.getName().equals("ID")) {
080                                                            if (node.hasLabel(column.getSymbolTable())) {
081                                                                    output.append(node.getLabelSymbol(column.getSymbolTable()));
082                                                                    if (output.length() != 0) {
083                                                                            writer.write(output.toString());
084                                                                    } else {
085                                                                            writer.write('_');
086                                                                    }
087                                                            } else {
088                                                                    writer.write('_');
089                                                            }
090                                                    } else {
091                                                            writer.write(Integer.toString(i));
092                                                    }
093                                            } else if (column.getCategory() == ColumnDescription.HEAD && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
094                                                    if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead()) {
095                                                            writer.write(Integer.toString(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHead().getIndex()));
096                                                    } else {
097                                                            writer.write(Integer.toString(0));
098                                                    }
099                                                    
100                                            } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && column.getType() != ColumnDescription.IGNORE && syntaxGraph instanceof DependencyStructure) {
101                                                    if (((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHead() && ((DependencyStructure)syntaxGraph).getDependencyNode(i).hasHeadEdgeLabel(column.getSymbolTable())) {
102                                                            output.append(((DependencyStructure)syntaxGraph).getDependencyNode(i).getHeadEdgeLabelSymbol(column.getSymbolTable()));
103                                                    } else {
104                                                            output.append(((DependencyStructure)syntaxGraph).getDefaultRootEdgeLabelSymbol(column.getSymbolTable()));
105                                                    }
106                                                    
107                                                    if (output.length() != 0) {
108                                                            writer.write(output.toString());
109                                                    }
110                                            } else {
111                                                    writer.write(column.getDefaultOutput());
112                                            }
113                                            if (columns.hasNext()) {
114                                                    writer.write(TAB);
115                                            }
116                                            output.setLength(0);
117                                    }
118                                    writer.write(NEWLINE);
119                                    columns = dataFormatInstance.iterator();
120                            } catch (IOException e) {
121                                    close();
122                                    throw new DataFormatException("Could not write to the output file. ", e);
123                            }
124                    }
125                    
126                    try {
127                            writer.write('\n');
128                    } catch (IOException e) {
129                            close();
130                            throw new DataFormatException("Could not write to the output file. ", e);
131                    }
132            }
133            
134            public void writeEpilog() throws MaltChainedException  {
135                    
136            }
137            
138            public BufferedWriter getWriter() {
139                    return writer;
140            }
141    
142            public void setWriter(BufferedWriter writer) throws MaltChainedException  {
143                    close();
144                    this.writer = writer;
145            }
146            
147            public DataFormatInstance getDataFormatInstance() {
148                    return dataFormatInstance;
149            }
150    
151            public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
152                    this.dataFormatInstance = dataFormatInstance;
153            }
154    
155            public String getOptions() {
156                    return null;
157            }
158            
159            public void setOptions(String optionString) throws MaltChainedException {
160                    
161            }
162            
163            public void close() throws MaltChainedException {
164                    try {
165                            if (writer != null) {
166                                    writer.flush();
167                                    writer.close();
168                                    writer = null;
169                            }
170                    }   catch (IOException e) {
171                            throw new DataFormatException("Could not close the output file. ", e);
172                    } 
173    
174            }
175    }