001    package org.maltparser.core.syntaxgraph.reader;
002    
003    import java.io.BufferedReader;
004    import java.io.File;
005    import java.io.FileInputStream;
006    import java.io.FileNotFoundException;
007    import java.io.IOException;
008    import java.io.InputStream;
009    import java.io.InputStreamReader;
010    import java.io.UnsupportedEncodingException;
011    import java.net.URL;
012    import java.util.Iterator;
013    
014    import org.maltparser.core.exception.MaltChainedException;
015    import org.maltparser.core.io.dataformat.ColumnDescription;
016    import org.maltparser.core.io.dataformat.DataFormatException;
017    import org.maltparser.core.io.dataformat.DataFormatInstance;
018    import org.maltparser.core.syntaxgraph.DependencyStructure;
019    import org.maltparser.core.syntaxgraph.Element;
020    import org.maltparser.core.syntaxgraph.TokenStructure;
021    import org.maltparser.core.syntaxgraph.edge.Edge;
022    /**
023    *
024    *
025    * @author Johan Hall
026    */
027    public class TabReader implements SyntaxGraphReader {
028            private BufferedReader reader;
029            private int sentenceCount;
030            private final StringBuilder input;
031            private DataFormatInstance dataFormatInstance;
032            private static final String IGNORE_COLUMN_SIGN = "_";
033            private static final char TAB = '\t';
034            private static final char NEWLINE = '\n';
035            private static final char CARRIAGE_RETURN = '\r';
036            private String fileName = null;
037            private URL url = null;
038            private String charsetName;
039            private int nIterations;
040            private int cIterations;
041            
042            
043            public TabReader() { 
044                    input = new StringBuilder();
045                    nIterations = 1;
046                    cIterations = 1;
047            }
048            
049            private void reopen() throws MaltChainedException {
050                    close();
051                    if (fileName != null) {
052                            open(fileName, charsetName);
053                    } else if (url != null) {
054                            open(url, charsetName);
055                    } else {
056                            throw new DataFormatException("The input stream cannot be reopen. ");
057                    }
058            }
059            
060            public void open(String fileName, String charsetName) throws MaltChainedException {
061                    setFileName(fileName);
062                    setCharsetName(charsetName);
063                    try {
064                            open(new FileInputStream(fileName), charsetName);
065                    } catch (FileNotFoundException e) {
066                            throw new DataFormatException("The input file '"+fileName+"' cannot be found. ", e);
067                    }
068            }
069            
070            public void open(URL url, String charsetName) throws MaltChainedException {
071                    setUrl(url);
072                    setCharsetName(charsetName);
073                    if (url == null) {
074                            throw new DataFormatException("The input file cannot be found. ");
075                    }
076                    try {
077                            open(url.openStream(), charsetName);
078                    } catch (IOException e) {
079                            throw new DataFormatException("The URL '"+url.toString()+"' cannot be opened. ", e);
080                    }
081            }
082            
083            public void open(InputStream is, String charsetName) throws MaltChainedException {
084                    try {
085                            open(new InputStreamReader(is, charsetName));
086                    } catch (UnsupportedEncodingException e) {
087                            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported. ", e);
088                    }
089            }
090            
091            public void open(InputStreamReader isr) throws MaltChainedException {
092                    setReader(new BufferedReader(isr));
093                    setSentenceCount(0);
094            }
095            
096            public void readProlog() throws MaltChainedException {
097                    
098            }
099            
100            public boolean readSentence(TokenStructure syntaxGraph) throws MaltChainedException  {
101                    if (syntaxGraph == null || dataFormatInstance == null) {
102                            return false;
103                    }
104                    
105                    Element node = null;
106                    Edge edge = null;
107                    input.setLength(0);
108                    int i = 0;
109                    int terminalCounter = 0;
110                    int nNewLines = 0;
111                    syntaxGraph.clear();
112                    Iterator<ColumnDescription> columns = dataFormatInstance.iterator();
113                    while (true) {
114                            int c;
115    
116                            try {
117                                    c = reader.read();
118                            } catch (IOException e) {
119                                    close();
120                                    throw new DataFormatException("Error when reading from the input file. ", e);
121                            }
122                            if (c == TAB || c == NEWLINE || c == CARRIAGE_RETURN || c == -1) {
123                                    if (input.length() != 0) {                                      
124                                            if (i == 0) {
125                                                    terminalCounter++;
126                                                    node = syntaxGraph.addTokenNode(terminalCounter);
127                                            }
128                                            ColumnDescription column = null;
129                                            if (columns.hasNext()) {
130                                                    column = columns.next();
131                                                    if (column.getCategory() == ColumnDescription.INPUT && node != null) {
132                                                            syntaxGraph.addLabel(node, column.getName(), input.toString());
133                                                    } else if (column.getCategory() == ColumnDescription.HEAD) {
134                                                            if (syntaxGraph instanceof DependencyStructure) {
135                                                                    if (column.getType() != ColumnDescription.IGNORE && !input.toString().equals(IGNORE_COLUMN_SIGN)) { // bugfix
136                                                                    //if (!input.toString().equals(IGNORE_COLUMN_SIGN)) {
137                                                                            edge = ((DependencyStructure)syntaxGraph).addDependencyEdge(Integer.parseInt(input.toString()), terminalCounter);
138                                                                    }
139                                                            } 
140                                                            else {
141                                                                    close();
142                                                                    throw new DataFormatException("The input graph is not a dependency graph and therefore it is not possible to add dependncy edges. ");
143                                                            }
144                                                    } else if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL && edge != null) {
145                                                            if (column.getType() != ColumnDescription.IGNORE && !input.toString().equals(IGNORE_COLUMN_SIGN)) { // bugfix
146                                                                    syntaxGraph.addLabel(edge, column.getName(), input.toString());
147                                                            } // bugfix
148                                                    }
149                                            }
150                                            input.setLength(0);
151                                            nNewLines = 0;
152                                            i++;
153                                    }
154                                    if (c == NEWLINE) {
155                                            nNewLines++;
156                                            i = 0;
157                                            columns = dataFormatInstance.iterator();
158                                    }
159                            } else {
160                                    input.append((char)c);
161                            }
162                            
163                            if (nNewLines == 2 && c == NEWLINE) {
164                                    if (syntaxGraph.hasTokens()) {
165                                            sentenceCount++;
166                                    }
167                                    return true;
168                            } else if (c == -1) {
169                                    if (syntaxGraph.hasTokens()) {
170                                            sentenceCount++;
171                                    }
172                                    if (cIterations < nIterations) {
173                                            cIterations++;
174                                            reopen();
175                                            return true;
176                                    }
177                                    
178                                    return false;                                   
179                            }
180                    }
181            }
182            
183            public void readEpilog() throws MaltChainedException {
184                    
185            }
186            
187            public BufferedReader getReader() {
188                    return reader;
189            }
190    
191            public void setReader(BufferedReader reader) throws MaltChainedException {
192                    close();
193                    this.reader = reader;
194            }
195            
196            public DataFormatInstance getDataFormatInstance() {
197                    return dataFormatInstance;
198            }
199    
200            public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
201                    this.dataFormatInstance = dataFormatInstance;
202            }
203    
204            public int getSentenceCount() throws MaltChainedException {
205                    return sentenceCount;
206            }
207            
208            public void setSentenceCount(int sentenceCount) {
209                    this.sentenceCount = sentenceCount;
210            }
211            
212            public String getOptions() {
213                    return null;
214            }
215            
216            public void setOptions(String optionString) throws MaltChainedException {
217                    
218            }
219            
220            public String getFileName() {
221                    return fileName;
222            }
223    
224            public void setFileName(String fileName) {
225                    this.fileName = fileName;
226            }
227    
228            public URL getUrl() {
229                    return url;
230            }
231    
232            public void setUrl(URL url) {
233                    this.url = url;
234            }
235    
236            public String getCharsetName() {
237                    return charsetName;
238            }
239    
240            public void setCharsetName(String charsetName) {
241                    this.charsetName = charsetName;
242            }
243    
244            public int getNIterations() {
245                    return nIterations;
246            }
247    
248            public void setNIterations(int iterations) {
249                    nIterations = iterations;
250            }
251    
252            public int getIterationCounter() {
253                    return cIterations;
254            }
255    
256            public void close() throws MaltChainedException {
257                    try {
258                            if (reader != null) {
259                                    reader.close();
260                                    reader = null;
261                            }
262                    } catch (IOException e) {
263                            throw new DataFormatException("Error when closing the input file. ", e);
264                    } 
265            }
266            
267            public void clear() throws MaltChainedException {
268                    close();
269                    input.setLength(0);
270                    dataFormatInstance = null;
271                    sentenceCount = 0;
272            }
273    }