001    package org.maltparser.core.syntaxgraph.writer;
002    
003    import java.io.BufferedWriter;
004    import java.io.FileNotFoundException;
005    import java.io.FileOutputStream;
006    import java.io.IOException;
007    import java.io.OutputStream;
008    import java.io.OutputStreamWriter;
009    import java.io.UnsupportedEncodingException;
010    import java.util.SortedMap;
011    import java.util.regex.PatternSyntaxException;
012    
013    import org.maltparser.core.exception.MaltChainedException;
014    import org.maltparser.core.io.dataformat.ColumnDescription;
015    import org.maltparser.core.io.dataformat.DataFormatException;
016    import org.maltparser.core.io.dataformat.DataFormatInstance;
017    import org.maltparser.core.symbol.SymbolTable;
018    import org.maltparser.core.syntaxgraph.PhraseStructure;
019    import org.maltparser.core.syntaxgraph.TokenStructure;
020    import org.maltparser.core.syntaxgraph.node.NonTerminalNode;
021    import org.maltparser.core.syntaxgraph.node.PhraseStructureNode;
022    import org.maltparser.core.syntaxgraph.node.TokenNode;
023    /**
024    *
025    *
026    * @author Johan Hall
027    */
028    public class BracketWriter implements SyntaxGraphWriter {
029            private enum PennWriterFormat {
030                    DEFAULT, PRETTY
031            };
032            private PennWriterFormat format;
033            private BufferedWriter writer;
034            private DataFormatInstance dataFormatInstance;
035            private SortedMap<String,ColumnDescription> inputColumns;
036            private SortedMap<String,ColumnDescription> edgeLabelColumns;
037            private SortedMap<String,ColumnDescription> phraseLabelColumns;
038            private char STARTING_BRACKET = '(';
039            private String EMPTY_EDGELABEL = "??";
040            private char CLOSING_BRACKET = ')';
041            private char INPUT_SEPARATOR = ' ';
042            private char EDGELABEL_SEPARATOR = '-';
043            private char SENTENCE_SEPARATOR = '\n';
044            private String optionString;
045            private boolean closeStream = true;
046            
047            public BracketWriter() { 
048            }
049    
050            public void open(String fileName, String charsetName) throws MaltChainedException {
051                    try {
052                            open(new OutputStreamWriter(new FileOutputStream(fileName),charsetName));
053                    } catch (FileNotFoundException e) {
054                            throw new DataFormatException("The output file '"+fileName+"' cannot be found.", e);
055                    } catch (UnsupportedEncodingException e) {
056                            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
057                    }       
058            }
059            
060            public void open(OutputStream os, String charsetName) throws MaltChainedException {
061                    try {
062                            if (os == System.out || os == System.err) {
063                                    closeStream = false;
064                            }
065                            open(new OutputStreamWriter(os, charsetName));
066                    } catch (UnsupportedEncodingException e) {
067                            throw new DataFormatException("The character encoding set '"+charsetName+"' isn't supported.", e);
068                    }
069            }
070            
071            private void open(OutputStreamWriter osw) throws MaltChainedException {
072                    setWriter(new BufferedWriter(osw));
073            }
074    
075            public void writeEpilog() throws MaltChainedException {
076    
077            }
078            
079            public void writeProlog() throws MaltChainedException {
080            
081            }
082            
083            public void writeSentence(TokenStructure syntaxGraph) throws MaltChainedException {
084                    if (syntaxGraph == null || dataFormatInstance == null) {
085                            return;
086                    }
087                    if (syntaxGraph instanceof PhraseStructure && syntaxGraph.hasTokens()) {
088    //                      PhraseStructure phraseStructure = ((PhraseStructure) syntaxGraph);
089                            if (format == PennWriterFormat.PRETTY) {
090                                    writeElement(((PhraseStructure) syntaxGraph).getPhraseStructureRoot(), 0);
091                            } else {
092                                    writeElement(((PhraseStructure) syntaxGraph).getPhraseStructureRoot());
093                            }
094                            try {
095                                    writer.write(SENTENCE_SEPARATOR);
096                                    writer.flush();
097                            } catch (IOException e) {
098                                    close();
099                                    throw new DataFormatException("Could not write to the output file. ", e);
100                            }
101                    }
102            }
103            
104            private void writeElement(PhraseStructureNode element) throws MaltChainedException {
105                    try {
106                            if (element instanceof TokenNode) {
107                                    PhraseStructureNode t = (PhraseStructureNode)element;
108                                    SymbolTable table = null;
109                                    writer.write(STARTING_BRACKET);
110                                    int i = 0;
111                                    for (String inputColumn : inputColumns.keySet()) {
112                                            if (i != 0) {
113                                                    writer.write(INPUT_SEPARATOR);
114                                            }
115                                            table = inputColumns.get(inputColumn).getSymbolTable();
116                                            if (t.hasLabel(table)) {
117                                                    writer.write(t.getLabelSymbol(table));
118                                            }
119                                            if (i == 0) {
120                                                    for (String edgeLabelColumn : edgeLabelColumns.keySet()) {
121                                                            table = edgeLabelColumns.get(edgeLabelColumn).getSymbolTable();
122                                                            if (t.hasParentEdgeLabel(table) && !t.getParent().isRoot() && !t.getParentEdgeLabelSymbol(table).equals(EMPTY_EDGELABEL)) {
123                                                                    writer.write(EDGELABEL_SEPARATOR);
124                                                                    writer.write(t.getParentEdgeLabelSymbol(table));
125                                                            }
126                                                    }
127                                            }
128                                            i++;
129                                    }
130                                    writer.write(CLOSING_BRACKET);
131                            } else {
132                                    NonTerminalNode nt = (NonTerminalNode)element;
133                                    writer.write(STARTING_BRACKET);
134                                    SymbolTable table = null;
135                                    int i = 0;
136                                    for (String phraseLabelColumn : phraseLabelColumns.keySet()) {
137                                            if (i != 0) {
138                                                    writer.write(INPUT_SEPARATOR);
139                                            }
140                                            table = phraseLabelColumns.get(phraseLabelColumn).getSymbolTable();
141                                            if (nt.hasLabel(table)) { 
142                                                    writer.write(nt.getLabelSymbol(table));
143                                            }
144                                            if (i == 0) {
145                                                    for (String edgeLabelColumn : edgeLabelColumns.keySet()) {
146                                                            table = edgeLabelColumns.get(edgeLabelColumn).getSymbolTable();
147                                                            if (nt.hasParentEdgeLabel(table) && !nt.getParent().isRoot() && !nt.getParentEdgeLabelSymbol(table).equals(EMPTY_EDGELABEL)) {
148                                                                    writer.write(EDGELABEL_SEPARATOR);
149                                                                    writer.write(nt.getParentEdgeLabelSymbol(table));
150                                                            }
151                                                    }
152                                            }
153                                            i++;
154                                    }
155                                    for (PhraseStructureNode node : ((NonTerminalNode)element).getChildren()) {
156                                            writeElement(node);
157                                    }
158                                    writer.write(CLOSING_BRACKET);
159                            }
160                    } catch (IOException e) {
161                            throw new DataFormatException("Could not write to the output file. ", e);
162                    }
163            }
164            
165            private String getIndentation(int depth) {
166                    StringBuilder sb = new StringBuilder("");
167                    for (int i = 0; i < depth; i++) {
168                            sb.append("\t");
169                    }
170                    return sb.toString();
171            }
172            
173            private void writeElement(PhraseStructureNode element, int depth) throws MaltChainedException {
174                    try {
175                            if (element instanceof TokenNode) {
176                                    PhraseStructureNode t = (PhraseStructureNode)element;
177                                    SymbolTable table = null;
178                                    writer.write("\n" + getIndentation(depth) + STARTING_BRACKET);
179                                    int i = 0;
180                                    for (String inputColumn : inputColumns.keySet()) {
181                                            if (i != 0) {
182                                                    writer.write(INPUT_SEPARATOR);
183                                            }
184                                            table = inputColumns.get(inputColumn).getSymbolTable();
185                                            if (t.hasLabel(table)) {
186                                                    writer.write(encodeString(t.getLabelSymbol(table)));
187                                            }
188                                            if (i == 0) {
189                                                    for (String edgeLabelColumn : edgeLabelColumns.keySet()) {
190                                                            table = edgeLabelColumns.get(edgeLabelColumn).getSymbolTable();
191                                                            if (t.hasParentEdgeLabel(table) && !t.getParent().isRoot() && !t.getParentEdgeLabelSymbol(table).equals(EMPTY_EDGELABEL)) {
192                                                                    writer.write(EDGELABEL_SEPARATOR);
193                                                                    writer.write(t.getParentEdgeLabelSymbol(table));
194                                                            }
195                                                    }
196                                            }
197                                            i++;
198                                    }
199                                    writer.write(CLOSING_BRACKET);
200                            } else {
201                                    NonTerminalNode nt = (NonTerminalNode)element;
202                                    writer.write("\n" + getIndentation(depth) + STARTING_BRACKET);
203                                    SymbolTable table = null;
204                                    int i = 0;
205                                    for (String phraseLabelColumn : phraseLabelColumns.keySet()) {
206                                            if (i != 0) {
207                                                    writer.write(INPUT_SEPARATOR);
208                                            }
209                                            table = phraseLabelColumns.get(phraseLabelColumn).getSymbolTable();
210                                            if (nt.hasLabel(table)) { 
211                                                    writer.write(nt.getLabelSymbol(table));
212                                            }
213                                            if (i == 0) {
214                                                    for (String edgeLabelColumn : edgeLabelColumns.keySet()) {
215                                                            table = edgeLabelColumns.get(edgeLabelColumn).getSymbolTable();
216                                                            if (nt.hasParentEdgeLabel(table) && !nt.getParent().isRoot() && !nt.getParentEdgeLabelSymbol(table).equals(EMPTY_EDGELABEL)) {
217                                                                    writer.write(EDGELABEL_SEPARATOR);
218                                                                    writer.write(nt.getParentEdgeLabelSymbol(table));
219                                                            }
220                                                    }
221                                            }
222                                            i++;
223                                    }
224                                    for (PhraseStructureNode node : ((NonTerminalNode)element).getChildren()) {
225                                            writeElement(node, depth + 1);
226                                    }
227                                    writer.write("\n" + getIndentation(depth) + CLOSING_BRACKET);
228                            }
229                    } catch (IOException e) {
230                            throw new DataFormatException("Could not write to the output file. ", e);
231                    }
232            }
233            
234            public BufferedWriter getWriter() {
235                    return writer;
236            }
237    
238            public void setWriter(BufferedWriter writer) throws MaltChainedException {
239                    close();
240                    this.writer = writer;
241            }
242            
243            public DataFormatInstance getDataFormatInstance() {
244                    return dataFormatInstance;
245            }
246    
247            public void setDataFormatInstance(DataFormatInstance dataFormatInstance) {
248                    this.dataFormatInstance = dataFormatInstance;
249                    inputColumns = dataFormatInstance.getInputColumnDescriptions();
250                    edgeLabelColumns = dataFormatInstance.getPhraseStructureEdgeLabelColumnDescriptions();
251                    phraseLabelColumns = dataFormatInstance.getPhraseStructureNodeLabelColumnDescriptions();
252            }
253    
254            public String getOptions() {
255                    return optionString;
256            }
257            
258            public void setOptions(String optionString) throws MaltChainedException {
259                    this.optionString = optionString;
260                    format = PennWriterFormat.DEFAULT;
261    
262                    String[] argv;
263                    try {
264                            argv = optionString.split("[_\\p{Blank}]");
265                    } catch (PatternSyntaxException e) {
266                            throw new DataFormatException("Could not split the bracket writer option '"+optionString+"'. ", e);
267                    }
268                    for (int i=0; i < argv.length-1; i++) {
269                            if(argv[i].charAt(0) != '-') {
270                                    throw new DataFormatException("The argument flag should start with the following character '-', not with "+argv[i].charAt(0));
271                            }
272                            if(++i>=argv.length) {
273                                    throw new DataFormatException("The last argument does not have any value. ");
274                            }
275                            switch(argv[i-1].charAt(1)) {
276                            case 'f': 
277                                    if (argv[i].equals("p")) {
278                                            format = PennWriterFormat.PRETTY;
279                                    } else if (argv[i].equals("p")) {
280                                            format = PennWriterFormat.DEFAULT;
281                                    }
282                                    break;
283                            default:
284                                    throw new DataFormatException("Unknown bracket writer option: '"+argv[i-1]+"' with value '"+argv[i]+"'. ");             
285                            }
286                    }       
287            }
288            
289            public void close() throws MaltChainedException {
290                    try {
291                            if (writer != null) {
292                                    writer.flush();
293                                    if (closeStream) {
294                                            writer.close();
295                                    }
296                                    writer = null;
297                            }
298                    }   catch (IOException e) {
299                            throw new DataFormatException("Could not close the output file. ", e);
300                    } 
301            }
302            
303            private String encodeString(String string) {
304                    return string.replace("(", "-LRB-").replace(")", "-RRB-").replace("[", "-LSB-").replace("]", "-RSB-").replace("{", "-LCB-").replace("}", "-RCB-");
305            }
306    }