001    package org.maltparser.core.io.dataformat;
002    
003    import java.net.URL;
004    import java.util.HashSet;
005    import java.util.SortedMap;
006    import java.util.TreeMap;
007    
008    import javax.xml.parsers.DocumentBuilder;
009    import javax.xml.parsers.DocumentBuilderFactory;
010    import javax.xml.parsers.ParserConfigurationException;
011    
012    import org.maltparser.core.exception.MaltChainedException;
013    import org.maltparser.core.helper.SystemLogger;
014    import org.maltparser.core.helper.Util;
015    import org.maltparser.core.symbol.SymbolTableHandler;
016    import org.w3c.dom.Element;
017    import org.w3c.dom.NodeList;
018    import org.xml.sax.SAXException;
019    
020    /**
021     *  
022     *
023     * @author Johan Hall
024     * @since 1.0
025    **/
026    public class DataFormatSpecification {  
027            public enum DataStructure {
028                    DEPENDENCY,  // Dependency structure
029                    PHRASE, // Phrase structure
030            };
031            private int entryPositionCounter;
032            private String dataFormatName;
033            private DataStructure dataStructure;
034            private final SortedMap<String, DataFormatEntry> entries;
035            private final HashSet<Dependency> dependencies;
036    //      private final HashSet<SyntaxGraphReader> supportedReaders;
037    //      private final HashSet<SyntaxGraphWriter> supportedWriters;
038            
039            public DataFormatSpecification() {
040                    entries = new TreeMap<String, DataFormatEntry>();
041                    entryPositionCounter = 0;
042                    dependencies = new HashSet<Dependency>();
043    //              supportedReaders = new HashSet<SyntaxGraphReader>();
044    //              supportedWriters = new HashSet<SyntaxGraphWriter>();
045            }
046            
047            public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy, String rootLabel) throws MaltChainedException {
048                    return new DataFormatInstance(entries, symbolTables, nullValueStrategy, rootLabel, this);
049    
050            }
051            
052            public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
053                    URL url = Util.findURL(fileName);
054                    if (url == null) {
055                            throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
056                    }
057                    parseDataFormatXMLfile(url);
058            }
059            
060            public HashSet<Dependency> getDependencies() {
061                    return dependencies;
062            }
063            
064            private void parseDataFormatXMLfile(URL url) throws MaltChainedException {
065                    if (url == null) {
066                            throw new DataFormatException("The data format specifcation file cannot be found. ");
067                    }
068                    
069                    if (SystemLogger.logger().isInfoEnabled()) {
070                            SystemLogger.logger().debug("Loading data format specification '"+url.toString()+"' ...\n");
071                    }
072                    
073            try {
074                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
075                DocumentBuilder db = dbf.newDocumentBuilder();
076    
077                    Element root = db.parse(url.openStream()).getDocumentElement();
078                    if (root.getNodeName().equals("dataformat")) { 
079                            dataFormatName = root.getAttribute("name");
080                            if (root.getAttribute("datastructure").length() > 0) {
081                                    dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
082                            } else {
083                                    dataStructure = DataStructure.DEPENDENCY;
084                            }
085                    } else {
086                            throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
087                    }
088                    NodeList cols = root.getElementsByTagName("column");
089                Element col = null;
090                for (int i = 0, n = cols.getLength(); i < n; i++) {
091                    col = (Element)cols.item(i);
092                    DataFormatEntry entry = new DataFormatEntry(i, col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
093                    entries.put(entry.getDataFormatEntryName(), entry);
094                }
095                NodeList deps = root.getElementsByTagName("dependencies");
096                if (deps.getLength() > 0) {
097                    NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
098                    for (int i = 0, n = dep.getLength(); i < n; i++) {
099                            Element e = (Element)dep.item(i);
100                            dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
101                    }
102                }
103            } catch (java.io.IOException e) {
104                    throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
105            } catch (ParserConfigurationException e) {
106                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
107            } catch (SAXException e) {
108                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
109            }
110            }
111            
112            public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
113                    DataFormatEntry entry = new DataFormatEntry(entryPositionCounter++, dataFormatEntryName, category, type, defaultOutput);
114                    entries.put(entry.getDataFormatEntryName(), entry);
115            }
116            
117            public DataFormatEntry getEntry(String dataFormatEntryName) {
118                    return entries.get(dataFormatEntryName);
119            }
120    
121            public String getDataFormatName() {
122                    return dataFormatName;
123            }
124    
125            public DataStructure getDataStructure() {
126                    return dataStructure;
127            }
128    
129            public String toString() {
130                    final StringBuilder sb = new StringBuilder();
131                    sb.append("Data format specification: ");
132                    sb.append(dataFormatName);
133                    sb.append('\n');
134                    for (DataFormatEntry dfe : entries.values()) {
135                            sb.append(dfe);
136                            sb.append('\n');
137                    }
138                    return sb.toString();
139            }
140            
141            public class Dependency {
142                    protected String dependentOn;
143                    protected String urlString;
144                    protected String map;
145                    protected String mapUrl;
146                    
147                    public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
148                            setDependentOn(dependentOn);
149                            setUrlString(urlString);
150                            setMap(map);
151                            setMapUrl(mapUrl);
152                    }
153                    
154                    public String getDependentOn() {
155                            return dependentOn;
156                    }
157                    protected void setDependentOn(String dependentOn) {
158                            this.dependentOn = dependentOn;
159                    }
160                    
161                    public String getUrlString() {
162                            return urlString;
163                    }
164    
165                    public void setUrlString(String urlString) {
166                            this.urlString = urlString;
167                    }
168    
169                    public String getMap() {
170                            return map;
171                    }
172                    protected void setMap(String map) {
173                            this.map = map;
174                    }
175    
176                    public String getMapUrl() {
177                            return mapUrl;
178                    }
179    
180                    public void setMapUrl(String mapUrl) {
181                            this.mapUrl = mapUrl;
182                    }
183            }
184    }