001    package org.maltparser.core.io.dataformat;
002    
003    import java.net.URL;
004    import java.util.LinkedHashMap;
005    import java.util.Map;
006    
007    import javax.xml.parsers.DocumentBuilder;
008    import javax.xml.parsers.DocumentBuilderFactory;
009    import javax.xml.parsers.ParserConfigurationException;
010    
011    import org.maltparser.core.exception.MaltChainedException;
012    import org.maltparser.core.helper.HashSet;
013    import org.maltparser.core.helper.URLFinder;
014    import org.maltparser.core.symbol.SymbolTableHandler;
015    import org.w3c.dom.Element;
016    import org.w3c.dom.NodeList;
017    import org.xml.sax.SAXException;
018    
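/**
 * Holds a data format specification: a format name, a data structure kind
 * (dependency or phrase structure), the column entries in the order they were
 * added, and the dependencies declared by the specification. The contents can
 * be read from an XML file whose root element is {@code dataformat}, or added
 * programmatically one entry at a time.
 *
 * @author Johan Hall
 * @since 1.0
**/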
025    public class DataFormatSpecification {  
026            public enum DataStructure {
027                    DEPENDENCY,  // Dependency structure
028                    PHRASE, // Phrase structure
029            };
030    //      private int entryPositionCounter;
031            private String dataFormatName;
032            private DataStructure dataStructure;
033            private final Map<String, DataFormatEntry> entries;
034            private final HashSet<Dependency> dependencies;
035    //      private final HashSet<SyntaxGraphReader> supportedReaders;
036    //      private final HashSet<SyntaxGraphWriter> supportedWriters;
037            
038            public DataFormatSpecification() {
039                    entries = new LinkedHashMap<String, DataFormatEntry>();
040    //              entryPositionCounter = 0;
041                    dependencies = new HashSet<Dependency>();
042    //              supportedReaders = new HashSet<SyntaxGraphReader>();
043    //              supportedWriters = new HashSet<SyntaxGraphWriter>();
044            }
045            
046            public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException {
047                    return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this); //rootLabel, this);
048    
049            }
050            
051            public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
052                    final URLFinder f = new URLFinder();
053                    URL url = f.findURL(fileName);
054                    if (url == null) {
055                            throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
056                    }
057                    parseDataFormatXMLfile(url);
058            }
059            
060            public HashSet<Dependency> getDependencies() {
061                    return dependencies;
062            }
063            
064            public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
065                    if (url == null) {
066                            throw new DataFormatException("The data format specifcation file cannot be found. ");
067                    }
068                                    
069            try {
070                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
071                DocumentBuilder db = dbf.newDocumentBuilder();
072    
073                    Element root = db.parse(url.openStream()).getDocumentElement();
074                    if (root.getNodeName().equals("dataformat")) { 
075                            dataFormatName = root.getAttribute("name");
076                            if (root.getAttribute("datastructure").length() > 0) {
077                                    dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
078                            } else {
079                                    dataStructure = DataStructure.DEPENDENCY;
080                            }
081                    } else {
082                            throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
083                    }
084                    NodeList cols = root.getElementsByTagName("column");
085                Element col = null;
086                for (int i = 0, n = cols.getLength(); i < n; i++) {
087                    col = (Element)cols.item(i);
088                    DataFormatEntry entry = new DataFormatEntry(col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
089                    entries.put(entry.getDataFormatEntryName(), entry);
090                }
091                NodeList deps = root.getElementsByTagName("dependencies");
092                if (deps.getLength() > 0) {
093                    NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
094                    for (int i = 0, n = dep.getLength(); i < n; i++) {
095                            Element e = (Element)dep.item(i);
096                            dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
097                    }
098                }
099            } catch (java.io.IOException e) {
100                    throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
101            } catch (ParserConfigurationException e) {
102                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
103            } catch (SAXException e) {
104                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
105            }
106            }
107            
108            public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
109                    DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput);
110                    entries.put(entry.getDataFormatEntryName(), entry);
111            }
112            
113            public DataFormatEntry getEntry(String dataFormatEntryName) {
114                    return entries.get(dataFormatEntryName);
115            }
116    
117            public String getDataFormatName() {
118                    return dataFormatName;
119            }
120    
121            public DataStructure getDataStructure() {
122                    return dataStructure;
123            }
124    
125            public String toString() {
126                    final StringBuilder sb = new StringBuilder();
127                    sb.append("Data format specification: ");
128                    sb.append(dataFormatName);
129                    sb.append('\n');
130                    for (DataFormatEntry dfe : entries.values()) {
131                            sb.append(dfe);
132                            sb.append('\n');
133                    }
134                    return sb.toString();
135            }
136            
137            public class Dependency {
138                    protected String dependentOn;
139                    protected String urlString;
140                    protected String map;
141                    protected String mapUrl;
142                    
143                    public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
144                            setDependentOn(dependentOn);
145                            setUrlString(urlString);
146                            setMap(map);
147                            setMapUrl(mapUrl);
148                    }
149                    
150                    public String getDependentOn() {
151                            return dependentOn;
152                    }
153                    protected void setDependentOn(String dependentOn) {
154                            this.dependentOn = dependentOn;
155                    }
156                    
157                    public String getUrlString() {
158                            return urlString;
159                    }
160    
161                    public void setUrlString(String urlString) {
162                            this.urlString = urlString;
163                    }
164    
165                    public String getMap() {
166                            return map;
167                    }
168                    protected void setMap(String map) {
169                            this.map = map;
170                    }
171    
172                    public String getMapUrl() {
173                            return mapUrl;
174                    }
175    
176                    public void setMapUrl(String mapUrl) {
177                            this.mapUrl = mapUrl;
178                    }
179    
180                    @Override
181                    public String toString() {
182                            return "Dependency [dependentOn=" + dependentOn + ", map=" + map
183                                            + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]";
184                    }
185            }
186    }