001    package org.maltparser.core.io.dataformat;
002    
003    import java.net.URL;
004    import java.util.LinkedHashMap;
005    import java.util.Map;
006    
007    import javax.xml.parsers.DocumentBuilder;
008    import javax.xml.parsers.DocumentBuilderFactory;
009    import javax.xml.parsers.ParserConfigurationException;
010    
011    import org.maltparser.core.exception.MaltChainedException;
012    import org.maltparser.core.helper.HashSet;
013    import org.maltparser.core.helper.Util;
014    import org.maltparser.core.symbol.SymbolTableHandler;
015    import org.w3c.dom.Element;
016    import org.w3c.dom.NodeList;
017    import org.xml.sax.SAXException;
018    
019    /**
020     *  
021     *
022     * @author Johan Hall
023     * @since 1.0
024    **/
025    public class DataFormatSpecification {  
026            public enum DataStructure {
027                    DEPENDENCY,  // Dependency structure
028                    PHRASE, // Phrase structure
029            };
030    //      private int entryPositionCounter;
031            private String dataFormatName;
032            private DataStructure dataStructure;
033            private final Map<String, DataFormatEntry> entries;
034            private final HashSet<Dependency> dependencies;
035    //      private final HashSet<SyntaxGraphReader> supportedReaders;
036    //      private final HashSet<SyntaxGraphWriter> supportedWriters;
037            
038            public DataFormatSpecification() {
039                    entries = new LinkedHashMap<String, DataFormatEntry>();
040    //              entryPositionCounter = 0;
041                    dependencies = new HashSet<Dependency>();
042    //              supportedReaders = new HashSet<SyntaxGraphReader>();
043    //              supportedWriters = new HashSet<SyntaxGraphWriter>();
044            }
045            
046            public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException {
047                    return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this); //rootLabel, this);
048    
049            }
050            
051            public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
052                    URL url = Util.findURL(fileName);
053                    if (url == null) {
054                            throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
055                    }
056                    parseDataFormatXMLfile(url);
057            }
058            
059            public HashSet<Dependency> getDependencies() {
060                    return dependencies;
061            }
062            
063            public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
064                    if (url == null) {
065                            throw new DataFormatException("The data format specifcation file cannot be found. ");
066                    }
067                                    
068            try {
069                DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
070                DocumentBuilder db = dbf.newDocumentBuilder();
071    
072                    Element root = db.parse(url.openStream()).getDocumentElement();
073                    if (root.getNodeName().equals("dataformat")) { 
074                            dataFormatName = root.getAttribute("name");
075                            if (root.getAttribute("datastructure").length() > 0) {
076                                    dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
077                            } else {
078                                    dataStructure = DataStructure.DEPENDENCY;
079                            }
080                    } else {
081                            throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
082                    }
083                    NodeList cols = root.getElementsByTagName("column");
084                Element col = null;
085                for (int i = 0, n = cols.getLength(); i < n; i++) {
086                    col = (Element)cols.item(i);
087                    DataFormatEntry entry = new DataFormatEntry(col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
088                    entries.put(entry.getDataFormatEntryName(), entry);
089                }
090                NodeList deps = root.getElementsByTagName("dependencies");
091                if (deps.getLength() > 0) {
092                    NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
093                    for (int i = 0, n = dep.getLength(); i < n; i++) {
094                            Element e = (Element)dep.item(i);
095                            dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
096                    }
097                }
098            } catch (java.io.IOException e) {
099                    throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
100            } catch (ParserConfigurationException e) {
101                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
102            } catch (SAXException e) {
103                    throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
104            }
105            }
106            
107            public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
108                    DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput);
109                    entries.put(entry.getDataFormatEntryName(), entry);
110            }
111            
112            public DataFormatEntry getEntry(String dataFormatEntryName) {
113                    return entries.get(dataFormatEntryName);
114            }
115    
116            public String getDataFormatName() {
117                    return dataFormatName;
118            }
119    
120            public DataStructure getDataStructure() {
121                    return dataStructure;
122            }
123    
124            public String toString() {
125                    final StringBuilder sb = new StringBuilder();
126                    sb.append("Data format specification: ");
127                    sb.append(dataFormatName);
128                    sb.append('\n');
129                    for (DataFormatEntry dfe : entries.values()) {
130                            sb.append(dfe);
131                            sb.append('\n');
132                    }
133                    return sb.toString();
134            }
135            
136            public class Dependency {
137                    protected String dependentOn;
138                    protected String urlString;
139                    protected String map;
140                    protected String mapUrl;
141                    
142                    public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
143                            setDependentOn(dependentOn);
144                            setUrlString(urlString);
145                            setMap(map);
146                            setMapUrl(mapUrl);
147                    }
148                    
149                    public String getDependentOn() {
150                            return dependentOn;
151                    }
152                    protected void setDependentOn(String dependentOn) {
153                            this.dependentOn = dependentOn;
154                    }
155                    
156                    public String getUrlString() {
157                            return urlString;
158                    }
159    
160                    public void setUrlString(String urlString) {
161                            this.urlString = urlString;
162                    }
163    
164                    public String getMap() {
165                            return map;
166                    }
167                    protected void setMap(String map) {
168                            this.map = map;
169                    }
170    
171                    public String getMapUrl() {
172                            return mapUrl;
173                    }
174    
175                    public void setMapUrl(String mapUrl) {
176                            this.mapUrl = mapUrl;
177                    }
178    
179                    @Override
180                    public String toString() {
181                            return "Dependency [dependentOn=" + dependentOn + ", map=" + map
182                                            + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]";
183                    }
184            }
185    }