001package org.maltparser.core.io.dataformat;
002
003import java.net.URL;
004import java.util.LinkedHashMap;
005import java.util.Map;
006
007import javax.xml.parsers.DocumentBuilder;
008import javax.xml.parsers.DocumentBuilderFactory;
009import javax.xml.parsers.ParserConfigurationException;
010
011import org.maltparser.core.exception.MaltChainedException;
012import org.maltparser.core.helper.HashSet;
013import org.maltparser.core.helper.URLFinder;
014import org.maltparser.core.symbol.SymbolTableHandler;
015import org.w3c.dom.Element;
016import org.w3c.dom.NodeList;
017import org.xml.sax.SAXException;
018
019/**
020 *  
021 *
022 * @author Johan Hall
023 * @since 1.0
024**/
025public class DataFormatSpecification {  
026        public enum DataStructure {
027                DEPENDENCY,  // Dependency structure
028                PHRASE, // Phrase structure
029        };
030//      private int entryPositionCounter;
031        private String dataFormatName;
032        private DataStructure dataStructure;
033        private final Map<String, DataFormatEntry> entries;
034        private final HashSet<Dependency> dependencies;
035//      private final HashSet<SyntaxGraphReader> supportedReaders;
036//      private final HashSet<SyntaxGraphWriter> supportedWriters;
037        
038        public DataFormatSpecification() {
039                entries = new LinkedHashMap<String, DataFormatEntry>();
040//              entryPositionCounter = 0;
041                dependencies = new HashSet<Dependency>();
042//              supportedReaders = new HashSet<SyntaxGraphReader>();
043//              supportedWriters = new HashSet<SyntaxGraphWriter>();
044        }
045        
046        public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException {
047                return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this); //rootLabel, this);
048
049        }
050        
051        public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
052                final URLFinder f = new URLFinder();
053                URL url = f.findURL(fileName);
054                if (url == null) {
055                        throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
056                }
057                parseDataFormatXMLfile(url);
058        }
059        
060        public HashSet<Dependency> getDependencies() {
061                return dependencies;
062        }
063        
064        public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
065                if (url == null) {
066                        throw new DataFormatException("The data format specifcation file cannot be found. ");
067                }
068
069        try {
070            DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
071            DocumentBuilder db = dbf.newDocumentBuilder();
072
073                Element root = db.parse(url.openStream()).getDocumentElement();
074                if (root.getNodeName().equals("dataformat")) { 
075                        dataFormatName = root.getAttribute("name");
076                        if (root.getAttribute("datastructure").length() > 0) {
077                                dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
078                        } else {
079                                dataStructure = DataStructure.DEPENDENCY;
080                        }
081                } else {
082                        throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
083                }
084                NodeList cols = root.getElementsByTagName("column");
085            Element col = null;
086            for (int i = 0, n = cols.getLength(); i < n; i++) {
087                col = (Element)cols.item(i);
088                DataFormatEntry entry = new DataFormatEntry(col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
089                entries.put(entry.getDataFormatEntryName(), entry);
090            }
091            NodeList deps = root.getElementsByTagName("dependencies");
092            if (deps.getLength() > 0) {
093                NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
094                for (int i = 0, n = dep.getLength(); i < n; i++) {
095                        Element e = (Element)dep.item(i);
096                        dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
097                }
098            }
099        } catch (java.io.IOException e) {
100                throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
101        } catch (ParserConfigurationException e) {
102                throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
103        } catch (SAXException e) {
104                throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
105        }
106        }
107        
108        public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
109                DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput);
110                entries.put(entry.getDataFormatEntryName(), entry);
111        }
112        
113        public DataFormatEntry getEntry(String dataFormatEntryName) {
114                return entries.get(dataFormatEntryName);
115        }
116
117        public String getDataFormatName() {
118                return dataFormatName;
119        }
120
121        public DataStructure getDataStructure() {
122                return dataStructure;
123        }
124
125        public String toString() {
126                final StringBuilder sb = new StringBuilder();
127                sb.append("Data format specification: ");
128                sb.append(dataFormatName);
129                sb.append('\n');
130                for (DataFormatEntry dfe : entries.values()) {
131                        sb.append(dfe);
132                        sb.append('\n');
133                }
134                return sb.toString();
135        }
136        
137        public class Dependency {
138                protected String dependentOn;
139                protected String urlString;
140                protected String map;
141                protected String mapUrl;
142                
143                public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
144                        setDependentOn(dependentOn);
145                        setUrlString(urlString);
146                        setMap(map);
147                        setMapUrl(mapUrl);
148                }
149                
150                public String getDependentOn() {
151                        return dependentOn;
152                }
153                protected void setDependentOn(String dependentOn) {
154                        this.dependentOn = dependentOn;
155                }
156                
157                public String getUrlString() {
158                        return urlString;
159                }
160
161                public void setUrlString(String urlString) {
162                        this.urlString = urlString;
163                }
164
165                public String getMap() {
166                        return map;
167                }
168                protected void setMap(String map) {
169                        this.map = map;
170                }
171
172                public String getMapUrl() {
173                        return mapUrl;
174                }
175
176                public void setMapUrl(String mapUrl) {
177                        this.mapUrl = mapUrl;
178                }
179
180                @Override
181                public String toString() {
182                        return "Dependency [dependentOn=" + dependentOn + ", map=" + map
183                                        + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]";
184                }
185        }
186}