001 package org.maltparser.core.io.dataformat;
002
003 import java.net.URL;
004 import java.util.LinkedHashMap;
005 import java.util.Map;
006
007 import javax.xml.parsers.DocumentBuilder;
008 import javax.xml.parsers.DocumentBuilderFactory;
009 import javax.xml.parsers.ParserConfigurationException;
010
011 import org.maltparser.core.exception.MaltChainedException;
012 import org.maltparser.core.helper.HashSet;
013 import org.maltparser.core.helper.Util;
014 import org.maltparser.core.symbol.SymbolTableHandler;
015 import org.w3c.dom.Element;
016 import org.w3c.dom.NodeList;
017 import org.xml.sax.SAXException;
018
019 /**
020 *
021 *
022 * @author Johan Hall
023 * @since 1.0
024 **/
025 public class DataFormatSpecification {
026 public enum DataStructure {
027 DEPENDENCY, // Dependency structure
028 PHRASE, // Phrase structure
029 };
030 // private int entryPositionCounter;
031 private String dataFormatName;
032 private DataStructure dataStructure;
033 private final Map<String, DataFormatEntry> entries;
034 private final HashSet<Dependency> dependencies;
035 // private final HashSet<SyntaxGraphReader> supportedReaders;
036 // private final HashSet<SyntaxGraphWriter> supportedWriters;
037
038 public DataFormatSpecification() {
039 entries = new LinkedHashMap<String, DataFormatEntry>();
040 // entryPositionCounter = 0;
041 dependencies = new HashSet<Dependency>();
042 // supportedReaders = new HashSet<SyntaxGraphReader>();
043 // supportedWriters = new HashSet<SyntaxGraphWriter>();
044 }
045
046 public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException {
047 return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this); //rootLabel, this);
048
049 }
050
051 public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
052 URL url = Util.findURL(fileName);
053 if (url == null) {
054 throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
055 }
056 parseDataFormatXMLfile(url);
057 }
058
059 public HashSet<Dependency> getDependencies() {
060 return dependencies;
061 }
062
063 public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
064 if (url == null) {
065 throw new DataFormatException("The data format specifcation file cannot be found. ");
066 }
067
068 try {
069 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
070 DocumentBuilder db = dbf.newDocumentBuilder();
071
072 Element root = db.parse(url.openStream()).getDocumentElement();
073 if (root.getNodeName().equals("dataformat")) {
074 dataFormatName = root.getAttribute("name");
075 if (root.getAttribute("datastructure").length() > 0) {
076 dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase());
077 } else {
078 dataStructure = DataStructure.DEPENDENCY;
079 }
080 } else {
081 throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
082 }
083 NodeList cols = root.getElementsByTagName("column");
084 Element col = null;
085 for (int i = 0, n = cols.getLength(); i < n; i++) {
086 col = (Element)cols.item(i);
087 DataFormatEntry entry = new DataFormatEntry(col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
088 entries.put(entry.getDataFormatEntryName(), entry);
089 }
090 NodeList deps = root.getElementsByTagName("dependencies");
091 if (deps.getLength() > 0) {
092 NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
093 for (int i = 0, n = dep.getLength(); i < n; i++) {
094 Element e = (Element)dep.item(i);
095 dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
096 }
097 }
098 } catch (java.io.IOException e) {
099 throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
100 } catch (ParserConfigurationException e) {
101 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
102 } catch (SAXException e) {
103 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
104 }
105 }
106
107 public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
108 DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput);
109 entries.put(entry.getDataFormatEntryName(), entry);
110 }
111
112 public DataFormatEntry getEntry(String dataFormatEntryName) {
113 return entries.get(dataFormatEntryName);
114 }
115
116 public String getDataFormatName() {
117 return dataFormatName;
118 }
119
120 public DataStructure getDataStructure() {
121 return dataStructure;
122 }
123
124 public String toString() {
125 final StringBuilder sb = new StringBuilder();
126 sb.append("Data format specification: ");
127 sb.append(dataFormatName);
128 sb.append('\n');
129 for (DataFormatEntry dfe : entries.values()) {
130 sb.append(dfe);
131 sb.append('\n');
132 }
133 return sb.toString();
134 }
135
136 public class Dependency {
137 protected String dependentOn;
138 protected String urlString;
139 protected String map;
140 protected String mapUrl;
141
142 public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
143 setDependentOn(dependentOn);
144 setUrlString(urlString);
145 setMap(map);
146 setMapUrl(mapUrl);
147 }
148
149 public String getDependentOn() {
150 return dependentOn;
151 }
152 protected void setDependentOn(String dependentOn) {
153 this.dependentOn = dependentOn;
154 }
155
156 public String getUrlString() {
157 return urlString;
158 }
159
160 public void setUrlString(String urlString) {
161 this.urlString = urlString;
162 }
163
164 public String getMap() {
165 return map;
166 }
167 protected void setMap(String map) {
168 this.map = map;
169 }
170
171 public String getMapUrl() {
172 return mapUrl;
173 }
174
175 public void setMapUrl(String mapUrl) {
176 this.mapUrl = mapUrl;
177 }
178
179 @Override
180 public String toString() {
181 return "Dependency [dependentOn=" + dependentOn + ", map=" + map
182 + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]";
183 }
184 }
185 }