001package org.maltparser.core.io.dataformat; 002 003import java.net.URL; 004import java.util.LinkedHashMap; 005import java.util.Map; 006 007import javax.xml.parsers.DocumentBuilder; 008import javax.xml.parsers.DocumentBuilderFactory; 009import javax.xml.parsers.ParserConfigurationException; 010 011import org.maltparser.core.exception.MaltChainedException; 012import org.maltparser.core.helper.HashSet; 013import org.maltparser.core.helper.URLFinder; 014import org.maltparser.core.symbol.SymbolTableHandler; 015import org.w3c.dom.Element; 016import org.w3c.dom.NodeList; 017import org.xml.sax.SAXException; 018 019/** 020 * 021 * 022 * @author Johan Hall 023 * @since 1.0 024**/ 025public class DataFormatSpecification { 026 public enum DataStructure { 027 DEPENDENCY, // Dependency structure 028 PHRASE, // Phrase structure 029 }; 030// private int entryPositionCounter; 031 private String dataFormatName; 032 private DataStructure dataStructure; 033 private final Map<String, DataFormatEntry> entries; 034 private final HashSet<Dependency> dependencies; 035// private final HashSet<SyntaxGraphReader> supportedReaders; 036// private final HashSet<SyntaxGraphWriter> supportedWriters; 037 038 public DataFormatSpecification() { 039 entries = new LinkedHashMap<String, DataFormatEntry>(); 040// entryPositionCounter = 0; 041 dependencies = new HashSet<Dependency>(); 042// supportedReaders = new HashSet<SyntaxGraphReader>(); 043// supportedWriters = new HashSet<SyntaxGraphWriter>(); 044 } 045 046 public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException { 047 return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this); //rootLabel, this); 048 049 } 050 051 public void parseDataFormatXMLfile(String fileName) throws MaltChainedException { 052 final URLFinder f = new URLFinder(); 053 URL url = f.findURL(fileName); 054 if (url == null) { 055 throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. "); 056 } 057 parseDataFormatXMLfile(url); 058 } 059 060 public HashSet<Dependency> getDependencies() { 061 return dependencies; 062 } 063 064 public void parseDataFormatXMLfile(URL url) throws MaltChainedException { 065 if (url == null) { 066 throw new DataFormatException("The data format specifcation file cannot be found. "); 067 } 068 069 try { 070 DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); 071 DocumentBuilder db = dbf.newDocumentBuilder(); 072 073 Element root = db.parse(url.openStream()).getDocumentElement(); 074 if (root.getNodeName().equals("dataformat")) { 075 dataFormatName = root.getAttribute("name"); 076 if (root.getAttribute("datastructure").length() > 0) { 077 dataStructure = DataStructure.valueOf(root.getAttribute("datastructure").toUpperCase()); 078 } else { 079 dataStructure = DataStructure.DEPENDENCY; 080 } 081 } else { 082 throw new DataFormatException("Data format specification file must contain one 'dataformat' element. "); 083 } 084 NodeList cols = root.getElementsByTagName("column"); 085 Element col = null; 086 for (int i = 0, n = cols.getLength(); i < n; i++) { 087 col = (Element)cols.item(i); 088 DataFormatEntry entry = new DataFormatEntry(col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default")); 089 entries.put(entry.getDataFormatEntryName(), entry); 090 } 091 NodeList deps = root.getElementsByTagName("dependencies"); 092 if (deps.getLength() > 0) { 093 NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency"); 094 for (int i = 0, n = dep.getLength(); i < n; i++) { 095 Element e = (Element)dep.item(i); 096 dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap"))); 097 } 098 } 099 } catch (java.io.IOException e) { 100 throw new DataFormatException("Cannot find the file "+url.toString()+". ", e); 101 } catch (ParserConfigurationException e) { 102 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e); 103 } catch (SAXException e) { 104 throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e); 105 } 106 } 107 108 public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) { 109 DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput); 110 entries.put(entry.getDataFormatEntryName(), entry); 111 } 112 113 public DataFormatEntry getEntry(String dataFormatEntryName) { 114 return entries.get(dataFormatEntryName); 115 } 116 117 public String getDataFormatName() { 118 return dataFormatName; 119 } 120 121 public DataStructure getDataStructure() { 122 return dataStructure; 123 } 124 125 public String toString() { 126 final StringBuilder sb = new StringBuilder(); 127 sb.append("Data format specification: "); 128 sb.append(dataFormatName); 129 sb.append('\n'); 130 for (DataFormatEntry dfe : entries.values()) { 131 sb.append(dfe); 132 sb.append('\n'); 133 } 134 return sb.toString(); 135 } 136 137 public class Dependency { 138 protected String dependentOn; 139 protected String urlString; 140 protected String map; 141 protected String mapUrl; 142 143 public Dependency(String dependentOn, String urlString, String map, String mapUrl) { 144 setDependentOn(dependentOn); 145 setUrlString(urlString); 146 setMap(map); 147 setMapUrl(mapUrl); 148 } 149 150 public String getDependentOn() { 151 return dependentOn; 152 } 153 protected void setDependentOn(String dependentOn) { 154 this.dependentOn = dependentOn; 155 } 156 157 public String getUrlString() { 158 return urlString; 159 } 160 161 public void setUrlString(String urlString) { 162 this.urlString = urlString; 163 } 164 165 public String getMap() { 166 return map; 167 } 168 protected void setMap(String map) { 169 this.map = map; 170 } 171 172 public String getMapUrl() { 173 return mapUrl; 174 } 175 176 public void setMapUrl(String mapUrl) { 177 this.mapUrl = mapUrl; 178 } 179 180 @Override 181 public String toString() { 182 return "Dependency [dependentOn=" + dependentOn + ", map=" + map 183 + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]"; 184 } 185 } 186}