package org.maltparser.core.io.dataformat;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.HashSet;
import org.maltparser.core.helper.Util;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * In-memory representation of a data format specification.
 * <p>
 * A specification has a name, a {@link DataStructure}, a set of column entries
 * (kept in the order they were added) and a set of {@link Dependency} objects.
 * It is populated either from an XML document via
 * {@link #parseDataFormatXMLfile(URL)} or programmatically via
 * {@link #addEntry(String, String, String, String)}.
 *
 * @author Johan Hall
 * @since 1.0
 **/
public class DataFormatSpecification {
    /** The kind of structure a data format describes. */
    public enum DataStructure {
        /** Dependency structure. */
        DEPENDENCY,
        /** Phrase structure. */
        PHRASE,
    }

    private String dataFormatName;
    private DataStructure dataStructure;
    /** Column entries keyed by entry name; a LinkedHashMap so insertion order is preserved. */
    private final Map<String, DataFormatEntry> entries;
    private final HashSet<Dependency> dependencies;

    /**
     * Creates an empty specification with no name, no data structure,
     * no entries and no dependencies.
     */
    public DataFormatSpecification() {
        entries = new LinkedHashMap<String, DataFormatEntry>();
        dependencies = new HashSet<Dependency>();
    }

    /**
     * Creates a {@code DataFormatInstance} from the entries currently held by this specification.
     *
     * @param symbolTables the symbol table handler passed on to the instance
     * @param nullValueStrategy the null value strategy passed on to the instance
     * @return a new data format instance bound to this specification
     * @throws MaltChainedException if the instance cannot be created
     */
    public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy) throws MaltChainedException {
        return new DataFormatInstance(entries, symbolTables, nullValueStrategy, this);
    }

    /**
     * Resolves {@code fileName} with {@code Util.findURL} and parses the XML
     * document found there into this specification.
     *
     * @param fileName the name of the data format specification file
     * @throws MaltChainedException if the file cannot be located or parsed
     */
    public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
        final URL url = Util.findURL(fileName);
        if (url == null) {
            throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
        }
        parseDataFormatXMLfile(url);
    }

    /**
     * Returns the dependencies collected so far.
     *
     * @return the internal (live) set of dependencies
     */
    public HashSet<Dependency> getDependencies() {
        return dependencies;
    }

    /**
     * Parses the XML document at {@code url} into this specification.
     * <p>
     * The document element must be named {@code dataformat}. Its {@code name}
     * attribute becomes the data format name, and its optional
     * {@code datastructure} attribute selects the {@link DataStructure}
     * (defaulting to {@link DataStructure#DEPENDENCY} when absent or empty).
     * Every {@code column} element is added as an entry, and every
     * {@code dependency} element below the first {@code dependencies} element
     * is added as a {@link Dependency}. Entries and dependencies are added to
     * whatever this specification already contains.
     *
     * @param url the location of the data format specification document
     * @throws MaltChainedException if {@code url} is {@code null}, the document
     *         cannot be read or parsed, the document element is not
     *         {@code dataformat}, or the {@code datastructure} attribute names
     *         an unknown data structure
     */
    public void parseDataFormatXMLfile(URL url) throws MaltChainedException {
        if (url == null) {
            throw new DataFormatException("The data format specifcation file cannot be found. ");
        }

        InputStream in = null;
        try {
            // NOTE(review): the parser is created with its default configuration.
            // If specification files can come from untrusted sources, consider
            // disabling DTDs/external entities on the factory (XXE hardening).
            final DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
            final DocumentBuilder db = dbf.newDocumentBuilder();

            in = url.openStream();
            final Element root = db.parse(in).getDocumentElement();
            if (!root.getNodeName().equals("dataformat")) {
                throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
            }
            dataFormatName = root.getAttribute("name");
            dataStructure = parseDataStructure(root.getAttribute("datastructure"), url);
            readColumns(root);
            readDependencies(root);
        } catch (IOException e) {
            throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
        } catch (ParserConfigurationException e) {
            throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
        } catch (SAXException e) {
            throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
        } finally {
            // The stream is opened here, so it is closed here regardless of
            // whether the parser closed it or parsing failed.
            closeQuietly(in);
        }
    }

    /**
     * Maps the value of the {@code datastructure} attribute to a {@link DataStructure}.
     * An empty value selects {@link DataStructure#DEPENDENCY}; an unknown value is
     * reported as a {@code DataFormatException} instead of leaking an unchecked
     * {@link IllegalArgumentException} from {@code valueOf}.
     */
    private static DataStructure parseDataStructure(String attributeValue, URL url) throws MaltChainedException {
        if (attributeValue.length() == 0) {
            return DataStructure.DEPENDENCY;
        }
        try {
            // Locale.ROOT keeps the case mapping independent of the default locale.
            return DataStructure.valueOf(attributeValue.toUpperCase(Locale.ROOT));
        } catch (IllegalArgumentException e) {
            throw new DataFormatException("Unknown data structure '"+attributeValue+"' in the file "+url.toString()+". ", e);
        }
    }

    /** Adds one entry per {@code column} element found below {@code root}. */
    private void readColumns(Element root) {
        final NodeList cols = root.getElementsByTagName("column");
        for (int i = 0, n = cols.getLength(); i < n; i++) {
            final Element col = (Element)cols.item(i);
            addEntry(col.getAttribute("name"), col.getAttribute("category"), col.getAttribute("type"), col.getAttribute("default"));
        }
    }

    /** Adds one dependency per {@code dependency} element below the first {@code dependencies} element. */
    private void readDependencies(Element root) {
        final NodeList deps = root.getElementsByTagName("dependencies");
        if (deps.getLength() == 0) {
            return;
        }
        final NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
        for (int i = 0, n = dep.getLength(); i < n; i++) {
            final Element e = (Element)dep.item(i);
            dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
        }
    }

    /** Closes {@code in} if it is non-null, ignoring any failure to close. */
    private static void closeQuietly(InputStream in) {
        if (in == null) {
            return;
        }
        try {
            in.close();
        } catch (IOException ignored) {
            // Best effort: the document has already been read (or reading has
            // already failed with a more useful exception).
        }
    }

    /**
     * Creates a new entry and stores it under the name reported by the entry itself,
     * replacing any entry previously stored under that name.
     *
     * @param dataFormatEntryName the name of the entry
     * @param category the category of the entry
     * @param type the type of the entry
     * @param defaultOutput the default output of the entry
     */
    public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
        final DataFormatEntry entry = new DataFormatEntry(dataFormatEntryName, category, type, defaultOutput);
        entries.put(entry.getDataFormatEntryName(), entry);
    }

    /**
     * Looks up an entry by name.
     *
     * @param dataFormatEntryName the name of the entry
     * @return the entry stored under that name, or {@code null} if there is none
     */
    public DataFormatEntry getEntry(String dataFormatEntryName) {
        return entries.get(dataFormatEntryName);
    }

    /**
     * Returns the name of the data format.
     *
     * @return the name read from the specification, or {@code null} if none has been parsed
     */
    public String getDataFormatName() {
        return dataFormatName;
    }

    /**
     * Returns the data structure of the data format.
     *
     * @return the data structure, or {@code null} if no specification has been parsed
     */
    public DataStructure getDataStructure() {
        return dataStructure;
    }

    /**
     * Returns the data format name followed by one line per entry, in entry order.
     */
    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder();
        sb.append("Data format specification: ");
        sb.append(dataFormatName);
        sb.append('\n');
        for (DataFormatEntry dfe : entries.values()) {
            sb.append(dfe);
            sb.append('\n');
        }
        return sb.toString();
    }

    /**
     * A dependency declared by a data format specification. When read from XML,
     * the four values come from the {@code name}, {@code url}, {@code map} and
     * {@code urlmap} attributes of a {@code dependency} element, in that order.
     */
    public class Dependency {
        protected String dependentOn;
        protected String urlString;
        protected String map;
        protected String mapUrl;

        /**
         * Creates a dependency from its four attribute values.
         *
         * @param dependentOn the value of the {@code name} attribute
         * @param urlString the value of the {@code url} attribute
         * @param map the value of the {@code map} attribute
         * @param mapUrl the value of the {@code urlmap} attribute
         */
        public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
            setDependentOn(dependentOn);
            setUrlString(urlString);
            setMap(map);
            setMapUrl(mapUrl);
        }

        public String getDependentOn() {
            return dependentOn;
        }

        protected void setDependentOn(String dependentOn) {
            this.dependentOn = dependentOn;
        }

        public String getUrlString() {
            return urlString;
        }

        public void setUrlString(String urlString) {
            this.urlString = urlString;
        }

        public String getMap() {
            return map;
        }

        protected void setMap(String map) {
            this.map = map;
        }

        public String getMapUrl() {
            return mapUrl;
        }

        public void setMapUrl(String mapUrl) {
            this.mapUrl = mapUrl;
        }

        @Override
        public String toString() {
            return "Dependency [dependentOn=" + dependentOn + ", map=" + map
                    + ", mapUrl=" + mapUrl + ", urlString=" + urlString + "]";
        }
    }
}