package org.maltparser.core.io.dataformat;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.HashSet;
import java.util.Locale;
import java.util.SortedMap;
import java.util.TreeMap;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.helper.SystemLogger;
import org.maltparser.core.helper.Util;
import org.maltparser.core.symbol.SymbolTableHandler;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * Describes a data format: its name, the kind of structure it encodes, its
 * named column entries and its dependencies on other data formats.
 * <p>
 * A specification is filled either by parsing an XML file with
 * {@link #parseDataFormatXMLfile(String)} or programmatically with
 * {@link #addEntry(String, String, String, String)}.
 *
 * @author Johan Hall
 * @since 1.0
 **/
public class DataFormatSpecification {
	/** The kind of structure a data format encodes. */
	public enum DataStructure {
		DEPENDENCY, // Dependency structure
		PHRASE, // Phrase structure
	}

	/** Position handed to the next entry created by {@link #addEntry(String, String, String, String)}. */
	private int entryPositionCounter;
	/** Value of the <code>name</code> attribute of the parsed <code>dataformat</code> element. */
	private String dataFormatName;
	private DataStructure dataStructure;
	/** Column entries keyed (and therefore iterated) by entry name. */
	private final SortedMap<String, DataFormatEntry> entries;
	private final HashSet<Dependency> dependencies;
//	private final HashSet<SyntaxGraphReader> supportedReaders;
//	private final HashSet<SyntaxGraphWriter> supportedWriters;

	/**
	 * Creates an empty specification without entries or dependencies.
	 */
	public DataFormatSpecification() {
		entries = new TreeMap<String, DataFormatEntry>();
		entryPositionCounter = 0;
		dependencies = new HashSet<Dependency>();
//		supportedReaders = new HashSet<SyntaxGraphReader>();
//		supportedWriters = new HashSet<SyntaxGraphWriter>();
	}

	/**
	 * Creates a {@link DataFormatInstance} from the entries of this specification.
	 * All arguments are passed unchanged to the <code>DataFormatInstance</code> constructor.
	 *
	 * @param symbolTables the symbol table handler given to the new instance
	 * @param nullValueStrategy the null value strategy given to the new instance
	 * @param rootLabel the root label given to the new instance
	 * @return a new data format instance that refers back to this specification
	 * @throws MaltChainedException if the instance cannot be created
	 */
	public DataFormatInstance createDataFormatInstance(SymbolTableHandler symbolTables, String nullValueStrategy, String rootLabel) throws MaltChainedException {
		return new DataFormatInstance(entries, symbolTables, nullValueStrategy, rootLabel, this);
	}

	/**
	 * Locates the named data format specification file with
	 * {@link Util#findURL(String)} and loads it into this specification.
	 *
	 * @param fileName the name of the XML specification file
	 * @throws MaltChainedException if the file cannot be found, read or parsed
	 */
	public void parseDataFormatXMLfile(String fileName) throws MaltChainedException {
		URL url = Util.findURL(fileName);
		if (url == null) {
			throw new DataFormatException("The data format specifcation file '"+fileName+"'cannot be found. ");
		}
		parseDataFormatXMLfile(url);
	}

	/**
	 * Returns the dependencies collected from the parsed specification.
	 * The returned set is the internal set itself, not a copy.
	 *
	 * @return the set of dependencies
	 */
	public HashSet<Dependency> getDependencies() {
		return dependencies;
	}

	/**
	 * Parses the XML document behind <code>url</code>. The document element must be
	 * <code>dataformat</code>; its <code>column</code> elements become entries and
	 * the <code>dependency</code> elements inside its first <code>dependencies</code>
	 * element become {@link Dependency} objects.
	 *
	 * @param url the location of the XML specification file
	 * @throws MaltChainedException if <code>url</code> is null, the document cannot be
	 *         read or parsed, the root element is not <code>dataformat</code>, or the
	 *         <code>datastructure</code> attribute names an unknown data structure
	 */
	private void parseDataFormatXMLfile(URL url) throws MaltChainedException {
		if (url == null) {
			throw new DataFormatException("The data format specifcation file cannot be found. ");
		}

		// Guard on the same level that is actually used for the message.
		if (SystemLogger.logger().isDebugEnabled()) {
			SystemLogger.logger().debug("Loading data format specification '"+url.toString()+"' ...\n");
		}

		InputStream in = null;
		try {
			// NOTE(review): the parser is created with default settings, so DTDs and
			// external entities are not disabled. Confirm that specification files only
			// come from trusted locations, or harden the factory.
			DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
			DocumentBuilder db = dbf.newDocumentBuilder();

			in = url.openStream();
			Element root = db.parse(in).getDocumentElement();
			if (!root.getNodeName().equals("dataformat")) {
				throw new DataFormatException("Data format specification file must contain one 'dataformat' element. ");
			}
			dataFormatName = root.getAttribute("name");
			dataStructure = readDataStructure(root);
			readColumns(root);
			readDependencies(root);
		} catch (IOException e) {
			throw new DataFormatException("Cannot find the file "+url.toString()+". ", e);
		} catch (ParserConfigurationException e) {
			throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
		} catch (SAXException e) {
			throw new DataFormatException("Problem parsing the file "+url.toString()+". ", e);
		} finally {
			// The stream was opened here, so it is released here as well.
			if (in != null) {
				try {
					in.close();
				} catch (IOException ignored) {
					// Nothing useful can be done if closing fails; the parse result stands.
				}
			}
		}
	}

	/**
	 * Reads the <code>datastructure</code> attribute of the root element.
	 *
	 * @param root the <code>dataformat</code> element
	 * @return the named data structure, or {@link DataStructure#DEPENDENCY} when the attribute is empty or absent
	 * @throws MaltChainedException if the attribute does not name a {@link DataStructure} constant
	 */
	private DataStructure readDataStructure(Element root) throws MaltChainedException {
		final String value = root.getAttribute("datastructure");
		if (value.length() == 0) {
			return DataStructure.DEPENDENCY;
		}
		try {
			// Fixed locale: the constant names must not depend on the default locale.
			return DataStructure.valueOf(value.toUpperCase(Locale.ENGLISH));
		} catch (IllegalArgumentException e) {
			throw new DataFormatException("Unknown data structure '"+value+"' in the data format specification. ", e);
		}
	}

	/**
	 * Registers one entry for every <code>column</code> element below the root; the
	 * position of an entry is the index of its element in document order.
	 *
	 * @param root the <code>dataformat</code> element
	 */
	private void readColumns(Element root) {
		final NodeList cols = root.getElementsByTagName("column");
		final int n = cols.getLength();
		for (int i = 0; i < n; i++) {
			final Element col = (Element)cols.item(i);
			DataFormatEntry entry = new DataFormatEntry(i, col.getAttribute("name"), col.getAttribute("category"),col.getAttribute("type"), col.getAttribute("default"));
			entries.put(entry.getDataFormatEntryName(), entry);
		}
		// Move the counter past the positions used above so that entries added
		// later through addEntry() do not reuse the position of a parsed column.
		if (entryPositionCounter < n) {
			entryPositionCounter = n;
		}
	}

	/**
	 * Collects the <code>dependency</code> elements found inside the first
	 * <code>dependencies</code> element below the root, if there is one.
	 *
	 * @param root the <code>dataformat</code> element
	 */
	private void readDependencies(Element root) {
		final NodeList deps = root.getElementsByTagName("dependencies");
		if (deps.getLength() == 0) {
			return;
		}
		final NodeList dep = ((Element)deps.item(0)).getElementsByTagName("dependency");
		for (int i = 0, n = dep.getLength(); i < n; i++) {
			final Element e = (Element)dep.item(i);
			dependencies.add(new Dependency(e.getAttribute("name"), e.getAttribute("url"), e.getAttribute("map"), e.getAttribute("urlmap")));
		}
	}

	/**
	 * Adds an entry at the next free position. An existing entry with the same
	 * name is replaced.
	 *
	 * @param dataFormatEntryName the name of the entry
	 * @param category the category of the entry
	 * @param type the type of the entry
	 * @param defaultOutput the default output of the entry
	 */
	public void addEntry(String dataFormatEntryName, String category, String type, String defaultOutput) {
		DataFormatEntry entry = new DataFormatEntry(entryPositionCounter++, dataFormatEntryName, category, type, defaultOutput);
		entries.put(entry.getDataFormatEntryName(), entry);
	}

	/**
	 * Looks up an entry by name.
	 *
	 * @param dataFormatEntryName the name of the entry
	 * @return the entry, or <code>null</code> if no entry has that name
	 */
	public DataFormatEntry getEntry(String dataFormatEntryName) {
		return entries.get(dataFormatEntryName);
	}

	/**
	 * @return the name read from the specification file, or <code>null</code> if no file has been parsed
	 */
	public String getDataFormatName() {
		return dataFormatName;
	}

	/**
	 * @return the data structure read from the specification file, or <code>null</code> if no file has been parsed
	 */
	public DataStructure getDataStructure() {
		return dataStructure;
	}

	/**
	 * Returns the data format name followed by one line per entry, in entry-name order.
	 */
	@Override
	public String toString() {
		final StringBuilder sb = new StringBuilder();
		sb.append("Data format specification: ");
		sb.append(dataFormatName);
		sb.append('\n');
		for (DataFormatEntry dfe : entries.values()) {
			sb.append(dfe);
			sb.append('\n');
		}
		return sb.toString();
	}

	/**
	 * Holds the four attributes (<code>name</code>, <code>url</code>, <code>map</code>,
	 * <code>urlmap</code>) of one <code>dependency</code> element of a specification file.
	 */
	public class Dependency {
		protected String dependentOn;
		protected String urlString;
		protected String map;
		protected String mapUrl;

		/**
		 * @param dependentOn value of the <code>name</code> attribute
		 * @param urlString value of the <code>url</code> attribute
		 * @param map value of the <code>map</code> attribute
		 * @param mapUrl value of the <code>urlmap</code> attribute
		 */
		public Dependency(String dependentOn, String urlString, String map, String mapUrl) {
			setDependentOn(dependentOn);
			setUrlString(urlString);
			setMap(map);
			setMapUrl(mapUrl);
		}

		public String getDependentOn() {
			return dependentOn;
		}

		protected void setDependentOn(String dependentOn) {
			this.dependentOn = dependentOn;
		}

		public String getUrlString() {
			return urlString;
		}

		public void setUrlString(String urlString) {
			this.urlString = urlString;
		}

		public String getMap() {
			return map;
		}

		protected void setMap(String map) {
			this.map = map;
		}

		public String getMapUrl() {
			return mapUrl;
		}

		public void setMapUrl(String mapUrl) {
			this.mapUrl = mapUrl;
		}
	}
}