001package org.maltparser.core.syntaxgraph.reader; 002 003import java.text.SimpleDateFormat; 004import java.util.Date; 005import java.util.LinkedHashMap; 006import java.util.SortedMap; 007import java.util.TreeMap; 008 009import org.maltparser.core.helper.Util; 010import org.maltparser.core.symbol.SymbolTable; 011/** 012* 013* 014* @author Johan Hall 015*/ 016public class TigerXMLHeader { 017 public enum Domain { 018 T, // feature for terminal nodes 019 NT, // feature for nonterminal nodes 020 FREC, //feature for both 021 EL, // edge label (same as "edgelabel" in TigerXML schema) 022 SEL // secondary edge Label (same as "secedgelabel" in TigerXML schema) 023 }; 024 025 private String corpusID; 026 private String corpusVersion; 027 private String external; 028 private String metaName; 029 private String metaAuthor; 030 private String metaDescription; 031 private String metaInDate; 032 private String metaFormat; 033 private String metaHistory; 034// private SymbolTableHandler symbolTableHandler; 035 private FeatureEdgeLabel edgeLabels; 036 private FeatureEdgeLabel secEdgeLabels; 037 private LinkedHashMap<String,FeatureEdgeLabel> features; 038 039 public TigerXMLHeader() { 040// setSymbolTableHandler(symbolTableHandler); 041 features = new LinkedHashMap<String,FeatureEdgeLabel>(); 042 } 043 044 public boolean isTigerXMLWritable() { 045 return true; 046 //return features.size() > 0; 047 } 048 049 public void addFeature(String featureName, String domainName) { 050 if (!features.containsKey(featureName)) { 051 features.put(featureName, new FeatureEdgeLabel(featureName, domainName)); 052 } 053 } 054 055 public void addFeatureValue(String featureName, String name) { 056 addFeatureValue(featureName, name, "\t"); 057 } 058 059 public void addFeatureValue(String featureName, String name, String desc) { 060 if (features.containsKey(featureName)) { 061 if (desc == null || desc.length() == 0) { 062 features.get(featureName).addValue(name, "\t"); 063 } else { 064 features.get(featureName).addValue(name, desc); 065 } 066 } 067 } 068 069 public void addEdgeLabelValue(String name) { 070 addEdgeLabelValue(name, "\t"); 071 } 072 073 public void addEdgeLabelValue(String name, String desc) { 074 if (edgeLabels == null) { 075 edgeLabels = new FeatureEdgeLabel("edgelabel", Domain.EL); 076 } 077 if (desc == null || desc.length() == 0) { 078 edgeLabels.addValue(name, "\t"); 079 } else { 080 edgeLabels.addValue(name, desc); 081 } 082 } 083 084 public void addSecEdgeLabelValue(String name) { 085 addSecEdgeLabelValue(name, "\t"); 086 } 087 088 public void addSecEdgeLabelValue(String name, String desc) { 089 if (secEdgeLabels == null) { 090 secEdgeLabels = new FeatureEdgeLabel("secedgelabel", Domain.SEL); 091 } 092 if (desc == null || desc.length() == 0) { 093 secEdgeLabels.addValue(name, "\t"); 094 } else { 095 secEdgeLabels.addValue(name, desc); 096 } 097 } 098 099 public String getCorpusID() { 100 return corpusID; 101 } 102 103 public void setCorpusID(String corpusID) { 104 this.corpusID = corpusID; 105 } 106 107 public String getCorpusVersion() { 108 return corpusVersion; 109 } 110 111 public void setCorpusVersion(String corpusVersion) { 112 this.corpusVersion = corpusVersion; 113 } 114 115 public void setExternal(String external) { 116 this.external = external; 117 } 118 119 public String getExternal() { 120 return external; 121 } 122 123 public void setMeta(String metaElement, String value) { 124 if (metaElement.equals("name")) { setMetaName(value); } 125 if (metaElement.equals("author")) { setMetaAuthor(value); } 126 if (metaElement.equals("description")) { setMetaDescription(value); } 127 if (metaElement.equals("date")) { setMetaInDate(value); } 128 if (metaElement.equals("format")) { setMetaFormat(value); } 129 if (metaElement.equals("history")) { setMetaHistory(value); } 130 } 131 132 public String getMetaName() { 133 return metaName; 134 } 135 136 public void setMetaName(String metaName) { 137 this.metaName = metaName; 138 } 139 140 public String getMetaAuthor() { 141 return metaAuthor; 142 } 143 144 public void setMetaAuthor(String metaAuthor) { 145 this.metaAuthor = metaAuthor; 146 } 147 148 public String getMetaDescription() { 149 return metaDescription; 150 } 151 152 public void setMetaDescription(String metaDescription) { 153 this.metaDescription = metaDescription; 154 } 155 156 public String getMetaInDate() { 157 return metaInDate; 158 } 159 160 public String getMetaCurrentDate() { 161 return getMetaCurrentDate("yyyy-MM-dd HH:mm:ss"); 162 } 163 164 public String getMetaCurrentDate(String format) { 165 return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss").format(new Date()); 166 } 167 168 public void setMetaInDate(String metaInDate) { 169 this.metaInDate = metaInDate; 170 } 171 172 public String getMetaFormat() { 173 return metaFormat; 174 } 175 176 public void setMetaFormat(String metaFormat) { 177 this.metaFormat = metaFormat; 178 } 179 180 public String getMetaHistory() { 181 return metaHistory; 182 } 183 184 public void setMetaHistory(String metaHistory) { 185 this.metaHistory = metaHistory; 186 } 187 188// public SymbolTableHandler getSymbolTableHandler() { 189// return symbolTableHandler; 190// } 191// 192// protected void setSymbolTableHandler(SymbolTableHandler symbolTableHandler) { 193// this.symbolTableHandler = symbolTableHandler; 194// } 195 196 public String toTigerXML() { 197 final StringBuilder sb = new StringBuilder(); 198 199 if (getCorpusVersion() == null) { 200 sb.append("<corpus id=\""); 201 sb.append(((getCorpusID() == null)?"GeneratedByMaltParser":getCorpusID())); 202 sb.append("\">\n"); 203 } else { 204 sb.append("<corpus id=\""); 205 sb.append(((getCorpusID() == null)?"GeneratedByMaltParser":getCorpusID())); 206 sb.append("\" version=\""); 207 sb.append(getCorpusVersion()); 208 sb.append("\">\n"); 209 } 210 sb.append(" <head>\n"); 211 sb.append(" <meta>\n"); 212 sb.append(" <name>"); 213 sb.append(((getMetaName() == null)?"GeneratedByMaltParser":Util.xmlEscape(getMetaName()))); 214 sb.append("</name>\n"); 215 sb.append(" <author>MaltParser</author>\n"); 216 sb.append(" <date>"); 217 sb.append(getMetaCurrentDate()); 218 sb.append("</date>\n"); 219 220 sb.append(" <description>"); 221 sb.append(Util.xmlEscape("Unfortunately, you have to add the annotations header data yourself. Maybe in later releases this will be fixed. ")); 222 sb.append("</description>\n"); 223 224// if (getMetaDescription() != null) { 225// sb.append(" <description>"); 226// sb.append(Util.xmlEscape(getMetaDescription())); 227// sb.append("</description>\n"); 228// } 229// if (getMetaFormat() != null) { 230// sb.append(" <format>"); 231// sb.append(Util.xmlEscape(getMetaFormat())); 232// sb.append("</format>\n"); 233// } 234// if (getMetaHistory() != null) { 235// sb.append(" <history>"); 236// sb.append(Util.xmlEscape(getMetaHistory())); 237// sb.append("</history>\n"); 238// } 239 sb.append(" </meta>\n"); 240 sb.append(" <annotation/>\n"); 241// sb.append(" <annotation>\n"); 242// for (String name : features.keySet()) { 243// sb.append(features.get(name).toTigerXML()); 244// } 245// if (edgeLabels != null) { 246// sb.append(edgeLabels.toTigerXML()); 247// } 248// if (secEdgeLabels != null) { 249// sb.append(secEdgeLabels.toTigerXML()); 250// } 251// sb.append(" </annotation>\n"); 252 sb.append(" </head>\n"); 253 sb.append(" <body>\n"); 254 return sb.toString(); 255 } 256 257 public String toString() { 258 return toTigerXML(); 259 } 260 261 protected class FeatureEdgeLabel { 262 private String name; 263 private Domain domain; 264 // values: key mapped to \t (tab) indicates that the description part is missing 265 private SortedMap<String, String> values; 266 private SymbolTable table; 267 268 public FeatureEdgeLabel(String name, String domainName) { 269 setName(name); 270 setDomain(domainName); 271 } 272 273 public FeatureEdgeLabel(String name, Domain domain) { 274 setName(name); 275 setDomain(domain); 276 } 277 278 public String getName() { 279 return name; 280 } 281 282 public void setName(String name) { 283 this.name = name; 284 } 285 286 public void setDomain(String domainName) { 287 domain = Domain.valueOf(domainName); 288 } 289 290 public void setDomain(Domain domain) { 291 this.domain = domain; 292 } 293 294 public String getDomainName() { 295 return domain.toString(); 296 } 297 298 public Domain getDomain() { 299 return domain; 300 } 301 302 public SymbolTable getTable() { 303 return table; 304 } 305 306 public void setTable(SymbolTable table) { 307 this.table = table; 308 } 309 310 public void addValue(String name) { 311 addValue(name, "\t"); 312 } 313 314 public void addValue(String name, String desc) { 315 if (values == null) { 316 values = new TreeMap<String,String>(); 317 } 318 values.put(name, desc); 319 } 320 321 public String toTigerXML() { 322 final StringBuilder sb = new StringBuilder(); 323 if (domain == Domain.T || domain == Domain.FREC || domain == Domain.NT) { 324 sb.append(" <feature domain=\""); 325 sb.append(getDomainName()); 326 sb.append("\" name=\""); 327 sb.append(getName()); 328 sb.append((values == null)?"\" />\n":"\">\n"); 329 } 330 if (domain == Domain.EL) { 331 sb.append((values != null)?" <edgelabel>\n":" <edgelabel />\n"); 332 } 333 if (domain == Domain.SEL) { 334 sb.append((values != null)?" <secedgelabel>\n":" <secedgelabel />\n"); 335 } 336 if (values != null) { 337 for (String name : values.keySet()) { 338 sb.append(" <value name=\""); 339 sb.append(name); 340 if (values.get(name).equals("\t")) { 341 sb.append("\" />\n"); 342 } else { 343 sb.append("\">"); 344 sb.append(Util.xmlEscape(values.get(name))); 345 sb.append("</value>\n"); 346 } 347 } 348 } 349 if (domain == Domain.T || domain == Domain.FREC || domain == Domain.NT) { 350 if (values != null) { 351 sb.append(" </feature>\n"); 352 } 353 } 354 if (domain == Domain.EL && values != null) { 355 sb.append(" </edgelabel>\n"); 356 } 357 if (domain == Domain.SEL && values != null) { 358 sb.append(" </secedgelabel>\n"); 359 } 360 return sb.toString(); 361 } 362 363 public String toString() { 364 return toTigerXML(); 365 } 366 } 367} 368 369 370