001package org.maltparser.core.io.dataformat; 002 003import java.util.Iterator; 004import java.util.Map; 005import java.util.SortedMap; 006import java.util.SortedSet; 007import java.util.TreeMap; 008import java.util.TreeSet; 009 010import org.maltparser.core.exception.MaltChainedException; 011import org.maltparser.core.symbol.SymbolTable; 012import org.maltparser.core.symbol.SymbolTableHandler; 013 014/** 015 * 016 * 017 * @author Johan Hall 018 * @since 1.0 019**/ 020public class DataFormatInstance implements Iterable<ColumnDescription> { 021 private final SortedSet<ColumnDescription> columnDescriptions; 022 private SortedMap<String,ColumnDescription> headColumnDescriptions; 023 private SortedMap<String,ColumnDescription> dependencyEdgeLabelColumnDescriptions; 024 private SortedMap<String,ColumnDescription> phraseStructureEdgeLabelColumnDescriptions; 025 private SortedMap<String,ColumnDescription> phraseStructureNodeLabelColumnDescriptions; 026 private SortedMap<String,ColumnDescription> secondaryEdgeLabelColumnDescriptions; 027 private SortedMap<String,ColumnDescription> inputColumnDescriptions; 028 private SortedMap<String,ColumnDescription> ignoreColumnDescriptions; 029 030 private SortedSet<ColumnDescription> headColumnDescriptionSet; 031 private SortedSet<ColumnDescription> dependencyEdgeLabelColumnDescriptionSet; 032 private SortedSet<ColumnDescription> phraseStructureEdgeLabelColumnDescriptionSet; 033 private SortedSet<ColumnDescription> phraseStructureNodeLabelColumnDescriptionSet; 034 private SortedSet<ColumnDescription> secondaryEdgeLabelColumnDescriptionSet; 035 private SortedSet<ColumnDescription> inputColumnDescriptionSet; 036 private SortedSet<ColumnDescription> ignoreColumnDescriptionSet; 037 038 private SortedMap<String,SymbolTable> dependencyEdgeLabelSymbolTables; 039 private SortedMap<String,SymbolTable> phraseStructureEdgeLabelSymbolTables; 040 private SortedMap<String,SymbolTable> phraseStructureNodeLabelSymbolTables; 041 private SortedMap<String,SymbolTable> secondaryEdgeLabelSymbolTables; 042 private SortedMap<String,SymbolTable> inputSymbolTables; 043 044 // Internal 045 private SortedMap<String,ColumnDescription> internalColumnDescriptions; 046 private SortedSet<ColumnDescription> internalColumnDescriptionSet; 047 048 private final DataFormatSpecification dataFormarSpec; 049 050 public DataFormatInstance(Map<String, DataFormatEntry> entries, SymbolTableHandler symbolTables, String nullValueStrategy, DataFormatSpecification dataFormarSpec) throws MaltChainedException { 051 this.columnDescriptions = new TreeSet<ColumnDescription>(); 052 this.dataFormarSpec = dataFormarSpec; 053 createColumnDescriptions(symbolTables, entries, nullValueStrategy); 054 055 } 056 057 public ColumnDescription addInternalColumnDescription(SymbolTableHandler symbolTables, String name, String category, String type, String defaultOutput, String nullValueStrategy) throws MaltChainedException { 058 if (internalColumnDescriptions == null) { 059 internalColumnDescriptions = new TreeMap<String,ColumnDescription>(); 060 internalColumnDescriptionSet = new TreeSet<ColumnDescription>(); 061 } 062 063 if (!internalColumnDescriptions.containsKey(name)) { 064 ColumnDescription internalColumn = new ColumnDescription(name, ColumnDescription.getCategory(category), ColumnDescription.getType(type), defaultOutput, nullValueStrategy, true); 065 symbolTables.addSymbolTable(internalColumn.getName(), internalColumn.getCategory(), internalColumn.getType(), internalColumn.getNullValueStrategy()); 066 internalColumnDescriptions.put(name, internalColumn); 067 internalColumnDescriptionSet.add(internalColumn); 068 return internalColumn; 069 } else { 070 return internalColumnDescriptions.get(name); 071 } 072 } 073 074 public ColumnDescription addInternalColumnDescription(SymbolTableHandler symbolTables, String name, int category, int type, String defaultOutput, String nullValueStrategy) throws MaltChainedException { 075 if (internalColumnDescriptions == null) { 076 internalColumnDescriptions = new TreeMap<String,ColumnDescription>(); 077 internalColumnDescriptionSet = new TreeSet<ColumnDescription>(); 078 } 079 080 if (!internalColumnDescriptions.containsKey(name)) { 081 ColumnDescription internalColumn = new ColumnDescription(name, category, type, defaultOutput, nullValueStrategy, true); 082 symbolTables.addSymbolTable(internalColumn.getName(), internalColumn.getCategory(), internalColumn.getType(), internalColumn.getNullValueStrategy()); 083 internalColumnDescriptions.put(name, internalColumn); 084 internalColumnDescriptionSet.add(internalColumn); 085 return internalColumn; 086 } else { 087 return internalColumnDescriptions.get(name); 088 } 089 } 090 091 public ColumnDescription addInternalColumnDescription(SymbolTableHandler symbolTables, String name, ColumnDescription column) throws MaltChainedException { 092 return addInternalColumnDescription(symbolTables, name, column.getCategory(), column.getType(), column.getDefaultOutput(), column.getNullValueStrategy()); 093 } 094 095 private void createColumnDescriptions(SymbolTableHandler symbolTables, Map<String, DataFormatEntry> entries, String nullValueStrategy) throws MaltChainedException { 096 for (DataFormatEntry entry : entries.values()) { 097 ColumnDescription column = new ColumnDescription(entry.getDataFormatEntryName(), ColumnDescription.getCategory(entry.getCategory()), ColumnDescription.getType(entry.getType()), entry.getDefaultOutput(), nullValueStrategy, false); 098 symbolTables.addSymbolTable(column.getName(), column.getCategory(), column.getType(), column.getNullValueStrategy()); 099 columnDescriptions.add(column); 100 101 } 102 } 103 104 public ColumnDescription getColumnDescriptionByName(String name) { 105 for (ColumnDescription column : columnDescriptions) { 106 if (column.getName().equals(name)) { 107 return column; 108 } 109 } 110 if (internalColumnDescriptionSet != null) { 111 for (ColumnDescription internalColumn : internalColumnDescriptionSet) { 112 if (internalColumn.getName().equals(name)) { 113 return internalColumn; 114 } 115 } 116 } 117 return null; 118 } 119 120// public int getNumberOfColumnDescriptions() { 121// return columnDescriptions.size(); 122// } 123 124 public Iterator<ColumnDescription> iterator() { 125 return columnDescriptions.iterator(); 126 } 127 128 public DataFormatSpecification getDataFormarSpec() { 129 return dataFormarSpec; 130 } 131 132 protected void createHeadColumnDescriptions() { 133 headColumnDescriptions = new TreeMap<String,ColumnDescription>(); 134 for (ColumnDescription column : columnDescriptions) { 135 if (column.getCategory() == ColumnDescription.HEAD) { 136 headColumnDescriptions.put(column.getName(), column); 137 } 138 } 139 } 140 141 public ColumnDescription getHeadColumnDescription() { 142 if (headColumnDescriptions == null) { 143 createHeadColumnDescriptions(); 144 } 145 return headColumnDescriptions.get(headColumnDescriptions.firstKey()); 146 } 147 148 public SortedMap<String,ColumnDescription> getHeadColumnDescriptions() { 149 if (headColumnDescriptions == null) { 150 createHeadColumnDescriptions(); 151 } 152 return headColumnDescriptions; 153 } 154 155 protected void createDependencyEdgeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 156 dependencyEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 157 for (ColumnDescription column : columnDescriptions) { 158 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 159 dependencyEdgeLabelSymbolTables.put(column.getName(), symbolTables.getSymbolTable(column.getName())); 160 } 161 } 162 } 163 164 public SortedMap<String,SymbolTable> getDependencyEdgeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 165 if (dependencyEdgeLabelSymbolTables == null) { 166 createDependencyEdgeLabelSymbolTables(symbolTables); 167 } 168 return dependencyEdgeLabelSymbolTables; 169 } 170 171 protected void createDependencyEdgeLabelColumnDescriptions() { 172 dependencyEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 173 for (ColumnDescription column : columnDescriptions) { 174 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 175 dependencyEdgeLabelColumnDescriptions.put(column.getName(), column); 176 } 177 } 178 } 179 180 public SortedMap<String,ColumnDescription> getDependencyEdgeLabelColumnDescriptions() { 181 if (dependencyEdgeLabelColumnDescriptions == null) { 182 createDependencyEdgeLabelColumnDescriptions(); 183 } 184 return dependencyEdgeLabelColumnDescriptions; 185 } 186 187 188 189 protected void createPhraseStructureEdgeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 190 phraseStructureEdgeLabelSymbolTables = new TreeMap<String, SymbolTable>(); 191 for (ColumnDescription column : columnDescriptions) { 192 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 193 phraseStructureEdgeLabelSymbolTables.put(column.getName(), symbolTables.getSymbolTable(column.getName())); 194 } 195 } 196 } 197 198 public SortedMap<String,SymbolTable> getPhraseStructureEdgeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 199 if (phraseStructureEdgeLabelSymbolTables == null) { 200 createPhraseStructureEdgeLabelSymbolTables(symbolTables); 201 } 202 return phraseStructureEdgeLabelSymbolTables; 203 } 204 205 protected void createPhraseStructureEdgeLabelColumnDescriptions() { 206 phraseStructureEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 207 for (ColumnDescription column : columnDescriptions) { 208 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 209 phraseStructureEdgeLabelColumnDescriptions.put(column.getName(), column); 210 } 211 } 212 } 213 214 public SortedMap<String,ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptions() { 215 if (phraseStructureEdgeLabelColumnDescriptions == null) { 216 createPhraseStructureEdgeLabelColumnDescriptions(); 217 } 218 return phraseStructureEdgeLabelColumnDescriptions; 219 } 220 221 protected void createPhraseStructureNodeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 222 phraseStructureNodeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 223 for (ColumnDescription column : columnDescriptions) { 224 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) { 225 phraseStructureNodeLabelSymbolTables.put(column.getName(), symbolTables.getSymbolTable(column.getName())); 226 } 227 } 228 } 229 230 public SortedMap<String,SymbolTable> getPhraseStructureNodeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 231 if (phraseStructureNodeLabelSymbolTables == null) { 232 createPhraseStructureNodeLabelSymbolTables(symbolTables); 233 } 234 return phraseStructureNodeLabelSymbolTables; 235 } 236 237 protected void createPhraseStructureNodeLabelColumnDescriptions() { 238 phraseStructureNodeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 239 for (ColumnDescription column : columnDescriptions) { 240 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) { 241 phraseStructureNodeLabelColumnDescriptions.put(column.getName(), column); 242 } 243 } 244 } 245 246 public SortedMap<String,ColumnDescription> getPhraseStructureNodeLabelColumnDescriptions() { 247 if (phraseStructureNodeLabelColumnDescriptions == null) { 248 createPhraseStructureNodeLabelColumnDescriptions(); 249 } 250 return phraseStructureNodeLabelColumnDescriptions; 251 } 252 253 protected void createSecondaryEdgeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 254 secondaryEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 255 for (ColumnDescription column : columnDescriptions) { 256 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 257 secondaryEdgeLabelSymbolTables.put(column.getName(), symbolTables.getSymbolTable(column.getName())); 258 } 259 } 260 } 261 262 public SortedMap<String,SymbolTable> getSecondaryEdgeLabelSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 263 if (secondaryEdgeLabelSymbolTables == null) { 264 createSecondaryEdgeLabelSymbolTables(symbolTables); 265 } 266 return secondaryEdgeLabelSymbolTables; 267 } 268 269 protected void createSecondaryEdgeLabelColumnDescriptions() { 270 secondaryEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 271 for (ColumnDescription column : columnDescriptions) { 272 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 273 secondaryEdgeLabelColumnDescriptions.put(column.getName(), column); 274 } 275 } 276 } 277 278 public SortedMap<String,ColumnDescription> getSecondaryEdgeLabelColumnDescriptions() { 279 if (secondaryEdgeLabelColumnDescriptions == null) { 280 createSecondaryEdgeLabelColumnDescriptions(); 281 } 282 return secondaryEdgeLabelColumnDescriptions; 283 } 284 285 protected void createInputSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 286 inputSymbolTables = new TreeMap<String,SymbolTable>(); 287 for (ColumnDescription column : columnDescriptions) { 288 if (column.getCategory() == ColumnDescription.INPUT) { 289 inputSymbolTables.put(column.getName(), symbolTables.getSymbolTable(column.getName())); 290 } 291 } 292 } 293 294 public SortedMap<String,SymbolTable> getInputSymbolTables(SymbolTableHandler symbolTables) throws MaltChainedException { 295 if (inputSymbolTables == null) { 296 createInputSymbolTables(symbolTables); 297 } 298 return inputSymbolTables; 299 } 300 301 protected void createInputColumnDescriptions() { 302 inputColumnDescriptions = new TreeMap<String,ColumnDescription>(); 303 for (ColumnDescription column : columnDescriptions) { 304 if (column.getCategory() == ColumnDescription.INPUT) { 305 inputColumnDescriptions.put(column.getName(), column); 306 } 307 } 308 } 309 310 public SortedMap<String,ColumnDescription> getInputColumnDescriptions() { 311 if (inputColumnDescriptions == null) { 312 createInputColumnDescriptions(); 313 } 314 return inputColumnDescriptions; 315 } 316 317 protected void createIgnoreColumnDescriptions() { 318 ignoreColumnDescriptions = new TreeMap<String,ColumnDescription>(); 319 for (ColumnDescription column : columnDescriptions) { 320 if (column.getCategory() == ColumnDescription.IGNORE) { 321// if (column.getType() == ColumnDescription.IGNORE) { 322 ignoreColumnDescriptions.put(column.getName(), column); 323 } 324 } 325 } 326 327 public SortedMap<String,ColumnDescription> getIgnoreColumnDescriptions() { 328 if (ignoreColumnDescriptions == null) { 329 createIgnoreColumnDescriptions(); 330 } 331 return ignoreColumnDescriptions; 332 } 333 334 public SortedSet<ColumnDescription> getHeadColumnDescriptionSet() { 335 if (headColumnDescriptionSet == null) { 336 headColumnDescriptionSet = new TreeSet<ColumnDescription>(); 337 for (ColumnDescription column : columnDescriptions) { 338 if (column.getCategory() == ColumnDescription.HEAD) { 339 headColumnDescriptionSet.add(column); 340 } 341 } 342 } 343 return headColumnDescriptionSet; 344 } 345 346 public SortedSet<ColumnDescription> getDependencyEdgeLabelColumnDescriptionSet() { 347 if (dependencyEdgeLabelColumnDescriptionSet == null) { 348 dependencyEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 349 for (ColumnDescription column : columnDescriptions) { 350 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 351 dependencyEdgeLabelColumnDescriptionSet.add(column); 352 } 353 } 354 } 355 return dependencyEdgeLabelColumnDescriptionSet; 356 } 357 358 public SortedSet<ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptionSet() { 359 if (phraseStructureEdgeLabelColumnDescriptionSet == null) { 360 phraseStructureEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 361 for (ColumnDescription column : columnDescriptions) { 362 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 363 phraseStructureEdgeLabelColumnDescriptionSet.add(column); 364 } 365 } 366 } 367 return phraseStructureEdgeLabelColumnDescriptionSet; 368 } 369 370 public SortedSet<ColumnDescription> getPhraseStructureNodeLabelColumnDescriptionSet() { 371 if (phraseStructureNodeLabelColumnDescriptionSet == null) { 372 phraseStructureNodeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 373 for (ColumnDescription column : columnDescriptions) { 374 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) { 375 phraseStructureNodeLabelColumnDescriptionSet.add(column); 376 } 377 } 378 } 379 return phraseStructureNodeLabelColumnDescriptionSet; 380 } 381 382 public SortedSet<ColumnDescription> getSecondaryEdgeLabelColumnDescriptionSet() { 383 if (secondaryEdgeLabelColumnDescriptionSet == null) { 384 secondaryEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 385 for (ColumnDescription column : columnDescriptions) { 386 if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL) { 387 secondaryEdgeLabelColumnDescriptionSet.add(column); 388 } 389 } 390 } 391 return secondaryEdgeLabelColumnDescriptionSet; 392 } 393 394 public SortedSet<ColumnDescription> getInputColumnDescriptionSet() { 395 if (inputColumnDescriptionSet == null) { 396 inputColumnDescriptionSet = new TreeSet<ColumnDescription>(); 397 for (ColumnDescription column : columnDescriptions) { 398 if (column.getCategory() == ColumnDescription.INPUT) { 399 inputColumnDescriptionSet.add(column); 400 } 401 } 402 } 403 return inputColumnDescriptionSet; 404 } 405 406 public SortedSet<ColumnDescription> getIgnoreColumnDescriptionSet() { 407 if (ignoreColumnDescriptionSet == null) { 408 ignoreColumnDescriptionSet = new TreeSet<ColumnDescription>(); 409 for (ColumnDescription column : columnDescriptions) { 410 if (column.getCategory() == ColumnDescription.IGNORE) { 411 ignoreColumnDescriptionSet.add(column); 412 } 413 } 414 } 415 return ignoreColumnDescriptionSet; 416 } 417 418// public SymbolTableHandler getSymbolTables() { 419// return symbolTables; 420// } 421 422 public String toString() { 423 final StringBuilder sb = new StringBuilder(); 424 for (ColumnDescription column : columnDescriptions) { 425 sb.append(column); 426 sb.append('\n'); 427 } 428 return sb.toString(); 429 } 430}