001 package org.maltparser.core.io.dataformat; 002 003 import java.util.Iterator; 004 import java.util.Map; 005 import java.util.SortedMap; 006 import java.util.SortedSet; 007 import java.util.TreeMap; 008 import java.util.TreeSet; 009 010 import org.maltparser.core.exception.MaltChainedException; 011 import org.maltparser.core.symbol.SymbolTable; 012 import org.maltparser.core.symbol.SymbolTableHandler; 013 014 /** 015 * 016 * 017 * @author Johan Hall 018 * @since 1.0 019 **/ 020 public class DataFormatInstance implements Iterable<ColumnDescription> { 021 private final SortedSet<ColumnDescription> columnDescriptions; 022 private SortedMap<String,ColumnDescription> headColumnDescriptions; 023 private SortedMap<String,ColumnDescription> dependencyEdgeLabelColumnDescriptions; 024 private SortedMap<String,ColumnDescription> phraseStructureEdgeLabelColumnDescriptions; 025 private SortedMap<String,ColumnDescription> phraseStructureNodeLabelColumnDescriptions; 026 private SortedMap<String,ColumnDescription> secondaryEdgeLabelColumnDescriptions; 027 private SortedMap<String,ColumnDescription> inputColumnDescriptions; 028 private SortedMap<String,ColumnDescription> ignoreColumnDescriptions; 029 030 private SortedSet<ColumnDescription> headColumnDescriptionSet; 031 private SortedSet<ColumnDescription> dependencyEdgeLabelColumnDescriptionSet; 032 private SortedSet<ColumnDescription> phraseStructureEdgeLabelColumnDescriptionSet; 033 private SortedSet<ColumnDescription> phraseStructureNodeLabelColumnDescriptionSet; 034 private SortedSet<ColumnDescription> secondaryEdgeLabelColumnDescriptionSet; 035 private SortedSet<ColumnDescription> inputColumnDescriptionSet; 036 private SortedSet<ColumnDescription> ignoreColumnDescriptionSet; 037 038 private SortedMap<String,SymbolTable> dependencyEdgeLabelSymbolTables; 039 private SortedMap<String,SymbolTable> phraseStructureEdgeLabelSymbolTables; 040 private SortedMap<String,SymbolTable> phraseStructureNodeLabelSymbolTables; 041 private SortedMap<String,SymbolTable> secondaryEdgeLabelSymbolTables; 042 private SortedMap<String,SymbolTable> inputSymbolTables; 043 044 // Internal 045 private SortedMap<String,ColumnDescription> internalColumnDescriptions; 046 private SortedSet<ColumnDescription> internalColumnDescriptionSet; 047 048 private SymbolTableHandler symbolTables; 049 private DataFormatSpecification dataFormarSpec; 050 051 public DataFormatInstance(Map<String, DataFormatEntry> entries, SymbolTableHandler symbolTables, String nullValueStrategy, DataFormatSpecification spec) throws MaltChainedException { 052 this.columnDescriptions = new TreeSet<ColumnDescription>(); 053 this.symbolTables = symbolTables; 054 createColumnDescriptions(entries, nullValueStrategy); 055 setDataFormarSpec(spec); 056 } 057 058 public ColumnDescription addInternalColumnDescription(String name, String category, String type, String defaultOutput, String nullValueStrategy) throws MaltChainedException { 059 if (internalColumnDescriptions == null) { 060 internalColumnDescriptions = new TreeMap<String,ColumnDescription>(); 061 internalColumnDescriptionSet = new TreeSet<ColumnDescription>(); 062 } 063 064 if (!internalColumnDescriptions.containsKey(name)) { 065 ColumnDescription internalColumn = new ColumnDescription(name, category, type, defaultOutput, symbolTables, nullValueStrategy, true); 066 internalColumnDescriptions.put(name, internalColumn); 067 internalColumnDescriptionSet.add(internalColumn); 068 return internalColumn; 069 } else { 070 return internalColumnDescriptions.get(name); 071 } 072 } 073 074 public ColumnDescription addInternalColumnDescription(String name, int category, int type, String defaultOutput, String nullValueStrategy) throws MaltChainedException { 075 if (internalColumnDescriptions == null) { 076 internalColumnDescriptions = new TreeMap<String,ColumnDescription>(); 077 internalColumnDescriptionSet = new TreeSet<ColumnDescription>(); 078 } 079 080 if (!internalColumnDescriptions.containsKey(name)) { 081 ColumnDescription internalColumn = new ColumnDescription(name, category, type, defaultOutput, symbolTables, nullValueStrategy, true); 082 internalColumnDescriptions.put(name, internalColumn); 083 internalColumnDescriptionSet.add(internalColumn); 084 return internalColumn; 085 } else { 086 return internalColumnDescriptions.get(name); 087 } 088 } 089 090 public ColumnDescription addInternalColumnDescription(String name, ColumnDescription column) throws MaltChainedException { 091 return addInternalColumnDescription(name, column.getCategory(), column.getType(), column.getDefaultOutput(), column.getNullValueStrategy()); 092 } 093 094 private void createColumnDescriptions(Map<String, DataFormatEntry> entries, String nullValueStrategy) throws MaltChainedException { 095 for (DataFormatEntry entry : entries.values()) { 096 columnDescriptions.add(new ColumnDescription(entry.getDataFormatEntryName(), entry.getCategory(), entry.getType(), entry.getDefaultOutput(), symbolTables, nullValueStrategy, false)); 097 } 098 } 099 100 public ColumnDescription getColumnDescriptionByName(String name) { 101 for (ColumnDescription column : columnDescriptions) { 102 if (column.getName().equals(name)) { 103 return column; 104 } 105 } 106 if (internalColumnDescriptionSet != null) { 107 for (ColumnDescription internalColumn : internalColumnDescriptionSet) { 108 if (internalColumn.getName().equals(name)) { 109 return internalColumn; 110 } 111 } 112 } 113 return null; 114 } 115 116 // public int getNumberOfColumnDescriptions() { 117 // return columnDescriptions.size(); 118 // } 119 120 public Iterator<ColumnDescription> iterator() { 121 return columnDescriptions.iterator(); 122 } 123 124 public DataFormatSpecification getDataFormarSpec() { 125 return dataFormarSpec; 126 } 127 128 private void setDataFormarSpec(DataFormatSpecification dataFormarSpec) { 129 this.dataFormarSpec = dataFormarSpec; 130 } 131 132 protected void createHeadColumnDescriptions() { 133 headColumnDescriptions = new TreeMap<String,ColumnDescription>(); 134 for (ColumnDescription column : columnDescriptions) { 135 if (column.getCategory() == ColumnDescription.HEAD) { 136 headColumnDescriptions.put(column.getName(), column); 137 } 138 } 139 } 140 141 public ColumnDescription getHeadColumnDescription() { 142 if (headColumnDescriptions == null) { 143 createHeadColumnDescriptions(); 144 } 145 return headColumnDescriptions.get(headColumnDescriptions.firstKey()); 146 } 147 148 public SortedMap<String,ColumnDescription> getHeadColumnDescriptions() { 149 if (headColumnDescriptions == null) { 150 createHeadColumnDescriptions(); 151 } 152 return headColumnDescriptions; 153 } 154 155 protected void createDependencyEdgeLabelSymbolTables() { 156 dependencyEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 157 for (ColumnDescription column : columnDescriptions) { 158 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 159 dependencyEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 160 } 161 } 162 } 163 164 public SortedMap<String,SymbolTable> getDependencyEdgeLabelSymbolTables() { 165 if (dependencyEdgeLabelSymbolTables == null) { 166 createDependencyEdgeLabelSymbolTables(); 167 } 168 return dependencyEdgeLabelSymbolTables; 169 } 170 171 protected void createDependencyEdgeLabelColumnDescriptions() { 172 dependencyEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 173 for (ColumnDescription column : columnDescriptions) { 174 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 175 dependencyEdgeLabelColumnDescriptions.put(column.getName(), column); 176 } 177 } 178 } 179 180 public SortedMap<String,ColumnDescription> getDependencyEdgeLabelColumnDescriptions() { 181 if (dependencyEdgeLabelColumnDescriptions == null) { 182 createDependencyEdgeLabelColumnDescriptions(); 183 } 184 return dependencyEdgeLabelColumnDescriptions; 185 } 186 187 188 189 protected void createPhraseStructureEdgeLabelSymbolTables() { 190 phraseStructureEdgeLabelSymbolTables = new TreeMap<String, SymbolTable>(); 191 for (ColumnDescription column : columnDescriptions) { 192 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 193 phraseStructureEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 194 } 195 } 196 } 197 198 public SortedMap<String,SymbolTable> getPhraseStructureEdgeLabelSymbolTables() { 199 if (phraseStructureEdgeLabelSymbolTables == null) { 200 createPhraseStructureEdgeLabelSymbolTables(); 201 } 202 return phraseStructureEdgeLabelSymbolTables; 203 } 204 205 protected void createPhraseStructureEdgeLabelColumnDescriptions() { 206 phraseStructureEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 207 for (ColumnDescription column : columnDescriptions) { 208 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 209 phraseStructureEdgeLabelColumnDescriptions.put(column.getName(), column); 210 } 211 } 212 } 213 214 public SortedMap<String,ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptions() { 215 if (phraseStructureEdgeLabelColumnDescriptions == null) { 216 createPhraseStructureEdgeLabelColumnDescriptions(); 217 } 218 return phraseStructureEdgeLabelColumnDescriptions; 219 } 220 221 protected void createPhraseStructureNodeLabelSymbolTables() { 222 phraseStructureNodeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 223 for (ColumnDescription column : columnDescriptions) { 224 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) { 225 phraseStructureNodeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 226 } 227 } 228 } 229 230 public SortedMap<String,SymbolTable> getPhraseStructureNodeLabelSymbolTables() { 231 if (phraseStructureNodeLabelSymbolTables == null) { 232 createPhraseStructureNodeLabelSymbolTables(); 233 } 234 return phraseStructureNodeLabelSymbolTables; 235 } 236 237 protected void createPhraseStructureNodeLabelColumnDescriptions() { 238 phraseStructureNodeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 239 for (ColumnDescription column : columnDescriptions) { 240 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) { 241 phraseStructureNodeLabelColumnDescriptions.put(column.getName(), column); 242 } 243 } 244 } 245 246 public SortedMap<String,ColumnDescription> getPhraseStructureNodeLabelColumnDescriptions() { 247 if (phraseStructureNodeLabelColumnDescriptions == null) { 248 createPhraseStructureNodeLabelColumnDescriptions(); 249 } 250 return phraseStructureNodeLabelColumnDescriptions; 251 } 252 253 protected void createSecondaryEdgeLabelSymbolTables() { 254 secondaryEdgeLabelSymbolTables = new TreeMap<String,SymbolTable>(); 255 for (ColumnDescription column : columnDescriptions) { 256 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 257 secondaryEdgeLabelSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 258 } 259 } 260 } 261 262 public SortedMap<String,SymbolTable> getSecondaryEdgeLabelSymbolTables() { 263 if (secondaryEdgeLabelSymbolTables == null) { 264 createSecondaryEdgeLabelSymbolTables(); 265 } 266 return secondaryEdgeLabelSymbolTables; 267 } 268 269 protected void createSecondaryEdgeLabelColumnDescriptions() { 270 secondaryEdgeLabelColumnDescriptions = new TreeMap<String,ColumnDescription>(); 271 for (ColumnDescription column : columnDescriptions) { 272 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 273 secondaryEdgeLabelColumnDescriptions.put(column.getName(), column); 274 } 275 } 276 } 277 278 public SortedMap<String,ColumnDescription> getSecondaryEdgeLabelColumnDescriptions() { 279 if (secondaryEdgeLabelColumnDescriptions == null) { 280 createSecondaryEdgeLabelColumnDescriptions(); 281 } 282 return secondaryEdgeLabelColumnDescriptions; 283 } 284 285 protected void createInputSymbolTables() { 286 inputSymbolTables = new TreeMap<String,SymbolTable>(); 287 for (ColumnDescription column : columnDescriptions) { 288 if (column.getCategory() == ColumnDescription.INPUT) { 289 inputSymbolTables.put(column.getSymbolTable().getName(), column.getSymbolTable()); 290 } 291 } 292 } 293 294 public SortedMap<String,SymbolTable> getInputSymbolTables() { 295 if (inputSymbolTables == null) { 296 createInputSymbolTables(); 297 } 298 return inputSymbolTables; 299 } 300 301 protected void createInputColumnDescriptions() { 302 inputColumnDescriptions = new TreeMap<String,ColumnDescription>(); 303 for (ColumnDescription column : columnDescriptions) { 304 if (column.getCategory() == ColumnDescription.INPUT) { 305 inputColumnDescriptions.put(column.getName(), column); 306 } 307 } 308 } 309 310 public SortedMap<String,ColumnDescription> getInputColumnDescriptions() { 311 if (inputColumnDescriptions == null) { 312 createInputColumnDescriptions(); 313 } 314 return inputColumnDescriptions; 315 } 316 317 protected void createIgnoreColumnDescriptions() { 318 ignoreColumnDescriptions = new TreeMap<String,ColumnDescription>(); 319 for (ColumnDescription column : columnDescriptions) { 320 if (column.getCategory() == ColumnDescription.IGNORE) { 321 // if (column.getType() == ColumnDescription.IGNORE) { 322 ignoreColumnDescriptions.put(column.getName(), column); 323 } 324 } 325 } 326 327 public SortedMap<String,ColumnDescription> getIgnoreColumnDescriptions() { 328 if (ignoreColumnDescriptions == null) { 329 createIgnoreColumnDescriptions(); 330 } 331 return ignoreColumnDescriptions; 332 } 333 334 public SortedSet<ColumnDescription> getHeadColumnDescriptionSet() { 335 if (headColumnDescriptionSet == null) { 336 headColumnDescriptionSet = new TreeSet<ColumnDescription>(); 337 for (ColumnDescription column : columnDescriptions) { 338 if (column.getCategory() == ColumnDescription.HEAD) { 339 headColumnDescriptionSet.add(column); 340 } 341 } 342 } 343 return headColumnDescriptionSet; 344 } 345 346 public SortedSet<ColumnDescription> getDependencyEdgeLabelColumnDescriptionSet() { 347 if (dependencyEdgeLabelColumnDescriptionSet == null) { 348 dependencyEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 349 for (ColumnDescription column : columnDescriptions) { 350 if (column.getCategory() == ColumnDescription.DEPENDENCY_EDGE_LABEL) { 351 dependencyEdgeLabelColumnDescriptionSet.add(column); 352 } 353 } 354 } 355 return dependencyEdgeLabelColumnDescriptionSet; 356 } 357 358 public SortedSet<ColumnDescription> getPhraseStructureEdgeLabelColumnDescriptionSet() { 359 if (phraseStructureEdgeLabelColumnDescriptionSet == null) { 360 phraseStructureEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 361 for (ColumnDescription column : columnDescriptions) { 362 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_EDGE_LABEL) { 363 phraseStructureEdgeLabelColumnDescriptionSet.add(column); 364 } 365 } 366 } 367 return phraseStructureEdgeLabelColumnDescriptionSet; 368 } 369 370 public SortedSet<ColumnDescription> getPhraseStructureNodeLabelColumnDescriptionSet() { 371 if (phraseStructureNodeLabelColumnDescriptionSet == null) { 372 phraseStructureNodeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 373 for (ColumnDescription column : columnDescriptions) { 374 if (column.getCategory() == ColumnDescription.PHRASE_STRUCTURE_NODE_LABEL) { 375 phraseStructureNodeLabelColumnDescriptionSet.add(column); 376 } 377 } 378 } 379 return phraseStructureNodeLabelColumnDescriptionSet; 380 } 381 382 public SortedSet<ColumnDescription> getSecondaryEdgeLabelColumnDescriptionSet() { 383 if (secondaryEdgeLabelColumnDescriptionSet == null) { 384 secondaryEdgeLabelColumnDescriptionSet = new TreeSet<ColumnDescription>(); 385 for (ColumnDescription column : columnDescriptions) { 386 if (column.getCategory() == ColumnDescription.SECONDARY_EDGE_LABEL) { 387 secondaryEdgeLabelColumnDescriptionSet.add(column); 388 } 389 } 390 } 391 return secondaryEdgeLabelColumnDescriptionSet; 392 } 393 394 public SortedSet<ColumnDescription> getInputColumnDescriptionSet() { 395 if (inputColumnDescriptionSet == null) { 396 inputColumnDescriptionSet = new TreeSet<ColumnDescription>(); 397 for (ColumnDescription column : columnDescriptions) { 398 if (column.getCategory() == ColumnDescription.INPUT) { 399 inputColumnDescriptionSet.add(column); 400 } 401 } 402 } 403 return inputColumnDescriptionSet; 404 } 405 406 public SortedSet<ColumnDescription> getIgnoreColumnDescriptionSet() { 407 if (ignoreColumnDescriptionSet == null) { 408 ignoreColumnDescriptionSet = new TreeSet<ColumnDescription>(); 409 for (ColumnDescription column : columnDescriptions) { 410 if (column.getCategory() == ColumnDescription.IGNORE) { 411 ignoreColumnDescriptionSet.add(column); 412 } 413 } 414 } 415 return ignoreColumnDescriptionSet; 416 } 417 418 public SymbolTableHandler getSymbolTables() { 419 return symbolTables; 420 } 421 422 public String toString() { 423 final StringBuilder sb = new StringBuilder(); 424 for (ColumnDescription column : columnDescriptions) { 425 sb.append(column); 426 sb.append('\n'); 427 } 428 return sb.toString(); 429 } 430 }