001 package org.maltparser.core.config; 002 003 import java.io.BufferedInputStream; 004 import java.io.BufferedOutputStream; 005 import java.io.BufferedReader; 006 import java.io.BufferedWriter; 007 import java.io.File; 008 import java.io.FileInputStream; 009 import java.io.FileNotFoundException; 010 import java.io.FileOutputStream; 011 import java.io.FileReader; 012 import java.io.FileWriter; 013 import java.io.IOException; 014 import java.io.InputStream; 015 import java.io.InputStreamReader; 016 import java.io.OutputStreamWriter; 017 import java.io.UnsupportedEncodingException; 018 import java.net.MalformedURLException; 019 import java.net.URL; 020 import java.util.Date; 021 import java.util.Enumeration; 022 import java.util.HashMap; 023 import java.util.Set; 024 import java.util.SortedSet; 025 import java.util.TreeSet; 026 import java.util.jar.JarEntry; 027 import java.util.jar.JarFile; 028 import java.util.jar.JarInputStream; 029 import java.util.jar.JarOutputStream; 030 031 import org.maltparser.core.config.version.Versioning; 032 import org.maltparser.core.exception.MaltChainedException; 033 import org.maltparser.core.helper.HashSet; 034 import org.maltparser.core.helper.SystemInfo; 035 import org.maltparser.core.helper.SystemLogger; 036 import org.maltparser.core.helper.URLFinder; 037 import org.maltparser.core.io.dataformat.DataFormatInstance; 038 import org.maltparser.core.io.dataformat.DataFormatManager; 039 import org.maltparser.core.io.dataformat.DataFormatSpecification.DataStructure; 040 import org.maltparser.core.io.dataformat.DataFormatSpecification.Dependency; 041 import org.maltparser.core.options.OptionManager; 042 import org.maltparser.core.symbol.SymbolTableHandler; 043 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler; 044 045 046 /** 047 * This class contains methods for handle the configuration directory. 048 * 049 * @author Johan Hall 050 */ 051 public class ConfigurationDir { 052 protected static final int BUFFER = 4096; 053 protected File configDirectory; 054 protected String name; 055 protected String type; 056 protected File workingDirectory; 057 protected URL url = null; 058 protected int containerIndex; 059 protected BufferedWriter infoFile = null; 060 protected String createdByMaltParserVersion; 061 062 private SymbolTableHandler symbolTables; 063 private DataFormatManager dataFormatManager; 064 private HashMap<String,DataFormatInstance> dataFormatInstances; 065 private URL inputFormatURL; 066 private URL outputFormatURL; 067 068 /** 069 * Creates a configuration directory from a mco-file specified by an URL. 070 * 071 * @param url an URL to a mco-file 072 * @throws MaltChainedException 073 */ 074 public ConfigurationDir(URL url) throws MaltChainedException { 075 initWorkingDirectory(); 076 setUrl(url); 077 initNameNTypeFromInfoFile(url); 078 // initData(); 079 } 080 081 /** 082 * Creates a new configuration directory or a configuration directory from a mco-file 083 * 084 * @param name the name of the configuration 085 * @param type the type of configuration 086 * @param containerIndex the container index 087 * @throws MaltChainedException 088 */ 089 public ConfigurationDir(String name, String type, int containerIndex) throws MaltChainedException { 090 setContainerIndex(containerIndex); 091 092 initWorkingDirectory(); 093 if (name != null && name.length() > 0 && type != null && type.length() > 0) { 094 setName(name); 095 setType(type); 096 } else { 097 throw new ConfigurationException("The configuration name is not specified. "); 098 } 099 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName())); 100 } 101 102 public void initDataFormat() throws MaltChainedException { 103 String inputFormatName = OptionManager.instance().getOptionValue(containerIndex, "input", "format").toString().trim(); 104 String outputFormatName = OptionManager.instance().getOptionValue(containerIndex, "output", "format").toString().trim(); 105 final URLFinder f = new URLFinder(); 106 107 if (configDirectory != null && configDirectory.exists()) { 108 if (outputFormatName.length() == 0 || inputFormatName.equals(outputFormatName)) { 109 URL inputFormatURL = f.findURLinJars(inputFormatName); 110 if (inputFormatURL != null) { 111 outputFormatName = inputFormatName = this.copyToConfig(inputFormatURL); 112 } else { 113 outputFormatName = inputFormatName = this.copyToConfig(inputFormatName); 114 } 115 } else { 116 URL inputFormatURL = f.findURLinJars(inputFormatName); 117 if (inputFormatURL != null) { 118 inputFormatName = this.copyToConfig(inputFormatURL); 119 } else { 120 inputFormatName = this.copyToConfig(inputFormatName); 121 } 122 URL outputFormatURL = f.findURLinJars(outputFormatName); 123 if (inputFormatURL != null) { 124 outputFormatName = this.copyToConfig(outputFormatURL); 125 } else { 126 outputFormatName = this.copyToConfig(outputFormatName); 127 } 128 } 129 OptionManager.instance().overloadOptionValue(containerIndex, "input", "format", inputFormatName); 130 } else { 131 if (outputFormatName.length() == 0) { 132 outputFormatName = inputFormatName; 133 } 134 } 135 dataFormatInstances = new HashMap<String, DataFormatInstance>(3); 136 137 inputFormatURL = findURL(inputFormatName); 138 outputFormatURL = findURL(outputFormatName); 139 if (outputFormatURL != null) { 140 try { 141 InputStream is = outputFormatURL.openStream(); 142 } catch (FileNotFoundException e) { 143 outputFormatURL = f.findURL(outputFormatName); 144 } catch (IOException e) { 145 outputFormatURL = f.findURL(outputFormatName); 146 } 147 } else { 148 outputFormatURL = f.findURL(outputFormatName); 149 } 150 dataFormatManager = new DataFormatManager(inputFormatURL, outputFormatURL); 151 symbolTables = new TrieSymbolTableHandler(); 152 153 if (dataFormatManager.getInputDataFormatSpec().getDataStructure() == DataStructure.PHRASE) { 154 String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim(); 155 if (mode.equals("learn")) { 156 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); 157 for (Dependency dep : deps) { 158 URL depFormatURL = f.findURLinJars(dep.getUrlString()); 159 if (depFormatURL != null) { 160 this.copyToConfig(depFormatURL); 161 } else { 162 this.copyToConfig(dep.getUrlString()); 163 } 164 } 165 } 166 else if (mode.equals("parse")) { 167 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); 168 String nullValueStategy = OptionManager.instance().getOptionValue(containerIndex, "singlemalt", "null_value").toString(); 169 for (Dependency dep : deps) { 170 // URL depFormatURL = f.findURLinJars(dep.getUrlString()); 171 DataFormatInstance dataFormatInstance = dataFormatManager.getDataFormatSpec(dep.getDependentOn()).createDataFormatInstance(symbolTables, nullValueStategy); 172 addDataFormatInstance(dataFormatManager.getDataFormatSpec(dep.getDependentOn()).getDataFormatName(), dataFormatInstance); 173 dataFormatManager.setInputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); 174 // dataFormatManager.setOutputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); 175 } 176 } 177 } 178 } 179 180 private URL findURL(String specModelFileName) throws MaltChainedException { 181 URL url = null; 182 File specFile = this.getFile(specModelFileName); 183 if (specFile.exists()) { 184 try { 185 url = new URL("file:///"+specFile.getAbsolutePath()); 186 } catch (MalformedURLException e) { 187 throw new MaltChainedException("Malformed URL: "+specFile, e); 188 } 189 } else { 190 url = this.getConfigFileEntryURL(specModelFileName); 191 } 192 return url; 193 } 194 195 /** 196 * Creates an output stream writer, where the corresponding file will be included in the configuration directory 197 * 198 * @param fileName a file name 199 * @param charSet a char set 200 * @return an output stream writer for writing to a file within the configuration directory 201 * @throws MaltChainedException 202 */ 203 public OutputStreamWriter getOutputStreamWriter(String fileName, String charSet) throws MaltChainedException { 204 try { 205 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName), charSet); 206 } catch (FileNotFoundException e) { 207 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 208 } catch (UnsupportedEncodingException e) { 209 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 210 } 211 } 212 213 /** 214 * Creates an output stream writer, where the corresponding file will be included in the 215 * configuration directory. Uses UTF-8 for character encoding. 216 * 217 * @param fileName a file name 218 * @return an output stream writer for writing to a file within the configuration directory 219 * @throws MaltChainedException 220 */ 221 public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException { 222 try { 223 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8"); 224 } catch (FileNotFoundException e) { 225 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 226 } catch (UnsupportedEncodingException e) { 227 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); 228 } 229 } 230 /** 231 * This method acts the same as getOutputStreamWriter with the difference that the writer append in the file 232 * if it already exists instead of deleting the previous content before starting to write. 233 * 234 * @param fileName a file name 235 * @return an output stream writer for writing to a file within the configuration directory 236 * @throws MaltChainedException 237 */ 238 public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException { 239 try { 240 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8"); 241 } catch (FileNotFoundException e) { 242 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 243 } catch (UnsupportedEncodingException e) { 244 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); 245 } 246 } 247 248 /** 249 * Creates an input stream reader for reading a file within the configuration directory 250 * 251 * @param fileName a file name 252 * @param charSet a char set 253 * @return an input stream reader for reading a file within the configuration directory 254 * @throws MaltChainedException 255 */ 256 public InputStreamReader getInputStreamReader(String fileName, String charSet) throws MaltChainedException { 257 try { 258 return new InputStreamReader(new FileInputStream(configDirectory.getPath()+File.separator+fileName), charSet); 259 } catch (FileNotFoundException e) { 260 throw new ConfigurationException("The file '"+fileName+"' cannot be found. ", e); 261 } catch (UnsupportedEncodingException e) { 262 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 263 } 264 } 265 266 /** 267 * Creates an input stream reader for reading a file within the configuration directory. 268 * Uses UTF-8 for character encoding. 269 * 270 * @param fileName a file name 271 * @return an input stream reader for reading a file within the configuration directory 272 * @throws MaltChainedException 273 */ 274 public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException { 275 return getInputStreamReader(fileName, "UTF-8"); 276 } 277 278 public JarEntry getConfigFileEntry(String fileName) throws MaltChainedException { 279 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 280 try { 281 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 282 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 283 if (entry == null) { 284 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 285 } 286 return entry; 287 } catch (FileNotFoundException e) { 288 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e); 289 } catch (IOException e) { 290 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e); 291 } 292 } 293 294 public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException { 295 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 296 try { 297 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 298 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 299 if (entry == null) { 300 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 301 } 302 if (entry == null) { 303 throw new FileNotFoundException(); 304 } 305 return mcoFile.getInputStream(entry); 306 } catch (FileNotFoundException e) { 307 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e); 308 } catch (IOException e) { 309 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e); 310 } 311 } 312 313 public InputStreamReader getInputStreamReaderFromConfigFileEntry(String fileName, String charSet) throws MaltChainedException { 314 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 315 try { 316 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 317 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 318 if (entry == null) { 319 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 320 } 321 if (entry == null) { 322 throw new FileNotFoundException(); 323 } 324 return new InputStreamReader(mcoFile.getInputStream(entry), charSet); 325 } catch (FileNotFoundException e) { 326 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e); 327 } catch (UnsupportedEncodingException e) { 328 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 329 } catch (IOException e) { 330 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e); 331 } 332 } 333 334 public InputStreamReader getInputStreamReaderFromConfigFile(String fileName) throws MaltChainedException { 335 return getInputStreamReaderFromConfigFileEntry(fileName, "UTF-8"); 336 } 337 338 /** 339 * Returns a file handler object of a file within the configuration directory 340 * 341 * @param fileName a file name 342 * @return a file handler object of a file within the configuration directory 343 * @throws MaltChainedException 344 */ 345 public File getFile(String fileName) throws MaltChainedException { 346 return new File(configDirectory.getPath()+File.separator+fileName); 347 } 348 349 public URL getConfigFileEntryURL(String fileName) throws MaltChainedException { 350 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 351 try { 352 if (!mcoPath.exists()) { 353 throw new ConfigurationException("Couldn't find mco-file '" +mcoPath.getAbsolutePath()+ "'"); 354 } 355 // new URL("file", null, mcoPath.getAbsolutePath()); 356 URL url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'/'+fileName + "\n"); 357 try { 358 InputStream is = url.openStream(); 359 is.close(); 360 } catch (IOException e) { 361 url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'\\'+fileName + "\n"); 362 } 363 return url; 364 } catch (MalformedURLException e) { 365 throw new ConfigurationException("Couldn't find the URL '" +"jar:"+mcoPath.getAbsolutePath()+"!/"+getName()+'/'+fileName+ "'", e); 366 } 367 } 368 369 /** 370 * Copies a file into the configuration directory. 371 * 372 * @param source a path to file 373 * @throws MaltChainedException 374 */ 375 public String copyToConfig(File source) throws MaltChainedException { 376 byte[] readBuffer = new byte[BUFFER]; 377 String destination = configDirectory.getPath()+File.separator+source.getName(); 378 try { 379 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source)); 380 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); 381 382 int n = 0; 383 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { 384 bos.write(readBuffer, 0, n); 385 } 386 bos.flush(); 387 bos.close(); 388 bis.close(); 389 } catch (FileNotFoundException e) { 390 throw new ConfigurationException("The source file '"+source+"' cannot be found or the destination file '"+destination+"' cannot be created when coping the file. ", e); 391 } catch (IOException e) { 392 throw new ConfigurationException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e); 393 } 394 return source.getName(); 395 } 396 397 398 public String copyToConfig(String fileUrl) throws MaltChainedException { 399 final URLFinder f = new URLFinder(); 400 URL url = f.findURL(fileUrl); 401 if (url == null) { 402 throw new ConfigurationException("The file or URL '"+fileUrl+"' could not be found. "); 403 } 404 return copyToConfig(url); 405 } 406 407 public String copyToConfig(URL url) throws MaltChainedException { 408 if (url == null) { 409 throw new ConfigurationException("URL could not be found. "); 410 } 411 byte[] readBuffer = new byte[BUFFER]; 412 String destFileName = url.getPath(); 413 int indexSlash = destFileName.lastIndexOf('/'); 414 if (indexSlash == -1) { 415 indexSlash = destFileName.lastIndexOf('\\'); 416 } 417 418 if (indexSlash != -1) { 419 destFileName = destFileName.substring(indexSlash+1); 420 } 421 422 String destination = configDirectory.getPath()+File.separator+destFileName; 423 try { 424 BufferedInputStream bis = new BufferedInputStream(url.openStream()); 425 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); 426 427 int n = 0; 428 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { 429 bos.write(readBuffer, 0, n); 430 } 431 bos.flush(); 432 bos.close(); 433 bis.close(); 434 } catch (FileNotFoundException e) { 435 throw new ConfigurationException("The destination file '"+destination+"' cannot be created when coping the file. ", e); 436 } catch (IOException e) { 437 throw new ConfigurationException("The URL '"+url+"' cannot be copied to destination '"+destination+"'. ", e); 438 } 439 return destFileName; 440 } 441 442 443 /** 444 * Removes the configuration directory, if it exists and it contains a .info file. 445 * 446 * @throws MaltChainedException 447 */ 448 public void deleteConfigDirectory() throws MaltChainedException { 449 if (!configDirectory.exists()) { 450 return; 451 } 452 File infoFile = new File(configDirectory.getPath()+File.separator+getName()+"_"+getType()+".info"); 453 if (infoFile.exists()) { 454 deleteConfigDirectory(configDirectory); 455 } else { 456 throw new ConfigurationException("There exists a directory that is not a MaltParser configuration directory. "); 457 } 458 } 459 460 private void deleteConfigDirectory(File directory) throws MaltChainedException { 461 if (directory.exists()) { 462 File[] files = directory.listFiles(); 463 for (int i = 0; i < files.length; i++) { 464 if (files[i].isDirectory()) { 465 deleteConfigDirectory(files[i]); 466 } else { 467 files[i].delete(); 468 } 469 } 470 } else { 471 throw new ConfigurationException("The directory '"+directory.getPath()+ "' cannot be found. "); 472 } 473 directory.delete(); 474 } 475 476 /** 477 * Returns a file handler object for the configuration directory 478 * 479 * @return a file handler object for the configuration directory 480 */ 481 public File getConfigDirectory() { 482 return configDirectory; 483 } 484 485 protected void setConfigDirectory(File dir) { 486 this.configDirectory = dir; 487 } 488 489 /** 490 * Creates the configuration directory 491 * 492 * @throws MaltChainedException 493 */ 494 public void createConfigDirectory() throws MaltChainedException { 495 checkConfigDirectory(); 496 configDirectory.mkdir(); 497 createInfoFile(); 498 } 499 500 protected void checkConfigDirectory() throws MaltChainedException { 501 if (configDirectory.exists() && !configDirectory.isDirectory()) { 502 throw new ConfigurationException("The configuration directory name already exists and is not a directory. "); 503 } 504 505 if (configDirectory.exists()) { 506 deleteConfigDirectory(); 507 } 508 } 509 510 protected void createInfoFile() throws MaltChainedException { 511 infoFile = new BufferedWriter(getOutputStreamWriter(getName()+"_"+getType()+".info")); 512 try { 513 infoFile.write("CONFIGURATION\n"); 514 infoFile.write("Configuration name: "+getName()+"\n"); 515 infoFile.write("Configuration type: "+getType()+"\n"); 516 infoFile.write("Created: "+new Date(System.currentTimeMillis())+"\n"); 517 518 infoFile.write("\nSYSTEM\n"); 519 infoFile.write("Operating system architecture: "+System.getProperty("os.arch")+"\n"); 520 infoFile.write("Operating system name: "+System.getProperty("os.name")+"\n"); 521 infoFile.write("JRE vendor name: "+System.getProperty("java.vendor")+"\n"); 522 infoFile.write("JRE version number: "+System.getProperty("java.version")+"\n"); 523 524 infoFile.write("\nMALTPARSER\n"); 525 infoFile.write("Version: "+SystemInfo.getVersion()+"\n"); 526 infoFile.write("Build date: "+SystemInfo.getBuildDate()+"\n"); 527 Set<String> excludeGroups = new HashSet<String>(); 528 excludeGroups.add("system"); 529 infoFile.write("\nSETTINGS\n"); 530 infoFile.write(OptionManager.instance().toStringPrettyValues(containerIndex, excludeGroups)); 531 infoFile.flush(); 532 } catch (IOException e) { 533 throw new ConfigurationException("Could not create the maltparser info file. "); 534 } 535 } 536 537 /** 538 * Returns a writer to the configuration information file 539 * 540 * @return a writer to the configuration information file 541 * @throws MaltChainedException 542 */ 543 public BufferedWriter getInfoFileWriter() throws MaltChainedException { 544 return infoFile; 545 } 546 547 /** 548 * Creates the malt configuration file (.mco). This file is compressed. 549 * 550 * @throws MaltChainedException 551 */ 552 public void createConfigFile() throws MaltChainedException { 553 try { 554 JarOutputStream jos = new JarOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 555 // configLogger.info("Creates configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco' ...\n"); 556 createConfigFile(configDirectory.getPath(), jos); 557 jos.close(); 558 } catch (FileNotFoundException e) { 559 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 560 } catch (IOException e) { 561 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be created. ", e); 562 } 563 } 564 565 private void createConfigFile(String directory, JarOutputStream jos) throws MaltChainedException { 566 byte[] readBuffer = new byte[BUFFER]; 567 try { 568 File zipDir = new File(directory); 569 String[] dirList = zipDir.list(); 570 571 int bytesIn = 0; 572 573 for (int i = 0; i < dirList.length; i++) { 574 File f = new File(zipDir, dirList[i]); 575 if (f.isDirectory()) { 576 String filePath = f.getPath(); 577 createConfigFile(filePath, jos); 578 continue; 579 } 580 581 FileInputStream fis = new FileInputStream(f); 582 583 String entryPath = f.getPath().substring(workingDirectory.getPath().length()+1); 584 entryPath = entryPath.replace('\\', '/'); 585 JarEntry entry = new JarEntry(entryPath); 586 jos.putNextEntry(entry); 587 588 while ((bytesIn = fis.read(readBuffer)) != -1) { 589 jos.write(readBuffer, 0, bytesIn); 590 } 591 592 fis.close(); 593 } 594 } catch (FileNotFoundException e) { 595 throw new ConfigurationException("The directory '"+directory+"' cannot be found. ", e); 596 } catch (IOException e) { 597 throw new ConfigurationException("The directory '"+directory+"' cannot be compressed into a mco file. ", e); 598 } 599 } 600 601 602 public void copyConfigFile(File in, File out, Versioning versioning) throws MaltChainedException { 603 try { 604 JarFile jar = new JarFile(in); 605 JarOutputStream tempJar = new JarOutputStream(new FileOutputStream(out)); 606 byte[] buffer = new byte[BUFFER]; 607 int bytesRead; 608 final StringBuilder sb = new StringBuilder(); 609 final URLFinder f = new URLFinder(); 610 611 for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements(); ) { 612 JarEntry inEntry = (JarEntry) entries.nextElement(); 613 InputStream entryStream = jar.getInputStream(inEntry); 614 JarEntry outEntry = versioning.getJarEntry(inEntry); 615 616 if (!versioning.hasChanges(inEntry, outEntry)) { 617 tempJar.putNextEntry(outEntry); 618 while ((bytesRead = entryStream.read(buffer)) != -1) { 619 tempJar.write(buffer, 0, bytesRead); 620 } 621 } else { 622 tempJar.putNextEntry(outEntry); 623 BufferedReader br = new BufferedReader(new InputStreamReader(entryStream)); 624 String line = null; 625 sb.setLength(0); 626 while ((line = br.readLine()) != null) { 627 sb.append(line); 628 sb.append('\n'); 629 } 630 String outString = versioning.modifyJarEntry(inEntry, outEntry, sb); 631 tempJar.write(outString.getBytes()); 632 } 633 } 634 if (versioning.getFeatureModelXML() != null && versioning.getFeatureModelXML().startsWith("/appdata")) { 635 int index = versioning.getFeatureModelXML().lastIndexOf('/'); 636 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getFeatureModelXML()).openStream()); 637 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getFeatureModelXML().substring(index+1))); 638 int n = 0; 639 while ((n = bis.read(buffer, 0, BUFFER)) != -1) { 640 tempJar.write(buffer, 0, n); 641 } 642 bis.close(); 643 } 644 if (versioning.getInputFormatXML() != null && versioning.getInputFormatXML().startsWith("/appdata")) { 645 int index = versioning.getInputFormatXML().lastIndexOf('/'); 646 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getInputFormatXML()).openStream()); 647 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getInputFormatXML().substring(index+1))); 648 int n = 0; 649 while ((n = bis.read(buffer, 0, BUFFER)) != -1) { 650 tempJar.write(buffer, 0, n); 651 } 652 bis.close(); 653 } 654 tempJar.flush(); 655 tempJar.close(); 656 jar.close(); 657 } catch (IOException e) { 658 throw new ConfigurationException("", e); 659 } 660 } 661 662 protected void initNameNTypeFromInfoFile(URL url) throws MaltChainedException { 663 if (url == null) { 664 throw new ConfigurationException("The URL cannot be found. "); 665 } 666 try { 667 JarEntry je; 668 JarInputStream jis = new JarInputStream(url.openConnection().getInputStream()); 669 while ((je = jis.getNextJarEntry()) != null) { 670 String entryName = je.getName(); 671 if (entryName.endsWith(".info")) { 672 int indexUnderScore = entryName.lastIndexOf('_'); 673 int indexSeparator = entryName.lastIndexOf(File.separator); 674 if (indexSeparator == -1) { 675 indexSeparator = entryName.lastIndexOf('/'); 676 } 677 if (indexSeparator == -1) { 678 indexSeparator = entryName.lastIndexOf('\\'); 679 } 680 int indexDot = entryName.lastIndexOf('.'); 681 if (indexUnderScore == -1 || indexDot == -1) { 682 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. "); 683 } 684 setName(entryName.substring(indexSeparator+1, indexUnderScore)); 685 setType(entryName.substring(indexUnderScore+1, indexDot)); 686 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName())); 687 jis.close(); 688 return; 689 } 690 } 691 692 } catch (IOException e) { 693 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. ", e); 694 } 695 } 696 697 /** 698 * Prints the content of the configuration information file to the system logger 699 * 700 * @throws MaltChainedException 701 */ 702 public void echoInfoFile() throws MaltChainedException { 703 checkConfigDirectory(); 704 JarInputStream jis; 705 try { 706 if (url == null) { 707 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 708 } else { 709 jis = new JarInputStream(url.openConnection().getInputStream()); 710 } 711 JarEntry je; 712 713 while ((je = jis.getNextJarEntry()) != null) { 714 String entryName = je.getName(); 715 716 if (entryName.endsWith(getName()+"_"+getType()+".info")) { 717 int c; 718 while ((c = jis.read()) != -1) { 719 SystemLogger.logger().info((char)c); 720 } 721 } 722 } 723 jis.close(); 724 } catch (FileNotFoundException e) { 725 throw new ConfigurationException("Could not print configuration information file. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 726 } catch (IOException e) { 727 throw new ConfigurationException("Could not print configuration information file. ", e); 728 } 729 730 } 731 732 /** 733 * Unpacks the malt configuration file (.mco). 734 * 735 * @throws MaltChainedException 736 */ 737 public void unpackConfigFile() throws MaltChainedException { 738 checkConfigDirectory(); 739 JarInputStream jis; 740 try { 741 if (url == null) { 742 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 743 } else { 744 jis = new JarInputStream(url.openConnection().getInputStream()); 745 } 746 unpackConfigFile(jis); 747 jis.close(); 748 } catch (FileNotFoundException e) { 749 throw new ConfigurationException("Could not unpack configuration. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 750 } catch (IOException e) { 751 if (configDirectory.exists()) { 752 deleteConfigDirectory(); 753 } 754 throw new ConfigurationException("Could not unpack configuration. ", e); 755 } 756 initCreatedByMaltParserVersionFromInfoFile(); 757 } 758 759 protected void unpackConfigFile(JarInputStream jis) throws MaltChainedException { 760 try { 761 JarEntry je; 762 byte[] readBuffer = new byte[BUFFER]; 763 SortedSet<String> directoryCache = new TreeSet<String>(); 764 while ((je = jis.getNextJarEntry()) != null) { 765 String entryName = je.getName(); 766 767 if (entryName.startsWith("/")) { 768 entryName = entryName.substring(1); 769 } 770 if (entryName.endsWith(File.separator) || entryName.endsWith("/")) { 771 return; 772 } 773 int index = -1; 774 if (File.separator.equals("\\")) { 775 entryName = entryName.replace('/', '\\'); 776 index = entryName.lastIndexOf("\\"); 777 } else if (File.separator.equals("/")) { 778 entryName = entryName.replace('\\', '/'); 779 index = entryName.lastIndexOf("/"); 780 } 781 if (index > 0) { 782 String dirName = entryName.substring(0, index); 783 if (!directoryCache.contains(dirName)) { 784 File directory = new File(workingDirectory.getPath()+File.separator+dirName); 785 if (!(directory.exists() && directory.isDirectory())) { 786 if (!directory.mkdirs()) { 787 throw new ConfigurationException("Unable to make directory '" + dirName +"'. "); 788 } 789 directoryCache.add(dirName); 790 } 791 } 792 } 793 794 if (new File(workingDirectory.getPath()+File.separator+entryName).isDirectory() && new File(workingDirectory.getPath()+File.separator+entryName).exists()) { 795 continue; 796 } 797 BufferedOutputStream bos; 798 try { 799 bos = new BufferedOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+entryName), BUFFER); 800 } catch (FileNotFoundException e) { 801 throw new ConfigurationException("Could not unpack configuration. The file '"+workingDirectory.getPath()+File.separator+entryName+"' cannot be unpacked. ", e); 802 } 803 int n = 0; 804 while ((n = jis.read(readBuffer, 0, BUFFER)) != -1) { 805 bos.write(readBuffer, 0, n); 806 } 807 bos.flush(); 808 bos.close(); 809 } 810 } catch (IOException e) { 811 throw new ConfigurationException("Could not unpack configuration. ", e); 812 } 813 } 814 815 /** 816 * Returns the name of the configuration directory 817 * 818 * @return the name of the configuration directory 819 */ 820 public String getName() { 821 return name; 822 } 823 824 protected void setName(String name) { 825 this.name = name; 826 } 827 828 /** 829 * Returns the type of the configuration directory 830 * 831 * @return the type of the configuration directory 832 */ 833 public String getType() { 834 return type; 835 } 836 837 protected void setType(String type) { 838 this.type = type; 839 } 840 841 /** 842 * Returns a file handler object for the working directory 843 * 844 * @return a file handler object for the working directory 845 */ 846 public File getWorkingDirectory() { 847 return workingDirectory; 848 } 849 850 /** 851 * Initialize the working directory 852 * 853 * @throws MaltChainedException 854 */ 855 public void initWorkingDirectory() throws MaltChainedException { 856 try { 857 initWorkingDirectory(OptionManager.instance().getOptionValue(0, "config", "workingdir").toString()); 858 } catch (NullPointerException e) { 859 throw new ConfigurationException("The configuration cannot be found.", e); 860 } 861 } 862 863 /** 864 * Initialize the working directory according to the path. If the path is equals to "user.dir" or current directory, then the current directory 865 * will be the working directory. 866 * 867 * @param pathPrefixString the path to the working directory 868 * @throws MaltChainedException 869 */ 870 public void initWorkingDirectory(String pathPrefixString) throws MaltChainedException { 871 if (pathPrefixString == null || pathPrefixString.equalsIgnoreCase("user.dir") || pathPrefixString.equalsIgnoreCase(".")) { 872 workingDirectory = new File(System.getProperty("user.dir")); 873 } else { 874 workingDirectory = new File(pathPrefixString); 875 } 876 877 if (workingDirectory == null || !workingDirectory.isDirectory()) { 878 new ConfigurationException("The specified working directory '"+pathPrefixString+"' is not a directory. "); 879 } 880 } 881 882 /** 883 * Returns the URL to the malt configuration file (.mco) 884 * 885 * @return the URL to the malt configuration file (.mco) 886 */ 887 public URL getUrl() { 888 return url; 889 } 890 891 protected void setUrl(URL url) { 892 this.url = url; 893 } 894 895 /** 896 * Returns the option container index 897 * 898 * @return the option container index 899 */ 900 public int getContainerIndex() { 901 return containerIndex; 902 } 903 904 /** 905 * Sets the option container index 906 * 907 * @param containerIndex a option container index 908 */ 909 public void setContainerIndex(int containerIndex) { 910 this.containerIndex = containerIndex; 911 } 912 913 /** 914 * Returns the version number of MaltParser which created the malt configuration file (.mco) 915 * 916 * @return the version number of MaltParser which created the malt configuration file (.mco) 917 */ 918 public String getCreatedByMaltParserVersion() { 919 return createdByMaltParserVersion; 920 } 921 922 /** 923 * Sets the version number of MaltParser which created the malt configuration file (.mco) 924 * 925 * @param createdByMaltParserVersion a version number of MaltParser 926 */ 927 public void setCreatedByMaltParserVersion(String createdByMaltParserVersion) { 928 this.createdByMaltParserVersion = createdByMaltParserVersion; 929 } 930 931 public void initCreatedByMaltParserVersionFromInfoFile() throws MaltChainedException { 932 try { 933 BufferedReader br = new BufferedReader(getInputStreamReaderFromConfigFileEntry(getName()+"_"+getType()+".info", "UTF-8")); 934 String line = null; 935 while ((line = br.readLine()) != null) { 936 if (line.startsWith("Version: ")) { 937 setCreatedByMaltParserVersion(line.substring(31)); 938 break; 939 } 940 } 941 br.close(); 942 } catch (FileNotFoundException e) { 943 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); 944 } catch (IOException e) { 945 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); 946 } 947 } 948 949 public void versioning() throws MaltChainedException { 950 initCreatedByMaltParserVersionFromInfoFile(); 951 SystemLogger.logger().info("\nCurrent version : " + SystemInfo.getVersion() + "\n"); 952 SystemLogger.logger().info("Parser model version : " + createdByMaltParserVersion + "\n"); 953 if (SystemInfo.getVersion() == null) { 954 throw new ConfigurationException("Couln't determine the version of MaltParser"); 955 } else if (createdByMaltParserVersion == null) { 956 throw new ConfigurationException("Couln't determine the version of the parser model"); 957 } else if (SystemInfo.getVersion().equals(createdByMaltParserVersion)) { 958 SystemLogger.logger().info("The parser model "+getName()+".mco has already the same version as the current version of MaltParser. \n"); 959 return; 960 } 961 962 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 963 File newMcoPath = new File(workingDirectory.getPath()+File.separator+getName()+"."+SystemInfo.getVersion().trim()+".mco"); 964 Versioning versioning = new Versioning(name, type, mcoPath, createdByMaltParserVersion); 965 if (!versioning.support(createdByMaltParserVersion)) { 966 SystemLogger.logger().warn("The parser model '"+ name+ ".mco' is created by MaltParser "+getCreatedByMaltParserVersion()+", which cannot be converted to a MaltParser "+SystemInfo.getVersion()+" parser model.\n"); 967 SystemLogger.logger().warn("Please retrain the parser model with MaltParser "+SystemInfo.getVersion() +" or download MaltParser "+getCreatedByMaltParserVersion()+" from http://maltparser.org/download.html\n"); 968 return; 969 } 970 SystemLogger.logger().info("Converts the parser model '"+ mcoPath.getName()+ "' into '"+newMcoPath.getName()+"'....\n"); 971 copyConfigFile(mcoPath, newMcoPath, versioning); 972 } 973 974 protected void checkNConvertConfigVersion() throws MaltChainedException { 975 if (createdByMaltParserVersion.startsWith("1.0")) { 976 SystemLogger.logger().info(" Converts the MaltParser configuration "); 977 SystemLogger.logger().info("1.0"); 978 SystemLogger.logger().info(" to "); 979 SystemLogger.logger().info(SystemInfo.getVersion()); 980 SystemLogger.logger().info("\n"); 981 File[] configFiles = configDirectory.listFiles(); 982 for (int i = 0, n = configFiles.length; i < n; i++) { 983 if (configFiles[i].getName().endsWith(".mod")) { 984 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0."+configFiles[i].getName())); 985 } 986 if (configFiles[i].getName().endsWith(getName()+".dsm")) { 987 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0.dsm")); 988 } 989 if (configFiles[i].getName().equals("savedoptions.sop")) { 990 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 991 } 992 if (configFiles[i].getName().equals("symboltables.sym")) { 993 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"symboltables.sym.old")); 994 } 995 } 996 try { 997 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 998 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop")); 999 String line; 1000 while ((line = br.readLine()) != null) { 1001 if (line.startsWith("0\tguide\tprediction_strategy")) { 1002 bw.write("0\tguide\tdecision_settings\tT.TRANS+A.DEPREL\n"); 1003 } else { 1004 bw.write(line); 1005 bw.write('\n'); 1006 } 1007 } 1008 br.close(); 1009 bw.flush(); 1010 bw.close(); 1011 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete(); 1012 } catch (FileNotFoundException e) { 1013 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1014 } catch (IOException e) { 1015 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1016 } 1017 try { 1018 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"symboltables.sym.old")); 1019 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"symboltables.sym")); 1020 String line; 1021 while ((line = br.readLine()) != null) { 1022 if (line.startsWith("AllCombinedClassTable")) { 1023 bw.write("T.TRANS+A.DEPREL\n"); 1024 } else { 1025 bw.write(line); 1026 bw.write('\n'); 1027 } 1028 } 1029 br.close(); 1030 bw.flush(); 1031 bw.close(); 1032 new File(configDirectory.getPath()+File.separator+"symboltables.sym.old").delete(); 1033 } catch (FileNotFoundException e) { 1034 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); 1035 } catch (IOException e) { 1036 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); 1037 } 1038 } 1039 if (!createdByMaltParserVersion.startsWith("1.3")) { 1040 SystemLogger.logger().info(" Converts the MaltParser configuration "); 1041 SystemLogger.logger().info(createdByMaltParserVersion); 1042 SystemLogger.logger().info(" to "); 1043 SystemLogger.logger().info(SystemInfo.getVersion()); 1044 SystemLogger.logger().info("\n"); 1045 1046 1047 new File(configDirectory.getPath()+File.separator+"savedoptions.sop").renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 1048 try { 1049 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 1050 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop")); 1051 String line; 1052 while ((line = br.readLine()) != null) { 1053 int index = line.indexOf('\t'); 1054 int container = 0; 1055 if (index > -1) { 1056 container = Integer.parseInt(line.substring(0,index)); 1057 } 1058 1059 if (line.startsWith(container+"\tnivre\tpost_processing")) { 1060 } else if (line.startsWith(container+"\tmalt0.4\tbehavior")) { 1061 if (line.endsWith("true")) { 1062 SystemLogger.logger().info("MaltParser 1.3 doesn't support MaltParser 0.4 emulation."); 1063 br.close(); 1064 bw.flush(); 1065 bw.close(); 1066 deleteConfigDirectory(); 1067 System.exit(0); 1068 } 1069 } else if (line.startsWith(container+"\tsinglemalt\tparsing_algorithm")) { 1070 bw.write(container); 1071 bw.write("\tsinglemalt\tparsing_algorithm\t"); 1072 if (line.endsWith("NivreStandard")) { 1073 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcStandardFactory"); 1074 } else if (line.endsWith("NivreEager")) { 1075 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcEagerFactory"); 1076 } else if (line.endsWith("CovingtonNonProjective")) { 1077 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonNonProjFactory"); 1078 } else if (line.endsWith("CovingtonProjective")) { 1079 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonProjFactory"); 1080 } 1081 bw.write('\n'); 1082 } else { 1083 bw.write(line); 1084 bw.write('\n'); 1085 } 1086 } 1087 br.close(); 1088 bw.flush(); 1089 bw.close(); 1090 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete(); 1091 } catch (FileNotFoundException e) { 1092 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1093 } catch (IOException e) { 1094 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1095 } 1096 } 1097 } 1098 1099 /** 1100 * Terminates the configuration directory 1101 * 1102 * @throws MaltChainedException 1103 */ 1104 public void terminate() throws MaltChainedException { 1105 if (infoFile != null) { 1106 try { 1107 infoFile.flush(); 1108 infoFile.close(); 1109 } catch (IOException e) { 1110 throw new ConfigurationException("Could not close configuration information file. ", e); 1111 } 1112 } 1113 symbolTables = null; 1114 // configuration = null; 1115 } 1116 1117 /* (non-Javadoc) 1118 * @see java.lang.Object#finalize() 1119 */ 1120 protected void finalize() throws Throwable { 1121 try { 1122 if (infoFile != null) { 1123 infoFile.flush(); 1124 infoFile.close(); 1125 } 1126 } finally { 1127 super.finalize(); 1128 } 1129 } 1130 1131 public SymbolTableHandler getSymbolTables() { 1132 return symbolTables; 1133 } 1134 1135 public void setSymbolTables(SymbolTableHandler symbolTables) { 1136 this.symbolTables = symbolTables; 1137 } 1138 1139 public DataFormatManager getDataFormatManager() { 1140 return dataFormatManager; 1141 } 1142 1143 public void setDataFormatManager(DataFormatManager dataFormatManager) { 1144 this.dataFormatManager = dataFormatManager; 1145 } 1146 1147 public Set<String> getDataFormatInstanceKeys() { 1148 return dataFormatInstances.keySet(); 1149 } 1150 1151 public boolean addDataFormatInstance(String key, DataFormatInstance dataFormatInstance) { 1152 if (!dataFormatInstances.containsKey(key)) { 1153 dataFormatInstances.put(key, dataFormatInstance); 1154 return true; 1155 } 1156 return false; 1157 } 1158 1159 public DataFormatInstance getDataFormatInstance(String key) { 1160 return dataFormatInstances.get(key); 1161 } 1162 1163 public int sizeDataFormatInstance() { 1164 return dataFormatInstances.size(); 1165 } 1166 1167 public DataFormatInstance getInputDataFormatInstance() { 1168 return dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName()); 1169 } 1170 1171 public URL getInputFormatURL() { 1172 return inputFormatURL; 1173 } 1174 1175 public URL getOutputFormatURL() { 1176 return outputFormatURL; 1177 } 1178 1179 1180 }