001 package org.maltparser.core.config; 002 003 import java.io.BufferedInputStream; 004 import java.io.BufferedOutputStream; 005 import java.io.BufferedReader; 006 import java.io.BufferedWriter; 007 import java.io.File; 008 import java.io.FileInputStream; 009 import java.io.FileNotFoundException; 010 import java.io.FileOutputStream; 011 import java.io.FileReader; 012 import java.io.FileWriter; 013 import java.io.IOException; 014 import java.io.InputStream; 015 import java.io.InputStreamReader; 016 import java.io.OutputStreamWriter; 017 import java.io.UnsupportedEncodingException; 018 import java.net.MalformedURLException; 019 import java.net.URL; 020 import java.util.Date; 021 import java.util.Enumeration; 022 import java.util.HashMap; 023 import java.util.Set; 024 import java.util.SortedSet; 025 import java.util.TreeSet; 026 import java.util.jar.JarEntry; 027 import java.util.jar.JarFile; 028 import java.util.jar.JarInputStream; 029 import java.util.jar.JarOutputStream; 030 031 import org.maltparser.core.config.version.Versioning; 032 import org.maltparser.core.exception.MaltChainedException; 033 import org.maltparser.core.helper.HashSet; 034 import org.maltparser.core.helper.SystemInfo; 035 import org.maltparser.core.helper.SystemLogger; 036 import org.maltparser.core.helper.URLFinder; 037 import org.maltparser.core.io.dataformat.DataFormatInstance; 038 import org.maltparser.core.io.dataformat.DataFormatManager; 039 import org.maltparser.core.io.dataformat.DataFormatSpecification.DataStructure; 040 import org.maltparser.core.io.dataformat.DataFormatSpecification.Dependency; 041 import org.maltparser.core.options.OptionManager; 042 import org.maltparser.core.symbol.SymbolTableHandler; 043 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler; 044 045 046 /** 047 * This class contains methods for handle the configuration directory. 048 * 049 * @author Johan Hall 050 */ 051 public class ConfigurationDir { 052 protected static final int BUFFER = 4096; 053 protected File configDirectory; 054 protected String name; 055 protected String type; 056 protected File workingDirectory; 057 protected URL url = null; 058 protected int containerIndex; 059 protected BufferedWriter infoFile = null; 060 protected String createdByMaltParserVersion; 061 062 private SymbolTableHandler symbolTables; 063 private DataFormatManager dataFormatManager; 064 private HashMap<String,DataFormatInstance> dataFormatInstances; 065 private URL inputFormatURL; 066 private URL outputFormatURL; 067 068 /** 069 * Creates a configuration directory from a mco-file specified by an URL. 070 * 071 * @param url an URL to a mco-file 072 * @throws MaltChainedException 073 */ 074 public ConfigurationDir(URL url) throws MaltChainedException { 075 initWorkingDirectory(); 076 setUrl(url); 077 initNameNTypeFromInfoFile(url); 078 // initData(); 079 } 080 081 /** 082 * Creates a new configuration directory or a configuration directory from a mco-file 083 * 084 * @param name the name of the configuration 085 * @param type the type of configuration 086 * @param containerIndex the container index 087 * @throws MaltChainedException 088 */ 089 public ConfigurationDir(String name, String type, int containerIndex) throws MaltChainedException { 090 setContainerIndex(containerIndex); 091 092 initWorkingDirectory(); 093 if (name != null && name.length() > 0 && type != null && type.length() > 0) { 094 setName(name); 095 setType(type); 096 } else { 097 throw new ConfigurationException("The configuration name is not specified. "); 098 } 099 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName())); 100 } 101 102 public void initDataFormat() throws MaltChainedException { 103 String inputFormatName = OptionManager.instance().getOptionValue(containerIndex, "input", "format").toString().trim(); 104 String outputFormatName = OptionManager.instance().getOptionValue(containerIndex, "output", "format").toString().trim(); 105 final URLFinder f = new URLFinder(); 106 107 if (configDirectory != null && configDirectory.exists()) { 108 if (outputFormatName.length() == 0 || inputFormatName.equals(outputFormatName)) { 109 URL inputFormatURL = f.findURLinJars(inputFormatName); 110 if (inputFormatURL != null) { 111 outputFormatName = inputFormatName = this.copyToConfig(inputFormatURL); 112 } else { 113 outputFormatName = inputFormatName = this.copyToConfig(inputFormatName); 114 } 115 } else { 116 URL inputFormatURL = f.findURLinJars(inputFormatName); 117 if (inputFormatURL != null) { 118 inputFormatName = this.copyToConfig(inputFormatURL); 119 } else { 120 inputFormatName = this.copyToConfig(inputFormatName); 121 } 122 URL outputFormatURL = f.findURLinJars(outputFormatName); 123 if (inputFormatURL != null) { 124 outputFormatName = this.copyToConfig(outputFormatURL); 125 } else { 126 outputFormatName = this.copyToConfig(outputFormatName); 127 } 128 } 129 OptionManager.instance().overloadOptionValue(containerIndex, "input", "format", inputFormatName); 130 } else { 131 if (outputFormatName.length() == 0) { 132 outputFormatName = inputFormatName; 133 } 134 } 135 dataFormatInstances = new HashMap<String, DataFormatInstance>(3); 136 137 inputFormatURL = findURL(inputFormatName); 138 outputFormatURL = findURL(outputFormatName); 139 if (outputFormatURL != null) { 140 try { 141 InputStream is = outputFormatURL.openStream(); 142 } catch (FileNotFoundException e) { 143 outputFormatURL = f.findURL(outputFormatName); 144 } catch (IOException e) { 145 outputFormatURL = f.findURL(outputFormatName); 146 } 147 } else { 148 outputFormatURL = f.findURL(outputFormatName); 149 } 150 dataFormatManager = new DataFormatManager(inputFormatURL, outputFormatURL); 151 152 String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim(); 153 if (mode.equals("parse")) { 154 symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE); 155 // symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE); 156 } else { 157 symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE); 158 } 159 if (dataFormatManager.getInputDataFormatSpec().getDataStructure() == DataStructure.PHRASE) { 160 if (mode.equals("learn")) { 161 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); 162 for (Dependency dep : deps) { 163 URL depFormatURL = f.findURLinJars(dep.getUrlString()); 164 if (depFormatURL != null) { 165 this.copyToConfig(depFormatURL); 166 } else { 167 this.copyToConfig(dep.getUrlString()); 168 } 169 } 170 } 171 else if (mode.equals("parse")) { 172 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); 173 String nullValueStategy = OptionManager.instance().getOptionValue(containerIndex, "singlemalt", "null_value").toString(); 174 for (Dependency dep : deps) { 175 // URL depFormatURL = f.findURLinJars(dep.getUrlString()); 176 DataFormatInstance dataFormatInstance = dataFormatManager.getDataFormatSpec(dep.getDependentOn()).createDataFormatInstance(symbolTables, nullValueStategy); 177 addDataFormatInstance(dataFormatManager.getDataFormatSpec(dep.getDependentOn()).getDataFormatName(), dataFormatInstance); 178 dataFormatManager.setInputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); 179 // dataFormatManager.setOutputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); 180 } 181 } 182 } 183 } 184 185 private URL findURL(String specModelFileName) throws MaltChainedException { 186 URL url = null; 187 File specFile = this.getFile(specModelFileName); 188 if (specFile.exists()) { 189 try { 190 url = new URL("file:///"+specFile.getAbsolutePath()); 191 } catch (MalformedURLException e) { 192 throw new MaltChainedException("Malformed URL: "+specFile, e); 193 } 194 } else { 195 url = this.getConfigFileEntryURL(specModelFileName); 196 } 197 return url; 198 } 199 200 /** 201 * Creates an output stream writer, where the corresponding file will be included in the configuration directory 202 * 203 * @param fileName a file name 204 * @param charSet a char set 205 * @return an output stream writer for writing to a file within the configuration directory 206 * @throws MaltChainedException 207 */ 208 public OutputStreamWriter getOutputStreamWriter(String fileName, String charSet) throws MaltChainedException { 209 try { 210 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName), charSet); 211 } catch (FileNotFoundException e) { 212 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 213 } catch (UnsupportedEncodingException e) { 214 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 215 } 216 } 217 218 /** 219 * Creates an output stream writer, where the corresponding file will be included in the 220 * configuration directory. Uses UTF-8 for character encoding. 221 * 222 * @param fileName a file name 223 * @return an output stream writer for writing to a file within the configuration directory 224 * @throws MaltChainedException 225 */ 226 public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException { 227 try { 228 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8"); 229 } catch (FileNotFoundException e) { 230 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 231 } catch (UnsupportedEncodingException e) { 232 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); 233 } 234 } 235 /** 236 * This method acts the same as getOutputStreamWriter with the difference that the writer append in the file 237 * if it already exists instead of deleting the previous content before starting to write. 238 * 239 * @param fileName a file name 240 * @return an output stream writer for writing to a file within the configuration directory 241 * @throws MaltChainedException 242 */ 243 public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException { 244 try { 245 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8"); 246 } catch (FileNotFoundException e) { 247 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 248 } catch (UnsupportedEncodingException e) { 249 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); 250 } 251 } 252 253 /** 254 * Creates an input stream reader for reading a file within the configuration directory 255 * 256 * @param fileName a file name 257 * @param charSet a char set 258 * @return an input stream reader for reading a file within the configuration directory 259 * @throws MaltChainedException 260 */ 261 public InputStreamReader getInputStreamReader(String fileName, String charSet) throws MaltChainedException { 262 try { 263 return new InputStreamReader(new FileInputStream(configDirectory.getPath()+File.separator+fileName), charSet); 264 } catch (FileNotFoundException e) { 265 throw new ConfigurationException("The file '"+fileName+"' cannot be found. ", e); 266 } catch (UnsupportedEncodingException e) { 267 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 268 } 269 } 270 271 /** 272 * Creates an input stream reader for reading a file within the configuration directory. 273 * Uses UTF-8 for character encoding. 274 * 275 * @param fileName a file name 276 * @return an input stream reader for reading a file within the configuration directory 277 * @throws MaltChainedException 278 */ 279 public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException { 280 return getInputStreamReader(fileName, "UTF-8"); 281 } 282 283 public JarEntry getConfigFileEntry(String fileName) throws MaltChainedException { 284 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 285 try { 286 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 287 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 288 if (entry == null) { 289 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 290 } 291 return entry; 292 } catch (FileNotFoundException e) { 293 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e); 294 } catch (IOException e) { 295 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e); 296 } 297 } 298 299 public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException { 300 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 301 try { 302 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 303 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 304 if (entry == null) { 305 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 306 } 307 if (entry == null) { 308 throw new FileNotFoundException(); 309 } 310 return mcoFile.getInputStream(entry); 311 } catch (FileNotFoundException e) { 312 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e); 313 } catch (IOException e) { 314 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e); 315 } 316 } 317 318 public InputStreamReader getInputStreamReaderFromConfigFileEntry(String fileName, String charSet) throws MaltChainedException { 319 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 320 try { 321 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 322 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 323 if (entry == null) { 324 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 325 } 326 if (entry == null) { 327 throw new FileNotFoundException(); 328 } 329 return new InputStreamReader(mcoFile.getInputStream(entry), charSet); 330 } catch (FileNotFoundException e) { 331 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e); 332 } catch (UnsupportedEncodingException e) { 333 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 334 } catch (IOException e) { 335 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e); 336 } 337 } 338 339 public InputStreamReader getInputStreamReaderFromConfigFile(String fileName) throws MaltChainedException { 340 return getInputStreamReaderFromConfigFileEntry(fileName, "UTF-8"); 341 } 342 343 /** 344 * Returns a file handler object of a file within the configuration directory 345 * 346 * @param fileName a file name 347 * @return a file handler object of a file within the configuration directory 348 * @throws MaltChainedException 349 */ 350 public File getFile(String fileName) throws MaltChainedException { 351 return new File(configDirectory.getPath()+File.separator+fileName); 352 } 353 354 public URL getConfigFileEntryURL(String fileName) throws MaltChainedException { 355 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 356 try { 357 if (!mcoPath.exists()) { 358 throw new ConfigurationException("Couldn't find mco-file '" +mcoPath.getAbsolutePath()+ "'"); 359 } 360 // new URL("file", null, mcoPath.getAbsolutePath()); 361 URL url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'/'+fileName + "\n"); 362 try { 363 InputStream is = url.openStream(); 364 is.close(); 365 } catch (IOException e) { 366 url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'\\'+fileName + "\n"); 367 } 368 return url; 369 } catch (MalformedURLException e) { 370 throw new ConfigurationException("Couldn't find the URL '" +"jar:"+mcoPath.getAbsolutePath()+"!/"+getName()+'/'+fileName+ "'", e); 371 } 372 } 373 374 /** 375 * Copies a file into the configuration directory. 376 * 377 * @param source a path to file 378 * @throws MaltChainedException 379 */ 380 public String copyToConfig(File source) throws MaltChainedException { 381 byte[] readBuffer = new byte[BUFFER]; 382 String destination = configDirectory.getPath()+File.separator+source.getName(); 383 try { 384 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source)); 385 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); 386 387 int n = 0; 388 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { 389 bos.write(readBuffer, 0, n); 390 } 391 bos.flush(); 392 bos.close(); 393 bis.close(); 394 } catch (FileNotFoundException e) { 395 throw new ConfigurationException("The source file '"+source+"' cannot be found or the destination file '"+destination+"' cannot be created when coping the file. ", e); 396 } catch (IOException e) { 397 throw new ConfigurationException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e); 398 } 399 return source.getName(); 400 } 401 402 403 public String copyToConfig(String fileUrl) throws MaltChainedException { 404 final URLFinder f = new URLFinder(); 405 URL url = f.findURL(fileUrl); 406 if (url == null) { 407 throw new ConfigurationException("The file or URL '"+fileUrl+"' could not be found. "); 408 } 409 return copyToConfig(url); 410 } 411 412 public String copyToConfig(URL url) throws MaltChainedException { 413 if (url == null) { 414 throw new ConfigurationException("URL could not be found. "); 415 } 416 byte[] readBuffer = new byte[BUFFER]; 417 String destFileName = url.getPath(); 418 int indexSlash = destFileName.lastIndexOf('/'); 419 if (indexSlash == -1) { 420 indexSlash = destFileName.lastIndexOf('\\'); 421 } 422 423 if (indexSlash != -1) { 424 destFileName = destFileName.substring(indexSlash+1); 425 } 426 427 String destination = configDirectory.getPath()+File.separator+destFileName; 428 try { 429 BufferedInputStream bis = new BufferedInputStream(url.openStream()); 430 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); 431 432 int n = 0; 433 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { 434 bos.write(readBuffer, 0, n); 435 } 436 bos.flush(); 437 bos.close(); 438 bis.close(); 439 } catch (FileNotFoundException e) { 440 throw new ConfigurationException("The destination file '"+destination+"' cannot be created when coping the file. ", e); 441 } catch (IOException e) { 442 throw new ConfigurationException("The URL '"+url+"' cannot be copied to destination '"+destination+"'. ", e); 443 } 444 return destFileName; 445 } 446 447 448 /** 449 * Removes the configuration directory, if it exists and it contains a .info file. 450 * 451 * @throws MaltChainedException 452 */ 453 public void deleteConfigDirectory() throws MaltChainedException { 454 if (!configDirectory.exists()) { 455 return; 456 } 457 File infoFile = new File(configDirectory.getPath()+File.separator+getName()+"_"+getType()+".info"); 458 if (infoFile.exists()) { 459 deleteConfigDirectory(configDirectory); 460 } else { 461 throw new ConfigurationException("There exists a directory that is not a MaltParser configuration directory. "); 462 } 463 } 464 465 private void deleteConfigDirectory(File directory) throws MaltChainedException { 466 if (directory.exists()) { 467 File[] files = directory.listFiles(); 468 for (int i = 0; i < files.length; i++) { 469 if (files[i].isDirectory()) { 470 deleteConfigDirectory(files[i]); 471 } else { 472 files[i].delete(); 473 } 474 } 475 } else { 476 throw new ConfigurationException("The directory '"+directory.getPath()+ "' cannot be found. "); 477 } 478 directory.delete(); 479 } 480 481 /** 482 * Returns a file handler object for the configuration directory 483 * 484 * @return a file handler object for the configuration directory 485 */ 486 public File getConfigDirectory() { 487 return configDirectory; 488 } 489 490 protected void setConfigDirectory(File dir) { 491 this.configDirectory = dir; 492 } 493 494 /** 495 * Creates the configuration directory 496 * 497 * @throws MaltChainedException 498 */ 499 public void createConfigDirectory() throws MaltChainedException { 500 checkConfigDirectory(); 501 configDirectory.mkdir(); 502 createInfoFile(); 503 } 504 505 protected void checkConfigDirectory() throws MaltChainedException { 506 if (configDirectory.exists() && !configDirectory.isDirectory()) { 507 throw new ConfigurationException("The configuration directory name already exists and is not a directory. "); 508 } 509 510 if (configDirectory.exists()) { 511 deleteConfigDirectory(); 512 } 513 } 514 515 protected void createInfoFile() throws MaltChainedException { 516 infoFile = new BufferedWriter(getOutputStreamWriter(getName()+"_"+getType()+".info")); 517 try { 518 infoFile.write("CONFIGURATION\n"); 519 infoFile.write("Configuration name: "+getName()+"\n"); 520 infoFile.write("Configuration type: "+getType()+"\n"); 521 infoFile.write("Created: "+new Date(System.currentTimeMillis())+"\n"); 522 523 infoFile.write("\nSYSTEM\n"); 524 infoFile.write("Operating system architecture: "+System.getProperty("os.arch")+"\n"); 525 infoFile.write("Operating system name: "+System.getProperty("os.name")+"\n"); 526 infoFile.write("JRE vendor name: "+System.getProperty("java.vendor")+"\n"); 527 infoFile.write("JRE version number: "+System.getProperty("java.version")+"\n"); 528 529 infoFile.write("\nMALTPARSER\n"); 530 infoFile.write("Version: "+SystemInfo.getVersion()+"\n"); 531 infoFile.write("Build date: "+SystemInfo.getBuildDate()+"\n"); 532 Set<String> excludeGroups = new HashSet<String>(); 533 excludeGroups.add("system"); 534 infoFile.write("\nSETTINGS\n"); 535 infoFile.write(OptionManager.instance().toStringPrettyValues(containerIndex, excludeGroups)); 536 infoFile.flush(); 537 } catch (IOException e) { 538 throw new ConfigurationException("Could not create the maltparser info file. "); 539 } 540 } 541 542 /** 543 * Returns a writer to the configuration information file 544 * 545 * @return a writer to the configuration information file 546 * @throws MaltChainedException 547 */ 548 public BufferedWriter getInfoFileWriter() throws MaltChainedException { 549 return infoFile; 550 } 551 552 /** 553 * Creates the malt configuration file (.mco). This file is compressed. 554 * 555 * @throws MaltChainedException 556 */ 557 public void createConfigFile() throws MaltChainedException { 558 try { 559 JarOutputStream jos = new JarOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 560 // configLogger.info("Creates configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco' ...\n"); 561 createConfigFile(configDirectory.getPath(), jos); 562 jos.close(); 563 } catch (FileNotFoundException e) { 564 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 565 } catch (IOException e) { 566 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be created. ", e); 567 } 568 } 569 570 private void createConfigFile(String directory, JarOutputStream jos) throws MaltChainedException { 571 byte[] readBuffer = new byte[BUFFER]; 572 try { 573 File zipDir = new File(directory); 574 String[] dirList = zipDir.list(); 575 576 int bytesIn = 0; 577 578 for (int i = 0; i < dirList.length; i++) { 579 File f = new File(zipDir, dirList[i]); 580 if (f.isDirectory()) { 581 String filePath = f.getPath(); 582 createConfigFile(filePath, jos); 583 continue; 584 } 585 586 FileInputStream fis = new FileInputStream(f); 587 588 String entryPath = f.getPath().substring(workingDirectory.getPath().length()+1); 589 entryPath = entryPath.replace('\\', '/'); 590 JarEntry entry = new JarEntry(entryPath); 591 jos.putNextEntry(entry); 592 593 while ((bytesIn = fis.read(readBuffer)) != -1) { 594 jos.write(readBuffer, 0, bytesIn); 595 } 596 597 fis.close(); 598 } 599 } catch (FileNotFoundException e) { 600 throw new ConfigurationException("The directory '"+directory+"' cannot be found. ", e); 601 } catch (IOException e) { 602 throw new ConfigurationException("The directory '"+directory+"' cannot be compressed into a mco file. ", e); 603 } 604 } 605 606 607 public void copyConfigFile(File in, File out, Versioning versioning) throws MaltChainedException { 608 try { 609 JarFile jar = new JarFile(in); 610 JarOutputStream tempJar = new JarOutputStream(new FileOutputStream(out)); 611 byte[] buffer = new byte[BUFFER]; 612 int bytesRead; 613 final StringBuilder sb = new StringBuilder(); 614 final URLFinder f = new URLFinder(); 615 616 for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements(); ) { 617 JarEntry inEntry = (JarEntry) entries.nextElement(); 618 InputStream entryStream = jar.getInputStream(inEntry); 619 JarEntry outEntry = versioning.getJarEntry(inEntry); 620 621 if (!versioning.hasChanges(inEntry, outEntry)) { 622 tempJar.putNextEntry(outEntry); 623 while ((bytesRead = entryStream.read(buffer)) != -1) { 624 tempJar.write(buffer, 0, bytesRead); 625 } 626 } else { 627 tempJar.putNextEntry(outEntry); 628 BufferedReader br = new BufferedReader(new InputStreamReader(entryStream)); 629 String line = null; 630 sb.setLength(0); 631 while ((line = br.readLine()) != null) { 632 sb.append(line); 633 sb.append('\n'); 634 } 635 String outString = versioning.modifyJarEntry(inEntry, outEntry, sb); 636 tempJar.write(outString.getBytes()); 637 } 638 } 639 if (versioning.getFeatureModelXML() != null && versioning.getFeatureModelXML().startsWith("/appdata")) { 640 int index = versioning.getFeatureModelXML().lastIndexOf('/'); 641 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getFeatureModelXML()).openStream()); 642 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getFeatureModelXML().substring(index+1))); 643 int n = 0; 644 while ((n = bis.read(buffer, 0, BUFFER)) != -1) { 645 tempJar.write(buffer, 0, n); 646 } 647 bis.close(); 648 } 649 if (versioning.getInputFormatXML() != null && versioning.getInputFormatXML().startsWith("/appdata")) { 650 int index = versioning.getInputFormatXML().lastIndexOf('/'); 651 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getInputFormatXML()).openStream()); 652 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getInputFormatXML().substring(index+1))); 653 int n = 0; 654 while ((n = bis.read(buffer, 0, BUFFER)) != -1) { 655 tempJar.write(buffer, 0, n); 656 } 657 bis.close(); 658 } 659 tempJar.flush(); 660 tempJar.close(); 661 jar.close(); 662 } catch (IOException e) { 663 throw new ConfigurationException("", e); 664 } 665 } 666 667 protected void initNameNTypeFromInfoFile(URL url) throws MaltChainedException { 668 if (url == null) { 669 throw new ConfigurationException("The URL cannot be found. "); 670 } 671 try { 672 JarEntry je; 673 JarInputStream jis = new JarInputStream(url.openConnection().getInputStream()); 674 while ((je = jis.getNextJarEntry()) != null) { 675 String entryName = je.getName(); 676 if (entryName.endsWith(".info")) { 677 int indexUnderScore = entryName.lastIndexOf('_'); 678 int indexSeparator = entryName.lastIndexOf(File.separator); 679 if (indexSeparator == -1) { 680 indexSeparator = entryName.lastIndexOf('/'); 681 } 682 if (indexSeparator == -1) { 683 indexSeparator = entryName.lastIndexOf('\\'); 684 } 685 int indexDot = entryName.lastIndexOf('.'); 686 if (indexUnderScore == -1 || indexDot == -1) { 687 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. "); 688 } 689 setName(entryName.substring(indexSeparator+1, indexUnderScore)); 690 setType(entryName.substring(indexUnderScore+1, indexDot)); 691 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName())); 692 jis.close(); 693 return; 694 } 695 } 696 697 } catch (IOException e) { 698 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. ", e); 699 } 700 } 701 702 /** 703 * Prints the content of the configuration information file to the system logger 704 * 705 * @throws MaltChainedException 706 */ 707 public void echoInfoFile() throws MaltChainedException { 708 checkConfigDirectory(); 709 JarInputStream jis; 710 try { 711 if (url == null) { 712 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 713 } else { 714 jis = new JarInputStream(url.openConnection().getInputStream()); 715 } 716 JarEntry je; 717 718 while ((je = jis.getNextJarEntry()) != null) { 719 String entryName = je.getName(); 720 721 if (entryName.endsWith(getName()+"_"+getType()+".info")) { 722 int c; 723 while ((c = jis.read()) != -1) { 724 SystemLogger.logger().info((char)c); 725 } 726 } 727 } 728 jis.close(); 729 } catch (FileNotFoundException e) { 730 throw new ConfigurationException("Could not print configuration information file. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 731 } catch (IOException e) { 732 throw new ConfigurationException("Could not print configuration information file. ", e); 733 } 734 735 } 736 737 /** 738 * Unpacks the malt configuration file (.mco). 739 * 740 * @throws MaltChainedException 741 */ 742 public void unpackConfigFile() throws MaltChainedException { 743 checkConfigDirectory(); 744 JarInputStream jis; 745 try { 746 if (url == null) { 747 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 748 } else { 749 jis = new JarInputStream(url.openConnection().getInputStream()); 750 } 751 unpackConfigFile(jis); 752 jis.close(); 753 } catch (FileNotFoundException e) { 754 throw new ConfigurationException("Could not unpack configuration. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 755 } catch (IOException e) { 756 if (configDirectory.exists()) { 757 deleteConfigDirectory(); 758 } 759 throw new ConfigurationException("Could not unpack configuration. ", e); 760 } 761 initCreatedByMaltParserVersionFromInfoFile(); 762 } 763 764 protected void unpackConfigFile(JarInputStream jis) throws MaltChainedException { 765 try { 766 JarEntry je; 767 byte[] readBuffer = new byte[BUFFER]; 768 SortedSet<String> directoryCache = new TreeSet<String>(); 769 while ((je = jis.getNextJarEntry()) != null) { 770 String entryName = je.getName(); 771 772 if (entryName.startsWith("/")) { 773 entryName = entryName.substring(1); 774 } 775 if (entryName.endsWith(File.separator) || entryName.endsWith("/")) { 776 return; 777 } 778 int index = -1; 779 if (File.separator.equals("\\")) { 780 entryName = entryName.replace('/', '\\'); 781 index = entryName.lastIndexOf("\\"); 782 } else if (File.separator.equals("/")) { 783 entryName = entryName.replace('\\', '/'); 784 index = entryName.lastIndexOf("/"); 785 } 786 if (index > 0) { 787 String dirName = entryName.substring(0, index); 788 if (!directoryCache.contains(dirName)) { 789 File directory = new File(workingDirectory.getPath()+File.separator+dirName); 790 if (!(directory.exists() && directory.isDirectory())) { 791 if (!directory.mkdirs()) { 792 throw new ConfigurationException("Unable to make directory '" + dirName +"'. "); 793 } 794 directoryCache.add(dirName); 795 } 796 } 797 } 798 799 if (new File(workingDirectory.getPath()+File.separator+entryName).isDirectory() && new File(workingDirectory.getPath()+File.separator+entryName).exists()) { 800 continue; 801 } 802 BufferedOutputStream bos; 803 try { 804 bos = new BufferedOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+entryName), BUFFER); 805 } catch (FileNotFoundException e) { 806 throw new ConfigurationException("Could not unpack configuration. The file '"+workingDirectory.getPath()+File.separator+entryName+"' cannot be unpacked. ", e); 807 } 808 int n = 0; 809 while ((n = jis.read(readBuffer, 0, BUFFER)) != -1) { 810 bos.write(readBuffer, 0, n); 811 } 812 bos.flush(); 813 bos.close(); 814 } 815 } catch (IOException e) { 816 throw new ConfigurationException("Could not unpack configuration. ", e); 817 } 818 } 819 820 /** 821 * Returns the name of the configuration directory 822 * 823 * @return the name of the configuration directory 824 */ 825 public String getName() { 826 return name; 827 } 828 829 protected void setName(String name) { 830 this.name = name; 831 } 832 833 /** 834 * Returns the type of the configuration directory 835 * 836 * @return the type of the configuration directory 837 */ 838 public String getType() { 839 return type; 840 } 841 842 protected void setType(String type) { 843 this.type = type; 844 } 845 846 /** 847 * Returns a file handler object for the working directory 848 * 849 * @return a file handler object for the working directory 850 */ 851 public File getWorkingDirectory() { 852 return workingDirectory; 853 } 854 855 /** 856 * Initialize the working directory 857 * 858 * @throws MaltChainedException 859 */ 860 public void initWorkingDirectory() throws MaltChainedException { 861 try { 862 initWorkingDirectory(OptionManager.instance().getOptionValue(0, "config", "workingdir").toString()); 863 } catch (NullPointerException e) { 864 throw new ConfigurationException("The configuration cannot be found.", e); 865 } 866 } 867 868 /** 869 * Initialize the working directory according to the path. If the path is equals to "user.dir" or current directory, then the current directory 870 * will be the working directory. 871 * 872 * @param pathPrefixString the path to the working directory 873 * @throws MaltChainedException 874 */ 875 public void initWorkingDirectory(String pathPrefixString) throws MaltChainedException { 876 if (pathPrefixString == null || pathPrefixString.equalsIgnoreCase("user.dir") || pathPrefixString.equalsIgnoreCase(".")) { 877 workingDirectory = new File(System.getProperty("user.dir")); 878 } else { 879 workingDirectory = new File(pathPrefixString); 880 } 881 882 if (workingDirectory == null || !workingDirectory.isDirectory()) { 883 new ConfigurationException("The specified working directory '"+pathPrefixString+"' is not a directory. "); 884 } 885 } 886 887 /** 888 * Returns the URL to the malt configuration file (.mco) 889 * 890 * @return the URL to the malt configuration file (.mco) 891 */ 892 public URL getUrl() { 893 return url; 894 } 895 896 protected void setUrl(URL url) { 897 this.url = url; 898 } 899 900 /** 901 * Returns the option container index 902 * 903 * @return the option container index 904 */ 905 public int getContainerIndex() { 906 return containerIndex; 907 } 908 909 /** 910 * Sets the option container index 911 * 912 * @param containerIndex a option container index 913 */ 914 public void setContainerIndex(int containerIndex) { 915 this.containerIndex = containerIndex; 916 } 917 918 /** 919 * Returns the version number of MaltParser which created the malt configuration file (.mco) 920 * 921 * @return the version number of MaltParser which created the malt configuration file (.mco) 922 */ 923 public String getCreatedByMaltParserVersion() { 924 return createdByMaltParserVersion; 925 } 926 927 /** 928 * Sets the version number of MaltParser which created the malt configuration file (.mco) 929 * 930 * @param createdByMaltParserVersion a version number of MaltParser 931 */ 932 public void setCreatedByMaltParserVersion(String createdByMaltParserVersion) { 933 this.createdByMaltParserVersion = createdByMaltParserVersion; 934 } 935 936 public void initCreatedByMaltParserVersionFromInfoFile() throws MaltChainedException { 937 try { 938 BufferedReader br = new BufferedReader(getInputStreamReaderFromConfigFileEntry(getName()+"_"+getType()+".info", "UTF-8")); 939 String line = null; 940 while ((line = br.readLine()) != null) { 941 if (line.startsWith("Version: ")) { 942 setCreatedByMaltParserVersion(line.substring(31)); 943 break; 944 } 945 } 946 br.close(); 947 } catch (FileNotFoundException e) { 948 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); 949 } catch (IOException e) { 950 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); 951 } 952 } 953 954 public void versioning() throws MaltChainedException { 955 initCreatedByMaltParserVersionFromInfoFile(); 956 SystemLogger.logger().info("\nCurrent version : " + SystemInfo.getVersion() + "\n"); 957 SystemLogger.logger().info("Parser model version : " + createdByMaltParserVersion + "\n"); 958 if (SystemInfo.getVersion() == null) { 959 throw new ConfigurationException("Couln't determine the version of MaltParser"); 960 } else if (createdByMaltParserVersion == null) { 961 throw new ConfigurationException("Couln't determine the version of the parser model"); 962 } else if (SystemInfo.getVersion().equals(createdByMaltParserVersion)) { 963 SystemLogger.logger().info("The parser model "+getName()+".mco has already the same version as the current version of MaltParser. \n"); 964 return; 965 } 966 967 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 968 File newMcoPath = new File(workingDirectory.getPath()+File.separator+getName()+"."+SystemInfo.getVersion().trim()+".mco"); 969 Versioning versioning = new Versioning(name, type, mcoPath, createdByMaltParserVersion); 970 if (!versioning.support(createdByMaltParserVersion)) { 971 SystemLogger.logger().warn("The parser model '"+ name+ ".mco' is created by MaltParser "+getCreatedByMaltParserVersion()+", which cannot be converted to a MaltParser "+SystemInfo.getVersion()+" parser model.\n"); 972 SystemLogger.logger().warn("Please retrain the parser model with MaltParser "+SystemInfo.getVersion() +" or download MaltParser "+getCreatedByMaltParserVersion()+" from http://maltparser.org/download.html\n"); 973 return; 974 } 975 SystemLogger.logger().info("Converts the parser model '"+ mcoPath.getName()+ "' into '"+newMcoPath.getName()+"'....\n"); 976 copyConfigFile(mcoPath, newMcoPath, versioning); 977 } 978 979 protected void checkNConvertConfigVersion() throws MaltChainedException { 980 if (createdByMaltParserVersion.startsWith("1.0")) { 981 SystemLogger.logger().info(" Converts the MaltParser configuration "); 982 SystemLogger.logger().info("1.0"); 983 SystemLogger.logger().info(" to "); 984 SystemLogger.logger().info(SystemInfo.getVersion()); 985 SystemLogger.logger().info("\n"); 986 File[] configFiles = configDirectory.listFiles(); 987 for (int i = 0, n = configFiles.length; i < n; i++) { 988 if (configFiles[i].getName().endsWith(".mod")) { 989 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0."+configFiles[i].getName())); 990 } 991 if (configFiles[i].getName().endsWith(getName()+".dsm")) { 992 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0.dsm")); 993 } 994 if (configFiles[i].getName().equals("savedoptions.sop")) { 995 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 996 } 997 if (configFiles[i].getName().equals("symboltables.sym")) { 998 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"symboltables.sym.old")); 999 } 1000 } 1001 try { 1002 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 1003 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop")); 1004 String line; 1005 while ((line = br.readLine()) != null) { 1006 if (line.startsWith("0\tguide\tprediction_strategy")) { 1007 bw.write("0\tguide\tdecision_settings\tT.TRANS+A.DEPREL\n"); 1008 } else { 1009 bw.write(line); 1010 bw.write('\n'); 1011 } 1012 } 1013 br.close(); 1014 bw.flush(); 1015 bw.close(); 1016 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete(); 1017 } catch (FileNotFoundException e) { 1018 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1019 } catch (IOException e) { 1020 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1021 } 1022 try { 1023 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"symboltables.sym.old")); 1024 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"symboltables.sym")); 1025 String line; 1026 while ((line = br.readLine()) != null) { 1027 if (line.startsWith("AllCombinedClassTable")) { 1028 bw.write("T.TRANS+A.DEPREL\n"); 1029 } else { 1030 bw.write(line); 1031 bw.write('\n'); 1032 } 1033 } 1034 br.close(); 1035 bw.flush(); 1036 bw.close(); 1037 new File(configDirectory.getPath()+File.separator+"symboltables.sym.old").delete(); 1038 } catch (FileNotFoundException e) { 1039 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); 1040 } catch (IOException e) { 1041 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); 1042 } 1043 } 1044 if (!createdByMaltParserVersion.startsWith("1.3")) { 1045 SystemLogger.logger().info(" Converts the MaltParser configuration "); 1046 SystemLogger.logger().info(createdByMaltParserVersion); 1047 SystemLogger.logger().info(" to "); 1048 SystemLogger.logger().info(SystemInfo.getVersion()); 1049 SystemLogger.logger().info("\n"); 1050 1051 1052 new File(configDirectory.getPath()+File.separator+"savedoptions.sop").renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 1053 try { 1054 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 1055 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop")); 1056 String line; 1057 while ((line = br.readLine()) != null) { 1058 int index = line.indexOf('\t'); 1059 int container = 0; 1060 if (index > -1) { 1061 container = Integer.parseInt(line.substring(0,index)); 1062 } 1063 1064 if (line.startsWith(container+"\tnivre\tpost_processing")) { 1065 } else if (line.startsWith(container+"\tmalt0.4\tbehavior")) { 1066 if (line.endsWith("true")) { 1067 SystemLogger.logger().info("MaltParser 1.3 doesn't support MaltParser 0.4 emulation."); 1068 br.close(); 1069 bw.flush(); 1070 bw.close(); 1071 deleteConfigDirectory(); 1072 System.exit(0); 1073 } 1074 } else if (line.startsWith(container+"\tsinglemalt\tparsing_algorithm")) { 1075 bw.write(container); 1076 bw.write("\tsinglemalt\tparsing_algorithm\t"); 1077 if (line.endsWith("NivreStandard")) { 1078 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcStandardFactory"); 1079 } else if (line.endsWith("NivreEager")) { 1080 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcEagerFactory"); 1081 } else if (line.endsWith("CovingtonNonProjective")) { 1082 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonNonProjFactory"); 1083 } else if (line.endsWith("CovingtonProjective")) { 1084 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonProjFactory"); 1085 } 1086 bw.write('\n'); 1087 } else { 1088 bw.write(line); 1089 bw.write('\n'); 1090 } 1091 } 1092 br.close(); 1093 bw.flush(); 1094 bw.close(); 1095 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete(); 1096 } catch (FileNotFoundException e) { 1097 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1098 } catch (IOException e) { 1099 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1100 } 1101 } 1102 } 1103 1104 /** 1105 * Terminates the configuration directory 1106 * 1107 * @throws MaltChainedException 1108 */ 1109 public void terminate() throws MaltChainedException { 1110 if (infoFile != null) { 1111 try { 1112 infoFile.flush(); 1113 infoFile.close(); 1114 } catch (IOException e) { 1115 throw new ConfigurationException("Could not close configuration information file. ", e); 1116 } 1117 } 1118 symbolTables = null; 1119 // configuration = null; 1120 } 1121 1122 /* (non-Javadoc) 1123 * @see java.lang.Object#finalize() 1124 */ 1125 protected void finalize() throws Throwable { 1126 try { 1127 if (infoFile != null) { 1128 infoFile.flush(); 1129 infoFile.close(); 1130 } 1131 } finally { 1132 super.finalize(); 1133 } 1134 } 1135 1136 public SymbolTableHandler getSymbolTables() { 1137 return symbolTables; 1138 } 1139 1140 public void setSymbolTables(SymbolTableHandler symbolTables) { 1141 this.symbolTables = symbolTables; 1142 } 1143 1144 public DataFormatManager getDataFormatManager() { 1145 return dataFormatManager; 1146 } 1147 1148 public void setDataFormatManager(DataFormatManager dataFormatManager) { 1149 this.dataFormatManager = dataFormatManager; 1150 } 1151 1152 public Set<String> getDataFormatInstanceKeys() { 1153 return dataFormatInstances.keySet(); 1154 } 1155 1156 public boolean addDataFormatInstance(String key, DataFormatInstance dataFormatInstance) { 1157 if (!dataFormatInstances.containsKey(key)) { 1158 dataFormatInstances.put(key, dataFormatInstance); 1159 return true; 1160 } 1161 return false; 1162 } 1163 1164 public DataFormatInstance getDataFormatInstance(String key) { 1165 return dataFormatInstances.get(key); 1166 } 1167 1168 public int sizeDataFormatInstance() { 1169 return dataFormatInstances.size(); 1170 } 1171 1172 public DataFormatInstance getInputDataFormatInstance() { 1173 return dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName()); 1174 } 1175 1176 public URL getInputFormatURL() { 1177 return inputFormatURL; 1178 } 1179 1180 public URL getOutputFormatURL() { 1181 return outputFormatURL; 1182 } 1183 1184 1185 }