001 package org.maltparser.core.config; 002 003 import java.io.BufferedInputStream; 004 import java.io.BufferedOutputStream; 005 import java.io.BufferedReader; 006 import java.io.BufferedWriter; 007 import java.io.File; 008 import java.io.FileInputStream; 009 import java.io.FileNotFoundException; 010 import java.io.FileOutputStream; 011 import java.io.FileReader; 012 import java.io.FileWriter; 013 import java.io.IOException; 014 import java.io.InputStream; 015 import java.io.InputStreamReader; 016 import java.io.OutputStreamWriter; 017 import java.io.UnsupportedEncodingException; 018 import java.net.MalformedURLException; 019 import java.net.URL; 020 import java.util.Date; 021 import java.util.Enumeration; 022 import java.util.HashMap; 023 import java.util.Set; 024 import java.util.SortedSet; 025 import java.util.TreeSet; 026 import java.util.jar.JarEntry; 027 import java.util.jar.JarFile; 028 import java.util.jar.JarInputStream; 029 import java.util.jar.JarOutputStream; 030 031 import org.maltparser.core.config.version.Versioning; 032 import org.maltparser.core.exception.MaltChainedException; 033 import org.maltparser.core.helper.HashSet; 034 import org.maltparser.core.helper.SystemInfo; 035 import org.maltparser.core.helper.SystemLogger; 036 import org.maltparser.core.helper.Util; 037 import org.maltparser.core.io.dataformat.DataFormatInstance; 038 import org.maltparser.core.io.dataformat.DataFormatManager; 039 import org.maltparser.core.io.dataformat.DataFormatSpecification.DataStructure; 040 import org.maltparser.core.io.dataformat.DataFormatSpecification.Dependency; 041 import org.maltparser.core.options.OptionManager; 042 import org.maltparser.core.symbol.SymbolTableHandler; 043 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler; 044 045 046 /** 047 * This class contains methods for handle the configuration directory. 048 * 049 * @author Johan Hall 050 */ 051 public class ConfigurationDir { 052 protected static final int BUFFER = 4096; 053 protected File configDirectory; 054 protected String name; 055 protected String type; 056 protected File workingDirectory; 057 protected URL url = null; 058 protected int containerIndex; 059 protected BufferedWriter infoFile = null; 060 protected String createdByMaltParserVersion; 061 062 private SymbolTableHandler symbolTables; 063 private DataFormatManager dataFormatManager; 064 private HashMap<String,DataFormatInstance> dataFormatInstances; 065 private URL inputFormatURL; 066 private URL outputFormatURL; 067 068 /** 069 * Creates a configuration directory from a mco-file specified by an URL. 070 * 071 * @param url an URL to a mco-file 072 * @throws MaltChainedException 073 */ 074 public ConfigurationDir(URL url) throws MaltChainedException { 075 initWorkingDirectory(); 076 setUrl(url); 077 initNameNTypeFromInfoFile(url); 078 // initData(); 079 } 080 081 /** 082 * Creates a new configuration directory or a configuration directory from a mco-file 083 * 084 * @param name the name of the configuration 085 * @param type the type of configuration 086 * @param containerIndex the container index 087 * @throws MaltChainedException 088 */ 089 public ConfigurationDir(String name, String type, int containerIndex) throws MaltChainedException { 090 setContainerIndex(containerIndex); 091 092 initWorkingDirectory(); 093 if (name != null && name.length() > 0 && type != null && type.length() > 0) { 094 setName(name); 095 setType(type); 096 } else { 097 throw new ConfigurationException("The configuration name is not specified. "); 098 } 099 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName())); 100 } 101 102 public void initDataFormat() throws MaltChainedException { 103 String inputFormatName = OptionManager.instance().getOptionValue(containerIndex, "input", "format").toString().trim(); 104 String outputFormatName = OptionManager.instance().getOptionValue(containerIndex, "output", "format").toString().trim(); 105 106 if (configDirectory != null && configDirectory.exists()) { 107 if (outputFormatName.length() == 0 || inputFormatName.equals(outputFormatName)) { 108 URL inputFormatURL = Util.findURLinJars(inputFormatName); 109 if (inputFormatURL != null) { 110 outputFormatName = inputFormatName = this.copyToConfig(inputFormatURL); 111 } else { 112 outputFormatName = inputFormatName = this.copyToConfig(inputFormatName); 113 } 114 } else { 115 URL inputFormatURL = Util.findURLinJars(inputFormatName); 116 if (inputFormatURL != null) { 117 inputFormatName = this.copyToConfig(inputFormatURL); 118 } else { 119 inputFormatName = this.copyToConfig(inputFormatName); 120 } 121 URL outputFormatURL = Util.findURLinJars(outputFormatName); 122 if (inputFormatURL != null) { 123 outputFormatName = this.copyToConfig(outputFormatURL); 124 } else { 125 outputFormatName = this.copyToConfig(outputFormatName); 126 } 127 } 128 OptionManager.instance().overloadOptionValue(containerIndex, "input", "format", inputFormatName); 129 } else { 130 if (outputFormatName.length() == 0) { 131 outputFormatName = inputFormatName; 132 } 133 } 134 dataFormatInstances = new HashMap<String, DataFormatInstance>(3); 135 136 inputFormatURL = findURL(inputFormatName); 137 outputFormatURL = findURL(outputFormatName); 138 if (outputFormatURL != null) { 139 try { 140 InputStream is = outputFormatURL.openStream(); 141 } catch (FileNotFoundException e) { 142 outputFormatURL = Util.findURL(outputFormatName); 143 } catch (IOException e) { 144 outputFormatURL = Util.findURL(outputFormatName); 145 } 146 } else { 147 outputFormatURL = Util.findURL(outputFormatName); 148 } 149 dataFormatManager = new DataFormatManager(inputFormatURL, outputFormatURL); 150 symbolTables = new TrieSymbolTableHandler(); 151 152 if (dataFormatManager.getInputDataFormatSpec().getDataStructure() == DataStructure.PHRASE) { 153 String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim(); 154 if (mode.equals("learn")) { 155 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); 156 for (Dependency dep : deps) { 157 URL depFormatURL = Util.findURLinJars(dep.getUrlString()); 158 if (depFormatURL != null) { 159 this.copyToConfig(depFormatURL); 160 } else { 161 this.copyToConfig(dep.getUrlString()); 162 } 163 } 164 } 165 else if (mode.equals("parse")) { 166 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies(); 167 String nullValueStategy = OptionManager.instance().getOptionValue(containerIndex, "singlemalt", "null_value").toString(); 168 for (Dependency dep : deps) { 169 // URL depFormatURL = Util.findURLinJars(dep.getUrlString()); 170 DataFormatInstance dataFormatInstance = dataFormatManager.getDataFormatSpec(dep.getDependentOn()).createDataFormatInstance(symbolTables, nullValueStategy); 171 addDataFormatInstance(dataFormatManager.getDataFormatSpec(dep.getDependentOn()).getDataFormatName(), dataFormatInstance); 172 dataFormatManager.setInputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); 173 // dataFormatManager.setOutputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn())); 174 } 175 } 176 } 177 } 178 179 private URL findURL(String specModelFileName) throws MaltChainedException { 180 URL url = null; 181 File specFile = this.getFile(specModelFileName); 182 if (specFile.exists()) { 183 try { 184 url = new URL("file:///"+specFile.getAbsolutePath()); 185 } catch (MalformedURLException e) { 186 throw new MaltChainedException("Malformed URL: "+specFile, e); 187 } 188 } else { 189 url = this.getConfigFileEntryURL(specModelFileName); 190 } 191 return url; 192 } 193 194 /** 195 * Creates an output stream writer, where the corresponding file will be included in the configuration directory 196 * 197 * @param fileName a file name 198 * @param charSet a char set 199 * @return an output stream writer for writing to a file within the configuration directory 200 * @throws MaltChainedException 201 */ 202 public OutputStreamWriter getOutputStreamWriter(String fileName, String charSet) throws MaltChainedException { 203 try { 204 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName), charSet); 205 } catch (FileNotFoundException e) { 206 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 207 } catch (UnsupportedEncodingException e) { 208 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 209 } 210 } 211 212 /** 213 * Creates an output stream writer, where the corresponding file will be included in the 214 * configuration directory. Uses UTF-8 for character encoding. 215 * 216 * @param fileName a file name 217 * @return an output stream writer for writing to a file within the configuration directory 218 * @throws MaltChainedException 219 */ 220 public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException { 221 try { 222 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8"); 223 } catch (FileNotFoundException e) { 224 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 225 } catch (UnsupportedEncodingException e) { 226 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); 227 } 228 } 229 /** 230 * This method acts the same as getOutputStreamWriter with the difference that the writer append in the file 231 * if it already exists instead of deleting the previous content before starting to write. 232 * 233 * @param fileName a file name 234 * @return an output stream writer for writing to a file within the configuration directory 235 * @throws MaltChainedException 236 */ 237 public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException { 238 try { 239 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8"); 240 } catch (FileNotFoundException e) { 241 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e); 242 } catch (UnsupportedEncodingException e) { 243 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e); 244 } 245 } 246 247 /** 248 * Creates an input stream reader for reading a file within the configuration directory 249 * 250 * @param fileName a file name 251 * @param charSet a char set 252 * @return an input stream reader for reading a file within the configuration directory 253 * @throws MaltChainedException 254 */ 255 public InputStreamReader getInputStreamReader(String fileName, String charSet) throws MaltChainedException { 256 try { 257 return new InputStreamReader(new FileInputStream(configDirectory.getPath()+File.separator+fileName), charSet); 258 } catch (FileNotFoundException e) { 259 throw new ConfigurationException("The file '"+fileName+"' cannot be found. ", e); 260 } catch (UnsupportedEncodingException e) { 261 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 262 } 263 } 264 265 /** 266 * Creates an input stream reader for reading a file within the configuration directory. 267 * Uses UTF-8 for character encoding. 268 * 269 * @param fileName a file name 270 * @return an input stream reader for reading a file within the configuration directory 271 * @throws MaltChainedException 272 */ 273 public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException { 274 return getInputStreamReader(fileName, "UTF-8"); 275 } 276 277 public JarEntry getConfigFileEntry(String fileName) throws MaltChainedException { 278 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 279 try { 280 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 281 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 282 if (entry == null) { 283 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 284 } 285 return entry; 286 } catch (FileNotFoundException e) { 287 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e); 288 } catch (IOException e) { 289 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e); 290 } 291 } 292 293 public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException { 294 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 295 try { 296 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 297 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 298 if (entry == null) { 299 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 300 } 301 if (entry == null) { 302 throw new FileNotFoundException(); 303 } 304 return mcoFile.getInputStream(entry); 305 } catch (FileNotFoundException e) { 306 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e); 307 } catch (IOException e) { 308 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e); 309 } 310 } 311 312 public InputStreamReader getInputStreamReaderFromConfigFileEntry(String fileName, String charSet) throws MaltChainedException { 313 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 314 try { 315 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath()); 316 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName); 317 if (entry == null) { 318 entry = mcoFile.getJarEntry(getName()+'\\'+fileName); 319 } 320 if (entry == null) { 321 throw new FileNotFoundException(); 322 } 323 return new InputStreamReader(mcoFile.getInputStream(entry), charSet); 324 } catch (FileNotFoundException e) { 325 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e); 326 } catch (UnsupportedEncodingException e) { 327 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e); 328 } catch (IOException e) { 329 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e); 330 } 331 } 332 333 public InputStreamReader getInputStreamReaderFromConfigFile(String fileName) throws MaltChainedException { 334 return getInputStreamReaderFromConfigFileEntry(fileName, "UTF-8"); 335 } 336 337 /** 338 * Returns a file handler object of a file within the configuration directory 339 * 340 * @param fileName a file name 341 * @return a file handler object of a file within the configuration directory 342 * @throws MaltChainedException 343 */ 344 public File getFile(String fileName) throws MaltChainedException { 345 return new File(configDirectory.getPath()+File.separator+fileName); 346 } 347 348 public URL getConfigFileEntryURL(String fileName) throws MaltChainedException { 349 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 350 try { 351 if (!mcoPath.exists()) { 352 throw new ConfigurationException("Couldn't find mco-file '" +mcoPath.getAbsolutePath()+ "'"); 353 } 354 // new URL("file", null, mcoPath.getAbsolutePath()); 355 URL url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'/'+fileName + "\n"); 356 try { 357 InputStream is = url.openStream(); 358 is.close(); 359 } catch (IOException e) { 360 url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'\\'+fileName + "\n"); 361 } 362 return url; 363 } catch (MalformedURLException e) { 364 throw new ConfigurationException("Couldn't find the URL '" +"jar:"+mcoPath.getAbsolutePath()+"!/"+getName()+'/'+fileName+ "'", e); 365 } 366 } 367 368 /** 369 * Copies a file into the configuration directory. 370 * 371 * @param source a path to file 372 * @throws MaltChainedException 373 */ 374 public String copyToConfig(File source) throws MaltChainedException { 375 byte[] readBuffer = new byte[BUFFER]; 376 String destination = configDirectory.getPath()+File.separator+source.getName(); 377 try { 378 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source)); 379 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); 380 381 int n = 0; 382 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { 383 bos.write(readBuffer, 0, n); 384 } 385 bos.flush(); 386 bos.close(); 387 bis.close(); 388 } catch (FileNotFoundException e) { 389 throw new ConfigurationException("The source file '"+source+"' cannot be found or the destination file '"+destination+"' cannot be created when coping the file. ", e); 390 } catch (IOException e) { 391 throw new ConfigurationException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e); 392 } 393 return source.getName(); 394 } 395 396 397 public String copyToConfig(String fileUrl) throws MaltChainedException { 398 URL url = Util.findURL(fileUrl); 399 if (url == null) { 400 throw new ConfigurationException("The file or URL '"+fileUrl+"' could not be found. "); 401 } 402 return copyToConfig(url); 403 } 404 405 public String copyToConfig(URL url) throws MaltChainedException { 406 if (url == null) { 407 throw new ConfigurationException("URL could not be found. "); 408 } 409 byte[] readBuffer = new byte[BUFFER]; 410 String destFileName = url.getPath(); 411 int indexSlash = destFileName.lastIndexOf('/'); 412 if (indexSlash == -1) { 413 indexSlash = destFileName.lastIndexOf('\\'); 414 } 415 416 if (indexSlash != -1) { 417 destFileName = destFileName.substring(indexSlash+1); 418 } 419 420 String destination = configDirectory.getPath()+File.separator+destFileName; 421 try { 422 BufferedInputStream bis = new BufferedInputStream(url.openStream()); 423 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER); 424 425 int n = 0; 426 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) { 427 bos.write(readBuffer, 0, n); 428 } 429 bos.flush(); 430 bos.close(); 431 bis.close(); 432 } catch (FileNotFoundException e) { 433 throw new ConfigurationException("The destination file '"+destination+"' cannot be created when coping the file. ", e); 434 } catch (IOException e) { 435 throw new ConfigurationException("The URL '"+url+"' cannot be copied to destination '"+destination+"'. ", e); 436 } 437 return destFileName; 438 } 439 440 441 /** 442 * Removes the configuration directory, if it exists and it contains a .info file. 443 * 444 * @throws MaltChainedException 445 */ 446 public void deleteConfigDirectory() throws MaltChainedException { 447 if (!configDirectory.exists()) { 448 return; 449 } 450 File infoFile = new File(configDirectory.getPath()+File.separator+getName()+"_"+getType()+".info"); 451 if (infoFile.exists()) { 452 deleteConfigDirectory(configDirectory); 453 } else { 454 throw new ConfigurationException("There exists a directory that is not a MaltParser configuration directory. "); 455 } 456 } 457 458 private void deleteConfigDirectory(File directory) throws MaltChainedException { 459 if (directory.exists()) { 460 File[] files = directory.listFiles(); 461 for (int i = 0; i < files.length; i++) { 462 if (files[i].isDirectory()) { 463 deleteConfigDirectory(files[i]); 464 } else { 465 files[i].delete(); 466 } 467 } 468 } else { 469 throw new ConfigurationException("The directory '"+directory.getPath()+ "' cannot be found. "); 470 } 471 directory.delete(); 472 } 473 474 /** 475 * Returns a file handler object for the configuration directory 476 * 477 * @return a file handler object for the configuration directory 478 */ 479 public File getConfigDirectory() { 480 return configDirectory; 481 } 482 483 protected void setConfigDirectory(File dir) { 484 this.configDirectory = dir; 485 } 486 487 /** 488 * Creates the configuration directory 489 * 490 * @throws MaltChainedException 491 */ 492 public void createConfigDirectory() throws MaltChainedException { 493 checkConfigDirectory(); 494 configDirectory.mkdir(); 495 createInfoFile(); 496 } 497 498 protected void checkConfigDirectory() throws MaltChainedException { 499 if (configDirectory.exists() && !configDirectory.isDirectory()) { 500 throw new ConfigurationException("The configuration directory name already exists and is not a directory. "); 501 } 502 503 if (configDirectory.exists()) { 504 deleteConfigDirectory(); 505 } 506 } 507 508 protected void createInfoFile() throws MaltChainedException { 509 infoFile = new BufferedWriter(getOutputStreamWriter(getName()+"_"+getType()+".info")); 510 try { 511 infoFile.write("CONFIGURATION\n"); 512 infoFile.write("Configuration name: "+getName()+"\n"); 513 infoFile.write("Configuration type: "+getType()+"\n"); 514 infoFile.write("Created: "+new Date(System.currentTimeMillis())+"\n"); 515 516 infoFile.write("\nSYSTEM\n"); 517 infoFile.write("Operating system architecture: "+System.getProperty("os.arch")+"\n"); 518 infoFile.write("Operating system name: "+System.getProperty("os.name")+"\n"); 519 infoFile.write("JRE vendor name: "+System.getProperty("java.vendor")+"\n"); 520 infoFile.write("JRE version number: "+System.getProperty("java.version")+"\n"); 521 522 infoFile.write("\nMALTPARSER\n"); 523 infoFile.write("Version: "+SystemInfo.getVersion()+"\n"); 524 infoFile.write("Build date: "+SystemInfo.getBuildDate()+"\n"); 525 Set<String> excludeGroups = new HashSet<String>(); 526 excludeGroups.add("system"); 527 infoFile.write("\nSETTINGS\n"); 528 infoFile.write(OptionManager.instance().toStringPrettyValues(containerIndex, excludeGroups)); 529 infoFile.flush(); 530 } catch (IOException e) { 531 throw new ConfigurationException("Could not create the maltparser info file. "); 532 } 533 } 534 535 /** 536 * Returns a writer to the configuration information file 537 * 538 * @return a writer to the configuration information file 539 * @throws MaltChainedException 540 */ 541 public BufferedWriter getInfoFileWriter() throws MaltChainedException { 542 return infoFile; 543 } 544 545 /** 546 * Creates the malt configuration file (.mco). This file is compressed. 547 * 548 * @throws MaltChainedException 549 */ 550 public void createConfigFile() throws MaltChainedException { 551 try { 552 JarOutputStream jos = new JarOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 553 // configLogger.info("Creates configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco' ...\n"); 554 createConfigFile(configDirectory.getPath(), jos); 555 jos.close(); 556 } catch (FileNotFoundException e) { 557 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 558 } catch (IOException e) { 559 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be created. ", e); 560 } 561 } 562 563 private void createConfigFile(String directory, JarOutputStream jos) throws MaltChainedException { 564 byte[] readBuffer = new byte[BUFFER]; 565 try { 566 File zipDir = new File(directory); 567 String[] dirList = zipDir.list(); 568 569 int bytesIn = 0; 570 571 for (int i = 0; i < dirList.length; i++) { 572 File f = new File(zipDir, dirList[i]); 573 if (f.isDirectory()) { 574 String filePath = f.getPath(); 575 createConfigFile(filePath, jos); 576 continue; 577 } 578 579 FileInputStream fis = new FileInputStream(f); 580 581 String entryPath = f.getPath().substring(workingDirectory.getPath().length()+1); 582 entryPath = entryPath.replace('\\', '/'); 583 JarEntry entry = new JarEntry(entryPath); 584 jos.putNextEntry(entry); 585 586 while ((bytesIn = fis.read(readBuffer)) != -1) { 587 jos.write(readBuffer, 0, bytesIn); 588 } 589 590 fis.close(); 591 } 592 } catch (FileNotFoundException e) { 593 throw new ConfigurationException("The directory '"+directory+"' cannot be found. ", e); 594 } catch (IOException e) { 595 throw new ConfigurationException("The directory '"+directory+"' cannot be compressed into a mco file. ", e); 596 } 597 } 598 599 600 public void copyConfigFile(File in, File out, Versioning versioning) throws MaltChainedException { 601 try { 602 JarFile jar = new JarFile(in); 603 JarOutputStream tempJar = new JarOutputStream(new FileOutputStream(out)); 604 byte[] buffer = new byte[BUFFER]; 605 int bytesRead; 606 StringBuilder sb = new StringBuilder(); 607 608 for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements(); ) { 609 JarEntry inEntry = (JarEntry) entries.nextElement(); 610 InputStream entryStream = jar.getInputStream(inEntry); 611 JarEntry outEntry = versioning.getJarEntry(inEntry); 612 613 if (!versioning.hasChanges(inEntry, outEntry)) { 614 tempJar.putNextEntry(outEntry); 615 while ((bytesRead = entryStream.read(buffer)) != -1) { 616 tempJar.write(buffer, 0, bytesRead); 617 } 618 } else { 619 tempJar.putNextEntry(outEntry); 620 BufferedReader br = new BufferedReader(new InputStreamReader(entryStream)); 621 String line = null; 622 sb.setLength(0); 623 while ((line = br.readLine()) != null) { 624 sb.append(line); 625 sb.append('\n'); 626 } 627 String outString = versioning.modifyJarEntry(inEntry, outEntry, sb); 628 tempJar.write(outString.getBytes()); 629 } 630 } 631 if (versioning.getFeatureModelXML() != null && versioning.getFeatureModelXML().startsWith("/appdata")) { 632 int index = versioning.getFeatureModelXML().lastIndexOf('/'); 633 BufferedInputStream bis = new BufferedInputStream(Util.findURLinJars(versioning.getFeatureModelXML()).openStream()); 634 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getFeatureModelXML().substring(index+1))); 635 int n = 0; 636 while ((n = bis.read(buffer, 0, BUFFER)) != -1) { 637 tempJar.write(buffer, 0, n); 638 } 639 bis.close(); 640 } 641 if (versioning.getInputFormatXML() != null && versioning.getInputFormatXML().startsWith("/appdata")) { 642 int index = versioning.getInputFormatXML().lastIndexOf('/'); 643 BufferedInputStream bis = new BufferedInputStream(Util.findURLinJars(versioning.getInputFormatXML()).openStream()); 644 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getInputFormatXML().substring(index+1))); 645 int n = 0; 646 while ((n = bis.read(buffer, 0, BUFFER)) != -1) { 647 tempJar.write(buffer, 0, n); 648 } 649 bis.close(); 650 } 651 tempJar.flush(); 652 tempJar.close(); 653 jar.close(); 654 } catch (IOException e) { 655 throw new ConfigurationException("", e); 656 } 657 } 658 659 protected void initNameNTypeFromInfoFile(URL url) throws MaltChainedException { 660 if (url == null) { 661 throw new ConfigurationException("The URL cannot be found. "); 662 } 663 try { 664 JarEntry je; 665 JarInputStream jis = new JarInputStream(url.openConnection().getInputStream()); 666 while ((je = jis.getNextJarEntry()) != null) { 667 String entryName = je.getName(); 668 if (entryName.endsWith(".info")) { 669 int indexUnderScore = entryName.lastIndexOf('_'); 670 int indexSeparator = entryName.lastIndexOf(File.separator); 671 if (indexSeparator == -1) { 672 indexSeparator = entryName.lastIndexOf('/'); 673 } 674 if (indexSeparator == -1) { 675 indexSeparator = entryName.lastIndexOf('\\'); 676 } 677 int indexDot = entryName.lastIndexOf('.'); 678 if (indexUnderScore == -1 || indexDot == -1) { 679 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. "); 680 } 681 setName(entryName.substring(indexSeparator+1, indexUnderScore)); 682 setType(entryName.substring(indexUnderScore+1, indexDot)); 683 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName())); 684 jis.close(); 685 return; 686 } 687 } 688 689 } catch (IOException e) { 690 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. ", e); 691 } 692 } 693 694 /** 695 * Prints the content of the configuration information file to the system logger 696 * 697 * @throws MaltChainedException 698 */ 699 public void echoInfoFile() throws MaltChainedException { 700 checkConfigDirectory(); 701 JarInputStream jis; 702 try { 703 if (url == null) { 704 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 705 } else { 706 jis = new JarInputStream(url.openConnection().getInputStream()); 707 } 708 JarEntry je; 709 710 while ((je = jis.getNextJarEntry()) != null) { 711 String entryName = je.getName(); 712 713 if (entryName.endsWith(getName()+"_"+getType()+".info")) { 714 int c; 715 while ((c = jis.read()) != -1) { 716 SystemLogger.logger().info((char)c); 717 } 718 } 719 } 720 jis.close(); 721 } catch (FileNotFoundException e) { 722 throw new ConfigurationException("Could not print configuration information file. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 723 } catch (IOException e) { 724 throw new ConfigurationException("Could not print configuration information file. ", e); 725 } 726 727 } 728 729 /** 730 * Unpacks the malt configuration file (.mco). 731 * 732 * @throws MaltChainedException 733 */ 734 public void unpackConfigFile() throws MaltChainedException { 735 checkConfigDirectory(); 736 JarInputStream jis; 737 try { 738 if (url == null) { 739 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco")); 740 } else { 741 jis = new JarInputStream(url.openConnection().getInputStream()); 742 } 743 unpackConfigFile(jis); 744 jis.close(); 745 } catch (FileNotFoundException e) { 746 throw new ConfigurationException("Could not unpack configuration. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e); 747 } catch (IOException e) { 748 if (configDirectory.exists()) { 749 deleteConfigDirectory(); 750 } 751 throw new ConfigurationException("Could not unpack configuration. ", e); 752 } 753 initCreatedByMaltParserVersionFromInfoFile(); 754 } 755 756 protected void unpackConfigFile(JarInputStream jis) throws MaltChainedException { 757 try { 758 JarEntry je; 759 byte[] readBuffer = new byte[BUFFER]; 760 SortedSet<String> directoryCache = new TreeSet<String>(); 761 while ((je = jis.getNextJarEntry()) != null) { 762 String entryName = je.getName(); 763 764 if (entryName.startsWith("/")) { 765 entryName = entryName.substring(1); 766 } 767 if (entryName.endsWith(File.separator) || entryName.endsWith("/")) { 768 return; 769 } 770 int index = -1; 771 if (File.separator.equals("\\")) { 772 entryName = entryName.replace('/', '\\'); 773 index = entryName.lastIndexOf("\\"); 774 } else if (File.separator.equals("/")) { 775 entryName = entryName.replace('\\', '/'); 776 index = entryName.lastIndexOf("/"); 777 } 778 if (index > 0) { 779 String dirName = entryName.substring(0, index); 780 if (!directoryCache.contains(dirName)) { 781 File directory = new File(workingDirectory.getPath()+File.separator+dirName); 782 if (!(directory.exists() && directory.isDirectory())) { 783 if (!directory.mkdirs()) { 784 throw new ConfigurationException("Unable to make directory '" + dirName +"'. "); 785 } 786 directoryCache.add(dirName); 787 } 788 } 789 } 790 791 if (new File(workingDirectory.getPath()+File.separator+entryName).isDirectory() && new File(workingDirectory.getPath()+File.separator+entryName).exists()) { 792 continue; 793 } 794 BufferedOutputStream bos; 795 try { 796 bos = new BufferedOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+entryName), BUFFER); 797 } catch (FileNotFoundException e) { 798 throw new ConfigurationException("Could not unpack configuration. The file '"+workingDirectory.getPath()+File.separator+entryName+"' cannot be unpacked. ", e); 799 } 800 int n = 0; 801 while ((n = jis.read(readBuffer, 0, BUFFER)) != -1) { 802 bos.write(readBuffer, 0, n); 803 } 804 bos.flush(); 805 bos.close(); 806 } 807 } catch (IOException e) { 808 throw new ConfigurationException("Could not unpack configuration. ", e); 809 } 810 } 811 812 /** 813 * Returns the name of the configuration directory 814 * 815 * @return the name of the configuration directory 816 */ 817 public String getName() { 818 return name; 819 } 820 821 protected void setName(String name) { 822 this.name = name; 823 } 824 825 /** 826 * Returns the type of the configuration directory 827 * 828 * @return the type of the configuration directory 829 */ 830 public String getType() { 831 return type; 832 } 833 834 protected void setType(String type) { 835 this.type = type; 836 } 837 838 /** 839 * Returns a file handler object for the working directory 840 * 841 * @return a file handler object for the working directory 842 */ 843 public File getWorkingDirectory() { 844 return workingDirectory; 845 } 846 847 /** 848 * Initialize the working directory 849 * 850 * @throws MaltChainedException 851 */ 852 public void initWorkingDirectory() throws MaltChainedException { 853 try { 854 initWorkingDirectory(OptionManager.instance().getOptionValue(0, "config", "workingdir").toString()); 855 } catch (NullPointerException e) { 856 throw new ConfigurationException("The configuration cannot be found.", e); 857 } 858 } 859 860 /** 861 * Initialize the working directory according to the path. If the path is equals to "user.dir" or current directory, then the current directory 862 * will be the working directory. 863 * 864 * @param pathPrefixString the path to the working directory 865 * @throws MaltChainedException 866 */ 867 public void initWorkingDirectory(String pathPrefixString) throws MaltChainedException { 868 if (pathPrefixString == null || pathPrefixString.equalsIgnoreCase("user.dir") || pathPrefixString.equalsIgnoreCase(".")) { 869 workingDirectory = new File(System.getProperty("user.dir")); 870 } else { 871 workingDirectory = new File(pathPrefixString); 872 } 873 874 if (workingDirectory == null || !workingDirectory.isDirectory()) { 875 new ConfigurationException("The specified working directory '"+pathPrefixString+"' is not a directory. "); 876 } 877 } 878 879 /** 880 * Returns the URL to the malt configuration file (.mco) 881 * 882 * @return the URL to the malt configuration file (.mco) 883 */ 884 public URL getUrl() { 885 return url; 886 } 887 888 protected void setUrl(URL url) { 889 this.url = url; 890 } 891 892 /** 893 * Returns the option container index 894 * 895 * @return the option container index 896 */ 897 public int getContainerIndex() { 898 return containerIndex; 899 } 900 901 /** 902 * Sets the option container index 903 * 904 * @param containerIndex a option container index 905 */ 906 public void setContainerIndex(int containerIndex) { 907 this.containerIndex = containerIndex; 908 } 909 910 /** 911 * Returns the version number of MaltParser which created the malt configuration file (.mco) 912 * 913 * @return the version number of MaltParser which created the malt configuration file (.mco) 914 */ 915 public String getCreatedByMaltParserVersion() { 916 return createdByMaltParserVersion; 917 } 918 919 /** 920 * Sets the version number of MaltParser which created the malt configuration file (.mco) 921 * 922 * @param createdByMaltParserVersion a version number of MaltParser 923 */ 924 public void setCreatedByMaltParserVersion(String createdByMaltParserVersion) { 925 this.createdByMaltParserVersion = createdByMaltParserVersion; 926 } 927 928 public void initCreatedByMaltParserVersionFromInfoFile() throws MaltChainedException { 929 try { 930 BufferedReader br = new BufferedReader(getInputStreamReaderFromConfigFileEntry(getName()+"_"+getType()+".info", "UTF-8")); 931 String line = null; 932 while ((line = br.readLine()) != null) { 933 if (line.startsWith("Version: ")) { 934 setCreatedByMaltParserVersion(line.substring(31)); 935 break; 936 } 937 } 938 br.close(); 939 } catch (FileNotFoundException e) { 940 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); 941 } catch (IOException e) { 942 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e); 943 } 944 } 945 946 public void versioning() throws MaltChainedException { 947 initCreatedByMaltParserVersionFromInfoFile(); 948 SystemLogger.logger().info("\nCurrent version : " + SystemInfo.getVersion() + "\n"); 949 SystemLogger.logger().info("Parser model version : " + createdByMaltParserVersion + "\n"); 950 if (SystemInfo.getVersion() == null) { 951 throw new ConfigurationException("Couln't determine the version of MaltParser"); 952 } else if (createdByMaltParserVersion == null) { 953 throw new ConfigurationException("Couln't determine the version of the parser model"); 954 } else if (SystemInfo.getVersion().equals(createdByMaltParserVersion)) { 955 SystemLogger.logger().info("The parser model "+getName()+".mco has already the same version as the current version of MaltParser. \n"); 956 return; 957 } 958 959 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco"); 960 File newMcoPath = new File(workingDirectory.getPath()+File.separator+getName()+"."+SystemInfo.getVersion().trim()+".mco"); 961 Versioning versioning = new Versioning(name, type, mcoPath, createdByMaltParserVersion); 962 if (!versioning.support(createdByMaltParserVersion)) { 963 SystemLogger.logger().warn("The parser model '"+ name+ ".mco' is created by MaltParser "+getCreatedByMaltParserVersion()+", which cannot be converted to a MaltParser "+SystemInfo.getVersion()+" parser model.\n"); 964 SystemLogger.logger().warn("Please retrain the parser model with MaltParser "+SystemInfo.getVersion() +" or download MaltParser "+getCreatedByMaltParserVersion()+" from http://maltparser.org/download.html\n"); 965 return; 966 } 967 SystemLogger.logger().info("Converts the parser model '"+ mcoPath.getName()+ "' into '"+newMcoPath.getName()+"'....\n"); 968 copyConfigFile(mcoPath, newMcoPath, versioning); 969 } 970 971 protected void checkNConvertConfigVersion() throws MaltChainedException { 972 if (createdByMaltParserVersion.startsWith("1.0")) { 973 SystemLogger.logger().info(" Converts the MaltParser configuration "); 974 SystemLogger.logger().info("1.0"); 975 SystemLogger.logger().info(" to "); 976 SystemLogger.logger().info(SystemInfo.getVersion()); 977 SystemLogger.logger().info("\n"); 978 File[] configFiles = configDirectory.listFiles(); 979 for (int i = 0, n = configFiles.length; i < n; i++) { 980 if (configFiles[i].getName().endsWith(".mod")) { 981 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0."+configFiles[i].getName())); 982 } 983 if (configFiles[i].getName().endsWith(getName()+".dsm")) { 984 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0.dsm")); 985 } 986 if (configFiles[i].getName().equals("savedoptions.sop")) { 987 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 988 } 989 if (configFiles[i].getName().equals("symboltables.sym")) { 990 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"symboltables.sym.old")); 991 } 992 } 993 try { 994 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 995 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop")); 996 String line; 997 while ((line = br.readLine()) != null) { 998 if (line.startsWith("0\tguide\tprediction_strategy")) { 999 bw.write("0\tguide\tdecision_settings\tT.TRANS+A.DEPREL\n"); 1000 } else { 1001 bw.write(line); 1002 bw.write('\n'); 1003 } 1004 } 1005 br.close(); 1006 bw.flush(); 1007 bw.close(); 1008 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete(); 1009 } catch (FileNotFoundException e) { 1010 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1011 } catch (IOException e) { 1012 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1013 } 1014 try { 1015 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"symboltables.sym.old")); 1016 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"symboltables.sym")); 1017 String line; 1018 while ((line = br.readLine()) != null) { 1019 if (line.startsWith("AllCombinedClassTable")) { 1020 bw.write("T.TRANS+A.DEPREL\n"); 1021 } else { 1022 bw.write(line); 1023 bw.write('\n'); 1024 } 1025 } 1026 br.close(); 1027 bw.flush(); 1028 bw.close(); 1029 new File(configDirectory.getPath()+File.separator+"symboltables.sym.old").delete(); 1030 } catch (FileNotFoundException e) { 1031 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); 1032 } catch (IOException e) { 1033 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e); 1034 } 1035 } 1036 if (!createdByMaltParserVersion.startsWith("1.3")) { 1037 SystemLogger.logger().info(" Converts the MaltParser configuration "); 1038 SystemLogger.logger().info(createdByMaltParserVersion); 1039 SystemLogger.logger().info(" to "); 1040 SystemLogger.logger().info(SystemInfo.getVersion()); 1041 SystemLogger.logger().info("\n"); 1042 1043 1044 new File(configDirectory.getPath()+File.separator+"savedoptions.sop").renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 1045 try { 1046 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old")); 1047 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop")); 1048 String line; 1049 while ((line = br.readLine()) != null) { 1050 int index = line.indexOf('\t'); 1051 int container = 0; 1052 if (index > -1) { 1053 container = Integer.parseInt(line.substring(0,index)); 1054 } 1055 1056 if (line.startsWith(container+"\tnivre\tpost_processing")) { 1057 } else if (line.startsWith(container+"\tmalt0.4\tbehavior")) { 1058 if (line.endsWith("true")) { 1059 SystemLogger.logger().info("MaltParser 1.3 doesn't support MaltParser 0.4 emulation."); 1060 br.close(); 1061 bw.flush(); 1062 bw.close(); 1063 deleteConfigDirectory(); 1064 System.exit(0); 1065 } 1066 } else if (line.startsWith(container+"\tsinglemalt\tparsing_algorithm")) { 1067 bw.write(container); 1068 bw.write("\tsinglemalt\tparsing_algorithm\t"); 1069 if (line.endsWith("NivreStandard")) { 1070 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcStandardFactory"); 1071 } else if (line.endsWith("NivreEager")) { 1072 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcEagerFactory"); 1073 } else if (line.endsWith("CovingtonNonProjective")) { 1074 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonNonProjFactory"); 1075 } else if (line.endsWith("CovingtonProjective")) { 1076 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonProjFactory"); 1077 } 1078 bw.write('\n'); 1079 } else { 1080 bw.write(line); 1081 bw.write('\n'); 1082 } 1083 } 1084 br.close(); 1085 bw.flush(); 1086 bw.close(); 1087 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete(); 1088 } catch (FileNotFoundException e) { 1089 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1090 } catch (IOException e) { 1091 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e); 1092 } 1093 } 1094 } 1095 1096 /** 1097 * Terminates the configuration directory 1098 * 1099 * @throws MaltChainedException 1100 */ 1101 public void terminate() throws MaltChainedException { 1102 if (infoFile != null) { 1103 try { 1104 infoFile.flush(); 1105 infoFile.close(); 1106 } catch (IOException e) { 1107 throw new ConfigurationException("Could not close configuration information file. ", e); 1108 } 1109 } 1110 symbolTables = null; 1111 // configuration = null; 1112 } 1113 1114 /* (non-Javadoc) 1115 * @see java.lang.Object#finalize() 1116 */ 1117 protected void finalize() throws Throwable { 1118 try { 1119 if (infoFile != null) { 1120 infoFile.flush(); 1121 infoFile.close(); 1122 } 1123 } finally { 1124 super.finalize(); 1125 } 1126 } 1127 1128 public SymbolTableHandler getSymbolTables() { 1129 return symbolTables; 1130 } 1131 1132 public void setSymbolTables(SymbolTableHandler symbolTables) { 1133 this.symbolTables = symbolTables; 1134 } 1135 1136 public DataFormatManager getDataFormatManager() { 1137 return dataFormatManager; 1138 } 1139 1140 public void setDataFormatManager(DataFormatManager dataFormatManager) { 1141 this.dataFormatManager = dataFormatManager; 1142 } 1143 1144 public Set<String> getDataFormatInstanceKeys() { 1145 return dataFormatInstances.keySet(); 1146 } 1147 1148 public boolean addDataFormatInstance(String key, DataFormatInstance dataFormatInstance) { 1149 if (!dataFormatInstances.containsKey(key)) { 1150 dataFormatInstances.put(key, dataFormatInstance); 1151 return true; 1152 } 1153 return false; 1154 } 1155 1156 public DataFormatInstance getDataFormatInstance(String key) { 1157 return dataFormatInstances.get(key); 1158 } 1159 1160 public int sizeDataFormatInstance() { 1161 return dataFormatInstances.size(); 1162 } 1163 1164 public DataFormatInstance getInputDataFormatInstance() { 1165 return dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName()); 1166 } 1167 1168 public URL getInputFormatURL() { 1169 return inputFormatURL; 1170 } 1171 1172 public URL getOutputFormatURL() { 1173 return outputFormatURL; 1174 } 1175 1176 1177 }