001 package org.maltparser.core.config;
002
003 import java.io.BufferedInputStream;
004 import java.io.BufferedOutputStream;
005 import java.io.BufferedReader;
006 import java.io.BufferedWriter;
007 import java.io.File;
008 import java.io.FileInputStream;
009 import java.io.FileNotFoundException;
010 import java.io.FileOutputStream;
011 import java.io.FileReader;
012 import java.io.FileWriter;
013 import java.io.IOException;
014 import java.io.InputStream;
015 import java.io.InputStreamReader;
016 import java.io.OutputStreamWriter;
017 import java.io.UnsupportedEncodingException;
018 import java.net.MalformedURLException;
019 import java.net.URL;
020 import java.util.Date;
021 import java.util.Enumeration;
022 import java.util.HashMap;
023 import java.util.Set;
024 import java.util.SortedSet;
025 import java.util.TreeSet;
026 import java.util.jar.JarEntry;
027 import java.util.jar.JarFile;
028 import java.util.jar.JarInputStream;
029 import java.util.jar.JarOutputStream;
030
031 import org.maltparser.core.config.version.Versioning;
032 import org.maltparser.core.exception.MaltChainedException;
033 import org.maltparser.core.helper.HashSet;
034 import org.maltparser.core.helper.SystemInfo;
035 import org.maltparser.core.helper.SystemLogger;
036 import org.maltparser.core.helper.URLFinder;
037 import org.maltparser.core.io.dataformat.DataFormatInstance;
038 import org.maltparser.core.io.dataformat.DataFormatManager;
039 import org.maltparser.core.io.dataformat.DataFormatSpecification.DataStructure;
040 import org.maltparser.core.io.dataformat.DataFormatSpecification.Dependency;
041 import org.maltparser.core.options.OptionManager;
042 import org.maltparser.core.symbol.SymbolTableHandler;
043 import org.maltparser.core.symbol.trie.TrieSymbolTableHandler;
044
045
046 /**
047 * This class contains methods for handling the configuration directory.
048 *
049 * @author Johan Hall
050 */
051 public class ConfigurationDir {
// Size in bytes of the buffers used when copying file and jar contents.
052 protected static final int BUFFER = 4096;
// The unpacked configuration directory; assigned through setConfigDirectory as <workingDirectory>/<name>.
053 protected File configDirectory;
// Configuration name; used to build "<name>.mco" and "<name>_<type>.info".
054 protected String name;
// Configuration type; second component of the "<name>_<type>.info" file name.
055 protected String type;
// Directory that holds both the configuration directory and the packed .mco file.
056 protected File workingDirectory;
// URL of the mco-file; when null the mco-file is read from the working directory instead.
057 protected URL url = null;
// Option container index passed to OptionManager lookups.
058 protected int containerIndex;
// Writer for the "<name>_<type>.info" file; opened by createInfoFile and handed out by getInfoFileWriter.
059 protected BufferedWriter infoFile = null;
// NOTE(review): presumably filled in by initCreatedByMaltParserVersionFromInfoFile (not visible here) - confirm.
060 protected String createdByMaltParserVersion;
061
// The fields below are (re)initialized by initDataFormat.
062 private SymbolTableHandler symbolTables;
063 private DataFormatManager dataFormatManager;
064 private HashMap<String,DataFormatInstance> dataFormatInstances;
065 private URL inputFormatURL;
066 private URL outputFormatURL;
067
/**
 * Builds a configuration directory handle for an existing mco-file.
 * The working directory is initialized first, then the URL is stored and the
 * configuration name and type are read from the info file entry of the mco-file.
 *
 * @param url an URL to a mco-file
 * @throws MaltChainedException if the working directory or the mco-file cannot be processed
 */
public ConfigurationDir(URL url) throws MaltChainedException {
    this.initWorkingDirectory();
    this.setUrl(url);
    this.initNameNTypeFromInfoFile(url);
}
080
081 /**
082 * Creates a new configuration directory or a configuration directory from a mco-file
083 *
084 * @param name the name of the configuration
085 * @param type the type of configuration
086 * @param containerIndex the container index
087 * @throws MaltChainedException
088 */
089 public ConfigurationDir(String name, String type, int containerIndex) throws MaltChainedException {
090 setContainerIndex(containerIndex);
091
092 initWorkingDirectory();
093 if (name != null && name.length() > 0 && type != null && type.length() > 0) {
094 setName(name);
095 setType(type);
096 } else {
097 throw new ConfigurationException("The configuration name is not specified. ");
098 }
099 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName()));
100 }
101
102 public void initDataFormat() throws MaltChainedException {
103 String inputFormatName = OptionManager.instance().getOptionValue(containerIndex, "input", "format").toString().trim();
104 String outputFormatName = OptionManager.instance().getOptionValue(containerIndex, "output", "format").toString().trim();
105 final URLFinder f = new URLFinder();
106
107 if (configDirectory != null && configDirectory.exists()) {
108 if (outputFormatName.length() == 0 || inputFormatName.equals(outputFormatName)) {
109 URL inputFormatURL = f.findURLinJars(inputFormatName);
110 if (inputFormatURL != null) {
111 outputFormatName = inputFormatName = this.copyToConfig(inputFormatURL);
112 } else {
113 outputFormatName = inputFormatName = this.copyToConfig(inputFormatName);
114 }
115 } else {
116 URL inputFormatURL = f.findURLinJars(inputFormatName);
117 if (inputFormatURL != null) {
118 inputFormatName = this.copyToConfig(inputFormatURL);
119 } else {
120 inputFormatName = this.copyToConfig(inputFormatName);
121 }
122 URL outputFormatURL = f.findURLinJars(outputFormatName);
123 if (inputFormatURL != null) {
124 outputFormatName = this.copyToConfig(outputFormatURL);
125 } else {
126 outputFormatName = this.copyToConfig(outputFormatName);
127 }
128 }
129 OptionManager.instance().overloadOptionValue(containerIndex, "input", "format", inputFormatName);
130 } else {
131 if (outputFormatName.length() == 0) {
132 outputFormatName = inputFormatName;
133 }
134 }
135 dataFormatInstances = new HashMap<String, DataFormatInstance>(3);
136
137 inputFormatURL = findURL(inputFormatName);
138 outputFormatURL = findURL(outputFormatName);
139 if (outputFormatURL != null) {
140 try {
141 InputStream is = outputFormatURL.openStream();
142 } catch (FileNotFoundException e) {
143 outputFormatURL = f.findURL(outputFormatName);
144 } catch (IOException e) {
145 outputFormatURL = f.findURL(outputFormatName);
146 }
147 } else {
148 outputFormatURL = f.findURL(outputFormatName);
149 }
150 dataFormatManager = new DataFormatManager(inputFormatURL, outputFormatURL);
151
152 String mode = OptionManager.instance().getOptionValue(containerIndex, "config", "flowchart").toString().trim();
153 if (mode.equals("parse")) {
154 symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TMP_STORAGE);
155 // symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);
156 } else {
157 symbolTables = new TrieSymbolTableHandler(TrieSymbolTableHandler.ADD_NEW_TO_TRIE);
158 }
159 if (dataFormatManager.getInputDataFormatSpec().getDataStructure() == DataStructure.PHRASE) {
160 if (mode.equals("learn")) {
161 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies();
162 for (Dependency dep : deps) {
163 URL depFormatURL = f.findURLinJars(dep.getUrlString());
164 if (depFormatURL != null) {
165 this.copyToConfig(depFormatURL);
166 } else {
167 this.copyToConfig(dep.getUrlString());
168 }
169 }
170 }
171 else if (mode.equals("parse")) {
172 Set<Dependency> deps = dataFormatManager.getInputDataFormatSpec().getDependencies();
173 String nullValueStategy = OptionManager.instance().getOptionValue(containerIndex, "singlemalt", "null_value").toString();
174 for (Dependency dep : deps) {
175 // URL depFormatURL = f.findURLinJars(dep.getUrlString());
176 DataFormatInstance dataFormatInstance = dataFormatManager.getDataFormatSpec(dep.getDependentOn()).createDataFormatInstance(symbolTables, nullValueStategy);
177 addDataFormatInstance(dataFormatManager.getDataFormatSpec(dep.getDependentOn()).getDataFormatName(), dataFormatInstance);
178 dataFormatManager.setInputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn()));
179 // dataFormatManager.setOutputDataFormatSpec(dataFormatManager.getDataFormatSpec(dep.getDependentOn()));
180 }
181 }
182 }
183 }
184
185 private URL findURL(String specModelFileName) throws MaltChainedException {
186 URL url = null;
187 File specFile = this.getFile(specModelFileName);
188 if (specFile.exists()) {
189 try {
190 url = new URL("file:///"+specFile.getAbsolutePath());
191 } catch (MalformedURLException e) {
192 throw new MaltChainedException("Malformed URL: "+specFile, e);
193 }
194 } else {
195 url = this.getConfigFileEntryURL(specModelFileName);
196 }
197 return url;
198 }
199
200 /**
201 * Creates an output stream writer, where the corresponding file will be included in the configuration directory
202 *
203 * @param fileName a file name
204 * @param charSet a char set
205 * @return an output stream writer for writing to a file within the configuration directory
206 * @throws MaltChainedException
207 */
208 public OutputStreamWriter getOutputStreamWriter(String fileName, String charSet) throws MaltChainedException {
209 try {
210 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName), charSet);
211 } catch (FileNotFoundException e) {
212 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e);
213 } catch (UnsupportedEncodingException e) {
214 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e);
215 }
216 }
217
218 /**
219 * Creates an output stream writer, where the corresponding file will be included in the
220 * configuration directory. Uses UTF-8 for character encoding.
221 *
222 * @param fileName a file name
223 * @return an output stream writer for writing to a file within the configuration directory
224 * @throws MaltChainedException
225 */
226 public OutputStreamWriter getOutputStreamWriter(String fileName) throws MaltChainedException {
227 try {
228 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8");
229 } catch (FileNotFoundException e) {
230 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e);
231 } catch (UnsupportedEncodingException e) {
232 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e);
233 }
234 }
235 /**
236 * This method acts the same as getOutputStreamWriter with the difference that the writer append in the file
237 * if it already exists instead of deleting the previous content before starting to write.
238 *
239 * @param fileName a file name
240 * @return an output stream writer for writing to a file within the configuration directory
241 * @throws MaltChainedException
242 */
243 public OutputStreamWriter getAppendOutputStreamWriter(String fileName) throws MaltChainedException {
244 try {
245 return new OutputStreamWriter(new FileOutputStream(configDirectory.getPath()+File.separator+fileName, true), "UTF-8");
246 } catch (FileNotFoundException e) {
247 throw new ConfigurationException("The file '"+fileName+"' cannot be created. ", e);
248 } catch (UnsupportedEncodingException e) {
249 throw new ConfigurationException("The char set 'UTF-8' is not supported. ", e);
250 }
251 }
252
253 /**
254 * Creates an input stream reader for reading a file within the configuration directory
255 *
256 * @param fileName a file name
257 * @param charSet a char set
258 * @return an input stream reader for reading a file within the configuration directory
259 * @throws MaltChainedException
260 */
261 public InputStreamReader getInputStreamReader(String fileName, String charSet) throws MaltChainedException {
262 try {
263 return new InputStreamReader(new FileInputStream(configDirectory.getPath()+File.separator+fileName), charSet);
264 } catch (FileNotFoundException e) {
265 throw new ConfigurationException("The file '"+fileName+"' cannot be found. ", e);
266 } catch (UnsupportedEncodingException e) {
267 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e);
268 }
269 }
270
271 /**
272 * Creates an input stream reader for reading a file within the configuration directory.
273 * Uses UTF-8 for character encoding.
274 *
275 * @param fileName a file name
276 * @return an input stream reader for reading a file within the configuration directory
277 * @throws MaltChainedException
278 */
279 public InputStreamReader getInputStreamReader(String fileName) throws MaltChainedException {
280 return getInputStreamReader(fileName, "UTF-8");
281 }
282
283 public JarEntry getConfigFileEntry(String fileName) throws MaltChainedException {
284 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
285 try {
286 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath());
287 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName);
288 if (entry == null) {
289 entry = mcoFile.getJarEntry(getName()+'\\'+fileName);
290 }
291 return entry;
292 } catch (FileNotFoundException e) {
293 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e);
294 } catch (IOException e) {
295 throw new ConfigurationException("The file entry '"+fileName+"' in mco-file '"+mcoPath+"' cannot be found. ", e);
296 }
297 }
298
299 public InputStream getInputStreamFromConfigFileEntry(String fileName) throws MaltChainedException {
300 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
301 try {
302 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath());
303 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName);
304 if (entry == null) {
305 entry = mcoFile.getJarEntry(getName()+'\\'+fileName);
306 }
307 if (entry == null) {
308 throw new FileNotFoundException();
309 }
310 return mcoFile.getInputStream(entry);
311 } catch (FileNotFoundException e) {
312 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e);
313 } catch (IOException e) {
314 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e);
315 }
316 }
317
318 public InputStreamReader getInputStreamReaderFromConfigFileEntry(String fileName, String charSet) throws MaltChainedException {
319 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
320 try {
321 JarFile mcoFile = new JarFile(mcoPath.getAbsolutePath());
322 JarEntry entry = mcoFile.getJarEntry(getName()+'/'+fileName);
323 if (entry == null) {
324 entry = mcoFile.getJarEntry(getName()+'\\'+fileName);
325 }
326 if (entry == null) {
327 throw new FileNotFoundException();
328 }
329 return new InputStreamReader(mcoFile.getInputStream(entry), charSet);
330 } catch (FileNotFoundException e) {
331 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be found. ", e);
332 } catch (UnsupportedEncodingException e) {
333 throw new ConfigurationException("The char set '"+charSet+"' is not supported. ", e);
334 } catch (IOException e) {
335 throw new ConfigurationException("The file entry '"+fileName+"' in the mco file '"+mcoPath+"' cannot be loaded. ", e);
336 }
337 }
338
339 public InputStreamReader getInputStreamReaderFromConfigFile(String fileName) throws MaltChainedException {
340 return getInputStreamReaderFromConfigFileEntry(fileName, "UTF-8");
341 }
342
343 /**
344 * Returns a file handler object of a file within the configuration directory
345 *
346 * @param fileName a file name
347 * @return a file handler object of a file within the configuration directory
348 * @throws MaltChainedException
349 */
350 public File getFile(String fileName) throws MaltChainedException {
351 return new File(configDirectory.getPath()+File.separator+fileName);
352 }
353
354 public URL getConfigFileEntryURL(String fileName) throws MaltChainedException {
355 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
356 try {
357 if (!mcoPath.exists()) {
358 throw new ConfigurationException("Couldn't find mco-file '" +mcoPath.getAbsolutePath()+ "'");
359 }
360 // new URL("file", null, mcoPath.getAbsolutePath());
361 URL url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'/'+fileName + "\n");
362 try {
363 InputStream is = url.openStream();
364 is.close();
365 } catch (IOException e) {
366 url = new URL("jar:"+new URL("file", null, mcoPath.getAbsolutePath())+"!/"+getName()+'\\'+fileName + "\n");
367 }
368 return url;
369 } catch (MalformedURLException e) {
370 throw new ConfigurationException("Couldn't find the URL '" +"jar:"+mcoPath.getAbsolutePath()+"!/"+getName()+'/'+fileName+ "'", e);
371 }
372 }
373
374 /**
375 * Copies a file into the configuration directory.
376 *
377 * @param source a path to file
378 * @throws MaltChainedException
379 */
380 public String copyToConfig(File source) throws MaltChainedException {
381 byte[] readBuffer = new byte[BUFFER];
382 String destination = configDirectory.getPath()+File.separator+source.getName();
383 try {
384 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source));
385 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
386
387 int n = 0;
388 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
389 bos.write(readBuffer, 0, n);
390 }
391 bos.flush();
392 bos.close();
393 bis.close();
394 } catch (FileNotFoundException e) {
395 throw new ConfigurationException("The source file '"+source+"' cannot be found or the destination file '"+destination+"' cannot be created when coping the file. ", e);
396 } catch (IOException e) {
397 throw new ConfigurationException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e);
398 }
399 return source.getName();
400 }
401
402
403 public String copyToConfig(String fileUrl) throws MaltChainedException {
404 final URLFinder f = new URLFinder();
405 URL url = f.findURL(fileUrl);
406 if (url == null) {
407 throw new ConfigurationException("The file or URL '"+fileUrl+"' could not be found. ");
408 }
409 return copyToConfig(url);
410 }
411
412 public String copyToConfig(URL url) throws MaltChainedException {
413 if (url == null) {
414 throw new ConfigurationException("URL could not be found. ");
415 }
416 byte[] readBuffer = new byte[BUFFER];
417 String destFileName = url.getPath();
418 int indexSlash = destFileName.lastIndexOf('/');
419 if (indexSlash == -1) {
420 indexSlash = destFileName.lastIndexOf('\\');
421 }
422
423 if (indexSlash != -1) {
424 destFileName = destFileName.substring(indexSlash+1);
425 }
426
427 String destination = configDirectory.getPath()+File.separator+destFileName;
428 try {
429 BufferedInputStream bis = new BufferedInputStream(url.openStream());
430 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
431
432 int n = 0;
433 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
434 bos.write(readBuffer, 0, n);
435 }
436 bos.flush();
437 bos.close();
438 bis.close();
439 } catch (FileNotFoundException e) {
440 throw new ConfigurationException("The destination file '"+destination+"' cannot be created when coping the file. ", e);
441 } catch (IOException e) {
442 throw new ConfigurationException("The URL '"+url+"' cannot be copied to destination '"+destination+"'. ", e);
443 }
444 return destFileName;
445 }
446
447
448 /**
449 * Removes the configuration directory, if it exists and it contains a .info file.
450 *
451 * @throws MaltChainedException
452 */
453 public void deleteConfigDirectory() throws MaltChainedException {
454 if (!configDirectory.exists()) {
455 return;
456 }
457 File infoFile = new File(configDirectory.getPath()+File.separator+getName()+"_"+getType()+".info");
458 if (infoFile.exists()) {
459 deleteConfigDirectory(configDirectory);
460 } else {
461 throw new ConfigurationException("There exists a directory that is not a MaltParser configuration directory. ");
462 }
463 }
464
465 private void deleteConfigDirectory(File directory) throws MaltChainedException {
466 if (directory.exists()) {
467 File[] files = directory.listFiles();
468 for (int i = 0; i < files.length; i++) {
469 if (files[i].isDirectory()) {
470 deleteConfigDirectory(files[i]);
471 } else {
472 files[i].delete();
473 }
474 }
475 } else {
476 throw new ConfigurationException("The directory '"+directory.getPath()+ "' cannot be found. ");
477 }
478 directory.delete();
479 }
480
481 /**
482 * Returns a file handler object for the configuration directory
483 *
484 * @return a file handler object for the configuration directory
485 */
486 public File getConfigDirectory() {
487 return configDirectory;
488 }
489
490 protected void setConfigDirectory(File dir) {
491 this.configDirectory = dir;
492 }
493
494 /**
495 * Creates the configuration directory
496 *
497 * @throws MaltChainedException
498 */
499 public void createConfigDirectory() throws MaltChainedException {
500 checkConfigDirectory();
501 configDirectory.mkdir();
502 createInfoFile();
503 }
504
505 protected void checkConfigDirectory() throws MaltChainedException {
506 if (configDirectory.exists() && !configDirectory.isDirectory()) {
507 throw new ConfigurationException("The configuration directory name already exists and is not a directory. ");
508 }
509
510 if (configDirectory.exists()) {
511 deleteConfigDirectory();
512 }
513 }
514
515 protected void createInfoFile() throws MaltChainedException {
516 infoFile = new BufferedWriter(getOutputStreamWriter(getName()+"_"+getType()+".info"));
517 try {
518 infoFile.write("CONFIGURATION\n");
519 infoFile.write("Configuration name: "+getName()+"\n");
520 infoFile.write("Configuration type: "+getType()+"\n");
521 infoFile.write("Created: "+new Date(System.currentTimeMillis())+"\n");
522
523 infoFile.write("\nSYSTEM\n");
524 infoFile.write("Operating system architecture: "+System.getProperty("os.arch")+"\n");
525 infoFile.write("Operating system name: "+System.getProperty("os.name")+"\n");
526 infoFile.write("JRE vendor name: "+System.getProperty("java.vendor")+"\n");
527 infoFile.write("JRE version number: "+System.getProperty("java.version")+"\n");
528
529 infoFile.write("\nMALTPARSER\n");
530 infoFile.write("Version: "+SystemInfo.getVersion()+"\n");
531 infoFile.write("Build date: "+SystemInfo.getBuildDate()+"\n");
532 Set<String> excludeGroups = new HashSet<String>();
533 excludeGroups.add("system");
534 infoFile.write("\nSETTINGS\n");
535 infoFile.write(OptionManager.instance().toStringPrettyValues(containerIndex, excludeGroups));
536 infoFile.flush();
537 } catch (IOException e) {
538 throw new ConfigurationException("Could not create the maltparser info file. ");
539 }
540 }
541
542 /**
543 * Returns a writer to the configuration information file
544 *
545 * @return a writer to the configuration information file
546 * @throws MaltChainedException
547 */
548 public BufferedWriter getInfoFileWriter() throws MaltChainedException {
549 return infoFile;
550 }
551
552 /**
553 * Creates the malt configuration file (.mco). This file is compressed.
554 *
555 * @throws MaltChainedException
556 */
557 public void createConfigFile() throws MaltChainedException {
558 try {
559 JarOutputStream jos = new JarOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+getName()+".mco"));
560 // configLogger.info("Creates configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco' ...\n");
561 createConfigFile(configDirectory.getPath(), jos);
562 jos.close();
563 } catch (FileNotFoundException e) {
564 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e);
565 } catch (IOException e) {
566 throw new ConfigurationException("The maltparser configurtation file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be created. ", e);
567 }
568 }
569
570 private void createConfigFile(String directory, JarOutputStream jos) throws MaltChainedException {
571 byte[] readBuffer = new byte[BUFFER];
572 try {
573 File zipDir = new File(directory);
574 String[] dirList = zipDir.list();
575
576 int bytesIn = 0;
577
578 for (int i = 0; i < dirList.length; i++) {
579 File f = new File(zipDir, dirList[i]);
580 if (f.isDirectory()) {
581 String filePath = f.getPath();
582 createConfigFile(filePath, jos);
583 continue;
584 }
585
586 FileInputStream fis = new FileInputStream(f);
587
588 String entryPath = f.getPath().substring(workingDirectory.getPath().length()+1);
589 entryPath = entryPath.replace('\\', '/');
590 JarEntry entry = new JarEntry(entryPath);
591 jos.putNextEntry(entry);
592
593 while ((bytesIn = fis.read(readBuffer)) != -1) {
594 jos.write(readBuffer, 0, bytesIn);
595 }
596
597 fis.close();
598 }
599 } catch (FileNotFoundException e) {
600 throw new ConfigurationException("The directory '"+directory+"' cannot be found. ", e);
601 } catch (IOException e) {
602 throw new ConfigurationException("The directory '"+directory+"' cannot be compressed into a mco file. ", e);
603 }
604 }
605
606
607 public void copyConfigFile(File in, File out, Versioning versioning) throws MaltChainedException {
608 try {
609 JarFile jar = new JarFile(in);
610 JarOutputStream tempJar = new JarOutputStream(new FileOutputStream(out));
611 byte[] buffer = new byte[BUFFER];
612 int bytesRead;
613 final StringBuilder sb = new StringBuilder();
614 final URLFinder f = new URLFinder();
615
616 for (Enumeration<JarEntry> entries = jar.entries(); entries.hasMoreElements(); ) {
617 JarEntry inEntry = (JarEntry) entries.nextElement();
618 InputStream entryStream = jar.getInputStream(inEntry);
619 JarEntry outEntry = versioning.getJarEntry(inEntry);
620
621 if (!versioning.hasChanges(inEntry, outEntry)) {
622 tempJar.putNextEntry(outEntry);
623 while ((bytesRead = entryStream.read(buffer)) != -1) {
624 tempJar.write(buffer, 0, bytesRead);
625 }
626 } else {
627 tempJar.putNextEntry(outEntry);
628 BufferedReader br = new BufferedReader(new InputStreamReader(entryStream));
629 String line = null;
630 sb.setLength(0);
631 while ((line = br.readLine()) != null) {
632 sb.append(line);
633 sb.append('\n');
634 }
635 String outString = versioning.modifyJarEntry(inEntry, outEntry, sb);
636 tempJar.write(outString.getBytes());
637 }
638 }
639 if (versioning.getFeatureModelXML() != null && versioning.getFeatureModelXML().startsWith("/appdata")) {
640 int index = versioning.getFeatureModelXML().lastIndexOf('/');
641 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getFeatureModelXML()).openStream());
642 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getFeatureModelXML().substring(index+1)));
643 int n = 0;
644 while ((n = bis.read(buffer, 0, BUFFER)) != -1) {
645 tempJar.write(buffer, 0, n);
646 }
647 bis.close();
648 }
649 if (versioning.getInputFormatXML() != null && versioning.getInputFormatXML().startsWith("/appdata")) {
650 int index = versioning.getInputFormatXML().lastIndexOf('/');
651 BufferedInputStream bis = new BufferedInputStream(f.findURLinJars(versioning.getInputFormatXML()).openStream());
652 tempJar.putNextEntry(new JarEntry(versioning.getNewConfigName()+"/" +versioning.getInputFormatXML().substring(index+1)));
653 int n = 0;
654 while ((n = bis.read(buffer, 0, BUFFER)) != -1) {
655 tempJar.write(buffer, 0, n);
656 }
657 bis.close();
658 }
659 tempJar.flush();
660 tempJar.close();
661 jar.close();
662 } catch (IOException e) {
663 throw new ConfigurationException("", e);
664 }
665 }
666
667 protected void initNameNTypeFromInfoFile(URL url) throws MaltChainedException {
668 if (url == null) {
669 throw new ConfigurationException("The URL cannot be found. ");
670 }
671 try {
672 JarEntry je;
673 JarInputStream jis = new JarInputStream(url.openConnection().getInputStream());
674 while ((je = jis.getNextJarEntry()) != null) {
675 String entryName = je.getName();
676 if (entryName.endsWith(".info")) {
677 int indexUnderScore = entryName.lastIndexOf('_');
678 int indexSeparator = entryName.lastIndexOf(File.separator);
679 if (indexSeparator == -1) {
680 indexSeparator = entryName.lastIndexOf('/');
681 }
682 if (indexSeparator == -1) {
683 indexSeparator = entryName.lastIndexOf('\\');
684 }
685 int indexDot = entryName.lastIndexOf('.');
686 if (indexUnderScore == -1 || indexDot == -1) {
687 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. ");
688 }
689 setName(entryName.substring(indexSeparator+1, indexUnderScore));
690 setType(entryName.substring(indexUnderScore+1, indexDot));
691 setConfigDirectory(new File(workingDirectory.getPath()+File.separator+getName()));
692 jis.close();
693 return;
694 }
695 }
696
697 } catch (IOException e) {
698 throw new ConfigurationException("Could not find the configuration name and type from the URL '"+url.toString()+"'. ", e);
699 }
700 }
701
702 /**
703 * Prints the content of the configuration information file to the system logger
704 *
705 * @throws MaltChainedException
706 */
707 public void echoInfoFile() throws MaltChainedException {
708 checkConfigDirectory();
709 JarInputStream jis;
710 try {
711 if (url == null) {
712 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco"));
713 } else {
714 jis = new JarInputStream(url.openConnection().getInputStream());
715 }
716 JarEntry je;
717
718 while ((je = jis.getNextJarEntry()) != null) {
719 String entryName = je.getName();
720
721 if (entryName.endsWith(getName()+"_"+getType()+".info")) {
722 int c;
723 while ((c = jis.read()) != -1) {
724 SystemLogger.logger().info((char)c);
725 }
726 }
727 }
728 jis.close();
729 } catch (FileNotFoundException e) {
730 throw new ConfigurationException("Could not print configuration information file. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e);
731 } catch (IOException e) {
732 throw new ConfigurationException("Could not print configuration information file. ", e);
733 }
734
735 }
736
737 /**
738 * Unpacks the malt configuration file (.mco).
739 *
740 * @throws MaltChainedException
741 */
742 public void unpackConfigFile() throws MaltChainedException {
743 checkConfigDirectory();
744 JarInputStream jis;
745 try {
746 if (url == null) {
747 jis = new JarInputStream(new FileInputStream(workingDirectory.getPath()+File.separator+getName()+".mco"));
748 } else {
749 jis = new JarInputStream(url.openConnection().getInputStream());
750 }
751 unpackConfigFile(jis);
752 jis.close();
753 } catch (FileNotFoundException e) {
754 throw new ConfigurationException("Could not unpack configuration. The configuration file '"+workingDirectory.getPath()+File.separator+getName()+".mco"+"' cannot be found. ", e);
755 } catch (IOException e) {
756 if (configDirectory.exists()) {
757 deleteConfigDirectory();
758 }
759 throw new ConfigurationException("Could not unpack configuration. ", e);
760 }
761 initCreatedByMaltParserVersionFromInfoFile();
762 }
763
764 protected void unpackConfigFile(JarInputStream jis) throws MaltChainedException {
765 try {
766 JarEntry je;
767 byte[] readBuffer = new byte[BUFFER];
768 SortedSet<String> directoryCache = new TreeSet<String>();
769 while ((je = jis.getNextJarEntry()) != null) {
770 String entryName = je.getName();
771
772 if (entryName.startsWith("/")) {
773 entryName = entryName.substring(1);
774 }
775 if (entryName.endsWith(File.separator) || entryName.endsWith("/")) {
776 return;
777 }
778 int index = -1;
779 if (File.separator.equals("\\")) {
780 entryName = entryName.replace('/', '\\');
781 index = entryName.lastIndexOf("\\");
782 } else if (File.separator.equals("/")) {
783 entryName = entryName.replace('\\', '/');
784 index = entryName.lastIndexOf("/");
785 }
786 if (index > 0) {
787 String dirName = entryName.substring(0, index);
788 if (!directoryCache.contains(dirName)) {
789 File directory = new File(workingDirectory.getPath()+File.separator+dirName);
790 if (!(directory.exists() && directory.isDirectory())) {
791 if (!directory.mkdirs()) {
792 throw new ConfigurationException("Unable to make directory '" + dirName +"'. ");
793 }
794 directoryCache.add(dirName);
795 }
796 }
797 }
798
799 if (new File(workingDirectory.getPath()+File.separator+entryName).isDirectory() && new File(workingDirectory.getPath()+File.separator+entryName).exists()) {
800 continue;
801 }
802 BufferedOutputStream bos;
803 try {
804 bos = new BufferedOutputStream(new FileOutputStream(workingDirectory.getPath()+File.separator+entryName), BUFFER);
805 } catch (FileNotFoundException e) {
806 throw new ConfigurationException("Could not unpack configuration. The file '"+workingDirectory.getPath()+File.separator+entryName+"' cannot be unpacked. ", e);
807 }
808 int n = 0;
809 while ((n = jis.read(readBuffer, 0, BUFFER)) != -1) {
810 bos.write(readBuffer, 0, n);
811 }
812 bos.flush();
813 bos.close();
814 }
815 } catch (IOException e) {
816 throw new ConfigurationException("Could not unpack configuration. ", e);
817 }
818 }
819
820 /**
821 * Returns the name of the configuration directory
822 *
823 * @return the name of the configuration directory
824 */
825 public String getName() {
826 return name;
827 }
828
829 protected void setName(String name) {
830 this.name = name;
831 }
832
833 /**
834 * Returns the type of the configuration directory
835 *
836 * @return the type of the configuration directory
837 */
838 public String getType() {
839 return type;
840 }
841
842 protected void setType(String type) {
843 this.type = type;
844 }
845
846 /**
847 * Returns a file handler object for the working directory
848 *
849 * @return a file handler object for the working directory
850 */
851 public File getWorkingDirectory() {
852 return workingDirectory;
853 }
854
855 /**
856 * Initialize the working directory
857 *
858 * @throws MaltChainedException
859 */
860 public void initWorkingDirectory() throws MaltChainedException {
861 try {
862 initWorkingDirectory(OptionManager.instance().getOptionValue(0, "config", "workingdir").toString());
863 } catch (NullPointerException e) {
864 throw new ConfigurationException("The configuration cannot be found.", e);
865 }
866 }
867
868 /**
869 * Initialize the working directory according to the path. If the path is equals to "user.dir" or current directory, then the current directory
870 * will be the working directory.
871 *
872 * @param pathPrefixString the path to the working directory
873 * @throws MaltChainedException
874 */
875 public void initWorkingDirectory(String pathPrefixString) throws MaltChainedException {
876 if (pathPrefixString == null || pathPrefixString.equalsIgnoreCase("user.dir") || pathPrefixString.equalsIgnoreCase(".")) {
877 workingDirectory = new File(System.getProperty("user.dir"));
878 } else {
879 workingDirectory = new File(pathPrefixString);
880 }
881
882 if (workingDirectory == null || !workingDirectory.isDirectory()) {
883 new ConfigurationException("The specified working directory '"+pathPrefixString+"' is not a directory. ");
884 }
885 }
886
887 /**
888 * Returns the URL to the malt configuration file (.mco)
889 *
890 * @return the URL to the malt configuration file (.mco)
891 */
892 public URL getUrl() {
893 return url;
894 }
895
896 protected void setUrl(URL url) {
897 this.url = url;
898 }
899
900 /**
901 * Returns the option container index
902 *
903 * @return the option container index
904 */
905 public int getContainerIndex() {
906 return containerIndex;
907 }
908
909 /**
910 * Sets the option container index
911 *
912 * @param containerIndex a option container index
913 */
914 public void setContainerIndex(int containerIndex) {
915 this.containerIndex = containerIndex;
916 }
917
918 /**
919 * Returns the version number of MaltParser which created the malt configuration file (.mco)
920 *
921 * @return the version number of MaltParser which created the malt configuration file (.mco)
922 */
923 public String getCreatedByMaltParserVersion() {
924 return createdByMaltParserVersion;
925 }
926
927 /**
928 * Sets the version number of MaltParser which created the malt configuration file (.mco)
929 *
930 * @param createdByMaltParserVersion a version number of MaltParser
931 */
932 public void setCreatedByMaltParserVersion(String createdByMaltParserVersion) {
933 this.createdByMaltParserVersion = createdByMaltParserVersion;
934 }
935
936 public void initCreatedByMaltParserVersionFromInfoFile() throws MaltChainedException {
937 try {
938 BufferedReader br = new BufferedReader(getInputStreamReaderFromConfigFileEntry(getName()+"_"+getType()+".info", "UTF-8"));
939 String line = null;
940 while ((line = br.readLine()) != null) {
941 if (line.startsWith("Version: ")) {
942 setCreatedByMaltParserVersion(line.substring(31));
943 break;
944 }
945 }
946 br.close();
947 } catch (FileNotFoundException e) {
948 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e);
949 } catch (IOException e) {
950 throw new ConfigurationException("Could not retrieve the version number of the MaltParser configuration.", e);
951 }
952 }
953
954 public void versioning() throws MaltChainedException {
955 initCreatedByMaltParserVersionFromInfoFile();
956 SystemLogger.logger().info("\nCurrent version : " + SystemInfo.getVersion() + "\n");
957 SystemLogger.logger().info("Parser model version : " + createdByMaltParserVersion + "\n");
958 if (SystemInfo.getVersion() == null) {
959 throw new ConfigurationException("Couln't determine the version of MaltParser");
960 } else if (createdByMaltParserVersion == null) {
961 throw new ConfigurationException("Couln't determine the version of the parser model");
962 } else if (SystemInfo.getVersion().equals(createdByMaltParserVersion)) {
963 SystemLogger.logger().info("The parser model "+getName()+".mco has already the same version as the current version of MaltParser. \n");
964 return;
965 }
966
967 File mcoPath = new File(workingDirectory.getPath()+File.separator+getName()+".mco");
968 File newMcoPath = new File(workingDirectory.getPath()+File.separator+getName()+"."+SystemInfo.getVersion().trim()+".mco");
969 Versioning versioning = new Versioning(name, type, mcoPath, createdByMaltParserVersion);
970 if (!versioning.support(createdByMaltParserVersion)) {
971 SystemLogger.logger().warn("The parser model '"+ name+ ".mco' is created by MaltParser "+getCreatedByMaltParserVersion()+", which cannot be converted to a MaltParser "+SystemInfo.getVersion()+" parser model.\n");
972 SystemLogger.logger().warn("Please retrain the parser model with MaltParser "+SystemInfo.getVersion() +" or download MaltParser "+getCreatedByMaltParserVersion()+" from http://maltparser.org/download.html\n");
973 return;
974 }
975 SystemLogger.logger().info("Converts the parser model '"+ mcoPath.getName()+ "' into '"+newMcoPath.getName()+"'....\n");
976 copyConfigFile(mcoPath, newMcoPath, versioning);
977 }
978
979 protected void checkNConvertConfigVersion() throws MaltChainedException {
980 if (createdByMaltParserVersion.startsWith("1.0")) {
981 SystemLogger.logger().info(" Converts the MaltParser configuration ");
982 SystemLogger.logger().info("1.0");
983 SystemLogger.logger().info(" to ");
984 SystemLogger.logger().info(SystemInfo.getVersion());
985 SystemLogger.logger().info("\n");
986 File[] configFiles = configDirectory.listFiles();
987 for (int i = 0, n = configFiles.length; i < n; i++) {
988 if (configFiles[i].getName().endsWith(".mod")) {
989 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0."+configFiles[i].getName()));
990 }
991 if (configFiles[i].getName().endsWith(getName()+".dsm")) {
992 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"odm0.dsm"));
993 }
994 if (configFiles[i].getName().equals("savedoptions.sop")) {
995 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
996 }
997 if (configFiles[i].getName().equals("symboltables.sym")) {
998 configFiles[i].renameTo(new File(configDirectory.getPath()+File.separator+"symboltables.sym.old"));
999 }
1000 }
1001 try {
1002 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
1003 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop"));
1004 String line;
1005 while ((line = br.readLine()) != null) {
1006 if (line.startsWith("0\tguide\tprediction_strategy")) {
1007 bw.write("0\tguide\tdecision_settings\tT.TRANS+A.DEPREL\n");
1008 } else {
1009 bw.write(line);
1010 bw.write('\n');
1011 }
1012 }
1013 br.close();
1014 bw.flush();
1015 bw.close();
1016 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete();
1017 } catch (FileNotFoundException e) {
1018 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1019 } catch (IOException e) {
1020 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1021 }
1022 try {
1023 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"symboltables.sym.old"));
1024 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"symboltables.sym"));
1025 String line;
1026 while ((line = br.readLine()) != null) {
1027 if (line.startsWith("AllCombinedClassTable")) {
1028 bw.write("T.TRANS+A.DEPREL\n");
1029 } else {
1030 bw.write(line);
1031 bw.write('\n');
1032 }
1033 }
1034 br.close();
1035 bw.flush();
1036 bw.close();
1037 new File(configDirectory.getPath()+File.separator+"symboltables.sym.old").delete();
1038 } catch (FileNotFoundException e) {
1039 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e);
1040 } catch (IOException e) {
1041 throw new ConfigurationException("Could convert symboltables.sym version 1.0.4 to version 1.1. ", e);
1042 }
1043 }
1044 if (!createdByMaltParserVersion.startsWith("1.3")) {
1045 SystemLogger.logger().info(" Converts the MaltParser configuration ");
1046 SystemLogger.logger().info(createdByMaltParserVersion);
1047 SystemLogger.logger().info(" to ");
1048 SystemLogger.logger().info(SystemInfo.getVersion());
1049 SystemLogger.logger().info("\n");
1050
1051
1052 new File(configDirectory.getPath()+File.separator+"savedoptions.sop").renameTo(new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
1053 try {
1054 BufferedReader br = new BufferedReader(new FileReader(configDirectory.getPath()+File.separator+"savedoptions.sop.old"));
1055 BufferedWriter bw = new BufferedWriter(new FileWriter(configDirectory.getPath()+File.separator+"savedoptions.sop"));
1056 String line;
1057 while ((line = br.readLine()) != null) {
1058 int index = line.indexOf('\t');
1059 int container = 0;
1060 if (index > -1) {
1061 container = Integer.parseInt(line.substring(0,index));
1062 }
1063
1064 if (line.startsWith(container+"\tnivre\tpost_processing")) {
1065 } else if (line.startsWith(container+"\tmalt0.4\tbehavior")) {
1066 if (line.endsWith("true")) {
1067 SystemLogger.logger().info("MaltParser 1.3 doesn't support MaltParser 0.4 emulation.");
1068 br.close();
1069 bw.flush();
1070 bw.close();
1071 deleteConfigDirectory();
1072 System.exit(0);
1073 }
1074 } else if (line.startsWith(container+"\tsinglemalt\tparsing_algorithm")) {
1075 bw.write(container);
1076 bw.write("\tsinglemalt\tparsing_algorithm\t");
1077 if (line.endsWith("NivreStandard")) {
1078 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcStandardFactory");
1079 } else if (line.endsWith("NivreEager")) {
1080 bw.write("class org.maltparser.parser.algorithm.nivre.NivreArcEagerFactory");
1081 } else if (line.endsWith("CovingtonNonProjective")) {
1082 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonNonProjFactory");
1083 } else if (line.endsWith("CovingtonProjective")) {
1084 bw.write("class org.maltparser.parser.algorithm.covington.CovingtonProjFactory");
1085 }
1086 bw.write('\n');
1087 } else {
1088 bw.write(line);
1089 bw.write('\n');
1090 }
1091 }
1092 br.close();
1093 bw.flush();
1094 bw.close();
1095 new File(configDirectory.getPath()+File.separator+"savedoptions.sop.old").delete();
1096 } catch (FileNotFoundException e) {
1097 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1098 } catch (IOException e) {
1099 throw new ConfigurationException("Could convert savedoptions.sop version 1.0.4 to version 1.1. ", e);
1100 }
1101 }
1102 }
1103
1104 /**
1105 * Terminates the configuration directory
1106 *
1107 * @throws MaltChainedException
1108 */
1109 public void terminate() throws MaltChainedException {
1110 if (infoFile != null) {
1111 try {
1112 infoFile.flush();
1113 infoFile.close();
1114 } catch (IOException e) {
1115 throw new ConfigurationException("Could not close configuration information file. ", e);
1116 }
1117 }
1118 symbolTables = null;
1119 // configuration = null;
1120 }
1121
1122 /* (non-Javadoc)
1123 * @see java.lang.Object#finalize()
1124 */
1125 protected void finalize() throws Throwable {
1126 try {
1127 if (infoFile != null) {
1128 infoFile.flush();
1129 infoFile.close();
1130 }
1131 } finally {
1132 super.finalize();
1133 }
1134 }
1135
1136 public SymbolTableHandler getSymbolTables() {
1137 return symbolTables;
1138 }
1139
1140 public void setSymbolTables(SymbolTableHandler symbolTables) {
1141 this.symbolTables = symbolTables;
1142 }
1143
1144 public DataFormatManager getDataFormatManager() {
1145 return dataFormatManager;
1146 }
1147
1148 public void setDataFormatManager(DataFormatManager dataFormatManager) {
1149 this.dataFormatManager = dataFormatManager;
1150 }
1151
1152 public Set<String> getDataFormatInstanceKeys() {
1153 return dataFormatInstances.keySet();
1154 }
1155
1156 public boolean addDataFormatInstance(String key, DataFormatInstance dataFormatInstance) {
1157 if (!dataFormatInstances.containsKey(key)) {
1158 dataFormatInstances.put(key, dataFormatInstance);
1159 return true;
1160 }
1161 return false;
1162 }
1163
1164 public DataFormatInstance getDataFormatInstance(String key) {
1165 return dataFormatInstances.get(key);
1166 }
1167
1168 public int sizeDataFormatInstance() {
1169 return dataFormatInstances.size();
1170 }
1171
1172 public DataFormatInstance getInputDataFormatInstance() {
1173 return dataFormatInstances.get(dataFormatManager.getInputDataFormatSpec().getDataFormatName());
1174 }
1175
1176 public URL getInputFormatURL() {
1177 return inputFormatURL;
1178 }
1179
1180 public URL getOutputFormatURL() {
1181 return outputFormatURL;
1182 }
1183
1184
1185 }