package org.maltparser.core.helper;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;

import org.apache.log4j.Logger;
import org.maltparser.core.exception.MaltChainedException;
import org.maltparser.core.plugin.Plugin;
import org.maltparser.core.plugin.PluginLoader;

/**
 * Static helpers for XML escaping, locating files/resources as URLs and
 * writing progress ("ticker") output to a logger.
 *
 * @author Johan Hall
 */
public class Util {
    private static final char AMP_CHAR = '&';
    private static final char LT_CHAR = '<';
    private static final char GT_CHAR = '>';
    private static final char QUOT_CHAR = '"';
    private static final char APOS_CHAR = '\'';

    /**
     * Largest power of ten used when right-aligning numbers in the ticker
     * output; values below ten times this scale are padded to seven columns.
     */
    private static final long TIC_COLUMN_SCALE = 1000000L;

    /**
     * Replaces the five characters that are special in XML
     * (<code>&amp; &lt; &gt; &quot; &apos;</code>) with their predefined entities.
     *
     * @param str the text to escape; must not be <b>null</b>
     * @return <code>str</code> itself when it contains no special character,
     *         otherwise a new string with every special character replaced
     */
    public static String xmlEscape(String str) {
        final int length = str.length();

        // Locate the first character that needs escaping so that clean input
        // is returned as-is without allocating a builder.
        int firstSpecial = -1;
        for (int i = 0; i < length; i++) {
            if (entityFor(str.charAt(i)) != null) {
                firstSpecial = i;
                break;
            }
        }
        if (firstSpecial < 0) {
            return str;
        }

        final StringBuilder sb = new StringBuilder(length + 16);
        sb.append(str, 0, firstSpecial);
        for (int i = firstSpecial; i < length; i++) {
            final char c = str.charAt(i);
            final String entity = entityFor(c);
            if (entity != null) {
                sb.append(entity);
            } else {
                sb.append(c);
            }
        }
        return sb.toString();
    }

    /**
     * Returns the predefined XML entity for a character, or <b>null</b> when
     * the character does not need escaping.
     */
    private static String entityFor(char c) {
        switch (c) {
        case AMP_CHAR:
            return "&amp;";
        case LT_CHAR:
            return "&lt;";
        case GT_CHAR:
            return "&gt;";
        case QUOT_CHAR:
            return "&quot;";
        case APOS_CHAR:
            return "&apos;";
        default:
            return null;
        }
    }

    /**
     * Search for a file according the following priority:
     * <ol>
     * <li>The local file system
     * <li>Specified as an URL (starting with http:, file:, ftp: or jar:)
     * <li>A resource resolved through the class of the current thread
     *     (intended to find files in the MaltParser distribution, malt.jar)
     * <li>MaltParser plugins
     * </ol>
     *
     * If the file string is found, an URL object is returned, otherwise <b>null</b>
     *
     * @param fileString the file string to convert into an URL.
     * @return an URL object, if the file string is found, otherwise <b>null</b>
     * @throws MaltChainedException if the file string (or a plugin location
     *         combined with it) cannot be turned into a well-formed URL
     */
    public static URL findURL(String fileString) throws MaltChainedException {
        final File specFile = new File(fileString);

        try {
            if (specFile.exists()) {
                // found the file in the file system
                return new URL("file:///" + specFile.getAbsolutePath());
            }
            if (hasUrlScheme(fileString)) {
                // the input string is an URL string starting with http, file, ftp or jar
                return new URL(fileString);
            }

            // NOTE(review): the lookup goes through the runtime class of the
            // current thread, not through Util.class - confirm this is intended.
            final URL resource = Thread.currentThread().getClass().getResource(fileString);
            if (resource != null) {
                return resource;
            }

            // search the plugins; the first one that can actually be opened wins
            for (Plugin plugin : PluginLoader.instance()) {
                final String separator = fileString.startsWith("/") ? "!" : "!/";
                final URL url = new URL("jar:" + plugin.getUrl() + separator + fileString);
                if (canOpen(url)) {
                    return url;
                }
            }
            // could not convert the input string into an URL
            return null;
        } catch (MalformedURLException e) {
            throw new MaltChainedException("Malformed URL: " + fileString, e);
        }
    }

    /** Tells whether the string starts with one of the URL schemes accepted by {@link #findURL(String)}. */
    private static boolean hasUrlScheme(String fileString) {
        return fileString.startsWith("http:")
                || fileString.startsWith("file:")
                || fileString.startsWith("ftp:")
                || fileString.startsWith("jar:");
    }

    /** Probes an URL by opening and immediately closing its stream; any I/O failure means "not available". */
    private static boolean canOpen(URL url) {
        try {
            final InputStream is = url.openStream();
            is.close();
            return true;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * Logs one progress dot and, once a row is full, the row summary.
     *
     * @param logger    the logger that receives the output
     * @param startTime start time in milliseconds, used for the elapsed-time column
     * @param nTicxRow  number of dots per row
     * @param inTic     number of dots already written on the current row
     * @param subject   the counter value shown in the row summary
     * @return the updated number of dots on the current row (0 after a summary)
     */
    public static int simpleTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
        logger.info(".");
        int tic = inTic + 1;
        if (tic >= nTicxRow) {
            ticInfo(logger, startTime, subject);
            tic = 0;
        }
        return tic;
    }

    /**
     * Logs the first row: a single dot, <code>nTicxRow</code> spaces and the row summary.
     *
     * @param logger    the logger that receives the output
     * @param startTime start time in milliseconds, used for the elapsed-time column
     * @param nTicxRow  number of dots per row
     * @param subject   the counter value shown in the row summary
     */
    public static void startTicer(Logger logger, long startTime, int nTicxRow, int subject) {
        logger.info(".");
        for (int i = 1; i <= nTicxRow; i++) {
            logger.info(" ");
        }
        ticInfo(logger, startTime, subject);
    }

    /**
     * Pads the current row with spaces and logs the final row summary.
     *
     * @param logger    the logger that receives the output
     * @param startTime start time in milliseconds, used for the elapsed-time column
     * @param nTicxRow  number of dots per row
     * @param inTic     number of dots already written on the current row
     * @param subject   the counter value shown in the row summary
     */
    public static void endTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
        for (int i = inTic; i <= nTicxRow; i++) {
            logger.info(" ");
        }
        ticInfo(logger, startTime, subject);
    }

    /**
     * Logs the tab-separated row summary: the counter, the elapsed seconds
     * since <code>startTime</code> and the used heap in MB (total minus free
     * memory divided by 1,000,000), each right-aligned, followed by a newline.
     */
    private static void ticInfo(Logger logger, long startTime, int subject) {
        logger.info("\t");
        logRightAligned(logger, subject);

        logger.info("\t");
        final long seconds = (System.currentTimeMillis() - startTime) / 1000;
        logRightAligned(logger, seconds);
        logger.info("s");

        logger.info("\t");
        final Runtime runtime = Runtime.getRuntime();
        final long usedMegabytes = (runtime.totalMemory() - runtime.freeMemory()) / 1000000;
        logRightAligned(logger, usedMegabytes);
        logger.info("MB\n");
    }

    /**
     * Logs a number preceded by one space per missing leading digit, so that
     * values with up to seven digits line up in a seven character column.
     * Zero is treated as a one-digit number and therefore gets six spaces.
     */
    private static void logRightAligned(Logger logger, long value) {
        // Stop at scale 1: the last digit is always printed, even for zero.
        for (long scale = TIC_COLUMN_SCALE; scale > 1 && value / scale == 0; scale /= 10) {
            logger.info(" ");
        }
        logger.info(value);
    }
}