001 package org.maltparser.core.helper;
002
003
004 import java.io.BufferedInputStream;
005 import java.io.BufferedOutputStream;
006 import java.io.Closeable;
007 import java.io.File;
008 import java.io.FileInputStream;
009 import java.io.FileNotFoundException;
010 import java.io.FileOutputStream;
011 import java.io.IOException;
012 import java.io.InputStream;
013 import java.net.MalformedURLException;
014 import java.net.URL;
015
016 import org.apache.log4j.Logger;
017 import org.maltparser.core.exception.MaltChainedException;
018 import org.maltparser.core.plugin.Plugin;
019 import org.maltparser.core.plugin.PluginLoader;
020
021 /**
022 *
023 *
024 * @author Johan Hall
025 */
026 public class Util {
027 private static final int BUFFER = 4096;
028 private static final char AMP_CHAR = '&';
029 private static final char LT_CHAR = '<';
030 private static final char GT_CHAR = '>';
031 private static final char QUOT_CHAR = '"';
032 private static final char APOS_CHAR = '\'';
033
034 public static String xmlEscape(String str) {
035 boolean needEscape = false;
036 char c;
037 for (int i = 0; i < str.length(); i++) {
038 c = str.charAt(i);
039 if (c == AMP_CHAR || c == LT_CHAR || c == GT_CHAR || c == QUOT_CHAR || c == APOS_CHAR) {
040 needEscape = true;
041 break;
042 }
043 }
044 if (!needEscape) {
045 return str;
046 }
047 final StringBuilder sb = new StringBuilder();
048 for (int i = 0; i < str.length(); i++) {
049 c = str.charAt(i);
050 if (str.charAt(i) == AMP_CHAR) {
051 sb.append("&");
052 } else if ( str.charAt(i) == LT_CHAR) {
053 sb.append("<");
054 } else if (str.charAt(i) == GT_CHAR) {
055 sb.append(">");
056 } else if (str.charAt(i) == QUOT_CHAR) {
057 sb.append(""");
058 } else if (str.charAt(i) == APOS_CHAR) {
059 sb.append("'");
060 } else {
061 sb.append(c);
062 }
063 }
064 return sb.toString();
065 }
066
067 /**
068 * Search for a file according the following priority:
069 * <ol>
070 * <li>The local file system
071 * <li>Specified as an URL (starting with http:, file:, ftp: or jar:
072 * <li>MaltParser distribution file (malt.jar)
073 * <li>MaltParser plugins
074 * </ol>
075 *
076 * If the file string is found, an URL object is returned, otherwise <b>null</b>
077 *
078 * @param fileString the file string to convert into an URL.
079 * @return an URL object, if the file string is found, otherwise <b>null</b>
080 * @throws MaltChainedException
081 */
082 public static URL findURL(String fileString) throws MaltChainedException {
083 File specFile = new File(fileString);
084
085 try {
086 if (specFile.exists()) {
087 // found the file in the file system
088 return new URL("file:///"+specFile.getAbsolutePath());
089 } else if (fileString.startsWith("http:") || fileString.startsWith("file:") || fileString.startsWith("ftp:") || fileString.startsWith("jar:")) {
090 // the input string is an URL string starting with http, file, ftp or jar
091 return new URL(fileString);
092 } else {
093 return findURLinJars(fileString);
094 }
095 } catch (MalformedURLException e) {
096 throw new MaltChainedException("Malformed URL: "+fileString, e);
097 }
098 }
099
100 public static URL findURLinJars(String fileString) throws MaltChainedException {
101 try {
102 // search in malt.jar and its plugins
103 if (Thread.currentThread().getClass().getResource(fileString) != null) {
104 // found the input string in the malt.jar file
105 return Thread.currentThread().getClass().getResource(fileString);
106 } else {
107 for (Plugin plugin : PluginLoader.instance()) {
108 URL url = null;
109 if (!fileString.startsWith("/")) {
110 url = new URL("jar:"+plugin.getUrl() + "!/" + fileString);
111 } else {
112 url = new URL("jar:"+plugin.getUrl() + "!" + fileString);
113 }
114
115 try {
116 InputStream is = url.openStream();
117 is.close();
118 } catch (IOException e) {
119 continue;
120 }
121 // found the input string in one of the plugins
122 return url;
123 }
124 // could not convert the input string into an URL
125 return null;
126 }
127 } catch (MalformedURLException e) {
128 throw new MaltChainedException("Malformed URL: "+fileString, e);
129 }
130 }
131
132 public static int simpleTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
133 logger.info(".");
134 int tic = inTic + 1;
135 if (tic >= nTicxRow) {
136 ticInfo(logger, startTime, subject);
137 tic = 0;
138 }
139 return tic;
140 }
141
142 public static void startTicer(Logger logger, long startTime, int nTicxRow, int subject) {
143 logger.info(".");
144 for (int i = 1; i <= nTicxRow; i++) {
145 logger.info(" ");
146 }
147 ticInfo(logger, startTime, subject);
148 }
149
150 public static void endTicer(Logger logger, long startTime, int nTicxRow, int inTic, int subject) {
151 for (int i = inTic; i <= nTicxRow; i++) {
152 logger.info(" ");
153 }
154 ticInfo(logger, startTime, subject);
155 }
156
157 private static void ticInfo(Logger logger, long startTime, int subject) {
158 logger.info("\t");
159 int a = 1000000;
160 if (subject != 0) {
161 while (subject/a == 0) {
162 logger.info(" ");
163 a /= 10;
164 }
165 } else {
166 logger.info(" ");
167 }
168 logger.info(subject);
169 logger.info("\t");
170 long time = (System.currentTimeMillis()-startTime)/1000;
171 a = 1000000;
172 if (time != 0) {
173 while (time/a == 0 ) {
174 logger.info(" ");
175 a /= 10;
176 }
177 logger.info(time);
178 logger.info("s");
179 } else {
180 logger.info(" 0s");
181 }
182 logger.info("\t");
183 long memory = (Runtime.getRuntime().totalMemory() - Runtime.getRuntime().freeMemory())/1000000;
184 a = 1000000;
185 if (memory != 0) {
186 while (memory/a == 0 ) {
187 logger.info(" ");
188 a /= 10;
189 }
190 logger.info(memory);
191 logger.info("MB\n");
192 } else {
193 logger.info(" 0MB\n");
194 }
195 }
196
197 public static void copyfile(String source, String destination) throws MaltChainedException {
198 try {
199 byte[] readBuffer = new byte[BUFFER];
200 BufferedInputStream bis = new BufferedInputStream(new FileInputStream(source));
201 BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(destination), BUFFER);
202 int n = 0;
203 while ((n = bis.read(readBuffer, 0, BUFFER)) != -1) {
204 bos.write(readBuffer, 0, n);
205 }
206 bos.flush();
207 bos.close();
208 bis.close();
209 } catch (FileNotFoundException e) {
210 throw new MaltChainedException("The destination file '"+destination+"' cannot be created when coping the file. ", e);
211 } catch (IOException e) {
212 throw new MaltChainedException("The source file '"+source+"' cannot be copied to destination '"+destination+"'. ", e);
213 }
214 }
215
216 /**
217 * @param s the string to parse for the double value
218 * @throws IllegalArgumentException if s is empty or represents NaN or Infinity
219 * @throws NumberFormatException see {@link Double#parseDouble(String)}
220 */
221 public static double atof(String s) {
222 if (s == null || s.length() < 1) throw new IllegalArgumentException("Can't convert empty string to integer");
223 double d = Double.parseDouble(s);
224 if (Double.isNaN(d) || Double.isInfinite(d)) {
225 throw new IllegalArgumentException("NaN or Infinity in input: " + s);
226 }
227 return (d);
228 }
229
230 /**
231 * @param s the string to parse for the integer value
232 * @throws IllegalArgumentException if s is empty
233 * @throws NumberFormatException see {@link Integer#parseInt(String)}
234 */
235 public static int atoi(String s) throws NumberFormatException {
236 if (s == null || s.length() < 1) throw new IllegalArgumentException("Can't convert empty string to integer");
237 // Integer.parseInt doesn't accept '+' prefixed strings
238 if (s.charAt(0) == '+') s = s.substring(1);
239 return Integer.parseInt(s);
240 }
241
242 public static void closeQuietly(Closeable c) {
243 if (c == null) return;
244 try {
245 c.close();
246 } catch (Throwable t) {}
247 }
248
249 public static double[] copyOf(double[] original, int newLength) {
250 double[] copy = new double[newLength];
251 System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength));
252 return copy;
253 }
254
255 public static int[] copyOf(int[] original, int newLength) {
256 int[] copy = new int[newLength];
257 System.arraycopy(original, 0, copy, 0, Math.min(original.length, newLength));
258 return copy;
259 }
260
261 public static boolean equals(double[] a, double[] a2) {
262 if (a == a2) return true;
263 if (a == null || a2 == null) return false;
264
265 int length = a.length;
266 if (a2.length != length) return false;
267
268 for (int i = 0; i < length; i++)
269 if (a[i] != a2[i]) return false;
270
271 return true;
272 }
273 }