001package org.maltparser.core.helper; 002 003/* 004 * Copyright 2009 Google Inc. 005 * 006 * Licensed under the Apache License, Version 2.0 (the "License"); you may not 007 * use this file except in compliance with the License. You may obtain a copy of 008 * the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 015 * License for the specific language governing permissions and limitations under 016 * the License. 017 */ 018import java.io.IOException; 019import java.io.ObjectInputStream; 020import java.io.ObjectOutputStream; 021import java.io.Serializable; 022import java.lang.reflect.Array; 023import java.util.AbstractSet; 024import java.util.Collection; 025import java.util.Iterator; 026import java.util.NoSuchElementException; 027 028/** 029 * A memory-efficient hash set. 030 * 031 * @param <E> the element type 032 */ 033public class HashSet<E> extends AbstractSet<E> implements Serializable { 034 private static final long serialVersionUID = 7526471155622776147L; 035 private class SetIterator implements Iterator<E> { 036 private int index = 0; 037 private int last = -1; 038 039 public SetIterator() { 040 advanceToItem(); 041 } 042 043 public boolean hasNext() { 044 return index < table.length; 045 } 046 047 @SuppressWarnings("unchecked") 048 public E next() { 049 if (!hasNext()) { 050 throw new NoSuchElementException(); 051 } 052 last = index; 053 E toReturn = (E) unmaskNull(table[index++]); 054 advanceToItem(); 055 return toReturn; 056 } 057 058 public void remove() { 059 if (last < 0) { 060 throw new IllegalStateException(); 061 } 062 internalRemove(last); 063 if (table[last] != null) { 064 index = last; 065 } 066 last = -1; 067 } 068 069 private void advanceToItem() { 070 for (; index < table.length; ++index) { 071 if (table[index] != null) { 072 return; 073 } 074 } 075 } 076 } 077 078 /** 079 * In the interest of memory-savings, we start with the smallest feasible 080 * power-of-two table size that can hold three items without rehashing. If we 081 * started with a size of 2, we'd have to expand as soon as the second item 082 * was added. 083 */ 084 private static final int INITIAL_TABLE_SIZE = 4; 085 086 private static final Object NULL_ITEM = new Serializable() { 087 Object readResolve() { 088 return NULL_ITEM; 089 } 090 }; 091 092 static Object maskNull(Object o) { 093 return (o == null) ? NULL_ITEM : o; 094 } 095 096 static Object unmaskNull(Object o) { 097 return (o == NULL_ITEM) ? null : o; 098 } 099 100 /** 101 * Number of objects in this set; transient due to custom serialization. 102 * Default access to avoid synthetic accessors from inner classes. 103 */ 104 transient int size = 0; 105 106 /** 107 * Backing store for all the objects; transient due to custom serialization. 108 * Default access to avoid synthetic accessors from inner classes. 109 */ 110 transient Object[] table; 111 112 public HashSet() { 113 table = new Object[INITIAL_TABLE_SIZE]; 114 } 115 116 public HashSet(Collection<? extends E> c) { 117 int newCapacity = INITIAL_TABLE_SIZE; 118 int expectedSize = c.size(); 119 while (newCapacity * 3 < expectedSize * 4) { 120 newCapacity <<= 1; 121 } 122 123 table = new Object[newCapacity]; 124 super.addAll(c); 125 } 126 127 @Override 128 public boolean add(E e) { 129 ensureSizeFor(size + 1); 130 int index = findOrEmpty(e); 131 if (table[index] == null) { 132 ++size; 133 table[index] = maskNull(e); 134 return true; 135 } 136 return false; 137 } 138 139 @Override 140 public boolean addAll(Collection<? extends E> c) { 141 ensureSizeFor(size + c.size()); 142 return super.addAll(c); 143 } 144 145 @Override 146 public void clear() { 147 table = new Object[INITIAL_TABLE_SIZE]; 148 size = 0; 149 } 150 151 @Override 152 public boolean contains(Object o) { 153 return find(o) >= 0; 154 } 155 156 @Override 157 public Iterator<E> iterator() { 158 return new SetIterator(); 159 } 160 161 @Override 162 public boolean remove(Object o) { 163 int index = find(o); 164 if (index < 0) { 165 return false; 166 } 167 internalRemove(index); 168 return true; 169 } 170 171 @Override 172 public int size() { 173 return size; 174 } 175 176 @Override 177 public Object[] toArray() { 178 return toArray(new Object[size]); 179 } 180 181 @SuppressWarnings("unchecked") 182 @Override 183 public <T> T[] toArray(T[] a) { 184 if (a.length < size) { 185 a = (T[]) Array.newInstance(a.getClass().getComponentType(), size); 186 } 187 int index = 0; 188 for (int i = 0; i < table.length; ++i) { 189 Object e = table[i]; 190 if (e != null) { 191 a[index++] = (T) unmaskNull(e); 192 } 193 } 194 while (index < a.length) { 195 a[index++] = null; 196 } 197 return a; 198 } 199 200 /** 201 * Adapted from org.apache.commons.collections.map.AbstractHashedMap. 202 */ 203 @SuppressWarnings("unchecked") 204 protected void doReadObject(ObjectInputStream in) throws IOException, 205 ClassNotFoundException { 206 table = new Object[in.readInt()]; 207 int items = in.readInt(); 208 for (int i = 0; i < items; i++) { 209 add((E) in.readObject()); 210 } 211 } 212 213 /** 214 * Adapted from org.apache.commons.collections.map.AbstractHashedMap. 215 */ 216 protected void doWriteObject(ObjectOutputStream out) throws IOException { 217 out.writeInt(table.length); 218 out.writeInt(size); 219 for (int i = 0; i < table.length; ++i) { 220 Object e = table[i]; 221 if (e != null) { 222 out.writeObject(unmaskNull(e)); 223 } 224 } 225 } 226 227 /** 228 * Returns whether two items are equal for the purposes of this set. 229 */ 230 protected boolean itemEquals(Object a, Object b) { 231 return (a == null) ? (b == null) : a.equals(b); 232 } 233 234 /** 235 * Return the hashCode for an item. 236 */ 237 protected int itemHashCode(Object o) { 238 return (o == null) ? 0 : o.hashCode(); 239 } 240 241 /** 242 * Works just like {@link #addAll(Collection)}, but for arrays. Used to avoid 243 * having to synthesize a collection in {@link Sets}. 244 */ 245 void addAll(E[] elements) { 246 ensureSizeFor(size + elements.length); 247 for (E e : elements) { 248 int index = findOrEmpty(e); 249 if (table[index] == null) { 250 ++size; 251 table[index] = maskNull(e); 252 } 253 } 254 } 255 256 /** 257 * Removes the item at the specified index, and performs internal management 258 * to make sure we don't wind up with a hole in the table. Default access to 259 * avoid synthetic accessors from inner classes. 260 */ 261 void internalRemove(int index) { 262 table[index] = null; 263 --size; 264 plugHole(index); 265 } 266 267 /** 268 * Ensures the set is large enough to contain the specified number of entries. 269 */ 270 private void ensureSizeFor(int expectedSize) { 271 if (table.length * 3 >= expectedSize * 4) { 272 return; 273 } 274 275 int newCapacity = table.length << 1; 276 while (newCapacity * 3 < expectedSize * 4) { 277 newCapacity <<= 1; 278 } 279 280 Object[] oldTable = table; 281 table = new Object[newCapacity]; 282 for (Object o : oldTable) { 283 if (o != null) { 284 int newIndex = getIndex(unmaskNull(o)); 285 while (table[newIndex] != null) { 286 if (++newIndex == table.length) { 287 newIndex = 0; 288 } 289 } 290 table[newIndex] = o; 291 } 292 } 293 } 294 295 /** 296 * Returns the index in the table at which a particular item resides, or -1 if 297 * the item is not in the table. 298 */ 299 private int find(Object o) { 300 int index = getIndex(o); 301 while (true) { 302 Object existing = table[index]; 303 if (existing == null) { 304 return -1; 305 } 306 if (itemEquals(o, unmaskNull(existing))) { 307 return index; 308 } 309 if (++index == table.length) { 310 index = 0; 311 } 312 } 313 } 314 315 /** 316 * Returns the index in the table at which a particular item resides, or the 317 * index of an empty slot in the table where this item should be inserted if 318 * it is not already in the table. 319 */ 320 private int findOrEmpty(Object o) { 321 int index = getIndex(o); 322 while (true) { 323 Object existing = table[index]; 324 if (existing == null) { 325 return index; 326 } 327 if (itemEquals(o, unmaskNull(existing))) { 328 return index; 329 } 330 if (++index == table.length) { 331 index = 0; 332 } 333 } 334 } 335 336 private int getIndex(Object o) { 337 int h = itemHashCode(o); 338 // Copied from Apache's AbstractHashedMap; prevents power-of-two collisions. 339 h += ~(h << 9); 340 h ^= (h >>> 14); 341 h += (h << 4); 342 h ^= (h >>> 10); 343 // Power of two trick. 344 return h & (table.length - 1); 345 } 346 347 /** 348 * Tricky, we left a hole in the map, which we have to fill. The only way to 349 * do this is to search forwards through the map shuffling back values that 350 * match this index until we hit a null. 351 */ 352 private void plugHole(int hole) { 353 int index = hole + 1; 354 if (index == table.length) { 355 index = 0; 356 } 357 while (table[index] != null) { 358 int targetIndex = getIndex(unmaskNull(table[index])); 359 if (hole < index) { 360 /* 361 * "Normal" case, the index is past the hole and the "bad range" is from 362 * hole (exclusive) to index (inclusive). 363 */ 364 if (!(hole < targetIndex && targetIndex <= index)) { 365 // Plug it! 366 table[hole] = table[index]; 367 table[index] = null; 368 hole = index; 369 } 370 } else { 371 /* 372 * "Wrapped" case, the index is before the hole (we've wrapped) and the 373 * "good range" is from index (exclusive) to hole (inclusive). 374 */ 375 if (index < targetIndex && targetIndex <= hole) { 376 // Plug it! 377 table[hole] = table[index]; 378 table[index] = null; 379 hole = index; 380 } 381 } 382 if (++index == table.length) { 383 index = 0; 384 } 385 } 386 } 387 388 private void readObject(ObjectInputStream in) throws IOException, 389 ClassNotFoundException { 390 in.defaultReadObject(); 391 doReadObject(in); 392 } 393 394 private void writeObject(ObjectOutputStream out) throws IOException { 395 out.defaultWriteObject(); 396 doWriteObject(out); 397 } 398}