001package org.maltparser.core.helper;
002
003/*
004 * Copyright 2009 Google Inc.
005 * 
006 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
007 * use this file except in compliance with the License. You may obtain a copy of
008 * the License at
009 * 
010 * http://www.apache.org/licenses/LICENSE-2.0
011 * 
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
015 * License for the specific language governing permissions and limitations under
016 * the License.
017 */
018import java.io.IOException;
019import java.io.ObjectInputStream;
020import java.io.ObjectOutputStream;
021import java.io.Serializable;
022import java.lang.reflect.Array;
023import java.util.AbstractSet;
024import java.util.Collection;
025import java.util.Iterator;
026import java.util.NoSuchElementException;
027
028/**
029 * A memory-efficient hash set.
030 * 
031 * @param <E> the element type
032 */
033public class HashSet<E> extends AbstractSet<E> implements Serializable {
034  private static final long serialVersionUID = 7526471155622776147L;
035  private class SetIterator implements Iterator<E> {
036    private int index = 0;
037    private int last = -1;
038
039    public SetIterator() {
040      advanceToItem();
041    }
042
043    public boolean hasNext() {
044      return index < table.length;
045    }
046
047    @SuppressWarnings("unchecked")
048    public E next() {
049      if (!hasNext()) {
050        throw new NoSuchElementException();
051      }
052      last = index;
053      E toReturn = (E) unmaskNull(table[index++]);
054      advanceToItem();
055      return toReturn;
056    }
057
058    public void remove() {
059      if (last < 0) {
060        throw new IllegalStateException();
061      }
062      internalRemove(last);
063      if (table[last] != null) {
064        index = last;
065      }
066      last = -1;
067    }
068
069    private void advanceToItem() {
070      for (; index < table.length; ++index) {
071        if (table[index] != null) {
072          return;
073        }
074      }
075    }
076  }
077
078  /**
079   * In the interest of memory-savings, we start with the smallest feasible
080   * power-of-two table size that can hold three items without rehashing. If we
081   * started with a size of 2, we'd have to expand as soon as the second item
082   * was added.
083   */
084  private static final int INITIAL_TABLE_SIZE = 4;
085
086  private static final Object NULL_ITEM = new Serializable() {
087    Object readResolve() {
088      return NULL_ITEM;
089    }
090  };
091
092  static Object maskNull(Object o) {
093    return (o == null) ? NULL_ITEM : o;
094  }
095
096  static Object unmaskNull(Object o) {
097    return (o == NULL_ITEM) ? null : o;
098  }
099
100  /**
101   * Number of objects in this set; transient due to custom serialization.
102   * Default access to avoid synthetic accessors from inner classes.
103   */
104  transient int size = 0;
105
106  /**
107   * Backing store for all the objects; transient due to custom serialization.
108   * Default access to avoid synthetic accessors from inner classes.
109   */
110  transient Object[] table;
111
112  public HashSet() {
113    table = new Object[INITIAL_TABLE_SIZE];
114  }
115
116  public HashSet(Collection<? extends E> c) {
117    int newCapacity = INITIAL_TABLE_SIZE;
118    int expectedSize = c.size();
119    while (newCapacity * 3 < expectedSize * 4) {
120      newCapacity <<= 1;
121    }
122
123    table = new Object[newCapacity];
124    super.addAll(c);
125  }
126
127  @Override
128  public boolean add(E e) {
129    ensureSizeFor(size + 1);
130    int index = findOrEmpty(e);
131    if (table[index] == null) {
132      ++size;
133      table[index] = maskNull(e);
134      return true;
135    }
136    return false;
137  }
138
139  @Override
140  public boolean addAll(Collection<? extends E> c) {
141    ensureSizeFor(size + c.size());
142    return super.addAll(c);
143  }
144
145  @Override
146  public void clear() {
147    table = new Object[INITIAL_TABLE_SIZE];
148    size = 0;
149  }
150
151  @Override
152  public boolean contains(Object o) {
153    return find(o) >= 0;
154  }
155
156  @Override
157  public Iterator<E> iterator() {
158    return new SetIterator();
159  }
160
161  @Override
162  public boolean remove(Object o) {
163    int index = find(o);
164    if (index < 0) {
165      return false;
166    }
167    internalRemove(index);
168    return true;
169  }
170
171  @Override
172  public int size() {
173    return size;
174  }
175
176  @Override
177  public Object[] toArray() {
178    return toArray(new Object[size]);
179  }
180
181  @SuppressWarnings("unchecked")
182  @Override
183  public <T> T[] toArray(T[] a) {
184    if (a.length < size) {
185      a = (T[]) Array.newInstance(a.getClass().getComponentType(), size);
186    }
187    int index = 0;
188    for (int i = 0; i < table.length; ++i) {
189      Object e = table[i];
190      if (e != null) {
191        a[index++] = (T) unmaskNull(e);
192      }
193    }
194    while (index < a.length) {
195      a[index++] = null;
196    }
197    return a;
198  }
199
200  /**
201   * Adapted from org.apache.commons.collections.map.AbstractHashedMap.
202   */
203  @SuppressWarnings("unchecked")
204  protected void doReadObject(ObjectInputStream in) throws IOException,
205      ClassNotFoundException {
206    table = new Object[in.readInt()];
207    int items = in.readInt();
208    for (int i = 0; i < items; i++) {
209      add((E) in.readObject());
210    }
211  }
212
213  /**
214   * Adapted from org.apache.commons.collections.map.AbstractHashedMap.
215   */
216  protected void doWriteObject(ObjectOutputStream out) throws IOException {
217    out.writeInt(table.length);
218    out.writeInt(size);
219    for (int i = 0; i < table.length; ++i) {
220      Object e = table[i];
221      if (e != null) {
222        out.writeObject(unmaskNull(e));
223      }
224    }
225  }
226
227  /**
228   * Returns whether two items are equal for the purposes of this set.
229   */
230  protected boolean itemEquals(Object a, Object b) {
231    return (a == null) ? (b == null) : a.equals(b);
232  }
233
234  /**
235   * Return the hashCode for an item.
236   */
237  protected int itemHashCode(Object o) {
238    return (o == null) ? 0 : o.hashCode();
239  }
240
241  /**
242   * Works just like {@link #addAll(Collection)}, but for arrays. Used to avoid
243   * having to synthesize a collection in {@link Sets}.
244   */
245  void addAll(E[] elements) {
246    ensureSizeFor(size + elements.length);
247    for (E e : elements) {
248      int index = findOrEmpty(e);
249      if (table[index] == null) {
250        ++size;
251        table[index] = maskNull(e);
252      }
253    }
254  }
255
256  /**
257   * Removes the item at the specified index, and performs internal management
258   * to make sure we don't wind up with a hole in the table. Default access to
259   * avoid synthetic accessors from inner classes.
260   */
261  void internalRemove(int index) {
262    table[index] = null;
263    --size;
264    plugHole(index);
265  }
266
267  /**
268   * Ensures the set is large enough to contain the specified number of entries.
269   */
270  private void ensureSizeFor(int expectedSize) {
271    if (table.length * 3 >= expectedSize * 4) {
272      return;
273    }
274
275    int newCapacity = table.length << 1;
276    while (newCapacity * 3 < expectedSize * 4) {
277      newCapacity <<= 1;
278    }
279
280    Object[] oldTable = table;
281    table = new Object[newCapacity];
282    for (Object o : oldTable) {
283      if (o != null) {
284        int newIndex = getIndex(unmaskNull(o));
285        while (table[newIndex] != null) {
286          if (++newIndex == table.length) {
287            newIndex = 0;
288          }
289        }
290        table[newIndex] = o;
291      }
292    }
293  }
294
295  /**
296   * Returns the index in the table at which a particular item resides, or -1 if
297   * the item is not in the table.
298   */
299  private int find(Object o) {
300    int index = getIndex(o);
301    while (true) {
302      Object existing = table[index];
303      if (existing == null) {
304        return -1;
305      }
306      if (itemEquals(o, unmaskNull(existing))) {
307        return index;
308      }
309      if (++index == table.length) {
310        index = 0;
311      }
312    }
313  }
314
315  /**
316   * Returns the index in the table at which a particular item resides, or the
317   * index of an empty slot in the table where this item should be inserted if
318   * it is not already in the table.
319   */
320  private int findOrEmpty(Object o) {
321    int index = getIndex(o);
322    while (true) {
323      Object existing = table[index];
324      if (existing == null) {
325        return index;
326      }
327      if (itemEquals(o, unmaskNull(existing))) {
328        return index;
329      }
330      if (++index == table.length) {
331        index = 0;
332      }
333    }
334  }
335
336  private int getIndex(Object o) {
337    int h = itemHashCode(o);
338    // Copied from Apache's AbstractHashedMap; prevents power-of-two collisions.
339    h += ~(h << 9);
340    h ^= (h >>> 14);
341    h += (h << 4);
342    h ^= (h >>> 10);
343    // Power of two trick.
344    return h & (table.length - 1);
345  }
346
347  /**
348   * Tricky, we left a hole in the map, which we have to fill. The only way to
349   * do this is to search forwards through the map shuffling back values that
350   * match this index until we hit a null.
351   */
352  private void plugHole(int hole) {
353    int index = hole + 1;
354    if (index == table.length) {
355      index = 0;
356    }
357    while (table[index] != null) {
358      int targetIndex = getIndex(unmaskNull(table[index]));
359      if (hole < index) {
360        /*
361         * "Normal" case, the index is past the hole and the "bad range" is from
362         * hole (exclusive) to index (inclusive).
363         */
364        if (!(hole < targetIndex && targetIndex <= index)) {
365          // Plug it!
366          table[hole] = table[index];
367          table[index] = null;
368          hole = index;
369        }
370      } else {
371        /*
372         * "Wrapped" case, the index is before the hole (we've wrapped) and the
373         * "good range" is from index (exclusive) to hole (inclusive).
374         */
375        if (index < targetIndex && targetIndex <= hole) {
376          // Plug it!
377          table[hole] = table[index];
378          table[index] = null;
379          hole = index;
380        }
381      }
382      if (++index == table.length) {
383        index = 0;
384      }
385    }
386  }
387
388  private void readObject(ObjectInputStream in) throws IOException,
389      ClassNotFoundException {
390    in.defaultReadObject();
391    doReadObject(in);
392  }
393
394  private void writeObject(ObjectOutputStream out) throws IOException {
395    out.defaultWriteObject();
396    doWriteObject(out);
397  }
398}