001    package org.maltparser.core.helper;
002    
003    /*
004     * Copyright 2009 Google Inc.
005     * 
006     * Licensed under the Apache License, Version 2.0 (the "License"); you may not
007     * use this file except in compliance with the License. You may obtain a copy of
008     * the License at
009     * 
010     * http://www.apache.org/licenses/LICENSE-2.0
011     * 
012     * Unless required by applicable law or agreed to in writing, software
013     * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
014     * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
015     * License for the specific language governing permissions and limitations under
016     * the License.
017     */
018    import java.io.IOException;
019    import java.io.ObjectInputStream;
020    import java.io.ObjectOutputStream;
021    import java.io.Serializable;
022    import java.lang.reflect.Array;
023    import java.util.AbstractSet;
024    import java.util.Collection;
025    import java.util.Iterator;
026    import java.util.NoSuchElementException;
027    
028    /**
029     * A memory-efficient hash set.
030     * 
031     * @param <E> the element type
032     */
033    public class HashSet<E> extends AbstractSet<E> implements Serializable {
034      private static final long serialVersionUID = 7526471155622776147L;
035      private class SetIterator implements Iterator<E> {
036        private int index = 0;
037        private int last = -1;
038    
039        public SetIterator() {
040          advanceToItem();
041        }
042    
043        public boolean hasNext() {
044          return index < table.length;
045        }
046    
047        @SuppressWarnings("unchecked")
048        public E next() {
049          if (!hasNext()) {
050            throw new NoSuchElementException();
051          }
052          last = index;
053          E toReturn = (E) unmaskNull(table[index++]);
054          advanceToItem();
055          return toReturn;
056        }
057    
058        public void remove() {
059          if (last < 0) {
060            throw new IllegalStateException();
061          }
062          internalRemove(last);
063          if (table[last] != null) {
064            index = last;
065          }
066          last = -1;
067        }
068    
069        private void advanceToItem() {
070          for (; index < table.length; ++index) {
071            if (table[index] != null) {
072              return;
073            }
074          }
075        }
076      }
077    
078      /**
079       * In the interest of memory-savings, we start with the smallest feasible
080       * power-of-two table size that can hold three items without rehashing. If we
081       * started with a size of 2, we'd have to expand as soon as the second item
082       * was added.
083       */
084      private static final int INITIAL_TABLE_SIZE = 4;
085    
086      private static final Object NULL_ITEM = new Serializable() {
087        Object readResolve() {
088          return NULL_ITEM;
089        }
090      };
091    
092      static Object maskNull(Object o) {
093        return (o == null) ? NULL_ITEM : o;
094      }
095    
096      static Object unmaskNull(Object o) {
097        return (o == NULL_ITEM) ? null : o;
098      }
099    
100      /**
101       * Number of objects in this set; transient due to custom serialization.
102       * Default access to avoid synthetic accessors from inner classes.
103       */
104      transient int size = 0;
105    
106      /**
107       * Backing store for all the objects; transient due to custom serialization.
108       * Default access to avoid synthetic accessors from inner classes.
109       */
110      transient Object[] table;
111    
112      public HashSet() {
113        table = new Object[INITIAL_TABLE_SIZE];
114      }
115    
116      public HashSet(Collection<? extends E> c) {
117        int newCapacity = INITIAL_TABLE_SIZE;
118        int expectedSize = c.size();
119        while (newCapacity * 3 < expectedSize * 4) {
120          newCapacity <<= 1;
121        }
122    
123        table = new Object[newCapacity];
124        super.addAll(c);
125      }
126    
127      @Override
128      public boolean add(E e) {
129        ensureSizeFor(size + 1);
130        int index = findOrEmpty(e);
131        if (table[index] == null) {
132          ++size;
133          table[index] = maskNull(e);
134          return true;
135        }
136        return false;
137      }
138    
139      @Override
140      public boolean addAll(Collection<? extends E> c) {
141        ensureSizeFor(size + c.size());
142        return super.addAll(c);
143      }
144    
145      @Override
146      public void clear() {
147        table = new Object[INITIAL_TABLE_SIZE];
148        size = 0;
149      }
150    
151      @Override
152      public boolean contains(Object o) {
153        return find(o) >= 0;
154      }
155    
156      @Override
157      public Iterator<E> iterator() {
158        return new SetIterator();
159      }
160    
161      @Override
162      public boolean remove(Object o) {
163        int index = find(o);
164        if (index < 0) {
165          return false;
166        }
167        internalRemove(index);
168        return true;
169      }
170    
171      @Override
172      public int size() {
173        return size;
174      }
175    
176      @Override
177      public Object[] toArray() {
178        return toArray(new Object[size]);
179      }
180    
181      @SuppressWarnings("unchecked")
182      @Override
183      public <T> T[] toArray(T[] a) {
184        if (a.length < size) {
185          a = (T[]) Array.newInstance(a.getClass().getComponentType(), size);
186        }
187        int index = 0;
188        for (int i = 0; i < table.length; ++i) {
189          Object e = table[i];
190          if (e != null) {
191            a[index++] = (T) unmaskNull(e);
192          }
193        }
194        while (index < a.length) {
195          a[index++] = null;
196        }
197        return a;
198      }
199    
200      /**
201       * Adapted from org.apache.commons.collections.map.AbstractHashedMap.
202       */
203      @SuppressWarnings("unchecked")
204      protected void doReadObject(ObjectInputStream in) throws IOException,
205          ClassNotFoundException {
206        table = new Object[in.readInt()];
207        int items = in.readInt();
208        for (int i = 0; i < items; i++) {
209          add((E) in.readObject());
210        }
211      }
212    
213      /**
214       * Adapted from org.apache.commons.collections.map.AbstractHashedMap.
215       */
216      protected void doWriteObject(ObjectOutputStream out) throws IOException {
217        out.writeInt(table.length);
218        out.writeInt(size);
219        for (int i = 0; i < table.length; ++i) {
220          Object e = table[i];
221          if (e != null) {
222            out.writeObject(unmaskNull(e));
223          }
224        }
225      }
226    
227      /**
228       * Returns whether two items are equal for the purposes of this set.
229       */
230      protected boolean itemEquals(Object a, Object b) {
231        return (a == null) ? (b == null) : a.equals(b);
232      }
233    
234      /**
235       * Return the hashCode for an item.
236       */
237      protected int itemHashCode(Object o) {
238        return (o == null) ? 0 : o.hashCode();
239      }
240    
241      /**
242       * Works just like {@link #addAll(Collection)}, but for arrays. Used to avoid
243       * having to synthesize a collection in {@link Sets}.
244       */
245      void addAll(E[] elements) {
246        ensureSizeFor(size + elements.length);
247        for (E e : elements) {
248          int index = findOrEmpty(e);
249          if (table[index] == null) {
250            ++size;
251            table[index] = maskNull(e);
252          }
253        }
254      }
255    
256      /**
257       * Removes the item at the specified index, and performs internal management
258       * to make sure we don't wind up with a hole in the table. Default access to
259       * avoid synthetic accessors from inner classes.
260       */
261      void internalRemove(int index) {
262        table[index] = null;
263        --size;
264        plugHole(index);
265      }
266    
267      /**
268       * Ensures the set is large enough to contain the specified number of entries.
269       */
270      private void ensureSizeFor(int expectedSize) {
271        if (table.length * 3 >= expectedSize * 4) {
272          return;
273        }
274    
275        int newCapacity = table.length << 1;
276        while (newCapacity * 3 < expectedSize * 4) {
277          newCapacity <<= 1;
278        }
279    
280        Object[] oldTable = table;
281        table = new Object[newCapacity];
282        for (Object o : oldTable) {
283          if (o != null) {
284            int newIndex = getIndex(unmaskNull(o));
285            while (table[newIndex] != null) {
286              if (++newIndex == table.length) {
287                newIndex = 0;
288              }
289            }
290            table[newIndex] = o;
291          }
292        }
293      }
294    
295      /**
296       * Returns the index in the table at which a particular item resides, or -1 if
297       * the item is not in the table.
298       */
299      private int find(Object o) {
300        int index = getIndex(o);
301        while (true) {
302          Object existing = table[index];
303          if (existing == null) {
304            return -1;
305          }
306          if (itemEquals(o, unmaskNull(existing))) {
307            return index;
308          }
309          if (++index == table.length) {
310            index = 0;
311          }
312        }
313      }
314    
315      /**
316       * Returns the index in the table at which a particular item resides, or the
317       * index of an empty slot in the table where this item should be inserted if
318       * it is not already in the table.
319       */
320      private int findOrEmpty(Object o) {
321        int index = getIndex(o);
322        while (true) {
323          Object existing = table[index];
324          if (existing == null) {
325            return index;
326          }
327          if (itemEquals(o, unmaskNull(existing))) {
328            return index;
329          }
330          if (++index == table.length) {
331            index = 0;
332          }
333        }
334      }
335    
336      private int getIndex(Object o) {
337        int h = itemHashCode(o);
338        // Copied from Apache's AbstractHashedMap; prevents power-of-two collisions.
339        h += ~(h << 9);
340        h ^= (h >>> 14);
341        h += (h << 4);
342        h ^= (h >>> 10);
343        // Power of two trick.
344        return h & (table.length - 1);
345      }
346    
347      /**
348       * Tricky, we left a hole in the map, which we have to fill. The only way to
349       * do this is to search forwards through the map shuffling back values that
350       * match this index until we hit a null.
351       */
352      private void plugHole(int hole) {
353        int index = hole + 1;
354        if (index == table.length) {
355          index = 0;
356        }
357        while (table[index] != null) {
358          int targetIndex = getIndex(unmaskNull(table[index]));
359          if (hole < index) {
360            /*
361             * "Normal" case, the index is past the hole and the "bad range" is from
362             * hole (exclusive) to index (inclusive).
363             */
364            if (!(hole < targetIndex && targetIndex <= index)) {
365              // Plug it!
366              table[hole] = table[index];
367              table[index] = null;
368              hole = index;
369            }
370          } else {
371            /*
372             * "Wrapped" case, the index is before the hole (we've wrapped) and the
373             * "good range" is from index (exclusive) to hole (inclusive).
374             */
375            if (index < targetIndex && targetIndex <= hole) {
376              // Plug it!
377              table[hole] = table[index];
378              table[index] = null;
379              hole = index;
380            }
381          }
382          if (++index == table.length) {
383            index = 0;
384          }
385        }
386      }
387    
388      private void readObject(ObjectInputStream in) throws IOException,
389          ClassNotFoundException {
390        in.defaultReadObject();
391        doReadObject(in);
392      }
393    
394      private void writeObject(ObjectOutputStream out) throws IOException {
395        out.defaultWriteObject();
396        doWriteObject(out);
397      }
398    }