public class HashSet<E>{//1.7版本 private transient HashMap<E,Object> map; private static final Object PRESENT = new Object(); //0-1.会调用HashMap的无参构造方法 public HashSet() { map = new HashMap<>(); } //loadFactor 负载因子 默认0.75 public HashSet(int initialCapacity, float loadFactor) { map = new HashMap<>(initialCapacity, loadFactor); } public HashSet(int initialCapacity) { map = new HashMap<>(initialCapacity); } //1. 添加第一个学生,e= Student{id=1, name='lili'} //2.Hashset的add方法就是调用HashMap的put方法 public boolean add(E e) { //24.返回true return map.put(e, PRESENT)==null; } } //---------------以下是HashMap源码----------------------------- static final Entry<?,?>[] EMPTY_TABLE = {}; transient Entry<K,V>[] table = (Entry<K,V>[]) EMPTY_TABLE; 主数组是一个Entry类型的数组 static final int DEFAULT_INITIAL_CAPACITY = 1 << 4; // aka 16 默认初始容量是16 //默认负载因子为0.75,加载因子是表示Hsah表中元素的填满的程度 //太大容易引起哈西冲突,太小容易浪费 0.75是经过大量运算后得到的最好值 //这个值其实可以自己改,但是不建议改,因为这个0.75是大量运算得到的 static final float DEFAULT_LOAD_FACTOR = 0.75f; int threshold; //0-2.会给初始容量、负载因子赋默认值 public HashMap() { this(DEFAULT_INITIAL_CAPACITY, DEFAULT_LOAD_FACTOR); } //0-3.调用到有参构造函数,将threshold=16 public HashMap(int initialCapacity, float loadFactor) { if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; this.loadFactor = loadFactor; //loadFactor = 0.75f threshold = initialCapacity;//threshold =16 init();// 啥也没干 } void init() { } //3. key=Student{id=1, name='lili'} value=PRESENT public V put(K key, V value) { //4.走if if (table == EMPTY_TABLE) { //给table指定长度 inflateTable(threshold);//threshold = 16; } if (key == null) return putForNullKey(value); //14.对key进行hash运算 int hash = hash(key); //17.得到数组下标为 i=15 int i = indexFor(hash, table.length); for (Entry<K,V> e = table[i]; e != null; e = e.next) { Object k; if (e.hash == hash && ((k = e.key) == key || key.equals(k))) { V oldValue = e.value; e.value = value; e.recordAccess(this); return oldValue; } } modCount++; //18.hash = 2271 key=Student{id=1, name='lili'} value= PRESENT i=15 addEntry(hash, key, value, i); return null; } //15.hash方法返回这个key对应的哈希值,内部进行二次散列,为了尽量保证不同的key得到不同的哈希码 final int hash(Object k) { int h = hashSeed; if (0 != h && k instanceof String) { return sun.misc.Hashing.stringHash32((String) k); } //k.hashCode()函数调用的是key键值类型自带的哈希函数, //由于不同的对象其hashCode()有可能相同,所以需对hashCode()再次哈希,以降低相同率 h ^= k.hashCode(); /* 接下来的一串与运算和异或运算,称之为“扰动函数”, 扰动的核心思想在于使计算出来的值在保留原有相关特性的基础上, 增加其值的不确定性,从而降低冲突的概率。 不同的版本实现的方式不一样,但其根本思想是一致的。 往右移动的目的,就是为了将h的高位利用起来,减少哈西冲突 */ h ^= (h >>> 20) ^ (h >>> 12); return h ^ (h >>> 7) ^ (h >>> 4); } //16.根据h & (length-1)表达式,算出来数组下标 static int indexFor(int h, int length) { // assert Integer.bitCount(length) == 1 : "length must be a non-zero power of 2"; return h & (length-1); } //5.toSize=16; private void inflateTable(int toSize) { // Find a power of 2 >= toSize //10.capacity = 16 一顿操作就是为了算出来数组的初始长度 最接近给定长度的二次幂 //如果给定长度是5,最后得到的capacity是8 2^3 int capacity = roundUpToPowerOf2(toSize); //capacity(中文翻译:容量) //11.threshold = 12 是否扩容的条件 threshold = (int) Math.min(capacity * loadFactor, MAXIMUM_CAPACITY + 1); //12.table = new Entry[16]; table = new Entry[capacity]; initHashSeedAsNeeded(capacity); } //9.return number = 16 private static int roundUpToPowerOf2(int number) { // assert number >= 0 : "number must be non-negative"; return number >= MAXIMUM_CAPACITY ? MAXIMUM_CAPACITY //6.(number - 1) << 1 15<< 1 15*2 = 30 : (number > 1) ? Integer.highestOneBit((number - 1) << 1) : 1; } //7.i =30 为了算数组的初始长度,给定长度的二次幂 public static int highestOneBit(int i) { // HD, Figure 3-1 i |= (i >> 1); i |= (i >> 2); i |= (i >> 4); i |= (i >> 8); i |= (i >> 16); return i - (i >>> 1);//8.return 16 } //13.?????????????????????????????? final boolean initHashSeedAsNeeded(int capacity) { boolean currentAltHashing = hashSeed != 0; boolean useAltHashing = sun.misc.VM.isBooted() && (capacity >= Holder.ALTERNATIVE_HASHING_THRESHOLD); boolean switching = currentAltHashing ^ useAltHashing; if (switching) { hashSeed = useAltHashing ? sun.misc.Hashing.randomHashSeed(this) : 0; } return switching; } private V putForNullKey(V value) { for (Entry<K,V> e = table[0]; e != null; e = e.next) { if (e.key == null) { V oldValue = e.value; e.value = value; e.recordAccess(this); return oldValue; } } modCount++; addEntry(0, null, value, 0); //23.返回null return null; } void addEntry(int hash, K key, V value, int bucketIndex) { //如果数组里面的元素超过12,并且当前要插入的元素对应的下标已经有值了,就需要扩容了,扩为原来的2倍 if ((size >= threshold) && (null != table[bucketIndex])) { resize(2 * table.length); //重新根据hash和table的长度,算出新的数组下标位置,在创建Entry hash = (null != key) ? hash(key) : 0; bucketIndex = indexFor(hash, table.length); } //19.hash = 2271 key=Student{id=1, name='lili'} value= PRESENT bucketIndex=15 createEntry(hash, key, value, bucketIndex); } void createEntry(int hash, K key, V value, int bucketIndex) { //20.table[15] =null e=null; Entry<K,V> e = table[bucketIndex]; //21.table[15] = Entry(2271,Student{id=1, name='lili'},value=PRESENT,e=null),HashSet成功放入第一个元素 table[bucketIndex] = new Entry<>(hash, key, value, e); //22.数组长度+1 size++; } void resize(int newCapacity) { Entry[] oldTable = table; int oldCapacity = oldTable.length; if (oldCapacity == MAXIMUM_CAPACITY) { threshold = Integer.MAX_VALUE; return; } Entry[] newTable = new Entry[newCapacity]; //转让方法:将老数组中的东西都重新放入新数组中 transfer(newTable, initHashSeedAsNeeded(newCapacity)); //老数组替换为新数组 table = newTable; //重新计算threshold threshold = (int)Math.min(newCapacity * loadFactor, MAXIMUM_CAPACITY + 1); } void transfer(Entry[] newTable, boolean rehash) { int newCapacity = newTable.length; for (Entry<K,V> e : table) { while(null != e) { Entry<K,V> next = e.next; if (rehash) { e.hash = null == e.key ? 0 : hash(e.key); } //将哈希值,和新的数组容量传进去,重新计算key在新数组中的位置 int i = indexFor(e.hash, newCapacity); //头插法 获取链表上元素给e.next e.next = newTable[i]; //然后将e放在i位置 newTable[i] = e; //e再指向下一个节点继续遍历 e = next; } } } }
顺道带上了HashSet,HashSet也是用HashMap实现的。