// hmap is the header for a Go map.
type hmap struct {
	count     int    // number of live elements in the map; this is what len() returns
	flags     uint8
	B         uint8  // log2 of the number of buckets (the map holds 2^B buckets)
	noverflow uint16 // number of overflow buckets
	hash0     uint32 // hash seed

	buckets    unsafe.Pointer // pointer to the bucket array; may be nil when count == 0
	oldbuckets unsafe.Pointer // previous bucket array; non-nil only while the map is growing
	nevacuate  uintptr        // grow progress counter: buckets below this index have been evacuated

	extra *mapextra // optional fields
}
Map底层数据结构有3个核心字段:
// bmap is the in-memory layout of a single map bucket.
type bmap struct {
	// tophash holds the top byte of the hash value for each key stored
	// in this bucket (bucketCnt defaults to 8 slots per bucket).
	tophash [bucketCnt]uint8
	// Followed by bucketCnt keys and then bucketCnt elems.
	// NOTE: packing all the keys together and then all the elems together makes the
	// code a bit more complicated than alternating key/elem/key/elem/... but it allows
	// us to eliminate padding which would be needed for, e.g., map[int64]int8.
	// Followed by an overflow pointer.
}
Bucket底层数据结构:
// makemap implements Go map creation for make(map[K]V, hint).
// t describes the map type, hint is the requested initial capacity, and
// h may be a pre-allocated header (nil means allocate a new one).
func makemap(t *maptype, hint int, h *hmap) *hmap {
	// Compute the memory needed for hint elements; if it overflows or
	// exceeds the allocation limit, ignore the hint.
	mem, overflow := math.MulUintptr(uintptr(hint), t.bucket.size)
	if overflow || mem > maxAlloc {
		hint = 0
	}

	// initialize Hmap
	if h == nil {
		h = new(hmap)
	}
	h.hash0 = fastrand()

	// Find the smallest B such that 2^B buckets can hold hint elements
	// without exceeding the load factor.
	B := uint8(0)
	for overLoadFactor(hint, B) {
		B++
	}
	h.B = B

	// When B != 0, allocate the bucket array eagerly (for B == 0 the
	// runtime allocates it lazily on first write).
	if h.B != 0 {
		var nextOverflow *bmap
		h.buckets, nextOverflow = makeBucketArray(t, h.B, nil)
		// Hang any preallocated overflow buckets off h.extra.
		if nextOverflow != nil {
			h.extra = new(mapextra)
			h.extra.nextOverflow = nextOverflow
		}
	}

	return h
}
// 确定bucket数目因子B,是否可以承载元素个数count // Args: // count: map元素个数 // B: map中bucket数目因子,bucket数为 2 ^ B // Return: // 是否可以承载 // // 补充: // bucketCnt = 8 // loadFactorNum = 13 // loadFactorDen = 2 // bucketShirt(B) => 1 << B func overLoadFactor(count int, B uint8) bool { return count > bucketCnt && uintptr(count) > loadFactorNum*(bucketShift(B)/loadFactorDen) }
需要注意的是,golang中整数除法会截断小数部分。

所以函数可以理解为:由 `count > bucketCnt` 决定,当 count <= 8 时,B = 0,即只用一个bucket;`loadFactorNum*(bucketShift(B)/loadFactorDen)` 等价于 `loadFactorNum/loadFactorDen * bucketShift(B)`,而 `loadFactorNum/loadFactorDen = 6.5`,即一个bucket平均承载的元素数;原函数先计算 `bucketShift(B)/loadFactorDen`,是为了避免 `loadFactorNum/loadFactorDen` 的整除截断。bucketShift(B)
，表示1 << B,即bucket数量

// makeBucketArray allocates the backing array for map buckets.
// Args:
//   t: map type descriptor
//   b: map.B, the bucket-count exponent
//   dirtyalloc: a previously used bucket array to reuse, or nil to
//     allocate a fresh one
// Returns the bucket array and, when extra buckets were preallocated,
// a pointer to the first free overflow bucket.
func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) {
	base := bucketShift(b)
	nbuckets := base // number of buckets
	// For small b, overflow buckets are unlikely, so skip the extra work.
	if b >= 4 {
		// Preallocate 2^(b-4) additional buckets to serve as overflow
		// buckets, then round the allocation up to a full size class.
		nbuckets += bucketShift(b - 4)
		sz := t.bucket.size * nbuckets
		up := roundupsize(sz)
		if up != sz {
			nbuckets = up / t.bucket.size
		}
	}

	if dirtyalloc == nil {
		buckets = newarray(t.bucket, int(nbuckets))
	} else {
		// Reuse the caller-supplied array; it must be cleared first,
		// with the pointer-aware variant when buckets contain pointers.
		buckets = dirtyalloc
		size := t.bucket.size * nbuckets
		if t.bucket.ptrdata != 0 {
			memclrHasPointers(buckets, size)
		} else {
			memclrNoHeapPointers(buckets, size)
		}
	}

	if base != nbuckets {
		// Overflow buckets were preallocated right after the base
		// buckets. Mark the end of the free list by pointing the last
		// bucket's overflow link back at buckets (a non-nil sentinel),
		// so a nil link means "more free overflow buckets follow".
		nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize)))
		last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize)))
		last.setoverflow(t, (*bmap)(buckets))
	}
	return buckets, nextOverflow
}
// mapaccess1 returns a pointer to h[key]. It never returns nil:
// when the key is absent it returns a pointer to the element type's
// zero value instead.
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {
	// Nil or empty map: return the zero value.
	if h == nil || h.count == 0 {
		return unsafe.Pointer(&zeroVal[0])
	}
	// Detect a concurrent writer; maps are not goroutine-safe.
	if h.flags&hashWriting != 0 {
		throw("concurrent map read and map write")
	}
	// Hash the key and select its bucket using the low-order bits of
	// the hash.
	hash := t.hasher(key, uintptr(h.hash0))
	m := bucketMask(h.B)
	b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))
	// High-order hash byte, used to cheaply distinguish the multiple
	// elements hashed into the same bucket.
	top := tophash(hash)
	// The loop below:
	// 1. scans the current bucket for a slot whose tophash and key match;
	// 2. if not found, follows overflow(t) down the bucket's overflow
	//    chain. When a bucket's 8 slots are exhausted, a new overflow
	//    bucket is linked in, i.e. hash collisions are resolved by
	//    chaining (separate chaining).
bucketloop:
	for ; b != nil; b = b.overflow(t) {
		for i := uintptr(0); i < bucketCnt; i++ {
			if b.tophash[i] != top {
				// emptyRest means this slot and every later slot in
				// this bucket and its overflow chain are empty: stop.
				if b.tophash[i] == emptyRest {
					break bucketloop
				}
				continue
			}
			// tophash matched: locate the key and compare it for real.
			k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
			if t.indirectkey() {
				k = *((*unsafe.Pointer)(k))
			}
			if t.key.equal(key, k) {
				// Elements are stored after all bucketCnt keys.
				e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
				if t.indirectelem() {
					e = *((*unsafe.Pointer)(e))
				}
				return e
			}
		}
	}
	return unsafe.Pointer(&zeroVal[0])
}
map获取元素值的步骤:计算key的哈希值;用哈希值低位选中bucket;用哈希值高位(tophash)在bucket内快速匹配;tophash命中后再比较key本身确认;当前bucket未命中时沿overflow链表继续查找。

map赋值和map查询有类似的定位步骤。
// hashGrow starts growing the map. It only installs the new bucket
// array and bookkeeping; the data itself is moved incrementally later
// by growWork() and evacuate().
func hashGrow(t *maptype, h *hmap) {
	// Decide between doubling the bucket count (incremental grow) and a
	// same-size grow that merely compacts overflow buckets.
	bigger := uint8(1)
	if !overLoadFactor(h.count+1, h.B) {
		bigger = 0
		h.flags |= sameSizeGrow
	}
	oldbuckets := h.buckets
	newbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)

	// An iterator running on the current buckets becomes an iterator on
	// the old buckets after the swap.
	flags := h.flags &^ (iterator | oldIterator)
	if h.flags&iterator != 0 {
		flags |= oldIterator
	}
	// Commit the grow: swap in the new array and reset progress counters.
	h.B += bigger
	h.flags = flags
	h.oldbuckets = oldbuckets
	h.buckets = newbuckets
	h.nevacuate = 0
	h.noverflow = 0

	// Move the current overflow buckets to the old generation.
	if h.extra != nil && h.extra.overflow != nil {
		if h.extra.oldoverflow != nil {
			throw("oldoverflow is not nil")
		}
		h.extra.oldoverflow = h.extra.overflow
		h.extra.overflow = nil
	}
	// Stash any freshly preallocated overflow buckets.
	if nextOverflow != nil {
		if h.extra == nil {
			h.extra = new(mapextra)
		}
		h.extra.nextOverflow = nextOverflow
	}
}

// the actual copying of the hash table data is done incrementally
// by growWork() and evacuate().
`hashGrow` 函数会根据当前count数量和B的值,判断是等量扩容,还是增量扩容。`hashGrow` 函数仅为Map设置扩容的前置条件:如flags、新的bucket array、把老的bucket数组赋值给oldbuckets……但不会直接搬迁map数据;map数据的迁移,分布在之后每一次字典数据的获取和赋值操作中;具体搬迁过程在 `growWork()` 和 `evacuate()` 两个函数中。