设计概述
runtime/map.go
mapbucketbucketbucketbucketbucketbucketbucketbucketoverflow bucketbucketbucketbucketbucketmapbucketmapbucketmapbucket
map
map
// A header for a Go map.
type hmap struct {count int // map的大小,也就是len()的值flags uint8 // 状态标识,用于控制goroutine写入和扩容的状态,详见下文B uint8 // 桶的数量,2^B个noverflow uint16 // 溢出桶(overflow)个数hash0 uint32 // 哈希因子buckets unsafe.Pointer // 2^B个bucket的数组oldbuckets unsafe.Pointer // 扩容后的旧bucket数组nevacuate uintptr // 迁移计数器,此指针之前的所有桶已被迁移,即nevacuate指向桶数组已迁移桶的最高下标extra *mapextra // 可选的域
}
// bmap表示一个桶
type bmap struct {// tophash包含该bucket中键的哈希值的高八位// 如果tophash[0] < minTopHash,tophash[0]表示桶迁移状态。tophash [bucketCnt]uint8 // bucketCnt == 8,一个桶有八个位置// 跟在tophash后面的事八个键和八个值。// 将所有键、值分别放在一起,即以|k|k|k|k|v|v|v|v|的方式保存,可以避免内存对齐造成空间浪费。// 如map[int64]int8,为了做内存对齐,占一字节的int8需要用7个字节填充对齐。
}
bmaptophashbmapcmd/compile/internal/gc.bmap
type bmap struct {topbits [8]uint8keys [8]keytypevalues [8]valuetypepad uintptroverflow uintptr
}
type mapextra struct {// 如果键值都不包含指针,并且允许内联,会将bucket类型标志为不包含指针。这样做避免了GC扫描整个map。// 为了保证空闲的overflow bucket在GC中存活,将各个指向溢出桶的指针保存到overflow和oldoverflow中。// overflow和oldoverflow只在键值均不包含指针时使用。// overflow包含了hmap.buckets的溢出桶,oldoverflow包含了hmap.oldbuckets的溢出桶。// 这种间接寻址使得可以在hiter(用于对map进行迭代)中保存slice的指针。overflow *[]*bmap // 指向一个元素为*bmap的slice的指针oldoverflow *[]*bmap// nextOverflow为空闲溢出桶的指针nextOverflow *bmap
}
map
map
m := make(map[string]int)
// 指定map长度
m := make(map[string]int, 8)
makemap
func makemap64(t *maptype, hint int64, h *hmap) *hmap {// 当hint类型为int64时校验将其转换为int再转换为int64是否值不变,// 如果hint的值发生变化,则将hint设置为0.if int64(int(hint)) != hint {hint = 0}return makemap(t, int(hint), h)
}// 当hint<=8且map需要在堆上分配时调用该方法创建map.
// 该方法仅分配一个hmap首部,初始化哈希因子后返回。
func makemap_small() *hmap {h := new(hmap)h.hash0 = fastrand()return h
}// makemap实现了标准的map初始化动作。
// 如果编译器确定map或者第一个bucket可以在栈上创建,h和bucket可能不是nil。
// 如果h != nil,map可以在h中直接创建。
// 如果h.buckets != nil,指向的桶可以用作第一个桶。
func makemap(t *maptype, hint int, h *hmap) *hmap {// 将hint和t.bucket.size相乘,并检查乘积是否溢出。// mem即hint个t.bucket.size大小的桶所需的内存大小。mem, overflow := math.MulUintptr(uintptr(hint), t.bucket.size)// 如果乘积溢出,或所需内存超过最大可分配内存,将hint设为0.if overflow || mem > maxAlloc {hint = 0}// 分配hmap首部if h == nil {h = new(hmap)}// 初始化哈希因子h.hash0 = fastrand()// 找到可以容纳所需元素数的参数B。B := uint8(0)// 将hint个元素放到2^B个桶中,检查每个桶的平均元素数是否大于loadFactor(即6.5)// 将B自增直至满足平均每个桶的元素数<=6.5.for overLoadFactor(hint, B) {B++}h.B = B// 分配哈希表// 如果B == 0,桶被延迟分配。// 如果hint很大,分配内存需要花费一定时间。if h.B != 0 {var nextOverflow *bmap// makeBucketArray初始化并返回一个桶数组。// 有可能会预分配一些溢出桶,即 nextOverflow。// 如果预分配了溢出桶,则将它放到h.extra.nextOverflow中。h.buckets, nextOverflow = makeBucketArray(t, h.B, nil)if nextOverflow != nil {h.extra = new(mapextra)h.extra.nextOverflow = nextOverflow}}return h
}
mapmap
maphinthintmakeBucketArray
h*hmapmapsliceslice
func makeslice(et *_type, len, cap int) slice
slice
// runtime/slice.go
type slice struct {array unsafe.Pointer // 元素指针len int // 长度 cap int // 容量
}
mapmapmapslicesliceappendslice
map
mapkeykeykey
mapaccess1mapaccess2mapaccessKmapaccess1_fatmapaccess2_fatmap
在Go语言编译的类型检查期间,会根据接受参数的个数决定使用的运行时方法:
runtime.mapaccess1runtime.mapaccess2mapaccessKmap_fatzeroVal
mapaccess1
mapaccess1map
// mapaccess1返回指向h[key]的一个指针。mapaccess1永远不会返回nil,
// 而会返回一个零对象的引用,用于当键不在map中时代表元素的类型。
// 因为返回的指针会使整个map存活,尽量不要长时间持有它。
func mapaccess1(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {if raceenabled && h != nil {callerpc := getcallerpc()pc := funcPC(mapaccess1)racereadpc(unsafe.Pointer(h), callerpc, pc)raceReadObjectPC(t.key, key, callerpc, pc)}if msanenabled && h != nil {msanread(key, t.key.size)}// 如果map首部为nil或map中没有元素,返回零值。if h == nil || h.count == 0 {// 如果该maptype的哈希函数会panic,运行一下该maptype的哈希函数,然后将一个空字节返回。if t.hashMightPanic() {t.hasher(key, 0) // see issue 23734}return unsafe.Pointer(&zeroVal[0])}// 并发读写,直接panic.if h.flags&hashWriting != 0 {throw("concurrent map read and map write")}// 根据键和哈希因子求哈希值hash := t.hasher(key, uintptr(h.hash0))// m是B个二进制1,通过hash&m能对哈希值进行取余操作。m := bucketMask(h.B)// hash&m相当于hash%tableSize,即哈希值对数组大小取余,定位到键应该位于第几个桶。// (hash&m)*uintptr(t.bucketsize)则偏移了(hash&m)个桶的内存位置。// 桶数组起始地址+偏移值定位到桶的内存位置。b := (*bmap)(add(h.buckets, (hash&m)*uintptr(t.bucketsize)))// 如果有oldbuckets,则说明当前map仍在迁移中if c := h.oldbuckets; c != nil {// 如果不是容量不变的迁移,说明现在的数组大小比之前增长了一倍,则将掩码右移一位得oldbuckets的掩码。if !h.sameSizeGrow() {m >>= 1}// 数组起始地址+偏移值在旧数组中定位到桶的内存位置。oldb := (*bmap)(add(c, (hash&m)*uintptr(t.bucketsize)))// 如果还没迁移,就在旧数组中找键值对。// 由上文map首部知,evacuated()实际是检查了桶oldb的tophash域的第一个byte的值。if !evacuated(oldb) {b = oldb}}// 根据哈希值计算tophash值,tophash即哈希值的高8位。// 由于小于minTopHash的tophash用于指示桶是否已经迁移,// 如果tophash<minTopHash,则tophash+=minTopHash.top := tophash(hash)// 从定位到的bucket及其溢出链查找key
bucketloop:for ; b != nil; b = b.overflow(t) { // b.overflow(t)跳到下一个溢出桶// 遍历一个桶的八个位置for i := uintptr(0); i < bucketCnt; i++ {// 先比较tophashif b.tophash[i] != top {// 如果tophash值为emptyRest,则往后的位置已经没有元素了,// 也没有更多溢出桶了。if b.tophash[i] == emptyRest {break bucketloop}continue}// 到达此处,说明key的tophash与桶内槽位的tophash相等。// 计算键的位置,由dataOffset+i*uintptr(t.keysize)可以看出,// 桶内元素的内存布局为|k|k|k|k|k|k|k|k|v|v|v|v|v|v|v|v|.k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))// 检查保存的是是否是指向key的指针,若此处保存了指针,则对其进行解引用。if t.indirectkey() {k = *((*unsafe.Pointer)(k))}// equal方法真正比较了完整的哈希值。if t.key.equal(key, k) {// e为值的位置。e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))// 解引用。if t.indirectelem() {e = *((*unsafe.Pointer)(e))}return e}}}// 没找到,返回零值return unsafe.Pointer(&zeroVal[0])
}
值得关注的点:
buckettophashmapaccessunsafe.Pointeruintptr
源码里判断这个 bucket 是否已经搬迁完毕,用到的函数:
// Possible tophash values. We reserve a few possibilities for special marks.
// Each bucket (including its overflow buckets, if any) will have either all or none of its
// entries in the evacuated* states (except during the evacuate() method, which only happens
// during map writes and thus no one else can observe the map during that time).
emptyRest = 0 // this cell is empty, and there are no more non-empty cells at higher indexes or overflows.
emptyOne = 1 // this cell is empty
evacuatedX = 2 // key/elem is valid. Entry has been evacuated to first half of larger table.
evacuatedY = 3 // same as above, but evacuated to second half of larger table.
evacuatedEmpty = 4 // cell is empty, bucket is evacuated.
minTopHash = 5 // minimum tophash for a normal filled cell.func evacuated(b *bmap) bool {h := b.tophash[0]return h > emptyOne && h < minTopHash
}
tophashtophashevacuatedEmptyevacuatedXevacuatedYbucket
mapaccess2
mapaccess2mapaccess1mapaccess2truefalse
mapaccessK
mapaccessKmapaccess1mapaccessK
键值在桶中的布局
在map的首部部分提到,一个桶中的键值的排列顺序为:
|k|k|k|k|k|k|k|k|v|v|v|v|v|v|v|v|
上文提及,这样做的好处是在内存对齐中不会浪费空间。下面分析一下这样布局如何寻址某一个键/值。
mapaccess1dataOffset
// unsafe.Offsetof返回一个字段在结构体中的偏移值。
// dataOffset是bmap的大小。
dataOffset = unsafe.Offsetof(struct {b bmapv int64}{}.v)
dataOffsetbmapbmap
type bmap struct {tophash [bucketCnt]uint8// 在此结构体后依次紧随bucketCnt个keys和BucketCnt个elems.
}
bmapmapaccess1
// b是桶的地址,b+dataOffset即第一个键的地址,
// b+dataOffset+i*keySize即第i个键的地址。
k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))
定位值:
// b是桶的地址
// b+dataOffset是第一个键的地址
// b+dataOffset+bucketCnt*keySize是第一个值的地址
// b+dataOffset+bucketCnt*keySize+i*elemSize是第i个值的地址
e := add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))
桶数组的创建
上文提到,一个桶后紧随8个键和8个值,这里分析桶数组是怎么创建的。
makemapmakeBucketArray
// makeBucketArray初始化一个支撑map的桶数组。
// 1<<b即至少需要分配的桶数。
// dirtyalloc要么为nil,要么是一个之前分配的t和b都相同的桶数组。
// 如果dirtyalloc为nil,将分配一个新的桶数组,否则将会把dirtyalloc清空并重用。
func makeBucketArray(t *maptype, b uint8, dirtyalloc unsafe.Pointer) (buckets unsafe.Pointer, nextOverflow *bmap) {base := bucketShift(b) // 1<<bnbuckets := base// b较小时,不太可能有需要溢出桶的情况,仅在b>=4时,分配溢出桶if b >= 4 {// 额外分配了2^(b-4)个桶// 这是根据map将会插入元素的数量的中位数// 为将来需要的溢出桶分配空间nbuckets += bucketShift(b - 4)sz := t.bucket.size * nbucketsup := roundupsize(sz)if up != sz {nbuckets = up / t.bucket.size}}if dirtyalloc == nil {// newarray创建了新的桶数组buckets = newarray(t.bucket, int(nbuckets))} else {// 重用旧的桶数组buckets = dirtyalloc// t和b相同的情况下,size亦相同size := t.bucket.size * nbuckets// 清空内存if t.bucket.ptrdata != 0 {memclrHasPointers(buckets, size)} else {memclrNoHeapPointers(buckets, size)}}if base != nbuckets {// base != nbuckets,说明预分配了一些溢出桶。// 为了保证跟踪这些溢出桶的花销最小,在这里约定,// 如果一个预分配的溢出桶的溢出指针为nil,通过碰撞指针能得到更多的溢出桶。// 那么最后一个溢出桶就需要一个非nil的指针,这里设置为buckets。// buckets即桶数组开头,加上base个bucketSize,得到第一个溢出桶的地址nextOverflow = (*bmap)(add(buckets, base*uintptr(t.bucketsize)))// buckets加上(nbuckets-1)个bucketSize,得到最后一个溢出桶的地址last := (*bmap)(add(buckets, (nbuckets-1)*uintptr(t.bucketsize)))// 将最后一个溢出桶的溢出链设置为桶数组的地址,指示后面没有更多的溢出桶了。last.setoverflow(t, (*bmap)(buckets))}return buckets, nextOverflow
}
map
key的定位流程
tophash
k/v的插入和赋值
// 和 mapaccess 类似,但在key不存在时分配一个槽位给它。
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {if h == nil {panic(plainError("assignment to entry in nil map"))}if raceenabled {callerpc := getcallerpc()pc := funcPC(mapassign)racewritepc(unsafe.Pointer(h), callerpc, pc)raceReadObjectPC(t.key, key, callerpc, pc)}if msanenabled {msanread(key, t.key.size)}// 并发写时将终止进程if h.flags&hashWriting != 0 {throw("concurrent map writes")}// 计算键的哈希值hash := t.hasher(key, uintptr(h.hash0))h.flags ^= hashWriting// 如果桶数组还没分配,分配一个长度为1的桶数组if h.buckets == nil {h.buckets = newobject(t.bucket) // newarray(t.bucket, 1)}again:// 对哈希进行取余,得到桶的位置bucket := hash & bucketMask(h.B)// 如果map正在发生迁移,将桶从旧数组迁移到新数组if h.growing() {growWork(t, h, bucket)}// 数组头地址+bucketSize个桶的大小,得到应该插入的桶的位置b := (*bmap)(add(h.buckets, bucket*uintptr(t.bucketsize)))top := tophash(hash)// inserti即应该插入的tophash数组的下标var inserti *uint8// 指向应该插入键的内存地址var insertk unsafe.Pointer// 指向应该插入值的内存地址var elem unsafe.Pointerbucketloop:for {// 检查桶中的每个键for i := uintptr(0); i < bucketCnt; i++ {// tophash不相等if b.tophash[i] != top {// 如果第i个值的tophash指示槽位为空,且要插入的下标inserti还未设置if isEmpty(b.tophash[i]) && inserti == nil {inserti = &b.tophash[i]// b为应插入的桶的位置,b+dataOffset为桶中第一个键的位置// b+dataOffset+i*keySize为桶中第i个键的位置insertk = add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))// b+dataOffset+bucketCnt*keySize+i*elemSize为第i个值的位置elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))}// 如果第i个键的tophash == emptyRest,说明后面没有更多k/v了,也没有更多溢出桶了if b.tophash[i] == emptyRest {break bucketloop}continue}// b+dataOffset+i*keySize得到第i个键的位置k := add(unsafe.Pointer(b), dataOffset+i*uintptr(t.keysize))if t.indirectkey() {k = *((*unsafe.Pointer)(k))}// 如果定位到的键和要插入的键不相等,则继续检查桶内的下一个键。if !t.key.equal(key, k) {continue}// 相同的键已经在桶中,更新之。if t.needkeyupdate() {typedmemmove(t.key, k, key)}// 第i个值的位置elem = add(unsafe.Pointer(b), dataOffset+bucketCnt*uintptr(t.keysize)+i*uintptr(t.elemsize))goto done}ovf := b.overflow(t)if ovf == nil {break}// 继续遍历下一个溢出桶b = ovf}// 在map中没有找到键,分配新的槽位// 如果达到了最大加载因子,或有太多的溢出桶,且目前没有正在进行扩容,则开始扩容。if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {hashGrow(t, h) // hashGrow将会让桶数组增大一倍goto again // 重新开始全部工作}// 没有设置插入索引inserti,说明桶和溢出桶都满了,需要分配一个新的溢出桶。// 在新溢出桶的第一个位置插入k, vif inserti == nil {newb := h.newoverflow(t, b)inserti = &newb.tophash[0]insertk = add(unsafe.Pointer(newb), dataOffset)elem = add(insertk, bucketCnt*uintptr(t.keysize))}// 间接键,bucket槽位->指向新内存块的指针kmem->新内存块if t.indirectkey() {// kmem指向新分配的内存块kmem := newobject(t.key)// insertk指向要插入的槽位,将要插入的槽位的值设置为刚分配内存块的kmem指针// bucket槽位->kmem->新分配的内存块*(*unsafe.Pointer)(insertk) = kmem// 将insertk设置为kmem,即指向新分配的内存块insertk = kmem}// 间接值,bucket槽位->指向新内存块的指针vmem->新内存块if t.indirectelem() {vmem := newobject(t.elem)// elem即bucket槽位的地址,让bucket槽位指向vmem*(*unsafe.Pointer)(elem) = vmem}// 将要key指向的键赋值到insertk指向的内存typedmemmove(t.key, insertk, key)// 在此前inserti已被赋值为tophash数组元素的地址// 设置其指向的tophash槽位为当前键的tophash*inserti = toph.count++done:// h.flags与hashWriting向与为零,说明写标志被清除了if h.flags&hashWriting == 0 {throw("concurrent map writes")}// 即h.flags = h.flags&(^hashWriting),去除写标志h.flags &^= hashWriting// 如果是间接值,去除间接层// 即a->b->c变为a->cif t.indirectelem() {elem = *((*unsafe.Pointer)(elem))}return elem
}
k/v的删除
mapaccesstophashemptyRestemptyResttophashemptyResttophashemptyOne
emptyOnetophashemptyOne
func mapdelete(t *maptype, h *hmap, key unsafe.Pointer) {.........if t.indirectelem() {*(*unsafe.Pointer)(e) = nil} else if t.elem.ptrdata != 0 {memclrHasPointers(e, t.elem.size)} else {memclrNoHeapPointers(e, t.elem.size)}b.tophash[i] = emptyOne.........
}
各标志的含义如下:
const (emptyRest = 0 // this cell is empty, and there are no more non-empty cells at higher indexes or overflows.emptyOne = 1 // this cell is emptyevacuatedX = 2 // key/elem is valid. Entry has been evacuated to first half of larger table.evacuatedY = 3 // same as above, but evacuated to second half of larger table.evacuatedEmpty = 4 // cell is empty, bucket is evacuated.minTopHash = 5 // minimum tophash for a normal filled cell.
)
evacuatedXevacuatedY
哈希表的扩容
mapmapassignmapmapoverflow buckethashGrow
func mapassign(t *maptype, h *hmap, key unsafe.Pointer) unsafe.Pointer {...// If we hit the max load factor or we have too many overflow buckets,// and we're not already in the middle of growing, start growing.if !h.growing() && (overLoadFactor(h.count+1, h.B) || tooManyOverflowBuckets(h.noverflow, h.B)) {hashGrow(t, h)goto again // Growing the table invalidates everything, so try again}...
}
hashGrow
func hashGrow(t *maptype, h *hmap) {bigger := uint8(1)if !overLoadFactor(h.count+1, h.B) {// 没有达到加载因子,不增大桶数组大小,进行等量迁移。bigger = 0h.flags |= sameSizeGrow}oldbuckets := h.bucketsnewbuckets, nextOverflow := makeBucketArray(t, h.B+bigger, nil)// 设置iterator相关标志位// iterator表示可能有迭代器使用buckets// oldIterator表示可能有迭代器使用oldbucketsflags := h.flags &^ (iterator | oldIterator)if h.flags&iterator != 0 {flags |= oldIterator}// commit the grow (atomic wrt gc)// 将map的一些meta更新h.B += biggerh.flags = flagsh.oldbuckets = oldbucketsh.buckets = newbucketsh.nevacuate = 0h.noverflow = 0if h.extra != nil && h.extra.overflow != nil {// Promote current overflow buckets to the old generation.if h.extra.oldoverflow != nil {throw("oldoverflow is not nil")}h.extra.oldoverflow = h.extra.overflowh.extra.overflow = nil}if nextOverflow != nil {if h.extra == nil {h.extra = new(mapextra)}h.extra.nextOverflow = nextOverflow}// 实际的迁移由growWork()和evacuate()完成。
}
由上述代码可以看出,哈希表的扩容可分为两种:
sameSizeGrow
maphashGrowmapassignmapdeletegrowWork
增量式迁移
growWork
// bucket就是指向迁移目的地的指针。
func growWork(t *maptype, h *hmap, bucket uintptr) {// 如果是等量扩容,bucket&h.oldbucketmask()得到的旧桶相对位置与新桶相对位置相同;// 如果是容量翻倍扩容,bucket&h.oldbucketmask()实际上通过掩码将bucket的最高位置零,得到旧桶相对位置。evacuate(t, h, bucket&h.oldbucketmask())// 再迁移一个桶。if h.growing() {evacuate(t, h, h.nevacuate)}
}
evacuateevacDst
// evacDst抽象为一个迁移目的地。
type evacDst struct {b *bmap // 要迁移到的桶i int // 键值对应该放到该桶数组的下标k unsafe.Pointer // 指向键的目的地e unsafe.Pointer // 指向值的目的地
}
evacuate
func evacuate(t *maptype, h *hmap, oldbucket uintptr) {// oldbucket加上目标旧桶偏移值,得到旧桶指针。b := (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))// h.noldbuckets计算了此次扩容之前桶数组的大小。newbit := h.noldbuckets()if !evacuated(b) {// x和y分别指代需要迁移的桶的两个目的地:一个的相对位置与旧桶的相对位置相同;// 如果是扩容一倍:另一个桶y的相对位置则是旧桶的相对位置加上旧桶数组的大小。// 扩容一倍时,因为哈希取模操作,旧桶的键值对将分散到两个桶中;// 等量扩容时,旧桶中所有值都将迁移到对应其位置的一个桶中。(如果有溢出,则在该桶后面添加溢出桶)var xy [2]evacDstx := &xy[0]x.b = (*bmap)(add(h.buckets, oldbucket*uintptr(t.bucketsize)))x.k = add(unsafe.Pointer(x.b), dataOffset)x.e = add(x.k, bucketCnt*uintptr(t.keysize))if !h.sameSizeGrow() {// 仅在扩容一倍时计算y指针,否则GC将发现坏指针。y := &xy[1]y.b = (*bmap)(add(h.buckets, (oldbucket+newbit)*uintptr(t.bucketsize))) // 此处加上扩容前桶数组大小,得到了扩容后旧桶对应的新桶的位置。y.k = add(unsafe.Pointer(y.b), dataOffset)y.e = add(y.k, bucketCnt*uintptr(t.keysize))}// 遍历该桶及其溢出桶。for ; b != nil; b = b.overflow(t) {k := add(unsafe.Pointer(b), dataOffset) // 键指针e := add(k, bucketCnt*uintptr(t.keysize)) // 值指针// 遍历桶的所有槽位for i := 0; i < bucketCnt; i, k, e = i+1, add(k, uintptr(t.keysize)), add(e, uintptr(t.elemsize)) {top := b.tophash[i]if isEmpty(top) {// 将tophash表示为已迁移的空值b.tophash[i] = evacuatedEmptycontinue}if top < minTopHash {throw("bad map state")}k2 := kif t.indirectkey() {k2 = *((*unsafe.Pointer)(k2))}var useY uint8if !h.sameSizeGrow() {// 计算哈希值来判断应该迁移到x还是y。hash := t.hasher(k2, uintptr(h.hash0))if h.flags&iterator != 0 && !t.reflexivekey() && !t.key.equal(k2, k2) {useY = top & 1top = tophash(hash)} else {if hash&newbit != 0 {useY = 1}}}if evacuatedX+1 != evacuatedY || evacuatedX^1 != evacuatedY {throw("bad evacuatedN")}b.tophash[i] = evacuatedX + useY // evacuatedX + 1 == evacuatedYdst := &xy[useY] // evacuation destinationif dst.i == bucketCnt {dst.b = h.newoverflow(t, dst.b)dst.i = 0dst.k = add(unsafe.Pointer(dst.b), dataOffset)dst.e = add(dst.k, bucketCnt*uintptr(t.keysize))}dst.b.tophash[dst.i&(bucketCnt-1)] = top // mask dst.i as an optimization, to avoid a bounds checkif t.indirectkey() {*(*unsafe.Pointer)(dst.k) = k2 // copy pointer} else {typedmemmove(t.key, dst.k, k) // copy elem}if t.indirectelem() {*(*unsafe.Pointer)(dst.e) = *(*unsafe.Pointer)(e)} else {typedmemmove(t.elem, dst.e, e)}dst.i++// These updates might push these pointers past the end of the// key or elem arrays. That's ok, as we have the overflow pointer// at the end of the bucket to protect against pointing past the// end of the bucket.dst.k = add(dst.k, uintptr(t.keysize))dst.e = add(dst.e, uintptr(t.elemsize))}}// 至此,此位置桶及其溢出桶都已迁移完成。// 如果没有设置oldIterator,说明没有迭代器正在遍历旧桶数组,帮助GC回收垃圾。if h.flags&oldIterator == 0 && t.bucket.ptrdata != 0 {b := add(h.oldbuckets, oldbucket*uintptr(t.bucketsize))// 仍保留tophash,因为它指示了迁移状态。ptr := add(b, dataOffset)n := uintptr(t.bucketsize) - dataOffsetmemclrHasPointers(ptr, n) // 清理指针帮助GC回收垃圾}}// 指针h.nevacuate是迁移指示器,此指针之前的所有桶已被迁移。// 如果当前迁移的桶是h.nevacuate指示的桶,则需要让h.nevacuate向前移动。if oldbucket == h.nevacuate {advanceEvacuationMark(h, t, newbit)}
}func advanceEvacuationMark(h *hmap, t *maptype, newbit uintptr) {h.nevacuate++stop := h.nevacuate + 1024if stop > newbit {stop = newbit}// 一直自增nevacuate,让其指向已搬迁的连续最大桶下标。for h.nevacuate != stop && bucketEvacuated(t, h, h.nevacuate) {h.nevacuate++}if h.nevacuate == newbit { // newbit即旧桶数组大小,nevacuate指示的位置已超出桶数组边界,说明迁移完成。// 扩容完成,释放旧桶数组。可以通过判断oldbuckets是否为nil判断是否正在扩容。h.oldbuckets = nil// 能够将 overflow bucket 也释放掉。if h.extra != nil {h.extra.oldoverflow = nil}h.flags &^= sameSizeGrow // 将sameSizeGrow标志位去掉}
}
由上述代码看出:
nevacuatenevacuate
map
map
When iterating over a map with a range loop, the iteration order is not specified and is not guaranteed to be the same from one iteration to the next. If you require a stable iteration order you must maintain a separate data structure that specifies that order.
mapmap
func main() {m := map[string]int{"hello": 1,"world": 2,}for k, v := range m {fmt.Printf("k = %s, v = %d\n", k, v)}// 输出可能是以下两种之一:// k = hello, v = 1// k = world, v = 2//// k = world, v = 2// k = hello, v = 1
}
mapiterinitmap
// decide where to startr := uintptr(fastrand())if h.B > 31-bucketCntBits {r += uintptr(fastrand()) << 31}
mapmapmapiterinitmapiternextmap
hitermap
func mapiternext(it *hiter) {......if h.growing() && it.B == h.B {// 迭代在一次扩容过程中开始,且扩容尚未完成。// 如果当前查看的桶还未迁移,需要迭代旧桶,并返回会被迁移到这个桶的键值对。// 定位到旧桶的位置。oldbucket := bucket & it.h.oldbucketmask()b = (*bmap)(add(h.oldbuckets, oldbucket*uintptr(t.bucketsize)))if !evacuated(b) {// 还未迁移旧桶,则设置checkBucket为新桶。checkBucket = bucket} else {// 已迁移,无需检查。直接在新的桶数组定位到第bucket个桶。b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))checkBucket = noCheck}} else {// 当前未在扩容,或者当前正在扩容,但迭代在扩容前开始。b = (*bmap)(add(it.buckets, bucket*uintptr(t.bucketsize)))checkBucket = noCheck}......
}
if checkBucket != noCheck && !h.sameSizeGrow() {// 迭代在扩容过程中发生,且当前扩容还未完成。// 当前在处理一个仍未迁移的桶。if t.reflexivekey() || t.key.equal(k, k) {// 计算哈希值,确定该key是否会落到新桶。若不是,则跳过。hash := t.hasher(k, uintptr(h.hash0))if hash&bucketMask(it.B) != checkBucket {continue}} else {// Hash isn't repeatable if k != k (NaNs). We need a// repeatable and randomish choice of which direction// to send NaNs during evacuation. We'll use the low// bit of tophash to decide which way NaNs go.// NOTE: this case is why we need two evacuate tophash// values, evacuatedX and evacuatedY, that differ in// their low bit.if checkBucket>>(it.B-1) != uintptr(b.tophash[offi]&1) {continue}}}
遍历可以分为下述两大类情况:
tophashevacuatedXevacuatedY
为了验证在扩容前开始的迭代不会访问到新创建的桶数组,编写代码如下:
func main() {// 传入的hint使得创建的桶数组的大小为2.m := make(map[int]int, 13)// 插入13个元素,此时加载因子为13/2=6.5,恰好在需要扩容前。for i := 0; i < 13; i++ {m[i] = i}var count intvar sum int// 在扩容前开始遍历for k := range m {sum += kif count == 2 {// 在遍历过程中往ma插入1000个键值对for i := 1000; i < 2000; i++ {m[i] = i}}count++}fmt.Printf("sum = %d\ncount = %d\n", sum, count)
}
sum
map
the language specification中是这样描述的:
The iteration order over maps is not specified and is not guaranteed to be the same from one iteration to the next. If a map entry that has not yet been reached is removed during iteration, the corresponding iteration value will not be produced. If a map entry is created during iteration, that entry may be produced during the iteration or may be skipped. The choice may vary for each entry created and from one iteration to the next. If the map is nil, the number of iterations is 0.
mapmap
map
map 不是线程安全的。
在查找、赋值、遍历、删除的过程中都会检测写标志,一旦发现写标志置位(等于1),则直接panic。赋值和删除函数在检测完写标志是复位之后,先将写标志位置位,才会进行之后的操作。
检测写标志:
if h.flags&hashWriting == 0 {throw("concurrent map writes")
}
设置写标志:
h.flags |= hashWriting