之前我们已经了解了GMP的基础知识,对G、M、P各自的职责和分工都有了大致的认识。本篇文章主要介绍一个goroutine是怎么被创建出来的。
从一个简单的例子开始
// TestNewGoroutine is a minimal example whose only statement is a go
// statement, so its compiled output shows what the go keyword turns into.
// The test does not wait for the goroutine it starts, so "hello gmp" is not
// guaranteed to be printed before the test returns.
func TestNewGoroutine(t *testing.T) {
	go func() {
		sayHello()
	}()
}

// sayHello prints a fixed greeting with the builtin println.
func sayHello() {
	println("hello gmp")
}
使用下面的命令编译该文件并输出汇编(-N 禁用优化,-l 禁用内联,-S 打印汇编代码): go tool compile -N -l -S new_goroutine_test.go
得到汇编代码
"".TestNewGoroutine STEXT size=71 args=0x8 locals=0x18 0x0000 00000 (new_goroutine_test.go:5) TEXT "".TestNewGoroutine(SB), ABIInternal, $24-8 0x0000 00000 (new_goroutine_test.go:5) MOVQ (TLS), CX 0x0009 00009 (new_goroutine_test.go:5) CMPQ SP, 16(CX) 0x000d 00013 (new_goroutine_test.go:5) PCDATA $0, $-2 0x000d 00013 (new_goroutine_test.go:5) JLS 64 0x000f 00015 (new_goroutine_test.go:5) PCDATA $0, $-1 0x000f 00015 (new_goroutine_test.go:5) SUBQ $24, SP 0x0013 00019 (new_goroutine_test.go:5) MOVQ BP, 16(SP) 0x0018 00024 (new_goroutine_test.go:5) LEAQ 16(SP), BP 0x001d 00029 (new_goroutine_test.go:5) FUNCDATA $0, gclocals·2a5305abe05176240e61b8620e19a815(SB) 0x001d 00029 (new_goroutine_test.go:5) FUNCDATA $1, gclocals·33cdeccccebe80329f1fdbee7f5874cb(SB) 0x001d 00029 (new_goroutine_test.go:6) MOVL $0, (SP) 0x0024 00036 (new_goroutine_test.go:6) LEAQ "".TestNewGoroutine.func1·f(SB), AX 0x002b 00043 (new_goroutine_test.go:6) MOVQ AX, 8(SP) 0x0030 00048 (new_goroutine_test.go:6) PCDATA $1, $0 0x0030 00048 (new_goroutine_test.go:6) CALL runtime.newproc(SB) //对应第6行的go关键字 0x0035 00053 (new_goroutine_test.go:9) MOVQ 16(SP), BP 0x003a 00058 (new_goroutine_test.go:9) ADDQ $24, SP 0x003e 00062 (new_goroutine_test.go:9) RET 0x003f 00063 (new_goroutine_test.go:9) NOP 0x003f 00063 (new_goroutine_test.go:5) PCDATA $1, $-1 0x003f 00063 (new_goroutine_test.go:5) PCDATA $0, $-2 0x003f 00063 (new_goroutine_test.go:5) NOP 0x0040 00064 (new_goroutine_test.go:5) CALL runtime.morestack_noctxt(SB) 0x0045 00069 (new_goroutine_test.go:5) PCDATA $0, $-1 0x0045 00069 (new_goroutine_test.go:5) JMP 0
从以上汇编代码可以看到,当用go关键字创建一个goroutine的时候,编译器生成的是对 runtime.newproc 函数的调用(CALL runtime.newproc(SB)),所以我们来到 runtime.newproc 函数。
// newproc creates a new g that will run fn with siz bytes of arguments and
// puts it on the current P's run queue. It is the function a go statement
// is compiled into a call to.
//
// The arguments for fn are expected to sit on the caller's stack directly
// after the fn pointer itself, which is how argp is derived below.
func newproc(siz int32, fn *funcval) {
	// Skip over the fn pointer slot to get the address of fn's first argument.
	argp := add(unsafe.Pointer(&fn), sys.PtrSize)
	// The g that executed the go statement (the creator of the new g).
	gp := getg()
	// The caller's PC; newproc1 records it in newg.gopc.
	pc := getcallerpc()
	// Run the actual creation on the system (g0) stack. Note that this is a
	// stack switch done by the runtime, not an operating-system call.
	systemstack(func() {
		// Does the real work of building the new g.
		newg := newproc1(fn, argp, siz, gp, pc)

		// The P the current M is attached to.
		_p_ := getg().m.p.ptr()
		// Queue newg on that P's local run queue.
		// NOTE(review): in the runtime source the third argument is "next";
		// true asks runqput to place newg in the P's runnext slot — confirm
		// against the runtime version being described.
		runqput(_p_, newg, true)

		// mainStarted is a runtime flag; per the runtime source it is set in
		// runtime.main to allow newproc to start new Ms.
		if mainStarted {
			// Try to get another P running so the new g can be picked up.
			wakep()
		}
	})
}
需要注意的是,这里的 systemstack 并不会产生系统调用,它只是把执行栈从当前goroutine的栈切换到当前M的g0栈(即系统栈)。在系统栈上运行的代码不会被抢占,也不会触发栈增长,因此运行时把真正创建goroutine的工作放到g0栈上执行。接下来看看 newproc1 函数。
// newproc1 builds a new g in state _Grunnable that will start executing fn,
// and returns it. narg bytes of arguments are copied from argp onto the new
// g's stack. callergp is the g that issued the go statement and callerpc is
// the address of that go statement; both are recorded on the new g.
//
// It is called by newproc through systemstack, so it runs on the g0 stack.
// The caller is responsible for putting the returned g on a run queue.
func newproc1(fn *funcval, argp unsafe.Pointer, narg int32, callergp *g, callerpc uintptr) *g {
	_g_ := getg() // g0, because newproc invokes us via systemstack

	if fn == nil {
		_g_.m.throwing = -1 // do not dump full stacks
		throw("go of nil func value")
	}
	// Pin this goroutine to the current M. This is not a mutex; it is paired
	// with the releasem call at the end of the function.
	acquirem() // disable preemption because it can be holding p in a local var
	siz := narg
	// Round siz up to the next multiple of 8. &^ is Go's AND NOT operator,
	// so this clears the low three bits after adding 7.
	siz = (siz + 7) &^ 7

	// We could allocate a larger initial stack if necessary.
	// Not worth it: this is almost always an error.
	// 4*sizeof(uintreg): extra space added below
	// sizeof(uintreg): caller's LR (arm) or return address (x86, in gostartcall).
	// Reject argument blocks that would not fit in a minimum-size stack.
	if siz >= _StackMin-4*sys.RegSize-sys.RegSize {
		throw("newproc: function arguments too large for new goroutine")
	}

	_p_ := _g_.m.p.ptr()
	// Try to reuse a g from the P's free list first.
	// NOTE(review): the original author states that a finished g is recycled
	// onto the P's gFree list, which is what makes this reuse possible.
	newg := gfget(_p_)
	if newg == nil {
		// Nothing to reuse: allocate a fresh g with a _StackMin-sized stack.
		// NOTE(review): the original author gives _StackMin as 2 KB — confirm
		// for the runtime version being described.
		newg = malg(_StackMin)
		// CAS the status from _Gidle to _Gdead.
		casgstatus(newg, _Gidle, _Gdead)
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		throw("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		throw("newproc1: new g is not Gdead")
	}

	// Compute the initial stack pointer: reserve room below stack.hi for the
	// arguments plus some slack, aligned to sys.SpAlign.
	totalSize := 4*sys.RegSize + uintptr(siz) + sys.MinFrameSize // extra space in case of reads slightly beyond frame
	totalSize += -totalSize & (sys.SpAlign - 1)                  // align to spAlign
	sp := newg.stack.hi - totalSize
	spArg := sp
	if usesLR {
		// caller's LR
		*(*uintptr)(unsafe.Pointer(sp)) = 0
		prepGoExitFrame(sp)
		spArg += sys.MinFrameSize
	}
	if narg > 0 {
		// Copy the arguments onto the new goroutine's stack.
		memmove(unsafe.Pointer(spArg), argp, uintptr(narg))
		// This is a stack-to-stack copy. If write barriers
		// are enabled and the source stack is grey (the
		// destination is always black), then perform a
		// barrier copy. We do this *after* the memmove
		// because the destination stack may have garbage on
		// it.
		if writeBarrier.needed && !_g_.m.curg.gcscandone {
			f := findfunc(fn.fn)
			stkmap := (*stackmap)(funcdata(f, _FUNCDATA_ArgsPointerMaps))
			if stkmap.nbit > 0 {
				// We're in the prologue, so it's always stack map index 0.
				bv := stackmapdata(stkmap, 0)
				bulkBarrierBitmap(spArg, spArg, uintptr(bv.n)*sys.PtrSize, 0, bv.bytedata)
			}
		}
	}

	// Zero the saved execution context. newg may have come from the free
	// list, in which case it still holds values from its previous use.
	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp // top of the new stack
	newg.stktopsp = sp
	// Start with pc = goexit+PCQuantum. gostartcallfn below turns this value
	// into the return address on newg's stack, so that returning from fn
	// lands inside goexit.
	newg.sched.pc = funcPC(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
	/*
		For reference, goexit is defined in assembly as:

		TEXT runtime·goexit(SB),NOSPLIT,$0-0
			BYTE	$0x90	// NOP
			CALL	runtime·goexit1(SB)	// does not return   <- second instruction
			// traceback from goexit1 must hit code range of goexit
			BYTE	$0x90	// NOP
	*/
	newg.sched.g = guintptr(unsafe.Pointer(newg))
	// Push the current sched.pc (goexit+PCQuantum) onto newg's stack as the
	// return address and point sched.pc at fn, as if goexit had called fn.
	gostartcallfn(&newg.sched, fn)
	newg.gopc = callerpc                     // pc of the go statement that created this g
	newg.ancestors = saveAncestors(callergp) // record the chain of creating goroutines
	newg.startpc = fn.fn
	if _g_.m.curg != nil {
		newg.labels = _g_.m.curg.labels
	}
	if isSystemGoroutine(newg, false) {
		atomic.Xadd(&sched.ngsys, +1)
	}
	// The g is fully set up: mark it runnable.
	casgstatus(newg, _Gdead, _Grunnable)

	// Assign a goroutine id. Ids are handed out from a per-P cache that is
	// refilled in batches of _GoidCacheBatch from sched.goidgen.
	if _p_.goidcache == _p_.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		_p_.goidcache = atomic.Xadd64(&sched.goidgen, _GoidCacheBatch)
		_p_.goidcache -= _GoidCacheBatch - 1
		_p_.goidcacheend = _p_.goidcache + _GoidCacheBatch
	}
	newg.goid = int64(_p_.goidcache)
	_p_.goidcache++
	if raceenabled {
		newg.racectx = racegostart(callerpc)
	}
	if trace.enabled {
		traceGoCreate(newg, newg.startpc)
	}
	// Undo the acquirem at the top of the function.
	releasem(_g_.m)

	return newg
}
其中,gostartcallfn 函数很是巧妙:它把 goexit+1(即 funcPC(goexit) + sys.PCQuantum)这个地址作为返回地址压入了新goroutine的栈,并把pc改为fn的入口地址。这样当fn执行完毕、通过RET返回时,就会跳转到goexit中继续执行,由它调用goexit1完成资源回收。我们看看它是怎么做到的。
// adjust Gobuf as if it executed a call to fn
// and then did an immediate gosave.
//
// gostartcallfn resolves the entry address of fv (or of nilfunc when fv is
// nil) and hands it to gostartcall, passing fv itself as the context. The
// effect is that gobuf looks as though it is in the middle of calling fn.
func gostartcallfn(gobuf *gobuf, fv *funcval) {
	var fn unsafe.Pointer // entry address of the function to run
	if fv != nil {
		fn = unsafe.Pointer(fv.fn)
	} else {
		fn = unsafe.Pointer(funcPC(nilfunc))
	}
	gostartcall(gobuf, fn, unsafe.Pointer(fv))
}

// adjust Gobuf as if it executed a call to fn with context ctxt
// and then did an immediate gosave.
//
// gostartcall pushes the current buf.pc onto the stack described by buf as
// a return address, then sets buf.pc to fn and buf.ctxt to ctxt.
func gostartcall(buf *gobuf, fn, ctxt unsafe.Pointer) {
	sp := buf.sp
	if sys.RegSize > sys.PtrSize {
		// Registers are wider than pointers: push one extra zeroed
		// pointer-sized slot first.
		sp -= sys.PtrSize
		*(*uintptr)(unsafe.Pointer(sp)) = 0
	}
	// Make room for the return address (not for a return value).
	sp -= sys.PtrSize
	// Store the old buf.pc there. When called from newproc1 this is
	// goexit+PCQuantum, so returning from fn continues inside goexit.
	*(*uintptr)(unsafe.Pointer(sp)) = buf.pc
	buf.sp = sp          // the saved stack pointer now sits below the return address
	buf.pc = uintptr(fn) // execution will begin at fn's entry
	buf.ctxt = ctxt
}
这样一个goroutine就被创建好,并被放入了P的本地运行队列,接下来就是等待某个M来调度执行它了,M调度的逻辑将在之后展开。
为了让大家更好的理解整个流程,这里我画了一个整体流程图。