Source file src/runtime/proc.go
1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package runtime 6 7 import ( 8 "internal/abi" 9 "internal/cpu" 10 "internal/goarch" 11 "internal/goexperiment" 12 "internal/goos" 13 "internal/runtime/atomic" 14 "internal/runtime/exithook" 15 "internal/runtime/maps" 16 "internal/runtime/sys" 17 "internal/strconv" 18 "internal/stringslite" 19 "unsafe" 20 ) 21 22 // set using cmd/go/internal/modload.ModInfoProg 23 var modinfo string 24 25 // Goroutine scheduler 26 // The scheduler's job is to distribute ready-to-run goroutines over worker threads. 27 // 28 // The main concepts are: 29 // G - goroutine. 30 // M - worker thread, or machine. 31 // P - processor, a resource that is required to execute Go code. 32 // M must have an associated P to execute Go code, however it can be 33 // blocked or in a syscall w/o an associated P. 34 // 35 // Design doc at https://golang.org/s/go11sched. 36 37 // Worker thread parking/unparking. 38 // We need to balance between keeping enough running worker threads to utilize 39 // available hardware parallelism and parking excessive running worker threads 40 // to conserve CPU resources and power. This is not simple for two reasons: 41 // (1) scheduler state is intentionally distributed (in particular, per-P work 42 // queues), so it is not possible to compute global predicates on fast paths; 43 // (2) for optimal thread management we would need to know the future (don't park 44 // a worker thread when a new goroutine will be readied in near future). 45 // 46 // Three rejected approaches that would work badly: 47 // 1. Centralize all scheduler state (would inhibit scalability). 48 // 2. Direct goroutine handoff. That is, when we ready a new goroutine and there 49 // is a spare P, unpark a thread and handoff it the thread and the goroutine. 
50 // This would lead to thread state thrashing, as the thread that readied the 51 // goroutine can be out of work the very next moment, we will need to park it. 52 // Also, it would destroy locality of computation as we want to preserve 53 // dependent goroutines on the same thread; and introduce additional latency. 54 // 3. Unpark an additional thread whenever we ready a goroutine and there is an 55 // idle P, but don't do handoff. This would lead to excessive thread parking/ 56 // unparking as the additional threads will instantly park without discovering 57 // any work to do. 58 // 59 // The current approach: 60 // 61 // This approach applies to three primary sources of potential work: readying a 62 // goroutine, new/modified-earlier timers, and idle-priority GC. See below for 63 // additional details. 64 // 65 // We unpark an additional thread when we submit work if (this is wakep()): 66 // 1. There is an idle P, and 67 // 2. There are no "spinning" worker threads. 68 // 69 // A worker thread is considered spinning if it is out of local work and did 70 // not find work in the global run queue or netpoller; the spinning state is 71 // denoted in m.spinning and in sched.nmspinning. Threads unparked this way are 72 // also considered spinning; we don't do goroutine handoff so such threads are 73 // out of work initially. Spinning threads spin on looking for work in per-P 74 // run queues and timer heaps or from the GC before parking. If a spinning 75 // thread finds work it takes itself out of the spinning state and proceeds to 76 // execution. If it does not find work it takes itself out of the spinning 77 // state and then parks. 78 // 79 // If there is at least one spinning thread (sched.nmspinning>0), we don't 80 // unpark new threads when submitting work. To compensate for that, if the last 81 // spinning thread finds work and stops spinning, it must unpark a new spinning 82 // thread.
This approach smooths out unjustified spikes of thread unparking, 83 // but at the same time guarantees eventual maximal CPU parallelism 84 // utilization. 85 // 86 // The main implementation complication is that we need to be very careful 87 // during spinning->non-spinning thread transition. This transition can race 88 // with submission of new work, and either one part or another needs to unpark 89 // another worker thread. If they both fail to do that, we can end up with 90 // semi-persistent CPU underutilization. 91 // 92 // The general pattern for submission is: 93 // 1. Submit work to the local or global run queue, timer heap, or GC state. 94 // 2. #StoreLoad-style memory barrier. 95 // 3. Check sched.nmspinning. 96 // 97 // The general pattern for spinning->non-spinning transition is: 98 // 1. Decrement nmspinning. 99 // 2. #StoreLoad-style memory barrier. 100 // 3. Check all per-P work queues and GC for new work. 101 // 102 // Note that all this complexity does not apply to global run queue as we are 103 // not sloppy about thread unparking when submitting to global queue. Also see 104 // comments for nmspinning manipulation. 105 // 106 // How these different sources of work behave varies, though it doesn't affect 107 // the synchronization approach: 108 // * Ready goroutine: this is an obvious source of work; the goroutine is 109 // immediately ready and must run on some thread eventually. 110 // * New/modified-earlier timer: The current timer implementation (see time.go) 111 // uses netpoll in a thread with no work available to wait for the soonest 112 // timer. If there is no thread waiting, we want a new spinning thread to go 113 // wait. 114 // * Idle-priority GC: The GC wakes a stopped idle thread to contribute to 115 // background GC work (note: currently disabled per golang.org/issue/19112). 116 // Also see golang.org/issue/44313, as this should be extended to all GC 117 // workers. 
// Bootstrap state. m0 is the M that main insists on running on (it throws
// otherwise), and raceprocctx0 is filled in by schedinit when the race
// detector is enabled.
var (
	m0           m
	g0           g
	mcache0      *mcache
	raceprocctx0 uintptr
	raceFiniLock mutex
)

// This slice records the initializing tasks that need to be
// done to start up the runtime. It is built by the linker.
var runtime_inittasks []*initTask

// mainInitDone is a signal used by cgocallbackg that initialization
// has been completed. If this is false, wait on mainInitDoneChan.
var mainInitDone atomic.Bool

// mainInitDoneChan is closed after initialization has been completed.
// It is made before _cgo_notify_runtime_init_done, so all cgo
// calls can rely on it existing.
var mainInitDoneChan chan bool

// main_main is the user program's main.main, bound via linkname.
//
//go:linkname main_main main.main
func main_main()

// mainStarted indicates that the main M has started.
var mainStarted bool

// runtimeInitTime is the nanotime() at which the runtime started.
var runtimeInitTime int64

// Value to use for signal mask for newly created M's.
var initSigmask sigset

// The main goroutine.
//
// main finishes runtime start-up (stack size limits, sysmon, the runtime's
// own init tasks, GC enablement, cgo sanity checks), runs the init tasks of
// every loaded module, calls main.main, and finally runs the exit hooks and
// exits the process. It only returns in c-archive/c-shared builds, where
// main.main is not executed.
func main() {
	mp := getg().m

	// Racectx of m0->g0 is used only as the parent of the main goroutine.
	// It must not be used for anything else.
	mp.g0.racectx = 0

	// Max stack size is 1 GB on 64-bit, 250 MB on 32-bit.
	// Using decimal instead of binary GB and MB because
	// they look nicer in the stack overflow failure message.
	if goarch.PtrSize == 8 {
		maxstacksize = 1000000000
	} else {
		maxstacksize = 250000000
	}

	// An upper limit for max stack size. Used to avoid random crashes
	// after calling SetMaxStack and trying to allocate a stack that is too big,
	// since stackalloc works with 32-bit sizes.
	maxstackceiling = 2 * maxstacksize

	// Allow newproc to start new Ms.
	mainStarted = true

	if haveSysmon {
		systemstack(func() {
			newm(sysmon, nil, -1)
		})
	}

	// Lock the main goroutine onto this, the main OS thread,
	// during initialization. Most programs won't care, but a few
	// do require certain calls to be made by the main thread.
	// Those can arrange for main.main to run in the main thread
	// by calling runtime.LockOSThread during initialization
	// to preserve the lock.
	lockOSThread()

	if mp != &m0 {
		throw("runtime.main not on m0")
	}

	// Record when the world started.
	// Must be before doInit for tracing init.
	runtimeInitTime = nanotime()
	if runtimeInitTime == 0 {
		throw("nanotime returning zero")
	}

	if debug.inittrace != 0 {
		inittrace.id = getg().goid
		inittrace.active = true
	}

	doInit(runtime_inittasks) // Must be before defer.

	// Defer unlock so that runtime.Goexit during init does the unlock too.
	// needUnlock is cleared below once the explicit unlockOSThread has run,
	// so the deferred function does not unlock a second time.
	needUnlock := true
	defer func() {
		if needUnlock {
			unlockOSThread()
		}
	}()

	gcenable()
	defaultGOMAXPROCSUpdateEnable() // don't STW before runtime initialized.

	// If we encountered a removed GODEBUG during startup we can panic now.
	if k := invalidGODEBUG.key; k != "" {
		v := invalidGODEBUG.value
		r := strconv.Itoa(invalidGODEBUG.removed)
		fatal(`removed GODEBUG "` + k + `" set to old value "` + v + `" in environment (https://go.dev/doc/godebug#go-1` + r + `)`)
	}

	mainInitDoneChan = make(chan bool)
	if iscgo {
		// Verify that the cgo support hooks this function relies on are
		// present before using any of them.
		if _cgo_pthread_key_created == nil {
			throw("_cgo_pthread_key_created missing")
		}

		if GOOS != "windows" {
			if _cgo_thread_start == nil {
				throw("_cgo_thread_start missing")
			}
			if _cgo_setenv == nil {
				throw("_cgo_setenv missing")
			}
			if _cgo_unsetenv == nil {
				throw("_cgo_unsetenv missing")
			}
		}
		if _cgo_notify_runtime_init_done == nil {
			throw("_cgo_notify_runtime_init_done missing")
		}

		// Set the x_crosscall2_ptr C function pointer variable point to crosscall2.
		if set_crosscall2 == nil {
			throw("set_crosscall2 missing")
		}
		set_crosscall2()

		// Start the template thread in case we enter Go from
		// a C-created thread and need to create a new thread.
		startTemplateThread()
		cgocall(_cgo_notify_runtime_init_done, nil)
	}

	// Run the initializing tasks. Depending on build mode this
	// list can arrive a few different ways, but it will always
	// contain the init tasks computed by the linker for all the
	// packages in the program (excluding those added at runtime
	// by package plugin). Run through the modules in dependency
	// order (the order they are initialized by the dynamic
	// loader, i.e. they are added to the moduledata linked list).
	last := lastmoduledatap // grab before loop starts. Any added modules after this point will do their own doInit calls.
	for m := &firstmoduledata; true; m = m.next {
		doInit(m.inittasks)
		if m == last {
			break
		}
	}

	// Disable init tracing after main init done to avoid overhead
	// of collecting statistics in malloc and newproc
	inittrace.active = false

	mainInitDone.Store(true)
	close(mainInitDoneChan)

	needUnlock = false
	unlockOSThread()

	if isarchive || islibrary {
		// A program compiled with -buildmode=c-archive or c-shared
		// has a main, but it is not executed.
		if GOARCH == "wasm" {
			// On Wasm, pause makes it return to the host.
			// Unlike cgo callbacks where Ms are created on demand,
			// on Wasm we have only one M. So we keep this M (and this
			// G) for callbacks.
			// Using the caller's SP unwinds this frame and backs to
			// goexit. The -16 is: 8 for goexit's (fake) return PC,
			// and pause's epilogue pops 8.
			pause(sys.GetCallerSP() - 16) // should not return
			panic("unreachable")
		}
		return
	}
	fn := main_main // make an indirect call, as the linker doesn't know the address of the main package when laying down the runtime
	fn()

	// Check for C memory leaks if using ASAN and we've made cgo calls,
	// or if we are running as a library in a C program.
	// We always make one cgo call, above, to notify_runtime_init_done,
	// so we ignore that one.
	// No point in leak checking if no cgo calls, since leak checking
	// just looks for objects allocated using malloc and friends.
	// Just checking iscgo doesn't help because asan implies iscgo.
	exitHooksRun := false
	if asanenabled && (isarchive || islibrary || NumCgoCall() > 1) {
		runExitHooks(0) // lsandoleakcheck may not return
		exitHooksRun = true
		lsandoleakcheck()
	}

	// Make racy client program work: if panicking on
	// another goroutine at the same time as main returns,
	// let the other goroutine finish printing the panic trace.
	// Once it does, it will exit. See issues 3934 and 20018.
	if runningPanicDefers.Load() != 0 {
		// Running deferred functions should not take long.
		// The wait is bounded to 1000 yields.
		for c := 0; c < 1000; c++ {
			if runningPanicDefers.Load() == 0 {
				break
			}
			Gosched()
		}
	}
	if panicking.Load() != 0 {
		gopark(nil, nil, waitReasonPanicWait, traceBlockForever, 1)
	}
	// Skip the hooks if the ASAN path above already ran them.
	if !exitHooksRun {
		runExitHooks(0)
	}
	if raceenabled {
		racefini() // does not return
	}

	exit(0)
	// Reached only if exit returns: force a fault by storing through a
	// nil pointer instead of falling off the end of main.
	for {
		var x *int32
		*x = 0
	}
}

// os_beforeExit is linknamed as os.runtime_beforeExit.
//
// It runs the exit hooks with exitCode. Only when exitCode is zero does it
// also call racefini (if the race detector is enabled) and the ASAN leak
// check (under the same condition main uses).
//
//go:linkname os_beforeExit os.runtime_beforeExit
func os_beforeExit(exitCode int) {
	runExitHooks(exitCode)
	if exitCode == 0 && raceenabled {
		racefini()
	}

	// See comment in main, above.
	if exitCode == 0 && asanenabled && (isarchive || islibrary || NumCgoCall() > 1) {
		lsandoleakcheck()
	}
}

// Hand the exithook package the runtime functions it is configured with.
func init() {
	exithook.Gosched = Gosched
	exithook.Goid = func() uint64 { return getg().goid }
	exithook.Throw = throw
}

// runExitHooks forwards code to exithook.Run.
func runExitHooks(code int) {
	exithook.Run(code)
}

// start forcegc helper goroutine
func init() {
	go forcegchelper()
}

// forcegchelper is the body of the goroutine started by the init function
// above. It records itself in forcegc.g and then loops forever: it marks
// itself idle, parks while releasing forcegc.lock, and after being resumed
// starts a time-triggered GC cycle.
func forcegchelper() {
	forcegc.g = getg()
	lockInit(&forcegc.lock, lockRankForcegc)
	for {
		lock(&forcegc.lock)
		// idle must have been cleared before this goroutine was resumed.
		if forcegc.idle.Load() {
			throw("forcegc: phase error")
		}
		forcegc.idle.Store(true)
		goparkunlock(&forcegc.lock, waitReasonForceGCIdle, traceBlockSystemGoroutine, 1)
		// this goroutine is explicitly resumed by sysmon
		if debug.gctrace > 0 {
			println("GC forced")
		}
		// Time-triggered, fully concurrent.
		gcStart(gcTrigger{kind: gcTriggerTime, now: nanotime()})
	}
}

// Gosched yields the processor, allowing other goroutines to run. It does not
// suspend the current goroutine, so execution resumes automatically.
//
//go:nosplit
func Gosched() {
	checkTimeouts()
	mcall(gosched_m)
}

// goschedguarded yields the processor like gosched, but also checks
// for forbidden states and opts out of the yield in those cases.
//
//go:nosplit
func goschedguarded() {
	mcall(goschedguarded_m)
}

// goschedIfBusy yields the processor like gosched, but only when the
// system looks busy. It returns without yielding when the current
// goroutine has no pending preemption request (gp.preempt is false) and
// at least one P is idle (sched.npidle > 0); in every other case it
// yields.
//
//go:nosplit
func goschedIfBusy() {
	gp := getg()
	// Call gosched if gp.preempt is set; we may be in a tight loop that
	// doesn't otherwise yield.
	if !gp.preempt && sched.npidle.Load() > 0 {
		return
	}
	mcall(gosched_m)
}

// Puts the current goroutine into a waiting state and calls unlockf on the
// system stack.
//
// If unlockf returns false, the goroutine is resumed.
//
// unlockf must not access this G's stack, as it may be moved between
// the call to gopark and the call to unlockf.
//
// Note that because unlockf is called after putting the G into a waiting
// state, the G may have already been readied by the time unlockf is called
// unless there is external synchronization preventing the G from being
// readied. If unlockf returns false, it must guarantee that the G cannot be
// externally readied.
//
// Reason explains why the goroutine has been parked. It is displayed in stack
// traces and heap dumps. Reasons should be unique and descriptive. Do not
// re-use reasons, add new ones.
//
// gopark should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname gopark
func gopark(unlockf func(*g, unsafe.Pointer) bool, lock unsafe.Pointer, reason waitReason, traceReason traceBlockReason, traceskip int) {
	if reason != waitReasonSleep {
		checkTimeouts() // timeouts may expire while two goroutines keep the scheduler busy
	}
	mp := acquirem()
	gp := mp.curg
	status := readgstatus(gp)
	if status != _Grunning && status != _Gscanrunning {
		throw("gopark: bad g status")
	}
	// Stash the park parameters on the M and G before switching to park_m.
	mp.waitlock = lock
	mp.waitunlockf = unlockf
	gp.waitreason = reason
	mp.waitTraceBlockReason = traceReason
	mp.waitTraceSkip = traceskip
	releasem(mp)
	// can't do anything that might move the G between Ms here.
	mcall(park_m)
}

// Puts the current goroutine into a waiting state and unlocks the lock.
// The goroutine can be made runnable again by calling goready(gp).
func goparkunlock(lock *mutex, reason waitReason, traceReason traceBlockReason, traceskip int) {
	gopark(parkunlock_c, unsafe.Pointer(lock), reason, traceReason, traceskip)
}

// goready calls ready(gp, traceskip, true) on the system stack.
//
// goready should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname goready
func goready(gp *g, traceskip int) {
	systemstack(func() {
		ready(gp, traceskip, true)
	})
}

// acquireSudog returns a sudog taken from the current P's sudogcache. When
// that cache is empty it is first refilled from the central list
// sched.sudogcache, or with a newly allocated sudog if the central list is
// empty too.
//
//go:nosplit
func acquireSudog() *sudog {
	// Delicate dance: the semaphore implementation calls
	// acquireSudog, acquireSudog calls new(sudog),
	// new calls malloc, malloc can call the garbage collector,
	// and the garbage collector calls the semaphore implementation
	// in stopTheWorld.
	// Break the cycle by doing acquirem/releasem around new(sudog).
	// The acquirem/releasem increments m.locks during new(sudog),
	// which keeps the garbage collector from being invoked.
	mp := acquirem()
	pp := mp.p.ptr()
	if len(pp.sudogcache) == 0 {
		lock(&sched.sudoglock)
		// First, try to grab a batch from central cache.
		for len(pp.sudogcache) < cap(pp.sudogcache)/2 && sched.sudogcache != nil {
			s := sched.sudogcache
			sched.sudogcache = s.next
			s.next = nil
			pp.sudogcache = append(pp.sudogcache, s)
		}
		unlock(&sched.sudoglock)
		// If the central cache is empty, allocate a new one.
		if len(pp.sudogcache) == 0 {
			pp.sudogcache = append(pp.sudogcache, new(sudog))
		}
	}
	// Pop the last entry, clearing the vacated slot.
	n := len(pp.sudogcache)
	s := pp.sudogcache[n-1]
	pp.sudogcache[n-1] = nil
	pp.sudogcache = pp.sudogcache[:n-1]
	if s.elem.get() != nil {
		throw("acquireSudog: found s.elem != nil in cache")
	}
	releasem(mp)
	return s
}

// releaseSudog puts s back on the current P's sudogcache. It throws if s
// still has elem, isSelect, next, prev, waitlink or c set, or if the
// current goroutine's param is non-nil. When the local cache is full, half
// of it is moved to the central list sched.sudogcache first.
//
//go:nosplit
func releaseSudog(s *sudog) {
	if s.elem.get() != nil {
		throw("runtime: sudog with non-nil elem")
	}
	if s.isSelect {
		throw("runtime: sudog with non-false isSelect")
	}
	if s.next != nil {
		throw("runtime: sudog with non-nil next")
	}
	if s.prev != nil {
		throw("runtime: sudog with non-nil prev")
	}
	if s.waitlink != nil {
		throw("runtime: sudog with non-nil waitlink")
	}
	if s.c.get() != nil {
		throw("runtime: sudog with non-nil c")
	}
	gp := getg()
	if gp.param != nil {
		throw("runtime: releaseSudog with non-nil gp.param")
	}
	mp := acquirem() // avoid rescheduling to another P
	pp := mp.p.ptr()
	if len(pp.sudogcache) == cap(pp.sudogcache) {
		// Transfer half of local cache to the central cache.
		// The popped entries are chained through their next fields
		// (first ... last) and spliced onto the central list under
		// the lock.
		var first, last *sudog
		for len(pp.sudogcache) > cap(pp.sudogcache)/2 {
			n := len(pp.sudogcache)
			p := pp.sudogcache[n-1]
			pp.sudogcache[n-1] = nil
			pp.sudogcache = pp.sudogcache[:n-1]
			if first == nil {
				first = p
			} else {
				last.next = p
			}
			last = p
		}
		lock(&sched.sudoglock)
		last.next = sched.sudogcache
		sched.sudogcache = first
		unlock(&sched.sudoglock)
	}
	pp.sudogcache = append(pp.sudogcache, s)
	releasem(mp)
}

// called from assembly.
//
// badmcall throws; fn is unused.
func badmcall(fn func(*g)) {
	throw("runtime: mcall called on m->g0 stack")
}

// badmcall2 throws; fn is unused.
func badmcall2(fn func(*g)) {
	throw("runtime: mcall function returned")
}

// badreflectcall panics with a plainError about the reflect.call argument size.
func badreflectcall() {
	panic(plainError("arg size to reflect.call more than 1GB"))
}

// badmorestackg0 reports a morestack on g0. Where crash stacks are not
// implemented it only writes a short message and returns. Otherwise it
// switches to the crash stack, prints the current g's stack bounds and
// saved sp, prints a traceback from the saved pc/sp/lr, and throws.
//
//go:nosplit
//go:nowritebarrierrec
func badmorestackg0() {
	if !crashStackImplemented {
		writeErrStr("fatal: morestack on g0\n")
		return
	}

	g := getg()
	switchToCrashStack(func() {
		print("runtime: morestack on g0, stack [", hex(g.stack.lo), " ", hex(g.stack.hi), "], sp=", hex(g.sched.sp), ", called from\n")
		g.m.traceback = 2 // include pc and sp in stack trace
		traceback1(g.sched.pc, g.sched.sp, g.sched.lr, g, 0)
		print("\n")

		throw("morestack on g0")
	})
}

// badmorestackgsignal writes a fixed message about morestack on gsignal
// via writeErrStr.
//
//go:nosplit
//go:nowritebarrierrec
func badmorestackgsignal() {
	writeErrStr("fatal: morestack on gsignal\n")
}

// badctxt throws "ctxt != 0".
//
//go:nosplit
func badctxt() {
	throw("ctxt != 0")
}

// gcrash is a fake g that can be used when crashing due to bad
// stack conditions.
var gcrash g

// crashingG holds the g that claimed the crash stack in
// switchToCrashStack, or nil if none has.
var crashingG atomic.Pointer[g]

// Switch to crashstack and call fn, with special handling of
// concurrent and recursive cases.
//
// Nosplit as it is called in a bad stack condition (we know
// morestack would fail).
//
//go:nosplit
//go:nowritebarrierrec
func switchToCrashStack(fn func()) {
	me := getg()
	// The first g to install itself in crashingG gets the crash stack.
	if crashingG.CompareAndSwapNoWB(nil, me) {
		switchToCrashStack0(fn) // should never return
		abort()
	}
	if crashingG.Load() == me {
		// recursive crashing. too bad.
		writeErrStr("fatal: recursive switchToCrashStack\n")
		abort()
	}
	// Another g is crashing. Give it some time, hopefully it will finish traceback.
	usleep_no_g(100)
	writeErrStr("fatal: concurrent switchToCrashStack\n")
	abort()
}

// Disable crash stack on Windows for now. Apparently, throwing an exception
// on a non-system-allocated crash stack causes EXCEPTION_STACK_OVERFLOW and
// hangs the process (see issue 63938).
const crashStackImplemented = GOOS != "windows"

// switchToCrashStack0 is the assembly half of switchToCrashStack; its
// caller treats it as never returning.
//
//go:noescape
func switchToCrashStack0(fn func()) // in assembly

// lockedOSThread reports whether both the current goroutine's lockedm and
// its M's lockedg are non-zero.
func lockedOSThread() bool {
	gp := getg()
	return gp.lockedm != 0 && gp.m.lockedg != 0
}

var (
	// allgs contains all Gs ever created (including dead Gs), and thus
	// never shrinks.
	//
	// Access via the slice is protected by allglock or stop-the-world.
	// Readers that cannot take the lock may (carefully!) use the atomic
	// variables below.
	allglock mutex
	allgs    []*g

	// allglen and allgptr are atomic variables that contain len(allgs) and
	// &allgs[0] respectively. Proper ordering depends on totally-ordered
	// loads and stores. Writes are protected by allglock.
	//
	// allgptr is updated before allglen. Readers should read allglen
	// before allgptr to ensure that allglen is always <= len(allgptr). New
	// Gs appended during the race can be missed. For a consistent view of
	// all Gs, allglock must be held.
	//
	// allgptr copies should always be stored as a concrete type or
	// unsafe.Pointer, not uintptr, to ensure that GC can still reach it
	// even if it points to a stale array.
	allglen uintptr
	allgptr **g
)

// allgadd appends gp to allgs under allglock and republishes allgptr and
// allglen for lock-free readers. It throws if gp is still in _Gidle.
func allgadd(gp *g) {
	if readgstatus(gp) == _Gidle {
		throw("allgadd: bad status Gidle")
	}

	lock(&allglock)
	allgs = append(allgs, gp)
	// Republish the base pointer only when it changed; it is stored
	// before allglen, matching the ordering described on the variables.
	if &allgs[0] != allgptr {
		atomicstorep(unsafe.Pointer(&allgptr), unsafe.Pointer(&allgs[0]))
	}
	atomic.Storeuintptr(&allglen, uintptr(len(allgs)))
	unlock(&allglock)
}

// allGsSnapshot returns a snapshot of the slice of all Gs.
//
// The world must be stopped or allglock must be held.
func allGsSnapshot() []*g {
	assertWorldStoppedOrLockHeld(&allglock)

	// Because the world is stopped or allglock is held, allgadd
	// cannot happen concurrently with this. allgs grows
	// monotonically and existing entries never change, so we can
	// simply return a copy of the slice header. For added safety,
	// we trim everything past len because that can still change.
	return allgs[:len(allgs):len(allgs)]
}

// atomicAllG returns &allgs[0] and len(allgs) for use with atomicAllGIndex.
func atomicAllG() (**g, uintptr) {
	// Load the length before the pointer, as required by the ordering
	// described on allglen/allgptr.
	length := atomic.Loaduintptr(&allglen)
	ptr := (**g)(atomic.Loadp(unsafe.Pointer(&allgptr)))
	return ptr, length
}

// atomicAllGIndex returns ptr[i] with the allgptr returned from atomicAllG.
func atomicAllGIndex(ptr **g, i uintptr) *g {
	return *(**g)(add(unsafe.Pointer(ptr), i*goarch.PtrSize))
}

// forEachG calls fn on every G from allgs.
//
// forEachG takes a lock to exclude concurrent addition of new Gs.
func forEachG(fn func(gp *g)) {
	lock(&allglock)
	for _, gp := range allgs {
		fn(gp)
	}
	unlock(&allglock)
}

// forEachGRace calls fn on every G from allgs.
//
// forEachGRace avoids locking, but does not exclude addition of new Gs during
// execution, which may be missed.
755 func forEachGRace(fn func(gp *g)) { 756 ptr, length := atomicAllG() 757 for i := uintptr(0); i < length; i++ { 758 gp := atomicAllGIndex(ptr, i) 759 fn(gp) 760 } 761 return 762 } 763 764 const ( 765 // Number of goroutine ids to grab from sched.goidgen to local per-P cache at once. 766 // 16 seems to provide enough amortization, but other than that it's mostly arbitrary number. 767 _GoidCacheBatch = 16 768 ) 769 770 // cpuinit sets up CPU feature flags and calls internal/cpu.Initialize. env should be the complete 771 // value of the GODEBUG environment variable. 772 func cpuinit(env string) { 773 cpu.Initialize(env) 774 775 // Support cpu feature variables are used in code generated by the compiler 776 // to guard execution of instructions that can not be assumed to be always supported. 777 switch GOARCH { 778 case "386", "amd64": 779 x86HasAVX = cpu.X86.HasAVX 780 x86HasFMA = cpu.X86.HasFMA 781 x86HasPOPCNT = cpu.X86.HasPOPCNT 782 x86HasSSE41 = cpu.X86.HasSSE41 783 784 case "arm": 785 armHasVFPv4 = cpu.ARM.HasVFPv4 786 787 case "arm64": 788 arm64HasATOMICS = cpu.ARM64.HasATOMICS 789 790 case "loong64": 791 loong64HasLAMCAS = cpu.Loong64.HasLAMCAS 792 loong64HasLAM_BH = cpu.Loong64.HasLAM_BH 793 loong64HasDBAR_HINTS = cpu.Loong64.HasDBAR_HINTS 794 loong64HasLSX = cpu.Loong64.HasLSX 795 796 case "riscv64": 797 riscv64HasZbb = cpu.RISCV64.HasZbb 798 } 799 } 800 801 // getGodebugEarly extracts the environment variable GODEBUG from the environment on 802 // Unix-like operating systems and returns it. This function exists to extract GODEBUG 803 // early before much of the runtime is initialized. 804 // 805 // Returns nil, false if OS doesn't provide env vars early in the init sequence. 
806 func getGodebugEarly() (string, bool) { 807 const prefix = "GODEBUG=" 808 var env string 809 switch GOOS { 810 case "aix", "darwin", "ios", "dragonfly", "freebsd", "netbsd", "openbsd", "illumos", "solaris", "linux": 811 // Similar to goenv_unix but extracts the environment value for 812 // GODEBUG directly. 813 // TODO(moehrmann): remove when general goenvs() can be called before cpuinit() 814 n := int32(0) 815 for argv_index(argv, argc+1+n) != nil { 816 n++ 817 } 818 819 for i := int32(0); i < n; i++ { 820 p := argv_index(argv, argc+1+i) 821 s := unsafe.String(p, findnull(p)) 822 823 if stringslite.HasPrefix(s, prefix) { 824 env = gostringnocopy(p)[len(prefix):] 825 break 826 } 827 } 828 break 829 830 default: 831 return "", false 832 } 833 return env, true 834 } 835 836 // The bootstrap sequence is: 837 // 838 // call osinit 839 // call schedinit 840 // make & queue new G 841 // call runtime·mstart 842 // 843 // The new G calls runtime·main. 844 func schedinit() { 845 lockInit(&sched.lock, lockRankSched) 846 lockInit(&sched.sysmonlock, lockRankSysmon) 847 lockInit(&sched.deferlock, lockRankDefer) 848 lockInit(&sched.sudoglock, lockRankSudog) 849 lockInit(&deadlock, lockRankDeadlock) 850 lockInit(&paniclk, lockRankPanic) 851 lockInit(&allglock, lockRankAllg) 852 lockInit(&allpLock, lockRankAllp) 853 lockInit(&reflectOffs.lock, lockRankReflectOffs) 854 lockInit(&finlock, lockRankFin) 855 lockInit(&cpuprof.lock, lockRankCpuprof) 856 lockInit(&computeMaxProcsLock, lockRankComputeMaxProcs) 857 allocmLock.init(lockRankAllocmR, lockRankAllocmRInternal, lockRankAllocmW) 858 execLock.init(lockRankExecR, lockRankExecRInternal, lockRankExecW) 859 traceLockInit() 860 // Enforce that this lock is always a leaf lock. 861 // All of this lock's critical sections should be 862 // extremely short. 
863 lockInit(&memstats.heapStats.noPLock, lockRankLeafRank) 864 865 lockVerifyMSize() 866 867 sched.midle.init(unsafe.Offsetof(m{}.idleNode)) 868 869 // raceinit must be the first call to race detector. 870 // In particular, it must be done before mallocinit below calls racemapshadow. 871 gp := getg() 872 if raceenabled { 873 gp.racectx, raceprocctx0 = raceinit() 874 } 875 876 sched.maxmcount = 10000 877 crashFD.Store(^uintptr(0)) 878 879 // The world starts stopped. 880 worldStopped() 881 882 godebug, parsedGodebug := getGodebugEarly() 883 if parsedGodebug { 884 parseRuntimeDebugVars(godebug) 885 } 886 ticks.init() // run as early as possible 887 moduledataverify() 888 stackinit() 889 randinit() // must run before mallocinit, AlgInit, mcommoninit 890 mallocinit() 891 cpuinit(godebug) // must run before AlgInit 892 maps.AlgInit() // maps, hash, rand must not be used before this call 893 mcommoninit(gp.m, -1) 894 modulesinit() // provides activeModules 895 typelinksinit() // uses maps, activeModules 896 itabsinit() // uses activeModules 897 stkobjinit() // must run before GC starts 898 899 sigsave(&gp.m.sigmask) 900 initSigmask = gp.m.sigmask 901 902 goargs() 903 goenvs() 904 secure() 905 checkfds() 906 if !parsedGodebug { 907 // Some platforms, e.g., Windows, didn't make env vars available "early", 908 // so try again now. 909 parseRuntimeDebugVars(gogetenv("GODEBUG")) 910 } 911 finishDebugVarsSetup() 912 gcinit() 913 914 // Allocate stack space that can be used when crashing due to bad stack 915 // conditions, e.g. morestack on g0. 916 gcrash.stack = stackalloc(16384) 917 gcrash.stackguard0 = gcrash.stack.lo + 1000 918 gcrash.stackguard1 = gcrash.stack.lo + 1000 919 920 // if disableMemoryProfiling is set, update MemProfileRate to 0 to turn off memprofile. 
921 // Note: parsedebugvars may update MemProfileRate, but when disableMemoryProfiling is 922 // set to true by the linker, it means that nothing is consuming the profile, it is 923 // safe to set MemProfileRate to 0. 924 if disableMemoryProfiling { 925 MemProfileRate = 0 926 } 927 928 // mcommoninit runs before parsedebugvars, so init profstacks again. 929 mProfStackInit(gp.m) 930 defaultGOMAXPROCSInit() 931 932 lock(&sched.lock) 933 sched.lastpoll.Store(nanotime()) 934 var procs int32 935 if n, err := strconv.ParseInt(gogetenv("GOMAXPROCS"), 10, 32); err == nil && n > 0 { 936 procs = int32(n) 937 sched.customGOMAXPROCS = true 938 } else { 939 // Use numCPUStartup for initial GOMAXPROCS for two reasons: 940 // 941 // 1. We just computed it in osinit, recomputing is (minorly) wasteful. 942 // 943 // 2. More importantly, if debug.containermaxprocs == 0 && 944 // debug.updatemaxprocs == 0, we want to guarantee that 945 // runtime.GOMAXPROCS(0) always equals runtime.NumCPU (which is 946 // just numCPUStartup). 947 procs = defaultGOMAXPROCS(numCPUStartup) 948 } 949 if procresize(procs) != nil { 950 throw("unknown runnable goroutine during bootstrap") 951 } 952 unlock(&sched.lock) 953 954 // World is effectively started now, as P's can run. 955 worldStarted() 956 957 if buildVersion == "" { 958 // Condition should never trigger. This code just serves 959 // to ensure runtime·buildVersion is kept in the resulting binary. 960 buildVersion = "unknown" 961 } 962 if len(modinfo) == 1 { 963 // Condition should never trigger. This code just serves 964 // to ensure runtime·modinfo is kept in the resulting binary. 965 modinfo = "" 966 } 967 } 968 969 func dumpgstatus(gp *g) { 970 thisg := getg() 971 print("runtime: gp: gp=", gp, ", goid=", gp.goid, ", gp->atomicstatus=", readgstatus(gp), "\n") 972 print("runtime: getg: g=", thisg, ", goid=", thisg.goid, ", g->atomicstatus=", readgstatus(thisg), "\n") 973 } 974 975 // sched.lock must be held. 
func checkmcount() {
	assertLockHeld(&sched.lock)

	// Exclude extra M's, which are used for cgocallback from threads
	// created in C.
	//
	// The purpose of the SetMaxThreads limit is to avoid accidental fork
	// bomb from something like millions of goroutines blocking on system
	// calls, causing the runtime to create millions of threads. By
	// definition, this isn't a problem for threads created in C, so we
	// exclude them from the limit. See https://go.dev/issue/60004.
	count := mcount() - int32(extraMInUse.Load()) - int32(extraMLength.Load())
	if count > sched.maxmcount {
		print("runtime: program exceeds ", sched.maxmcount, "-thread limit\n")
		throw("thread exhaustion")
	}
}

// mReserveID returns the next ID to use for a new m. This new m is immediately
// considered 'running' by checkdead.
//
// It throws if incrementing sched.mnext would wrap around, and calls
// checkmcount after reserving the ID so the thread limit is enforced.
//
// sched.lock must be held.
func mReserveID() int64 {
	assertLockHeld(&sched.lock)

	if sched.mnext+1 < sched.mnext {
		throw("runtime: thread ID overflow")
	}
	id := sched.mnext
	sched.mnext++
	checkmcount()
	return id
}

// mcommoninit performs the initialization of mp that is common to every M:
// it assigns the ID, sets up the self weak pointer and per-M random state,
// runs mpreinit, publishes mp on allm, and allocates profiling buffers.
//
// Pre-allocated ID may be passed as 'id', or omitted by passing -1.
func mcommoninit(mp *m, id int64) {
	gp := getg()

	// g0 stack won't make sense for user (and is not necessarily unwindable).
	if gp != gp.m.g0 {
		callers(1, mp.createstack[:])
	}

	lock(&sched.lock)

	if id >= 0 {
		mp.id = id
	} else {
		mp.id = mReserveID()
	}

	mp.self = newMWeakPointer(mp)

	mrandinit(mp)

	mpreinit(mp)
	if mp.gsignal != nil {
		mp.gsignal.stackguard1 = mp.gsignal.stack.lo + stackGuard
	}

	// Add to allm so garbage collector doesn't free g->m
	// when it is just in a register or thread-local storage.
	mp.alllink = allm

	// NumCgoCall and others iterate over allm w/o schedlock,
	// so we need to publish it safely.
	atomicstorep(unsafe.Pointer(&allm), unsafe.Pointer(mp))
	unlock(&sched.lock)

	// Allocate memory to hold a cgo traceback if the cgo call crashes.
	if iscgo || GOOS == "solaris" || GOOS == "illumos" || GOOS == "windows" {
		mp.cgoCallers = new(cgoCallers)
	}
	mProfStackInit(mp)
}

// mProfStackInit is used to eagerly initialize stack trace buffers for
// profiling. Lazy allocation would have to deal with reentrancy issues in
// malloc and runtime locks for mLockProfile.
// TODO(mknyszek): Implement lazy allocation if this becomes a problem.
func mProfStackInit(mp *m) {
	if debug.profstackdepth == 0 {
		// debug.profstackdepth is set to 0 by the user, or we're being
		// called from schedinit before parsedebugvars.
		return
	}
	mp.profStack = makeProfStackFP()
	mp.mLockProfile.stack = makeProfStackFP()
}

// makeProfStackFP creates a buffer large enough to hold a maximum-sized stack
// trace as well as any additional frames needed for frame pointer unwinding
// with delayed inline expansion.
func makeProfStackFP() []uintptr {
	// The "1" term is to account for the first stack entry being
	// taken up by a "skip" sentinel value for profilers which
	// defer inline frame expansion until the profile is reported.
	// The "maxSkip" term is for frame pointer unwinding, where we
	// want to end up with debug.profstackdepth frames but will discard
	// some "physical" frames to account for skipping.
	return make([]uintptr, 1+maxSkip+debug.profstackdepth)
}

// makeProfStack returns a buffer large enough to hold a maximum-sized stack
// trace.
func makeProfStack() []uintptr { return make([]uintptr, debug.profstackdepth) }

// pprof_makeProfStack forwards to makeProfStack. The go:linkname directive
// below makes it reachable from outside this file (presumably runtime/pprof,
// going by the name; confirm against the importer).
//
//go:linkname pprof_makeProfStack
func pprof_makeProfStack() []uintptr { return makeProfStack() }

// becomeSpinning marks mp as spinning: it sets mp.spinning, increments
// sched.nmspinning, and resets sched.needspinning to 0.
func (mp *m) becomeSpinning() {
	mp.spinning = true
	sched.nmspinning.Add(1)
	sched.needspinning.Store(0)
}

// Take a snapshot of allp, for use after dropping the P.
//
// Must be called with a P, but the returned slice may be used after dropping
// the P. The M holds a reference on the snapshot to keep the backing array
// alive.
//
//go:yeswritebarrierrec
func (mp *m) snapshotAllp() []*p {
	mp.allpSnapshot = allp
	return mp.allpSnapshot
}

// Clear the saved allp snapshot. Should be called as soon as the snapshot is
// no longer required.
//
// Must be called after reacquiring a P, as it requires a write barrier.
//
//go:yeswritebarrierrec
func (mp *m) clearAllpSnapshot() {
	mp.allpSnapshot = nil
}

// hasCgoOnStack reports whether mp.ncgo is positive or mp is flagged as an
// extra M (mp.isextra).
func (mp *m) hasCgoOnStack() bool {
	return mp.ncgo > 0 || mp.isextra
}

const (
	// osHasLowResTimer indicates that the platform's internal timer system has a low resolution,
	// typically on the order of 1 ms or more.
	osHasLowResTimer = GOOS == "windows" || GOOS == "openbsd" || GOOS == "netbsd"

	// osHasLowResClockInt is osHasLowResClock but in integer form, so it can be used to create
	// constants conditionally.
	osHasLowResClockInt = goos.IsWindows

	// osHasLowResClock indicates that timestamps produced by nanotime on the platform have a
	// low resolution, typically on the order of 1 ms or more.
	osHasLowResClock = osHasLowResClockInt > 0
)

// Mark gp ready to run.
//
// gp must be in _Gwaiting (optionally with the _Gscan bit set when read);
// otherwise ready throws. gp is moved to _Grunnable, put on the current P's
// run queue (next is passed through to runqput), and wakep is called.
// traceskip is passed through to trace.GoUnpark.
func ready(gp *g, traceskip int, next bool) {
	status := readgstatus(gp)

	// Mark runnable.
	mp := acquirem() // disable preemption because it can be holding p in a local var
	if status&^_Gscan != _Gwaiting {
		dumpgstatus(gp)
		throw("bad g->status in ready")
	}

	// status is Gwaiting or Gscanwaiting, make Grunnable and put on runq
	trace := traceAcquire()
	casgstatus(gp, _Gwaiting, _Grunnable)
	if trace.ok() {
		trace.GoUnpark(gp, traceskip)
		traceRelease(trace)
	}
	runqput(mp.p.ptr(), gp, next)
	wakep()
	releasem(mp)
}

// freezeStopWait is a large value that freezetheworld sets
// sched.stopwait to in order to request that all Gs permanently stop.
const freezeStopWait = 0x7fffffff

// freezing is set to true if the runtime is trying to freeze the
// world.
var freezing atomic.Bool

// Similar to stopTheWorld but best-effort and can be called several times.
// There is no reverse operation, used during crashing.
// This function must not lock any mutexes.
func freezetheworld() {
	freezing.Store(true)
	if debug.dontfreezetheworld > 0 {
		// Don't preempt Ps to stop goroutines. That will perturb
		// scheduler state, making debugging more difficult. Instead,
		// allow goroutines to continue execution.
		//
		// fatalpanic will tracebackothers to trace all goroutines. It
		// is unsafe to trace a running goroutine, so tracebackothers
		// will skip running goroutines. That is OK and expected, we
		// expect users of dontfreezetheworld to use core files anyway.
		//
		// However, allowing the scheduler to continue running free
		// introduces a race: a goroutine may be stopped when
		// tracebackothers checks its status, and then start running
		// later when we are in the middle of traceback, potentially
		// causing a crash.
		//
		// To mitigate this, when an M naturally enters the scheduler,
		// schedule checks if freezing is set and if so stops
		// execution. This guarantees that while Gs can transition from
		// running to stopped, they can never transition from stopped
		// to running.
		//
		// The sleep here allows racing Ms that missed freezing and are
		// about to run a G to complete the transition to running
		// before we start traceback.
		usleep(1000)
		return
	}

	// stopwait and preemption requests can be lost
	// due to races with concurrently executing threads,
	// so try several times
	for i := 0; i < 5; i++ {
		// this should tell the scheduler to not start any new goroutines
		sched.stopwait = freezeStopWait
		sched.gcwaiting.Store(true)
		// this should stop running goroutines
		if !preemptall() {
			break // no running goroutines
		}
		usleep(1000)
	}
	// to be sure
	usleep(1000)
	preemptall()
	usleep(1000)
}

// All reads and writes of g's status go through readgstatus, casgstatus,
// castogscanstatus, casfrom_Gscanstatus.
//
//go:nosplit
func readgstatus(gp *g) uint32 {
	return gp.atomicstatus.Load()
}

// The Gscanstatuses are acting like locks and this releases them.
// If it proves to be a performance hit we should be able to make these
// simple atomic stores but for now we are going to throw if
// we see an inconsistent state.
//
// oldval must be one of the _Gscan* statuses listed below and newval must be
// oldval with the _Gscan bit cleared; any other combination, or a failed CAS,
// throws.
func casfrom_Gscanstatus(gp *g, oldval, newval uint32) {
	success := false

	// Check that transition is valid.
	switch oldval {
	default:
		print("runtime: casfrom_Gscanstatus bad oldval gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus:top gp->status is not in scan state")
	case _Gscanrunnable,
		_Gscanwaiting,
		_Gscanrunning,
		_Gscansyscall,
		_Gscanleaked,
		_Gscanpreempted,
		_Gscandeadextra:
		if newval == oldval&^_Gscan {
			success = gp.atomicstatus.CompareAndSwap(oldval, newval)
		}
	}
	if !success {
		print("runtime: casfrom_Gscanstatus failed gp=", gp, ", oldval=", hex(oldval), ", newval=", hex(newval), "\n")
		dumpgstatus(gp)
		throw("casfrom_Gscanstatus: gp->status is not in scan state")
	}
	releaseLockRankAndM(lockRankGscan)
}

// This will return false if the gp is not in the expected status and the cas fails.
// This acts like a lock acquire while casfrom_Gscanstatus acts like a lock release.
//
// oldval must be one of the non-scan statuses listed below and newval must be
// oldval|_Gscan; any other combination throws.
func castogscanstatus(gp *g, oldval, newval uint32) bool {
	switch oldval {
	case _Grunnable,
		_Grunning,
		_Gwaiting,
		_Gleaked,
		_Gsyscall,
		_Gdeadextra:
		if newval == oldval|_Gscan {
			r := gp.atomicstatus.CompareAndSwap(oldval, newval)
			if r {
				acquireLockRankAndM(lockRankGscan)
			}
			return r

		}
	}
	print("runtime: castogscanstatus oldval=", hex(oldval), " newval=", hex(newval), "\n")
	throw("bad oldval passed to castogscanstatus")
	return false
}

// casgstatusAlwaysTrack is a debug flag that causes casgstatus to always track
// various latencies on every transition instead of sampling them.
var casgstatusAlwaysTrack = false

// If asked to move to or from a Gscanstatus this will throw. Use the castogscanstatus
// and casfrom_Gscanstatus instead.
// casgstatus will loop if the g->atomicstatus is in a Gscan status until the routine that
// put it in the Gscan state is finished.
//
//go:nosplit
func casgstatus(gp *g, oldval, newval uint32) {
	if (oldval&_Gscan != 0) || (newval&_Gscan != 0) || oldval == newval {
		systemstack(func() {
			// Call on the systemstack to prevent print and throw from counting
			// against the nosplit stack reservation.
			print("runtime: casgstatus: oldval=", hex(oldval), " newval=", hex(newval), "\n")
			throw("casgstatus: bad incoming values")
		})
	}

	lockWithRankMayAcquire(nil, lockRankGscan)

	// See https://golang.org/cl/21503 for justification of the yield delay.
	const yieldDelay = 5 * 1000
	var nextYield int64

	// loop if gp->atomicstatus is in a scan state giving
	// GC time to finish and change the state to oldval.
	for i := 0; !gp.atomicstatus.CompareAndSwap(oldval, newval); i++ {
		if oldval == _Gwaiting && gp.atomicstatus.Load() == _Grunnable {
			systemstack(func() {
				// Call on the systemstack to prevent throw from counting
				// against the nosplit stack reservation.
				throw("casgstatus: waiting for Gwaiting but is Grunnable")
			})
		}
		if i == 0 {
			nextYield = nanotime() + yieldDelay
		}
		// Until the nextYield deadline, spin briefly with procyield while
		// the status is not oldval. Past the deadline, yield the OS thread
		// and set a new deadline half a yieldDelay away.
		if nanotime() < nextYield {
			for x := 0; x < 10 && gp.atomicstatus.Load() != oldval; x++ {
				procyield(1)
			}
		} else {
			osyield()
			nextYield = nanotime() + yieldDelay/2
		}
	}

	if gp.bubble != nil {
		systemstack(func() {
			gp.bubble.changegstatus(gp, oldval, newval)
		})
	}

	if (oldval == _Grunning || oldval == _Gsyscall) && (newval != _Grunning && newval != _Gsyscall) {
		// Track every gTrackingPeriod time a goroutine transitions out of _Grunning or _Gsyscall.
		// Do not track _Grunning <-> _Gsyscall transitions, since they're two very similar states.
		if casgstatusAlwaysTrack || gp.trackingSeq%gTrackingPeriod == 0 {
			gp.tracking = true
		}
		gp.trackingSeq++
	}
	// Nothing below applies unless this goroutine is currently being tracked.
	if !gp.tracking {
		return
	}

	// Handle various kinds of tracking.
	//
	// Currently:
	// - Time spent in runnable.
	// - Time spent blocked on a sync.Mutex or sync.RWMutex.
	switch oldval {
	case _Grunnable:
		// We transitioned out of runnable, so measure how much
		// time we spent in this state and add it to
		// runnableTime.
		now := nanotime()
		gp.runnableTime += now - gp.trackingStamp
		gp.trackingStamp = 0
	case _Gwaiting:
		if !gp.waitreason.isMutexWait() {
			// Not blocking on a lock.
			break
		}
		// Blocking on a lock, measure it. Note that because we're
		// sampling, we have to multiply by our sampling period to get
		// a more representative estimate of the absolute value.
		// gTrackingPeriod also represents an accurate sampling period
		// because we can only enter this state from _Grunning.
		now := nanotime()
		sched.totalMutexWaitTime.Add((now - gp.trackingStamp) * gTrackingPeriod)
		gp.trackingStamp = 0
	}
	switch newval {
	case _Gwaiting:
		if !gp.waitreason.isMutexWait() {
			// Not blocking on a lock.
			break
		}
		// Blocking on a lock. Write down the timestamp.
		now := nanotime()
		gp.trackingStamp = now
	case _Grunnable:
		// We just transitioned into runnable, so record what
		// time that happened.
		now := nanotime()
		gp.trackingStamp = now
	case _Grunning:
		// We're transitioning into running, so turn off
		// tracking and record how much time we spent in
		// runnable.
		gp.tracking = false
		sched.timeToRun.record(gp.runnableTime)
		gp.runnableTime = 0
	}
}

// casGToWaiting transitions gp from old to _Gwaiting, and sets the wait reason.
//
// Use this over casgstatus when possible to ensure that a waitreason is set.
func casGToWaiting(gp *g, old uint32, reason waitReason) {
	// Set the wait reason before calling casgstatus, because casgstatus will use it.
	gp.waitreason = reason
	casgstatus(gp, old, _Gwaiting)
}

// casGToWaitingForSuspendG transitions gp from old to _Gwaiting, and sets the wait reason.
// The wait reason must be a valid isWaitingForSuspendG wait reason; otherwise
// this throws.
//
// While a goroutine is in this state, its stack is effectively pinned.
// The garbage collector must not shrink or otherwise mutate the goroutine's stack.
//
// Use this over casgstatus when possible to ensure that a waitreason is set.
func casGToWaitingForSuspendG(gp *g, old uint32, reason waitReason) {
	if !reason.isWaitingForSuspendG() {
		throw("casGToWaitingForSuspendG with non-isWaitingForSuspendG wait reason")
	}
	casGToWaiting(gp, old, reason)
}

// casGToPreemptScan transitions gp from _Grunning to _Gscan|_Gpreempted.
//
// old and new are only validated: any pair other than
// (_Grunning, _Gscan|_Gpreempted) throws, and the CAS itself always uses
// those two constants.
//
// TODO(austin): This is the only status operation that both changes
// the status and locks the _Gscan bit. Rethink this.
func casGToPreemptScan(gp *g, old, new uint32) {
	if old != _Grunning || new != _Gscan|_Gpreempted {
		throw("bad g transition")
	}
	acquireLockRankAndM(lockRankGscan)
	// Spin until the CAS succeeds.
	for !gp.atomicstatus.CompareAndSwap(_Grunning, _Gscan|_Gpreempted) {
	}
	// We never notify gp.bubble that the goroutine state has moved
	// from _Grunning to _Gpreempted. We call bubble.changegstatus
	// after status changes happen, but doing so here would violate the
	// ordering between the gscan and synctest locks. The bubble doesn't
	// distinguish between _Grunning and _Gpreempted anyway, so not
	// notifying it is fine.
}

// casGFromPreempted attempts to transition gp from _Gpreempted to
// _Gwaiting.
// If successful, the caller is responsible for
// re-scheduling gp.
//
// old and new are only validated: any pair other than
// (_Gpreempted, _Gwaiting) throws. gp.waitreason is set to
// waitReasonPreempted before the CAS is attempted, so it is written even when
// the function returns false.
func casGFromPreempted(gp *g, old, new uint32) bool {
	if old != _Gpreempted || new != _Gwaiting {
		throw("bad g transition")
	}
	gp.waitreason = waitReasonPreempted
	if !gp.atomicstatus.CompareAndSwap(_Gpreempted, _Gwaiting) {
		return false
	}
	if bubble := gp.bubble; bubble != nil {
		bubble.changegstatus(gp, _Gpreempted, _Gwaiting)
	}
	return true
}

// stwReason is an enumeration of reasons the world is stopping.
type stwReason uint8

// Reasons to stop-the-world.
//
// Avoid reusing reasons and add new ones instead.
const (
	stwUnknown                     stwReason = iota // "unknown"
	stwGCMarkTerm                                   // "GC mark termination"
	stwGCSweepTerm                                  // "GC sweep termination"
	stwWriteHeapDump                                // "write heap dump"
	stwGoroutineProfile                             // "goroutine profile"
	stwGoroutineProfileCleanup                      // "goroutine profile cleanup"
	stwAllGoroutinesStack                           // "all goroutines stack trace"
	stwReadMemStats                                 // "read mem stats"
	stwAllThreadsSyscall                            // "AllThreadsSyscall"
	stwGOMAXPROCS                                   // "GOMAXPROCS"
	stwStartTrace                                   // "start trace"
	stwStopTrace                                    // "stop trace"
	stwForTestCountPagesInUse                       // "CountPagesInUse (test)"
	stwForTestReadMetricsSlow                       // "ReadMetricsSlow (test)"
	stwForTestReadMemStatsSlow                      // "ReadMemStatsSlow (test)"
	stwForTestPageCachePagesLeaked                  // "PageCachePagesLeaked (test)"
	stwForTestResetDebugLog                         // "ResetDebugLog (test)"
)

// String returns the entry of stwReasonStrings indexed by r.
// r must be a valid index into that table.
func (r stwReason) String() string {
	return stwReasonStrings[r]
}

// isGC reports whether r is stwGCMarkTerm or stwGCSweepTerm.
func (r stwReason) isGC() bool {
	return r == stwGCMarkTerm || r == stwGCSweepTerm
}

// If you add to this list, also add it to src/internal/trace/parser.go.
// If you change the values of any of the stw* constants, bump the trace
// version number and make a copy of this.
var stwReasonStrings = [...]string{
	stwUnknown:                     "unknown",
	stwGCMarkTerm:                  "GC mark termination",
	stwGCSweepTerm:                 "GC sweep termination",
	stwWriteHeapDump:               "write heap dump",
	stwGoroutineProfile:            "goroutine profile",
	stwGoroutineProfileCleanup:     "goroutine profile cleanup",
	stwAllGoroutinesStack:          "all goroutines stack trace",
	stwReadMemStats:                "read mem stats",
	stwAllThreadsSyscall:           "AllThreadsSyscall",
	stwGOMAXPROCS:                  "GOMAXPROCS",
	stwStartTrace:                  "start trace",
	stwStopTrace:                   "stop trace",
	stwForTestCountPagesInUse:      "CountPagesInUse (test)",
	stwForTestReadMetricsSlow:      "ReadMetricsSlow (test)",
	stwForTestReadMemStatsSlow:     "ReadMemStatsSlow (test)",
	stwForTestPageCachePagesLeaked: "PageCachePagesLeaked (test)",
	stwForTestResetDebugLog:        "ResetDebugLog (test)",
}

// worldStop provides context from the stop-the-world required by the
// start-the-world.
//
// Fields, as filled in by stopTheWorldWithSema:
//   - reason: the stwReason passed to the stop.
//   - startedStopping: nanotime taken just after sched.lock was acquired.
//   - finishedStopping: nanotime taken once all Ps had stopped.
//   - stoppingCPUTime: sum over all Ps of finishedStopping minus the P's
//     gcStopTime.
type worldStop struct {
	reason           stwReason
	startedStopping  int64
	finishedStopping int64
	stoppingCPUTime  int64
}

// Temporary variable for stopTheWorld, when it can't write to the stack.
//
// Protected by worldsema.
var stopTheWorldContext worldStop

// stopTheWorld stops all P's from executing goroutines, interrupting
// all goroutines at GC safe points and records reason as the reason
// for the stop. On return, only the current goroutine's P is running.
// stopTheWorld must not be called from a system stack and the caller
// must not hold worldsema. The caller must call startTheWorld when
// other P's should resume execution.
//
// stopTheWorld is safe for multiple goroutines to call at the
// same time. Each will execute its own stop, and the stops will
// be serialized.
//
// This is also used by routines that do stack dumps.
// If the system is
// in panic or being exited, this may not reliably stop all
// goroutines.
//
// Returns the STW context. When starting the world, this context must be
// passed to startTheWorld.
func stopTheWorld(reason stwReason) worldStop {
	semacquire(&worldsema)
	gp := getg()
	// preemptoff stays set to the reason string until startTheWorld clears it.
	gp.m.preemptoff = reason.String()
	systemstack(func() {
		stopTheWorldContext = stopTheWorldWithSema(reason) // avoid write to stack
	})
	return stopTheWorldContext
}

// startTheWorld undoes the effects of stopTheWorld.
//
// w must be the worldStop returned by stopTheWorld.
func startTheWorld(w worldStop) {
	systemstack(func() { startTheWorldWithSema(0, w) })

	// worldsema must be held over startTheWorldWithSema to ensure
	// gomaxprocs cannot change while worldsema is held.
	//
	// Release worldsema with direct handoff to the next waiter, but
	// acquirem so that semrelease1 doesn't try to yield our time.
	//
	// Otherwise if e.g. ReadMemStats is being called in a loop,
	// it might stomp on other attempts to stop the world, such as
	// for starting or ending GC. The operation this blocks is
	// so heavy-weight that we should just try to be as fair as
	// possible here.
	//
	// We don't want to just allow us to get preempted between now
	// and releasing the semaphore because then we keep everyone
	// (including, for example, GCs) waiting longer.
	mp := acquirem()
	mp.preemptoff = ""
	semrelease1(&worldsema, true, 0)
	releasem(mp)
}

// stopTheWorldGC has the same effect as stopTheWorld, but blocks
// until the GC is not running. It also blocks a GC from starting
// until startTheWorldGC is called.
//
// It acquires gcsema before calling stopTheWorld.
func stopTheWorldGC(reason stwReason) worldStop {
	semacquire(&gcsema)
	return stopTheWorld(reason)
}

// startTheWorldGC undoes the effects of stopTheWorldGC.
//
// w must be the worldStop returned by stopTheWorldGC.
func startTheWorldGC(w worldStop) {
	startTheWorld(w)
	semrelease(&gcsema)
}

// Holding worldsema grants an M the right to try to stop the world.
var worldsema uint32 = 1

// Holding gcsema grants the M the right to block a GC, and blocks
// until the current GC is done. In particular, it prevents gomaxprocs
// from changing concurrently.
//
// TODO(mknyszek): Once gomaxprocs and the execution tracer can handle
// being changed/enabled during a GC, remove this.
var gcsema uint32 = 1

// stopTheWorldWithSema is the core implementation of stopTheWorld.
// The caller is responsible for acquiring worldsema and disabling
// preemption first and then should call stopTheWorldWithSema on the
// system stack:
//
//	semacquire(&worldsema)
//	m.preemptoff = "reason"
//	var stw worldStop
//	systemstack(func() {
//		stw = stopTheWorldWithSema(reason)
//	})
//
// When finished, the caller must either call startTheWorld or undo
// these three operations separately:
//
//	m.preemptoff = ""
//	systemstack(func() {
//		now = startTheWorldWithSema(0, stw)
//	})
//	semrelease(&worldsema)
//
// It is allowed to acquire worldsema once and then execute multiple
// startTheWorldWithSema/stopTheWorldWithSema pairs.
// Other P's are able to execute between successive calls to
// startTheWorldWithSema and stopTheWorldWithSema.
// Holding worldsema causes any other goroutines invoking
// stopTheWorld to block.
//
// Returns the STW context. When starting the world, this context must be
// passed to startTheWorldWithSema.
//
//go:systemstack
func stopTheWorldWithSema(reason stwReason) worldStop {
	// Mark the goroutine which called stopTheWorld preemptible so its
	// stack may be scanned by the GC or observed by the execution tracer.
	//
	// This lets a mark worker scan us or the execution tracer take our
	// stack while we try to stop the world since otherwise we could get
	// in a mutual preemption deadlock.
	//
	// casGToWaitingForSuspendG marks the goroutine as ineligible for a
	// stack shrink, effectively pinning the stack in memory for the duration.
	//
	// N.B. The execution tracer is not aware of this status transition and
	// handles it specially based on the wait reason.
	casGToWaitingForSuspendG(getg().m.curg, _Grunning, waitReasonStoppingTheWorld)

	trace := traceAcquire()
	if trace.ok() {
		trace.STWStart(reason)
		traceRelease(trace)
	}
	gp := getg()

	// If we hold a lock, then we won't be able to stop another M
	// that is blocked trying to acquire the lock.
	if gp.m.locks > 0 {
		throw("stopTheWorld: holding locks")
	}

	lock(&sched.lock)
	start := nanotime() // exclude time waiting for sched.lock from start and total time metrics.
	sched.stopwait = gomaxprocs
	sched.gcwaiting.Store(true)
	preemptall()

	// Stop current P.
	gp.m.p.ptr().status = _Pgcstop // Pgcstop is only diagnostic.
	gp.m.p.ptr().gcStopTime = start
	sched.stopwait--

	// Try to retake all P's in syscalls.
	for _, pp := range allp {
		if thread, ok := setBlockOnExitSyscall(pp); ok {
			thread.gcstopP()
			thread.resume()
		}
	}

	// Stop idle Ps.
	now := nanotime()
	for {
		pp, _ := pidleget(now)
		if pp == nil {
			break
		}
		pp.status = _Pgcstop
		pp.gcStopTime = nanotime()
		sched.stopwait--
	}
	wait := sched.stopwait > 0
	unlock(&sched.lock)

	// Wait for remaining Ps to stop voluntarily.
	if wait {
		for {
			// wait for 100us, then try to re-preempt in case of any races
			if notetsleep(&sched.stopnote, 100*1000) {
				noteclear(&sched.stopnote)
				break
			}
			preemptall()
		}
	}

	finish := nanotime()
	startTime := finish - start
	if reason.isGC() {
		sched.stwStoppingTimeGC.record(startTime)
	} else {
		sched.stwStoppingTimeOther.record(startTime)
	}

	// Double-check we actually stopped everything, and all the invariants hold.
	// Also accumulate all the time spent by each P in _Pgcstop up to the point
	// where everything was stopped. This will be accumulated into the total pause
	// CPU time by the caller.
	stoppingCPUTime := int64(0)
	bad := ""
	if sched.stopwait != 0 {
		bad = "stopTheWorld: not stopped (stopwait != 0)"
	} else {
		for _, pp := range allp {
			if pp.status != _Pgcstop {
				bad = "stopTheWorld: not stopped (status != _Pgcstop)"
			}
			if pp.gcStopTime == 0 && bad == "" {
				bad = "stopTheWorld: broken CPU time accounting"
			}
			stoppingCPUTime += finish - pp.gcStopTime
			pp.gcStopTime = 0
		}
	}
	if freezing.Load() {
		// Some other thread is panicking. This can cause the
		// sanity checks above to fail if the panic happens in
		// the signal handler on a stopped thread. Either way,
		// we should halt this thread.
		//
		// Locking deadlock twice blocks this thread on the second
		// acquisition.
		lock(&deadlock)
		lock(&deadlock)
	}
	if bad != "" {
		throw(bad)
	}

	worldStopped()

	// Switch back to _Grunning, now that the world is stopped.
	casgstatus(getg().m.curg, _Gwaiting, _Grunning)

	return worldStop{
		reason:           reason,
		startedStopping:  start,
		finishedStopping: finish,
		stoppingCPUTime:  stoppingCPUTime,
	}
}

// startTheWorldWithSema restarts the world after stopTheWorldWithSema. It
// resizes the Ps (using newprocs if it is non-zero), clears sched.gcwaiting,
// and hands each P returned by procresize to an M: the P's own M is woken if
// it has one, otherwise a new M is started.
//
// w is the worldStop returned by the matching stop; its reason and
// startedStopping fields are used to record the total stop-the-world time.
//
// now is the current time; prefer to pass 0 to capture a fresh timestamp.
//
// startTheWorldWithSema returns now.
func startTheWorldWithSema(now int64, w worldStop) int64 {
	assertWorldStopped()

	mp := acquirem() // disable preemption because it can be holding p in a local var
	if netpollinited() {
		list, delta := netpoll(0) // non-blocking
		injectglist(&list)
		netpollAdjustWaiters(delta)
	}
	lock(&sched.lock)

	procs := gomaxprocs
	if newprocs != 0 {
		procs = newprocs
		newprocs = 0
	}
	p1 := procresize(procs)
	sched.gcwaiting.Store(false)
	if sched.sysmonwait.Load() {
		sched.sysmonwait.Store(false)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)

	worldStarted()

	// Walk the linked list of Ps returned by procresize.
	for p1 != nil {
		p := p1
		p1 = p1.link.ptr()
		if p.m != 0 {
			mp := p.m.ptr()
			p.m = 0
			if mp.nextp != 0 {
				throw("startTheWorld: inconsistent mp->nextp")
			}
			mp.nextp.set(p)
			notewakeup(&mp.park)
		} else {
			// Start M to run P. Do not start another M below.
			newm(nil, p, -1)
		}
	}

	// Capture start-the-world time before doing clean-up tasks.
	if now == 0 {
		now = nanotime()
	}
	totalTime := now - w.startedStopping
	if w.reason.isGC() {
		sched.stwTotalTimeGC.record(totalTime)
	} else {
		sched.stwTotalTimeOther.record(totalTime)
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.STWDone()
		traceRelease(trace)
	}

	// Wakeup an additional proc in case we have excessive runnable goroutines
	// in local queues or in the global queue. If we don't, the proc will park itself.
	// If we have lots of excessive work, resetspinning will unpark additional procs as necessary.
	wakep()

	releasem(mp)

	return now
}

// usesLibcall indicates whether this runtime performs system calls
// via libcall.
func usesLibcall() bool {
	switch GOOS {
	case "aix", "darwin", "illumos", "ios", "openbsd", "solaris", "windows":
		return true
	}
	return false
}

// mStackIsSystemAllocated indicates whether this runtime starts on a
// system-allocated stack.
func mStackIsSystemAllocated() bool {
	switch GOOS {
	case "aix", "darwin", "plan9", "illumos", "ios", "openbsd", "solaris", "windows":
		return true
	}
	return false
}

// mstart is the entry-point for new Ms.
// It is written in assembly, uses ABI0, is marked TOPFRAME, and calls mstart0.
func mstart()

// mstart0 is the Go entry-point for new Ms.
// This must not split the stack because we may not even have stack
// bounds set up yet.
//
// May run during STW (because it doesn't have a P yet), so write
// barriers are not allowed.
//
//go:nosplit
//go:nowritebarrierrec
func mstart0() {
	gp := getg()

	osStack := gp.stack.lo == 0
	if osStack {
		// Initialize stack bounds from system stack.
		// Cgo may have left stack size in stack.hi.
		// minit may update the stack bounds.
		//
		// Note: these bounds may not be very accurate.
		// We set hi to &size, but there are things above
		// it. The 1024 is supposed to compensate this,
		// but is somewhat arbitrary.
		size := gp.stack.hi
		if size == 0 {
			size = 16384 * sys.StackGuardMultiplier
		}
		gp.stack.hi = uintptr(noescape(unsafe.Pointer(&size)))
		gp.stack.lo = gp.stack.hi - size + 1024
	}
	// Initialize stack guard so that we can start calling regular
	// Go code.
	gp.stackguard0 = gp.stack.lo + stackGuard
	// This is the g0, so we can also call go:systemstack
	// functions, which check stackguard1.
	gp.stackguard1 = gp.stackguard0
	mstart1()

	// Exit this thread.
	if mStackIsSystemAllocated() {
		// Windows, Solaris, illumos, Darwin, AIX and Plan 9 always system-allocate
		// the stack, but put it in gp.stack before mstart,
		// so the logic above hasn't set osStack yet.
		osStack = true
	}
	mexit(osStack)
}

// The go:noinline is to guarantee the sys.GetCallerPC/sys.GetCallerSP below are safe,
// so that we can set up g0.sched to return to the call of mstart1 above.
//
//go:noinline
func mstart1() {
	gp := getg()

	if gp != gp.m.g0 {
		throw("bad runtime·mstart")
	}

	// Set up m.g0.sched as a label returning to just
	// after the mstart1 call in mstart0 above, for use by goexit0 and mcall.
	// We're never coming back to mstart1 after we call schedule,
	// so other calls can reuse the current frame.
	// And goexit0 does a gogo that needs to return from mstart1
	// and let mstart0 exit the thread.
	gp.sched.g = guintptr(unsafe.Pointer(gp))
	gp.sched.pc = sys.GetCallerPC()
	gp.sched.sp = sys.GetCallerSP()

	asminit()
	minit()

	// Install signal handlers; after minit so that minit can
	// prepare the thread to be able to handle the signals.
	if gp.m == &m0 {
		mstartm0()
	}

	if debug.dataindependenttiming == 1 {
		sys.EnableDIT()
	}

	if fn := gp.m.mstartfn; fn != nil {
		fn()
	}

	// Every M other than m0 takes the P left for it in nextp.
	if gp.m != &m0 {
		acquirep(gp.m.nextp.ptr())
		gp.m.nextp = 0
	}
	schedule()
}

// mstartm0 implements part of mstart1 that only runs on the m0.
//
// Write barriers are allowed here because we know the GC can't be
// running yet, so they'll be no-ops.
//
//go:yeswritebarrierrec
func mstartm0() {
	// Create an extra M for callbacks on threads not created by Go.
	// An extra M is also needed on Windows for callbacks created by
	// syscall.NewCallback. See issue #6751 for details.
	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
		cgoHasExtraM = true
		newextram()
	}
	initsig(false)
}

// mPark causes a thread to park itself, returning once woken.
//
// It sleeps on the current M's park note and clears the note before
// returning.
//
//go:nosplit
func mPark() {
	gp := getg()
	// This M might stay parked through an entire GC cycle.
	// Erase any leftovers on the signal stack.
	if goexperiment.RuntimeSecret {
		eraseSecretsSignalStk()
	}
	notesleep(&gp.m.park)
	noteclear(&gp.m.park)
}

// mexit tears down and exits the current thread.
//
// Don't call this directly to exit the thread, since it must run at
// the top of the thread stack. Instead, use gogo(&gp.m.g0.sched) to
// unwind the stack to the point that exits the thread.
//
// It is entered with m.p != nil, so write barriers are allowed. It
// will release the P before exiting.
//
//go:yeswritebarrierrec
func mexit(osStack bool) {
	mp := getg().m

	if mp == &m0 {
		// This is the main thread. Just wedge it.
		//
		// On Linux, exiting the main thread puts the process
		// into a non-waitable zombie state.
On Plan 9, 2007 // exiting the main thread unblocks wait even though 2008 // other threads are still running. On Solaris we can 2009 // neither exitThread nor return from mstart. Other 2010 // bad things probably happen on other platforms. 2011 // 2012 // We could try to clean up this M more before wedging 2013 // it, but that complicates signal handling. 2014 handoffp(releasep()) 2015 lock(&sched.lock) 2016 sched.nmfreed++ 2017 checkdead() 2018 unlock(&sched.lock) 2019 mPark() 2020 throw("locked m0 woke up") 2021 } 2022 2023 sigblock(true) 2024 unminit() 2025 2026 // Free the gsignal stack. 2027 if mp.gsignal != nil { 2028 stackfree(mp.gsignal.stack) 2029 if valgrindenabled { 2030 valgrindDeregisterStack(mp.gsignal.valgrindStackID) 2031 mp.gsignal.valgrindStackID = 0 2032 } 2033 // On some platforms, when calling into VDSO (e.g. nanotime) 2034 // we store our g on the gsignal stack, if there is one. 2035 // Now the stack is freed, unlink it from the m, so we 2036 // won't write to it when calling VDSO code. 2037 mp.gsignal = nil 2038 } 2039 2040 // Free vgetrandom state. 2041 vgetrandomDestroy(mp) 2042 2043 // Clear the self pointer so Ps don't access this M after it is freed, 2044 // or keep it alive. 2045 mp.self.clear() 2046 2047 // Remove m from allm. 2048 lock(&sched.lock) 2049 for pprev := &allm; *pprev != nil; pprev = &(*pprev).alllink { 2050 if *pprev == mp { 2051 *pprev = mp.alllink 2052 goto found 2053 } 2054 } 2055 throw("m not found in allm") 2056 found: 2057 // Events must not be traced after this point. 2058 2059 // Delay reaping m until it's done with the stack. 2060 // 2061 // Put mp on the free list, though it will not be reaped while freeWait 2062 // is freeMWait. mp is no longer reachable via allm, so even if it is 2063 // on an OS stack, we must keep a reference to mp alive so that the GC 2064 // doesn't free mp while we are still using it. 
2065 // 2066 // Note that the free list must not be linked through alllink because 2067 // some functions walk allm without locking, so may be using alllink. 2068 // 2069 // N.B. It's important that the M appears on the free list simultaneously 2070 // with it being removed so that the tracer can find it. 2071 mp.freeWait.Store(freeMWait) 2072 mp.freelink = sched.freem 2073 sched.freem = mp 2074 unlock(&sched.lock) 2075 2076 atomic.Xadd64(&ncgocall, int64(mp.ncgocall)) 2077 sched.totalRuntimeLockWaitTime.Add(mp.mLockProfile.waitTime.Load()) 2078 2079 // Release the P. 2080 handoffp(releasep()) 2081 // After this point we must not have write barriers. 2082 2083 // Invoke the deadlock detector. This must happen after 2084 // handoffp because it may have started a new M to take our 2085 // P's work. 2086 lock(&sched.lock) 2087 sched.nmfreed++ 2088 checkdead() 2089 unlock(&sched.lock) 2090 2091 if GOOS == "darwin" || GOOS == "ios" { 2092 // Make sure pendingPreemptSignals is correct when an M exits. 2093 // For #41702. 2094 if mp.signalPending.Load() != 0 { 2095 pendingPreemptSignals.Add(-1) 2096 } 2097 } 2098 2099 // Destroy all allocated resources. After this is called, we may no 2100 // longer take any locks. 2101 mdestroy(mp) 2102 2103 if osStack { 2104 // No more uses of mp, so it is safe to drop the reference. 2105 mp.freeWait.Store(freeMRef) 2106 2107 // Return from mstart and let the system thread 2108 // library free the g0 stack and terminate the thread. 2109 return 2110 } 2111 2112 // mstart is the thread's entry point, so there's nothing to 2113 // return to. Exit the thread directly. exitThread will clear 2114 // m.freeWait when it's done with the stack and the m can be 2115 // reaped. 2116 exitThread(&mp.freeWait) 2117 } 2118 2119 // forEachP calls fn(p) for every P p when p reaches a GC safe point. 2120 // If a P is currently executing code, this will bring the P to a GC 2121 // safe point and execute fn on that P. 
If the P is not executing code 2122 // (it is idle or in a syscall), this will call fn(p) directly while 2123 // preventing the P from exiting its state. This does not ensure that 2124 // fn will run on every CPU executing Go code, but it acts as a global 2125 // memory barrier. GC uses this as a "ragged barrier." 2126 // 2127 // The caller must hold worldsema. fn must not refer to any 2128 // part of the current goroutine's stack, since the GC may move it. 2129 func forEachP(reason waitReason, fn func(*p)) { 2130 systemstack(func() { 2131 gp := getg().m.curg 2132 // Mark the user stack as preemptible so that it may be scanned 2133 // by the GC or observed by the execution tracer. Otherwise, our 2134 // attempt to force all P's to a safepoint could result in a 2135 // deadlock as we attempt to preempt a goroutine that's trying 2136 // to preempt us (e.g. for a stack scan). 2137 // 2138 // casGToWaitingForSuspendG marks the goroutine as ineligible for a 2139 // stack shrink, effectively pinning the stack in memory for the duration. 2140 // 2141 // N.B. The execution tracer is not aware of this status transition and 2142 // handles it specially based on the wait reason. 2143 casGToWaitingForSuspendG(gp, _Grunning, reason) 2144 forEachPInternal(fn) 2145 casgstatus(gp, _Gwaiting, _Grunning) 2146 }) 2147 } 2148 2149 // forEachPInternal calls fn(p) for every P p when p reaches a GC safe point. 2150 // It is the internal implementation of forEachP. 2151 // 2152 // The caller must hold worldsema and either must ensure that a GC is not 2153 // running (otherwise this may deadlock with the GC trying to preempt this P) 2154 // or it must leave its goroutine in a preemptible state before it switches 2155 // to the systemstack. Due to these restrictions, prefer forEachP when possible. 
//
//go:systemstack
func forEachPInternal(fn func(*p)) {
	mp := acquirem()
	pp := getg().m.p.ptr()

	lock(&sched.lock)
	if sched.safePointWait != 0 {
		throw("forEachP: sched.safePointWait != 0")
	}
	// Every P except the current one (pp) has to report in; fn is run
	// for pp directly further below.
	sched.safePointWait = gomaxprocs - 1
	sched.safePointFn = fn

	// Ask all Ps to run the safe point function.
	for _, p2 := range allp {
		if p2 != pp {
			atomic.Store(&p2.runSafePointFn, 1)
		}
	}
	preemptall()

	// Any P entering _Pidle or a system call from now on will observe
	// p.runSafePointFn == 1 and will call runSafePointFn when
	// changing its status to _Pidle.

	// Run safe point function for all idle Ps. sched.pidle will
	// not change because we hold sched.lock.
	for p := sched.pidle.ptr(); p != nil; p = p.link.ptr() {
		if atomic.Cas(&p.runSafePointFn, 1, 0) {
			fn(p)
			sched.safePointWait--
		}
	}

	// Decide whether to wait while still holding the lock; the count may
	// keep dropping once the lock is released.
	wait := sched.safePointWait > 0
	unlock(&sched.lock)

	// Run fn for the current P.
	fn(pp)

	// Force Ps currently in a system call into _Pidle and hand them
	// off to induce safe point function execution.
	for _, p2 := range allp {
		if atomic.Load(&p2.runSafePointFn) != 1 {
			// Already ran it.
			continue
		}
		if thread, ok := setBlockOnExitSyscall(p2); ok {
			thread.takeP()
			thread.resume()
			handoffp(p2)
		}
	}

	// Wait for remaining Ps to run fn.
	if wait {
		for {
			// Wait for 100us, then try to re-preempt in
			// case of any races.
			//
			// Requires system stack.
			if notetsleep(&sched.safePointNote, 100*1000) {
				noteclear(&sched.safePointNote)
				break
			}
			preemptall()
		}
	}
	// Sanity checks: the wait count must have reached zero and every P
	// must have cleared its runSafePointFn flag.
	if sched.safePointWait != 0 {
		throw("forEachP: not done")
	}
	for _, p2 := range allp {
		if p2.runSafePointFn != 0 {
			throw("forEachP: P did not run fn")
		}
	}

	lock(&sched.lock)
	sched.safePointFn = nil
	unlock(&sched.lock)
	releasem(mp)
}

// runSafePointFn runs the safe point function, if any, for this P.
// This should be called like
//
//	if getg().m.p.runSafePointFn != 0 {
//		runSafePointFn()
//	}
//
// runSafePointFn must be checked on any transition in to _Pidle or
// when entering a system call to avoid a race where forEachP sees
// that the P is running just before the P goes into _Pidle/system call
// and neither forEachP nor the P run the safe-point function.
func runSafePointFn() {
	p := getg().m.p.ptr()
	// Resolve the race between forEachP running the safe-point
	// function on this P's behalf and this P running the
	// safe-point function directly.
	if !atomic.Cas(&p.runSafePointFn, 1, 0) {
		return
	}
	sched.safePointFn(p)
	lock(&sched.lock)
	sched.safePointWait--
	if sched.safePointWait == 0 {
		// This was the last outstanding P: wake the waiter sleeping on
		// safePointNote in forEachPInternal.
		notewakeup(&sched.safePointNote)
	}
	unlock(&sched.lock)
}

// When running with cgo, we call _cgo_thread_start
// to start threads for us so that we can play nicely with
// foreign code.
var cgoThreadStart unsafe.Pointer

// cgothreadstart is the argument block that newm1 fills in and passes to
// _cgo_thread_start.
type cgothreadstart struct {
	g   guintptr       // set by newm1 to the new M's g0
	tls *uint64        // set by newm1 to the address of the new M's tls[0]
	fn  unsafe.Pointer // set by newm1 to the PC of mstart
}

// Allocate a new m unassociated with any thread.
// Can use p for allocation context if needed.
// fn is recorded as the new m's m.mstartfn.
// id is optional pre-allocated m ID. Omit by passing -1.
//
// This function is allowed to have write barriers even if the caller
// isn't because it borrows pp.
//
//go:yeswritebarrierrec
func allocm(pp *p, fn func(), id int64) *m {
	allocmLock.rlock()

	// The caller owns pp, but we may borrow (i.e., acquirep) it. We must
	// disable preemption to ensure it is not stolen, which would make the
	// caller lose ownership.
	acquirem()

	gp := getg()
	if gp.m.p == 0 {
		acquirep(pp) // temporarily borrow p for mallocs in this function
	}

	// Release the free M list. We need to do this somewhere and
	// this may free up a stack we can use.
	if sched.freem != nil {
		lock(&sched.lock)
		// newList collects the Ms that cannot be reaped yet; it replaces
		// sched.freem once the walk is done.
		var newList *m
		for freem := sched.freem; freem != nil; {
			// Wait for freeWait to indicate that freem's stack is unused.
			wait := freem.freeWait.Load()
			if wait == freeMWait {
				// Still in use: keep it on the list for a later call.
				next := freem.freelink
				freem.freelink = newList
				newList = freem
				freem = next
				continue
			}
			// Drop any remaining trace resources.
			// Ms can continue to emit events all the way until wait != freeMWait,
			// so it's only safe to call traceThreadDestroy at this point.
			if traceEnabled() || traceShuttingDown() {
				traceThreadDestroy(freem)
			}
			// Free the stack if needed. For freeMRef, there is
			// nothing to do except drop freem from the sched.freem
			// list.
			if wait == freeMStack {
				// stackfree must be on the system stack, but allocm is
				// reachable off the system stack transitively from
				// startm.
				systemstack(func() {
					stackfree(freem.g0.stack)
					if valgrindenabled {
						valgrindDeregisterStack(freem.g0.valgrindStackID)
						freem.g0.valgrindStackID = 0
					}
				})
			}
			freem = freem.freelink
		}
		sched.freem = newList
		unlock(&sched.lock)
	}

	// Allocate the m as part of an mPadded and record its start function.
	mp := &new(mPadded).m
	mp.mstartfn = fn
	mcommoninit(mp, id)

	// In case of cgo or Solaris or illumos or Darwin, pthread_create will make us a stack.
	// Windows and Plan 9 will layout sched stack on OS stack.
	if iscgo || mStackIsSystemAllocated() {
		mp.g0 = malg(-1)
	} else {
		mp.g0 = malg(16384 * sys.StackGuardMultiplier)
	}
	mp.g0.m = mp

	if pp == gp.m.p.ptr() {
		releasep()
	}

	releasem(gp.m)
	allocmLock.runlock()
	return mp
}

// needm is called when a cgo callback happens on a
// thread without an m (a thread not created by Go).
// In this case, needm is expected to find an m to use
// and return with m, g initialized correctly.
// Since m and g are not set now (likely nil, but see below)
// needm is limited in what routines it can call. In particular
// it can only call nosplit functions (textflag 7) and cannot
// do any scheduling that requires an m.
//
// In order to avoid needing heavy lifting here, we adopt
// the following strategy: there is a stack of available m's
// that can be stolen. Using compare-and-swap
// to pop from the stack has ABA races, so we simulate
// a lock by doing an exchange (via Casuintptr) to steal the stack
// head and replace the top pointer with MLOCKED (1).
// This serves as a simple spin lock that we can use even
// without an m. The thread that locks the stack in this way
// unlocks the stack by storing a valid stack head pointer.
//
// In order to make sure that there is always an m structure
// available to be stolen, we maintain the invariant that there
// is always one more than needed. At the beginning of the
// program (if cgo is in use) the list is seeded with a single m.
// If needm finds that it has taken the last m off the list, its job
// is - once it has installed its own m so that it can do things like
// allocate memory - to create a spare m and put it on the list.
//
// Each of these extra m's also has a g0 and a curg that are
// pressed into service as the scheduling stack and current
// goroutine for the duration of the cgo callback.
//
// It calls dropm to put the m back on the list,
// 1. when the callback is done with the m in non-pthread platforms,
// 2. or when the C thread is exiting on pthread platforms.
//
// The signal argument indicates whether we're called from a signal
// handler.
//
//go:nosplit
func needm(signal bool) {
	if (iscgo || GOOS == "windows") && !cgoHasExtraM {
		// Can happen if C/C++ code calls Go from a global ctor.
		// Can also happen on Windows if a global ctor uses a
		// callback created by syscall.NewCallback. See issue #6751
		// for details.
		//
		// Cannot throw, because scheduler is not initialized yet.
		writeErrStr("fatal error: cgo callback before cgo call\n")
		exit(1)
	}

	// Save and block signals before getting an M.
	// The signal handler may call needm itself,
	// and we must avoid a deadlock. Also, once g is installed,
	// any incoming signals will try to execute,
	// but we won't have the sigaltstack settings and other data
	// set up appropriately until the end of minit, which will
	// unblock the signals. This is the same dance as when
	// starting a new m to run Go code via newosproc.
	var sigmask sigset
	sigsave(&sigmask)
	sigblock(false)

	// getExtraM is safe here because of the invariant above,
	// that the extra list always contains or will soon contain
	// at least one m.
	mp, last := getExtraM()

	// Set needextram when we've just emptied the list,
	// so that the eventual call into cgocallbackg will
	// allocate a new m for the extra list. We delay the
	// allocation until then so that it can be done
	// after exitsyscall makes sure it is okay to be
	// running at all (that is, there's no garbage collection
	// running right now).
	mp.needextram = last

	// Store the original signal mask for use by minit.
	mp.sigmask = sigmask

	// Install TLS on some platforms (previously setg
	// would do this if necessary).
	osSetupTLS(mp)

	// Install g (= m->g0) and set the stack bounds
	// to match the current stack.
	setg(mp.g0)
	sp := sys.GetCallerSP()
	callbackUpdateSystemStack(mp, sp, signal)

	// We must mark that we are already in Go now.
	// Otherwise, we may call needm again when we get a signal, before cgocallbackg1,
	// which means the extram list may be empty, that will cause a deadlock.
	mp.isExtraInC = false

	// Initialize this thread to use the m.
	asminit()
	minit()

	// Emit a trace event for this dead -> syscall transition,
	// but only if we're not in a signal handler.
	//
	// N.B. the tracer can run on a bare M just fine, we just have
	// to make sure to do this before setg(nil) and unminit.
	var trace traceLocker
	if !signal {
		trace = traceAcquire()
	}

	// mp.curg is now a real goroutine.
	casgstatus(mp.curg, _Gdeadextra, _Gsyscall)
	sched.ngsys.Add(-1)

	// This is technically inaccurate, but we set isExtraInC to false above,
	// and so we need to update addGSyscallNoP to keep the two pieces of state
	// consistent (it's only updated when isExtraInC is false). More specifically,
	// When we get to cgocallbackg and exitsyscall, we'll be looking for a P, and
	// since isExtraInC is false, we will decrement this metric.
	//
	// The inaccuracy is thankfully transient: only until this thread can get a P.
	// We're going into Go anyway, so it's okay to pretend we're a real goroutine now.
	addGSyscallNoP(mp)

	if !signal {
		if trace.ok() {
			trace.GoCreateSyscall(mp.curg)
			traceRelease(trace)
		}
	}
	// Remember whether this M was taken from a signal handler; dropm
	// reads this to decide whether to emit the matching trace event.
	mp.isExtraInSig = signal
}

// Acquire an extra m and bind it to the C thread when a pthread key has been created.
// The m is acquired with needm(false), i.e. not from a signal handler.
//
//go:nosplit
func needAndBindM() {
	needm(false)

	if _cgo_pthread_key_created != nil && *(*uintptr)(_cgo_pthread_key_created) != 0 {
		cgoBindM()
	}
}

// newextram allocates m's and puts them on the extra list.
// It is called with a working local m, so that it can do things
// like call schedlock and allocate.
//
// If lockextra has recorded waiters in extraMWaiters, one extra m is
// created per waiter (and the count is reset to zero). Otherwise a single
// extra m is created, and only if the list is currently empty.
func newextram() {
	c := extraMWaiters.Swap(0)
	if c > 0 {
		for i := uint32(0); i < c; i++ {
			oneNewExtraM()
		}
	} else if extraMLength.Load() == 0 {
		// Make sure there is at least one extra M.
		oneNewExtraM()
	}
}

// oneNewExtraM allocates an m and puts it on the extra list.
// The m gets a curg in _Gdeadextra state that is locked to it.
func oneNewExtraM() {
	// Create extra goroutine locked to extra m.
	// The goroutine is the context in which the cgo callback will run.
	// The sched.pc will never be returned to, but setting it to
	// goexit makes clear to the traceback routines where
	// the goroutine stack ends.
	mp := allocm(nil, nil, -1)
	gp := malg(4096)
	gp.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum
	gp.sched.sp = gp.stack.hi
	gp.sched.sp -= 4 * goarch.PtrSize // extra space in case of reads slightly beyond frame
	gp.sched.lr = 0
	gp.sched.g = guintptr(unsafe.Pointer(gp))
	gp.syscallpc = gp.sched.pc
	gp.syscallsp = gp.sched.sp
	gp.stktopsp = gp.sched.sp
	// malg returns status as _Gidle. Change to _Gdeadextra before
	// adding to allg where GC can see it. _Gdeadextra hides this
	// from traceback and stack scans.
	casgstatus(gp, _Gidle, _Gdeadextra)
	gp.m = mp
	mp.curg = gp
	mp.isextra = true
	// mark we are in C by default.
	mp.isExtraInC = true
	// Lock gp and mp to each other.
	mp.lockedInt++
	mp.lockedg.set(gp)
	gp.lockedm.set(mp)
	gp.goid = sched.goidgen.Add(1)
	if raceenabled {
		gp.racectx = racegostart(abi.FuncPCABIInternal(newextram) + sys.PCQuantum)
	}
	// put on allg for garbage collector
	allgadd(gp)

	// gp is now on the allg list, but we don't want it to be
	// counted by gcount. It would be more "proper" to increment
	// sched.ngfree, but that requires locking. Incrementing ngsys
	// has the same effect.
	sched.ngsys.Add(1)

	// Add m to the extra list.
	addExtraM(mp)
}

// dropm puts the current m back onto the extra list.
//
// 1. On systems without pthreads, like Windows
// dropm is called when a cgo callback has called needm but is now
// done with the callback and returning back into the non-Go thread.
//
// The main expense here is the call to signalstack to release the
// m's signal stack, and then the call to needm on the next callback
// from this thread. It is tempting to try to save the m for next time,
// which would eliminate both these costs, but there might not be
// a next time: the current thread (which Go does not control) might exit.
// If we saved the m for that thread, there would be an m leak each time
// such a thread exited. Instead, we acquire and release an m on each
// call. These should typically not be scheduling operations, just a few
// atomics, so the cost should be small.
//
// 2. On systems with pthreads
// dropm is called while a non-Go thread is exiting.
// We allocate a pthread per-thread variable using pthread_key_create,
// to register a thread-exit-time destructor.
// And store the g into a thread-specific value associated with the pthread key,
// when first return back to C.
// So that the destructor would invoke dropm while the non-Go thread is exiting.
// This is much faster since it avoids expensive signal-related syscalls.
//
// This may run without a P, so //go:nowritebarrierrec is required.
//
// This may run with a different stack than was recorded in g0 (there is no
// call to callbackUpdateSystemStack prior to dropm), so this must be
// //go:nosplit to avoid the stack bounds check.
//
//go:nowritebarrierrec
//go:nosplit
func dropm() {
	// Clear m and g, and return m to the extra list.
	// After the call to setg we can only call nosplit functions
	// with no pointer manipulation.
	mp := getg().m

	// Emit a trace event for this syscall -> dead transition.
	//
	// N.B. the tracer can run on a bare M just fine, we just have
	// to make sure to do this before setg(nil) and unminit.
	var trace traceLocker
	if !mp.isExtraInSig {
		trace = traceAcquire()
	}

	// Return mp.curg to _Gdeadextra state.
	casgstatus(mp.curg, _Gsyscall, _Gdeadextra)
	mp.curg.preemptStop = false
	sched.ngsys.Add(1)
	decGSyscallNoP(mp)

	if !mp.isExtraInSig {
		if trace.ok() {
			trace.GoDestroySyscall()
			traceRelease(trace)
		}
	}

	// Trash syscalltick so that it doesn't line up with mp.old.syscalltick anymore.
	//
	// In the new tracer, we model needm and dropm as a goroutine being created and
	// destroyed respectively. The m then might get reused with a different procid but
	// still with a reference to oldp, and still with the same syscalltick. The next
	// time a G is "created" in needm, it'll return and quietly reacquire its P from a
	// different m with a different procid, which will confuse the trace parser. By
	// trashing syscalltick, we ensure that it'll appear as if we lost the P to the
	// trace parser and that we just reacquired it.
	//
	// Trash the value by decrementing because that gets us as far away from the value
	// the syscall exit code expects as possible. Setting to zero is risky because
	// syscalltick could already be zero (and in fact, is initialized to zero).
	mp.syscalltick--

	// Reset trace state unconditionally. This goroutine is being 'destroyed'
	// from the perspective of the tracer.
	mp.curg.trace.reset()

	// Flush all the M's buffers. This is necessary because the M might
	// be used on a different thread with a different procid, so we have
	// to make sure we don't write into the same buffer.
	if traceEnabled() || traceShuttingDown() {
		// Acquire sched.lock across thread destruction. One of the invariants of the tracer
		// is that a thread cannot disappear from the tracer's view (allm or freem) without
		// it noticing, so it requires that sched.lock be held over traceThreadDestroy.
		//
		// This isn't strictly necessary in this case, because this thread never leaves allm,
		// but the critical section is short and dropm is rare on pthread platforms, so just
		// take the lock and play it safe. traceThreadDestroy also asserts that the lock is held.
		lock(&sched.lock)
		traceThreadDestroy(mp)
		unlock(&sched.lock)
	}
	mp.isExtraInSig = false

	// Block signals before unminit.
	// Unminit unregisters the signal handling stack (but needs g on some systems).
	// Setg(nil) clears g, which is the signal handler's cue not to run Go handlers.
	// It's important not to try to handle a signal between those two steps.
	//
	// The saved mask (stored by needm) is copied out first because mp goes
	// back on the extra list before the mask is restored below.
	sigmask := mp.sigmask
	sigblock(false)
	unminit()

	setg(nil)

	// Clear g0 stack bounds to ensure that needm always refreshes the
	// bounds when reusing this M.
	g0 := mp.g0
	g0.stack.hi = 0
	g0.stack.lo = 0
	g0.stackguard0 = 0
	g0.stackguard1 = 0
	mp.g0StackAccurate = false

	putExtraM(mp)

	msigrestore(sigmask)
}

// cgoBindM stores the g0 of the current m into a thread-specific value.
//
// We allocate a pthread per-thread variable using pthread_key_create,
// to register a thread-exit-time destructor.
// We are here setting the thread-specific value of the pthread key, to enable the destructor.
// So that the pthread_key_destructor would dropm while the C thread is exiting.
//
// And the saved g will be used in pthread_key_destructor,
// since the g stored in the TLS by Go might be cleared in some platforms,
// before the destructor invoked, so, we restore g by the stored g, before dropm.
//
// We store g0 instead of m, to make the assembly code simpler,
// since we need to restore g0 in runtime.cgocallback.
//
// On systems without pthreads, like Windows, cgoBindM shouldn't be used.
//
// NOTE: this always runs without a P, so, nowritebarrierrec required.
//
//go:nosplit
//go:nowritebarrierrec
func cgoBindM() {
	if GOOS == "windows" || GOOS == "plan9" {
		fatal("bindm in unexpected GOOS")
	}
	// Must be running on g0: that is the g handed to _cgo_bindm.
	g := getg()
	if g.m.g0 != g {
		fatal("the current g is not g0")
	}
	if _cgo_bindm != nil {
		asmcgocall(_cgo_bindm, unsafe.Pointer(g))
	}
}

// A helper function for EnsureDropM.
// It returns the address of the current m as a uintptr.
//
// getm should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - fortio.org/log
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname getm
func getm() uintptr {
	return uintptr(unsafe.Pointer(getg().m))
}

var (
	// Locking linked list of extra M's, via mp.schedlink. Must be accessed
	// only via lockextra/unlockextra.
	//
	// Can't be atomic.Pointer[m] because we use an invalid pointer as a
	// "locked" sentinel value. M's on this list remain visible to the GC
	// because their mp.curg is on allgs.
	extraM atomic.Uintptr
	// Number of M's in the extraM list.
	extraMLength atomic.Uint32
	// Number of waiters in lockextra.
	extraMWaiters atomic.Uint32

	// Number of extra M's in use by threads.
	extraMInUse atomic.Uint32
)

// lockextra locks the extra list and returns the list head.
// The caller must unlock the list by storing a new list head
// to extraM (see unlockextra). If nilokay is true, then lockextra will
// return a nil list head if that's what it finds. If nilokay is false,
// lockextra will keep waiting until the list head is no longer nil.
//
//go:nosplit
func lockextra(nilokay bool) *m {
	// locked is the sentinel stored in extraM while the list is locked.
	const locked = 1

	// incr records whether this caller has already been counted in
	// extraMWaiters, so that it is counted at most once.
	incr := false
	for {
		old := extraM.Load()
		if old == locked {
			osyield_no_g()
			continue
		}
		if old == 0 && !nilokay {
			if !incr {
				// Add 1 to the number of threads
				// waiting for an M.
				// This is cleared by newextram.
				extraMWaiters.Add(1)
				incr = true
			}
			usleep_no_g(1)
			continue
		}
		if extraM.CompareAndSwap(old, locked) {
			return (*m)(unsafe.Pointer(old))
		}
		osyield_no_g()
		continue
	}
}

// unlockextra adjusts extraMLength by delta and then stores mp as the new
// head of the extra list, which releases the lock taken by lockextra.
//
//go:nosplit
func unlockextra(mp *m, delta int32) {
	extraMLength.Add(delta)
	extraM.Store(uintptr(unsafe.Pointer(mp)))
}

// Return an M from the extra M list. Returns last == true if the list becomes
// empty because of this call.
//
// Spins waiting for an extra M, so caller must ensure that the list always
// contains or will soon contain at least one M.
//
//go:nosplit
func getExtraM() (mp *m, last bool) {
	mp = lockextra(false)
	extraMInUse.Add(1)
	// Pop mp: the rest of the list (mp.schedlink) becomes the new head.
	unlockextra(mp.schedlink.ptr(), -1)
	return mp, mp.schedlink.ptr() == nil
}

// Returns an extra M back to the list. mp must be from getExtraM. Newly
// allocated M's should use addExtraM.
//
//go:nosplit
func putExtraM(mp *m) {
	extraMInUse.Add(-1)
	addExtraM(mp)
}

// Adds a newly allocated M to the extra M list.
// The M is pushed onto the head of the list.
//
//go:nosplit
func addExtraM(mp *m) {
	// A nil head is fine here: mp simply becomes the only element.
	mnext := lockextra(true)
	mp.schedlink.set(mnext)
	unlockextra(mp, 1)
}

var (
	// allocmLock is locked for read when creating new Ms in allocm and their
	// addition to allm. Thus acquiring this lock for write blocks the
	// creation of new Ms.
	allocmLock rwmutex

	// execLock serializes exec and clone to avoid bugs or unspecified
	// behaviour around exec'ing while creating/destroying threads. See
	// issue #19546.
	execLock rwmutex
)

// These errors are reported (via writeErrStr) by some OS-specific
// versions of newosproc and newosproc0.
const (
	failthreadcreate  = "runtime: failed to create new OS thread\n"
	failallocatestack = "runtime: failed to allocate stack for the new OS thread\n"
)

// newmHandoff contains a list of m structures that need new OS threads.
// This is used by newm in situations where newm itself can't safely
// start an OS thread.
var newmHandoff struct {
	lock mutex

	// newm points to a list of M structures that need new OS
	// threads. The list is linked through m.schedlink.
	newm muintptr

	// waiting indicates that wake needs to be notified when an m
	// is put on the list.
	waiting bool
	wake    note

	// haveTemplateThread indicates that the templateThread has
	// been started. This is not protected by lock. Use cas to set
	// to 1.
	haveTemplateThread uint32
}

// Create a new m. It will start off with a call to fn, or else the scheduler.
// fn needs to be static and not a heap allocated closure.
// May run with m.p==nil, so write barriers are not allowed.
//
// pp is recorded as the new m's nextp.
//
// id is optional pre-allocated m ID. Omit by passing -1.
//
//go:nowritebarrierrec
func newm(fn func(), pp *p, id int64) {
	// allocm adds a new M to allm, but they do not start until created by
	// the OS in newm1 or the template thread.
	//
	// doAllThreadsSyscall requires that every M in allm will eventually
	// start and be signal-able, even with a STW.
	//
	// Disable preemption here until we start the thread to ensure that
	// newm is not preempted between allocm and starting the new thread,
	// ensuring that anything added to allm is guaranteed to eventually
	// start.
	acquirem()

	mp := allocm(pp, fn, id)
	mp.nextp.set(pp)
	mp.sigmask = initSigmask
	if gp := getg(); gp != nil && gp.m != nil && (gp.m.lockedExt != 0 || gp.m.incgo) && GOOS != "plan9" {
		// We're on a locked M or a thread that may have been
		// started by C. The kernel state of this thread may
		// be strange (the user may have locked it for that
		// purpose). We don't want to clone that into another
		// thread. Instead, ask a known-good thread to create
		// the thread for us.
		//
		// This is disabled on Plan 9. See golang.org/issue/22227.
		//
		// TODO: This may be unnecessary on Windows, which
		// doesn't model thread creation off fork.
		lock(&newmHandoff.lock)
		if newmHandoff.haveTemplateThread == 0 {
			throw("on a locked thread with no template thread")
		}
		// Push mp onto the handoff list for templateThread to start.
		mp.schedlink = newmHandoff.newm
		newmHandoff.newm.set(mp)
		if newmHandoff.waiting {
			newmHandoff.waiting = false
			notewakeup(&newmHandoff.wake)
		}
		unlock(&newmHandoff.lock)
		// The M has not started yet, but the template thread does not
		// participate in STW, so it will always process queued Ms and
		// it is safe to releasem.
		releasem(getg().m)
		return
	}
	newm1(mp)
	releasem(getg().m)
}

// newm1 starts an OS thread for mp. With cgo (and _cgo_thread_start
// available) the thread is created through _cgo_thread_start; otherwise it
// is created with newosproc. In both cases execLock is read-locked around
// the thread creation.
func newm1(mp *m) {
	if iscgo && _cgo_thread_start != nil {
		var ts cgothreadstart
		ts.g.set(mp.g0)
		ts.tls = (*uint64)(unsafe.Pointer(&mp.tls[0]))
		ts.fn = unsafe.Pointer(abi.FuncPCABI0(mstart))
		if msanenabled {
			msanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts))
		}
		if asanenabled {
			asanwrite(unsafe.Pointer(&ts), unsafe.Sizeof(ts))
		}
		execLock.rlock() // Prevent process clone.
		asmcgocall(_cgo_thread_start, unsafe.Pointer(&ts))
		execLock.runlock()
		return
	}
	execLock.rlock() // Prevent process clone.
	newosproc(mp)
	execLock.runlock()
}

// startTemplateThread starts the template thread if it is not already
// running.
//
// The calling thread must itself be in a known-good state.
func startTemplateThread() {
	if GOARCH == "wasm" { // no threads on wasm yet
		return
	}

	// Disable preemption to guarantee that the template thread will be
	// created before a park once haveTemplateThread is set.
	mp := acquirem()
	// Only the caller that wins the 0 -> 1 transition creates the thread.
	if !atomic.Cas(&newmHandoff.haveTemplateThread, 0, 1) {
		releasem(mp)
		return
	}
	newm(templateThread, nil, -1)
	releasem(mp)
}

// templateThread is a thread in a known-good state that exists solely
// to start new threads in known-good states when the calling thread
// may not be in a good state.
//
// Many programs never need this, so templateThread is started lazily
// when we first enter a state that might lead to running on a thread
// in an unknown state.
//
// templateThread runs on an M without a P, so it must not have write
// barriers.
//
//go:nowritebarrierrec
func templateThread() {
	lock(&sched.lock)
	sched.nmsys++
	checkdead()
	unlock(&sched.lock)

	for {
		lock(&newmHandoff.lock)
		for newmHandoff.newm != 0 {
			// Take the whole pending list and start its Ms with the
			// lock released; newm may queue more in the meantime, hence
			// the re-check of newmHandoff.newm after relocking.
			newm := newmHandoff.newm.ptr()
			newmHandoff.newm = 0
			unlock(&newmHandoff.lock)
			for newm != nil {
				next := newm.schedlink.ptr()
				newm.schedlink = 0
				newm1(newm)
				newm = next
			}
			lock(&newmHandoff.lock)
		}
		// Nothing left to start: ask newm to wake us, then sleep.
		newmHandoff.waiting = true
		noteclear(&newmHandoff.wake)
		unlock(&newmHandoff.lock)
		notesleep(&newmHandoff.wake)
	}
}

// Stops execution of the current m until new work is available.
// Returns with acquired P.
func stopm() {
	gp := getg()

	if gp.m.locks != 0 {
		throw("stopm holding locks")
	}
	if gp.m.p != 0 {
		throw("stopm holding p")
	}
	if gp.m.spinning {
		throw("stopm spinning")
	}

	lock(&sched.lock)
	mput(gp.m)
	unlock(&sched.lock)
	mPark()
	// The waker (e.g. startm) stored the P for us in nextp before waking
	// this M.
	acquirep(gp.m.nextp.ptr())
	gp.m.nextp = 0
}

// mspinning marks the current M as spinning. startm passes it to newm as
// the start function of a newly created M when spinning was requested.
func mspinning() {
	// startm's caller incremented nmspinning. Set the new M's spinning.
	getg().m.spinning = true
}

// Schedules some M to run the p (creates an M if necessary).
// If p==nil, tries to get an idle P, if no idle P's does nothing.
// May run with m.p==nil, so write barriers are not allowed.
// If spinning is set, the caller has incremented nmspinning and must provide a
// P. startm will set m.spinning in the newly started M.
//
// Callers passing a non-nil P must call from a non-preemptible context. See
// comment on acquirem below.
//
// Argument lockheld indicates whether the caller already acquired the
// scheduler lock. Callers holding the lock when making the call must pass
// true. The lock might be temporarily dropped, but will be reacquired before
// returning.
//
// Must not have write barriers because this may be called without a P.
//
//go:nowritebarrierrec
func startm(pp *p, spinning, lockheld bool) {
	// Disable preemption.
	//
	// Every owned P must have an owner that will eventually stop it in the
	// event of a GC stop request. startm takes transient ownership of a P
	// (either from argument or pidleget below) and transfers ownership to
	// a started M, which will be responsible for performing the stop.
	//
	// Preemption must be disabled during this transient ownership,
	// otherwise the P this is running on may enter GC stop while still
	// holding the transient P, leaving that P in limbo and deadlocking the
	// STW.
	//
	// Callers passing a non-nil P must already be in non-preemptible
	// context, otherwise such preemption could occur on function entry to
	// startm. Callers passing a nil P may be preemptible, so we must
	// disable preemption before acquiring a P from pidleget below.
	mp := acquirem()
	if !lockheld {
		lock(&sched.lock)
	}
	if pp == nil {
		if spinning {
			// TODO(prattmic): All remaining calls to this function
			// with pp == nil could be cleaned up to find a P
			// before calling startm.
			throw("startm: P required for spinning=true")
		}
		pp, _ = pidleget(0)
		if pp == nil {
			if !lockheld {
				unlock(&sched.lock)
			}
			releasem(mp)
			return
		}
	}
	nmp := mget()
	if nmp == nil {
		// No M is available, we must drop sched.lock and call newm.
		// However, we already own a P to assign to the M.
		//
		// Once sched.lock is released, another G (e.g., in a syscall),
		// could find no idle P while checkdead finds a runnable G but
		// no running M's because this new M hasn't started yet, thus
		// throwing in an apparent deadlock.
		// This apparent deadlock is possible when startm is called
		// from sysmon, which doesn't count as a running M.
		//
		// Avoid this situation by pre-allocating the ID for the new M,
		// thus marking it as 'running' before we drop sched.lock. This
		// new M will eventually run the scheduler to execute any
		// queued G's.
		id := mReserveID()
		unlock(&sched.lock)

		var fn func()
		if spinning {
			// The caller incremented nmspinning, so set m.spinning in the new M.
			fn = mspinning
		}
		newm(fn, pp, id)

		if lockheld {
			lock(&sched.lock)
		}
		// Ownership transfer of pp committed by start in newm.
		// Preemption is now safe.
		releasem(mp)
		return
	}
	if !lockheld {
		unlock(&sched.lock)
	}
	if nmp.spinning {
		throw("startm: m is spinning")
	}
	if nmp.nextp != 0 {
		throw("startm: m has p")
	}
	if spinning && !runqempty(pp) {
		throw("startm: p has runnable gs")
	}
	// The caller incremented nmspinning, so set m.spinning in the new M.
	nmp.spinning = spinning
	nmp.nextp.set(pp)
	notewakeup(&nmp.park)
	// Ownership transfer of pp committed by wakeup. Preemption is now
	// safe.
	releasem(mp)
}

// Hands off P from syscall or locked M.
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
func handoffp(pp *p) {
	// handoffp must start an M in any situation where
	// findRunnable would return a G to run on pp.

	// if it has local work, start it straight away
	if !runqempty(pp) || !sched.runq.empty() {
		startm(pp, false, false)
		return
	}
	// if there's trace work to do, start it straight away
	if (traceEnabled() || traceShuttingDown()) && traceReaderAvailable() != nil {
		startm(pp, false, false)
		return
	}
	// if it has GC work, start it straight away
	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) {
		startm(pp, false, false)
		return
	}
	// no local work, check that there are no spinning/idle M's,
	// otherwise our help is not required
	if sched.nmspinning.Load()+sched.npidle.Load() == 0 && sched.nmspinning.CompareAndSwap(0, 1) { // TODO: fast atomic
		sched.needspinning.Store(0)
		startm(pp, true, false)
		return
	}
	lock(&sched.lock)
	if sched.gcwaiting.Load() {
		// A stop-the-world is pending: park this P in _Pgcstop and
		// wake the stopper if it was the last P being waited for.
		pp.status = _Pgcstop
		pp.gcStopTime = nanotime()
		sched.stopwait--
		if sched.stopwait == 0 {
			notewakeup(&sched.stopnote)
		}
		unlock(&sched.lock)
		return
	}
	if pp.runSafePointFn != 0 && atomic.Cas(&pp.runSafePointFn, 1, 0) {
		sched.safePointFn(pp)
		sched.safePointWait--
		if sched.safePointWait == 0 {
			notewakeup(&sched.safePointNote)
		}
	}
	if !sched.runq.empty() {
		unlock(&sched.lock)
		startm(pp, false, false)
		return
	}
	// If this is the last running P and nobody is polling network,
	// need to wakeup another M to poll network.
	if sched.npidle.Load() == gomaxprocs-1 && sched.lastpoll.Load() != 0 {
		unlock(&sched.lock)
		startm(pp, false, false)
		return
	}

	// The scheduler lock cannot be held when calling wakeNetPoller below
	// because wakeNetPoller may call wakep which may call startm.
	when := pp.timers.wakeTime()
	pidleput(pp, 0)
	unlock(&sched.lock)

	if when != 0 {
		wakeNetPoller(when)
	}
}

// Tries to add one more P to execute G's.
// Called when a G is made runnable (newproc, ready).
// Must be called with a P.
//
// wakep should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname wakep
func wakep() {
	// Be conservative about spinning threads, only start one if none exist
	// already.
	if sched.nmspinning.Load() != 0 || !sched.nmspinning.CompareAndSwap(0, 1) {
		return
	}

	// Disable preemption until ownership of pp transfers to the next M in
	// startm. Otherwise preemption here would leave pp stuck waiting to
	// enter _Pgcstop.
	//
	// See preemption comment on acquirem in startm for more details.
	mp := acquirem()

	var pp *p
	lock(&sched.lock)
	pp, _ = pidlegetSpinning(0)
	if pp == nil {
		// No idle P: undo the nmspinning increment made by the
		// CompareAndSwap above.
		if sched.nmspinning.Add(-1) < 0 {
			throw("wakep: negative nmspinning")
		}
		unlock(&sched.lock)
		releasem(mp)
		return
	}
	// Since we always have a P, the race in the "No M is available"
	// comment in startm doesn't apply during the small window between the
	// unlock here and lock in startm. A checkdead in between will always
	// see at least one running M (ours).
	unlock(&sched.lock)

	startm(pp, true, false)

	releasem(mp)
}

// Stops execution of the current m that is locked to a g until the g is runnable again.
// Returns with acquired P.
func stoplockedm() {
	gp := getg()

	// The M and its locked G must point at each other.
	if gp.m.lockedg == 0 || gp.m.lockedg.ptr().lockedm.ptr() != gp.m {
		throw("stoplockedm: inconsistent locking")
	}
	if gp.m.p != 0 {
		// Schedule another M to run this p.
		pp := releasep()
		handoffp(pp)
	}
	incidlelocked(1)
	// Wait until another thread schedules lockedg again.
	mPark()
	// Mask off the _Gscan bit so a concurrently scanned G is accepted too.
	status := readgstatus(gp.m.lockedg.ptr())
	if status&^_Gscan != _Grunnable {
		print("runtime:stoplockedm: lockedg (atomicstatus=", status, ") is not Grunnable or Gscanrunnable\n")
		dumpgstatus(gp.m.lockedg.ptr())
		throw("stoplockedm: not runnable")
	}
	// The waker (e.g. startlockedm) stored our P in nextp.
	acquirep(gp.m.nextp.ptr())
	gp.m.nextp = 0
}

// Schedules the locked m to run the locked gp.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func startlockedm(gp *g) {
	mp := gp.lockedm.ptr()
	if mp == getg().m {
		throw("startlockedm: locked to me")
	}
	if mp.nextp != 0 {
		throw("startlockedm: m has p")
	}
	// directly handoff current P to the locked m
	incidlelocked(-1)
	pp := releasep()
	mp.nextp.set(pp)
	notewakeup(&mp.park)
	// This M gave away its P, so it stops until it is handed another one.
	stopm()
}

// Stops the current m for stopTheWorld.
// Returns when the world is restarted.
func gcstopm() {
	gp := getg()

	if !sched.gcwaiting.Load() {
		throw("gcstopm: not waiting for gc")
	}
	if gp.m.spinning {
		gp.m.spinning = false
		// OK to just drop nmspinning here,
		// startTheWorld will unpark threads as necessary.
		if sched.nmspinning.Add(-1) < 0 {
			throw("gcstopm: negative nmspinning")
		}
	}
	pp := releasep()
	lock(&sched.lock)
	pp.status = _Pgcstop
	pp.gcStopTime = nanotime()
	sched.stopwait--
	if sched.stopwait == 0 {
		// This was the last P being waited for; wake the stopper.
		notewakeup(&sched.stopnote)
	}
	unlock(&sched.lock)
	stopm()
}

// Schedules gp to run on the current M.
// If inheritTime is true, gp inherits the remaining time in the
// current time slice. Otherwise, it starts a new time slice.
// Never returns.
//
// Write barriers are allowed because this is called immediately after
// acquiring a P in several places.
//
//go:yeswritebarrierrec
func execute(gp *g, inheritTime bool) {
	mp := getg().m

	if goroutineProfile.active {
		// Make sure that gp has had its stack written out to the goroutine
		// profile, exactly as it was when the goroutine profiler first stopped
		// the world.
		tryRecordGoroutineProfile(gp, nil, osyield)
	}

	// Assign gp.m before entering _Grunning so running Gs have an M.
	mp.curg = gp
	gp.m = mp
	gp.syncSafePoint = false // Clear the flag, which may have been set by morestack.
	casgstatus(gp, _Grunnable, _Grunning)
	gp.waitsince = 0
	gp.preempt = false
	gp.stackguard0 = gp.stack.lo + stackGuard
	if !inheritTime {
		// A new time slice counts as a new scheduling tick on this P.
		mp.p.ptr().schedtick++
	}

	if sys.DITSupported && debug.dataindependenttiming != 1 {
		if gp.ditWanted && !mp.ditEnabled {
			// The current M doesn't have DIT enabled, but the goroutine we're
			// executing does need it, so turn it on.
			sys.EnableDIT()
			mp.ditEnabled = true
		} else if !gp.ditWanted && mp.ditEnabled {
			// The current M has DIT enabled, but the goroutine we're executing does
			// not need it, so turn it off.
			// NOTE: turning off DIT here means that the scheduler will have DIT enabled
			// when it runs after this goroutine yields or is preempted. This may have
			// a minor performance impact on the scheduler.
			sys.DisableDIT()
			mp.ditEnabled = false
		}
	}

	// Check whether the profiler needs to be turned on or off.
	hz := sched.profilehz
	if mp.profilehz != hz {
		setThreadCPUProfiler(hz)
	}

	trace := traceAcquire()
	if trace.ok() {
		trace.GoStart()
		traceRelease(trace)
	}

	gogo(&gp.sched)
}

// Finds a runnable goroutine to execute.
// Tries to steal from other P's, get g from local or global queue, poll network.
// tryWakeP indicates that the returned goroutine is not normal (GC worker, trace
// reader) so the caller should try to wake a P.
func findRunnable() (gp *g, inheritTime, tryWakeP bool) {
	mp := getg().m

	// The conditions here and in handoffp must agree: if
	// findRunnable would return a G to run, handoffp must start
	// an M.

top:
	// We may have collected an allp snapshot below. The snapshot is only
	// required in each loop iteration. Clear it to allow the GC to collect
	// the slice.
	mp.clearAllpSnapshot()

	pp := mp.p.ptr()
	if sched.gcwaiting.Load() {
		gcstopm()
		goto top
	}
	if pp.runSafePointFn != 0 {
		runSafePointFn()
	}

	// now and pollUntil are saved for work stealing later,
	// which may steal timers. It's important that between now
	// and then, nothing blocks, so these numbers remain mostly
	// relevant.
	now, pollUntil, _ := pp.timers.check(0, nil)

	// Try to schedule the trace reader.
	if traceEnabled() || traceShuttingDown() {
		gp := traceReader()
		if gp != nil {
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, true
		}
	}

	// Try to schedule a GC worker.
	if gcBlackenEnabled != 0 {
		gp, tnow := gcController.findRunnableGCWorker(pp, now)
		if gp != nil {
			return gp, false, true
		}
		now = tnow
	}

	// Check the global runnable queue once in a while to ensure fairness.
	// Otherwise two goroutines can completely occupy the local runqueue
	// by constantly respawning each other.
	if pp.schedtick%61 == 0 && !sched.runq.empty() {
		lock(&sched.lock)
		gp := globrunqget()
		unlock(&sched.lock)
		if gp != nil {
			return gp, false, false
		}
	}

	// Wake up the finalizer G.
	if fingStatus.Load()&(fingWait|fingWake) == fingWait|fingWake {
		if gp := wakefing(); gp != nil {
			ready(gp, 0, true)
		}
	}

	// Wake up one or more cleanup Gs.
	if gcCleanups.needsWake() {
		gcCleanups.wake()
	}

	if *cgo_yield != nil {
		asmcgocall(*cgo_yield, nil)
	}

	// local runq
	if gp, inheritTime := runqget(pp); gp != nil {
		return gp, inheritTime, false
	}

	// global runq
	if !sched.runq.empty() {
		lock(&sched.lock)
		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
		unlock(&sched.lock)
		if gp != nil {
			if runqputbatch(pp, &q); !q.empty() {
				throw("Couldn't put Gs into empty local runq")
			}
			return gp, false, false
		}
	}

	// Poll network.
	// This netpoll is only an optimization before we resort to stealing.
	// We can safely skip it if there are no waiters or a thread is blocked
	// in netpoll already. If there is any kind of logical race with that
	// blocked thread (e.g. it has already returned from netpoll, but does
	// not set lastpoll yet), this thread will do blocking netpoll below
	// anyway.
	// We only poll from one thread at a time to avoid kernel contention
	// on machines with many cores.
	if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 && sched.pollingNet.Swap(1) == 0 {
		list, delta := netpoll(0)
		sched.pollingNet.Store(0)
		if !list.empty() { // non-blocking
			gp := list.pop()
			injectglist(&list)
			netpollAdjustWaiters(delta)
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}
	}

	// Spinning Ms: steal work from other Ps.
	//
	// Limit the number of spinning Ms to half the number of busy Ps.
	// This is necessary to prevent excessive CPU consumption when
	// GOMAXPROCS>>1 but the program parallelism is low.
	if mp.spinning || 2*sched.nmspinning.Load() < gomaxprocs-sched.npidle.Load() {
		if !mp.spinning {
			mp.becomeSpinning()
		}

		gp, inheritTime, tnow, w, newWork := stealWork(now)
		if gp != nil {
			// Successfully stole.
			return gp, inheritTime, false
		}
		if newWork {
			// There may be new timer or GC work; restart to
			// discover.
			goto top
		}

		now = tnow
		if w != 0 && (pollUntil == 0 || w < pollUntil) {
			// Earlier timer to wait for.
			pollUntil = w
		}
	}

	// We have nothing to do.
	//
	// If we're in the GC mark phase, can safely scan and blacken objects,
	// and have work to do, run idle-time marking rather than give up the P.
	if gcBlackenEnabled != 0 && gcShouldScheduleWorker(pp) && gcController.addIdleMarkWorker() {
		node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop())
		if node != nil {
			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
			gp := node.gp.ptr()

			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}
		// No worker G was available; undo addIdleMarkWorker.
		gcController.removeIdleMarkWorker()
	}

	// wasm only:
	// If a callback returned and no other goroutine is awake,
	// then wake event handler goroutine which pauses execution
	// until a callback was triggered.
	gp, otherReady := beforeIdle(now, pollUntil)
	if gp != nil {
		trace := traceAcquire()
		casgstatus(gp, _Gwaiting, _Grunnable)
		if trace.ok() {
			trace.GoUnpark(gp, 0)
			traceRelease(trace)
		}
		return gp, false, false
	}
	if otherReady {
		goto top
	}

	// Before we drop our P, make a snapshot of the allp slice,
	// which can change underfoot once we no longer block
	// safe-points. We don't need to snapshot the contents because
	// everything up to cap(allp) is immutable.
	//
	// We clear the snapshot from the M after return via
	// mp.clearAllpSnapshot (in schedule) and on each iteration of the top
	// loop.
	allpSnapshot := mp.snapshotAllp()
	// Also snapshot masks. Value changes are OK, but we can't allow
	// len to change out from under us.
	idlepMaskSnapshot := idlepMask
	timerpMaskSnapshot := timerpMask

	// return P and block
	lock(&sched.lock)
	if sched.gcwaiting.Load() || pp.runSafePointFn != 0 {
		unlock(&sched.lock)
		goto top
	}
	if !sched.runq.empty() {
		gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
		unlock(&sched.lock)
		if gp == nil {
			throw("global runq empty with non-zero runqsize")
		}
		if runqputbatch(pp, &q); !q.empty() {
			throw("Couldn't put Gs into empty local runq")
		}
		return gp, false, false
	}
	if !mp.spinning && sched.needspinning.Load() == 1 {
		// See "Delicate dance" comment below.
		mp.becomeSpinning()
		unlock(&sched.lock)
		goto top
	}
	if releasep() != pp {
		throw("findRunnable: wrong p")
	}
	now = pidleput(pp, now)
	unlock(&sched.lock)

	// Delicate dance: thread transitions from spinning to non-spinning
	// state, potentially concurrently with submission of new work. We must
	// drop nmspinning first and then check all sources again (with
	// #StoreLoad memory barrier in between). If we do it the other way
	// around, another thread can submit work after we've checked all
	// sources but before we drop nmspinning; as a result nobody will
	// unpark a thread to run the work.
	//
	// This applies to the following sources of work:
	//
	// * Goroutines added to the global or a per-P run queue.
	// * New/modified-earlier timers on a per-P timer heap.
	// * Idle-priority GC work (barring golang.org/issue/19112).
	//
	// If we discover new work below, we need to restore m.spinning as a
	// signal for resetspinning to unpark a new worker thread (because
	// there can be more than one starving goroutine).
	//
	// However, if after discovering new work we also observe no idle Ps
	// (either here or in resetspinning), we have a problem. We may be
	// racing with a non-spinning M in the block above, having found no
	// work and preparing to release its P and park. Allowing that P to go
	// idle will result in loss of work conservation (idle P while there is
	// runnable work). This could result in complete deadlock in the
	// unlikely event that we discover new work (from netpoll) right as we
	// are racing with _all_ other Ps going idle.
	//
	// We use sched.needspinning to synchronize with non-spinning Ms going
	// idle. If needspinning is set when they are about to drop their P,
	// they abort the drop and instead become a new spinning M on our
	// behalf. If we are not racing and the system is truly fully loaded
	// then no spinning threads are required, and the next thread to
	// naturally become spinning will clear the flag.
	//
	// Also see "Worker thread parking/unparking" comment at the top of the
	// file.
	wasSpinning := mp.spinning
	if mp.spinning {
		mp.spinning = false
		if sched.nmspinning.Add(-1) < 0 {
			throw("findRunnable: negative nmspinning")
		}

		// Note that for correctness, only the last M transitioning from
		// spinning to non-spinning must perform these rechecks to
		// ensure no missed work. However, the runtime has some cases
		// of transient increments of nmspinning that are decremented
		// without going through this path, so we must be conservative
		// and perform the check on all spinning Ms.
		//
		// See https://go.dev/issue/43997.

		// Check global and P runqueues again.

		lock(&sched.lock)
		if !sched.runq.empty() {
			pp, _ := pidlegetSpinning(0)
			if pp != nil {
				gp, q := globrunqgetbatch(int32(len(pp.runq)) / 2)
				unlock(&sched.lock)
				if gp == nil {
					throw("global runq empty with non-zero runqsize")
				}
				if runqputbatch(pp, &q); !q.empty() {
					throw("Couldn't put Gs into empty local runq")
				}
				acquirep(pp)
				mp.becomeSpinning()
				return gp, false, false
			}
		}
		unlock(&sched.lock)

		pp := checkRunqsNoP(allpSnapshot, idlepMaskSnapshot)
		if pp != nil {
			acquirep(pp)
			mp.becomeSpinning()
			goto top
		}

		// Check for idle-priority GC work again.
		pp, gp := checkIdleGCNoP()
		if pp != nil {
			acquirep(pp)
			mp.becomeSpinning()

			// Run the idle worker.
			pp.gcMarkWorkerMode = gcMarkWorkerIdleMode
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if trace.ok() {
				trace.GoUnpark(gp, 0)
				traceRelease(trace)
			}
			return gp, false, false
		}

		// Finally, check for timer creation or expiry concurrently with
		// transitioning from spinning to non-spinning.
		//
		// Note that we cannot use checkTimers here because it calls
		// adjusttimers which may need to allocate memory, and that isn't
		// allowed when we don't have an active P.
		pollUntil = checkTimersNoP(allpSnapshot, timerpMaskSnapshot, pollUntil)
	}

	// We don't need allp anymore at this point, but can't clear the
	// snapshot without a P for the write barrier.

	// Poll network until next timer.
	if netpollinited() && (netpollAnyWaiters() || pollUntil != 0) && sched.lastpoll.Swap(0) != 0 {
		sched.pollUntil.Store(pollUntil)
		if mp.p != 0 {
			throw("findRunnable: netpoll with p")
		}
		if mp.spinning {
			throw("findRunnable: netpoll with spinning")
		}
		// delay < 0 is passed to netpoll when there is no timer to wait
		// for; otherwise it is the time until pollUntil, clamped at 0.
		delay := int64(-1)
		if pollUntil != 0 {
			if now == 0 {
				now = nanotime()
			}
			delay = pollUntil - now
			if delay < 0 {
				delay = 0
			}
		}
		if faketime != 0 {
			// When using fake time, just poll.
			delay = 0
		}
		list, delta := netpoll(delay) // block until new work is available
		// Refresh now again, after potentially blocking.
		now = nanotime()
		sched.pollUntil.Store(0)
		sched.lastpoll.Store(now)
		if faketime != 0 && list.empty() {
			// Using fake time and nothing is ready; stop M.
			// When all M's stop, checkdead will call timejump.
			stopm()
			goto top
		}
		lock(&sched.lock)
		pp, _ := pidleget(now)
		unlock(&sched.lock)
		if pp == nil {
			// No P to run on: hand every ready G to injectglist.
			injectglist(&list)
			netpollAdjustWaiters(delta)
		} else {
			acquirep(pp)
			if !list.empty() {
				// Run the first ready G ourselves and inject the rest.
				gp := list.pop()
				injectglist(&list)
				netpollAdjustWaiters(delta)
				trace := traceAcquire()
				casgstatus(gp, _Gwaiting, _Grunnable)
				if trace.ok() {
					trace.GoUnpark(gp, 0)
					traceRelease(trace)
				}
				return gp, false, false
			}
			if wasSpinning {
				mp.becomeSpinning()
			}
			goto top
		}
	} else if pollUntil != 0 && netpollinited() {
		// Some other thread is polling. Interrupt it if it would sleep
		// past our earliest timer.
		pollerPollUntil := sched.pollUntil.Load()
		if pollerPollUntil == 0 || pollerPollUntil > pollUntil {
			netpollBreak()
		}
	}
	stopm()
	goto top
}

// pollWork reports whether there is non-background work this P could
// be doing. This is a fairly lightweight check to be used for
// background work loops, like idle GC. It checks a subset of the
// conditions checked by the actual scheduler.
func pollWork() bool {
	// Global run queue.
	if !sched.runq.empty() {
		return true
	}
	// This P's local run queue.
	p := getg().m.p.ptr()
	if !runqempty(p) {
		return true
	}
	// Non-blocking network poll; any ready Gs found are injected so that
	// they are not lost.
	if netpollinited() && netpollAnyWaiters() && sched.lastpoll.Load() != 0 {
		if list, delta := netpoll(0); !list.empty() {
			injectglist(&list)
			netpollAdjustWaiters(delta)
			return true
		}
	}
	return false
}

// stealWork attempts to steal a runnable goroutine or timer from any P.
//
// If newWork is true, new work may have been readied.
//
// If now is not 0 it is the current time. stealWork returns the passed time or
// the current time if now was passed as 0.
func stealWork(now int64) (gp *g, inheritTime bool, rnow, pollUntil int64, newWork bool) {
	pp := getg().m.p.ptr()

	ranTimer := false

	const stealTries = 4
	for i := 0; i < stealTries; i++ {
		// Only the final pass looks at other Ps' timers and runnext.
		stealTimersOrRunNextG := i == stealTries-1

		for enum := stealOrder.start(cheaprand()); !enum.done(); enum.next() {
			if sched.gcwaiting.Load() {
				// GC work may be available.
				return nil, false, now, pollUntil, true
			}
			p2 := allp[enum.position()]
			if pp == p2 {
				continue
			}

			// Steal timers from p2. This call to checkTimers is the only place
			// where we might hold a lock on a different P's timers. We do this
			// once on the last pass before checking runnext because stealing
			// from the other P's runnext should be the last resort, so if there
			// are timers to steal do that first.
			//
			// We only check timers on one of the stealing iterations because
			// the time stored in now doesn't change in this loop and checking
			// the timers for each P more than once with the same value of now
			// is probably a waste of time.
			//
			// timerpMask tells us whether the P may have timers at all. If it
			// can't, no need to check at all.
			if stealTimersOrRunNextG && timerpMask.read(enum.position()) {
				tnow, w, ran := p2.timers.check(now, nil)
				now = tnow
				if w != 0 && (pollUntil == 0 || w < pollUntil) {
					pollUntil = w
				}
				if ran {
					// Running the timers may have
					// made an arbitrary number of G's
					// ready and added them to this P's
					// local run queue. That invalidates
					// the assumption of runqsteal
					// that it always has room to add
					// stolen G's. So check now if there
					// is a local G to run.
					if gp, inheritTime := runqget(pp); gp != nil {
						// findRunnable checks gp before newWork, so the
						// ranTimer value returned here does not matter
						// to that caller.
						return gp, inheritTime, now, pollUntil, ranTimer
					}
					ranTimer = true
				}
			}

			// Don't bother to attempt to steal if p2 is idle.
			if !idlepMask.read(enum.position()) {
				if gp := runqsteal(pp, p2, stealTimersOrRunNextG); gp != nil {
					return gp, false, now, pollUntil, ranTimer
				}
			}
		}
	}

	// No goroutines found to steal. Regardless, running a timer may have
	// made some goroutine ready that we missed. Indicate the next timer to
	// wait for.
	return nil, false, now, pollUntil, ranTimer
}

// Check all Ps for a runnable G to steal.
//
// On entry we have no P. If a G is available to steal and a P is available,
// the P is returned which the caller should acquire and attempt to steal the
// work to.
func checkRunqsNoP(allpSnapshot []*p, idlepMaskSnapshot pMask) *p {
	for id, p2 := range allpSnapshot {
		// Only non-idle Ps with a non-empty run queue are of interest.
		if !idlepMaskSnapshot.read(uint32(id)) && !runqempty(p2) {
			lock(&sched.lock)
			pp, _ := pidlegetSpinning(0)
			if pp == nil {
				// Can't get a P, don't bother checking remaining Ps.
				unlock(&sched.lock)
				return nil
			}
			unlock(&sched.lock)
			return pp
		}
	}

	// No work available.
	return nil
}

// Check all Ps for a timer expiring sooner than pollUntil.
//
// Returns updated pollUntil value.
3939 func checkTimersNoP(allpSnapshot []*p, timerpMaskSnapshot pMask, pollUntil int64) int64 { 3940 for id, p2 := range allpSnapshot { 3941 if timerpMaskSnapshot.read(uint32(id)) { 3942 w := p2.timers.wakeTime() 3943 if w != 0 && (pollUntil == 0 || w < pollUntil) { 3944 pollUntil = w 3945 } 3946 } 3947 } 3948 3949 return pollUntil 3950 } 3951 3952 // Check for idle-priority GC, without a P on entry. 3953 // 3954 // If some GC work, a P, and a worker G are all available, the P and G will be 3955 // returned. The returned P has not been wired yet. 3956 func checkIdleGCNoP() (*p, *g) { 3957 // N.B. Since we have no P, gcBlackenEnabled may change at any time; we 3958 // must check again after acquiring a P. As an optimization, we also check 3959 // if an idle mark worker is needed at all. This is OK here, because if we 3960 // observe that one isn't needed, at least one is currently running. Even if 3961 // it stops running, its own journey into the scheduler should schedule it 3962 // again, if need be (at which point, this check will pass, if relevant). 3963 if atomic.Load(&gcBlackenEnabled) == 0 || !gcController.needIdleMarkWorker() { 3964 return nil, nil 3965 } 3966 if !gcShouldScheduleWorker(nil) { 3967 return nil, nil 3968 } 3969 3970 // Work is available; we can start an idle GC worker only if there is 3971 // an available P and available worker G. 3972 // 3973 // We can attempt to acquire these in either order, though both have 3974 // synchronization concerns (see below). Workers are almost always 3975 // available (see comment in findRunnableGCWorker for the one case 3976 // there may be none). Since we're slightly less likely to find a P, 3977 // check for that first. 3978 // 3979 // Synchronization: note that we must hold sched.lock until we are 3980 // committed to keeping it. Otherwise we cannot put the unnecessary P 3981 // back in sched.pidle without performing the full set of idle 3982 // transition checks. 
3983 // 3984 // If we were to check gcBgMarkWorkerPool first, we must somehow handle 3985 // the assumption in gcControllerState.findRunnableGCWorker that an 3986 // empty gcBgMarkWorkerPool is only possible if gcMarkDone is running. 3987 lock(&sched.lock) 3988 pp, now := pidlegetSpinning(0) 3989 if pp == nil { 3990 unlock(&sched.lock) 3991 return nil, nil 3992 } 3993 3994 // Now that we own a P, gcBlackenEnabled can't change (as it requires STW). 3995 if gcBlackenEnabled == 0 || !gcController.addIdleMarkWorker() { 3996 pidleput(pp, now) 3997 unlock(&sched.lock) 3998 return nil, nil 3999 } 4000 4001 node := (*gcBgMarkWorkerNode)(gcBgMarkWorkerPool.pop()) 4002 if node == nil { 4003 pidleput(pp, now) 4004 unlock(&sched.lock) 4005 gcController.removeIdleMarkWorker() 4006 return nil, nil 4007 } 4008 4009 unlock(&sched.lock) 4010 4011 return pp, node.gp.ptr() 4012 } 4013 4014 // wakeNetPoller wakes up the thread sleeping in the network poller if it isn't 4015 // going to wake up before the when argument; or it wakes an idle P to service 4016 // timers and the network poller if there isn't one already. 4017 func wakeNetPoller(when int64) { 4018 if sched.lastpoll.Load() == 0 { 4019 // In findRunnable we ensure that when polling the pollUntil 4020 // field is either zero or the time to which the current 4021 // poll is expected to run. This can have a spurious wakeup 4022 // but should never miss a wakeup. 4023 pollerPollUntil := sched.pollUntil.Load() 4024 if pollerPollUntil == 0 || pollerPollUntil > when { 4025 netpollBreak() 4026 } 4027 } else { 4028 // There are no threads in the network poller, try to get 4029 // one there so it can handle new timers. 4030 if GOOS != "plan9" { // Temporary workaround - see issue #42303. 
4031 wakep() 4032 } 4033 } 4034 } 4035 4036 func resetspinning() { 4037 gp := getg() 4038 if !gp.m.spinning { 4039 throw("resetspinning: not a spinning m") 4040 } 4041 gp.m.spinning = false 4042 nmspinning := sched.nmspinning.Add(-1) 4043 if nmspinning < 0 { 4044 throw("findRunnable: negative nmspinning") 4045 } 4046 // M wakeup policy is deliberately somewhat conservative, so check if we 4047 // need to wakeup another P here. See "Worker thread parking/unparking" 4048 // comment at the top of the file for details. 4049 wakep() 4050 } 4051 4052 // injectglist adds each runnable G on the list to some run queue, 4053 // and clears glist. If there is no current P, they are added to the 4054 // global queue, and up to npidle M's are started to run them. 4055 // Otherwise, for each idle P, this adds a G to the global queue 4056 // and starts an M. Any remaining G's are added to the current P's 4057 // local run queue. 4058 // This may temporarily acquire sched.lock. 4059 // Can run concurrently with GC. 4060 func injectglist(glist *gList) { 4061 if glist.empty() { 4062 return 4063 } 4064 4065 // Mark all the goroutines as runnable before we put them 4066 // on the run queues. 4067 var tail *g 4068 trace := traceAcquire() 4069 for gp := glist.head.ptr(); gp != nil; gp = gp.schedlink.ptr() { 4070 tail = gp 4071 casgstatus(gp, _Gwaiting, _Grunnable) 4072 if trace.ok() { 4073 trace.GoUnpark(gp, 0) 4074 } 4075 } 4076 if trace.ok() { 4077 traceRelease(trace) 4078 } 4079 4080 // Turn the gList into a gQueue. 4081 q := gQueue{glist.head, tail.guintptr(), glist.size} 4082 *glist = gList{} 4083 4084 startIdle := func(n int32) { 4085 for ; n > 0; n-- { 4086 mp := acquirem() // See comment in startm. 
4087 lock(&sched.lock) 4088 4089 pp, _ := pidlegetSpinning(0) 4090 if pp == nil { 4091 unlock(&sched.lock) 4092 releasem(mp) 4093 break 4094 } 4095 4096 startm(pp, false, true) 4097 unlock(&sched.lock) 4098 releasem(mp) 4099 } 4100 } 4101 4102 pp := getg().m.p.ptr() 4103 if pp == nil { 4104 n := q.size 4105 lock(&sched.lock) 4106 globrunqputbatch(&q) 4107 unlock(&sched.lock) 4108 startIdle(n) 4109 return 4110 } 4111 4112 var globq gQueue 4113 npidle := sched.npidle.Load() 4114 for ; npidle > 0 && !q.empty(); npidle-- { 4115 g := q.pop() 4116 globq.pushBack(g) 4117 } 4118 if !globq.empty() { 4119 n := globq.size 4120 lock(&sched.lock) 4121 globrunqputbatch(&globq) 4122 unlock(&sched.lock) 4123 startIdle(n) 4124 } 4125 4126 if runqputbatch(pp, &q); !q.empty() { 4127 lock(&sched.lock) 4128 globrunqputbatch(&q) 4129 unlock(&sched.lock) 4130 } 4131 4132 // Some P's might have become idle after we loaded `sched.npidle` 4133 // but before any goroutines were added to the queue, which could 4134 // lead to idle P's when there is work available in the global queue. 4135 // That could potentially last until other goroutines become ready 4136 // to run. That said, we need to find a way to hedge 4137 // 4138 // Calling wakep() here is the best bet, it will do nothing in the 4139 // common case (no racing on `sched.npidle`), while it could wake one 4140 // more P to execute G's, which might end up with >1 P's: the first one 4141 // wakes another P and so forth until there is no more work, but this 4142 // ought to be an extremely rare case. 4143 // 4144 // Also see "Worker thread parking/unparking" comment at the top of the file for details. 4145 wakep() 4146 } 4147 4148 // One round of scheduler: find a runnable goroutine and execute it. 4149 // Never returns. 
func schedule() {
	mp := getg().m

	if mp.locks != 0 {
		throw("schedule: holding locks")
	}

	if mp.lockedg != 0 {
		stoplockedm()
		execute(mp.lockedg.ptr(), false) // Never returns.
	}

	// We should not schedule away from a g that is executing a cgo call,
	// since the cgo call is using the m's g0 stack.
	if mp.incgo {
		throw("schedule: in cgo")
	}

top:
	pp := mp.p.ptr()
	pp.preempt = false

	// Safety check: if we are spinning, the run queue should be empty.
	// Check this before calling checkTimers, as that might call
	// goready to put a ready goroutine on the local run queue.
	if mp.spinning && (pp.runnext != 0 || pp.runqhead != pp.runqtail) {
		throw("schedule: spinning with local work")
	}

	gp, inheritTime, tryWakeP := findRunnable() // blocks until work is available

	// May be on a new P.
	pp = mp.p.ptr()

	// findRunnable may have collected an allp snapshot. The snapshot is
	// only required within findRunnable. Clear it to allow the GC to
	// collect the slice.
	mp.clearAllpSnapshot()

	// If the P was assigned a next GC mark worker but findRunnable
	// selected anything else, release the worker so another P may run it.
	//
	// N.B. If this occurs because a higher-priority goroutine was selected
	// (trace reader), then tryWakeP is set, which will wake another P to
	// run the worker. If this occurs because the GC is no longer active,
	// there is no need to wakep.
	gcController.releaseNextGCMarkWorker(pp)

	if debug.dontfreezetheworld > 0 && freezing.Load() {
		// See comment in freezetheworld. We don't want to perturb
		// scheduler state, so we didn't gcstopm in findRunnable, but
		// also don't want to allow new goroutines to run.
		//
		// Deadlock here rather than in the findRunnable loop so if
		// findRunnable is stuck in a loop we don't perturb that
		// either.
		//
		// The second lock of the same mutex is the intentional
		// self-deadlock.
		lock(&deadlock)
		lock(&deadlock)
	}

	// This thread is going to run a goroutine and is not spinning anymore,
	// so if it was marked as spinning we need to reset it now and potentially
	// start a new spinning M.
	if mp.spinning {
		resetspinning()
	}

	if sched.disable.user && !schedEnabled(gp) {
		// Scheduling of this goroutine is disabled. Put it on
		// the list of pending runnable goroutines for when we
		// re-enable user scheduling and look again.
		lock(&sched.lock)
		if schedEnabled(gp) {
			// Something re-enabled scheduling while we
			// were acquiring the lock.
			unlock(&sched.lock)
		} else {
			sched.disable.runnable.pushBack(gp)
			unlock(&sched.lock)
			goto top
		}
	}

	// If about to schedule a not-normal goroutine (a GCworker or tracereader),
	// wake a P if there is one.
	if tryWakeP {
		wakep()
	}
	if gp.lockedm != 0 {
		// Hands off own p to the locked m,
		// then blocks waiting for a new p.
		startlockedm(gp)
		goto top
	}

	execute(gp, inheritTime)
}

// dropg removes the association between m and the current goroutine m->curg (gp for short).
// Typically a caller sets gp's status away from Grunning and then
// immediately calls dropg to finish the job. The caller is also responsible
// for arranging that gp will be restarted using ready at an
// appropriate time. After calling dropg and arranging for gp to be
// readied later, the caller can do other work but eventually should
// call schedule to restart the scheduling of goroutines on this m.
func dropg() {
	gp := getg()

	// Clear both directions of the link: curg.m first (while curg is
	// still reachable through the m), then m.curg.
	setMNoWB(&gp.m.curg.m, nil)
	setGNoWB(&gp.m.curg, nil)
}

// parkunlock_c unlocks the mutex that lock points to and returns true.
// Its signature matches the callback that park_m invokes through
// mp.waitunlockf (presumably it is installed there by callers that park
// while holding a runtime mutex; confirm against gopark's callers).
func parkunlock_c(gp *g, lock unsafe.Pointer) bool {
	unlock((*mutex)(lock))
	return true
}

// park continuation on g0.
func park_m(gp *g) {
	mp := getg().m

	trace := traceAcquire()

	// If g is in a synctest group, we don't want to let the group
	// become idle until after the waitunlockf (if any) has confirmed
	// that the park is happening.
	// We need to record gp.bubble here, since waitunlockf can change it.
	bubble := gp.bubble
	if bubble != nil {
		bubble.incActive()
	}

	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		trace.GoPark(mp.waitTraceBlockReason, mp.waitTraceSkip)
	}
	// N.B. Not using casGToWaiting here because the waitreason is
	// set by park_m's caller.
	casgstatus(gp, _Grunning, _Gwaiting)
	if trace.ok() {
		traceRelease(trace)
	}

	dropg()

	if fn := mp.waitunlockf; fn != nil {
		ok := fn(gp, mp.waitlock)
		mp.waitunlockf = nil
		mp.waitlock = nil
		if !ok {
			// The unlock function vetoed the park: make gp runnable
			// again and resume it right away on this M.
			trace := traceAcquire()
			casgstatus(gp, _Gwaiting, _Grunnable)
			if bubble != nil {
				bubble.decActive()
			}
			if trace.ok() {
				trace.GoUnpark(gp, 2)
				traceRelease(trace)
			}
			execute(gp, true) // Schedule it back, never returns.
		}
	}

	if bubble != nil {
		bubble.decActive()
	}

	schedule()
}

// goschedImpl moves gp from _Grunning to _Grunnable, detaches it from the
// M, requeues it, and calls schedule. It throws if gp is not _Grunning
// (ignoring the _Gscan bit).
//
// preempted selects the trace event (GoPreempt rather than GoSched) and,
// together with a pending sched.gcwaiting, selects the local runnext slot
// instead of the global run queue as the place gp is requeued.
func goschedImpl(gp *g, preempted bool) {
	pp := gp.m.p.ptr()
	trace := traceAcquire()
	status := readgstatus(gp)
	if status&^_Gscan != _Grunning {
		dumpgstatus(gp)
		throw("bad g status")
	}
	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		if preempted {
			trace.GoPreempt()
		} else {
			trace.GoSched()
		}
	}
	casgstatus(gp, _Grunning, _Grunnable)
	if trace.ok() {
		traceRelease(trace)
	}

	dropg()
	if preempted && sched.gcwaiting.Load() {
		// If preempted for STW, keep the G on the local P in runnext
		// so it can keep running immediately after the STW.
		runqput(pp, gp, true)
	} else {
		lock(&sched.lock)
		globrunqput(gp)
		unlock(&sched.lock)
	}

	if mainStarted {
		wakep()
	}

	schedule()
}

// Gosched continuation on g0.
func gosched_m(gp *g) {
	goschedImpl(gp, false)
}

// goschedguarded is a forbidden-states-avoided version of gosched_m.
//
// If canPreemptM reports that the M cannot be preempted, gp is resumed
// immediately via gogo instead of being rescheduled.
func goschedguarded_m(gp *g) {
	if !canPreemptM(gp.m) {
		gogo(&gp.sched) // never return
	}
	goschedImpl(gp, false)
}

// gopreempt_m is goschedImpl with preempted set: it reschedules gp and
// records the event as a preemption.
func gopreempt_m(gp *g) {
	goschedImpl(gp, true)
}

// preemptPark parks gp and puts it in _Gpreempted.
//
//go:systemstack
func preemptPark(gp *g) {
	status := readgstatus(gp)
	if status&^_Gscan != _Grunning {
		dumpgstatus(gp)
		throw("bad g status")
	}

	if gp.asyncSafePoint {
		// Double-check that async preemption does not
		// happen in SPWRITE assembly functions.
		// isAsyncSafePoint must exclude this case.
		f := findfunc(gp.sched.pc)
		if !f.valid() {
			throw("preempt at unknown pc")
		}
		if f.flag&abi.FuncFlagSPWrite != 0 {
			println("runtime: unexpected SPWRITE function", funcname(f), "in async preempt")
			throw("preempt SPWRITE")
		}
	}

	// Transition from _Grunning to _Gscan|_Gpreempted. We can't
	// be in _Grunning when we dropg because then we'd be running
	// without an M, but the moment we're in _Gpreempted,
	// something could claim this G before we've fully cleaned it
	// up. Hence, we set the scan bit to lock down further
	// transitions until we can dropg.
	casGToPreemptScan(gp, _Grunning, _Gscan|_Gpreempted)

	// Be careful about ownership as we trace this next event.
	//
	// According to the tracer invariants (trace.go) it's unsafe
	// for us to emit an event for a goroutine we do not own.
	// The moment we CAS into _Gpreempted, suspendG could CAS the
	// goroutine to _Gwaiting, effectively taking ownership. All of
	// this could happen before we even get the chance to emit
	// an event. The end result is that the events could appear
	// out of order, and the tracer generally assumes the scheduler
	// takes care of the ordering between GoPark and GoUnpark.
	//
	// The answer here is simple: emit the event while we still hold
	// the _Gscan bit on the goroutine, since the _Gscan bit means
	// ownership over transitions.
	//
	// We still need to traceAcquire and traceRelease across the CAS
	// because the tracer could be what's calling suspendG in the first
	// place. This also upholds the tracer invariant that we must hold
	// traceAcquire/traceRelease across the transition. However, we
	// specifically *only* emit the event while we still have ownership.
	trace := traceAcquire()
	if trace.ok() {
		trace.GoPark(traceBlockPreempted, 0)
	}

	// Drop the goroutine from the M. Only do this after the tracer has
	// emitted an event, because it needs the association for GoPark to
	// work correctly.
	dropg()

	// Drop the scan bit and release the trace locker if necessary.
	casfrom_Gscanstatus(gp, _Gscan|_Gpreempted, _Gpreempted)
	if trace.ok() {
		traceRelease(trace)
	}

	// All done.
	schedule()
}

// goyield is like Gosched, but it:
//   - emits a GoPreempt trace event instead of a GoSched trace event
//   - puts the current G on the runq of the current P instead of the globrunq
//
// goyield should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//   - github.com/sagernet/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname goyield
func goyield() {
	checkTimeouts()
	mcall(goyield_m)
}

// goyield_m is the g0 continuation of goyield: it marks gp runnable,
// detaches it from the M, puts it on the current P's run queue (not in
// the runnext slot), and calls schedule.
func goyield_m(gp *g) {
	trace := traceAcquire()
	pp := gp.m.p.ptr()
	if trace.ok() {
		// Trace the event before the transition. It may take a
		// stack trace, but we won't own the stack after the
		// transition anymore.
		trace.GoPreempt()
	}
	casgstatus(gp, _Grunning, _Grunnable)
	if trace.ok() {
		traceRelease(trace)
	}
	dropg()
	runqput(pp, gp, false)
	schedule()
}

// Finishes execution of the current goroutine.
func goexit1() {
	if raceenabled {
		if gp := getg(); gp.bubble != nil {
			racereleasemergeg(gp, gp.bubble.raceaddr())
		}
		racegoend()
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.GoEnd()
		traceRelease(trace)
	}
	mcall(goexit0)
}

// goexit continuation on g0.
func goexit0(gp *g) {
	if goexperiment.RuntimeSecret && gp.secret > 0 {
		// Erase the whole stack. This path only occurs when
		// runtime.Goexit is called from within a runtime/secret.Do call.
		memclrNoHeapPointers(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		// Since this is running on g0, our registers are already zeroed from going through
		// mcall in secret mode.
	}
	gdestroy(gp)
	schedule()
}

// gdestroy tears down the exiting goroutine gp: it moves gp from
// _Grunning to _Gdead, resets its per-goroutine fields, flushes any
// positive GC assist credit to the global pool while marking is enabled,
// detaches gp from the M, and hands it to gfput for reuse.
//
// If gp was locked to its thread, gdestroy does not return on platforms
// other than plan9: it jumps to mp.g0.sched so the thread can exit.
func gdestroy(gp *g) {
	mp := getg().m
	pp := mp.p.ptr()

	casgstatus(gp, _Grunning, _Gdead)
	gcController.addScannableStack(pp, -int64(gp.stack.hi-gp.stack.lo))
	if isSystemGoroutine(gp, false) {
		sched.ngsys.Add(-1)
	}
	gp.m = nil
	// Remember whether gp was locked before the field is cleared; it is
	// needed for the thread-exit decision below.
	locked := gp.lockedm != 0
	gp.lockedm = 0
	mp.lockedg = 0
	gp.preemptStop = false
	gp.paniconfault = false
	gp._defer = nil // should be true already but just in case.
	gp._panic = nil // non-nil for Goexit during panic. points at stack-allocated data.
	gp.writebuf = nil
	gp.waitreason = waitReasonZero
	gp.param = nil
	gp.labels = nil
	gp.timer = nil
	gp.bubble = nil
	gp.fipsOnlyBypass = false
	gp.secret = 0

	if gcBlackenEnabled != 0 && gp.gcAssistBytes > 0 {
		// Flush assist credit to the global pool. This gives
		// better information to pacing if the application is
		// rapidly creating and exiting goroutines.
		assistWorkPerByte := gcController.assistWorkPerByte.Load()
		scanCredit := int64(assistWorkPerByte * float64(gp.gcAssistBytes))
		gcController.bgScanCredit.Add(scanCredit)
		gp.gcAssistBytes = 0
	}

	dropg()

	if GOARCH == "wasm" { // no threads yet on wasm
		gfput(pp, gp)
		return
	}

	if locked && mp.lockedInt != 0 {
		print("runtime: mp.lockedInt = ", mp.lockedInt, "\n")
		if mp.isextra {
			throw("runtime.Goexit called in a thread that was not created by the Go runtime")
		}
		throw("exited a goroutine internally locked to the OS thread")
	}
	gfput(pp, gp)
	if locked {
		// The goroutine may have locked this thread because
		// it put it in an unusual kernel state. Kill it
		// rather than returning it to the thread pool.

		// Return to mstart, which will release the P and exit
		// the thread.
		if GOOS != "plan9" { // See golang.org/issue/22227.
			gogo(&mp.g0.sched)
		} else {
			// Clear lockedExt on plan9 since we may end up re-using
			// this thread.
			mp.lockedExt = 0
		}
	}
}

// save updates getg().sched to refer to pc and sp so that a following
// gogo will restore pc and sp.
//
// save must not have write barriers because invoking a write barrier
// can clobber getg().sched.
//
// In addition to pc and sp, save records bp and zeroes sched.lr. It throws
// when called on g0 or gsignal, and calls badctxt if sched.ctxt is not
// already nil.
//
//go:nosplit
//go:nowritebarrierrec
func save(pc, sp, bp uintptr) {
	gp := getg()

	if gp == gp.m.g0 || gp == gp.m.gsignal {
		// m.g0.sched is special and must describe the context
		// for exiting the thread. mstart1 writes to it directly.
		// m.gsignal.sched should not be used at all.
		// This check makes sure save calls do not accidentally
		// run in contexts where they'd write to system g's.
		throw("save on system g not allowed")
	}

	gp.sched.pc = pc
	gp.sched.sp = sp
	gp.sched.lr = 0
	gp.sched.bp = bp
	// We need to ensure ctxt is zero, but can't have a write
	// barrier here. However, it should always already be zero.
	// Assert that.
	if gp.sched.ctxt != nil {
		badctxt()
	}
}

// The goroutine g is about to enter a system call.
// Record that it's not using the cpu anymore.
// This is called only from the go syscall library and cgocall,
// not from the low-level system calls used by the runtime.
//
// Entersyscall cannot split the stack: the save must
// make g->sched refer to the caller's stack segment, because
// entersyscall is going to return immediately after.
//
// Nothing entersyscall calls can split the stack either.
// We cannot safely move the stack during an active call to syscall,
// because we do not know which of the uintptr arguments are
// really pointers (back into the stack).
// In practice, this means that we make the fast path run through
// entersyscall doing no-split things, and the slow path has to use systemstack
// to run bigger things on the system stack.
//
// reentersyscall is the entry point used by cgo callbacks, where explicitly
// saved SP and PC are restored. This is needed when exitsyscall will be called
// from a function further up in the call stack than the parent, as g->syscallsp
// must always point to a valid stack frame. entersyscall below is the normal
// entry point for syscalls, which obtains the SP and PC from the caller.
//
//go:nosplit
func reentersyscall(pc, sp, bp uintptr) {
	gp := getg()

	// Disable preemption because during this function g is in Gsyscall status,
	// but can have inconsistent g->sched, do not let GC observe it.
	// The matching decrement is the last statement of this function.
	gp.m.locks++

	// This M may have a signal stack that is dirtied with secret information
	// (see package "runtime/secret"). Since it's about to go into a syscall for
	// an arbitrary amount of time and the G that put the secret info there
	// might have returned from secret.Do, we have to zero it out now, lest we
	// break the guarantee that secrets are purged by the next GC after a return
	// to secret.Do.
	//
	// It might be tempting to think that we only need to zero out this if we're
	// not running in secret mode anymore, but that leaves an ABA problem. The G
	// that put the secrets onto our signal stack may not be the one that is
	// currently executing.
	//
	// Logically, we should erase this when we lose our P, not when we enter the
	// syscall. This would avoid a zeroing in the case where the call returns
	// almost immediately. Since we use this path for cgo calls as well, these
	// fast "syscalls" are quite common. However, since we only erase the signal
	// stack if we were delivered a signal in secret mode and considering the
	// cross-thread synchronization cost for the P, it hardly seems worth it.
	//
	// TODO(dmo): can we encode the goid into mp.signalSecret and avoid the ABA problem?
	if goexperiment.RuntimeSecret {
		eraseSecretsSignalStk()
	}

	// Entersyscall must not call any function that might split/grow the stack.
	// (See details in comment above.)
	// Catch calls that might, by replacing the stack guard with something that
	// will trip any stack check and leaving a flag to tell newstack to die.
	gp.stackguard0 = stackPreempt
	gp.throwsplit = true

	// Copy the syscalltick over so we can identify if the P got stolen later.
	gp.m.syscalltick = gp.m.p.ptr().syscalltick

	pp := gp.m.p.ptr()
	if pp.runSafePointFn != 0 {
		// runSafePointFn may stack split if run on this stack
		systemstack(runSafePointFn)
	}
	gp.m.oldp.set(pp)

	// Leave SP around for GC and traceback.
	save(pc, sp, bp)
	gp.syscallsp = sp
	gp.syscallpc = pc
	gp.syscallbp = bp

	// Double-check sp and bp.
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		systemstack(func() {
			print("entersyscall inconsistent sp ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscall")
		})
	}
	// N.B. && binds tighter than ||, so this reads as
	// (bp != 0 && bp < lo) || hi < bp. A zero bp still passes, because
	// stack.hi < 0 cannot hold for an unsigned value.
	if gp.syscallbp != 0 && gp.syscallbp < gp.stack.lo || gp.stack.hi < gp.syscallbp {
		systemstack(func() {
			print("entersyscall inconsistent bp ", hex(gp.syscallbp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscall")
		})
	}
	trace := traceAcquire()
	if trace.ok() {
		// Emit a trace event. Notably, actually emitting the event must happen before
		// the casgstatus because it mutates the P, but the traceLocker must be held
		// across the casgstatus since we're transitioning out of _Grunning
		// (see trace.go invariants).
		systemstack(func() {
			trace.GoSysCall()
		})
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	if sched.gcwaiting.Load() {
		// Optimization: If there's a pending STW, do the equivalent of
		// entersyscallblock here at the last minute and immediately give
		// away our P.
		systemstack(func() {
			entersyscallHandleGCWait(trace)
		})
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	// As soon as we switch to _Gsyscall, we are in danger of losing our P.
	// We must not touch it after this point.
	//
	// Try to do a quick CAS to avoid calling into casgstatus in the common case.
	// If we have a bubble, we need to fall into casgstatus.
	if gp.bubble != nil || !gp.atomicstatus.CompareAndSwap(_Grunning, _Gsyscall) {
		casgstatus(gp, _Grunning, _Gsyscall)
	}
	if staticLockRanking {
		// casgstatus clobbers gp.sched via systemstack under staticLockRanking. Restore it.
		save(pc, sp, bp)
	}
	if trace.ok() {
		// N.B. We don't need to go on the systemstack because traceRelease is very
		// carefully recursively nosplit. This also means we don't need to worry
		// about clobbering gp.sched.
		traceRelease(trace)
	}
	if sched.sysmonwait.Load() {
		systemstack(entersyscallWakeSysmon)
		// systemstack clobbered gp.sched, so restore it.
		save(pc, sp, bp)
	}
	gp.m.locks--
}

// debugExtendGrunningNoP is a debug mode that extends the windows in which
// we're _Grunning without a P in order to try to shake out bugs with code
// assuming this state is impossible.
const debugExtendGrunningNoP = false

// Standard syscall entry used by the go syscall library and normal cgo calls.
//
// This is exported via linkname to assembly in the syscall package and x/sys.
//
// Other packages should not be accessing entersyscall directly,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:nosplit
//go:linkname entersyscall
func entersyscall() {
	// N.B. getcallerfp cannot be written directly as argument in the call
	// to reentersyscall because it forces spilling the other arguments to
	// the stack. This results in exceeding the nosplit stack requirements
	// on some platforms.
	fp := getcallerfp()
	reentersyscall(sys.GetCallerPC(), sys.GetCallerSP(), fp)
}

// entersyscallWakeSysmon wakes sysmon if it is waiting. The sysmonwait
// flag is re-checked under sched.lock before it is cleared and
// sched.sysmonnote is signalled.
func entersyscallWakeSysmon() {
	lock(&sched.lock)
	if sched.sysmonwait.Load() {
		sched.sysmonwait.Store(false)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
}

// entersyscallHandleGCWait gives up the current M's P to a pending
// stop-the-world. If sched.stopwait is still positive once sched.lock is
// held, the P is unwired from the M, put in _Pgcstop, and counted against
// stopwait; the waiter on sched.stopnote is woken when the count reaches
// zero.
//
// trace is the caller's traceLocker; when it is ok a ProcStop event is
// emitted for the P.
func entersyscallHandleGCWait(trace traceLocker) {
	gp := getg()

	lock(&sched.lock)
	if sched.stopwait > 0 {
		// Set our P to _Pgcstop so the STW can take it.
		pp := gp.m.p.ptr()
		pp.m = 0
		gp.m.p = 0
		atomic.Store(&pp.status, _Pgcstop)

		if trace.ok() {
			trace.ProcStop(pp)
		}
		addGSyscallNoP(gp.m) // We gave up our P voluntarily.
		pp.gcStopTime = nanotime()
		pp.syscalltick++
		if sched.stopwait--; sched.stopwait == 0 {
			notewakeup(&sched.stopnote)
		}
	}
	unlock(&sched.lock)
}

// The same as entersyscall(), but with a hint that the syscall is blocking.

// entersyscallblock should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname entersyscallblock
//go:nosplit
func entersyscallblock() {
	gp := getg()

	gp.m.locks++ // see comment in entersyscall
	gp.throwsplit = true
	gp.stackguard0 = stackPreempt // see comment in entersyscall
	// Remember the P's tick on the M before bumping it on the P.
	gp.m.syscalltick = gp.m.p.ptr().syscalltick
	gp.m.p.ptr().syscalltick++

	addGSyscallNoP(gp.m) // We're going to give up our P.

	// Leave SP around for GC and traceback.
	pc := sys.GetCallerPC()
	sp := sys.GetCallerSP()
	bp := getcallerfp()
	save(pc, sp, bp)
	gp.syscallsp = gp.sched.sp
	gp.syscallpc = gp.sched.pc
	gp.syscallbp = gp.sched.bp
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		// Copy the values into locals before switching stacks for the report.
		sp1 := sp
		sp2 := gp.sched.sp
		sp3 := gp.syscallsp
		systemstack(func() {
			print("entersyscallblock inconsistent sp ", hex(sp1), " ", hex(sp2), " ", hex(sp3), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}

	// Once we switch to _Gsyscall, we can't safely touch
	// our P anymore, so we need to hand it off beforehand.
	// The tracer also needs to see the syscall before the P
	// handoff, so the order here must be (1) trace,
	// (2) handoff, (3) _Gsyscall switch.
	trace := traceAcquire()
	systemstack(func() {
		if trace.ok() {
			trace.GoSysCall()
		}
		handoffp(releasep())
	})
	// <--
	// Caution: we're in a small window where we are in _Grunning without a P.
	// -->
	if debugExtendGrunningNoP {
		usleep(10)
	}
	casgstatus(gp, _Grunning, _Gsyscall)
	// Re-validate the saved stack pointer after the status switch.
	if gp.syscallsp < gp.stack.lo || gp.stack.hi < gp.syscallsp {
		systemstack(func() {
			print("entersyscallblock inconsistent sp ", hex(sp), " ", hex(gp.sched.sp), " ", hex(gp.syscallsp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}
	// N.B. && binds tighter than ||, so this reads as
	// (bp != 0 && bp < lo) || hi < bp. A zero syscallbp can never exceed
	// stack.hi, so a zero frame pointer is never reported as inconsistent.
	if gp.syscallbp != 0 && gp.syscallbp < gp.stack.lo || gp.stack.hi < gp.syscallbp {
		systemstack(func() {
			print("entersyscallblock inconsistent bp ", hex(bp), " ", hex(gp.sched.bp), " ", hex(gp.syscallbp), " [", hex(gp.stack.lo), ",", hex(gp.stack.hi), "]\n")
			throw("entersyscallblock")
		})
	}
	if trace.ok() {
		systemstack(func() {
			traceRelease(trace)
		})
	}

	// Resave for traceback during blocked call.
	save(sys.GetCallerPC(), sys.GetCallerSP(), getcallerfp())

	gp.m.locks--
}

// The goroutine g exited its system call.
// Arrange for it to run on a cpu again.
// This is called only from the go syscall library, not
// from the low-level system calls used by the runtime.
//
// Write barriers are not allowed because our P may have been stolen.
//
// This is exported via linkname to assembly in the syscall package.
//
// exitsyscall should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:nosplit
//go:nowritebarrierrec
//go:linkname exitsyscall
func exitsyscall() {
	gp := getg()

	gp.m.locks++ // see comment in entersyscall
	if sys.GetCallerSP() > gp.syscallsp {
		throw("exitsyscall: syscall frame is no longer valid")
	}
	gp.waitsince = 0

	if sched.stopwait == freezeStopWait {
		// Wedge ourselves if there's an outstanding freezetheworld.
		// If we transition to running, we might end up with our traceback
		// being taken twice.
		systemstack(func() {
			// Acquiring the same lock twice blocks this thread forever.
			lock(&deadlock)
			lock(&deadlock)
		})
	}

	// Optimistically assume we're going to keep running, and switch to running.
	// Before this point, our P wiring is not ours. Once we get past this point,
	// we can access our P if we have it, otherwise we lost it.
	//
	// N.B. Because we're transitioning to _Grunning here, traceAcquire doesn't
	// need to be held ahead of time. We're effectively atomic with respect to
	// the tracer because we're non-preemptible and in the runtime. It can't stop
	// us to read a bad status.
	//
	// Try to do a quick CAS to avoid calling into casgstatus in the common case.
	// If we have a bubble, we need to fall into casgstatus.
	if gp.bubble != nil || !gp.atomicstatus.CompareAndSwap(_Gsyscall, _Grunning) {
		casgstatus(gp, _Gsyscall, _Grunning)
	}

	// Caution: we're in a window where we may be in _Grunning without a P.
	// Either we will grab a P or call exitsyscall0, where we'll switch to
	// _Grunnable.
	// NOTE(review): no exitsyscall0 is visible in this part of the file; the
	// slow path below calls exitsyscallNoP. Confirm and update the name.
	if debugExtendGrunningNoP {
		usleep(10)
	}

	// Grab and clear our old P.
	oldp := gp.m.oldp.ptr()
	gp.m.oldp.set(nil)

	// Check if we still have a P, and if not, try to acquire an idle P.
	pp := gp.m.p.ptr()
	if pp != nil {
		// Fast path: we still have our P. Just emit a syscall exit event.
		if trace := traceAcquire(); trace.ok() {
			systemstack(func() {
				// The truth is we truly never lost the P, but syscalltick
				// is used to indicate whether the P should be treated as
				// lost anyway. For example, when syscalltick is trashed by
				// dropm.
				//
				// TODO(mknyszek): Consider a more explicit mechanism for this.
				// Then syscalltick doesn't need to be trashed, and can be used
				// exclusively by sysmon for deciding when it's time to retake.
				if pp.syscalltick == gp.m.syscalltick {
					trace.GoSysExit(false)
				} else {
					// Since we need to pretend we lost the P, but nobody ever
					// took it, we need a ProcSteal event to model the loss.
					// Then, continue with everything else we'd do if we lost
					// the P.
					trace.ProcSteal(pp)
					trace.ProcStart()
					trace.GoSysExit(true)
					trace.GoStart()
				}
				traceRelease(trace)
			})
		}
	} else {
		// Slow path: we lost our P. Try to get another one.
		systemstack(func() {
			// Try to get some other P.
			if pp := exitsyscallTryGetP(oldp); pp != nil {
				// Install the P.
				acquirepNoTrace(pp)

				// We're going to start running again, so emit all the relevant events.
				if trace := traceAcquire(); trace.ok() {
					trace.ProcStart()
					trace.GoSysExit(true)
					trace.GoStart()
					traceRelease(trace)
				}
			}
		})
		// The closure's pp shadows ours; re-read the M's P to see whether
		// one was installed.
		pp = gp.m.p.ptr()
	}

	// If we have a P, clean up and exit.
	if pp != nil {
		if goroutineProfile.active {
			// Make sure that gp has had its stack written out to the goroutine
			// profile, exactly as it was when the goroutine profiler first
			// stopped the world.
			systemstack(func() {
				tryRecordGoroutineProfileWB(gp)
			})
		}

		// Increment the syscalltick for P, since we're exiting a syscall.
		pp.syscalltick++

		// Garbage collector isn't running (since we are),
		// so okay to clear syscallsp.
		gp.syscallsp = 0
		gp.m.locks--
		if gp.preempt {
			// Restore the preemption request in case we cleared it in newstack.
			gp.stackguard0 = stackPreempt
		} else {
			// Otherwise restore the real stackGuard, we clobbered it in entersyscall/entersyscallblock.
			gp.stackguard0 = gp.stack.lo + stackGuard
		}
		gp.throwsplit = false

		if sched.disable.user && !schedEnabled(gp) {
			// Scheduling of this goroutine is disabled.
			Gosched()
		}
		return
	}
	// Slowest path: We couldn't get a P, so call into the scheduler.
	gp.m.locks--

	// Call the scheduler.
	mcall(exitsyscallNoP)

	// Scheduler returned, so we're allowed to run now.
	// Delete the syscallsp information that we left for
	// the garbage collector during the system call.
	// Must wait until now because until gosched returns
	// we don't know for sure that the garbage collector
	// is not running.
	gp.syscallsp = 0
	gp.m.p.ptr().syscalltick++
	gp.throwsplit = false
}

// exitsyscall's attempt to try to get any P, if it's missing one.
// It first tries to take back oldp (if non-nil), then falls back to an
// idle P. Returns the acquired P, or nil if none could be obtained.
// On success the nGsyscallNoP accounting is undone via decGSyscallNoP.
//
// Must execute on the systemstack because exitsyscall is nosplit.
//
//go:systemstack
func exitsyscallTryGetP(oldp *p) *p {
	// Try to steal our old P back.
	if oldp != nil {
		if thread, ok := setBlockOnExitSyscall(oldp); ok {
			thread.takeP()
			decGSyscallNoP(getg().m) // We got a P for ourselves.
			thread.resume()
			return oldp
		}
	}

	// Try to get an idle P.
	// The unlocked read of sched.pidle is only a hint; pidleget is called
	// with sched.lock held and may still return nil.
	if sched.pidle != 0 {
		lock(&sched.lock)
		pp, _ := pidleget(0)
		if pp != nil && sched.sysmonwait.Load() {
			sched.sysmonwait.Store(false)
			notewakeup(&sched.sysmonnote)
		}
		unlock(&sched.lock)
		if pp != nil {
			decGSyscallNoP(getg().m) // We got a P for ourselves.
			return pp
		}
	}
	return nil
}

// exitsyscall slow path on g0.
// Failed to acquire P, enqueue gp as runnable.
//
// Called via mcall, so gp is the calling g from this M.
//
//go:nowritebarrierrec
func exitsyscallNoP(gp *g) {
	traceExitingSyscall()
	trace := traceAcquire()
	casgstatus(gp, _Grunning, _Grunnable)
	traceExitedSyscall()
	if trace.ok() {
		// Write out syscall exit eagerly.
		//
		// It's important that we write this *after* we know whether we
		// lost our P or not (determined by exitsyscall, which only calls
		// us after failing to get a P).
		trace.GoSysExit(true)
		traceRelease(trace)
	}
	decGSyscallNoP(getg().m)
	dropg()
	lock(&sched.lock)
	var pp *p
	if schedEnabled(gp) {
		pp, _ = pidleget(0)
	}
	var locked bool
	if pp == nil {
		globrunqput(gp)

		// Below, we stoplockedm if gp is locked. globrunqput releases
		// ownership of gp, so we must check if gp is locked prior to
		// committing the release by unlocking sched.lock, otherwise we
		// could race with another M transitioning gp from unlocked to
		// locked.
		locked = gp.lockedm != 0
	} else if sched.sysmonwait.Load() {
		sched.sysmonwait.Store(false)
		notewakeup(&sched.sysmonnote)
	}
	unlock(&sched.lock)
	if pp != nil {
		acquirep(pp)
		execute(gp, false) // Never returns.
	}
	if locked {
		// Wait until another thread schedules gp and so m again.
		//
		// N.B. lockedm must be this M, as this g was running on this M
		// before entersyscall.
		stoplockedm()
		execute(gp, false) // Never returns.
	}
	stopm()
	schedule() // Never returns.
}

// addGSyscallNoP must be called when a goroutine in a syscall loses its P.
// This function updates all relevant accounting.
//
// nosplit because it's called on the syscall paths.
//
//go:nosplit
func addGSyscallNoP(mp *m) {
	// It's safe to read isExtraInC here because it's only mutated
	// outside of _Gsyscall, and we know this thread is attached
	// to a goroutine in _Gsyscall and blocked from exiting.
	if !mp.isExtraInC {
		// Increment nGsyscallNoP since we're taking away a P
		// from a _Gsyscall goroutine, but only if isExtraInC
		// is not set on the M. If it is, then this thread is
		// back to being a full C thread, and will just inflate
		// the count of not-in-go goroutines. See go.dev/issue/76435.
		sched.nGsyscallNoP.Add(1)
	}
}

// decGSyscallNoP must be called whenever a goroutine in a syscall without
// a P exits the system call. This function updates all relevant accounting.
//
// nosplit because it's called from dropm.
//
//go:nosplit
func decGSyscallNoP(mp *m) {
	// Update nGsyscallNoP, but only if this is not a thread coming
	// out of C. See the comment in addGSyscallNoP. This logic must match,
	// to avoid unmatched increments and decrements.
	if !mp.isExtraInC {
		sched.nGsyscallNoP.Add(-1)
	}
}

// Called from syscall package before fork.
//
// syscall_runtime_BeforeFork is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_BeforeFork syscall.runtime_BeforeFork
//go:nosplit
func syscall_runtime_BeforeFork() {
	gp := getg().m.curg

	// Block signals during a fork, so that the child does not run
	// a signal handler before exec if a signal is sent to the process
	// group. See issue #18600.
	//
	// The locks increment is undone by syscall_runtime_AfterFork.
	gp.m.locks++
	sigsave(&gp.m.sigmask)
	sigblock(false)

	// This function is called before fork in syscall package.
	// Code between fork and exec must not allocate memory nor even try to grow stack.
	// Here we spoil g.stackguard0 to reliably detect any attempts to grow stack.
	// runtime_AfterFork will undo this in parent process, but not in child.
	gp.stackguard0 = stackFork
}

// Called from syscall package after fork in parent.
//
// syscall_runtime_AfterFork is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_AfterFork syscall.runtime_AfterFork
//go:nosplit
func syscall_runtime_AfterFork() {
	gp := getg().m.curg

	// See the comments in syscall_runtime_BeforeFork.
	gp.stackguard0 = gp.stack.lo + stackGuard

	// Restore the signal mask saved before the fork.
	msigrestore(gp.m.sigmask)

	gp.m.locks--
}

// inForkedChild is true while manipulating signals in the child process.
// This is used to avoid calling libc functions in case we are using vfork.
var inForkedChild bool

// Called from syscall package after fork in child.
// It resets non-sigignored signals to the default handler, and
// restores the signal mask in preparation for the exec.
//
// Because this might be called during a vfork, and therefore may be
// temporarily sharing address space with the parent process, this must
// not change any global variables or call into C code that may do so.
//
// syscall_runtime_AfterForkInChild is for package syscall,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
//go:linkname syscall_runtime_AfterForkInChild syscall.runtime_AfterForkInChild
//go:nosplit
//go:nowritebarrierrec
func syscall_runtime_AfterForkInChild() {
	// It's OK to change the global variable inForkedChild here
	// because we are going to change it back. There is no race here,
	// because if we are sharing address space with the parent process,
	// then the parent process can not be running concurrently.
	inForkedChild = true

	clearSignalHandlers()

	// When we are the child we are the only thread running,
	// so we know that nothing else has changed gp.m.sigmask.
	msigrestore(getg().m.sigmask)

	inForkedChild = false
}

// pendingPreemptSignals is the number of preemption signals
// that have been sent but not received. This is only used on Darwin.
// For #41702.
var pendingPreemptSignals atomic.Int32

// Called from syscall package before Exec.
// It takes execLock, which syscall_runtime_AfterExec releases.
//
//go:linkname syscall_runtime_BeforeExec syscall.runtime_BeforeExec
func syscall_runtime_BeforeExec() {
	// Prevent thread creation during exec.
	execLock.lock()

	// On Darwin, wait for all pending preemption signals to
	// be received. See issue #41702.
	if GOOS == "darwin" || GOOS == "ios" {
		// Yield until the counter drains to zero.
		for pendingPreemptSignals.Load() > 0 {
			osyield()
		}
	}
}

// Called from syscall package after Exec.
// It releases the execLock taken by syscall_runtime_BeforeExec.
//
//go:linkname syscall_runtime_AfterExec syscall.runtime_AfterExec
func syscall_runtime_AfterExec() {
	execLock.unlock()
}

// Allocate a new g, with a stack big enough for stacksize bytes.
func malg(stacksize int32) *g {
	newg := new(g)
	// A negative stacksize skips stack allocation entirely; the returned g
	// then has a zero-valued stack.
	if stacksize >= 0 {
		stacksize = round2(stackSystem + stacksize)
		systemstack(func() {
			newg.stack = stackalloc(uint32(stacksize))
			if valgrindenabled {
				newg.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(newg.stack.lo), unsafe.Pointer(newg.stack.hi))
			}
		})
		newg.stackguard0 = newg.stack.lo + stackGuard
		newg.stackguard1 = ^uintptr(0)
		// Clear the bottom word of the stack. We record g
		// there on gsignal stack during VDSO on ARM and ARM64.
		*(*uintptr)(unsafe.Pointer(newg.stack.lo)) = 0
	}
	return newg
}

// Create a new g running fn.
// Put it on the queue of g's waiting to run.
// The compiler turns a go statement into a call to this.
func newproc(fn *funcval) {
	gp := getg()
	pc := sys.GetCallerPC()
	systemstack(func() {
		newg := newproc1(fn, gp, pc, false, waitReasonZero)

		// Queue the new g on the current P's run queue.
		pp := getg().m.p.ptr()
		runqput(pp, newg, true)

		if mainStarted {
			wakep()
		}
	})
}

// Create a new g in state _Grunnable (or _Gwaiting if parked is true), starting at fn.
// callerpc is the address of the go statement that created this. The caller is responsible
// for adding the new g to the scheduler. If parked is true, waitreason must be non-zero.
func newproc1(fn *funcval, callergp *g, callerpc uintptr, parked bool, waitreason waitReason) *g {
	if fn == nil {
		fatal("go of nil func value")
	}

	mp := acquirem() // disable preemption because we hold M and P in local vars.
	pp := mp.p.ptr()
	// Prefer a recycled g from the free lists; allocate only if none is available.
	newg := gfget(pp)
	if newg == nil {
		newg = malg(stackMin)
		casgstatus(newg, _Gidle, _Gdead)
		allgadd(newg) // publishes with a g->status of Gdead so GC scanner doesn't look at uninitialized stack.
	}
	if newg.stack.hi == 0 {
		throw("newproc1: newg missing stack")
	}

	if readgstatus(newg) != _Gdead {
		throw("newproc1: new g is not Gdead")
	}

	totalSize := uintptr(4*goarch.PtrSize + sys.MinFrameSize) // extra space in case of reads slightly beyond frame
	totalSize = alignUp(totalSize, sys.StackAlign)
	sp := newg.stack.hi - totalSize
	if usesLR {
		// caller's LR
		*(*uintptr)(unsafe.Pointer(sp)) = 0
		prepGoExitFrame(sp)
	}
	if GOARCH == "arm64" {
		// caller's FP
		*(*uintptr)(unsafe.Pointer(sp - goarch.PtrSize)) = 0
	}

	// Start from a zeroed scheduling context, then fill in the fields
	// needed to begin executing fn.
	memclrNoHeapPointers(unsafe.Pointer(&newg.sched), unsafe.Sizeof(newg.sched))
	newg.sched.sp = sp
	newg.stktopsp = sp
	newg.sched.pc = abi.FuncPCABI0(goexit) + sys.PCQuantum // +PCQuantum so that previous instruction is in same function
	newg.sched.g = guintptr(unsafe.Pointer(newg))
	gostartcallfn(&newg.sched, fn)
	newg.parentGoid = callergp.goid
	newg.gopc = callerpc
	newg.ancestors = saveAncestors(callergp)
	newg.startpc = fn.fn
	newg.runningCleanups.Store(false)
	if isSystemGoroutine(newg, false) {
		sched.ngsys.Add(1)
	} else {
		// Only user goroutines inherit synctest groups and pprof labels.
		newg.bubble = callergp.bubble
		if mp.curg != nil {
			newg.labels = mp.curg.labels
		}
		if goroutineProfile.active {
			// A concurrent goroutine profile is running. It should include
			// exactly the set of goroutines that were alive when the goroutine
			// profiler first stopped the world. That does not include newg, so
			// mark it as not needing a profile before transitioning it from
			// _Gdead.
			newg.goroutineProfiled.Store(goroutineProfileSatisfied)
		}
	}
	// Track initial transition?
	newg.trackingSeq = uint8(cheaprand())
	if newg.trackingSeq%gTrackingPeriod == 0 {
		newg.tracking = true
	}
	gcController.addScannableStack(pp, int64(newg.stack.hi-newg.stack.lo))

	// Get a goid and switch to runnable. This needs to happen under traceAcquire
	// since it's a goroutine transition. See tracer invariants in trace.go.
	trace := traceAcquire()
	var status uint32 = _Grunnable
	if parked {
		status = _Gwaiting
		newg.waitreason = waitreason
	}
	if pp.goidcache == pp.goidcacheend {
		// Sched.goidgen is the last allocated id,
		// this batch must be [sched.goidgen+1, sched.goidgen+GoidCacheBatch].
		// At startup sched.goidgen=0, so main goroutine receives goid=1.
		pp.goidcache = sched.goidgen.Add(_GoidCacheBatch)
		pp.goidcache -= _GoidCacheBatch - 1
		pp.goidcacheend = pp.goidcache + _GoidCacheBatch
	}
	newg.goid = pp.goidcache
	casgstatus(newg, _Gdead, status)
	pp.goidcache++
	newg.trace.reset()
	if trace.ok() {
		trace.GoCreate(newg, newg.startpc, parked)
		traceRelease(trace)
	}

	// fips140 bubble
	newg.fipsOnlyBypass = callergp.fipsOnlyBypass

	// dit bubble
	newg.ditWanted = callergp.ditWanted

	if goexperiment.RuntimeSecret && callergp.secret > 0 {
		// while it might seem weird to have a non-zero gp.secret value
		// with no calls to secret.Do on the stack, this case is handled
		// just fine by the cleanup logic in goexit0
		// TODO: secret mode is invisible to the user if they don't ask about it via secret.Enabled
		// and can have severe performance penalties (at time of writing, wrapping the entire
		// tls handshake resulted in a 30% slowdown of the benchmarks).
		// Whether a goroutine is running in secret mode should be more visible,
		// maybe with a stack frame or some sort of bubble inspecting mechanism
		newg.secret = 1
	}

	// Set up race context.
	if raceenabled {
		newg.racectx = racegostart(callerpc)
		newg.raceignore = 0
		if newg.labels != nil {
			// See note in proflabel.go on labelSync's role in synchronizing
			// with the reads in the signal handler.
			racereleasemergeg(newg, unsafe.Pointer(&labelSync))
		}
	}
	pp.goroutinesCreated++
	releasem(mp)

	return newg
}

// saveAncestors copies previous ancestors of the given caller g and
// includes info for the current caller into a new set of tracebacks for
// a g being created.
//
// It returns nil when debug.tracebackancestors is not positive or when
// callergp has goid 0. Otherwise the result holds at most
// debug.tracebackancestors entries, with the immediate caller first.
func saveAncestors(callergp *g) *[]ancestorInfo {
	// Copy all prior info, except for the root goroutine (goid 0).
	if debug.tracebackancestors <= 0 || callergp.goid == 0 {
		return nil
	}
	var callerAncestors []ancestorInfo
	if callergp.ancestors != nil {
		callerAncestors = *callergp.ancestors
	}
	n := int32(len(callerAncestors)) + 1
	if n > debug.tracebackancestors {
		n = debug.tracebackancestors
	}
	ancestors := make([]ancestorInfo, n)
	// Slot 0 is reserved for the caller; copy drops the oldest entries
	// that do not fit.
	copy(ancestors[1:], callerAncestors)

	var pcs [tracebackInnerFrames]uintptr
	npcs := gcallers(callergp, 0, pcs[:])
	ipcs := make([]uintptr, npcs)
	copy(ipcs, pcs[:])
	ancestors[0] = ancestorInfo{
		pcs:  ipcs,
		goid: callergp.goid,
		gopc: callergp.gopc,
	}

	ancestorsp := new([]ancestorInfo)
	*ancestorsp = ancestors
	return ancestorsp
}

// Put on gfree list.
// If local list is too long, transfer a batch to the global list.
func gfput(pp *p, gp *g) {
	if readgstatus(gp) != _Gdead {
		throw("gfput: bad status (not Gdead)")
	}

	stksize := gp.stack.hi - gp.stack.lo

	if stksize != uintptr(startingStackSize) {
		// non-standard stack size - free it.
		stackfree(gp.stack)
		gp.stack.lo = 0
		gp.stack.hi = 0
		gp.stackguard0 = 0
		if valgrindenabled {
			valgrindDeregisterStack(gp.valgrindStackID)
			gp.valgrindStackID = 0
		}
	}

	pp.gFree.push(gp)
	// Once the local list reaches 64 entries, move Gs to the global lists
	// until fewer than 32 remain locally, sorted by whether they still
	// have a stack.
	if pp.gFree.size >= 64 {
		var (
			stackQ   gQueue
			noStackQ gQueue
		)
		for pp.gFree.size >= 32 {
			gp := pp.gFree.pop()
			if gp.stack.lo == 0 {
				noStackQ.push(gp)
			} else {
				stackQ.push(gp)
			}
		}
		lock(&sched.gFree.lock)
		sched.gFree.noStack.pushAll(noStackQ)
		sched.gFree.stack.pushAll(stackQ)
		unlock(&sched.gFree.lock)
	}
}

// Get from gfree list.
// If local list is empty, grab a batch from global list.
// Returns nil if no free g is available. A returned g always has a stack.
func gfget(pp *p) *g {
retry:
	if pp.gFree.empty() && (!sched.gFree.stack.empty() || !sched.gFree.noStack.empty()) {
		lock(&sched.gFree.lock)
		// Move a batch of free Gs to the P.
		for pp.gFree.size < 32 {
			// Prefer Gs with stacks.
			gp := sched.gFree.stack.pop()
			if gp == nil {
				gp = sched.gFree.noStack.pop()
				if gp == nil {
					break
				}
			}
			pp.gFree.push(gp)
		}
		unlock(&sched.gFree.lock)
		// The emptiness check above ran without the lock, so re-evaluate it.
		goto retry
	}
	gp := pp.gFree.pop()
	if gp == nil {
		return nil
	}
	if gp.stack.lo != 0 && gp.stack.hi-gp.stack.lo != uintptr(startingStackSize) {
		// Deallocate old stack. We kept it in gfput because it was the
		// right size when the goroutine was put on the free list, but
		// the right size has changed since then.
		systemstack(func() {
			stackfree(gp.stack)
			gp.stack.lo = 0
			gp.stack.hi = 0
			gp.stackguard0 = 0
			if valgrindenabled {
				valgrindDeregisterStack(gp.valgrindStackID)
				gp.valgrindStackID = 0
			}
		})
	}
	if gp.stack.lo == 0 {
		// Stack was deallocated in gfput or just above. Allocate a new one.
		systemstack(func() {
			gp.stack = stackalloc(startingStackSize)
			if valgrindenabled {
				gp.valgrindStackID = valgrindRegisterStack(unsafe.Pointer(gp.stack.lo), unsafe.Pointer(gp.stack.hi))
			}
		})
		gp.stackguard0 = gp.stack.lo + stackGuard
	} else {
		// Reusing an existing stack: tell the enabled sanitizers about it.
		if raceenabled {
			racemalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if msanenabled {
			msanmalloc(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
		if asanenabled {
			asanunpoison(unsafe.Pointer(gp.stack.lo), gp.stack.hi-gp.stack.lo)
		}
	}
	return gp
}

// Purge all cached G's from gfree list to the global list.
func gfpurge(pp *p) {
	var (
		stackQ   gQueue
		noStackQ gQueue
	)
	// Drain the local list into two queues first so that sched.gFree.lock
	// is only held for the final pushAll calls.
	for !pp.gFree.empty() {
		gp := pp.gFree.pop()
		if gp.stack.lo == 0 {
			noStackQ.push(gp)
		} else {
			stackQ.push(gp)
		}
	}
	lock(&sched.gFree.lock)
	sched.gFree.noStack.pushAll(noStackQ)
	sched.gFree.stack.pushAll(stackQ)
	unlock(&sched.gFree.lock)
}

// Breakpoint executes a breakpoint trap.
func Breakpoint() {
	breakpoint()
}

// dolockOSThread is called by LockOSThread and lockOSThread below
// after they modify m.locked. Do not allow preemption during this call,
// or else the m might be different in this function than in the caller.
//
//go:nosplit
func dolockOSThread() {
	if GOARCH == "wasm" {
		return // no threads on wasm yet
	}
	// Wire the g and its m to each other.
	gp := getg()
	gp.m.lockedg.set(gp)
	gp.lockedm.set(gp.m)
}

// LockOSThread wires the calling goroutine to its current operating system thread.
// The calling goroutine will always execute in that thread,
// and no other goroutine will execute in it,
// until the calling goroutine has made as many calls to
// [UnlockOSThread] as to LockOSThread.
5663 // If the calling goroutine exits without unlocking the thread, 5664 // the thread will be terminated. 5665 // 5666 // All init functions are run on the startup thread. Calling LockOSThread 5667 // from an init function will cause the main function to be invoked on 5668 // that thread. 5669 // 5670 // A goroutine should call LockOSThread before calling OS services or 5671 // non-Go library functions that depend on per-thread state. 5672 // 5673 //go:nosplit 5674 func LockOSThread() { 5675 if atomic.Load(&newmHandoff.haveTemplateThread) == 0 && GOOS != "plan9" { 5676 // If we need to start a new thread from the locked 5677 // thread, we need the template thread. Start it now 5678 // while we're in a known-good state. 5679 startTemplateThread() 5680 } 5681 gp := getg() 5682 gp.m.lockedExt++ 5683 if gp.m.lockedExt == 0 { 5684 gp.m.lockedExt-- 5685 panic("LockOSThread nesting overflow") 5686 } 5687 dolockOSThread() 5688 } 5689 5690 //go:nosplit 5691 func lockOSThread() { 5692 getg().m.lockedInt++ 5693 dolockOSThread() 5694 } 5695 5696 // dounlockOSThread is called by UnlockOSThread and unlockOSThread below 5697 // after they update m->locked. Do not allow preemption during this call, 5698 // or else the m might be in different in this function than in the caller. 5699 // 5700 //go:nosplit 5701 func dounlockOSThread() { 5702 if GOARCH == "wasm" { 5703 return // no threads on wasm yet 5704 } 5705 gp := getg() 5706 if gp.m.lockedInt != 0 || gp.m.lockedExt != 0 { 5707 return 5708 } 5709 gp.m.lockedg = 0 5710 gp.lockedm = 0 5711 } 5712 5713 // UnlockOSThread undoes an earlier call to LockOSThread. 5714 // If this drops the number of active LockOSThread calls on the 5715 // calling goroutine to zero, it unwires the calling goroutine from 5716 // its fixed operating system thread. 5717 // If there are no active LockOSThread calls, this is a no-op. 
//
// Before calling UnlockOSThread, the caller must ensure that the OS
// thread is suitable for running other goroutines. If the caller made
// any permanent changes to the state of the thread that would affect
// other goroutines, it should not call this function and thus leave
// the goroutine locked to the OS thread until the goroutine (and
// hence the thread) exits.
//
//go:nosplit
func UnlockOSThread() {
	gp := getg()
	if gp.m.lockedExt == 0 {
		return
	}
	gp.m.lockedExt--
	dounlockOSThread()
}

// unlockOSThread is the runtime-internal counterpart of UnlockOSThread.
// Unlike UnlockOSThread, an unbalanced call (m.lockedInt already zero) is
// treated as a fatal internal error.
//
//go:nosplit
func unlockOSThread() {
	gp := getg()
	if gp.m.lockedInt == 0 {
		systemstack(badunlockosthread)
	}
	gp.m.lockedInt--
	dounlockOSThread()
}

// badunlockosthread throws; it is run on the system stack by
// unlockOSThread when the internal lock count is already zero.
func badunlockosthread() {
	throw("runtime: internal error: misuse of lockOSThread/unlockOSThread")
}

// gcount returns an estimate of the number of goroutines: allglen minus
// the Gs sitting on the global and per-P free lists, and, unless
// includeSys is true, minus sched.ngsys. The result is never less than 1.
func gcount(includeSys bool) int32 {
	n := int32(atomic.Loaduintptr(&allglen)) - sched.gFree.stack.size - sched.gFree.noStack.size
	if !includeSys {
		n -= sched.ngsys.Load()
	}
	for _, pp := range allp {
		n -= pp.gFree.size
	}

	// All these variables can be changed concurrently, so the result can be inconsistent.
	// But at least the current goroutine is running.
	if n < 1 {
		n = 1
	}
	return n
}

// goroutineleakcount returns the number of leaked goroutines last reported by
// the runtime.
//
//go:linkname goroutineleakcount runtime/pprof.runtime_goroutineleakcount
func goroutineleakcount() int {
	return work.goroutineLeak.count
}

// mcount returns sched.mnext minus sched.nmfreed.
func mcount() int32 {
	return int32(sched.mnext - sched.nmfreed)
}

var prof struct {
	signalLock atomic.Uint32

	// Must hold signalLock to write. Reads may be lock-free, but
	// signalLock should be taken to synchronize with changes.
	hz atomic.Int32
}

// The functions below do no useful work (each simply calls itself).
// sigprof uses the PCs of _VDSO, _ExternalCode, _GC and _System as
// synthetic frames when it cannot produce a real traceback.
func _System()                    { _System() }
func _ExternalCode()              { _ExternalCode() }
func _LostExternalCode()          { _LostExternalCode() }
func _GC()                        { _GC() }
func _LostSIGPROFDuringAtomic64() { _LostSIGPROFDuringAtomic64() }
func _LostContendedRuntimeLock()  { _LostContendedRuntimeLock() }
func _VDSO()                      { _VDSO() }

// Called if we receive a SIGPROF signal.
// Called by the signal handler, may run during STW.
//
// pc, sp and lr describe the interrupted context, gp is the interrupted
// goroutine (may be nil, see below) and mp is the thread being profiled.
//
// NOTE(review): mp is nil-checked in two places but dereferenced
// unconditionally elsewhere (mp.ncgo, mp.preemptoff); presumably callers
// never pass a nil mp — confirm.
//
//go:nowritebarrierrec
func sigprof(pc, sp, lr uintptr, gp *g, mp *m) {
	if prof.hz.Load() == 0 {
		return
	}

	// If mp.profilehz is 0, then profiling is not enabled for this thread.
	// We must check this to avoid a deadlock between setcpuprofilerate
	// and the call to cpuprof.add, below.
	if mp != nil && mp.profilehz == 0 {
		return
	}

	// On mips{,le}/arm, 64bit atomics are emulated with spinlocks, in
	// internal/runtime/atomic. If SIGPROF arrives while the program is inside
	// the critical section, it creates a deadlock (when writing the sample).
	// As a workaround, create a counter of SIGPROFs while in critical section
	// to store the count, and pass it to sigprof.add() later when SIGPROF is
	// received from somewhere else (with _LostSIGPROFDuringAtomic64 as pc).
	if GOARCH == "mips" || GOARCH == "mipsle" || GOARCH == "arm" {
		if f := findfunc(pc); f.valid() {
			if stringslite.HasPrefix(funcname(f), "internal/runtime/atomic") {
				cpuprof.lostAtomic++
				return
			}
		}
		if GOARCH == "arm" && goarm < 7 && GOOS == "linux" && pc&0xffff0000 == 0xffff0000 {
			// internal/runtime/atomic functions call into kernel
			// helpers on arm < 7. See
			// internal/runtime/atomic/sys_linux_arm.s.
			cpuprof.lostAtomic++
			return
		}
	}

	// Profiling runs concurrently with GC, so it must not allocate.
	// Set a trap in case the code does allocate.
	// Note that on windows, one thread takes profiles of all the
	// other threads, so mp is usually not getg().m.
	// In fact mp may not even be stopped.
	// See golang.org/issue/17165.
	getg().m.mallocing++

	var u unwinder
	var stk [maxCPUProfStack]uintptr
	n := 0
	// Pick the starting point of the traceback based on what the thread
	// was doing: a cgo call, a libcall, a VDSO call, or ordinary code.
	if mp.ncgo > 0 && mp.curg != nil && mp.curg.syscallpc != 0 && mp.curg.syscallsp != 0 {
		cgoOff := 0
		// Check cgoCallersUse to make sure that we are not
		// interrupting other code that is fiddling with
		// cgoCallers. We are running in a signal handler
		// with all signals blocked, so we don't have to worry
		// about any other code interrupting us.
		if mp.cgoCallersUse.Load() == 0 && mp.cgoCallers != nil && mp.cgoCallers[0] != 0 {
			// Count the leading non-zero entries of cgoCallers.
			for cgoOff < len(mp.cgoCallers) && mp.cgoCallers[cgoOff] != 0 {
				cgoOff++
			}
			n += copy(stk[:], mp.cgoCallers[:cgoOff])
			mp.cgoCallers[0] = 0
		}

		// Collect Go stack that leads to the cgo call.
		u.initAt(mp.curg.syscallpc, mp.curg.syscallsp, 0, mp.curg, unwindSilentErrors)
	} else if usesLibcall() && mp.libcallg != 0 && mp.libcallpc != 0 && mp.libcallsp != 0 {
		// Libcall, i.e. runtime syscall on windows.
		// Collect Go stack that leads to the call.
		u.initAt(mp.libcallpc, mp.libcallsp, 0, mp.libcallg.ptr(), unwindSilentErrors)
	} else if mp != nil && mp.vdsoSP != 0 {
		// VDSO call, e.g. nanotime1 on Linux.
		// Collect Go stack that leads to the call.
		u.initAt(mp.vdsoPC, mp.vdsoSP, 0, gp, unwindSilentErrors|unwindJumpStack)
	} else {
		u.initAt(pc, sp, lr, gp, unwindSilentErrors|unwindTrap|unwindJumpStack)
	}
	// Append the Go frames after any cgo frames already copied into stk.
	n += tracebackPCs(&u, 0, stk[n:])

	if n <= 0 {
		// Normal traceback is impossible or has failed.
		// Account it against abstract "System" or "GC".
		n = 2
		if inVDSOPage(pc) {
			pc = abi.FuncPCABIInternal(_VDSO) + sys.PCQuantum
		} else if pc > firstmoduledata.etext {
			// "ExternalCode" is better than "etext".
			pc = abi.FuncPCABIInternal(_ExternalCode) + sys.PCQuantum
		}
		stk[0] = pc
		if mp.preemptoff != "" {
			stk[1] = abi.FuncPCABIInternal(_GC) + sys.PCQuantum
		} else {
			stk[1] = abi.FuncPCABIInternal(_System) + sys.PCQuantum
		}
	}

	if prof.hz.Load() != 0 {
		// Note: it can happen on Windows that we interrupted a system thread
		// with no g, so gp could be nil. The other nil checks are done out of
		// caution, but not expected to be nil in practice.
		var tagPtr *unsafe.Pointer
		if gp != nil && gp.m != nil && gp.m.curg != nil {
			tagPtr = &gp.m.curg.labels
		}
		cpuprof.add(tagPtr, stk[:n])

		gprof := gp
		// N.B. this mp shadows the parameter; it is derived from gp.m.
		var mp *m
		var pp *p
		if gp != nil && gp.m != nil {
			if gp.m.curg != nil {
				gprof = gp.m.curg
			}
			mp = gp.m
			pp = gp.m.p.ptr()
		}
		traceCPUSample(gprof, mp, pp, stk[:n])
	}
	getg().m.mallocing--
}

// setcpuprofilerate sets the CPU profiling rate to hz times per second.
// If hz <= 0, setcpuprofilerate turns off CPU profiling.
func setcpuprofilerate(hz int32) {
	// Force sane arguments.
	if hz < 0 {
		hz = 0
	}

	// Disable preemption, otherwise we can be rescheduled to another thread
	// that has profiling enabled.
	gp := getg()
	gp.m.locks++

	// Stop profiler on this thread so that it is safe to lock prof.
	// if a profiling signal came in while we had prof locked,
	// it would deadlock.
5933 setThreadCPUProfiler(0) 5934 5935 for !prof.signalLock.CompareAndSwap(0, 1) { 5936 osyield() 5937 } 5938 if prof.hz.Load() != hz { 5939 setProcessCPUProfiler(hz) 5940 prof.hz.Store(hz) 5941 } 5942 prof.signalLock.Store(0) 5943 5944 lock(&sched.lock) 5945 sched.profilehz = hz 5946 unlock(&sched.lock) 5947 5948 if hz != 0 { 5949 setThreadCPUProfiler(hz) 5950 } 5951 5952 gp.m.locks-- 5953 } 5954 5955 // init initializes pp, which may be a freshly allocated p or a 5956 // previously destroyed p, and transitions it to status _Pgcstop. 5957 func (pp *p) init(id int32) { 5958 pp.id = id 5959 pp.gcw.id = id 5960 pp.status = _Pgcstop 5961 pp.sudogcache = pp.sudogbuf[:0] 5962 pp.deferpool = pp.deferpoolbuf[:0] 5963 pp.wbBuf.reset() 5964 if pp.mcache == nil { 5965 if id == 0 { 5966 if mcache0 == nil { 5967 throw("missing mcache?") 5968 } 5969 // Use the bootstrap mcache0. Only one P will get 5970 // mcache0: the one with ID 0. 5971 pp.mcache = mcache0 5972 } else { 5973 pp.mcache = allocmcache() 5974 } 5975 } 5976 if raceenabled && pp.raceprocctx == 0 { 5977 if id == 0 { 5978 pp.raceprocctx = raceprocctx0 5979 raceprocctx0 = 0 // bootstrap 5980 } else { 5981 pp.raceprocctx = raceproccreate() 5982 } 5983 } 5984 lockInit(&pp.timers.mu, lockRankTimers) 5985 5986 // This P may get timers when it starts running. Set the mask here 5987 // since the P may not go through pidleget (notably P 0 on startup). 5988 timerpMask.set(id) 5989 // Similarly, we may not go through pidleget before this P starts 5990 // running if it is P 0 on startup. 5991 idlepMask.clear(id) 5992 } 5993 5994 // destroy releases all of the resources associated with pp and 5995 // transitions it to status _Pdead. 5996 // 5997 // sched.lock must be held and the world must be stopped. 
func (pp *p) destroy() {
	assertLockHeld(&sched.lock)
	assertWorldStopped()

	// Move all runnable goroutines to the global queue
	for pp.runqhead != pp.runqtail {
		// Pop from tail of local queue
		pp.runqtail--
		gp := pp.runq[pp.runqtail%uint32(len(pp.runq))].ptr()
		// Push onto head of global queue
		globrunqputhead(gp)
	}
	if pp.runnext != 0 {
		globrunqputhead(pp.runnext.ptr())
		pp.runnext = 0
	}

	// Move all timers to the local P.
	getg().m.p.ptr().timers.take(&pp.timers)

	// No need to flush p's write barrier buffer or span queue, as Ps
	// cannot be destroyed during the mark phase.
	if phase := gcphase; phase != _GCoff {
		println("runtime: p id", pp.id, "destroyed during GC phase", phase)
		throw("P destroyed while GC is running")
	}
	// We should free the queues though.
	pp.gcw.spanq.destroy()

	// Drop cached sudogs, pinners and defers so they can be collected.
	clear(pp.sudogbuf[:])
	pp.sudogcache = pp.sudogbuf[:0]
	pp.pinnerCache = nil
	clear(pp.deferpoolbuf[:])
	pp.deferpool = pp.deferpoolbuf[:0]
	systemstack(func() {
		for i := 0; i < pp.mspancache.len; i++ {
			// Safe to call since the world is stopped.
			mheap_.spanalloc.free(unsafe.Pointer(pp.mspancache.buf[i]))
		}
		pp.mspancache.len = 0
		lock(&mheap_.lock)
		pp.pcache.flush(&mheap_.pages)
		unlock(&mheap_.lock)
	})
	freemcache(pp.mcache)
	pp.mcache = nil
	gfpurge(pp)
	if raceenabled {
		if pp.timers.raceCtx != 0 {
			// The race detector code uses a callback to fetch
			// the proc context, so arrange for that callback
			// to see the right thing.
			// This hack only works because we are the only
			// thread running.
			mp := getg().m
			phold := mp.p.ptr()
			mp.p.set(pp)

			racectxend(pp.timers.raceCtx)
			pp.timers.raceCtx = 0

			mp.p.set(phold)
		}
		raceprocdestroy(pp.raceprocctx)
		pp.raceprocctx = 0
	}
	pp.gcAssistTime = 0
	// Fold this P's local counters into the global totals before it dies.
	gcCleanups.queued += pp.cleanupsQueued
	pp.cleanupsQueued = 0
	sched.goroutinesCreated.Add(int64(pp.goroutinesCreated))
	pp.goroutinesCreated = 0
	pp.xRegs.free()
	pp.status = _Pdead
}

// Change number of processors.
//
// sched.lock must be held, and the world must be stopped.
//
// gcworkbufs must not be being modified by either the GC or the write barrier
// code, so the GC must not be running if the number of Ps actually changes.
//
// Returns list of Ps with local work, they need to be scheduled by the caller.
func procresize(nprocs int32) *p {
	assertLockHeld(&sched.lock)
	assertWorldStopped()

	old := gomaxprocs
	if old < 0 || nprocs <= 0 {
		throw("procresize: invalid arg")
	}
	trace := traceAcquire()
	if trace.ok() {
		trace.Gomaxprocs(nprocs)
		traceRelease(trace)
	}

	// update statistics
	now := nanotime()
	if sched.procresizetime != 0 {
		sched.totaltime += int64(old) * (now - sched.procresizetime)
	}
	sched.procresizetime = now

	// Grow allp if necessary.
	if nprocs > int32(len(allp)) {
		// Synchronize with retake, which could be running
		// concurrently since it doesn't run on a P.
		lock(&allpLock)
		if nprocs <= int32(cap(allp)) {
			allp = allp[:nprocs]
		} else {
			nallp := make([]*p, nprocs)
			// Copy everything up to allp's cap so we
			// never lose old allocated Ps.
			copy(nallp, allp[:cap(allp)])
			allp = nallp
		}

		idlepMask = idlepMask.resize(nprocs)
		timerpMask = timerpMask.resize(nprocs)
		work.spanqMask = work.spanqMask.resize(nprocs)
		unlock(&allpLock)
	}

	// initialize new P's
	for i := old; i < nprocs; i++ {
		pp := allp[i]
		if pp == nil {
			pp = new(p)
		}
		pp.init(i)
		atomicstorep(unsafe.Pointer(&allp[i]), unsafe.Pointer(pp))
	}

	gp := getg()
	if gp.m.p != 0 && gp.m.p.ptr().id < nprocs {
		// continue to use the current P
		gp.m.p.ptr().status = _Prunning
		gp.m.p.ptr().mcache.prepareForSweep()
	} else {
		// release the current P and acquire allp[0].
		//
		// We must do this before destroying our current P
		// because p.destroy itself has write barriers, so we
		// need to do that from a valid P.
		if gp.m.p != 0 {
			trace := traceAcquire()
			if trace.ok() {
				// Pretend that we were descheduled
				// and then scheduled again to keep
				// the trace consistent.
				trace.GoSched()
				trace.ProcStop(gp.m.p.ptr())
				traceRelease(trace)
			}
			gp.m.p.ptr().m = 0
		}
		gp.m.p = 0
		pp := allp[0]
		pp.m = 0
		pp.status = _Pidle
		acquirep(pp)
		trace := traceAcquire()
		if trace.ok() {
			trace.GoStart()
			traceRelease(trace)
		}
	}

	// g.m.p is now set, so we no longer need mcache0 for bootstrapping.
	mcache0 = nil

	// release resources from unused P's
	for i := nprocs; i < old; i++ {
		pp := allp[i]
		pp.destroy()
		// can't free P itself because it can be referenced by an M in syscall
	}

	// Trim allp.
	if int32(len(allp)) != nprocs {
		lock(&allpLock)
		allp = allp[:nprocs]
		idlepMask = idlepMask.resize(nprocs)
		timerpMask = timerpMask.resize(nprocs)
		work.spanqMask = work.spanqMask.resize(nprocs)
		unlock(&allpLock)
	}

	// Assign Ms to Ps with runnable goroutines.
	var runnablePs *p
	var runnablePsNeedM *p
	var idlePs *p
	for i := nprocs - 1; i >= 0; i-- {
		pp := allp[i]
		// The P we are running on stays with us; it is not put on any list.
		if gp.m.p.ptr() == pp {
			continue
		}
		pp.status = _Pidle
		if runqempty(pp) {
			pp.link.set(idlePs)
			idlePs = pp
			continue
		}

		// Prefer to run on the most recent M if it is
		// available.
		//
		// For Ps with no oldm (or for which oldm is already taken
		// by an earlier P), we delay until all oldm Ps are
		// handled. Otherwise, mget may return an M that a
		// later P has in oldm.
		var mp *m
		if oldm := pp.oldm.get(); oldm != nil {
			// Returns nil if oldm is not idle.
			mp = mgetSpecific(oldm)
		}
		if mp == nil {
			// Call mget later.
			pp.link.set(runnablePsNeedM)
			runnablePsNeedM = pp
			continue
		}
		pp.m.set(mp)
		pp.link.set(runnablePs)
		runnablePs = pp
	}
	// Assign Ms to remaining runnable Ps without usable oldm. See comment
	// above.
	for runnablePsNeedM != nil {
		pp := runnablePsNeedM
		runnablePsNeedM = pp.link.ptr()

		mp := mget()
		pp.m.set(mp)
		pp.link.set(runnablePs)
		runnablePs = pp
	}

	// Now that we've assigned Ms to Ps with runnable goroutines, assign GC
	// mark workers to remaining idle Ps, if needed.
	//
	// By assigning GC workers to Ps here, we slightly speed up starting
	// the world, as we will start enough Ps to run all of the user
	// goroutines and GC mark workers all at once, rather than using a
	// sequence of wakep calls as each P's findRunnable realizes it needs
	// to run a mark worker instead of a user goroutine.
	//
	// By assigning GC workers to Ps only _after_ previously-running Ps are
	// assigned Ms, we ensure that goroutines previously running on a P
	// continue to run on the same P, with GC mark workers preferring
	// previously-idle Ps. This helps prevent goroutines from shuffling
	// around too much across STW.
	//
	// N.B., if there aren't enough Ps left in idlePs for all of the GC
	// mark workers, then findRunnable will still choose to run mark
	// workers on Ps assigned above.
	//
	// N.B., we do this during any STW in the mark phase, not just the
	// sweep termination STW that starts the mark phase. gcBgMarkWorker
	// always preempts by removing itself from the P, so even unrelated
	// STWs during the mark require that Ps reselect mark workers upon
	// restart.
	if gcBlackenEnabled != 0 {
		for idlePs != nil {
			pp := idlePs

			ok, _ := gcController.assignWaitingGCWorker(pp, now)
			if !ok {
				// No more mark workers needed.
				break
			}

			// Got a worker, P is now runnable.
			//
			// mget may return nil if there aren't enough Ms, in
			// which case startTheWorldWithSema will start one.
			//
			// N.B. findRunnableGCWorker will make the worker G
			// itself runnable.
			idlePs = pp.link.ptr()
			mp := mget()
			pp.m.set(mp)
			pp.link.set(runnablePs)
			runnablePs = pp
		}
	}

	// Finally, any remaining Ps are truly idle.
	for idlePs != nil {
		pp := idlePs
		idlePs = pp.link.ptr()
		pidleput(pp, now)
	}

	stealOrder.reset(uint32(nprocs))
	var int32p *int32 = &gomaxprocs // make compiler check that gomaxprocs is an int32
	atomic.Store((*uint32)(unsafe.Pointer(int32p)), uint32(nprocs))
	if old != nprocs {
		// Notify the limiter that the amount of procs has changed.
		gcCPULimiter.resetCapacity(now, nprocs)
	}
	return runnablePs
}

// Associate p and the current m.
//
// This function is allowed to have write barriers even if the caller
// isn't because it immediately acquires pp.
//
//go:yeswritebarrierrec
func acquirep(pp *p) {
	// Do the work.
	acquirepNoTrace(pp)

	// Emit the event.
	trace := traceAcquire()
	if trace.ok() {
		trace.ProcStart()
		traceRelease(trace)
	}
}

// Internals of acquirep, just skipping the trace events.
//
//go:yeswritebarrierrec
func acquirepNoTrace(pp *p) {
	// Do the part that isn't allowed to have write barriers.
	wirep(pp)

	// Have p; write barriers now allowed.

	// The M we're associating with will be the old M after the next
	// releasep. We must set this here because write barriers are not
	// allowed in releasep.
	pp.oldm = pp.m.ptr().self

	// Perform deferred mcache flush before this P can allocate
	// from a potentially stale mcache.
	pp.mcache.prepareForSweep()
}

// wirep is the first step of acquirep, which actually associates the
// current M to pp. This is broken out so we can disallow write
// barriers for this part, since we don't yet have a P.
//
// It throws if the current M already has a P, or if pp is already wired
// to an M or is not in _Pidle.
//
//go:nowritebarrierrec
//go:nosplit
func wirep(pp *p) {
	gp := getg()

	if gp.m.p != 0 {
		// Call on the systemstack to avoid a nosplit overflow build failure
		// on some platforms when built with -N -l. See #64113.
		systemstack(func() {
			throw("wirep: already in go")
		})
	}
	if pp.m != 0 || pp.status != _Pidle {
		// Call on the systemstack to avoid a nosplit overflow build failure
		// on some platforms when built with -N -l. See #64113.
		systemstack(func() {
			id := int64(0)
			if pp.m != 0 {
				id = pp.m.ptr().id
			}
			print("wirep: p->m=", pp.m, "(", id, ") p->status=", pp.status, "\n")
			throw("wirep: invalid p state")
		})
	}
	gp.m.p.set(pp)
	pp.m.set(gp.m)
	pp.status = _Prunning
}

// Disassociate p and the current m.
//
// Emits a ProcStop trace event (when tracing is enabled) and returns the
// P that was released.
func releasep() *p {
	trace := traceAcquire()
	if trace.ok() {
		trace.ProcStop(getg().m.p.ptr())
		traceRelease(trace)
	}
	return releasepNoTrace()
}

// Disassociate p and the current m without tracing an event.
//
// The released P is left in _Pidle and returned. Throws if the current M
// has no P, or if the P is not wired to this M in _Prunning.
func releasepNoTrace() *p {
	gp := getg()

	if gp.m.p == 0 {
		throw("releasep: invalid arg")
	}
	pp := gp.m.p.ptr()
	if pp.m.ptr() != gp.m || pp.status != _Prunning {
		print("releasep: m=", gp.m, " m->p=", gp.m.p.ptr(), " p->m=", hex(pp.m), " p->status=", pp.status, "\n")
		throw("releasep: invalid p state")
	}

	// P must clear its nextGCMarkWorker if it stops.
	gcController.releaseNextGCMarkWorker(pp)

	gp.m.p = 0
	pp.m = 0
	pp.status = _Pidle
	return pp
}

// incidlelocked adds v to sched.nmidlelocked while holding sched.lock.
// When the count goes up (v > 0) one fewer M is considered running, so
// the deadlock check is re-run.
func incidlelocked(v int32) {
	lock(&sched.lock)
	sched.nmidlelocked += v
	if v > 0 {
		checkdead()
	}
	unlock(&sched.lock)
}

// Check for deadlock situation.
// The check is based on number of running M's, if 0 -> deadlock.
// sched.lock must be held.
func checkdead() {
	assertLockHeld(&sched.lock)

	// For -buildmode=c-shared or -buildmode=c-archive it's OK if
	// there are no running goroutines. The calling program is
	// assumed to be running.
	// One exception is Wasm, which is single-threaded. If we are
	// in Go and all goroutines are blocked, it deadlocks.
	if (islibrary || isarchive) && GOARCH != "wasm" {
		return
	}

	// If we are dying because of a signal caught on an already idle thread,
	// freezetheworld will cause all running threads to block.
	// And runtime will essentially enter into deadlock state,
	// except that there is a thread that will call exit soon.
	if panicking.Load() > 0 {
		return
	}

	// If we are not running under cgo, but we have an extra M then account
	// for it. (It is possible to have an extra M on Windows without cgo to
	// accommodate callbacks created by syscall.NewCallback. See issue #6751
	// for details.)
	var run0 int32
	if !iscgo && cgoHasExtraM && extraMLength.Load() > 0 {
		run0 = 1
	}

	// run is the number of Ms that are neither idle, idle-locked, nor
	// system Ms.
	run := mcount() - sched.nmidle - sched.nmidlelocked - sched.nmsys
	if run > run0 {
		return
	}
	if run < 0 {
		print("runtime: checkdead: nmidle=", sched.nmidle, " nmidlelocked=", sched.nmidlelocked, " mcount=", mcount(), " nmsys=", sched.nmsys, "\n")
		unlock(&sched.lock)
		throw("checkdead: inconsistent counts")
	}

	// Despite its name, grunning counts non-system goroutines that are
	// waiting or preempted; any runnable, running or syscalling goroutine
	// at this point is an inconsistency.
	grunning := 0
	forEachG(func(gp *g) {
		if isSystemGoroutine(gp, false) {
			return
		}
		s := readgstatus(gp)
		switch s &^ _Gscan {
		case _Gwaiting,
			_Gpreempted:
			grunning++
		case _Grunnable,
			_Grunning,
			_Gsyscall:
			print("runtime: checkdead: find g ", gp.goid, " in status ", s, "\n")
			unlock(&sched.lock)
			throw("checkdead: runnable g")
		}
	})
	if grunning == 0 { // possible if main goroutine calls runtime·Goexit()
		unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
		fatal("no goroutines (main called runtime.Goexit) - deadlock!")
	}

	// Maybe jump time forward for playground.
	if faketime != 0 {
		if when := timeSleepUntil(); when < maxWhen {
			faketime = when

			// Start an M to steal the timer.
			pp, _ := pidleget(faketime)
			if pp == nil {
				// There should always be a free P since
				// nothing is running.
				unlock(&sched.lock)
				throw("checkdead: no p for timer")
			}
			mp := mget()
			if mp == nil {
				// There should always be a free M since
				// nothing is running.
				unlock(&sched.lock)
				throw("checkdead: no m for timer")
			}
			// M must be spinning to steal. We set this to be
			// explicit, but since this is the only M it would
			// become spinning on its own anyways.
			sched.nmspinning.Add(1)
			mp.spinning = true
			mp.nextp.set(pp)
			notewakeup(&mp.park)
			return
		}
	}

	// There are no goroutines running, so we can look at the P's.
	// A pending timer on any P means something may still wake up.
	for _, pp := range allp {
		if len(pp.timers.heap) > 0 {
			return
		}
	}

	unlock(&sched.lock) // unlock so that GODEBUG=scheddetail=1 doesn't hang
	fatal("all goroutines are asleep - deadlock!")
}

// forcegcperiod is the maximum time in nanoseconds between garbage
// collections. If we go this long without a garbage collection, one
// is forced to run.
//
// This is a variable for testing purposes. It normally doesn't change.
var forcegcperiod int64 = 2 * 60 * 1e9

// haveSysmon indicates whether there is sysmon thread support.
//
// No threads on wasm yet, so no sysmon.
const haveSysmon = GOARCH != "wasm"

// sysmon is the system monitor loop. It never returns. On each iteration
// it sleeps (20us at first, backing off to at most 10ms while nothing
// needs attention), then polls the network if it has not been polled for
// more than 10ms, optionally updates GOMAXPROCS, wakes the scavenger on
// request, retakes Ps via retake, forces a GC when the time trigger
// fires, and prints the scheduler trace when debug.schedtrace is set.
//
// Always runs without a P, so write barriers are not allowed.
//
//go:nowritebarrierrec
func sysmon() {
	lock(&sched.lock)
	sched.nmsys++
	checkdead()
	unlock(&sched.lock)

	lastgomaxprocs := int64(0)
	lasttrace := int64(0)
	idle := 0 // how many cycles in succession we have not woken anybody up
	delay := uint32(0)

	for {
		if idle == 0 { // start with 20us sleep...
			delay = 20
		} else if idle > 50 { // start doubling the sleep after 1ms...
			delay *= 2
		}
		if delay > 10*1000 { // up to 10ms
			delay = 10 * 1000
		}
		usleep(delay)

		// sysmon should not enter deep sleep if schedtrace is enabled so that
		// it can print that information at the right time.
		//
		// It should also not enter deep sleep if there are any active P's so
		// that it can retake P's from syscalls, preempt long running G's, and
		// poll the network if all P's are busy for long stretches.
		//
		// It should wakeup from deep sleep if any P's become active either due
		// to exiting a syscall or waking up due to a timer expiring so that it
		// can resume performing those duties. If it wakes from a syscall it
		// resets idle and delay as a bet that since it had retaken a P from a
		// syscall before, it may need to do it again shortly after the
		// application starts work again. It does not reset idle when waking
		// from a timer to avoid adding system load to applications that spend
		// most of their time sleeping.
		now := nanotime()
		if debug.schedtrace <= 0 && (sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs) {
			lock(&sched.lock)
			// Re-check under sched.lock; the unlocked check above was racy.
			if sched.gcwaiting.Load() || sched.npidle.Load() == gomaxprocs {
				syscallWake := false
				next := timeSleepUntil()
				if next > now {
					sched.sysmonwait.Store(true)
					unlock(&sched.lock)
					// Make wake-up period small enough
					// for the sampling to be correct.
					sleep := forcegcperiod / 2
					if next-now < sleep {
						sleep = next - now
					}
					shouldRelax := sleep >= osRelaxMinNS
					if shouldRelax {
						osRelax(true)
					}
					syscallWake = notetsleep(&sched.sysmonnote, sleep)
					if shouldRelax {
						osRelax(false)
					}
					lock(&sched.lock)
					sched.sysmonwait.Store(false)
					noteclear(&sched.sysmonnote)
				}
				if syscallWake {
					idle = 0
					delay = 20
				}
			}
			unlock(&sched.lock)
		}

		lock(&sched.sysmonlock)
		// Update now in case we blocked on sysmonnote or spent a long time
		// blocked on schedlock or sysmonlock above.
		now = nanotime()

		// trigger libc interceptors if needed
		if *cgo_yield != nil {
			asmcgocall(*cgo_yield, nil)
		}
		// poll network if not polled for more than 10ms
		lastpoll := sched.lastpoll.Load()
		if netpollinited() && lastpoll != 0 && lastpoll+10*1000*1000 < now {
			sched.lastpoll.CompareAndSwap(lastpoll, now)
			list, delta := netpoll(0) // non-blocking - returns list of goroutines
			if !list.empty() {
				// Need to decrement number of idle locked M's
				// (pretending that one more is running) before injectglist.
				// Otherwise it can lead to the following situation:
				// injectglist grabs all P's but before it starts M's to run the P's,
				// another M returns from syscall, finishes running its G,
				// observes that there is no work to do and no other running M's
				// and reports deadlock.
				incidlelocked(-1)
				injectglist(&list)
				incidlelocked(1)
				netpollAdjustWaiters(delta)
			}
		}
		// Check if we need to update GOMAXPROCS at most once per second.
		if debug.updatemaxprocs != 0 && lastgomaxprocs+1e9 <= now {
			sysmonUpdateGOMAXPROCS()
			lastgomaxprocs = now
		}
		if scavenger.sysmonWake.Load() != 0 {
			// Kick the scavenger awake if someone requested it.
			scavenger.wake()
		}
		// retake P's blocked in syscalls
		// and preempt long running G's
		if retake(now) != 0 {
			idle = 0
		} else {
			idle++
		}
		// check if we need to force a GC
		if t := (gcTrigger{kind: gcTriggerTime, now: now}); t.test() && forcegc.idle.Load() {
			lock(&forcegc.lock)
			forcegc.idle.Store(false)
			var list gList
			list.push(forcegc.g)
			injectglist(&list)
			unlock(&forcegc.lock)
		}
		// debug.schedtrace is a period in milliseconds.
		if debug.schedtrace > 0 && lasttrace+int64(debug.schedtrace)*1000000 <= now {
			lasttrace = now
			schedtrace(debug.scheddetail > 0)
		}
		unlock(&sched.sysmonlock)
	}
}

// sysmontick records, per P, the last schedtick and syscalltick values
// that retake observed and the times (in nanotime units) at which it
// first observed them.
type sysmontick struct {
	schedtick   uint32
	syscalltick uint32
	schedwhen   int64
	syscallwhen int64
}

// forcePreemptNS is the time slice given to a G before it is
// preempted.
const forcePreemptNS = 10 * 1000 * 1000 // 10ms

// retake walks allp looking at Ps in _Prunning. It requests preemption
// (preemptone) of any P whose schedtick has not advanced for at least
// forcePreemptNS, and takes the P away from threads that are blocked in
// a system call, handing it off with handoffp. now is the current
// nanotime. It returns the number of Ps retaken from system calls.
func retake(now int64) uint32 {
	n := 0
	// Prevent allp slice changes. This lock will be completely
	// uncontended unless we're already stopping the world.
	lock(&allpLock)
	// We can't use a range loop over allp because we may
	// temporarily drop the allpLock. Hence, we need to re-fetch
	// allp each time around the loop.
	for i := 0; i < len(allp); i++ {
		// Quickly filter out non-running Ps. Running Ps are either
		// in a syscall or are actually executing. Idle Ps don't
		// need to be retaken.
		//
		// This is best-effort, so it's OK that it's racy. Our target
		// is to retake Ps that have been running or in a syscall for
		// a long time (milliseconds), so the state has plenty of time
		// to stabilize.
		pp := allp[i]
		if pp == nil || atomic.Load(&pp.status) != _Prunning {
			// pp can be nil if procresize has grown
			// allp but not yet created new Ps.
			continue
		}
		pd := &pp.sysmontick
		sysretake := false

		// Preempt G if it's running on the same schedtick for
		// too long. This could be from a single long-running
		// goroutine or a sequence of goroutines run via
		// runnext, which share a single schedtick time slice.
		schedt := int64(pp.schedtick)
		if int64(pd.schedtick) != schedt {
			pd.schedtick = uint32(schedt)
			pd.schedwhen = now
		} else if pd.schedwhen+forcePreemptNS <= now {
			preemptone(pp)
			// If pp is in a syscall, preemptone doesn't work.
			// Neither the goroutine nor the thread can respond to a
			// preemption request because they're not in Go code,
			// so we need to take the P ourselves.
			sysretake = true
		}

		// Drop allpLock so we can take sched.lock.
		unlock(&allpLock)

		// Need to decrement number of idle locked M's (pretending that
		// one more is running) before we take the P and resume.
		// Otherwise the M from which we retake can exit the syscall,
		// increment nmidle and report deadlock.
		//
		// Can't call incidlelocked once we setBlockOnExitSyscall, due
		// to a lock ordering violation between sched.lock and _Gscan.
		incidlelocked(-1)

		// Try to prevent the P from continuing in the syscall, if it's in one at all.
		thread, ok := setBlockOnExitSyscall(pp)
		if !ok {
			// Not in a syscall, or something changed out from under us.
			goto done
		}

		// Retake the P if it's there for more than 1 sysmon tick (at least 20us).
		if syst := int64(pp.syscalltick); !sysretake && int64(pd.syscalltick) != syst {
			// First time we see this syscall: just record it and
			// give it another tick.
			pd.syscalltick = uint32(syst)
			pd.syscallwhen = now
			thread.resume()
			goto done
		}

		// On the one hand we don't want to retake Ps if there is no other work to do,
		// but on the other hand we want to retake them eventually
		// because they can prevent the sysmon thread from deep sleep.
		if runqempty(pp) && sched.nmspinning.Load()+sched.npidle.Load() > 0 && pd.syscallwhen+10*1000*1000 > now {
			thread.resume()
			goto done
		}

		// Take the P. Note: because we have the scan bit, the goroutine
		// is at worst stuck spinning in exitsyscall.
		thread.takeP()
		thread.resume()
		n++

		// Handoff the P for some other thread to run it.
		handoffp(pp)

		// The P has been handed off to another thread, so risk of a false
		// deadlock report while we hold onto it is gone.
	done:
		incidlelocked(1)
		lock(&allpLock)
	}
	unlock(&allpLock)
	return uint32(n)
}

// syscallingThread represents a thread in a system call that temporarily
// cannot advance out of the system call.
type syscallingThread struct {
	gp     *g     // goroutine whose scan bit is held
	mp     *m     // M that gp was observed running on
	pp     *p     // P that was observed wired to mp
	status uint32 // gp's status without _Gscan (_Gsyscall or _Gdeadextra)
}

// setBlockOnExitSyscall prevents pp's thread from advancing out of
// exitsyscall. On success, returns the g/m/p state of the thread
// and true. At that point, the caller owns the g/m/p links referenced,
// the goroutine is in _Gsyscall, and prevented from transitioning out
// of it. On failure, it returns false, and none of these guarantees are
// made.
//
// Callers must call resume on the resulting thread state once
// they're done with thread, otherwise it will remain blocked forever.
//
// This function races with state changes on pp, and thus may fail
// if pp is not in a system call, or exits a system call concurrently
// with this function. However, this function is safe to call without
// any additional synchronization.
func setBlockOnExitSyscall(pp *p) (syscallingThread, bool) {
	if pp.status != _Prunning {
		return syscallingThread{}, false
	}
	// Be very careful here, these reads are intentionally racy.
	// Once we notice the G is in _Gsyscall, acquire its scan bit,
	// and validate that it's still connected to the *same* M and P,
	// we can actually get to work. Holding the scan bit will prevent
	// the G from exiting the syscall.
	//
	// Our goal here is to interrupt long syscalls. If it turns out
	// that we're wrong and the G switched to another syscall while
	// we were trying to do this, that's completely fine. It's
	// probably making more frequent syscalls and the typical
	// preemption paths should be effective.
	mp := pp.m.ptr()
	if mp == nil {
		// Nothing to do.
		return syscallingThread{}, false
	}
	gp := mp.curg
	if gp == nil {
		// Nothing to do.
		return syscallingThread{}, false
	}
	status := readgstatus(gp) &^ _Gscan

	// A goroutine is considered in a syscall, and may have a corresponding
	// P, if it's in _Gsyscall *or* _Gdeadextra. In the latter case, it's an
	// extra M goroutine.
	if status != _Gsyscall && status != _Gdeadextra {
		// Not in a syscall, nothing to do.
		return syscallingThread{}, false
	}
	if !castogscanstatus(gp, status, status|_Gscan) {
		// Not in _Gsyscall or _Gdeadextra anymore. Nothing to do.
		return syscallingThread{}, false
	}
	if gp.m != mp || gp.m.p.ptr() != pp {
		// This is not what we originally observed. Nothing to do.
		// Release the scan bit we just acquired before bailing out.
		casfrom_Gscanstatus(gp, status|_Gscan, status)
		return syscallingThread{}, false
	}
	return syscallingThread{gp, mp, pp, status}, true
}

// gcstopP unwires the P attached to the syscalling thread
// and moves it into the _Pgcstop state.
//
// The caller must be stopping the world.
func (s syscallingThread) gcstopP() {
	assertLockHeld(&sched.lock)

	s.releaseP(_Pgcstop)
	// Record when this P was stopped and count it against the
	// outstanding stop-the-world wait.
	s.pp.gcStopTime = nanotime()
	sched.stopwait--
}

// takeP unwires the P attached to the syscalling thread
// and moves it into the _Pidle state.
func (s syscallingThread) takeP() {
	s.releaseP(_Pidle)
}

// releaseP unwires the P from the syscalling thread, moving
// it to the provided state. Callers should prefer to use
// takeP and gcstopP.
//
// state must be _Pidle or _Pgcstop; any other value throws.
func (s syscallingThread) releaseP(state uint32) {
	if state != _Pidle && state != _Pgcstop {
		throw("attempted to release P into a bad state")
	}
	trace := traceAcquire()
	// Break the M<->P link in both directions before publishing the
	// new P status.
	s.pp.m = 0
	s.mp.p = 0
	atomic.Store(&s.pp.status, state)
	if trace.ok() {
		trace.ProcSteal(s.pp)
		traceRelease(trace)
	}
	addGSyscallNoP(s.mp)
	s.pp.syscalltick++
}

// resume allows a syscalling thread to advance beyond exitsyscall.
//
// It drops the scan bit on s.gp, restoring the status that was
// captured when the thread was blocked.
func (s syscallingThread) resume() {
	casfrom_Gscanstatus(s.gp, s.status|_Gscan, s.status)
}

// Tell all goroutines that they have been preempted and they should stop.
// This function is purely best-effort. It can fail to inform a goroutine if a
// processor just started running it.
// No locks need to be held.
// Returns true if preemption request was issued to at least one goroutine.
func preemptall() bool {
	res := false
	for _, pp := range allp {
		// Only Ps in _Prunning are asked to preempt.
		if pp.status != _Prunning {
			continue
		}
		if preemptone(pp) {
			res = true
		}
	}
	return res
}

// Tell the goroutine running on processor P to stop.
// This function is purely best-effort. It can incorrectly fail to inform the
// goroutine. It can inform the wrong goroutine. Even if it informs the
// correct goroutine, that goroutine might ignore the request if it is
// simultaneously executing newstack.
// No lock needs to be held.
// Returns true if preemption request was issued.
// The actual preemption will happen at some point in the future
// and will be indicated by the gp->status no longer being
// Grunning
func preemptone(pp *p) bool {
	mp := pp.m.ptr()
	// Never preempt the M we are currently running on.
	if mp == nil || mp == getg().m {
		return false
	}
	gp := mp.curg
	if gp == nil || gp == mp.g0 {
		return false
	}
	if readgstatus(gp)&^_Gscan == _Gsyscall {
		// Don't bother trying to preempt a goroutine in a syscall.
		return false
	}

	gp.preempt = true

	// Every call in a goroutine checks for stack overflow by
	// comparing the current stack pointer to gp->stackguard0.
	// Setting gp->stackguard0 to StackPreempt folds
	// preemption into the normal stack overflow check.
	gp.stackguard0 = stackPreempt

	// Request an async preemption of this P.
	if preemptMSupported && debug.asyncpreemptoff == 0 {
		pp.preempt = true
		preemptM(mp)
	}

	return true
}

// starttime is the nanotime observed by the first schedtrace call.
// Later calls print their timestamp relative to it.
var starttime int64

// schedtrace prints a snapshot of the scheduler state using print.
// It always prints a summary line with global counters. When detailed
// is false it appends the per-P run queue lengths and schedticks; when
// detailed is true it prints one line per P, per M and per G instead.
// sched.lock is held while the state is read.
//
// NOTE(review): the " P", " M" and " G" line prefixes below are
// reproduced as they appear in the reviewed text, whose whitespace was
// collapsed; confirm the exact leading spaces against the canonical file.
func schedtrace(detailed bool) {
	now := nanotime()
	if starttime == 0 {
		starttime = now
	}

	lock(&sched.lock)
	print("SCHED ", (now-starttime)/1e6, "ms: gomaxprocs=", gomaxprocs, " idleprocs=", sched.npidle.Load(), " threads=", mcount(), " spinningthreads=", sched.nmspinning.Load(), " needspinning=", sched.needspinning.Load(), " idlethreads=", sched.nmidle, " runqueue=", sched.runq.size)
	if detailed {
		print(" gcwaiting=", sched.gcwaiting.Load(), " nmidlelocked=", sched.nmidlelocked, " stopwait=", sched.stopwait, " sysmonwait=", sched.sysmonwait.Load(), "\n")
	}
	// We must be careful while reading data from P's, M's and G's.
	// Even if we hold schedlock, most data can be changed concurrently.
	// E.g. (p->m ? p->m->id : -1) can crash if p->m changes from non-nil to nil.
	for i, pp := range allp {
		h := atomic.Load(&pp.runqhead)
		t := atomic.Load(&pp.runqtail)
		if detailed {
			print(" P", i, ": status=", pp.status, " schedtick=", pp.schedtick, " syscalltick=", pp.syscalltick, " m=")
			// Load pp.m once so the nil check and the use see the same value.
			mp := pp.m.ptr()
			if mp != nil {
				print(mp.id)
			} else {
				print("nil")
			}
			print(" runqsize=", t-h, " gfreecnt=", pp.gFree.size, " timerslen=", len(pp.timers.heap), "\n")
		} else {
			// In non-detailed mode format lengths of per-P run queues as:
			// [ len1 len2 len3 len4 ]
			print(" ")
			if i == 0 {
				print("[ ")
			}
			print(t - h)
			if i == len(allp)-1 {
				print(" ]")
			}
		}
	}

	if !detailed {
		// Format per-P schedticks as: schedticks=[ tick1 tick2 tick3 tick4 ].
		print(" schedticks=[ ")
		for _, pp := range allp {
			print(pp.schedtick)
			print(" ")
		}
		print("]\n")
	}

	// The summary is complete; only the detailed mode continues below.
	if !detailed {
		unlock(&sched.lock)
		return
	}

	for mp := allm; mp != nil; mp = mp.alllink {
		pp := mp.p.ptr()
		print(" M", mp.id, ": p=")
		if pp != nil {
			print(pp.id)
		} else {
			print("nil")
		}
		print(" curg=")
		if mp.curg != nil {
			print(mp.curg.goid)
		} else {
			print("nil")
		}
		print(" mallocing=", mp.mallocing, " throwing=", mp.throwing, " preemptoff=", mp.preemptoff, " locks=", mp.locks, " dying=", mp.dying, " spinning=", mp.spinning, " blocked=", mp.blocked, " lockedg=")
		if lockedg := mp.lockedg.ptr(); lockedg != nil {
			print(lockedg.goid)
		} else {
			print("nil")
		}
		print("\n")
	}

	forEachG(func(gp *g) {
		print(" G", gp.goid, ": status=", readgstatus(gp), "(", gp.waitreason.String(), ") m=")
		if gp.m != nil {
			print(gp.m.id)
		} else {
			print("nil")
		}
		print(" lockedm=")
		if lockedm := gp.lockedm.ptr(); lockedm != nil {
			print(lockedm.id)
		} else {
			print("nil")
		}
		print("\n")
	})
	unlock(&sched.lock)
}

// updateMaxProcsGState is the state shared between the GOMAXPROCS update
// helper goroutine (updateMaxProcsGoroutine) and sysmonUpdateGOMAXPROCS.
type updateMaxProcsGState struct {
	lock mutex       // held by the helper while it parks and by sysmon while it wakes the helper
	g    *g          // the helper goroutine; set once by updateMaxProcsGoroutine
	idle atomic.Bool // true while the helper is parked waiting for an update

	// Readable when idle == false, writable when idle == true.
	procs int32 // new GOMAXPROCS value
}

var (
	// GOMAXPROCS update godebug metric. Incremented if automatic
	// GOMAXPROCS updates actually change the value of GOMAXPROCS.
	updatemaxprocs = &godebugInc{name: "updatemaxprocs"}

	// Synchronization and state between updateMaxProcsGoroutine and
	// sysmon.
	updateMaxProcsG updateMaxProcsGState

	// Synchronization between GOMAXPROCS and sysmon.
	//
	// Setting GOMAXPROCS via a call to GOMAXPROCS disables automatic
	// GOMAXPROCS updates.
	//
	// We want to make two guarantees to callers of GOMAXPROCS. After
	// GOMAXPROCS returns:
	//
	// 1. The runtime will not make any automatic changes to GOMAXPROCS.
	//
	// 2. The runtime will not perform any of the system calls used to
	//    determine the appropriate value of GOMAXPROCS (i.e., it won't
	//    call defaultGOMAXPROCS).
	//
	// (1) is the baseline guarantee that everyone needs. The GOMAXPROCS
	// API isn't useful to anyone if automatic updates may occur after it
	// returns. This is easily achieved by double-checking the state under
	// STW before committing an automatic GOMAXPROCS update.
	//
	// (2) doesn't matter to most users, as it isn't observable as long
	// as (1) holds. However, it can be important to users sandboxing Go.
	// They want to disable these system calls and need some way to know
	// when they are guaranteed the calls will stop.
	//
	// This would be simple to achieve if we simply called
	// defaultGOMAXPROCS under STW in updateMaxProcsGoroutine below.
	// However, we would like to avoid scheduling this goroutine every
	// second when it will almost never do anything. Instead, sysmon calls
	// defaultGOMAXPROCS to decide whether to schedule
	// updateMaxProcsGoroutine. Thus we need to synchronize between sysmon
	// and GOMAXPROCS calls.
	//
	// GOMAXPROCS can't hold a runtime mutex across STW. It could hold a
	// semaphore, but sysmon cannot take semaphores. Instead, we have a
	// more complex scheme:
	//
	// * sysmon holds computeMaxProcsLock while calling defaultGOMAXPROCS.
	// * sysmon skips the current update if sched.customGOMAXPROCS is
	//   set.
	// * GOMAXPROCS sets sched.customGOMAXPROCS once it is committed to
	//   changing GOMAXPROCS.
	// * GOMAXPROCS takes computeMaxProcsLock to wait for outstanding
	//   defaultGOMAXPROCS calls to complete.
	//
	// N.B. computeMaxProcsLock could simply be sched.lock, but we want to
	// avoid holding that lock during the potentially slow
	// defaultGOMAXPROCS.
	computeMaxProcsLock mutex
)

// Start GOMAXPROCS update helper goroutine.
//
// This is based on forcegchelper.
func defaultGOMAXPROCSUpdateEnable() {
	if debug.updatemaxprocs == 0 {
		// Unconditionally increment the metric when updates are disabled.
		//
		// It would be more descriptive if we did a dry run of the
		// complete update, determining the appropriate value of
		// GOMAXPROCS and then bailing out and just incrementing the
		// metric if a change would occur.
		//
		// Not only is that a lot of ongoing work for a disabled
		// feature, but some users need to be able to completely
		// disable the update system calls (such as sandboxes).
		// Currently, updatemaxprocs=0 serves that purpose.
		updatemaxprocs.IncNonDefault()
		return
	}

	go updateMaxProcsGoroutine()
}

// updateMaxProcsGoroutine is the body of the helper goroutine started by
// defaultGOMAXPROCSUpdateEnable. Each iteration marks the helper idle and
// parks; once woken it stops the world and, unless sched.customGOMAXPROCS
// is set, installs updateMaxProcsG.procs as newprocs before restarting the
// world. It returns for good the first time it observes
// sched.customGOMAXPROCS set.
func updateMaxProcsGoroutine() {
	updateMaxProcsG.g = getg()
	lockInit(&updateMaxProcsG.lock, lockRankUpdateMaxProcsG)
	for {
		lock(&updateMaxProcsG.lock)
		// idle is only cleared by the waker, so it must be false here.
		if updateMaxProcsG.idle.Load() {
			throw("updateMaxProcsGoroutine: phase error")
		}
		updateMaxProcsG.idle.Store(true)
		goparkunlock(&updateMaxProcsG.lock, waitReasonUpdateGOMAXPROCSIdle, traceBlockSystemGoroutine, 1)
		// This goroutine is explicitly resumed by sysmon.

		stw := stopTheWorldGC(stwGOMAXPROCS)

		// Still OK to update?
		lock(&sched.lock)
		custom := sched.customGOMAXPROCS
		unlock(&sched.lock)
		if custom {
			startTheWorldGC(stw)
			return
		}

		// newprocs will be processed by startTheWorld
		//
		// TODO(prattmic): this could use a nicer API. Perhaps add it to the
		// stw parameter?
		newprocs = updateMaxProcsG.procs
		lock(&sched.lock)
		sched.customGOMAXPROCS = false
		unlock(&sched.lock)

		startTheWorldGC(stw)
	}
}

// sysmonUpdateGOMAXPROCS recomputes the default GOMAXPROCS value and, if
// it differs from the current gomaxprocs, hands the new value to the
// update helper goroutine and makes that goroutine runnable. It does
// nothing if sched.customGOMAXPROCS is set or if the helper is not idle.
func sysmonUpdateGOMAXPROCS() {
	// Synchronize with GOMAXPROCS. See comment on computeMaxProcsLock.
	lock(&computeMaxProcsLock)

	// No update if GOMAXPROCS was set manually.
	lock(&sched.lock)
	custom := sched.customGOMAXPROCS
	curr := gomaxprocs
	unlock(&sched.lock)
	if custom {
		unlock(&computeMaxProcsLock)
		return
	}

	// Don't hold sched.lock while we read the filesystem.
	procs := defaultGOMAXPROCS(0)
	unlock(&computeMaxProcsLock)
	if procs == curr {
		// Nothing to do.
		return
	}

	// Sysmon can't directly stop the world. Run the helper to do so on our
	// behalf. If updateMaxProcsG.idle is false, then a previous update is
	// still pending.
	if updateMaxProcsG.idle.Load() {
		lock(&updateMaxProcsG.lock)
		updateMaxProcsG.procs = procs
		updateMaxProcsG.idle.Store(false)
		// Make the helper goroutine runnable.
		var list gList
		list.push(updateMaxProcsG.g)
		injectglist(&list)
		unlock(&updateMaxProcsG.lock)
	}
}

// schedEnableUser enables or disables the scheduling of user
// goroutines.
//
// This does not stop already running user goroutines, so the caller
// should first stop the world when disabling user goroutines.
func schedEnableUser(enable bool) {
	lock(&sched.lock)
	if sched.disable.user == !enable {
		// Already in the requested state.
		unlock(&sched.lock)
		return
	}
	sched.disable.user = !enable
	if enable {
		// Move the goroutines held back while disabled onto the global
		// run queue, then start Ms while there are idle Ps, up to one
		// per goroutine released.
		n := sched.disable.runnable.size
		globrunqputbatch(&sched.disable.runnable)
		unlock(&sched.lock)
		for ; n != 0 && sched.npidle.Load() != 0; n-- {
			startm(nil, false, false)
		}
	} else {
		unlock(&sched.lock)
	}
}

// schedEnabled reports whether gp should be scheduled. It returns
// false if scheduling of gp is disabled.
//
// sched.lock must be held.
func schedEnabled(gp *g) bool {
	assertLockHeld(&sched.lock)

	if sched.disable.user {
		// While user scheduling is disabled only system goroutines run.
		return isSystemGoroutine(gp, true)
	}
	return true
}

// Put mp on midle list.
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func mput(mp *m) {
	assertLockHeld(&sched.lock)

	sched.midle.push(unsafe.Pointer(mp))
	sched.nmidle++
	checkdead()
}

// Try to get an m from midle list.
// sched.lock must be held.
// May run during STW, so write barriers are not allowed.
7262 // 7263 //go:nowritebarrierrec 7264 func mget() *m { 7265 assertLockHeld(&sched.lock) 7266 7267 mp := (*m)(sched.midle.pop()) 7268 if mp != nil { 7269 sched.nmidle-- 7270 } 7271 return mp 7272 } 7273 7274 // Try to get a specific m from midle list. Returns nil if it isn't on the 7275 // midle list. 7276 // 7277 // sched.lock must be held. 7278 // May run during STW, so write barriers are not allowed. 7279 // 7280 //go:nowritebarrierrec 7281 func mgetSpecific(mp *m) *m { 7282 assertLockHeld(&sched.lock) 7283 7284 if mp.idleNode.prev == 0 && mp.idleNode.next == 0 { 7285 // Not on the list. 7286 return nil 7287 } 7288 7289 sched.midle.remove(unsafe.Pointer(mp)) 7290 sched.nmidle-- 7291 7292 return mp 7293 } 7294 7295 // Put gp on the global runnable queue. 7296 // sched.lock must be held. 7297 // May run during STW, so write barriers are not allowed. 7298 // 7299 //go:nowritebarrierrec 7300 func globrunqput(gp *g) { 7301 assertLockHeld(&sched.lock) 7302 7303 sched.runq.pushBack(gp) 7304 } 7305 7306 // Put gp at the head of the global runnable queue. 7307 // sched.lock must be held. 7308 // May run during STW, so write barriers are not allowed. 7309 // 7310 //go:nowritebarrierrec 7311 func globrunqputhead(gp *g) { 7312 assertLockHeld(&sched.lock) 7313 7314 sched.runq.push(gp) 7315 } 7316 7317 // Put a batch of runnable goroutines on the global runnable queue. 7318 // This clears *batch. 7319 // sched.lock must be held. 7320 // May run during STW, so write barriers are not allowed. 7321 // 7322 //go:nowritebarrierrec 7323 func globrunqputbatch(batch *gQueue) { 7324 assertLockHeld(&sched.lock) 7325 7326 sched.runq.pushBackAll(*batch) 7327 *batch = gQueue{} 7328 } 7329 7330 // Try get a single G from the global runnable queue. 7331 // sched.lock must be held. 
7332 func globrunqget() *g { 7333 assertLockHeld(&sched.lock) 7334 7335 if sched.runq.size == 0 { 7336 return nil 7337 } 7338 7339 return sched.runq.pop() 7340 } 7341 7342 // Try get a batch of G's from the global runnable queue. 7343 // sched.lock must be held. 7344 func globrunqgetbatch(n int32) (gp *g, q gQueue) { 7345 assertLockHeld(&sched.lock) 7346 7347 if sched.runq.size == 0 { 7348 return 7349 } 7350 7351 n = min(n, sched.runq.size, sched.runq.size/gomaxprocs+1) 7352 7353 gp = sched.runq.pop() 7354 n-- 7355 7356 for ; n > 0; n-- { 7357 gp1 := sched.runq.pop() 7358 q.pushBack(gp1) 7359 } 7360 return 7361 } 7362 7363 // pMask is an atomic bitstring with one bit per P. 7364 type pMask []uint32 7365 7366 // read returns true if P id's bit is set. 7367 func (p pMask) read(id uint32) bool { 7368 word := id / 32 7369 mask := uint32(1) << (id % 32) 7370 return (atomic.Load(&p[word]) & mask) != 0 7371 } 7372 7373 // set sets P id's bit. 7374 func (p pMask) set(id int32) { 7375 word := id / 32 7376 mask := uint32(1) << (id % 32) 7377 atomic.Or(&p[word], mask) 7378 } 7379 7380 // clear clears P id's bit. 7381 func (p pMask) clear(id int32) { 7382 word := id / 32 7383 mask := uint32(1) << (id % 32) 7384 atomic.And(&p[word], ^mask) 7385 } 7386 7387 // any returns true if any bit in p is set. 7388 func (p pMask) any() bool { 7389 for i := range p { 7390 if atomic.Load(&p[i]) != 0 { 7391 return true 7392 } 7393 } 7394 return false 7395 } 7396 7397 // resize resizes the pMask and returns a new one. 7398 // 7399 // The result may alias p, so callers are encouraged to 7400 // discard p. Not safe for concurrent use. 7401 func (p pMask) resize(nprocs int32) pMask { 7402 maskWords := (nprocs + 31) / 32 7403 7404 if maskWords <= int32(cap(p)) { 7405 return p[:maskWords] 7406 } 7407 newMask := make([]uint32, maskWords) 7408 // No need to copy beyond len, old Ps are irrelevant. 7409 copy(newMask, p) 7410 return newMask 7411 } 7412 7413 // pidleput puts p on the _Pidle list. 
// now must be a relatively recent call
// to nanotime or zero. Returns now or the current time if now was zero.
//
// This releases ownership of p. Once sched.lock is released it is no longer
// safe to use p.
//
// sched.lock must be held.
//
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func pidleput(pp *p, now int64) int64 {
	assertLockHeld(&sched.lock)

	if !runqempty(pp) {
		throw("pidleput: P has non-empty run queue")
	}
	if now == 0 {
		now = nanotime()
	}
	// A P with no timers does not need its bit in timerpMask.
	if pp.timers.len.Load() == 0 {
		timerpMask.clear(pp.id)
	}
	idlepMask.set(pp.id)
	// Push pp onto the front of the idle list.
	pp.link = sched.pidle
	sched.pidle.set(pp)
	sched.npidle.Add(1)
	if !pp.limiterEvent.start(limiterEventIdle, now) {
		throw("must be able to track idle limiter event")
	}
	return now
}

// pidleget tries to get a p from the _Pidle list, acquiring ownership.
//
// It returns nil if the list is empty. The returned time is now, or the
// current nanotime if now was zero and a P was found.
//
// sched.lock must be held.
//
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func pidleget(now int64) (*p, int64) {
	assertLockHeld(&sched.lock)

	pp := sched.pidle.ptr()
	if pp != nil {
		// Timer may get added at any time now.
		if now == 0 {
			now = nanotime()
		}
		timerpMask.set(pp.id)
		idlepMask.clear(pp.id)
		// Pop pp from the front of the idle list.
		sched.pidle = pp.link
		sched.npidle.Add(-1)
		pp.limiterEvent.stop(limiterEventIdle, now)
	}
	return pp, now
}

// pidlegetSpinning tries to get a p from the _Pidle list, acquiring ownership.
// This is called by spinning Ms (or callers that need a spinning M) that have
// found work. If no P is available, this must synchronize with non-spinning
// Ms that may be preparing to drop their P without discovering this work.
//
// sched.lock must be held.
//
// May run during STW, so write barriers are not allowed.
//
//go:nowritebarrierrec
func pidlegetSpinning(now int64) (*p, int64) {
	assertLockHeld(&sched.lock)

	pp, now := pidleget(now)
	if pp == nil {
		// See "Delicate dance" comment in findRunnable. We found work
		// that we cannot take, we must synchronize with non-spinning
		// Ms that may be preparing to drop their P.
		sched.needspinning.Store(1)
		return nil, now
	}

	return pp, now
}

// runqempty reports whether pp has no Gs on its local run queue.
// It never returns true spuriously.
func runqempty(pp *p) bool {
	// Defend against a race where 1) pp has G1 in runqnext but runqhead == runqtail,
	// 2) runqput on pp kicks G1 to the runq, 3) runqget on pp empties runqnext.
	// Simply observing that runqhead == runqtail and then observing that runqnext == nil
	// does not mean the queue is empty.
	for {
		head := atomic.Load(&pp.runqhead)
		tail := atomic.Load(&pp.runqtail)
		runnext := atomic.Loaduintptr((*uintptr)(unsafe.Pointer(&pp.runnext)))
		// Only trust the snapshot if the tail did not move while
		// runnext was being read; otherwise take a fresh snapshot.
		if tail == atomic.Load(&pp.runqtail) {
			return head == tail && runnext == 0
		}
	}
}

// To shake out latent assumptions about scheduling order,
// we introduce some randomness into scheduling decisions
// when running with the race detector.
// The need for this was made obvious by changing the
// (deterministic) scheduling order in Go 1.5 and breaking
// many poorly-written tests.
// With the randomness here, as long as the tests pass
// consistently with -race, they shouldn't have latent scheduling
// assumptions.
const randomizeScheduler = raceenabled

// runqput tries to put g on the local runnable queue.
// If next is false, runqput adds g to the tail of the runnable queue.
// If next is true, runqput puts g in the pp.runnext slot.
// If the run queue is full, runqput puts g on the global queue.
// Executed only by the owner P.
func runqput(pp *p, gp *g, next bool) {
	if !haveSysmon && next {
		// A runnext goroutine shares the same time slice as the
		// current goroutine (inheritTime from runqget). To prevent a
		// ping-pong pair of goroutines from starving all others, we
		// depend on sysmon to preempt "long-running goroutines". That
		// is, any set of goroutines sharing the same time slice.
		//
		// If there is no sysmon, we must avoid runnext entirely or
		// risk starvation.
		next = false
	}
	// With scheduler randomization on, ignore the runnext request half
	// of the time.
	if randomizeScheduler && next && randn(2) == 0 {
		next = false
	}

	if next {
	retryNext:
		oldnext := pp.runnext
		if !pp.runnext.cas(oldnext, guintptr(unsafe.Pointer(gp))) {
			goto retryNext
		}
		if oldnext == 0 {
			return
		}
		// Kick the old runnext out to the regular run queue.
		gp = oldnext.ptr()
	}

retry:
	h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers
	t := pp.runqtail
	if t-h < uint32(len(pp.runq)) {
		pp.runq[t%uint32(len(pp.runq))].set(gp)
		atomic.StoreRel(&pp.runqtail, t+1) // store-release, makes the item available for consumption
		return
	}
	if runqputslow(pp, gp, h, t) {
		return
	}
	// the queue is not full, now the put above must succeed
	goto retry
}

// Put g and a batch of work from local runnable queue on global queue.
// Executed only by the owner P.
//
// h and t are the head and tail the caller observed. It returns false,
// without moving anything, if the head changed since h was read; the
// caller is expected to retry.
func runqputslow(pp *p, gp *g, h, t uint32) bool {
	var batch [len(pp.runq)/2 + 1]*g

	// First, grab a batch from local queue.
	n := t - h
	n = n / 2
	if n != uint32(len(pp.runq)/2) {
		throw("runqputslow: queue is not full")
	}
	for i := uint32(0); i < n; i++ {
		batch[i] = pp.runq[(h+i)%uint32(len(pp.runq))].ptr()
	}
	if !atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume
		return false
	}
	batch[n] = gp

	if randomizeScheduler {
		// Shuffle the batch, including gp in the last slot.
		for i := uint32(1); i <= n; i++ {
			j := cheaprandn(i + 1)
			batch[i], batch[j] = batch[j], batch[i]
		}
	}

	// Link the goroutines.
	for i := uint32(0); i < n; i++ {
		batch[i].schedlink.set(batch[i+1])
	}

	q := gQueue{batch[0].guintptr(), batch[n].guintptr(), int32(n + 1)}

	// Now put the batch on global queue.
	lock(&sched.lock)
	globrunqputbatch(&q)
	unlock(&sched.lock)
	return true
}

// runqputbatch tries to put all the G's on q on the local runnable queue.
// If the local runq is full the input queue still contains unqueued Gs.
// Executed only by the owner P.
func runqputbatch(pp *p, q *gQueue) {
	if q.empty() {
		return
	}
	h := atomic.LoadAcq(&pp.runqhead)
	t := pp.runqtail
	n := uint32(0)
	for !q.empty() && t-h < uint32(len(pp.runq)) {
		gp := q.pop()
		pp.runq[t%uint32(len(pp.runq))].set(gp)
		t++
		n++
	}

	if randomizeScheduler {
		// pp.runqtail has not been advanced yet, so off(o) is the slot
		// of the o'th G written above. Shuffle those n slots before
		// they are published.
		off := func(o uint32) uint32 {
			return (pp.runqtail + o) % uint32(len(pp.runq))
		}
		for i := uint32(1); i < n; i++ {
			j := cheaprandn(i + 1)
			pp.runq[off(i)], pp.runq[off(j)] = pp.runq[off(j)], pp.runq[off(i)]
		}
	}

	// Publish all newly written slots with a single store-release.
	atomic.StoreRel(&pp.runqtail, t)

	return
}

// Get g from local runnable queue.
// If inheritTime is true, gp should inherit the remaining time in the
// current time slice. Otherwise, it should start a new time slice.
// Executed only by the owner P.
func runqget(pp *p) (gp *g, inheritTime bool) {
	// If there's a runnext, it's the next G to run.
	next := pp.runnext
	// If the runnext is non-0 and the CAS fails, it could only have been stolen by another P,
	// because other Ps can race to set runnext to 0, but only the current P can set it to non-0.
	// Hence, there's no need to retry this CAS if it fails.
	if next != 0 && pp.runnext.cas(next, 0) {
		return next.ptr(), true
	}

	for {
		h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
		t := pp.runqtail
		if t == h {
			return nil, false
		}
		gp := pp.runq[h%uint32(len(pp.runq))].ptr()
		if atomic.CasRel(&pp.runqhead, h, h+1) { // cas-release, commits consume
			return gp, false
		}
	}
}

// runqdrain drains the local runnable queue of pp and returns all goroutines in it.
// Executed only by the owner P.
func runqdrain(pp *p) (drainQ gQueue) {
	oldNext := pp.runnext
	if oldNext != 0 && pp.runnext.cas(oldNext, 0) {
		drainQ.pushBack(oldNext.ptr())
	}

retry:
	h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
	t := pp.runqtail
	qn := t - h
	if qn == 0 {
		return
	}
	if qn > uint32(len(pp.runq)) { // read inconsistent h and t
		goto retry
	}

	if !atomic.CasRel(&pp.runqhead, h, h+qn) { // cas-release, commits consume
		goto retry
	}

	// We advance the head pointer first and only then read the G's out of the
	// local P's runnable queue, because we don't want to mess up the statuses of G's
	// while runqdrain() and runqsteal() are running in parallel.
	// Thus we should advance the head pointer before draining the local P into a gQueue,
	// so that we can update any gp.schedlink only after we take the full ownership of G,
	// meanwhile, other P's can't access the G's in the local P's runnable queue and steal them.
	// See https://groups.google.com/g/golang-dev/c/0pTKxEKhHSc/m/6Q85QjdVBQAJ for more details.
	for i := uint32(0); i < qn; i++ {
		gp := pp.runq[(h+i)%uint32(len(pp.runq))].ptr()
		drainQ.pushBack(gp)
	}
	return
}

// Grabs a batch of goroutines from pp's runnable queue into batch.
// Batch is a ring buffer starting at batchHead.
// Returns number of grabbed goroutines.
// Can be executed by any P.
func runqgrab(pp *p, batch *[256]guintptr, batchHead uint32, stealRunNextG bool) uint32 {
	for {
		h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with other consumers
		t := atomic.LoadAcq(&pp.runqtail) // load-acquire, synchronize with the producer
		// Take half of the queued G's, rounding up.
		n := t - h
		n = n - n/2
		if n == 0 {
			if stealRunNextG {
				// Try to steal from pp.runnext.
				if next := pp.runnext; next != 0 {
					if pp.status == _Prunning {
						if mp := pp.m.ptr(); mp != nil {
							if gp := mp.curg; gp == nil || readgstatus(gp)&^_Gscan != _Gsyscall {
								// Sleep to ensure that pp isn't about to run the g
								// we are about to steal.
								// The important use case here is when the g running
								// on pp ready()s another g and then almost
								// immediately blocks. Instead of stealing runnext
								// in this window, back off to give pp a chance to
								// schedule runnext. This will avoid thrashing gs
								// between different Ps.
								// A sync chan send/recv takes ~50ns as of time of
								// writing, so 3us gives ~50x overshoot.
								// If curg is nil, we assume that the P is likely
								// to be in the scheduler. If curg isn't nil and isn't
								// in a syscall, then it's either running, waiting, or
								// runnable. In this case we want to sleep because the
								// P might either call into the scheduler soon (running),
								// or already is (since we found a waiting or runnable
								// goroutine hanging off of a running P, suggesting it
								// either recently transitioned out of running, or will
								// transition to running shortly).
								if !osHasLowResTimer {
									usleep(3)
								} else {
									// On some platforms system timer granularity is
									// 1-15ms, which is way too much for this
									// optimization. So just yield.
									osyield()
								}
							}
						}
					}
					if !pp.runnext.cas(next, 0) {
						continue
					}
					batch[batchHead%uint32(len(batch))] = next
					return 1
				}
			}
			return 0
		}
		if n > uint32(len(pp.runq)/2) { // read inconsistent h and t
			continue
		}
		// Copy the candidates out first; they only count as taken if the
		// CAS on the head below succeeds.
		for i := uint32(0); i < n; i++ {
			g := pp.runq[(h+i)%uint32(len(pp.runq))]
			batch[(batchHead+i)%uint32(len(batch))] = g
		}
		if atomic.CasRel(&pp.runqhead, h, h+n) { // cas-release, commits consume
			return n
		}
	}
}

// Steal half of elements from local runnable queue of p2
// and put onto local runnable queue of p.
// Returns one of the stolen elements (or nil if failed).
func runqsteal(pp, p2 *p, stealRunNextG bool) *g {
	t := pp.runqtail
	// Grab directly into pp's own ring buffer, starting at its tail.
	n := runqgrab(p2, &pp.runq, t, stealRunNextG)
	if n == 0 {
		return nil
	}
	// The last grabbed G is returned to the caller rather than queued,
	// so only the first n-1 are published below.
	n--
	gp := pp.runq[(t+n)%uint32(len(pp.runq))].ptr()
	if n == 0 {
		return gp
	}
	h := atomic.LoadAcq(&pp.runqhead) // load-acquire, synchronize with consumers
	if t-h+n >= uint32(len(pp.runq)) {
		throw("runqsteal: runq overflow")
	}
	atomic.StoreRel(&pp.runqtail, t+n) // store-release, makes the item available for consumption
	return gp
}

// A gQueue is a dequeue of Gs linked through g.schedlink. A G can only
// be on one gQueue or gList at a time.
7802 type gQueue struct { 7803 head guintptr 7804 tail guintptr 7805 size int32 7806 } 7807 7808 // empty reports whether q is empty. 7809 func (q *gQueue) empty() bool { 7810 return q.head == 0 7811 } 7812 7813 // push adds gp to the head of q. 7814 func (q *gQueue) push(gp *g) { 7815 gp.schedlink = q.head 7816 q.head.set(gp) 7817 if q.tail == 0 { 7818 q.tail.set(gp) 7819 } 7820 q.size++ 7821 } 7822 7823 // pushBack adds gp to the tail of q. 7824 func (q *gQueue) pushBack(gp *g) { 7825 gp.schedlink = 0 7826 if q.tail != 0 { 7827 q.tail.ptr().schedlink.set(gp) 7828 } else { 7829 q.head.set(gp) 7830 } 7831 q.tail.set(gp) 7832 q.size++ 7833 } 7834 7835 // pushBackAll adds all Gs in q2 to the tail of q. After this q2 must 7836 // not be used. 7837 func (q *gQueue) pushBackAll(q2 gQueue) { 7838 if q2.tail == 0 { 7839 return 7840 } 7841 q2.tail.ptr().schedlink = 0 7842 if q.tail != 0 { 7843 q.tail.ptr().schedlink = q2.head 7844 } else { 7845 q.head = q2.head 7846 } 7847 q.tail = q2.tail 7848 q.size += q2.size 7849 } 7850 7851 // pop removes and returns the head of queue q. It returns nil if 7852 // q is empty. 7853 func (q *gQueue) pop() *g { 7854 gp := q.head.ptr() 7855 if gp != nil { 7856 q.head = gp.schedlink 7857 if q.head == 0 { 7858 q.tail = 0 7859 } 7860 q.size-- 7861 } 7862 return gp 7863 } 7864 7865 // popList takes all Gs in q and returns them as a gList. 7866 func (q *gQueue) popList() gList { 7867 stack := gList{q.head, q.size} 7868 *q = gQueue{} 7869 return stack 7870 } 7871 7872 // A gList is a list of Gs linked through g.schedlink. A G can only be 7873 // on one gQueue or gList at a time. 7874 type gList struct { 7875 head guintptr 7876 size int32 7877 } 7878 7879 // empty reports whether l is empty. 7880 func (l *gList) empty() bool { 7881 return l.head == 0 7882 } 7883 7884 // push adds gp to the head of l. 
7885 func (l *gList) push(gp *g) { 7886 gp.schedlink = l.head 7887 l.head.set(gp) 7888 l.size++ 7889 } 7890 7891 // pushAll prepends all Gs in q to l. After this q must not be used. 7892 func (l *gList) pushAll(q gQueue) { 7893 if !q.empty() { 7894 q.tail.ptr().schedlink = l.head 7895 l.head = q.head 7896 l.size += q.size 7897 } 7898 } 7899 7900 // pop removes and returns the head of l. If l is empty, it returns nil. 7901 func (l *gList) pop() *g { 7902 gp := l.head.ptr() 7903 if gp != nil { 7904 l.head = gp.schedlink 7905 l.size-- 7906 } 7907 return gp 7908 } 7909 7910 //go:linkname setMaxThreads runtime/debug.setMaxThreads 7911 func setMaxThreads(in int) (out int) { 7912 lock(&sched.lock) 7913 out = int(sched.maxmcount) 7914 if in > 0x7fffffff { // MaxInt32 7915 sched.maxmcount = 0x7fffffff 7916 } else { 7917 sched.maxmcount = int32(in) 7918 } 7919 checkmcount() 7920 unlock(&sched.lock) 7921 return 7922 } 7923 7924 // procPin should be an internal detail, 7925 // but widely used packages access it using linkname. 7926 // Notable members of the hall of shame include: 7927 // - github.com/bytedance/gopkg 7928 // - github.com/choleraehyq/pid 7929 // - github.com/songzhibin97/gkit 7930 // 7931 // Do not remove or change the type signature. 7932 // See go.dev/issue/67401. 7933 // 7934 //go:linkname procPin 7935 //go:nosplit 7936 func procPin() int { 7937 gp := getg() 7938 mp := gp.m 7939 7940 mp.locks++ 7941 return int(mp.p.ptr().id) 7942 } 7943 7944 // procUnpin should be an internal detail, 7945 // but widely used packages access it using linkname. 7946 // Notable members of the hall of shame include: 7947 // - github.com/bytedance/gopkg 7948 // - github.com/choleraehyq/pid 7949 // - github.com/songzhibin97/gkit 7950 // 7951 // Do not remove or change the type signature. 7952 // See go.dev/issue/67401. 
7953 // 7954 //go:linkname procUnpin 7955 //go:nosplit 7956 func procUnpin() { 7957 gp := getg() 7958 gp.m.locks-- 7959 } 7960 7961 //go:linkname sync_runtime_procPin sync.runtime_procPin 7962 //go:nosplit 7963 func sync_runtime_procPin() int { 7964 return procPin() 7965 } 7966 7967 //go:linkname sync_runtime_procUnpin sync.runtime_procUnpin 7968 //go:nosplit 7969 func sync_runtime_procUnpin() { 7970 procUnpin() 7971 } 7972 7973 //go:linkname sync_atomic_runtime_procPin sync/atomic.runtime_procPin 7974 //go:nosplit 7975 func sync_atomic_runtime_procPin() int { 7976 return procPin() 7977 } 7978 7979 //go:linkname sync_atomic_runtime_procUnpin sync/atomic.runtime_procUnpin 7980 //go:nosplit 7981 func sync_atomic_runtime_procUnpin() { 7982 procUnpin() 7983 } 7984 7985 // Active spinning for sync.Mutex. 7986 // 7987 //go:linkname internal_sync_runtime_canSpin internal/sync.runtime_canSpin 7988 //go:nosplit 7989 func internal_sync_runtime_canSpin(i int) bool { 7990 // sync.Mutex is cooperative, so we are conservative with spinning. 7991 // Spin only few times and only if running on a multicore machine and 7992 // GOMAXPROCS>1 and there is at least one other running P and local runq is empty. 7993 // As opposed to runtime mutex we don't do passive spinning here, 7994 // because there can be work on global runq or on other Ps. 7995 if i >= active_spin || numCPUStartup <= 1 || gomaxprocs <= sched.npidle.Load()+sched.nmspinning.Load()+1 { 7996 return false 7997 } 7998 if p := getg().m.p.ptr(); !runqempty(p) { 7999 return false 8000 } 8001 return true 8002 } 8003 8004 //go:linkname internal_sync_runtime_doSpin internal/sync.runtime_doSpin 8005 //go:nosplit 8006 func internal_sync_runtime_doSpin() { 8007 procyield(active_spin_cnt) 8008 } 8009 8010 // Active spinning for sync.Mutex. 8011 // 8012 // sync_runtime_canSpin should be an internal detail, 8013 // but widely used packages access it using linkname. 
// Notable members of the hall of shame include:
//   - github.com/livekit/protocol
//   - github.com/sagernet/gvisor
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
// It simply forwards to internal_sync_runtime_canSpin.
//
//go:linkname sync_runtime_canSpin sync.runtime_canSpin
//go:nosplit
func sync_runtime_canSpin(i int) bool {
	return internal_sync_runtime_canSpin(i)
}

// sync_runtime_doSpin should be an internal detail,
// but widely used packages access it using linkname.
// Notable members of the hall of shame include:
//   - github.com/livekit/protocol
//   - github.com/sagernet/gvisor
//   - gvisor.dev/gvisor
//
// Do not remove or change the type signature.
// See go.dev/issue/67401.
//
// It simply forwards to internal_sync_runtime_doSpin.
//
//go:linkname sync_runtime_doSpin sync.runtime_doSpin
//go:nosplit
func sync_runtime_doSpin() {
	internal_sync_runtime_doSpin()
}

// stealOrder is the package-level randomOrder instance.
// NOTE(review): judging by the name it drives victim selection during
// work stealing; its users are outside this section — confirm there.
var stealOrder randomOrder

// randomOrder/randomEnum are helper types for randomized work stealing.
// They allow enumerating all Ps in different pseudo-random orders without repetition.
// The algorithm is based on the fact that if X and GOMAXPROCS are coprime,
// then the sequence (i + X) % GOMAXPROCS gives the required enumeration.
type randomOrder struct {
	count    uint32   // number of positions to enumerate
	coprimes []uint32 // values in [1, count] that are coprime with count
}

type randomEnum struct {
	i     uint32 // number of next calls made so far
	count uint32 // total number of positions; done once i reaches it
	pos   uint32 // current position
	inc   uint32 // step added to pos (mod count) by each next call
}

// reset configures ord to enumerate count positions and rebuilds the
// table of usable increments, reusing the backing array of
// ord.coprimes.
func (ord *randomOrder) reset(count uint32) {
	ord.count = count
	ord.coprimes = ord.coprimes[:0]
	for step := uint32(1); step <= count; step++ {
		if gcd(step, count) != 1 {
			continue
		}
		ord.coprimes = append(ord.coprimes, step)
	}
}

// start returns an enumeration seeded by i: the low part of i
// (i % count) picks the starting position and the high part
// (i / count) picks which coprime increment to walk with.
func (ord *randomOrder) start(i uint32) randomEnum {
	n := ord.count
	first := i % n
	stride := ord.coprimes[(i/n)%uint32(len(ord.coprimes))]
	return randomEnum{count: n, pos: first, inc: stride}
}

// done reports whether all count positions have been produced.
func (enum *randomEnum) done() bool {
	return enum.i == enum.count
}

// next advances the enumeration by one step.
func (enum *randomEnum) next() {
	enum.i++
	enum.pos += enum.inc
	enum.pos %= enum.count
}

// position returns the current position of the enumeration.
func (enum *randomEnum) position() uint32 {
	return enum.pos
}

// gcd returns the greatest common divisor of a and b using Euclid's
// algorithm; gcd(a, 0) is a.
func gcd(a, b uint32) uint32 {
	for b != 0 {
		rem := a % b
		a, b = b, rem
	}
	return a
}

// An initTask represents the set of initializations that need to be done for a package.
// Keep in sync with ../../test/noinit.go:initTask
type initTask struct {
	state uint32 // 0 = uninitialized, 1 = in progress, 2 = done
	nfns  uint32 // number of init function pcs stored after this header
	// followed by nfns pcs, uintptr sized, one per init function to run
}

// inittrace stores statistics for init functions which are
// updated by malloc and newproc when active is true.
8110 var inittrace tracestat 8111 8112 type tracestat struct { 8113 active bool // init tracing activation status 8114 id uint64 // init goroutine id 8115 allocs uint64 // heap allocations 8116 bytes uint64 // heap allocated bytes 8117 } 8118 8119 func doInit(ts []*initTask) { 8120 for _, t := range ts { 8121 doInit1(t) 8122 } 8123 } 8124 8125 func doInit1(t *initTask) { 8126 switch t.state { 8127 case 2: // fully initialized 8128 return 8129 case 1: // initialization in progress 8130 throw("recursive call during initialization - linker skew") 8131 default: // not initialized yet 8132 t.state = 1 // initialization in progress 8133 8134 var ( 8135 start int64 8136 before tracestat 8137 ) 8138 8139 if inittrace.active { 8140 start = nanotime() 8141 // Load stats non-atomically since tracinit is updated only by this init goroutine. 8142 before = inittrace 8143 } 8144 8145 if t.nfns == 0 { 8146 // We should have pruned all of these in the linker. 8147 throw("inittask with no functions") 8148 } 8149 8150 firstFunc := add(unsafe.Pointer(t), 8) 8151 for i := uint32(0); i < t.nfns; i++ { 8152 p := add(firstFunc, uintptr(i)*goarch.PtrSize) 8153 f := *(*func())(unsafe.Pointer(&p)) 8154 f() 8155 } 8156 8157 if inittrace.active { 8158 end := nanotime() 8159 // Load stats non-atomically since tracinit is updated only by this init goroutine. 8160 after := inittrace 8161 8162 f := *(*func())(unsafe.Pointer(&firstFunc)) 8163 pkg := funcpkgpath(findfunc(abi.FuncPCABIInternal(f))) 8164 8165 var sbuf [24]byte 8166 print("init ", pkg, " @") 8167 print(string(fmtNSAsMS(sbuf[:], uint64(start-runtimeInitTime))), " ms, ") 8168 print(string(fmtNSAsMS(sbuf[:], uint64(end-start))), " ms clock, ") 8169 print(string(itoa(sbuf[:], after.bytes-before.bytes)), " bytes, ") 8170 print(string(itoa(sbuf[:], after.allocs-before.allocs)), " allocs") 8171 print("\n") 8172 } 8173 8174 t.state = 2 // initialization done 8175 } 8176 } 8177