// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"context"
	"log"
	"math/rand/v2"
	"os"
	"runtime"
	"runtime/debug"
	"runtime/metrics"
	"runtime/trace"
	"sync/atomic"
)

func init() {
	register("TraceSTW", TraceSTW)
	register("TraceGCSTW", TraceGCSTW)
}

// The parent writes to ping and waits for the children to write back
// via pong to show that they are running.
var ping atomic.Uint32
var pong [2]atomic.Uint32

// Tell runners to stop.
var stop atomic.Bool

func traceSTWTarget(i int) {
	for !stop.Load() {
		// Async preemption often takes 100ms+ to preempt this loop on
		// windows-386. This makes the test flaky, as the traceReadCPU
		// timer often fires by the time STW finishes, jumbling the
		// goroutine scheduling. As a workaround, ensure we have a
		// morestack call for prompt preemption.
		ensureMorestack()

		pong[i].Store(ping.Load())
	}
}

func TraceSTW() {
	ctx := context.Background()

	// The idea here is to have 2 target goroutines that are constantly
	// running. When the world restarts after STW, we expect these
	// goroutines to continue execution on the same M and P.
	//
	// Set GOMAXPROCS=4 to make room for the 2 target goroutines, 1 parent,
	// and 1 slack for potential misscheduling.
	//
	// Disable the GC because GC STW generally moves goroutines (see
	// https://go.dev/issue/65694). Alternatively, we could just ignore the
	// trace if the GC runs.
	runtime.GOMAXPROCS(4)
	debug.SetGCPercent(-1)

	if err := trace.Start(os.Stdout); err != nil {
		log.Fatalf("failed to start tracing: %v", err)
	}
	defer trace.Stop()

	for i := range 2 {
		go traceSTWTarget(i)
	}

	// Wait for children to start running.
	ping.Store(1)
	for pong[0].Load() != 1 {}
	for pong[1].Load() != 1 {}

	trace.Log(ctx, "TraceSTW", "start")

	// STW
	var ms runtime.MemStats
	runtime.ReadMemStats(&ms)

	// Make sure to run long enough for the children to schedule again
	// after STW.
	ping.Store(2)
	for pong[0].Load() != 2 {}
	for pong[1].Load() != 2 {}

	trace.Log(ctx, "TraceSTW", "end")

	stop.Store(true)
}

// Variant of TraceSTW for GC STWs. We want the GC mark workers to start on
// previously-idle Ps, rather than bumping the current P.
func TraceGCSTW() {
	ctx := context.Background()

	// The idea here is to have 2 target goroutines that are constantly
	// running. When the world restarts after STW, we expect these
	// goroutines to continue execution on the same M and P.
	//
	// Set GOMAXPROCS=8 to make room for the 2 target goroutines, 1 parent,
	// 2 dedicated workers, and a bit of slack.
	//
	// Disable the GC initially so we can be sure it only triggers once we
	// are ready.
	runtime.GOMAXPROCS(8)
	debug.SetGCPercent(-1)

	if err := trace.Start(os.Stdout); err != nil {
		log.Fatalf("failed to start tracing: %v", err)
	}
	defer trace.Stop()

	for i := range 2 {
		go traceSTWTarget(i)
	}

	// Wait for children to start running.
	ping.Store(1)
	for pong[0].Load() != 1 {}
	for pong[1].Load() != 1 {}

	trace.Log(ctx, "TraceSTW", "start")

	// STW
	triggerGC()

	// Make sure to run long enough for the children to schedule again
	// after STW. This is included for good measure, but the goroutines
	// really ought to have already scheduled since the entire GC
	// completed.
	ping.Store(2)
	for pong[0].Load() != 2 {}
	for pong[1].Load() != 2 {}

	trace.Log(ctx, "TraceSTW", "end")

	stop.Store(true)
}

func triggerGC() {
	// Allocate a bunch to trigger the GC rather than using runtime.GC. The
	// latter blocks until the GC is complete, which is convenient, but
	// messes with scheduling as it gives this P a chance to steal the
	// other goroutines before their Ps get up and running again.
	//
	// Bring heap size up prior to enabling the GC to ensure that there is
	// a decent amount of work in case the GC triggers immediately upon
	// re-enabling.
	for range 1000 {
		alloc()
	}

	sample := make([]metrics.Sample, 1)
	sample[0].Name = "/gc/cycles/total:gc-cycles"
	metrics.Read(sample)
	start := sample[0].Value.Uint64()

	debug.SetGCPercent(100)

	// Keep allocating until the GC is complete. We really only need to
	// continue until the mark workers are scheduled, but there isn't a
	// good way to measure that.
	for {
		metrics.Read(sample)
		if sample[0].Value.Uint64() != start {
			return
		}

		alloc()
	}
}

// Allocate a tree data structure to generate plenty of scan work for the GC.
type node struct {
	children []*node
}

var gcSink node

func alloc() {
	// 10% chance of adding a node at each layer.
	curr := &gcSink
	for {
		if len(curr.children) == 0 || rand.Float32() < 0.1 {
			curr.children = append(curr.children, new(node))
			return
		}

		i := rand.IntN(len(curr.children))
		curr = curr.children[i]
	}
}

// Manually insert a morestack call. Leaf functions can omit morestack, but
// non-leaf functions should include them.
//
//go:noinline
func ensureMorestack() {
	ensureMorestack1()
}

//go:noinline
func ensureMorestack1() {
}
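
// Note: register is not defined in this file. The testdata programs in the
// runtime tree share a small harness (a main.go in the same package) that
// collects named entry points and dispatches on os.Args[1]. The sketch below
// is an assumption for illustration only, not the actual harness; the cmds
// variable name and the usage message are illustrative.
//
//	var cmds = map[string]func(){}
//
//	func register(name string, f func()) {
//		if cmds[name] != nil {
//			panic("duplicate registration: " + name)
//		}
//		cmds[name] = f
//	}
//
//	func main() {
//		if len(os.Args) < 2 {
//			println("usage: prog <name>")
//			os.Exit(2)
//		}
//		f := cmds[os.Args[1]]
//		if f == nil {
//			println("unknown function:", os.Args[1])
//			os.Exit(2)
//		}
//		f()
//	}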