Source file src/runtime/runtime_test.go

     1  // Copyright 2012 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime_test
     6  
     7  import (
     8  	"flag"
     9  	"fmt"
    10  	"internal/cpu"
    11  	"internal/runtime/atomic"
    12  	"io"
    13  	"math/bits"
    14  	. "runtime"
    15  	"runtime/debug"
    16  	"slices"
    17  	"strings"
    18  	"sync"
    19  	"testing"
    20  	"time"
    21  	"unsafe"
    22  )
    23  
    24  // flagQuick is set by the -quick option to skip some relatively slow tests.
    25  // This is used by the cmd/dist test runtime:cpu124.
    26  // The cmd/dist test passes both -test.short and -quick;
    27  // there are tests that only check testing.Short, and those tests will
    28  // not be skipped if only -quick is used.
    29  var flagQuick = flag.Bool("quick", false, "skip slow tests, for cmd/dist test runtime:cpu124")
    30  
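// Hypothetical example (the name and body are assumed, not part of this file):
// a slow test that should be skipped under either switch checks both
// explicitly, since -quick alone does not set testing.Short().
func TestSlowSomethingSketch(t *testing.T) {
	if testing.Short() || *flagQuick {
		t.Skip("skipping slow test in -short or -quick mode")
	}
	// ... slow work would go here ...
}
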
    31  func init() {
    32  	// We're testing the runtime, so make tracebacks show things
    33  	// in the runtime. This only raises the level, so it won't
    34  	// override GOTRACEBACK=crash from the user.
    35  	SetTracebackEnv("system")
    36  }
    37  
    38  var errf error
    39  
    40  func errfn() error {
    41  	return errf
    42  }
    43  
    44  func errfn1() error {
    45  	return io.EOF
    46  }
    47  
    48  func BenchmarkIfaceCmp100(b *testing.B) {
    49  	for i := 0; i < b.N; i++ {
    50  		for j := 0; j < 100; j++ {
    51  			if errfn() == io.EOF {
    52  				b.Fatal("bad comparison")
    53  			}
    54  		}
    55  	}
    56  }
    57  
    58  func BenchmarkIfaceCmpNil100(b *testing.B) {
    59  	for i := 0; i < b.N; i++ {
    60  		for j := 0; j < 100; j++ {
    61  			if errfn1() == nil {
    62  				b.Fatal("bad comparison")
    63  			}
    64  		}
    65  	}
    66  }
    67  
    68  var efaceCmp1 any
    69  var efaceCmp2 any
    70  
    71  func BenchmarkEfaceCmpDiff(b *testing.B) {
    72  	x := 5
    73  	efaceCmp1 = &x
    74  	y := 6
    75  	efaceCmp2 = &y
    76  	for i := 0; i < b.N; i++ {
    77  		for j := 0; j < 100; j++ {
    78  			if efaceCmp1 == efaceCmp2 {
    79  				b.Fatal("bad comparison")
    80  			}
    81  		}
    82  	}
    83  }
    84  
    85  func BenchmarkEfaceCmpDiffIndirect(b *testing.B) {
    86  	efaceCmp1 = [2]int{1, 2}
    87  	efaceCmp2 = [2]int{1, 2}
    88  	for i := 0; i < b.N; i++ {
    89  		for j := 0; j < 100; j++ {
    90  			if efaceCmp1 != efaceCmp2 {
    91  				b.Fatal("bad comparison")
    92  			}
    93  		}
    94  	}
    95  }
    96  
    97  func BenchmarkDefer(b *testing.B) {
    98  	for i := 0; i < b.N; i++ {
    99  		defer1()
   100  	}
   101  }
   102  
   103  func defer1() {
   104  	defer func(x, y, z int) {
   105  		if recover() != nil || x != 1 || y != 2 || z != 3 {
   106  			panic("bad recover")
   107  		}
   108  	}(1, 2, 3)
   109  }
   110  
   111  func BenchmarkDefer10(b *testing.B) {
   112  	for i := 0; i < b.N/10; i++ {
   113  		defer2()
   114  	}
   115  }
   116  
   117  func defer2() {
   118  	for i := 0; i < 10; i++ {
   119  		defer func(x, y, z int) {
   120  			if recover() != nil || x != 1 || y != 2 || z != 3 {
   121  				panic("bad recover")
   122  			}
   123  		}(1, 2, 3)
   124  	}
   125  }
   126  
   127  func BenchmarkDeferMany(b *testing.B) {
   128  	for i := 0; i < b.N; i++ {
   129  		defer func(x, y, z int) {
   130  			if recover() != nil || x != 1 || y != 2 || z != 3 {
   131  				panic("bad recover")
   132  			}
   133  		}(1, 2, 3)
   134  	}
   135  }
   136  
   137  func BenchmarkPanicRecover(b *testing.B) {
   138  	for i := 0; i < b.N; i++ {
   139  		defer3()
   140  	}
   141  }
   142  
   143  func defer3() {
   144  	defer func(x, y, z int) {
   145  		if recover() == nil {
   146  			panic("failed recover")
   147  		}
   148  	}(1, 2, 3)
   149  	panic("hi")
   150  }
   151  
   152  // golang.org/issue/7063
   153  func TestStopCPUProfilingWithProfilerOff(t *testing.T) {
   154  	SetCPUProfileRate(0)
   155  }
   156  
   157  // Addresses to test for faulting behavior.
   158  // This is less a test of SetPanicOnFault and more a check that
   159  // the operating system and the runtime can process these faults
   160  // correctly. That is, we're indirectly testing that without SetPanicOnFault
   161  // these would manage to turn into ordinary crashes.
   162  // Note that these are truncated on 32-bit systems, so the bottom 32 bits
   163  // of the larger addresses must themselves be invalid addresses.
   164  // We might get unlucky and the OS might have mapped one of these
   165  // addresses, but probably not: they're all in the first page, very high
   166  // addresses that normally an OS would reserve for itself, or malformed
   167  // addresses. Even so, we might have to remove one or two on different
   168  // systems. We will see.
   169  
   170  var faultAddrs = []uint64{
   171  	// low addresses
   172  	0,
   173  	1,
   174  	0xfff,
   175  	// high (kernel) addresses
   176  	// or else malformed.
   177  	0xffffffffffffffff,
   178  	0xfffffffffffff001,
   179  	0xffffffffffff0001,
   180  	0xfffffffffff00001,
   181  	0xffffffffff000001,
   182  	0xfffffffff0000001,
   183  	0xffffffff00000001,
   184  	0xfffffff000000001,
   185  	0xffffff0000000001,
   186  	0xfffff00000000001,
   187  	0xffff000000000001,
   188  	0xfff0000000000001,
   189  	0xff00000000000001,
   190  	0xf000000000000001,
   191  	0x8000000000000001,
   192  }
   193  
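// Hypothetical sanity check (assumed, not part of the original suite): the
// comment above says that on 32-bit systems the addresses are truncated, so
// the bottom 32 bits of each entry must themselves be invalid. Verify that
// every truncated address lands in the first page or in the very high range.
func TestFaultAddrsTruncationSketch(t *testing.T) {
	for _, addr := range faultAddrs {
		low := uint64(uint32(addr))
		if low > 0xfff && low < 0xf0000000 {
			t.Errorf("addr %#x truncates to %#x, which is not an obviously invalid address", addr, low)
		}
	}
}
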
   194  func TestSetPanicOnFault(t *testing.T) {
   195  	old := debug.SetPanicOnFault(true)
   196  	defer debug.SetPanicOnFault(old)
   197  
   198  	nfault := 0
   199  	for _, addr := range faultAddrs {
   200  		testSetPanicOnFault(t, uintptr(addr), &nfault)
   201  	}
   202  	if nfault == 0 {
   203  		t.Fatalf("none of the addresses faulted")
   204  	}
   205  }
   206  
   207  // testSetPanicOnFault tests one potentially faulting address.
   208  // It deliberately constructs and uses an invalid pointer,
   209  // so mark it as nocheckptr.
   210  //
   211  //go:nocheckptr
   212  func testSetPanicOnFault(t *testing.T, addr uintptr, nfault *int) {
   213  	if GOOS == "js" || GOOS == "wasip1" {
   214  		t.Skip(GOOS + " does not support catching faults")
   215  	}
   216  
   217  	defer func() {
   218  		if err := recover(); err != nil {
   219  			*nfault++
   220  		}
   221  	}()
   222  
   223  	// The read should fault, except that sometimes we hit
   224  	// addresses that have had C or kernel pages mapped there
   225  	// readable by user code. So just log the content.
   226  	// If no addresses fault, we'll fail the test.
   227  	v := *(*byte)(unsafe.Pointer(addr))
   228  	t.Logf("addr %#x: %#x\n", addr, v)
   229  }
   230  
   231  func eqstring_generic(s1, s2 string) bool {
   232  	if len(s1) != len(s2) {
   233  		return false
   234  	}
   235  	// optimization in assembly versions:
   236  	// if s1.str == s2.str { return true }
   237  	for i := 0; i < len(s1); i++ {
   238  		if s1[i] != s2[i] {
   239  			return false
   240  		}
   241  	}
   242  	return true
   243  }
   244  
   245  func TestEqString(t *testing.T) {
   246  	// This isn't really an exhaustive test of == on strings; it's
   247  	// just a convenient way of documenting (via eqstring_generic)
   248  	// what == does.
   249  	s := []string{
   250  		"",
   251  		"a",
   252  		"c",
   253  		"aaa",
   254  		"ccc",
   255  		"cccc"[:3], // same contents, different string
   256  		"1234567890",
   257  	}
   258  	for _, s1 := range s {
   259  		for _, s2 := range s {
   260  			x := s1 == s2
   261  			y := eqstring_generic(s1, s2)
   262  			if x != y {
   263  				t.Errorf(`("%s" == "%s") = %t, want %t`, s1, s2, x, y)
   264  			}
   265  		}
   266  	}
   267  }
   268  
   269  func TestTrailingZero(t *testing.T) {
   270  	// make sure we add padding for structs with trailing zero-sized fields
   271  	type T1 struct {
   272  		n int32
   273  		z [0]byte
   274  	}
   275  	if unsafe.Sizeof(T1{}) != 8 {
   276  		t.Errorf("sizeof(%#v)==%d, want 8", T1{}, unsafe.Sizeof(T1{}))
   277  	}
   278  	type T2 struct {
   279  		n int64
   280  		z struct{}
   281  	}
   282  	if unsafe.Sizeof(T2{}) != 8+unsafe.Sizeof(uintptr(0)) {
   283  		t.Errorf("sizeof(%#v)==%d, want %d", T2{}, unsafe.Sizeof(T2{}), 8+unsafe.Sizeof(uintptr(0)))
   284  	}
   285  	type T3 struct {
   286  		n byte
   287  		z [4]struct{}
   288  	}
   289  	if unsafe.Sizeof(T3{}) != 2 {
   290  		t.Errorf("sizeof(%#v)==%d, want 2", T3{}, unsafe.Sizeof(T3{}))
   291  	}
   292  	// make sure padding can do double duty, covering both the trailing zero-sized field and alignment
   293  	type T4 struct {
   294  		a int32
   295  		b int16
   296  		c int8
   297  		z struct{}
   298  	}
   299  	if unsafe.Sizeof(T4{}) != 8 {
   300  		t.Errorf("sizeof(%#v)==%d, want 8", T4{}, unsafe.Sizeof(T4{}))
   301  	}
   302  	// make sure we don't pad a zero-sized thing
   303  	type T5 struct {
   304  	}
   305  	if unsafe.Sizeof(T5{}) != 0 {
   306  		t.Errorf("sizeof(%#v)==%d, want 0", T5{}, unsafe.Sizeof(T5{}))
   307  	}
   308  }
   309  
   310  func TestAppendGrowth(t *testing.T) {
   311  	var x []int64
   312  	check := func(want int) {
   313  		if cap(x) != want {
   314  			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
   315  		}
   316  	}
   317  
   318  	check(0)
   319  	want := 1
   320  	for i := 1; i <= 100; i++ {
   321  		x = append(x, 1)
   322  		check(want)
   323  		if i&(i-1) == 0 {
   324  			want = 2 * i
   325  		}
   326  	}
   327  }
   328  
   329  var One = []int64{1}
   330  
   331  func TestAppendSliceGrowth(t *testing.T) {
   332  	var x []int64
   333  	check := func(want int) {
   334  		if cap(x) != want {
   335  			t.Errorf("len=%d, cap=%d, want cap=%d", len(x), cap(x), want)
   336  		}
   337  	}
   338  
   339  	check(0)
   340  	want := 1
   341  	for i := 1; i <= 100; i++ {
   342  		x = append(x, One...)
   343  		check(want)
   344  		if i&(i-1) == 0 {
   345  			want = 2 * i
   346  		}
   347  	}
   348  }
   349  
   350  func TestGoroutineProfileTrivial(t *testing.T) {
   351  	// Calling GoroutineProfile twice in a row should find the same number of goroutines,
   352  	// but it's possible there are goroutines just about to exit, so we might end up
   353  	// with fewer in the second call. Try a few times; it should converge once those
   354  	// zombies are gone.
   355  	for i := 0; ; i++ {
   356  		n1, ok := GoroutineProfile(nil) // should fail, there's at least 1 goroutine
   357  		if n1 < 1 || ok {
   358  			t.Fatalf("GoroutineProfile(nil) = %d, %v, want >0, false", n1, ok)
   359  		}
   360  		n2, ok := GoroutineProfile(make([]StackRecord, n1))
   361  		if n2 == n1 && ok {
   362  			break
   363  		}
   364  		t.Logf("GoroutineProfile(%d) = %d, %v, want %d, true", n1, n2, ok, n1)
   365  		if i >= 10 {
   366  			t.Fatalf("GoroutineProfile not converging")
   367  		}
   368  	}
   369  }
   370  
   371  func BenchmarkGoroutineProfile(b *testing.B) {
   372  	run := func(fn func() bool) func(b *testing.B) {
   373  		runOne := func(b *testing.B) {
   374  			latencies := make([]time.Duration, 0, b.N)
   375  
   376  			b.ResetTimer()
   377  			for i := 0; i < b.N; i++ {
   378  				start := time.Now()
   379  				ok := fn()
   380  				if !ok {
   381  					b.Fatal("goroutine profile failed")
   382  				}
   383  				latencies = append(latencies, time.Since(start))
   384  			}
   385  			b.StopTimer()
   386  
   387  			// Sort latencies then report percentiles.
   388  			slices.Sort(latencies)
   389  			b.ReportMetric(float64(latencies[len(latencies)*50/100]), "p50-ns")
   390  			b.ReportMetric(float64(latencies[len(latencies)*90/100]), "p90-ns")
   391  			b.ReportMetric(float64(latencies[len(latencies)*99/100]), "p99-ns")
   392  		}
   393  		return func(b *testing.B) {
   394  			b.Run("idle", runOne)
   395  
   396  			b.Run("loaded", func(b *testing.B) {
   397  				stop := applyGCLoad(b)
   398  				runOne(b)
   399  				// Make sure to stop the timer before we wait! The load created above
   400  				// is very heavy-weight and not easy to stop, so we could end up
   401  				// confusing the benchmarking framework for small b.N.
   402  				b.StopTimer()
   403  				stop()
   404  			})
   405  		}
   406  	}
   407  
   408  	// Measure the cost of counting goroutines
   409  	b.Run("small-nil", run(func() bool {
   410  		GoroutineProfile(nil)
   411  		return true
   412  	}))
   413  
   414  	// Measure the cost with a small set of goroutines
   415  	n := NumGoroutine()
   416  	p := make([]StackRecord, 2*n+2*GOMAXPROCS(0))
   417  	b.Run("small", run(func() bool {
   418  		_, ok := GoroutineProfile(p)
   419  		return ok
   420  	}))
   421  
   422  	// Measure the cost with a large set of goroutines
   423  	ch := make(chan int)
   424  	var ready, done sync.WaitGroup
   425  	for i := 0; i < 5000; i++ {
   426  		ready.Add(1)
   427  		done.Add(1)
   428  		go func() { ready.Done(); <-ch; done.Done() }()
   429  	}
   430  	ready.Wait()
   431  
   432  	// Count goroutines with a large allgs list
   433  	b.Run("large-nil", run(func() bool {
   434  		GoroutineProfile(nil)
   435  		return true
   436  	}))
   437  
   438  	n = NumGoroutine()
   439  	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
   440  	b.Run("large", run(func() bool {
   441  		_, ok := GoroutineProfile(p)
   442  		return ok
   443  	}))
   444  
   445  	close(ch)
   446  	done.Wait()
   447  
   448  	// Count goroutines with a large (but unused) allgs list
   449  	b.Run("sparse-nil", run(func() bool {
   450  		GoroutineProfile(nil)
   451  		return true
   452  	}))
   453  
   454  	// Measure the cost of a large (but unused) allgs list
   455  	n = NumGoroutine()
   456  	p = make([]StackRecord, 2*n+2*GOMAXPROCS(0))
   457  	b.Run("sparse", run(func() bool {
   458  		_, ok := GoroutineProfile(p)
   459  		return ok
   460  	}))
   461  }
   462  
   463  func TestVersion(t *testing.T) {
   464  	// Test that version does not contain \r or \n.
   465  	vers := Version()
   466  	if strings.Contains(vers, "\r") || strings.Contains(vers, "\n") {
   467  		t.Fatalf("cr/nl in version: %q", vers)
   468  	}
   469  }
   470  
   471  func TestTimediv(t *testing.T) {
   472  	for _, tc := range []struct {
   473  		num int64
   474  		div int32
   475  		ret int32
   476  		rem int32
   477  	}{
   478  		{
   479  			num: 8,
   480  			div: 2,
   481  			ret: 4,
   482  			rem: 0,
   483  		},
   484  		{
   485  			num: 9,
   486  			div: 2,
   487  			ret: 4,
   488  			rem: 1,
   489  		},
   490  		{
   491  			// Used by runtime.check.
   492  			num: 12345*1000000000 + 54321,
   493  			div: 1000000000,
   494  			ret: 12345,
   495  			rem: 54321,
   496  		},
   497  		{
   498  			num: 1<<32 - 1,
   499  			div: 2,
   500  			ret: 1<<31 - 1, // no overflow.
   501  			rem: 1,
   502  		},
   503  		{
   504  			num: 1 << 32,
   505  			div: 2,
   506  			ret: 1<<31 - 1, // overflow.
   507  			rem: 0,
   508  		},
   509  		{
   510  			num: 1 << 40,
   511  			div: 2,
   512  			ret: 1<<31 - 1, // overflow.
   513  			rem: 0,
   514  		},
   515  		{
   516  			num: 1<<40 + 1,
   517  			div: 1 << 10,
   518  			ret: 1 << 30,
   519  			rem: 1,
   520  		},
   521  	} {
   522  		name := fmt.Sprintf("%d div %d", tc.num, tc.div)
   523  		t.Run(name, func(t *testing.T) {
   524  			// Double check that the inputs make sense using
   525  			// standard 64-bit division.
   526  			ret64 := tc.num / int64(tc.div)
   527  			rem64 := tc.num % int64(tc.div)
   528  			if ret64 != int64(int32(ret64)) {
   529  				// Simulate timediv overflow value.
   530  				ret64 = 1<<31 - 1
   531  				rem64 = 0
   532  			}
   533  			if ret64 != int64(tc.ret) {
   534  				t.Errorf("%d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret64, rem64, tc.ret, tc.rem)
   535  			}
   536  
   537  			var rem int32
   538  			ret := Timediv(tc.num, tc.div, &rem)
   539  			if ret != tc.ret || rem != tc.rem {
   540  				t.Errorf("timediv %d / %d got ret %d rem %d want ret %d rem %d", tc.num, tc.div, ret, rem, tc.ret, tc.rem)
   541  			}
   542  		})
   543  	}
   544  }
   545  
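// timedivRefSketch is a hypothetical pure-Go reference (assumed; it is not the
// runtime's implementation) for the semantics exercised above: 64-bit by
// 32-bit division whose quotient saturates at 1<<31-1 with a zero remainder on
// overflow, mirroring the double check inside TestTimediv.
func timedivRefSketch(num int64, div int32) (ret, rem int32) {
	q := num / int64(div)
	if q != int64(int32(q)) {
		// The quotient does not fit in an int32: report the overflow value.
		return 1<<31 - 1, 0
	}
	return int32(q), int32(num % int64(div))
}
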
   546  func BenchmarkProcYield(b *testing.B) {
   547  	benchN := func(n uint32) func(*testing.B) {
   548  		return func(b *testing.B) {
   549  			for i := 0; i < b.N; i++ {
   550  				ProcYield(n)
   551  			}
   552  		}
   553  	}
   554  
   555  	b.Run("1", benchN(1))
   556  	b.Run("10", benchN(10))
   557  	b.Run("30", benchN(30)) // active_spin_cnt in lock_sema.go and lock_futex.go
   558  	b.Run("100", benchN(100))
   559  	b.Run("1000", benchN(1000))
   560  }
   561  
   562  func BenchmarkOSYield(b *testing.B) {
   563  	for i := 0; i < b.N; i++ {
   564  		OSYield()
   565  	}
   566  }
   567  
   568  func BenchmarkMutexContention(b *testing.B) {
   569  	// Measure throughput of a single mutex with all threads contending
   570  	//
   571  	// Share a single counter across all threads. Progress from any thread is
   572  	// progress for the benchmark as a whole. We don't measure or give points
   573  	// for fairness here; arbitrary delay to any given thread's progress is
   574  	// invisible and allowed.
   575  	//
   576  	// The cache line that holds the count value will need to move between
   577  	// processors, but not as often as the cache line that holds the mutex. The
   578  	// mutex protects access to the count value, which limits contention on that
   579  	// cache line. This is a simple design, but it helps to make the behavior of
   580  	// the benchmark clear. Most real uses of mutex will protect some number of
   581  	// cache lines anyway.
   582  
   583  	var state struct {
   584  		_     cpu.CacheLinePad
   585  		lock  Mutex
   586  		_     cpu.CacheLinePad
   587  		count atomic.Int64
   588  		_     cpu.CacheLinePad
   589  	}
   590  
   591  	procs := GOMAXPROCS(0)
   592  	var wg sync.WaitGroup
   593  	for range procs {
   594  		wg.Add(1)
   595  		go func() {
   596  			defer wg.Done()
   597  			for {
   598  				Lock(&state.lock)
   599  				ours := state.count.Add(1)
   600  				Unlock(&state.lock)
   601  				if ours >= int64(b.N) {
   602  					return
   603  				}
   604  			}
   605  		}()
   606  	}
   607  	wg.Wait()
   608  }
   609  
   610  func BenchmarkMutexCapture(b *testing.B) {
   611  
   612  	// Measure mutex fairness.
   613  	//
   614  	// Have several threads contend for a single mutex value. Measure how
   615  	// effectively a single thread is able to capture the lock and report the
   616  	// duration of those "streak" events. Measure how long other individual
   617  	// threads need to wait between their turns with the lock. Report the
   618  	// duration of those "starve" events.
   619  	//
   620  	// Report in terms of wall clock time (assuming a constant time per
   621  	// lock/unlock pair) rather than number of locks/unlocks. This keeps
   622  	// timekeeping overhead out of the critical path, and avoids giving an
   623  	// advantage to lock/unlock implementations that take less time per
   624  	// operation.
   625  
   626  	var state struct {
   627  		_     cpu.CacheLinePad
   628  		lock  Mutex
   629  		_     cpu.CacheLinePad
   630  		count atomic.Int64
   631  		_     cpu.CacheLinePad
   632  	}
   633  
   634  	procs := GOMAXPROCS(0)
   635  	var wg sync.WaitGroup
   636  	histograms := make(chan [2][65]int)
   637  	for range procs {
   638  		wg.Add(1)
   639  		go func() {
   640  			var (
   641  				prev      int64
   642  				streak    int64
   643  				histogram [2][65]int
   644  			)
   645  			for {
   646  				Lock(&state.lock)
   647  				ours := state.count.Add(1)
   648  				Unlock(&state.lock)
   649  				delta := ours - prev - 1
   650  				prev = ours
   651  				if delta == 0 {
   652  					streak++
   653  				} else {
   654  					histogram[0][bits.LeadingZeros64(uint64(streak))]++
   655  					histogram[1][bits.LeadingZeros64(uint64(delta))]++
   656  					streak = 1
   657  				}
   658  				if ours >= int64(b.N) {
   659  					wg.Done()
   660  					if delta == 0 {
   661  						histogram[0][bits.LeadingZeros64(uint64(streak))]++
   662  						histogram[1][bits.LeadingZeros64(uint64(delta))]++
   663  					}
   664  					histograms <- histogram
   665  					return
   666  				}
   667  			}
   668  		}()
   669  	}
   670  
   671  	wg.Wait()
   672  	b.StopTimer()
   673  
   674  	var histogram [2][65]int
   675  	for range procs {
   676  		h := <-histograms
   677  		for i := range h {
   678  			for j := range h[i] {
   679  				histogram[i][j] += h[i][j]
   680  			}
   681  		}
   682  	}
   683  
   684  	percentile := func(h [65]int, p float64) int {
   685  		sum := 0
   686  		for i, v := range h {
   687  			bound := uint64(1<<63) >> i
   688  			sum += int(bound) * v
   689  		}
   690  
   691  		// Imagine that the longest streak / starvation events were instead half
   692  		// as long but twice in number. (Note that we've pre-multiplied by the
   693  		// [lower] "bound" value.) Continue those splits until we meet the
   694  		// percentile target. (A worked example of this bucketing follows the benchmark.)
   695  		part := 0
   696  		for i, v := range h {
   697  			bound := uint64(1<<63) >> i
   698  			part += int(bound) * v
   699  			// have we trimmed off enough at the head to dip below the percentile goal?
   700  			if float64(sum-part) < float64(sum)*p {
   701  				return int(bound)
   702  			}
   703  		}
   704  
   705  		return 0
   706  	}
   707  
   708  	perOp := float64(b.Elapsed().Nanoseconds()) / float64(b.N)
   709  	b.ReportMetric(perOp*float64(percentile(histogram[0], 1.0)), "ns/streak-p100")
   710  	b.ReportMetric(perOp*float64(percentile(histogram[0], 0.9)), "ns/streak-p90")
   711  	b.ReportMetric(perOp*float64(percentile(histogram[1], 1.0)), "ns/starve-p100")
   712  	b.ReportMetric(perOp*float64(percentile(histogram[1], 0.9)), "ns/starve-p90")
   713  }
   714  
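// Hypothetical worked example (assumed, not part of the benchmark above): the
// histograms bucket streak and starvation lengths by bits.LeadingZeros64, so
// bucket i holds values in [1<<(63-i), 1<<(64-i)) and percentile weights each
// bucket by its lower bound, uint64(1<<63) >> i. Bucket 64 catches a value of
// zero, which is why the arrays have 65 entries.
func TestLeadingZerosBucketSketch(t *testing.T) {
	// A streak of length 5 has 61 leading zeros, so it lands in bucket 61,
	// whose lower bound is 1<<63>>61 = 4.
	if got := bits.LeadingZeros64(5); got != 61 {
		t.Fatalf("LeadingZeros64(5) = %d, want 61", got)
	}
	if bound := uint64(1<<63) >> 61; bound != 4 {
		t.Fatalf("bucket 61 lower bound = %d, want 4", bound)
	}
}
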
   715  func BenchmarkMutexHandoff(b *testing.B) {
   716  	testcase := func(delay func(l *Mutex)) func(b *testing.B) {
   717  		return func(b *testing.B) {
   718  			if workers := 2; GOMAXPROCS(0) < workers {
   719  				b.Skipf("requires GOMAXPROCS >= %d", workers)
   720  			}
   721  
   722  			// Measure latency of mutex handoff between threads.
   723  			//
   724  			// Hand off a runtime.mutex between two threads, one running a
   725  			// "coordinator" goroutine and the other running a "worker"
   726  			// goroutine. We don't override the runtime's typical
   727  			// goroutine/thread mapping behavior.
   728  			//
   729  			// Measure the latency, starting when the coordinator enters a call
   730  			// to runtime.unlock and ending when the worker's call to
   731  			// runtime.lock returns. The benchmark can specify a "delay"
   732  			// function to simulate the length of the mutex-holder's critical
   733  			// section, including to arrange for the worker's thread to be in
   734  			// either the "spinning" or "sleeping" portions of the runtime.lock2
   735  			// implementation. Measurement starts after any such "delay".
   736  			//
   737  			// The two threads' goroutines communicate their current position to
   738  			// each other in a non-blocking way via the "turn" state. (A sketch of the turn encoding follows this benchmark.)
   739  
   740  			var state struct {
   741  				_    cpu.CacheLinePad
   742  				lock Mutex
   743  				_    cpu.CacheLinePad
   744  				turn atomic.Int64
   745  				_    cpu.CacheLinePad
   746  			}
   747  
   748  			var delta atomic.Int64
   749  			var wg sync.WaitGroup
   750  
   751  			// coordinator:
   752  			//  - acquire the mutex
   753  			//  - set the turn to 2 mod 4, instructing the worker to begin its Lock call
   754  			//  - wait until the mutex is contended
   755  			//  - wait a bit more so the worker can commit to its sleep
   756  			//  - release the mutex and wait for it to be our turn (0 mod 4) again
   757  			wg.Add(1)
   758  			go func() {
   759  				defer wg.Done()
   760  				var t int64
   761  				for range b.N {
   762  					Lock(&state.lock)
   763  					state.turn.Add(2)
   764  					delay(&state.lock)
   765  					t -= Nanotime() // start the timer
   766  					Unlock(&state.lock)
   767  					for state.turn.Load()&0x2 != 0 {
   768  					}
   769  				}
   770  				state.turn.Add(1)
   771  				delta.Add(t)
   772  			}()
   773  
   774  			// worker:
   775  			//  - wait until it's our turn (2 mod 4)
   776  			//  - acquire and release the mutex
   777  			//  - switch the turn counter back to the coordinator (0 mod 4)
   778  			wg.Add(1)
   779  			go func() {
   780  				defer wg.Done()
   781  				var t int64
   782  				for {
   783  					switch state.turn.Load() & 0x3 {
   784  					case 0:
   785  					case 1, 3:
   786  						delta.Add(t)
   787  						return
   788  					case 2:
   789  						Lock(&state.lock)
   790  						t += Nanotime() // stop the timer
   791  						Unlock(&state.lock)
   792  						state.turn.Add(2)
   793  					}
   794  				}
   795  			}()
   796  
   797  			wg.Wait()
   798  			b.ReportMetric(float64(delta.Load())/float64(b.N), "ns/op")
   799  		}
   800  	}
   801  
   802  	b.Run("Solo", func(b *testing.B) {
   803  		var lock Mutex
   804  		for range b.N {
   805  			Lock(&lock)
   806  			Unlock(&lock)
   807  		}
   808  	})
   809  
   810  	b.Run("FastPingPong", testcase(func(l *Mutex) {}))
   811  	b.Run("SlowPingPong", testcase(func(l *Mutex) {
   812  		// Wait for the worker to stop spinning and prepare to sleep
   813  		for !MutexContended(l) {
   814  		}
   815  		// Wait a bit longer so the OS can finish committing the worker to its
   816  		// sleep. Balance consistency against getting enough iterations.
   817  		const extraNs = 10e3
   818  		for t0 := Nanotime(); Nanotime()-t0 < extraNs; {
   819  		}
   820  	}))
   821  }
   822  
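// Hypothetical sketch (assumed, not part of the benchmark above): the low two
// bits of the "turn" word encode whose move it is. Even values alternate
// between the coordinator (0 mod 4) and the worker (2 mod 4); the
// coordinator's final Add(1) makes the value odd, which tells the worker to
// publish its time and exit.
func turnOwnerSketch(turn int64) string {
	switch turn & 0x3 {
	case 0:
		return "coordinator"
	case 2:
		return "worker"
	default: // 1 or 3: the benchmark is done
		return "done"
	}
}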
