Source file src/testing/benchmark.go

     1  // Copyright 2009 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package testing
     6  
     7  import (
     8  	"flag"
     9  	"fmt"
    10  	"internal/sysinfo"
    11  	"io"
    12  	"math"
    13  	"os"
    14  	"runtime"
    15  	"slices"
    16  	"strconv"
    17  	"strings"
    18  	"sync"
    19  	"sync/atomic"
    20  	"time"
    21  	"unicode"
    22  )
    23  
    24  func initBenchmarkFlags() {
    25  	matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
    26  	benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
    27  	flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
    28  }
    29  
// Values of the benchmark flags. The two pointers are assigned by
// initBenchmarkFlags.
var (
	matchBenchmarks *string // -test.bench: run only benchmarks matching this regexp
	benchmarkMemory *bool   // -test.benchmem: print memory allocations for benchmarks

	// benchTime is the value of -test.benchtime; it defaults to one second.
	benchTime = durationOrCountFlag{d: 1 * time.Second} // changed during test of testing package
)
    36  
    37  type durationOrCountFlag struct {
    38  	d         time.Duration
    39  	n         int
    40  	allowZero bool
    41  }
    42  
    43  func (f *durationOrCountFlag) String() string {
    44  	if f.n > 0 {
    45  		return fmt.Sprintf("%dx", f.n)
    46  	}
    47  	return f.d.String()
    48  }
    49  
    50  func (f *durationOrCountFlag) Set(s string) error {
    51  	if strings.HasSuffix(s, "x") {
    52  		n, err := strconv.ParseInt(s[:len(s)-1], 10, 0)
    53  		if err != nil || n < 0 || (!f.allowZero && n == 0) {
    54  			return fmt.Errorf("invalid count")
    55  		}
    56  		*f = durationOrCountFlag{n: int(n)}
    57  		return nil
    58  	}
    59  	d, err := time.ParseDuration(s)
    60  	if err != nil || d < 0 || (!f.allowZero && d == 0) {
    61  		return fmt.Errorf("invalid duration")
    62  	}
    63  	*f = durationOrCountFlag{d: d}
    64  	return nil
    65  }
    66  
// Global lock to ensure only one benchmark runs at a time.
// runN holds it for the duration of a run; B.Run releases it while a
// subbenchmark executes and reacquires it before returning.
var benchmarkLock sync.Mutex

// Used for every benchmark for measuring memory.
// It is the shared destination of the runtime.ReadMemStats calls made by
// StartTimer, StopTimer, and ResetTimer.
var memStats runtime.MemStats
    72  
// InternalBenchmark is an internal type but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
type InternalBenchmark struct {
	Name string     // benchmark name, matched against the -test.bench pattern
	F    func(b *B) // the benchmark function
}
    79  
// B is a type passed to [Benchmark] functions to manage benchmark
// timing and to specify the number of iterations to run.
//
// A benchmark ends when its Benchmark function returns or calls any of the methods
// FailNow, Fatal, Fatalf, SkipNow, Skip, or Skipf. Those methods must be called
// only from the goroutine running the Benchmark function.
// The other reporting methods, such as the variations of Log and Error,
// may be called simultaneously from multiple goroutines.
//
// Like in tests, benchmark logs are accumulated during execution
// and dumped to standard output when done. Unlike in tests, benchmark logs
// are always printed, so as not to hide output whose existence may be
// affecting benchmark results.
type B struct {
	common
	importPath       string // import path of the package containing the benchmark
	bstate           *benchState // shared state when run via runBenchmarks; nil for func Benchmark
	N                int // number of iterations for the current run; set by runN
	previousN        int           // number of iterations in the previous run
	previousDuration time.Duration // total duration of the previous run
	benchFunc        func(b *B) // the function being benchmarked
	benchTime        durationOrCountFlag // requested run time or iteration count
	bytes            int64 // bytes processed per operation, as set by SetBytes
	missingBytes     bool // one of the subbenchmarks does not have bytes set.
	timerOn          bool // whether the benchmark timer is currently running
	showAllocResult  bool // set by ReportAllocs
	result           BenchmarkResult // filled in by launch, or aggregated by add
	parallelism      int // RunParallel creates parallelism*GOMAXPROCS goroutines
	// The initial states of memStats.Mallocs and memStats.TotalAlloc.
	startAllocs uint64
	startBytes  uint64
	// The net total of this test after being run.
	netAllocs uint64
	netBytes  uint64
	// Extra metrics collected by ReportMetric.
	extra map[string]float64
	// Remaining iterations of Loop() to be executed in benchFunc.
	// See issue #61515.
	loopN int
}
   120  
   121  // StartTimer starts timing a test. This function is called automatically
   122  // before a benchmark starts, but it can also be used to resume timing after
   123  // a call to [B.StopTimer].
   124  func (b *B) StartTimer() {
   125  	if !b.timerOn {
   126  		runtime.ReadMemStats(&memStats)
   127  		b.startAllocs = memStats.Mallocs
   128  		b.startBytes = memStats.TotalAlloc
   129  		b.start = highPrecisionTimeNow()
   130  		b.timerOn = true
   131  	}
   132  }
   133  
   134  // StopTimer stops timing a test. This can be used to pause the timer
   135  // while performing complex initialization that you don't
   136  // want to measure.
   137  func (b *B) StopTimer() {
   138  	if b.timerOn {
   139  		b.duration += highPrecisionTimeSince(b.start)
   140  		runtime.ReadMemStats(&memStats)
   141  		b.netAllocs += memStats.Mallocs - b.startAllocs
   142  		b.netBytes += memStats.TotalAlloc - b.startBytes
   143  		b.timerOn = false
   144  	}
   145  }
   146  
   147  // ResetTimer zeroes the elapsed benchmark time and memory allocation counters
   148  // and deletes user-reported metrics.
   149  // It does not affect whether the timer is running.
   150  func (b *B) ResetTimer() {
   151  	if b.extra == nil {
   152  		// Allocate the extra map before reading memory stats.
   153  		// Pre-size it to make more allocation unlikely.
   154  		b.extra = make(map[string]float64, 16)
   155  	} else {
   156  		clear(b.extra)
   157  	}
   158  	if b.timerOn {
   159  		runtime.ReadMemStats(&memStats)
   160  		b.startAllocs = memStats.Mallocs
   161  		b.startBytes = memStats.TotalAlloc
   162  		b.start = highPrecisionTimeNow()
   163  	}
   164  	b.duration = 0
   165  	b.netAllocs = 0
   166  	b.netBytes = 0
   167  }
   168  
   169  // SetBytes records the number of bytes processed in a single operation.
   170  // If this is called, the benchmark will report ns/op and MB/s.
   171  func (b *B) SetBytes(n int64) { b.bytes = n }
   172  
   173  // ReportAllocs enables malloc statistics for this benchmark.
   174  // It is equivalent to setting -test.benchmem, but it only affects the
   175  // benchmark function that calls ReportAllocs.
   176  func (b *B) ReportAllocs() {
   177  	b.showAllocResult = true
   178  }
   179  
// runN runs a single benchmark for the specified number of iterations.
// It holds benchmarkLock for the whole run and, when the benchmark function
// returns, records the iteration count and measured duration in
// previousN and previousDuration.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	// Deferred calls run in reverse order, so cleanup and the race check
	// happen while the lock is still held.
	defer func() {
		b.runCleanup(normalPanic)
		b.checkRaces()
	}()
	// Try to get a comparable environment for each run
	// by clearing garbage from previous runs.
	runtime.GC()
	b.resetRaces()
	b.N = n
	b.loopN = n // Loop counts down from n.
	b.parallelism = 1
	// Zero the counters, then start the timer right before the benchmark body.
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	b.previousN = n
	b.previousDuration = b.duration
}
   202  
   203  // run1 runs the first iteration of benchFunc. It reports whether more
   204  // iterations of this benchmarks should be run.
   205  func (b *B) run1() bool {
   206  	if bstate := b.bstate; bstate != nil {
   207  		// Extend maxLen, if needed.
   208  		if n := len(b.name) + bstate.extLen + 1; n > bstate.maxLen {
   209  			bstate.maxLen = n + 8 // Add additional slack to avoid too many jumps in size.
   210  		}
   211  	}
   212  	go func() {
   213  		// Signal that we're done whether we return normally
   214  		// or by FailNow's runtime.Goexit.
   215  		defer func() {
   216  			b.signal <- true
   217  		}()
   218  
   219  		b.runN(1)
   220  	}()
   221  	<-b.signal
   222  	if b.failed {
   223  		fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
   224  		return false
   225  	}
   226  	// Only print the output if we know we are not going to proceed.
   227  	// Otherwise it is printed in processBench.
   228  	b.mu.RLock()
   229  	finished := b.finished
   230  	b.mu.RUnlock()
   231  	if b.hasSub.Load() || finished {
   232  		tag := "BENCH"
   233  		if b.skipped {
   234  			tag = "SKIP"
   235  		}
   236  		if b.chatty != nil && (len(b.output) > 0 || finished) {
   237  			b.trimOutput()
   238  			fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
   239  		}
   240  		return false
   241  	}
   242  	return true
   243  }
   244  
// labelsOnce ensures the goos/goarch/pkg/cpu header lines are printed
// at most once, whichever of B.run or B.Run gets there first.
var labelsOnce sync.Once
   246  
   247  // run executes the benchmark in a separate goroutine, including all of its
   248  // subbenchmarks. b must not have subbenchmarks.
   249  func (b *B) run() {
   250  	labelsOnce.Do(func() {
   251  		fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
   252  		fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
   253  		if b.importPath != "" {
   254  			fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
   255  		}
   256  		if cpu := sysinfo.CPUName(); cpu != "" {
   257  			fmt.Fprintf(b.w, "cpu: %s\n", cpu)
   258  		}
   259  	})
   260  	if b.bstate != nil {
   261  		// Running go test --test.bench
   262  		b.bstate.processBench(b) // Must call doBench.
   263  	} else {
   264  		// Running func Benchmark.
   265  		b.doBench()
   266  	}
   267  }
   268  
// doBench runs launch in a new goroutine, blocks until it signals
// completion on b.signal, and returns the result launch stored in b.result.
func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}
   274  
// launch launches the benchmark function. It gradually increases the number
// of benchmark iterations until the benchmark runs for the requested benchtime.
// launch is run by the doBench function as a separate goroutine.
// run1 must have been called on b.
//
// When benchTime holds an iteration count instead of a duration, the
// benchmark is run once with exactly that many iterations (or not rerun at
// all for a count of 1). The final measurements are stored in b.result.
func (b *B) launch() {
	// Signal that we're done whether we return normally
	// or by FailNow's runtime.Goexit.
	defer func() {
		b.signal <- true
	}()

	// Run the benchmark for at least the specified amount of time.
	if b.benchTime.n > 0 {
		// We already ran a single iteration in run1.
		// If -benchtime=1x was requested, use that result.
		// See https://golang.org/issue/32051.
		if b.benchTime.n > 1 {
			b.runN(b.benchTime.n)
		}
	} else {
		d := b.benchTime.d
		// Keep rerunning with a larger n until a run fails, a run lasts at
		// least d, or n reaches the 1e9 cap.
		for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
			last := n
			// Predict required iterations.
			goalns := d.Nanoseconds()
			prevIters := int64(b.N)
			prevns := b.duration.Nanoseconds()
			if prevns <= 0 {
				// Round up, to avoid div by zero.
				prevns = 1
			}
			// Order of operations matters.
			// For very fast benchmarks, prevIters ~= prevns.
			// If you divide first, you get 0 or 1,
			// which can hide an order of magnitude in execution time.
			// So multiply first, then divide.
			n = goalns * prevIters / prevns
			// Run more iterations than we think we'll need (1.2x).
			n += n / 5
			// Don't grow too fast in case we had timing errors previously.
			n = min(n, 100*last)
			// Be sure to run at least one more than last time.
			n = max(n, last+1)
			// Don't run more than 1e9 times. (This also keeps n in int range on 32 bit platforms.)
			n = min(n, 1e9)
			b.runN(int(n))
		}
	}
	// Field order: N, T, Bytes, MemAllocs, MemBytes, Extra.
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}
   325  
   326  // Elapsed returns the measured elapsed time of the benchmark.
   327  // The duration reported by Elapsed matches the one measured by
   328  // [B.StartTimer], [B.StopTimer], and [B.ResetTimer].
   329  func (b *B) Elapsed() time.Duration {
   330  	d := b.duration
   331  	if b.timerOn {
   332  		d += highPrecisionTimeSince(b.start)
   333  	}
   334  	return d
   335  }
   336  
   337  // ReportMetric adds "n unit" to the reported benchmark results.
   338  // If the metric is per-iteration, the caller should divide by b.N,
   339  // and by convention units should end in "/op".
   340  // ReportMetric overrides any previously reported value for the same unit.
   341  // ReportMetric panics if unit is the empty string or if unit contains
   342  // any whitespace.
   343  // If unit is a unit normally reported by the benchmark framework itself
   344  // (such as "allocs/op"), ReportMetric will override that metric.
   345  // Setting "ns/op" to 0 will suppress that built-in metric.
   346  func (b *B) ReportMetric(n float64, unit string) {
   347  	if unit == "" {
   348  		panic("metric unit must not be empty")
   349  	}
   350  	if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
   351  		panic("metric unit must not contain whitespace")
   352  	}
   353  	b.extra[unit] = n
   354  }
   355  
   356  // Loop returns true until b.N calls has been made to it.
   357  //
   358  // A benchmark should either use Loop or contain an explicit loop from 0 to b.N, but not both.
   359  // After the benchmark finishes, b.N will contain the total number of calls to op, so the benchmark
   360  // may use b.N to compute other average metrics.
   361  func (b *B) Loop() bool {
   362  	if b.loopN == b.N {
   363  		// If it's the first call to b.Loop() in the benchmark function.
   364  		// Allows more precise measurement of benchmark loop cost counts.
   365  		b.ResetTimer()
   366  	}
   367  	b.loopN--
   368  	return b.loopN >= 0
   369  }
   370  
   371  // BenchmarkResult contains the results of a benchmark run.
   372  type BenchmarkResult struct {
   373  	N         int           // The number of iterations.
   374  	T         time.Duration // The total time taken.
   375  	Bytes     int64         // Bytes processed in one iteration.
   376  	MemAllocs uint64        // The total number of memory allocations.
   377  	MemBytes  uint64        // The total number of bytes allocated.
   378  
   379  	// Extra records additional metrics reported by ReportMetric.
   380  	Extra map[string]float64
   381  }
   382  
   383  // NsPerOp returns the "ns/op" metric.
   384  func (r BenchmarkResult) NsPerOp() int64 {
   385  	if v, ok := r.Extra["ns/op"]; ok {
   386  		return int64(v)
   387  	}
   388  	if r.N <= 0 {
   389  		return 0
   390  	}
   391  	return r.T.Nanoseconds() / int64(r.N)
   392  }
   393  
   394  // mbPerSec returns the "MB/s" metric.
   395  func (r BenchmarkResult) mbPerSec() float64 {
   396  	if v, ok := r.Extra["MB/s"]; ok {
   397  		return v
   398  	}
   399  	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
   400  		return 0
   401  	}
   402  	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
   403  }
   404  
   405  // AllocsPerOp returns the "allocs/op" metric,
   406  // which is calculated as r.MemAllocs / r.N.
   407  func (r BenchmarkResult) AllocsPerOp() int64 {
   408  	if v, ok := r.Extra["allocs/op"]; ok {
   409  		return int64(v)
   410  	}
   411  	if r.N <= 0 {
   412  		return 0
   413  	}
   414  	return int64(r.MemAllocs) / int64(r.N)
   415  }
   416  
   417  // AllocedBytesPerOp returns the "B/op" metric,
   418  // which is calculated as r.MemBytes / r.N.
   419  func (r BenchmarkResult) AllocedBytesPerOp() int64 {
   420  	if v, ok := r.Extra["B/op"]; ok {
   421  		return int64(v)
   422  	}
   423  	if r.N <= 0 {
   424  		return 0
   425  	}
   426  	return int64(r.MemBytes) / int64(r.N)
   427  }
   428  
   429  // String returns a summary of the benchmark results.
   430  // It follows the benchmark result line format from
   431  // https://golang.org/design/14313-benchmark-format, not including the
   432  // benchmark name.
   433  // Extra metrics override built-in metrics of the same name.
   434  // String does not include allocs/op or B/op, since those are reported
   435  // by [BenchmarkResult.MemString].
   436  func (r BenchmarkResult) String() string {
   437  	buf := new(strings.Builder)
   438  	fmt.Fprintf(buf, "%8d", r.N)
   439  
   440  	// Get ns/op as a float.
   441  	ns, ok := r.Extra["ns/op"]
   442  	if !ok {
   443  		ns = float64(r.T.Nanoseconds()) / float64(r.N)
   444  	}
   445  	if ns != 0 {
   446  		buf.WriteByte('\t')
   447  		prettyPrint(buf, ns, "ns/op")
   448  	}
   449  
   450  	if mbs := r.mbPerSec(); mbs != 0 {
   451  		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
   452  	}
   453  
   454  	// Print extra metrics that aren't represented in the standard
   455  	// metrics.
   456  	var extraKeys []string
   457  	for k := range r.Extra {
   458  		switch k {
   459  		case "ns/op", "MB/s", "B/op", "allocs/op":
   460  			// Built-in metrics reported elsewhere.
   461  			continue
   462  		}
   463  		extraKeys = append(extraKeys, k)
   464  	}
   465  	slices.Sort(extraKeys)
   466  	for _, k := range extraKeys {
   467  		buf.WriteByte('\t')
   468  		prettyPrint(buf, r.Extra[k], k)
   469  	}
   470  	return buf.String()
   471  }
   472  
   473  func prettyPrint(w io.Writer, x float64, unit string) {
   474  	// Print all numbers with 10 places before the decimal point
   475  	// and small numbers with four sig figs. Field widths are
   476  	// chosen to fit the whole part in 10 places while aligning
   477  	// the decimal point of all fractional formats.
   478  	var format string
   479  	switch y := math.Abs(x); {
   480  	case y == 0 || y >= 999.95:
   481  		format = "%10.0f %s"
   482  	case y >= 99.995:
   483  		format = "%12.1f %s"
   484  	case y >= 9.9995:
   485  		format = "%13.2f %s"
   486  	case y >= 0.99995:
   487  		format = "%14.3f %s"
   488  	case y >= 0.099995:
   489  		format = "%15.4f %s"
   490  	case y >= 0.0099995:
   491  		format = "%16.5f %s"
   492  	case y >= 0.00099995:
   493  		format = "%17.6f %s"
   494  	default:
   495  		format = "%18.7f %s"
   496  	}
   497  	fmt.Fprintf(w, format, x, unit)
   498  }
   499  
   500  // MemString returns r.AllocedBytesPerOp and r.AllocsPerOp in the same format as 'go test'.
   501  func (r BenchmarkResult) MemString() string {
   502  	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
   503  		r.AllocedBytesPerOp(), r.AllocsPerOp())
   504  }
   505  
   506  // benchmarkName returns full name of benchmark including procs suffix.
   507  func benchmarkName(name string, n int) string {
   508  	if n != 1 {
   509  		return fmt.Sprintf("%s-%d", name, n)
   510  	}
   511  	return name
   512  }
   513  
// benchState holds the state shared by the benchmarks started from one
// runBenchmarks call: the name matcher and the column width used to
// align benchmark names in the output.
type benchState struct {
	match *matcher // decides which benchmarks and subbenchmarks run

	maxLen int // The largest recorded benchmark name.
	extLen int // Maximum extension length.
}
   520  
// RunBenchmarks is an internal function but exported because it is cross-package;
// it is part of the implementation of the "go test" command.
//
// It calls runBenchmarks with an empty import path and discards the
// success result.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks("", matchString, benchmarks)
}
   526  
   527  func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
   528  	// If no flag was specified, don't run benchmarks.
   529  	if len(*matchBenchmarks) == 0 {
   530  		return true
   531  	}
   532  	// Collect matching benchmarks and determine longest name.
   533  	maxprocs := 1
   534  	for _, procs := range cpuList {
   535  		if procs > maxprocs {
   536  			maxprocs = procs
   537  		}
   538  	}
   539  	bstate := &benchState{
   540  		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
   541  		extLen: len(benchmarkName("", maxprocs)),
   542  	}
   543  	var bs []InternalBenchmark
   544  	for _, Benchmark := range benchmarks {
   545  		if _, matched, _ := bstate.match.fullName(nil, Benchmark.Name); matched {
   546  			bs = append(bs, Benchmark)
   547  			benchName := benchmarkName(Benchmark.Name, maxprocs)
   548  			if l := len(benchName) + bstate.extLen + 1; l > bstate.maxLen {
   549  				bstate.maxLen = l
   550  			}
   551  		}
   552  	}
   553  	main := &B{
   554  		common: common{
   555  			name:  "Main",
   556  			w:     os.Stdout,
   557  			bench: true,
   558  		},
   559  		importPath: importPath,
   560  		benchFunc: func(b *B) {
   561  			for _, Benchmark := range bs {
   562  				b.Run(Benchmark.Name, Benchmark.F)
   563  			}
   564  		},
   565  		benchTime: benchTime,
   566  		bstate:    bstate,
   567  	}
   568  	if Verbose() {
   569  		main.chatty = newChattyPrinter(main.w)
   570  	}
   571  	main.runN(1)
   572  	return !main.failed
   573  }
   574  
// processBench runs bench b for the configured CPU counts and prints the results.
// For every entry in cpuList the benchmark is run *count times. The first
// run reuses b, which run1 has already exercised once; every later run
// builds a fresh B and repeats the initial single-iteration run on it.
func (s *benchState) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)

			// If it's chatty, we've already printed this information.
			if b.chatty == nil {
				fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
			}
			// Recompute the running time for all but the first iteration.
			if i > 0 || j > 0 {
				// Note that b is reassigned: the rest of this iteration, and
				// the next one, work with the fresh B.
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
						bench:  true,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// The output could be very long here, but probably isn't.
				// We print it all, regardless, because we don't want to trim the reason
				// the benchmark failed.
				fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
				continue
			}
			results := r.String()
			// In chatty mode the name was not printed up front, so print it
			// now, directly before the result.
			if b.chatty != nil {
				fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
			}
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Unlike with tests, we ignore the -chatty flag and always print output for
			// benchmarks since the output generation time will skew the results.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
			}
			// Warn if the benchmark changed GOMAXPROCS and did not restore it.
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
			if b.chatty != nil && b.chatty.json {
				b.chatty.Updatef("", "=== NAME  %s\n", "")
			}
		}
	}
}
   632  
// If hideStdoutForTesting is true, Run does not print the benchName.
// This avoids a spurious print during 'go test' on package testing itself,
// which invokes b.Run in its own tests (see sub_test.go).
// It only matters when b.chatty is non-nil, since Run prints the name
// (and the JSON "=== RUN" update) only in that case.
var hideStdoutForTesting = false
   637  
// Run benchmarks f as a subbenchmark with the given name. It reports
// whether there were any failures.
//
// A subbenchmark is like any other benchmark. A benchmark that calls Run at
// least once will not be measured itself and will be called once with N=1.
//
// If the subbenchmark's name is rejected by the -test.bench matcher, Run
// does nothing and returns true.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has subbenchmarks, we will no longer run it as a benchmark itself.
	// Release the lock and acquire it on exit to ensure locks stay paired.
	b.hasSub.Store(true)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	// Without a benchState there is no matcher: the subbenchmark always runs
	// and takes the parent's name.
	benchName, ok, partial := b.name, true, false
	if b.bstate != nil {
		benchName, ok, partial = b.bstate.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	// Record the stack of Run's caller as the subbenchmark's creator.
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
			bench:   true,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		bstate:     b.bstate,
	}
	if partial {
		// Partial name match, like -bench=X/Y matching BenchmarkX.
		// Only process sub-benchmarks, if any.
		sub.hasSub.Store(true)
	}

	if b.chatty != nil {
		// In chatty mode the header lines and the benchmark name go
		// directly to standard output, before the benchmark runs.
		labelsOnce.Do(func() {
			fmt.Printf("goos: %s\n", runtime.GOOS)
			fmt.Printf("goarch: %s\n", runtime.GOARCH)
			if b.importPath != "" {
				fmt.Printf("pkg: %s\n", b.importPath)
			}
			if cpu := sysinfo.CPUName(); cpu != "" {
				fmt.Printf("cpu: %s\n", cpu)
			}
		})

		if !hideStdoutForTesting {
			if b.chatty.json {
				b.chatty.Updatef(benchName, "=== RUN   %s\n", benchName)
			}
			fmt.Println(benchName)
		}
	}

	// Probe with a single iteration first; only do the full timed run if
	// run1 says the benchmark should continue.
	if sub.run1() {
		sub.run()
	}
	// Fold the subbenchmark's result into the parent's aggregate.
	b.add(sub.result)
	return !sub.failed
}
   707  
   708  // add simulates running benchmarks in sequence in a single iteration. It is
   709  // used to give some meaningful results in case func Benchmark is used in
   710  // combination with Run.
   711  func (b *B) add(other BenchmarkResult) {
   712  	r := &b.result
   713  	// The aggregated BenchmarkResults resemble running all subbenchmarks as
   714  	// in sequence in a single benchmark.
   715  	r.N = 1
   716  	r.T += time.Duration(other.NsPerOp())
   717  	if other.Bytes == 0 {
   718  		// Summing Bytes is meaningless in aggregate if not all subbenchmarks
   719  		// set it.
   720  		b.missingBytes = true
   721  		r.Bytes = 0
   722  	}
   723  	if !b.missingBytes {
   724  		r.Bytes += other.Bytes
   725  	}
   726  	r.MemAllocs += uint64(other.AllocsPerOp())
   727  	r.MemBytes += uint64(other.AllocedBytesPerOp())
   728  }
   729  
   730  // trimOutput shortens the output from a benchmark, which can be very long.
   731  func (b *B) trimOutput() {
   732  	// The output is likely to appear multiple times because the benchmark
   733  	// is run multiple times, but at least it will be seen. This is not a big deal
   734  	// because benchmarks rarely print, but just in case, we trim it if it's too long.
   735  	const maxNewlines = 10
   736  	for nlCount, j := 0, 0; j < len(b.output); j++ {
   737  		if b.output[j] == '\n' {
   738  			nlCount++
   739  			if nlCount >= maxNewlines {
   740  				b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
   741  				break
   742  			}
   743  		}
   744  	}
   745  }
   746  
   747  // A PB is used by RunParallel for running parallel benchmarks.
   748  type PB struct {
   749  	globalN *atomic.Uint64 // shared between all worker goroutines iteration counter
   750  	grain   uint64         // acquire that many iterations from globalN at once
   751  	cache   uint64         // local cache of acquired iterations
   752  	bN      uint64         // total number of iterations to execute (b.N)
   753  }
   754  
   755  // Next reports whether there are more iterations to execute.
   756  func (pb *PB) Next() bool {
   757  	if pb.cache == 0 {
   758  		n := pb.globalN.Add(pb.grain)
   759  		if n <= pb.bN {
   760  			pb.cache = pb.grain
   761  		} else if n < pb.bN+pb.grain {
   762  			pb.cache = pb.bN + pb.grain - n
   763  		} else {
   764  			return false
   765  		}
   766  	}
   767  	pb.cache--
   768  	return true
   769  }
   770  
   771  // RunParallel runs a benchmark in parallel.
   772  // It creates multiple goroutines and distributes b.N iterations among them.
   773  // The number of goroutines defaults to GOMAXPROCS. To increase parallelism for
   774  // non-CPU-bound benchmarks, call [B.SetParallelism] before RunParallel.
   775  // RunParallel is usually used with the go test -cpu flag.
   776  //
   777  // The body function will be run in each goroutine. It should set up any
   778  // goroutine-local state and then iterate until pb.Next returns false.
   779  // It should not use the [B.StartTimer], [B.StopTimer], or [B.ResetTimer] functions,
   780  // because they have global effect. It should also not call [B.Run].
   781  //
   782  // RunParallel reports ns/op values as wall time for the benchmark as a whole,
   783  // not the sum of wall time or CPU time over each parallel goroutine.
   784  func (b *B) RunParallel(body func(*PB)) {
   785  	if b.N == 0 {
   786  		return // Nothing to do when probing.
   787  	}
   788  	// Calculate grain size as number of iterations that take ~100µs.
   789  	// 100µs is enough to amortize the overhead and provide sufficient
   790  	// dynamic load balancing.
   791  	grain := uint64(0)
   792  	if b.previousN > 0 && b.previousDuration > 0 {
   793  		grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
   794  	}
   795  	if grain < 1 {
   796  		grain = 1
   797  	}
   798  	// We expect the inner loop and function call to take at least 10ns,
   799  	// so do not do more than 100µs/10ns=1e4 iterations.
   800  	if grain > 1e4 {
   801  		grain = 1e4
   802  	}
   803  
   804  	var n atomic.Uint64
   805  	numProcs := b.parallelism * runtime.GOMAXPROCS(0)
   806  	var wg sync.WaitGroup
   807  	wg.Add(numProcs)
   808  	for p := 0; p < numProcs; p++ {
   809  		go func() {
   810  			defer wg.Done()
   811  			pb := &PB{
   812  				globalN: &n,
   813  				grain:   grain,
   814  				bN:      uint64(b.N),
   815  			}
   816  			body(pb)
   817  		}()
   818  	}
   819  	wg.Wait()
   820  	if n.Load() <= uint64(b.N) && !b.Failed() {
   821  		b.Fatal("RunParallel: body exited without pb.Next() == false")
   822  	}
   823  }
   824  
   825  // SetParallelism sets the number of goroutines used by [B.RunParallel] to p*GOMAXPROCS.
   826  // There is usually no need to call SetParallelism for CPU-bound benchmarks.
   827  // If p is less than 1, this call will have no effect.
   828  func (b *B) SetParallelism(p int) {
   829  	if p >= 1 {
   830  		b.parallelism = p
   831  	}
   832  }
   833  
   834  // Benchmark benchmarks a single function. It is useful for creating
   835  // custom benchmarks that do not use the "go test" command.
   836  //
   837  // If f depends on testing flags, then [Init] must be used to register
   838  // those flags before calling Benchmark and before calling [flag.Parse].
   839  //
   840  // If f calls Run, the result will be an estimate of running all its
   841  // subbenchmarks that don't call Run in sequence in a single benchmark.
   842  func Benchmark(f func(b *B)) BenchmarkResult {
   843  	b := &B{
   844  		common: common{
   845  			signal: make(chan bool),
   846  			w:      discard{},
   847  		},
   848  		benchFunc: f,
   849  		benchTime: benchTime,
   850  	}
   851  	if b.run1() {
   852  		b.run()
   853  	}
   854  	return b.result
   855  }
   856  
   857  type discard struct{}
   858  
   859  func (discard) Write(b []byte) (n int, err error) { return len(b), nil }
   860  

View as plain text