Source file
src/testing/benchmark.go
1
2
3
4
5 package testing
6
7 import (
8 "flag"
9 "fmt"
10 "internal/sysinfo"
11 "io"
12 "math"
13 "os"
14 "runtime"
15 "slices"
16 "strconv"
17 "strings"
18 "sync"
19 "sync/atomic"
20 "time"
21 "unicode"
22 )
23
24 func initBenchmarkFlags() {
25 matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
26 benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
27 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
28 }
29
var (
	// matchBenchmarks and benchmarkMemory hold the values of the
	// -test.bench and -test.benchmem flags; both are assigned by
	// initBenchmarkFlags.
	matchBenchmarks *string
	benchmarkMemory *bool

	// benchTime backs the -test.benchtime flag. It defaults to a duration
	// of one second.
	benchTime = durationOrCountFlag{d: 1 * time.Second}
)
36
// durationOrCountFlag is a flag value that holds either a time budget or
// an explicit iteration count. At most one of d and n is meaningful after
// a successful Set: a value with an "x" suffix populates n, anything else
// populates d.
type durationOrCountFlag struct {
	d         time.Duration // time budget, parsed with time.ParseDuration
	n         int           // iteration count, parsed from the "Nx" form
	allowZero bool          // whether "0x" and a zero duration are accepted by Set
}

// String returns the flag value in the same syntax Set accepts: "Nx" when
// a positive count is set, otherwise the duration's string form.
func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

// Set parses s as either an iteration count of the form "Nx" or a
// duration understood by time.ParseDuration, and stores the result in f.
// Negative values are rejected, and zero is rejected unless f.allowZero is
// set. On error f is left unchanged.
//
// The allowZero setting is configuration rather than a parsed value, so it
// is carried over to the new value; otherwise a flag that permits zero
// would start rejecting it after the first successful Set.
func (f *durationOrCountFlag) Set(s string) error {
	if count, ok := strings.CutSuffix(s, "x"); ok {
		// Bit size 0 makes ParseInt reject values that do not fit in an int.
		n, err := strconv.ParseInt(count, 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		*f = durationOrCountFlag{n: int(n), allowZero: f.allowZero}
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	*f = durationOrCountFlag{d: d, allowZero: f.allowZero}
	return nil
}
66
67
// benchmarkLock is held by runN while a benchmark function executes and is
// temporarily released by B.Run while a sub-benchmark runs.
var benchmarkLock sync.Mutex
69
70
// memStats is the shared buffer that StartTimer, StopTimer and ResetTimer
// pass to runtime.ReadMemStats when sampling allocation counters.
var memStats runtime.MemStats
72
73
74
// InternalBenchmark pairs a benchmark's name with the function that
// implements it. RunBenchmarks and runBenchmarks take a slice of these.
type InternalBenchmark struct {
	Name string     // name matched against the -test.bench pattern
	F    func(b *B) // the benchmark function
}
79
80
81
82
83
84
85
86
87
88
89
90
91
92
// B holds the state of a single benchmark: the iteration count handed to
// the benchmark function, the timer and allocation accounting, and the
// result assembled once the run completes. It embeds common (declared
// elsewhere in the package).
type B struct {
	common
	importPath       string              // import path of the package; printed as "pkg:" in the header when non-empty
	context          *benchContext       // matching and formatting state; nil for a B created by Benchmark
	N                int                 // iteration count for the current invocation of benchFunc (set by runN)
	previousN        int                 // number of iterations in the previous run
	previousDuration time.Duration       // measured duration of the previous run
	benchFunc        func(b *B)          // the function being benchmarked
	benchTime        durationOrCountFlag // time budget or fixed iteration count for this benchmark
	bytes            int64               // value recorded by SetBytes
	missingBytes     bool                // set by add once a sub-benchmark has reported zero Bytes
	timerOn          bool                // whether the timer is currently running
	showAllocResult  bool                // set by ReportAllocs; makes processBench append allocation stats
	result           BenchmarkResult     // filled in by launch, or accumulated by add for sub-benchmarks
	parallelism      int                 // multiplier on GOMAXPROCS used by RunParallel; reset to 1 by runN
	// The initial states of memStats.Mallocs and memStats.TotalAlloc,
	// sampled when the timer was last started or reset.
	startAllocs uint64
	startBytes  uint64
	// The net total of this benchmark accumulated while the timer runs.
	netAllocs uint64
	netBytes  uint64
	// Extra metrics collected by ReportMetric, keyed by unit.
	extra map[string]float64
}
117
118
119
120
121 func (b *B) StartTimer() {
122 if !b.timerOn {
123 runtime.ReadMemStats(&memStats)
124 b.startAllocs = memStats.Mallocs
125 b.startBytes = memStats.TotalAlloc
126 b.start = highPrecisionTimeNow()
127 b.timerOn = true
128 }
129 }
130
131
132
133
134 func (b *B) StopTimer() {
135 if b.timerOn {
136 b.duration += highPrecisionTimeSince(b.start)
137 runtime.ReadMemStats(&memStats)
138 b.netAllocs += memStats.Mallocs - b.startAllocs
139 b.netBytes += memStats.TotalAlloc - b.startBytes
140 b.timerOn = false
141 }
142 }
143
144
145
146
147 func (b *B) ResetTimer() {
148 if b.extra == nil {
149
150
151 b.extra = make(map[string]float64, 16)
152 } else {
153 clear(b.extra)
154 }
155 if b.timerOn {
156 runtime.ReadMemStats(&memStats)
157 b.startAllocs = memStats.Mallocs
158 b.startBytes = memStats.TotalAlloc
159 b.start = highPrecisionTimeNow()
160 }
161 b.duration = 0
162 b.netAllocs = 0
163 b.netBytes = 0
164 }
165
166
167
168 func (b *B) SetBytes(n int64) { b.bytes = n }
169
170
171
172
// ReportAllocs enables allocation statistics for this benchmark: once set,
// processBench appends the result's MemString to the printed line even if
// -test.benchmem was not given.
func (b *B) ReportAllocs() {
	b.showAllocResult = true
}
176
177
// runN runs a single invocation of the benchmark function with b.N set to
// n. It holds benchmarkLock for the whole call and runs cleanup and race
// checks on the way out.
func (b *B) runN(n int) {
	benchmarkLock.Lock()
	defer benchmarkLock.Unlock()
	defer func() {
		b.runCleanup(normalPanic)
		b.checkRaces()
	}()
	// Force a collection before timing starts, presumably so garbage left
	// by earlier runs does not skew this one.
	runtime.GC()
	b.resetRaces()
	b.N = n
	b.parallelism = 1
	b.ResetTimer()
	b.StartTimer()
	b.benchFunc(b)
	b.StopTimer()
	// Remember this run; RunParallel uses these to size its work grain.
	b.previousN = n
	b.previousDuration = b.duration
}
198
199
200
// run1 runs the benchmark function for one iteration in a new goroutine
// and waits for it to finish. It returns true if the caller should go on
// to the full timed run, and false if the benchmark failed, has
// sub-benchmarks, or has b.finished set after the single iteration.
func (b *B) run1() bool {
	if ctx := b.context; ctx != nil {
		// Widen the name column if this benchmark's name does not fit.
		if n := len(b.name) + ctx.extLen + 1; n > ctx.maxLen {
			ctx.maxLen = n + 8 // grow by extra slack beyond the new name's length
		}
	}
	go func() {
		// Signal completion from a deferred function so the waiter is
		// released however the goroutine ends.
		// NOTE(review): presumably this covers runtime.Goexit from FailNow — confirm.
		defer func() {
			b.signal <- true
		}()

		b.runN(1)
	}()
	<-b.signal
	if b.failed {
		fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
		return false
	}
	// b.finished is read under the read lock; a benchmark with
	// sub-benchmarks or with finished set is reported here and not timed.
	b.mu.RLock()
	finished := b.finished
	b.mu.RUnlock()
	if b.hasSub.Load() || finished {
		tag := "BENCH"
		if b.skipped {
			tag = "SKIP"
		}
		if b.chatty != nil && (len(b.output) > 0 || finished) {
			b.trimOutput()
			fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
		}
		return false
	}
	return true
}
240
// labelsOnce ensures the goos/goarch/pkg/cpu header lines are printed at
// most once, whether by B.run or by B.Run.
var labelsOnce sync.Once
242
243
244
245 func (b *B) run() {
246 labelsOnce.Do(func() {
247 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
248 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
249 if b.importPath != "" {
250 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
251 }
252 if cpu := sysinfo.CPUName(); cpu != "" {
253 fmt.Fprintf(b.w, "cpu: %s\n", cpu)
254 }
255 })
256 if b.context != nil {
257
258 b.context.processBench(b)
259 } else {
260
261 b.doBench()
262 }
263 }
264
// doBench runs launch in a new goroutine, waits for it to signal
// completion, and returns the result that launch stored in b.result.
func (b *B) doBench() BenchmarkResult {
	go b.launch()
	<-b.signal
	return b.result
}
270
271
272
273
274
// launch runs the benchmark function repeatedly until the configured
// iteration count or time budget is met, then stores the outcome in
// b.result. It is started as a goroutine by doBench and always signals
// b.signal when it ends.
func (b *B) launch() {
	// Signal completion from a deferred function so doBench is released
	// however this goroutine ends.
	defer func() {
		b.signal <- true
	}()

	if b.benchTime.n > 0 {
		// A fixed iteration count was requested. Every caller of doBench
		// in this file runs run1 first, so one iteration has already been
		// executed; for a count of exactly 1 that result is reused and
		// nothing more is run.
		if b.benchTime.n > 1 {
			b.runN(b.benchTime.n)
		}
	} else {
		// Run for at least the requested duration, growing n each round.
		d := b.benchTime.d
		for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
			last := n
			// Predict the number of iterations needed to fill d.
			goalns := d.Nanoseconds()
			prevIters := int64(b.N)
			prevns := b.duration.Nanoseconds()
			if prevns <= 0 {
				// Round up, to avoid division by zero.
				prevns = 1
			}
			// Multiply before dividing: for very fast benchmarks
			// prevIters is close to prevns, and dividing first would
			// truncate the ratio to 0 or 1.
			n = goalns * prevIters / prevns
			// Run 20% more iterations than predicted.
			n += n / 5
			// Do not grow by more than 100x in one step.
			n = min(n, 100*last)
			// Always run at least one more iteration than last time.
			n = max(n, last+1)
			// Never run more than 1e9 iterations.
			n = min(n, 1e9)
			b.runN(int(n))
		}
	}
	b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
}
321
322
323
324
325 func (b *B) Elapsed() time.Duration {
326 d := b.duration
327 if b.timerOn {
328 d += highPrecisionTimeSince(b.start)
329 }
330 return d
331 }
332
333
334
335
336
337
338
339
340
341
342 func (b *B) ReportMetric(n float64, unit string) {
343 if unit == "" {
344 panic("metric unit must not be empty")
345 }
346 if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
347 panic("metric unit must not contain whitespace")
348 }
349 b.extra[unit] = n
350 }
351
352
// BenchmarkResult contains the results of a benchmark run.
//
// The field order is significant: results are constructed with an unkeyed
// composite literal elsewhere in this file.
type BenchmarkResult struct {
	N         int           // The number of iterations.
	T         time.Duration // The total time taken.
	Bytes     int64         // Bytes processed in one iteration.
	MemAllocs uint64        // The total number of memory allocations.
	MemBytes  uint64        // The total number of bytes allocated.

	// Extra records additional metrics keyed by unit. Entries for
	// "ns/op", "MB/s", "allocs/op" and "B/op" override the values that
	// would otherwise be computed from the fields above.
	Extra map[string]float64
}

// NsPerOp returns the "ns/op" metric: the Extra override if present,
// otherwise T divided by N, or 0 when N is not positive.
func (r BenchmarkResult) NsPerOp() int64 {
	if v, ok := r.Extra["ns/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return r.T.Nanoseconds() / int64(r.N)
}

// mbPerSec returns the "MB/s" metric: the Extra override if present,
// otherwise Bytes*N megabytes (1e6 bytes) per second of T, or 0 when any
// of Bytes, T or N is not positive.
func (r BenchmarkResult) mbPerSec() float64 {
	if v, ok := r.Extra["MB/s"]; ok {
		return v
	}
	if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
		return 0
	}
	return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
}

// AllocsPerOp returns the "allocs/op" metric: the Extra override if
// present, otherwise MemAllocs divided by N, or 0 when N is not positive.
func (r BenchmarkResult) AllocsPerOp() int64 {
	if v, ok := r.Extra["allocs/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemAllocs) / int64(r.N)
}

// AllocedBytesPerOp returns the "B/op" metric: the Extra override if
// present, otherwise MemBytes divided by N, or 0 when N is not positive.
func (r BenchmarkResult) AllocedBytesPerOp() int64 {
	if v, ok := r.Extra["B/op"]; ok {
		return int64(v)
	}
	if r.N <= 0 {
		return 0
	}
	return int64(r.MemBytes) / int64(r.N)
}

// String returns a one-line summary of the result: the iteration count,
// then tab-separated "ns/op" and "MB/s" values when they are non-zero,
// then any remaining Extra metrics in sorted key order. Allocation
// metrics are not included; see MemString.
//
// When there is no "ns/op" override and N is not positive, ns/op is
// treated as zero and omitted, matching NsPerOp, rather than printing the
// NaN or Inf an unguarded division would produce.
func (r BenchmarkResult) String() string {
	buf := new(strings.Builder)
	fmt.Fprintf(buf, "%8d", r.N)

	// Compute ns/op as a float so sub-nanosecond values keep precision.
	ns, ok := r.Extra["ns/op"]
	if !ok && r.N > 0 {
		ns = float64(r.T.Nanoseconds()) / float64(r.N)
	}
	if ns != 0 {
		buf.WriteByte('\t')
		prettyPrint(buf, ns, "ns/op")
	}

	if mbs := r.mbPerSec(); mbs != 0 {
		fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
	}

	// Print the remaining extra metrics. Map iteration order is random,
	// so the keys are sorted for deterministic output.
	var extraKeys []string
	for k := range r.Extra {
		switch k {
		case "ns/op", "MB/s", "B/op", "allocs/op":
			// Built-in metrics are handled above or by MemString.
			continue
		}
		extraKeys = append(extraKeys, k)
	}
	slices.Sort(extraKeys)
	for _, k := range extraKeys {
		buf.WriteByte('\t')
		prettyPrint(buf, r.Extra[k], k)
	}
	return buf.String()
}

// prettyPrint writes x followed by a space and unit to w. The number of
// decimal places is chosen from the magnitude of x so that roughly four
// significant figures are shown, and the field width grows by one for
// each extra decimal so the integer parts of successive values line up.
func prettyPrint(w io.Writer, x float64, unit string) {
	var format string
	switch y := math.Abs(x); {
	case y == 0 || y >= 999.95:
		format = "%10.0f %s"
	case y >= 99.995:
		format = "%12.1f %s"
	case y >= 9.9995:
		format = "%13.2f %s"
	case y >= 0.99995:
		format = "%14.3f %s"
	case y >= 0.099995:
		format = "%15.4f %s"
	case y >= 0.0099995:
		format = "%16.5f %s"
	case y >= 0.00099995:
		format = "%17.6f %s"
	default:
		format = "%18.7f %s"
	}
	fmt.Fprintf(w, format, x, unit)
}

// MemString returns r.AllocedBytesPerOp and r.AllocsPerOp formatted as
// "B/op" and "allocs/op", separated by a tab.
func (r BenchmarkResult) MemString() string {
	return fmt.Sprintf("%8d B/op\t%8d allocs/op",
		r.AllocedBytesPerOp(), r.AllocsPerOp())
}
486
487
// benchmarkName returns the display name of a benchmark run with n
// procs: name itself when n is 1, otherwise name with "-n" appended.
func benchmarkName(name string, n int) string {
	if n == 1 {
		return name
	}
	return fmt.Sprintf("%s-%d", name, n)
}
494
// benchContext is the state shared by all benchmarks started from one
// runBenchmarks call: the name matcher and the column width used when
// printing results.
type benchContext struct {
	match *matcher // decides which benchmark and sub-benchmark names run

	maxLen int // width of the name column; widened by run1 for longer names
	extLen int // length of the suffix benchmarkName adds for the largest CPU count
}
501
502
503
// RunBenchmarks runs the given benchmarks through runBenchmarks with an
// empty import path. The success result of runBenchmarks is discarded.
func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
	runBenchmarks("", matchString, benchmarks)
}
507
// runBenchmarks runs every benchmark whose name matches the -test.bench
// pattern and reports whether all of them succeeded. It returns true
// without running anything when no pattern was given.
func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
	// If no flag was specified, don't run benchmarks.
	if len(*matchBenchmarks) == 0 {
		return true
	}
	// Find the largest CPU count in cpuList; it yields the longest name
	// suffix and so fixes the width of the name column.
	maxprocs := 1
	for _, procs := range cpuList {
		if procs > maxprocs {
			maxprocs = procs
		}
	}
	ctx := &benchContext{
		match:  newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
		extLen: len(benchmarkName("", maxprocs)),
	}
	// Collect the matching benchmarks and size the name column to the
	// longest matching name.
	var bs []InternalBenchmark
	for _, Benchmark := range benchmarks {
		if _, matched, _ := ctx.match.fullName(nil, Benchmark.Name); matched {
			bs = append(bs, Benchmark)
			benchName := benchmarkName(Benchmark.Name, maxprocs)
			if l := len(benchName) + ctx.extLen + 1; l > ctx.maxLen {
				ctx.maxLen = l
			}
		}
	}
	// main is a synthetic root benchmark whose body runs each selected
	// benchmark as a sub-benchmark.
	main := &B{
		common: common{
			name:  "Main",
			w:     os.Stdout,
			bench: true,
		},
		importPath: importPath,
		benchFunc: func(b *B) {
			for _, Benchmark := range bs {
				b.Run(Benchmark.Name, Benchmark.F)
			}
		},
		benchTime: benchTime,
		context:   ctx,
	}
	if Verbose() {
		main.chatty = newChattyPrinter(main.w)
	}
	main.runN(1)
	return !main.failed
}
555
556
// processBench runs b once for every entry in cpuList, repeated *count
// times each, and prints one result line per run to b.w.
func (ctx *benchContext) processBench(b *B) {
	for i, procs := range cpuList {
		for j := uint(0); j < *count; j++ {
			runtime.GOMAXPROCS(procs)
			benchName := benchmarkName(b.name, procs)

			// In non-chatty mode, print the padded name before running.
			// In chatty mode it is printed after the run instead (below).
			if b.chatty == nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			// For every run after the first, start from a fresh B and
			// repeat the single-iteration run1 before the timed run.
			if i > 0 || j > 0 {
				b = &B{
					common: common{
						signal: make(chan bool),
						name:   b.name,
						w:      b.w,
						chatty: b.chatty,
						bench:  true,
					},
					benchFunc: b.benchFunc,
					benchTime: b.benchTime,
				}
				b.run1()
			}
			r := b.doBench()
			if b.failed {
				// Report the failure with any output and move on to the
				// next run; no result line is printed.
				fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
				continue
			}
			results := r.String()
			if b.chatty != nil {
				fmt.Fprintf(b.w, "%-*s\t", ctx.maxLen, benchName)
			}
			if *benchmarkMemory || b.showAllocResult {
				results += "\t" + r.MemString()
			}
			fmt.Fprintln(b.w, results)
			// Print any benchmark output after the result line, trimmed
			// by trimOutput.
			if len(b.output) > 0 {
				b.trimOutput()
				fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
			}
			// Warn if the benchmark changed GOMAXPROCS and did not restore it.
			if p := runtime.GOMAXPROCS(-1); p != procs {
				fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
			}
			if b.chatty != nil && b.chatty.json {
				b.chatty.Updatef("", "=== NAME  %s\n", "")
			}
		}
	}
}
613
614
615
616
// hideStdoutForTesting, when true, stops B.Run from printing the
// sub-benchmark name (and the JSON "=== RUN" update) in chatty mode.
var hideStdoutForTesting = false
618
619
620
621
622
623
// Run benchmarks f as a sub-benchmark with the given name. It reports
// whether the sub-benchmark completed without failing. A name rejected by
// the matcher is not run and counts as success.
//
// Run must be called from a benchmark function executing under runN: it
// releases benchmarkLock, which runN holds, and reacquires it on return.
func (b *B) Run(name string, f func(b *B)) bool {
	// Since b has sub-benchmarks, run1 will not go on to time b itself.
	// Release the lock here and reacquire it on exit so that the
	// Lock/Unlock calls in runN stay paired.
	b.hasSub.Store(true)
	benchmarkLock.Unlock()
	defer benchmarkLock.Lock()

	benchName, ok, partial := b.name, true, false
	if b.context != nil {
		benchName, ok, partial = b.context.match.fullName(&b.common, name)
	}
	if !ok {
		return true
	}
	// Record the caller's stack as the sub-benchmark's creator.
	var pc [maxStackLen]uintptr
	n := runtime.Callers(2, pc[:])
	sub := &B{
		common: common{
			signal:  make(chan bool),
			name:    benchName,
			parent:  &b.common,
			level:   b.level + 1,
			creator: pc[:n],
			w:       b.w,
			chatty:  b.chatty,
			bench:   true,
		},
		importPath: b.importPath,
		benchFunc:  f,
		benchTime:  b.benchTime,
		context:    b.context,
	}
	if partial {
		// The matcher reported only a partial match, so sub is marked as
		// having sub-benchmarks and run1 will not time sub itself.
		// NOTE(review): presumably the pattern has further elements that
		// apply to nested names — confirm against matcher.fullName.
		sub.hasSub.Store(true)
	}

	if b.chatty != nil {
		// Print the environment header at most once. Note that this path
		// writes to standard output rather than to b.w.
		labelsOnce.Do(func() {
			fmt.Printf("goos: %s\n", runtime.GOOS)
			fmt.Printf("goarch: %s\n", runtime.GOARCH)
			if b.importPath != "" {
				fmt.Printf("pkg: %s\n", b.importPath)
			}
			if cpu := sysinfo.CPUName(); cpu != "" {
				fmt.Printf("cpu: %s\n", cpu)
			}
		})

		if !hideStdoutForTesting {
			if b.chatty.json {
				b.chatty.Updatef(benchName, "=== RUN   %s\n", benchName)
			}
			fmt.Println(benchName)
		}
	}

	// Run one iteration first; only go on to the full run if run1 allows it.
	if sub.run1() {
		sub.run()
	}
	b.add(sub.result)
	return !sub.failed
}
688
689
690
691
692 func (b *B) add(other BenchmarkResult) {
693 r := &b.result
694
695
696 r.N = 1
697 r.T += time.Duration(other.NsPerOp())
698 if other.Bytes == 0 {
699
700
701 b.missingBytes = true
702 r.Bytes = 0
703 }
704 if !b.missingBytes {
705 r.Bytes += other.Bytes
706 }
707 r.MemAllocs += uint64(other.AllocsPerOp())
708 r.MemBytes += uint64(other.AllocedBytesPerOp())
709 }
710
711
712 func (b *B) trimOutput() {
713
714
715
716 const maxNewlines = 10
717 for nlCount, j := 0, 0; j < len(b.output); j++ {
718 if b.output[j] == '\n' {
719 nlCount++
720 if nlCount >= maxNewlines {
721 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
722 break
723 }
724 }
725 }
726 }
727
728
// A PB is handed by RunParallel to each goroutine running the benchmark
// body. Iterations are claimed from a shared counter in batches so the
// counter is not touched on every call to Next.
type PB struct {
	globalN *atomic.Uint64 // shared between all worker goroutines: iterations claimed so far
	grain   uint64         // number of iterations claimed from globalN at once
	cache   uint64         // iterations claimed locally and not yet consumed
	bN      uint64         // total number of iterations to execute
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache > 0 {
		pb.cache--
		return true
	}
	// The local batch is used up: claim the next one from the shared counter.
	claimed := pb.globalN.Add(pb.grain)
	limit := pb.bN + pb.grain
	switch {
	case claimed <= pb.bN:
		// The whole batch lies within the iteration budget.
		pb.cache = pb.grain
	case claimed < limit:
		// The batch straddles the budget: take only the part inside it.
		pb.cache = limit - claimed
	default:
		// The batch starts at or beyond the budget: nothing left.
		return false
	}
	pb.cache--
	return true
}
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765 func (b *B) RunParallel(body func(*PB)) {
766 if b.N == 0 {
767 return
768 }
769
770
771
772 grain := uint64(0)
773 if b.previousN > 0 && b.previousDuration > 0 {
774 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
775 }
776 if grain < 1 {
777 grain = 1
778 }
779
780
781 if grain > 1e4 {
782 grain = 1e4
783 }
784
785 var n atomic.Uint64
786 numProcs := b.parallelism * runtime.GOMAXPROCS(0)
787 var wg sync.WaitGroup
788 wg.Add(numProcs)
789 for p := 0; p < numProcs; p++ {
790 go func() {
791 defer wg.Done()
792 pb := &PB{
793 globalN: &n,
794 grain: grain,
795 bN: uint64(b.N),
796 }
797 body(pb)
798 }()
799 }
800 wg.Wait()
801 if n.Load() <= uint64(b.N) && !b.Failed() {
802 b.Fatal("RunParallel: body exited without pb.Next() == false")
803 }
804 }
805
806
807
808
809 func (b *B) SetParallelism(p int) {
810 if p >= 1 {
811 b.parallelism = p
812 }
813 }
814
815
816
817
818
819
820
821
822
823 func Benchmark(f func(b *B)) BenchmarkResult {
824 b := &B{
825 common: common{
826 signal: make(chan bool),
827 w: discard{},
828 },
829 benchFunc: f,
830 benchTime: benchTime,
831 }
832 if b.run1() {
833 b.run()
834 }
835 return b.result
836 }
837
// discard is a writer that accepts and drops everything written to it.
type discard struct{}

// Write reports the whole of p as written without storing it, and never
// returns an error.
func (discard) Write(p []byte) (int, error) {
	return len(p), nil
}
841
View as plain text