Source file
src/testing/benchmark.go
1
2
3
4
5 package testing
6
7 import (
8 "flag"
9 "fmt"
10 "internal/sysinfo"
11 "io"
12 "math"
13 "os"
14 "runtime"
15 "slices"
16 "strconv"
17 "strings"
18 "sync"
19 "sync/atomic"
20 "time"
21 "unicode"
22 )
23
24 func initBenchmarkFlags() {
25 matchBenchmarks = flag.String("test.bench", "", "run only benchmarks matching `regexp`")
26 benchmarkMemory = flag.Bool("test.benchmem", false, "print memory allocations for benchmarks")
27 flag.Var(&benchTime, "test.benchtime", "run each benchmark for duration `d` or N times if `d` is of the form Nx")
28 }
29
30 var (
31 matchBenchmarks *string
32 benchmarkMemory *bool
33
34 benchTime = durationOrCountFlag{d: 1 * time.Second}
35 )
36
// durationOrCountFlag is a flag.Value that holds either a time budget or an
// explicit iteration count. A count is written "Nx" (for example "100x");
// any other text is parsed as a time.Duration.
type durationOrCountFlag struct {
	d         time.Duration // time budget; used when n == 0
	n         int           // iteration count; takes precedence over d when > 0
	allowZero bool          // whether Set accepts "0x" and a zero duration
}

// String formats the flag in the syntax accepted by Set: "Nx" when a positive
// count is stored, otherwise the duration's standard string form.
func (f *durationOrCountFlag) String() string {
	if f.n > 0 {
		return fmt.Sprintf("%dx", f.n)
	}
	return f.d.String()
}

// Set parses s and stores the result in f.
//
// A value ending in "x" is an iteration count; anything else is a duration.
// Negative values are always rejected, and zero is rejected unless
// f.allowZero is set. On error f is left unchanged.
//
// Only the value fields are updated. allowZero is configuration chosen by the
// code that declared the flag, so it must survive Set: overwriting the whole
// struct would clear it and make a second "0x" on the same flag fail.
func (f *durationOrCountFlag) Set(s string) error {
	if count, ok := strings.CutSuffix(s, "x"); ok {
		n, err := strconv.ParseInt(count, 10, 0)
		if err != nil || n < 0 || (!f.allowZero && n == 0) {
			return fmt.Errorf("invalid count")
		}
		f.d, f.n = 0, int(n)
		return nil
	}
	d, err := time.ParseDuration(s)
	if err != nil || d < 0 || (!f.allowZero && d == 0) {
		return fmt.Errorf("invalid duration")
	}
	f.d, f.n = d, 0
	return nil
}
66
67
// Process-wide state shared by all benchmarks.
var (
	// benchmarkLock is held by runN for the duration of a benchmark run.
	// B.Run releases it while a sub-benchmark executes and re-acquires it
	// afterwards.
	benchmarkLock sync.Mutex

	// memStats is the scratch buffer that the timer methods pass to
	// runtime.ReadMemStats.
	memStats runtime.MemStats
)
72
73
74
75 type InternalBenchmark struct {
76 Name string
77 F func(b *B)
78 }
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93 type B struct {
94 common
95 importPath string
96 bstate *benchState
97 N int
98 previousN int
99 previousDuration time.Duration
100 benchFunc func(b *B)
101 benchTime durationOrCountFlag
102 bytes int64
103 missingBytes bool
104 timerOn bool
105 showAllocResult bool
106 result BenchmarkResult
107 parallelism int
108
109 startAllocs uint64
110 startBytes uint64
111
112 netAllocs uint64
113 netBytes uint64
114
115 extra map[string]float64
116
117
118 loopN int
119 }
120
121
122
123
124 func (b *B) StartTimer() {
125 if !b.timerOn {
126 runtime.ReadMemStats(&memStats)
127 b.startAllocs = memStats.Mallocs
128 b.startBytes = memStats.TotalAlloc
129 b.start = highPrecisionTimeNow()
130 b.timerOn = true
131 }
132 }
133
134
135
136
137 func (b *B) StopTimer() {
138 if b.timerOn {
139 b.duration += highPrecisionTimeSince(b.start)
140 runtime.ReadMemStats(&memStats)
141 b.netAllocs += memStats.Mallocs - b.startAllocs
142 b.netBytes += memStats.TotalAlloc - b.startBytes
143 b.timerOn = false
144 }
145 }
146
147
148
149
150 func (b *B) ResetTimer() {
151 if b.extra == nil {
152
153
154 b.extra = make(map[string]float64, 16)
155 } else {
156 clear(b.extra)
157 }
158 if b.timerOn {
159 runtime.ReadMemStats(&memStats)
160 b.startAllocs = memStats.Mallocs
161 b.startBytes = memStats.TotalAlloc
162 b.start = highPrecisionTimeNow()
163 }
164 b.duration = 0
165 b.netAllocs = 0
166 b.netBytes = 0
167 }
168
169
170
171 func (b *B) SetBytes(n int64) { b.bytes = n }
172
173
174
175
176 func (b *B) ReportAllocs() {
177 b.showAllocResult = true
178 }
179
180
181 func (b *B) runN(n int) {
182 benchmarkLock.Lock()
183 defer benchmarkLock.Unlock()
184 defer func() {
185 b.runCleanup(normalPanic)
186 b.checkRaces()
187 }()
188
189
190 runtime.GC()
191 b.resetRaces()
192 b.N = n
193 b.loopN = n
194 b.parallelism = 1
195 b.ResetTimer()
196 b.StartTimer()
197 b.benchFunc(b)
198 b.StopTimer()
199 b.previousN = n
200 b.previousDuration = b.duration
201 }
202
203
204
205 func (b *B) run1() bool {
206 if bstate := b.bstate; bstate != nil {
207
208 if n := len(b.name) + bstate.extLen + 1; n > bstate.maxLen {
209 bstate.maxLen = n + 8
210 }
211 }
212 go func() {
213
214
215 defer func() {
216 b.signal <- true
217 }()
218
219 b.runN(1)
220 }()
221 <-b.signal
222 if b.failed {
223 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), b.name, b.output)
224 return false
225 }
226
227
228 b.mu.RLock()
229 finished := b.finished
230 b.mu.RUnlock()
231 if b.hasSub.Load() || finished {
232 tag := "BENCH"
233 if b.skipped {
234 tag = "SKIP"
235 }
236 if b.chatty != nil && (len(b.output) > 0 || finished) {
237 b.trimOutput()
238 fmt.Fprintf(b.w, "%s--- %s: %s\n%s", b.chatty.prefix(), tag, b.name, b.output)
239 }
240 return false
241 }
242 return true
243 }
244
245 var labelsOnce sync.Once
246
247
248
249 func (b *B) run() {
250 labelsOnce.Do(func() {
251 fmt.Fprintf(b.w, "goos: %s\n", runtime.GOOS)
252 fmt.Fprintf(b.w, "goarch: %s\n", runtime.GOARCH)
253 if b.importPath != "" {
254 fmt.Fprintf(b.w, "pkg: %s\n", b.importPath)
255 }
256 if cpu := sysinfo.CPUName(); cpu != "" {
257 fmt.Fprintf(b.w, "cpu: %s\n", cpu)
258 }
259 })
260 if b.bstate != nil {
261
262 b.bstate.processBench(b)
263 } else {
264
265 b.doBench()
266 }
267 }
268
269 func (b *B) doBench() BenchmarkResult {
270 go b.launch()
271 <-b.signal
272 return b.result
273 }
274
275
276
277
278
279 func (b *B) launch() {
280
281
282 defer func() {
283 b.signal <- true
284 }()
285
286
287 if b.benchTime.n > 0 {
288
289
290
291 if b.benchTime.n > 1 {
292 b.runN(b.benchTime.n)
293 }
294 } else {
295 d := b.benchTime.d
296 for n := int64(1); !b.failed && b.duration < d && n < 1e9; {
297 last := n
298
299 goalns := d.Nanoseconds()
300 prevIters := int64(b.N)
301 prevns := b.duration.Nanoseconds()
302 if prevns <= 0 {
303
304 prevns = 1
305 }
306
307
308
309
310
311 n = goalns * prevIters / prevns
312
313 n += n / 5
314
315 n = min(n, 100*last)
316
317 n = max(n, last+1)
318
319 n = min(n, 1e9)
320 b.runN(int(n))
321 }
322 }
323 b.result = BenchmarkResult{b.N, b.duration, b.bytes, b.netAllocs, b.netBytes, b.extra}
324 }
325
326
327
328
329 func (b *B) Elapsed() time.Duration {
330 d := b.duration
331 if b.timerOn {
332 d += highPrecisionTimeSince(b.start)
333 }
334 return d
335 }
336
337
338
339
340
341
342
343
344
345
346 func (b *B) ReportMetric(n float64, unit string) {
347 if unit == "" {
348 panic("metric unit must not be empty")
349 }
350 if strings.IndexFunc(unit, unicode.IsSpace) >= 0 {
351 panic("metric unit must not contain whitespace")
352 }
353 b.extra[unit] = n
354 }
355
356
357
358
359
360
361 func (b *B) Loop() bool {
362 if b.loopN == b.N {
363
364
365 b.ResetTimer()
366 }
367 b.loopN--
368 return b.loopN >= 0
369 }
370
371
// BenchmarkResult holds the outcome of a benchmark run.
type BenchmarkResult struct {
	N     int           // number of iterations
	T     time.Duration // total time taken
	Bytes int64         // bytes processed in one iteration

	// MemAllocs is the total number of memory allocations and MemBytes the
	// total number of bytes allocated while the timer was running.
	MemAllocs, MemBytes uint64

	// Extra holds additional metrics keyed by unit. The entries "ns/op",
	// "MB/s", "allocs/op" and "B/op" take precedence over the values that
	// would otherwise be derived from the fields above.
	Extra map[string]float64
}
382
383
384 func (r BenchmarkResult) NsPerOp() int64 {
385 if v, ok := r.Extra["ns/op"]; ok {
386 return int64(v)
387 }
388 if r.N <= 0 {
389 return 0
390 }
391 return r.T.Nanoseconds() / int64(r.N)
392 }
393
394
395 func (r BenchmarkResult) mbPerSec() float64 {
396 if v, ok := r.Extra["MB/s"]; ok {
397 return v
398 }
399 if r.Bytes <= 0 || r.T <= 0 || r.N <= 0 {
400 return 0
401 }
402 return (float64(r.Bytes) * float64(r.N) / 1e6) / r.T.Seconds()
403 }
404
405
406
407 func (r BenchmarkResult) AllocsPerOp() int64 {
408 if v, ok := r.Extra["allocs/op"]; ok {
409 return int64(v)
410 }
411 if r.N <= 0 {
412 return 0
413 }
414 return int64(r.MemAllocs) / int64(r.N)
415 }
416
417
418
419 func (r BenchmarkResult) AllocedBytesPerOp() int64 {
420 if v, ok := r.Extra["B/op"]; ok {
421 return int64(v)
422 }
423 if r.N <= 0 {
424 return 0
425 }
426 return int64(r.MemBytes) / int64(r.N)
427 }
428
429
430
431
432
433
434
435
436 func (r BenchmarkResult) String() string {
437 buf := new(strings.Builder)
438 fmt.Fprintf(buf, "%8d", r.N)
439
440
441 ns, ok := r.Extra["ns/op"]
442 if !ok {
443 ns = float64(r.T.Nanoseconds()) / float64(r.N)
444 }
445 if ns != 0 {
446 buf.WriteByte('\t')
447 prettyPrint(buf, ns, "ns/op")
448 }
449
450 if mbs := r.mbPerSec(); mbs != 0 {
451 fmt.Fprintf(buf, "\t%7.2f MB/s", mbs)
452 }
453
454
455
456 var extraKeys []string
457 for k := range r.Extra {
458 switch k {
459 case "ns/op", "MB/s", "B/op", "allocs/op":
460
461 continue
462 }
463 extraKeys = append(extraKeys, k)
464 }
465 slices.Sort(extraKeys)
466 for _, k := range extraKeys {
467 buf.WriteByte('\t')
468 prettyPrint(buf, r.Extra[k], k)
469 }
470 return buf.String()
471 }
472
// prettyPrint writes x followed by a space and unit to w. The number of
// decimals grows as the magnitude of x shrinks, from none for |x| >= 999.95
// (and for exactly zero) up to seven for the smallest values. The field
// width grows with the precision, so the integer part occupies the same
// columns at every precision.
func prettyPrint(w io.Writer, x float64, unit string) {
	// cutoffs[i] is the smallest magnitude printed with i decimals. Anything
	// below the last entry gets len(cutoffs) decimals.
	cutoffs := [...]float64{999.95, 99.995, 9.9995, 0.99995, 0.099995, 0.0099995, 0.00099995}

	mag := math.Abs(x)
	decimals := len(cutoffs)
	if mag == 0 {
		decimals = 0
	} else {
		for i, c := range cutoffs {
			if mag >= c {
				decimals = i
				break
			}
		}
	}

	// Ten columns for integers; with decimals, one more for the point plus
	// one per digit after it.
	width := 10
	if decimals > 0 {
		width = 11 + decimals
	}
	fmt.Fprintf(w, "%*.*f %s", width, decimals, x, unit)
}
499
500
501 func (r BenchmarkResult) MemString() string {
502 return fmt.Sprintf("%8d B/op\t%8d allocs/op",
503 r.AllocedBytesPerOp(), r.AllocsPerOp())
504 }
505
506
// benchmarkName returns the display name of a benchmark run with n procs:
// name itself when n is 1, otherwise name with "-n" appended.
func benchmarkName(name string, n int) string {
	if n == 1 {
		return name
	}
	return fmt.Sprintf("%s-%d", name, n)
}
513
514 type benchState struct {
515 match *matcher
516
517 maxLen int
518 extLen int
519 }
520
521
522
523 func RunBenchmarks(matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) {
524 runBenchmarks("", matchString, benchmarks)
525 }
526
527 func runBenchmarks(importPath string, matchString func(pat, str string) (bool, error), benchmarks []InternalBenchmark) bool {
528
529 if len(*matchBenchmarks) == 0 {
530 return true
531 }
532
533 maxprocs := 1
534 for _, procs := range cpuList {
535 if procs > maxprocs {
536 maxprocs = procs
537 }
538 }
539 bstate := &benchState{
540 match: newMatcher(matchString, *matchBenchmarks, "-test.bench", *skip),
541 extLen: len(benchmarkName("", maxprocs)),
542 }
543 var bs []InternalBenchmark
544 for _, Benchmark := range benchmarks {
545 if _, matched, _ := bstate.match.fullName(nil, Benchmark.Name); matched {
546 bs = append(bs, Benchmark)
547 benchName := benchmarkName(Benchmark.Name, maxprocs)
548 if l := len(benchName) + bstate.extLen + 1; l > bstate.maxLen {
549 bstate.maxLen = l
550 }
551 }
552 }
553 main := &B{
554 common: common{
555 name: "Main",
556 w: os.Stdout,
557 bench: true,
558 },
559 importPath: importPath,
560 benchFunc: func(b *B) {
561 for _, Benchmark := range bs {
562 b.Run(Benchmark.Name, Benchmark.F)
563 }
564 },
565 benchTime: benchTime,
566 bstate: bstate,
567 }
568 if Verbose() {
569 main.chatty = newChattyPrinter(main.w)
570 }
571 main.runN(1)
572 return !main.failed
573 }
574
575
576 func (s *benchState) processBench(b *B) {
577 for i, procs := range cpuList {
578 for j := uint(0); j < *count; j++ {
579 runtime.GOMAXPROCS(procs)
580 benchName := benchmarkName(b.name, procs)
581
582
583 if b.chatty == nil {
584 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
585 }
586
587 if i > 0 || j > 0 {
588 b = &B{
589 common: common{
590 signal: make(chan bool),
591 name: b.name,
592 w: b.w,
593 chatty: b.chatty,
594 bench: true,
595 },
596 benchFunc: b.benchFunc,
597 benchTime: b.benchTime,
598 }
599 b.run1()
600 }
601 r := b.doBench()
602 if b.failed {
603
604
605
606 fmt.Fprintf(b.w, "%s--- FAIL: %s\n%s", b.chatty.prefix(), benchName, b.output)
607 continue
608 }
609 results := r.String()
610 if b.chatty != nil {
611 fmt.Fprintf(b.w, "%-*s\t", s.maxLen, benchName)
612 }
613 if *benchmarkMemory || b.showAllocResult {
614 results += "\t" + r.MemString()
615 }
616 fmt.Fprintln(b.w, results)
617
618
619 if len(b.output) > 0 {
620 b.trimOutput()
621 fmt.Fprintf(b.w, "%s--- BENCH: %s\n%s", b.chatty.prefix(), benchName, b.output)
622 }
623 if p := runtime.GOMAXPROCS(-1); p != procs {
624 fmt.Fprintf(os.Stderr, "testing: %s left GOMAXPROCS set to %d\n", benchName, p)
625 }
626 if b.chatty != nil && b.chatty.json {
627 b.chatty.Updatef("", "=== NAME %s\n", "")
628 }
629 }
630 }
631 }
632
633
634
635
// hideStdoutForTesting, when true, stops B.Run from printing the benchmark
// name to standard output in chatty mode. It is false by default.
var hideStdoutForTesting bool
637
638
639
640
641
642
643 func (b *B) Run(name string, f func(b *B)) bool {
644
645
646 b.hasSub.Store(true)
647 benchmarkLock.Unlock()
648 defer benchmarkLock.Lock()
649
650 benchName, ok, partial := b.name, true, false
651 if b.bstate != nil {
652 benchName, ok, partial = b.bstate.match.fullName(&b.common, name)
653 }
654 if !ok {
655 return true
656 }
657 var pc [maxStackLen]uintptr
658 n := runtime.Callers(2, pc[:])
659 sub := &B{
660 common: common{
661 signal: make(chan bool),
662 name: benchName,
663 parent: &b.common,
664 level: b.level + 1,
665 creator: pc[:n],
666 w: b.w,
667 chatty: b.chatty,
668 bench: true,
669 },
670 importPath: b.importPath,
671 benchFunc: f,
672 benchTime: b.benchTime,
673 bstate: b.bstate,
674 }
675 if partial {
676
677
678 sub.hasSub.Store(true)
679 }
680
681 if b.chatty != nil {
682 labelsOnce.Do(func() {
683 fmt.Printf("goos: %s\n", runtime.GOOS)
684 fmt.Printf("goarch: %s\n", runtime.GOARCH)
685 if b.importPath != "" {
686 fmt.Printf("pkg: %s\n", b.importPath)
687 }
688 if cpu := sysinfo.CPUName(); cpu != "" {
689 fmt.Printf("cpu: %s\n", cpu)
690 }
691 })
692
693 if !hideStdoutForTesting {
694 if b.chatty.json {
695 b.chatty.Updatef(benchName, "=== RUN %s\n", benchName)
696 }
697 fmt.Println(benchName)
698 }
699 }
700
701 if sub.run1() {
702 sub.run()
703 }
704 b.add(sub.result)
705 return !sub.failed
706 }
707
708
709
710
711 func (b *B) add(other BenchmarkResult) {
712 r := &b.result
713
714
715 r.N = 1
716 r.T += time.Duration(other.NsPerOp())
717 if other.Bytes == 0 {
718
719
720 b.missingBytes = true
721 r.Bytes = 0
722 }
723 if !b.missingBytes {
724 r.Bytes += other.Bytes
725 }
726 r.MemAllocs += uint64(other.AllocsPerOp())
727 r.MemBytes += uint64(other.AllocedBytesPerOp())
728 }
729
730
731 func (b *B) trimOutput() {
732
733
734
735 const maxNewlines = 10
736 for nlCount, j := 0, 0; j < len(b.output); j++ {
737 if b.output[j] == '\n' {
738 nlCount++
739 if nlCount >= maxNewlines {
740 b.output = append(b.output[:j], "\n\t... [output truncated]\n"...)
741 break
742 }
743 }
744 }
745 }
746
747
// PB hands out benchmark iterations to a single RunParallel goroutine.
type PB struct {
	globalN *atomic.Uint64 // iterations claimed so far, shared by all goroutines
	grain   uint64         // number of iterations claimed from globalN at a time
	cache   uint64         // locally claimed iterations not yet handed out
	bN      uint64         // total number of iterations to execute (b.N)
}

// Next reports whether there are more iterations to execute.
func (pb *PB) Next() bool {
	if pb.cache > 0 {
		pb.cache--
		return true
	}

	// The local batch is used up: claim another grain from the shared
	// counter. end is the counter value after this claim.
	end := pb.globalN.Add(pb.grain)
	limit := pb.bN + pb.grain
	switch {
	case end <= pb.bN:
		// The whole batch lies within the budget.
		pb.cache = pb.grain
	case end < limit:
		// The batch straddles the budget; keep only the part inside it.
		pb.cache = limit - end
	default:
		// The budget was exhausted before this claim.
		return false
	}
	pb.cache--
	return true
}
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784 func (b *B) RunParallel(body func(*PB)) {
785 if b.N == 0 {
786 return
787 }
788
789
790
791 grain := uint64(0)
792 if b.previousN > 0 && b.previousDuration > 0 {
793 grain = 1e5 * uint64(b.previousN) / uint64(b.previousDuration)
794 }
795 if grain < 1 {
796 grain = 1
797 }
798
799
800 if grain > 1e4 {
801 grain = 1e4
802 }
803
804 var n atomic.Uint64
805 numProcs := b.parallelism * runtime.GOMAXPROCS(0)
806 var wg sync.WaitGroup
807 wg.Add(numProcs)
808 for p := 0; p < numProcs; p++ {
809 go func() {
810 defer wg.Done()
811 pb := &PB{
812 globalN: &n,
813 grain: grain,
814 bN: uint64(b.N),
815 }
816 body(pb)
817 }()
818 }
819 wg.Wait()
820 if n.Load() <= uint64(b.N) && !b.Failed() {
821 b.Fatal("RunParallel: body exited without pb.Next() == false")
822 }
823 }
824
825
826
827
828 func (b *B) SetParallelism(p int) {
829 if p >= 1 {
830 b.parallelism = p
831 }
832 }
833
834
835
836
837
838
839
840
841
842 func Benchmark(f func(b *B)) BenchmarkResult {
843 b := &B{
844 common: common{
845 signal: make(chan bool),
846 w: discard{},
847 },
848 benchFunc: f,
849 benchTime: benchTime,
850 }
851 if b.run1() {
852 b.run()
853 }
854 return b.result
855 }
856
// discard is an io.Writer that drops everything written to it.
type discard struct{}

// Write reports all of p as written without storing it. It never fails.
func (discard) Write(p []byte) (int, error) {
	return len(p), nil
}
860
View as plain text