Source file src/runtime/tracebuf.go

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// Trace buffer management.

package runtime

import (
	"internal/runtime/sys"
	"unsafe"
)

// Maximum number of bytes required to encode uint64 in base-128.
const traceBytesPerNumber = 10

// traceWriter is the interface for writing all trace data.
//
// This type is passed around as a value, and all of its methods return
// a new traceWriter. This allows for chaining together calls in a fluent-style
// API. This is partly stylistic, and very slightly for performance, since
// the compiler can destructure this value and pass it between calls as
// just regular arguments. However, this style is not load-bearing, and
// we can change it if it's deemed too error-prone.
type traceWriter struct {
	traceLocker
	exp traceExperiment
	*traceBuf
}

// writer returns a traceWriter that writes into the current M's stream.
//
// Once this is called, the caller must guard against stack growth until
// end is called on it. Therefore, it's highly recommended to use this
// API in a "fluent" style, for example tl.writer().event(...).end().
// Better yet, callers just looking to write events should use eventWriter
// when possible, which is a much safer wrapper around this function.
//
// nosplit to allow for safe reentrant tracing from stack growth paths.
//
//go:nosplit
func (tl traceLocker) writer() traceWriter {
	if debugTraceReentrancy {
		// Checks that the invariants of this function are being upheld.
		gp := getg()
		if gp == gp.m.curg {
			tl.mp.trace.oldthrowsplit = gp.throwsplit
			gp.throwsplit = true
		}
	}
	return traceWriter{traceLocker: tl, traceBuf: tl.mp.trace.buf[tl.gen%2][traceNoExperiment]}
}
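
// A minimal usage sketch of the fluent style recommended above (illustrative
// only: the event type and argument are stand-ins, and real callers should
// prefer eventWriter). A caller that acquired a traceLocker writes one event
// and immediately returns the buffer to its M:
//
//	tl := traceAcquire()
//	if tl.ok() {
//		tl.writer().event(traceEvGoCreate, traceArg(id)).end()
//		traceRelease(tl)
//	}
//
// Keeping the whole chain in one expression keeps the window during which
// stack growth is forbidden as small as possible.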

// unsafeTraceWriter produces a traceWriter that doesn't lock the trace.
//
// It should only be used in contexts where either:
// - Another traceLocker is held.
// - trace.gen is prevented from advancing.
//
// This does not have the same stack growth restrictions as traceLocker.writer.
//
// buf may be nil.
func unsafeTraceWriter(gen uintptr, buf *traceBuf) traceWriter {
	return traceWriter{traceLocker: traceLocker{gen: gen}, traceBuf: buf}
}

// event writes out the bytes of an event into the event stream.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) event(ev traceEv, args ...traceArg) traceWriter {
	// N.B. Everything in this call must be nosplit to maintain
	// the stack growth related invariants for writing events.

	// Make sure we have room.
	w, _ = w.ensure(1 + (len(args)+1)*traceBytesPerNumber)

	// Compute the timestamp diff that we'll put in the trace.
	ts := traceClockNow()
	if ts <= w.traceBuf.lastTime {
		ts = w.traceBuf.lastTime + 1
	}
	tsDiff := uint64(ts - w.traceBuf.lastTime)
	w.traceBuf.lastTime = ts

	// Write out event.
	w.byte(byte(ev))
	w.varint(tsDiff)
	for _, arg := range args {
		w.varint(uint64(arg))
	}
	return w
}
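
// For reference, the bytes this produces for a single event are laid out as
// follows (a summary of the code above, not a separate specification):
//
//	[1 byte]   event type (traceEv)
//	[varint]   timestamp delta since the previous event in this batch
//	[varint]*  one little-endian base-128 value per argument
//
// The ensure call reserves the worst case up front: one byte for the event
// type plus traceBytesPerNumber bytes for the timestamp and for each argument,
// so the writes below can never overflow the buffer.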

// end writes the buffer back into the m.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) end() {
	if w.mp == nil {
		// Tolerate a nil mp. It makes code that creates traceWriters directly
		// less error-prone.
		return
	}
	w.mp.trace.buf[w.gen%2][w.exp] = w.traceBuf
	if debugTraceReentrancy {
		// The writer is no longer live; we can drop throwsplit (if it wasn't
		// already set upon entry).
		gp := getg()
		if gp == gp.m.curg {
			gp.throwsplit = w.mp.trace.oldthrowsplit
		}
	}
}

// ensure makes sure that at least maxSize bytes are available to write.
//
// Returns whether the buffer was flushed.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) ensure(maxSize int) (traceWriter, bool) {
	refill := w.traceBuf == nil || !w.available(maxSize)
	if refill {
		w = w.refill()
	}
	return w, refill
}

// flush puts w.traceBuf on the queue of full buffers.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (w traceWriter) flush() traceWriter {
	systemstack(func() {
		lock(&trace.lock)
		if w.traceBuf != nil {
			traceBufFlush(w.traceBuf, w.gen)
		}
		unlock(&trace.lock)
	})
	w.traceBuf = nil
	return w
}

// refill puts w.traceBuf on the queue of full buffers and refreshes w's buffer.
func (w traceWriter) refill() traceWriter {
	systemstack(func() {
		lock(&trace.lock)
		if w.traceBuf != nil {
			traceBufFlush(w.traceBuf, w.gen)
		}
		if trace.empty != nil {
			w.traceBuf = trace.empty
			trace.empty = w.traceBuf.link
			unlock(&trace.lock)
		} else {
			unlock(&trace.lock)
			w.traceBuf = (*traceBuf)(sysAlloc(unsafe.Sizeof(traceBuf{}), &memstats.other_sys))
			if w.traceBuf == nil {
				throw("trace: out of memory")
			}
		}
	})
	// Initialize the buffer.
	ts := traceClockNow()
	if ts <= w.traceBuf.lastTime {
		ts = w.traceBuf.lastTime + 1
	}
	w.traceBuf.lastTime = ts
	w.traceBuf.link = nil
	w.traceBuf.pos = 0

	// Tolerate a nil mp.
	mID := ^uint64(0)
	if w.mp != nil {
		mID = uint64(w.mp.procid)
	}

	// Write the buffer's header.
	if w.exp == traceNoExperiment {
		w.byte(byte(traceEvEventBatch))
	} else {
		w.byte(byte(traceEvExperimentalBatch))
		w.byte(byte(w.exp))
	}
	w.varint(uint64(w.gen))
	w.varint(uint64(mID))
	w.varint(uint64(ts))
	w.traceBuf.lenPos = w.varintReserve()
	return w
}
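
// Every batch therefore begins with a fixed header written by refill (again,
// this just restates the code above):
//
//	[1 byte]   traceEvEventBatch (or traceEvExperimentalBatch plus an experiment byte)
//	[varint]   generation number
//	[varint]   thread (M) ID, or ^uint64(0) if there is no M
//	[varint]   base timestamp for the batch
//	[10 bytes] reserved space for the batch length, patched in later by traceBufFlush
//
// Event timestamps within the batch are deltas against lastTime, which starts
// at the base timestamp recorded here.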

// traceBufQueue is a FIFO of traceBufs.
type traceBufQueue struct {
	head, tail *traceBuf
}

// push queues buf onto the queue of buffers.
func (q *traceBufQueue) push(buf *traceBuf) {
	buf.link = nil
	if q.head == nil {
		q.head = buf
	} else {
		q.tail.link = buf
	}
	q.tail = buf
}

// pop dequeues from the queue of buffers.
func (q *traceBufQueue) pop() *traceBuf {
	buf := q.head
	if buf == nil {
		return nil
	}
	q.head = buf.link
	if q.head == nil {
		q.tail = nil
	}
	buf.link = nil
	return buf
}

// empty reports whether q contains no buffers.
func (q *traceBufQueue) empty() bool {
	return q.head == nil
}

// traceBufHeader is the header of a per-M tracing buffer (traceBuf).
type traceBufHeader struct {
	link     *traceBuf // in trace.empty/full
	lastTime traceTime // when we wrote the last event
	pos      int       // next write offset in arr
	lenPos   int       // position of batch length value
}

// traceBuf is per-M tracing buffer.
//
// TODO(mknyszek): Rename traceBuf to traceBatch, since they map 1:1 with event batches.
type traceBuf struct {
	_ sys.NotInHeap
	traceBufHeader
	arr [64<<10 - unsafe.Sizeof(traceBufHeader{})]byte // underlying buffer for trace event data
}

// byte appends v to buf.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) byte(v byte) {
	buf.arr[buf.pos] = v
	buf.pos++
}

// varint appends v to buf in little-endian-base-128 encoding.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) varint(v uint64) {
	pos := buf.pos
	arr := buf.arr[pos : pos+traceBytesPerNumber]
	for i := range arr {
		if v < 0x80 {
			pos += i + 1
			arr[i] = byte(v)
			break
		}
		arr[i] = 0x80 | byte(v)
		v >>= 7
	}
	buf.pos = pos
}
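
// A worked example of the encoding above: v = 300 (0b1_0010_1100) is emitted
// low 7 bits first, with the high bit of each byte marking "more to come":
//
//	byte 0: 0x80 | 0x2c = 0xac  (low 7 bits, continuation bit set)
//	byte 1: 0x02                (remaining bits, continuation bit clear)
//
// so 300 occupies two bytes, and values below 0x80 occupy exactly one.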

// varintReserve reserves enough space in buf to hold any varint.
//
// Space reserved this way can be filled in with the varintAt method.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) varintReserve() int {
	p := buf.pos
	buf.pos += traceBytesPerNumber
	return p
}

// stringData appends s's data directly to buf.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) stringData(s string) {
	buf.pos += copy(buf.arr[buf.pos:], s)
}

// available reports whether buf has at least size bytes of free space left.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) available(size int) bool {
	return len(buf.arr)-buf.pos >= size
}

// varintAt writes varint v at byte position pos in buf. This always
// consumes traceBytesPerNumber bytes. This is intended for when the caller
// needs to reserve space for a varint but can't populate it until later.
// Use varintReserve to reserve this space.
//
// nosplit because it's part of writing an event for an M, which must not
// have any stack growth.
//
//go:nosplit
func (buf *traceBuf) varintAt(pos int, v uint64) {
	for i := 0; i < traceBytesPerNumber; i++ {
		if i < traceBytesPerNumber-1 {
			buf.arr[pos] = 0x80 | byte(v)
		} else {
			buf.arr[pos] = byte(v)
		}
		v >>= 7
		pos++
	}
	if v != 0 {
		throw("v could not fit in traceBytesPerNumber")
	}
}
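
// Unlike varint, varintAt always emits the full traceBytesPerNumber bytes,
// padding with continuation bytes whose payload bits are zero. For example,
// v = 300 is written as
//
//	0xac 0x82 0x80 0x80 0x80 0x80 0x80 0x80 0x80 0x00
//
// which decodes to the same value as the two-byte form, so the fixed-width and
// minimal encodings can coexist in the stream.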

// traceBufFlush flushes a trace buffer.
//
// Must run on the system stack because trace.lock must be held.
//
//go:systemstack
func traceBufFlush(buf *traceBuf, gen uintptr) {
	assertLockHeld(&trace.lock)

	// Write out the non-header length of the batch in the header.
	//
	// Note: the length of the header is not included to make it easier
	// to calculate this value when deserializing and reserializing the
	// trace. Varints can have additional padding of zero bits that is
	// quite difficult to preserve, and if we include the header we
	// force serializers to do more work. Nothing else actually needs
	// padding.
	buf.varintAt(buf.lenPos, uint64(buf.pos-(buf.lenPos+traceBytesPerNumber)))
	trace.full[gen%2].push(buf)

	// Notify the scheduler that there's work available and that the trace
	// reader should be scheduled.
	if !trace.workAvailable.Load() {
		trace.workAvailable.Store(true)
	}
}
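
// A decoding sketch, shown only to make the wire format above concrete; it is
// not part of the runtime, and readVarint is a hypothetical helper rather than
// the parser in internal/trace. Reading back one little-endian base-128 value,
// including the zero-padded fixed-width form produced by varintAt, looks like:
//
//	func readVarint(b []byte) (v uint64, n int) {
//		for shift := uint(0); n < len(b); shift += 7 {
//			x := b[n]
//			n++
//			v |= uint64(x&0x7f) << shift
//			if x&0x80 == 0 {
//				return v, n
//			}
//		}
//		return 0, 0 // truncated input
//	}
//
// Applying this repeatedly after the batch header yields the per-event
// timestamp deltas and arguments written by traceWriter.event.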