Source file src/runtime/arena.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Implementation of (safe) user arenas.
     6  //
     7  // This file contains the implementation of user arenas wherein Go values can
     8  // be manually allocated and freed in bulk. The act of manually freeing memory,
     9  // potentially before a GC cycle, means that a garbage collection cycle can be
    10  // delayed, improving efficiency by reducing GC cycle frequency. There are other
    11  // potential efficiency benefits, such as improved locality and access to a more
    12  // efficient allocation strategy.
    13  //
    14  // What makes the arenas here safe is that once they are freed, accessing the
    15  // arena's memory will cause an explicit program fault, and the arena's address
    16  // space will not be reused until no more pointers into it are found. There's one
     17  // exception to this: if the arena's active chunk isn't exhausted when the arena is
     18  // freed, it goes back into a pool for reuse, so a crash is not always guaranteed.
    19  //
     20  // While this may seem unsafe, it still prevents memory corruption, and is in fact
     21  // necessary: because a fault is not guaranteed, plain new(T) (which never faults)
     22  // remains a valid, trivial implementation of arenas. (It also avoids complexities
    23  // that arise from synchronization with the GC when trying to set the arena chunks to
    24  // fault while the GC is active.)
    25  //
    26  // The implementation works in layers. At the bottom, arenas are managed in chunks.
    27  // Each chunk must be a multiple of the heap arena size, or the heap arena size must
     28  // be divisible by the arena chunk size. The address space for each chunk, and each
    29  // corresponding heapArena for that address space, are eternally reserved for use as
    30  // arena chunks. That is, they can never be used for the general heap. Each chunk
    31  // is also represented by a single mspan, and is modeled as a single large heap
    32  // allocation. It must be, because each chunk contains ordinary Go values that may
    33  // point into the heap, so it must be scanned just like any other object. Any
    34  // pointer into a chunk will therefore always cause the whole chunk to be scanned
    35  // while its corresponding arena is still live.
    36  //
    37  // Chunks may be allocated either from new memory mapped by the OS on our behalf,
    38  // or by reusing old freed chunks. When chunks are freed, their underlying memory
    39  // is returned to the OS, set to fault on access, and may not be reused until the
    40  // program doesn't point into the chunk anymore (the code refers to this state as
    41  // "quarantined"), a property checked by the GC.
    42  //
    43  // The sweeper handles moving chunks out of this quarantine state to be ready for
    44  // reuse. When the chunk is placed into the quarantine state, its corresponding
    45  // span is marked as noscan so that the GC doesn't try to scan memory that would
    46  // cause a fault.
    47  //
    48  // At the next layer are the user arenas themselves. They consist of a single
    49  // active chunk which new Go values are bump-allocated into and a list of chunks
    50  // that were exhausted when allocating into the arena. Once the arena is freed,
    51  // it frees all full chunks it references, and places the active one onto a reuse
    52  // list for a future arena to use. Each arena keeps its list of referenced chunks
    53  // explicitly live until it is freed. Each user arena also maps to an object which
    54  // has a finalizer attached that ensures the arena's chunks are all freed even if
    55  // the arena itself is never explicitly freed.
    56  //
    57  // Pointer-ful memory is bump-allocated from low addresses to high addresses in each
     58  // chunk, while pointer-free memory is bump-allocated from high addresses to low
    59  // addresses. The reason for this is to take advantage of a GC optimization wherein
    60  // the GC will stop scanning an object when there are no more pointers in it, which
    61  // also allows us to elide clearing the heap bitmap for pointer-free Go values
    62  // allocated into arenas.
    63  //
    64  // Note that arenas are not safe to use concurrently.
    65  //
    66  // In summary, there are 2 resources: arenas, and arena chunks. They exist in the
    67  // following lifecycle:
    68  //
    69  // (1) A new arena is created via newArena.
    70  // (2) Chunks are allocated to hold memory allocated into the arena with new or slice.
    71  //    (a) Chunks are first allocated from the reuse list of partially-used chunks.
    72  //    (b) If there are no such chunks, then chunks on the ready list are taken.
    73  //    (c) Failing all the above, memory for a new chunk is mapped.
    74  // (3) The arena is freed, or all references to it are dropped, triggering its finalizer.
    75  //    (a) If the GC is not active, exhausted chunks are set to fault and placed on a
    76  //        quarantine list.
    77  //    (b) If the GC is active, exhausted chunks are placed on a fault list and will
    78  //        go through step (a) at a later point in time.
    79  //    (c) Any remaining partially-used chunk is placed on a reuse list.
    80  // (4) Once no more pointers are found into quarantined arena chunks, the sweeper
    81  //     takes these chunks out of quarantine and places them on the ready list.
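         //
         // As a rough sketch (the exported API lives outside this file, in the
         // experimental arena package gated behind GOEXPERIMENT=arenas), this lifecycle
         // surfaces to users approximately as:
         //
         //	a := arena.NewArena()            // (1) newArena
         //	x := arena.New[T](a)             // (2) bump-allocate a T into a chunk
         //	s := arena.MakeSlice[U](a, n, n) // (2) slice backing store in a chunk
         //	use(x, s)                        // use is a placeholder for real work
         //	a.Free()                         // (3) chunks are quarantined or reused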
    82  
    83  package runtime
    84  
    85  import (
    86  	"internal/abi"
    87  	"internal/goarch"
    88  	"internal/runtime/atomic"
    89  	"runtime/internal/math"
    90  	"runtime/internal/sys"
    91  	"unsafe"
    92  )
    93  
    94  // Functions starting with arena_ are meant to be exported to downstream users
     95  // of arenas. They should wrap these functions in a higher-level API.
    96  //
    97  // The underlying arena and its resources are managed through an opaque unsafe.Pointer.
    98  
    99  // arena_newArena is a wrapper around newUserArena.
   100  //
   101  //go:linkname arena_newArena arena.runtime_arena_newArena
   102  func arena_newArena() unsafe.Pointer {
   103  	return unsafe.Pointer(newUserArena())
   104  }
   105  
   106  // arena_arena_New is a wrapper around (*userArena).new, except that typ
   107  // is an any (must be a *_type, still) and typ must be a type descriptor
   108  // for a pointer to the type to actually be allocated, i.e. pass a *T
   109  // to allocate a T. This is necessary because this function returns a *T.
   110  //
   111  //go:linkname arena_arena_New arena.runtime_arena_arena_New
   112  func arena_arena_New(arena unsafe.Pointer, typ any) any {
   113  	t := (*_type)(efaceOf(&typ).data)
   114  	if t.Kind_&abi.KindMask != abi.Pointer {
   115  		throw("arena_New: non-pointer type")
   116  	}
   117  	te := (*ptrtype)(unsafe.Pointer(t)).Elem
   118  	x := ((*userArena)(arena)).new(te)
   119  	var result any
   120  	e := efaceOf(&result)
   121  	e._type = t
   122  	e.data = x
   123  	return result
   124  }
   125  
   126  // arena_arena_Slice is a wrapper around (*userArena).slice.
   127  //
   128  //go:linkname arena_arena_Slice arena.runtime_arena_arena_Slice
   129  func arena_arena_Slice(arena unsafe.Pointer, slice any, cap int) {
   130  	((*userArena)(arena)).slice(slice, cap)
   131  }
   132  
   133  // arena_arena_Free is a wrapper around (*userArena).free.
   134  //
   135  //go:linkname arena_arena_Free arena.runtime_arena_arena_Free
   136  func arena_arena_Free(arena unsafe.Pointer) {
   137  	((*userArena)(arena)).free()
   138  }
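
         // For illustration only (an assumed shape, not defined in this file): the
         // downstream arena package reaches the wrappers above by declaring bodyless,
         // linknamed stubs matching the names in the go:linkname directives, e.g.:
         //
         //	package arena
         //
         //	import "unsafe"
         //
         //	//go:linkname runtime_arena_newArena
         //	func runtime_arena_newArena() unsafe.Pointer
         //
         //	//go:linkname runtime_arena_arena_Free
         //	func runtime_arena_arena_Free(arena unsafe.Pointer)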
   139  
   140  // arena_heapify takes a value that lives in an arena and makes a copy
   141  // of it on the heap. Values that don't live in an arena are returned unmodified.
   142  //
   143  //go:linkname arena_heapify arena.runtime_arena_heapify
   144  func arena_heapify(s any) any {
   145  	var v unsafe.Pointer
   146  	e := efaceOf(&s)
   147  	t := e._type
   148  	switch t.Kind_ & abi.KindMask {
   149  	case abi.String:
   150  		v = stringStructOf((*string)(e.data)).str
   151  	case abi.Slice:
   152  		v = (*slice)(e.data).array
   153  	case abi.Pointer:
   154  		v = e.data
   155  	default:
   156  		panic("arena: Clone only supports pointers, slices, and strings")
   157  	}
   158  	span := spanOf(uintptr(v))
   159  	if span == nil || !span.isUserArenaChunk {
   160  		// Not stored in a user arena chunk.
   161  		return s
   162  	}
   163  	// Heap-allocate storage for a copy.
   164  	var x any
   165  	switch t.Kind_ & abi.KindMask {
   166  	case abi.String:
   167  		s1 := s.(string)
   168  		s2, b := rawstring(len(s1))
   169  		copy(b, s1)
   170  		x = s2
   171  	case abi.Slice:
   172  		len := (*slice)(e.data).len
   173  		et := (*slicetype)(unsafe.Pointer(t)).Elem
   174  		sl := new(slice)
   175  		*sl = slice{makeslicecopy(et, len, len, (*slice)(e.data).array), len, len}
   176  		xe := efaceOf(&x)
   177  		xe._type = t
   178  		xe.data = unsafe.Pointer(sl)
   179  	case abi.Pointer:
   180  		et := (*ptrtype)(unsafe.Pointer(t)).Elem
   181  		e2 := newobject(et)
   182  		typedmemmove(et, e2, e.data)
   183  		xe := efaceOf(&x)
   184  		xe._type = t
   185  		xe.data = e2
   186  	}
   187  	return x
   188  }
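
         // For illustration (assuming the downstream arena package exposes arena_heapify
         // as Clone, as the panic message above suggests), this is what lets a value
         // outlive its arena:
         //
         //	a := arena.NewArena()
         //	buf := arena.MakeSlice[byte](a, 1024, 1024)
         //	fill(buf)                // fill is a placeholder
         //	kept := arena.Clone(buf) // heap copy made via arena_heapify
         //	a.Free()                 // kept remains valid; buf must not be used again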
   189  
   190  const (
    191  	// userArenaChunkBytes is the size of a user arena chunk, at most userArenaChunkBytesMax.
   192  	userArenaChunkBytesMax = 8 << 20
   193  	userArenaChunkBytes    = uintptr(int64(userArenaChunkBytesMax-heapArenaBytes)&(int64(userArenaChunkBytesMax-heapArenaBytes)>>63) + heapArenaBytes) // min(userArenaChunkBytesMax, heapArenaBytes)
   194  
   195  	// userArenaChunkPages is the number of pages a user arena chunk uses.
   196  	userArenaChunkPages = userArenaChunkBytes / pageSize
   197  
   198  	// userArenaChunkMaxAllocBytes is the maximum size of an object that can
   199  	// be allocated from an arena. This number is chosen to cap worst-case
   200  	// fragmentation of user arenas to 25%. Larger allocations are redirected
   201  	// to the heap.
   202  	userArenaChunkMaxAllocBytes = userArenaChunkBytes / 4
   203  )
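
         // The userArenaChunkBytes expression above is a branch-free min, written without
         // a conditional so it can remain a constant expression. With d defined as
         // int64(userArenaChunkBytesMax-heapArenaBytes): d>>63 is all ones exactly when d
         // is negative, so d&(d>>63) is d when d < 0 and 0 otherwise, and adding
         // heapArenaBytes back yields min(userArenaChunkBytesMax, heapArenaBytes). The
         // same trick with ordinary variables, as a sketch:
         //
         //	func min64(a, b int64) int64 {
         //		d := a - b           // assumes a-b does not overflow
         //		return d&(d>>63) + b // a if a < b, else b
         //	}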
   204  
   205  func init() {
   206  	if userArenaChunkPages*pageSize != userArenaChunkBytes {
   207  		throw("user arena chunk size is not a multiple of the page size")
   208  	}
   209  	if userArenaChunkBytes%physPageSize != 0 {
   210  		throw("user arena chunk size is not a multiple of the physical page size")
   211  	}
   212  	if userArenaChunkBytes < heapArenaBytes {
   213  		if heapArenaBytes%userArenaChunkBytes != 0 {
   214  			throw("user arena chunk size is smaller than a heap arena, but doesn't divide it")
   215  		}
   216  	} else {
   217  		if userArenaChunkBytes%heapArenaBytes != 0 {
    218  			throw("user arena chunk size is larger than a heap arena, but not a multiple")
   219  		}
   220  	}
   221  	lockInit(&userArenaState.lock, lockRankUserArenaState)
   222  }
   223  
   224  // userArenaChunkReserveBytes returns the amount of additional bytes to reserve for
   225  // heap metadata.
   226  func userArenaChunkReserveBytes() uintptr {
   227  	// In the allocation headers experiment, we reserve the end of the chunk for
   228  	// a pointer/scalar bitmap. We also reserve space for a dummy _type that
   229  	// refers to the bitmap. The PtrBytes field of the dummy _type indicates how
   230  	// many of those bits are valid.
   231  	return userArenaChunkBytes/goarch.PtrSize/8 + unsafe.Sizeof(_type{})
   232  }
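
         // As a worked example (assuming the common case of an 8 MiB chunk on a 64-bit
         // platform): one bitmap bit per pointer-sized word needs (8<<20)/8/8 = 128 KiB,
         // plus unsafe.Sizeof(_type{}) bytes for the dummy type describing the bitmap.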
   233  
   234  type userArena struct {
    235  	// fullList is a list of full chunks that don't have enough free memory left, and
   236  	// that we'll free once this user arena is freed.
   237  	//
   238  	// Can't use mSpanList here because it's not-in-heap.
   239  	fullList *mspan
   240  
   241  	// active is the user arena chunk we're currently allocating into.
   242  	active *mspan
   243  
   244  	// refs is a set of references to the arena chunks so that they're kept alive.
   245  	//
   246  	// The last reference in the list always refers to active, while the rest of
   247  	// them correspond to fullList. Specifically, the head of fullList is the
   248  	// second-to-last one, fullList.next is the third-to-last, and so on.
   249  	//
    250  	// In other words, every time a new chunk becomes active, its reference is
    251  	// appended to this list.
   252  	refs []unsafe.Pointer
   253  
   254  	// defunct is true if free has been called on this arena.
   255  	//
   256  	// This is just a best-effort way to discover a concurrent allocation
   257  	// and free. Also used to detect a double-free.
   258  	defunct atomic.Bool
   259  }
   260  
   261  // newUserArena creates a new userArena ready to be used.
   262  func newUserArena() *userArena {
   263  	a := new(userArena)
   264  	SetFinalizer(a, func(a *userArena) {
    265  		// If the arena handle is dropped without being freed, call free
    266  		// on the arena here, since arena chunks are never reclaimed by
    267  		// the garbage collector on their own.
   268  		a.free()
   269  	})
   270  	a.refill()
   271  	return a
   272  }
   273  
   274  // new allocates a new object of the provided type into the arena, and returns
   275  // its pointer.
   276  //
   277  // This operation is not safe to call concurrently with other operations on the
   278  // same arena.
   279  func (a *userArena) new(typ *_type) unsafe.Pointer {
   280  	return a.alloc(typ, -1)
   281  }
   282  
    283  // slice allocates a new slice backing store. sl must be a pointer to a slice
    284  // (i.e. *[]T), because slice writes the resulting slice header through it directly.
   285  //
   286  // cap determines the capacity of the slice backing store and must be non-negative.
   287  //
   288  // This operation is not safe to call concurrently with other operations on the
   289  // same arena.
   290  func (a *userArena) slice(sl any, cap int) {
   291  	if cap < 0 {
   292  		panic("userArena.slice: negative cap")
   293  	}
   294  	i := efaceOf(&sl)
   295  	typ := i._type
   296  	if typ.Kind_&abi.KindMask != abi.Pointer {
   297  		panic("slice result of non-ptr type")
   298  	}
   299  	typ = (*ptrtype)(unsafe.Pointer(typ)).Elem
   300  	if typ.Kind_&abi.KindMask != abi.Slice {
   301  		panic("slice of non-ptr-to-slice type")
   302  	}
   303  	typ = (*slicetype)(unsafe.Pointer(typ)).Elem
    304  	// typ is now the element type of the slice we want to allocate.
   305  
   306  	*((*slice)(i.data)) = slice{a.alloc(typ, cap), cap, cap}
   307  }
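
         // For example (a sketch, where a is a *userArena obtained from newUserArena):
         //
         //	var buf []byte
         //	a.slice(&buf, 4096) // buf now has len == cap == 4096, backed by arena memory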
   308  
    309  // free returns the userArena's chunks back to mheap and marks the arena as defunct.
   310  //
   311  // Must be called at most once for any given arena.
   312  //
   313  // This operation is not safe to call concurrently with other operations on the
   314  // same arena.
   315  func (a *userArena) free() {
   316  	// Check for a double-free.
   317  	if a.defunct.Load() {
   318  		panic("arena double free")
   319  	}
   320  
   321  	// Mark ourselves as defunct.
   322  	a.defunct.Store(true)
   323  	SetFinalizer(a, nil)
   324  
    325  	// Free all the full chunks.
    326  	//
    327  	// Their refs appear in a.refs in reverse order, starting at the second-to-last entry.
   328  	s := a.fullList
   329  	i := len(a.refs) - 2
   330  	for s != nil {
   331  		a.fullList = s.next
   332  		s.next = nil
   333  		freeUserArenaChunk(s, a.refs[i])
   334  		s = a.fullList
   335  		i--
   336  	}
   337  	if a.fullList != nil || i >= 0 {
   338  		// There's still something left on the full list, or we
   339  		// failed to actually iterate over the entire refs list.
   340  		throw("full list doesn't match refs list in length")
   341  	}
   342  
   343  	// Put the active chunk onto the reuse list.
   344  	//
   345  	// Note that active's reference is always the last reference in refs.
   346  	s = a.active
   347  	if s != nil {
   348  		if raceenabled || msanenabled || asanenabled {
   349  			// Don't reuse arenas with sanitizers enabled. We want to catch
   350  			// any use-after-free errors aggressively.
   351  			freeUserArenaChunk(s, a.refs[len(a.refs)-1])
   352  		} else {
   353  			lock(&userArenaState.lock)
   354  			userArenaState.reuse = append(userArenaState.reuse, liveUserArenaChunk{s, a.refs[len(a.refs)-1]})
   355  			unlock(&userArenaState.lock)
   356  		}
   357  	}
   358  	// nil out a.active so that a race with freeing will more likely cause a crash.
   359  	a.active = nil
   360  	a.refs = nil
   361  }
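
         // For example (a sketch): freeing twice is a programming error, which the
         // defunct flag above turns into a panic:
         //
         //	a := newUserArena()
         //	_ = a.new(t) // t is some *_type
         //	a.free()
         //	a.free() // panics: arena double free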
   362  
   363  // alloc reserves space in the current chunk or calls refill and reserves space
   364  // in a new chunk. If cap is negative, the type will be taken literally, otherwise
   365  // it will be considered as an element type for a slice backing store with capacity
   366  // cap.
   367  func (a *userArena) alloc(typ *_type, cap int) unsafe.Pointer {
   368  	s := a.active
   369  	var x unsafe.Pointer
   370  	for {
   371  		x = s.userArenaNextFree(typ, cap)
   372  		if x != nil {
   373  			break
   374  		}
   375  		s = a.refill()
   376  	}
   377  	return x
   378  }
   379  
    380  // refill inserts the current arena chunk onto the full list and obtains a new
    381  // one, either from the global reuse list of partially-used chunks or from mheap.
   382  func (a *userArena) refill() *mspan {
   383  	// If there's an active chunk, assume it's full.
   384  	s := a.active
   385  	if s != nil {
   386  		if s.userArenaChunkFree.size() > userArenaChunkMaxAllocBytes {
   387  			// It's difficult to tell when we're actually out of memory
   388  			// in a chunk because the allocation that failed may still leave
   389  			// some free space available. However, that amount of free space
   390  			// should never exceed the maximum allocation size.
   391  			throw("wasted too much memory in an arena chunk")
   392  		}
   393  		s.next = a.fullList
   394  		a.fullList = s
   395  		a.active = nil
   396  		s = nil
   397  	}
   398  	var x unsafe.Pointer
   399  
   400  	// Check the partially-used list.
   401  	lock(&userArenaState.lock)
   402  	if len(userArenaState.reuse) > 0 {
   403  		// Pick off the last arena chunk from the list.
   404  		n := len(userArenaState.reuse) - 1
   405  		x = userArenaState.reuse[n].x
   406  		s = userArenaState.reuse[n].mspan
   407  		userArenaState.reuse[n].x = nil
   408  		userArenaState.reuse[n].mspan = nil
   409  		userArenaState.reuse = userArenaState.reuse[:n]
   410  	}
   411  	unlock(&userArenaState.lock)
   412  	if s == nil {
   413  		// Allocate a new one.
   414  		x, s = newUserArenaChunk()
   415  		if s == nil {
   416  			throw("out of memory")
   417  		}
   418  	}
   419  	a.refs = append(a.refs, x)
   420  	a.active = s
   421  	return s
   422  }
   423  
   424  type liveUserArenaChunk struct {
   425  	*mspan // Must represent a user arena chunk.
   426  
   427  	// Reference to mspan.base() to keep the chunk alive.
   428  	x unsafe.Pointer
   429  }
   430  
   431  var userArenaState struct {
   432  	lock mutex
   433  
   434  	// reuse contains a list of partially-used and already-live
   435  	// user arena chunks that can be quickly reused for another
   436  	// arena.
   437  	//
   438  	// Protected by lock.
   439  	reuse []liveUserArenaChunk
   440  
   441  	// fault contains full user arena chunks that need to be faulted.
   442  	//
   443  	// Protected by lock.
   444  	fault []liveUserArenaChunk
   445  }
   446  
   447  // userArenaNextFree reserves space in the user arena for an item of the specified
    448  // type. If cap is not -1, this is for an array of cap elements of type typ.
   449  func (s *mspan) userArenaNextFree(typ *_type, cap int) unsafe.Pointer {
   450  	size := typ.Size_
   451  	if cap > 0 {
   452  		if size > ^uintptr(0)/uintptr(cap) {
   453  			// Overflow.
   454  			throw("out of memory")
   455  		}
   456  		size *= uintptr(cap)
   457  	}
   458  	if size == 0 || cap == 0 {
   459  		return unsafe.Pointer(&zerobase)
   460  	}
   461  	if size > userArenaChunkMaxAllocBytes {
    462  		// Redirect allocations that don't fit well into a chunk to be
    463  		// allocated directly from the heap.
   464  		if cap >= 0 {
   465  			return newarray(typ, cap)
   466  		}
   467  		return newobject(typ)
   468  	}
   469  
   470  	// Prevent preemption as we set up the space for a new object.
   471  	//
   472  	// Act like we're allocating.
   473  	mp := acquirem()
   474  	if mp.mallocing != 0 {
   475  		throw("malloc deadlock")
   476  	}
   477  	if mp.gsignal == getg() {
   478  		throw("malloc during signal")
   479  	}
   480  	mp.mallocing = 1
   481  
   482  	var ptr unsafe.Pointer
   483  	if !typ.Pointers() {
   484  		// Allocate pointer-less objects from the tail end of the chunk.
   485  		v, ok := s.userArenaChunkFree.takeFromBack(size, typ.Align_)
   486  		if ok {
   487  			ptr = unsafe.Pointer(v)
   488  		}
   489  	} else {
   490  		v, ok := s.userArenaChunkFree.takeFromFront(size, typ.Align_)
   491  		if ok {
   492  			ptr = unsafe.Pointer(v)
   493  		}
   494  	}
   495  	if ptr == nil {
   496  		// Failed to allocate.
   497  		mp.mallocing = 0
   498  		releasem(mp)
   499  		return nil
   500  	}
   501  	if s.needzero != 0 {
   502  		throw("arena chunk needs zeroing, but should already be zeroed")
   503  	}
   504  	// Set up heap bitmap and do extra accounting.
   505  	if typ.Pointers() {
   506  		if cap >= 0 {
   507  			userArenaHeapBitsSetSliceType(typ, cap, ptr, s)
   508  		} else {
   509  			userArenaHeapBitsSetType(typ, ptr, s)
   510  		}
   511  		c := getMCache(mp)
   512  		if c == nil {
   513  			throw("mallocgc called without a P or outside bootstrapping")
   514  		}
   515  		if cap > 0 {
   516  			c.scanAlloc += size - (typ.Size_ - typ.PtrBytes)
   517  		} else {
   518  			c.scanAlloc += typ.PtrBytes
   519  		}
   520  	}
   521  
    522  	// Ensure that the stores above that initialize ptr to
   523  	// type-safe memory and set the heap bits occur before
   524  	// the caller can make ptr observable to the garbage
   525  	// collector. Otherwise, on weakly ordered machines,
    526  	// the garbage collector could follow a pointer to ptr,
   527  	// but see uninitialized memory or stale heap bits.
   528  	publicationBarrier()
   529  
   530  	mp.mallocing = 0
   531  	releasem(mp)
   532  
   533  	return ptr
   534  }
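
         // The takeFromFront/takeFromBack calls above implement the dual bump allocation
         // described at the top of this file: pointer-ful data grows up from the low end
         // of the chunk and pointer-free data grows down from the high end. A standalone
         // sketch of the idea (not the runtime's addrRange implementation):
         //
         //	type twoEndedBump struct {
         //		lo, hi uintptr // bounds of the remaining free region [lo, hi)
         //	}
         //
         //	// takeFromFront carves size bytes with the given alignment off the low end.
         //	func (b *twoEndedBump) takeFromFront(size, align uintptr) (uintptr, bool) {
         //		p := (b.lo + align - 1) &^ (align - 1) // align lo upward
         //		if p+size > b.hi {
         //			return 0, false
         //		}
         //		b.lo = p + size
         //		return p, true
         //	}
         //
         //	// takeFromBack carves size bytes with the given alignment off the high end.
         //	func (b *twoEndedBump) takeFromBack(size, align uintptr) (uintptr, bool) {
         //		p := (b.hi - size) &^ (align - 1) // align the start downward
         //		if p < b.lo {
         //			return 0, false
         //		}
         //		b.hi = p
         //		return p, true
         //	}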
   535  
    536  // userArenaHeapBitsSetSliceType is the equivalent of heapSetType but for
   537  // Go slice backing store values allocated in a user arena chunk. It sets up the
   538  // heap bitmap for n consecutive values with type typ allocated at address ptr.
   539  func userArenaHeapBitsSetSliceType(typ *_type, n int, ptr unsafe.Pointer, s *mspan) {
   540  	mem, overflow := math.MulUintptr(typ.Size_, uintptr(n))
   541  	if overflow || n < 0 || mem > maxAlloc {
   542  		panic(plainError("runtime: allocation size out of range"))
   543  	}
   544  	for i := 0; i < n; i++ {
   545  		userArenaHeapBitsSetType(typ, add(ptr, uintptr(i)*typ.Size_), s)
   546  	}
   547  }
   548  
   549  // userArenaHeapBitsSetType is the equivalent of heapSetType but for
   550  // non-slice-backing-store Go values allocated in a user arena chunk. It
   551  // sets up the type metadata for the value with type typ allocated at address ptr.
    552  // s must be the mspan for the arena chunk that contains ptr.
   553  func userArenaHeapBitsSetType(typ *_type, ptr unsafe.Pointer, s *mspan) {
   554  	base := s.base()
   555  	h := s.writeUserArenaHeapBits(uintptr(ptr))
   556  
   557  	p := typ.GCData // start of 1-bit pointer mask (or GC program)
   558  	var gcProgBits uintptr
   559  	if typ.Kind_&abi.KindGCProg != 0 {
   560  		// Expand gc program, using the object itself for storage.
   561  		gcProgBits = runGCProg(addb(p, 4), (*byte)(ptr))
   562  		p = (*byte)(ptr)
   563  	}
   564  	nb := typ.PtrBytes / goarch.PtrSize
   565  
   566  	for i := uintptr(0); i < nb; i += ptrBits {
   567  		k := nb - i
   568  		if k > ptrBits {
   569  			k = ptrBits
   570  		}
   571  		// N.B. On big endian platforms we byte swap the data that we
   572  		// read from GCData, which is always stored in little-endian order
   573  		// by the compiler. writeUserArenaHeapBits handles data in
   574  		// a platform-ordered way for efficiency, but stores back the
   575  		// data in little endian order, since we expose the bitmap through
   576  		// a dummy type.
   577  		h = h.write(s, readUintptr(addb(p, i/8)), k)
   578  	}
   579  	// Note: we call pad here to ensure we emit explicit 0 bits
   580  	// for the pointerless tail of the object. This ensures that
   581  	// there's only a single noMorePtrs mark for the next object
   582  	// to clear. We don't need to do this to clear stale noMorePtrs
   583  	// markers from previous uses because arena chunk pointer bitmaps
   584  	// are always fully cleared when reused.
   585  	h = h.pad(s, typ.Size_-typ.PtrBytes)
   586  	h.flush(s, uintptr(ptr), typ.Size_)
   587  
   588  	if typ.Kind_&abi.KindGCProg != 0 {
   589  		// Zero out temporary ptrmask buffer inside object.
   590  		memclrNoHeapPointers(ptr, (gcProgBits+7)/8)
   591  	}
   592  
   593  	// Update the PtrBytes value in the type information. After this
   594  	// point, the GC will observe the new bitmap.
   595  	s.largeType.PtrBytes = uintptr(ptr) - base + typ.PtrBytes
   596  
   597  	// Double-check that the bitmap was written out correctly.
   598  	const doubleCheck = false
   599  	if doubleCheck {
   600  		doubleCheckHeapPointersInterior(uintptr(ptr), uintptr(ptr), typ.Size_, typ.Size_, typ, &s.largeType, s)
   601  	}
   602  }
   603  
   604  type writeUserArenaHeapBits struct {
    605  	offset uintptr // span offset whose pointer state the low bit of mask describes.
    606  	mask   uintptr // pointer bits accumulated for the words starting at offset.
    607  	valid  uintptr // number of bits in mask that are valid (including low).
    608  	low    uintptr // number of low-order bits to not overwrite.
   609  }
   610  
   611  func (s *mspan) writeUserArenaHeapBits(addr uintptr) (h writeUserArenaHeapBits) {
   612  	offset := addr - s.base()
   613  
    614  	// We may start writing bits in the middle of a heap bitmap word.
   615  	// Remember how many bits into the word we started, so we can be sure
   616  	// not to overwrite the previous bits.
   617  	h.low = offset / goarch.PtrSize % ptrBits
   618  
   619  	// round down to heap word that starts the bitmap word.
   620  	h.offset = offset - h.low*goarch.PtrSize
   621  
   622  	// We don't have any bits yet.
   623  	h.mask = 0
   624  	h.valid = h.low
   625  
   626  	return
   627  }
   628  
    629  // write appends the pointer state of the next valid pointer-sized words,
    630  // taken from the low valid bits of bits. 1=pointer, 0=scalar.
   631  func (h writeUserArenaHeapBits) write(s *mspan, bits, valid uintptr) writeUserArenaHeapBits {
   632  	if h.valid+valid <= ptrBits {
   633  		// Fast path - just accumulate the bits.
   634  		h.mask |= bits << h.valid
   635  		h.valid += valid
   636  		return h
   637  	}
   638  	// Too many bits to fit in this word. Write the current word
   639  	// out and move on to the next word.
   640  
   641  	data := h.mask | bits<<h.valid       // mask for this word
   642  	h.mask = bits >> (ptrBits - h.valid) // leftover for next word
   643  	h.valid += valid - ptrBits           // have h.valid+valid bits, writing ptrBits of them
   644  
   645  	// Flush mask to the memory bitmap.
   646  	idx := h.offset / (ptrBits * goarch.PtrSize)
   647  	m := uintptr(1)<<h.low - 1
   648  	bitmap := s.heapBits()
   649  	bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx])&m | data)
   650  	// Note: no synchronization required for this write because
   651  	// the allocator has exclusive access to the page, and the bitmap
   652  	// entries are all for a single page. Also, visibility of these
   653  	// writes is guaranteed by the publication barrier in mallocgc.
   654  
   655  	// Move to next word of bitmap.
   656  	h.offset += ptrBits * goarch.PtrSize
   657  	h.low = 0
   658  	return h
   659  }
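
         // As a small worked example of the carry logic above (a sketch with ptrBits
         // shrunk to 8 for readability): if h.valid is 6 and write is called with
         // bits = 0b10111 and valid = 5, then 6+5 > 8, so the low 2 bits of the new data
         // top off the current word (data = h.mask | 0b11<<6), the remaining 0b101
         // carries into the next word (h.mask = 0b10111>>2), and h.valid becomes
         // 6+5-8 = 3.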
   660  
   661  // Add padding of size bytes.
   662  func (h writeUserArenaHeapBits) pad(s *mspan, size uintptr) writeUserArenaHeapBits {
   663  	if size == 0 {
   664  		return h
   665  	}
   666  	words := size / goarch.PtrSize
   667  	for words > ptrBits {
   668  		h = h.write(s, 0, ptrBits)
   669  		words -= ptrBits
   670  	}
   671  	return h.write(s, 0, words)
   672  }
   673  
   674  // Flush the bits that have been written, and add zeros as needed
   675  // to cover the full object [addr, addr+size).
   676  func (h writeUserArenaHeapBits) flush(s *mspan, addr, size uintptr) {
   677  	offset := addr - s.base()
   678  
   679  	// zeros counts the number of bits needed to represent the object minus the
   680  	// number of bits we've already written. This is the number of 0 bits
   681  	// that need to be added.
   682  	zeros := (offset+size-h.offset)/goarch.PtrSize - h.valid
   683  
   684  	// Add zero bits up to the bitmap word boundary
   685  	if zeros > 0 {
   686  		z := ptrBits - h.valid
   687  		if z > zeros {
   688  			z = zeros
   689  		}
   690  		h.valid += z
   691  		zeros -= z
   692  	}
   693  
   694  	// Find word in bitmap that we're going to write.
   695  	bitmap := s.heapBits()
   696  	idx := h.offset / (ptrBits * goarch.PtrSize)
   697  
   698  	// Write remaining bits.
   699  	if h.valid != h.low {
   700  		m := uintptr(1)<<h.low - 1      // don't clear existing bits below "low"
   701  		m |= ^(uintptr(1)<<h.valid - 1) // don't clear existing bits above "valid"
   702  		bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx])&m | h.mask)
   703  	}
   704  	if zeros == 0 {
   705  		return
   706  	}
   707  
   708  	// Advance to next bitmap word.
   709  	h.offset += ptrBits * goarch.PtrSize
   710  
   711  	// Continue on writing zeros for the rest of the object.
   712  	// For standard use of the ptr bits this is not required, as
   713  	// the bits are read from the beginning of the object. Some uses,
   714  	// like noscan spans, oblets, bulk write barriers, and cgocheck, might
   715  	// start mid-object, so these writes are still required.
   716  	for {
   717  		// Write zero bits.
   718  		idx := h.offset / (ptrBits * goarch.PtrSize)
   719  		if zeros < ptrBits {
   720  			bitmap[idx] = bswapIfBigEndian(bswapIfBigEndian(bitmap[idx]) &^ (uintptr(1)<<zeros - 1))
   721  			break
   722  		} else if zeros == ptrBits {
   723  			bitmap[idx] = 0
   724  			break
   725  		} else {
   726  			bitmap[idx] = 0
   727  			zeros -= ptrBits
   728  		}
   729  		h.offset += ptrBits * goarch.PtrSize
   730  	}
   731  }
   732  
   733  // bswapIfBigEndian swaps the byte order of the uintptr on goarch.BigEndian platforms,
   734  // and leaves it alone elsewhere.
   735  func bswapIfBigEndian(x uintptr) uintptr {
   736  	if goarch.BigEndian {
   737  		if goarch.PtrSize == 8 {
   738  			return uintptr(sys.Bswap64(uint64(x)))
   739  		}
   740  		return uintptr(sys.Bswap32(uint32(x)))
   741  	}
   742  	return x
   743  }
   744  
   745  // newUserArenaChunk allocates a user arena chunk, which maps to a single
   746  // heap arena and single span. Returns a pointer to the base of the chunk
   747  // (this is really important: we need to keep the chunk alive) and the span.
   748  func newUserArenaChunk() (unsafe.Pointer, *mspan) {
   749  	if gcphase == _GCmarktermination {
   750  		throw("newUserArenaChunk called with gcphase == _GCmarktermination")
   751  	}
   752  
   753  	// Deduct assist credit. Because user arena chunks are modeled as one
   754  	// giant heap object which counts toward heapLive, we're obligated to
   755  	// assist the GC proportionally (and it's worth noting that the arena
   756  	// does represent additional work for the GC, but we also have no idea
   757  	// what that looks like until we actually allocate things into the
   758  	// arena).
   759  	deductAssistCredit(userArenaChunkBytes)
   760  
   761  	// Set mp.mallocing to keep from being preempted by GC.
   762  	mp := acquirem()
   763  	if mp.mallocing != 0 {
   764  		throw("malloc deadlock")
   765  	}
   766  	if mp.gsignal == getg() {
   767  		throw("malloc during signal")
   768  	}
   769  	mp.mallocing = 1
   770  
    771  	// Allocate a new user arena chunk.
   772  	var span *mspan
   773  	systemstack(func() {
   774  		span = mheap_.allocUserArenaChunk()
   775  	})
   776  	if span == nil {
   777  		throw("out of memory")
   778  	}
   779  	x := unsafe.Pointer(span.base())
   780  
   781  	// Allocate black during GC.
   782  	// All slots hold nil so no scanning is needed.
   783  	// This may be racing with GC so do it atomically if there can be
   784  	// a race marking the bit.
   785  	if gcphase != _GCoff {
   786  		gcmarknewobject(span, span.base())
   787  	}
   788  
   789  	if raceenabled {
   790  		// TODO(mknyszek): Track individual objects.
   791  		racemalloc(unsafe.Pointer(span.base()), span.elemsize)
   792  	}
   793  
   794  	if msanenabled {
   795  		// TODO(mknyszek): Track individual objects.
   796  		msanmalloc(unsafe.Pointer(span.base()), span.elemsize)
   797  	}
   798  
   799  	if asanenabled {
   800  		// TODO(mknyszek): Track individual objects.
   801  		rzSize := computeRZlog(span.elemsize)
   802  		span.elemsize -= rzSize
   803  		span.largeType.Size_ = span.elemsize
   804  		rzStart := span.base() + span.elemsize
   805  		span.userArenaChunkFree = makeAddrRange(span.base(), rzStart)
   806  		asanpoison(unsafe.Pointer(rzStart), span.limit-rzStart)
   807  		asanunpoison(unsafe.Pointer(span.base()), span.elemsize)
   808  	}
   809  
   810  	if rate := MemProfileRate; rate > 0 {
   811  		c := getMCache(mp)
   812  		if c == nil {
   813  			throw("newUserArenaChunk called without a P or outside bootstrapping")
   814  		}
   815  		// Note cache c only valid while m acquired; see #47302
   816  		if rate != 1 && userArenaChunkBytes < c.nextSample {
   817  			c.nextSample -= userArenaChunkBytes
   818  		} else {
   819  			profilealloc(mp, unsafe.Pointer(span.base()), userArenaChunkBytes)
   820  		}
   821  	}
   822  	mp.mallocing = 0
   823  	releasem(mp)
   824  
   825  	// Again, because this chunk counts toward heapLive, potentially trigger a GC.
   826  	if t := (gcTrigger{kind: gcTriggerHeap}); t.test() {
   827  		gcStart(t)
   828  	}
   829  
   830  	if debug.malloc {
   831  		if inittrace.active && inittrace.id == getg().goid {
   832  			// Init functions are executed sequentially in a single goroutine.
   833  			inittrace.bytes += uint64(userArenaChunkBytes)
   834  		}
   835  	}
   836  
   837  	// Double-check it's aligned to the physical page size. Based on the current
   838  	// implementation this is trivially true, but it need not be in the future.
   839  	// However, if it's not aligned to the physical page size then we can't properly
   840  	// set it to fault later.
   841  	if uintptr(x)%physPageSize != 0 {
   842  		throw("user arena chunk is not aligned to the physical page size")
   843  	}
   844  
   845  	return x, span
   846  }
   847  
    848  // isUnusedUserArenaChunk reports whether the arena chunk has been set to fault
    849  // and no longer contains any scannable memory. However, it might still be
   850  // mSpanInUse as it sits on the quarantine list, since it needs to be swept.
   851  //
   852  // This is not safe to execute unless the caller has ownership of the mspan or
   853  // the world is stopped (preemption is prevented while the relevant state changes).
   854  //
   855  // This is really only meant to be used by accounting tests in the runtime to
   856  // distinguish when a span shouldn't be counted (since mSpanInUse might not be
   857  // enough).
   858  func (s *mspan) isUnusedUserArenaChunk() bool {
   859  	return s.isUserArenaChunk && s.spanclass == makeSpanClass(0, true)
   860  }
   861  
   862  // setUserArenaChunkToFault sets the address space for the user arena chunk to fault
   863  // and releases any underlying memory resources.
   864  //
   865  // Must be in a non-preemptible state to ensure the consistency of statistics
   866  // exported to MemStats.
   867  func (s *mspan) setUserArenaChunkToFault() {
   868  	if !s.isUserArenaChunk {
   869  		throw("invalid span in heapArena for user arena")
   870  	}
   871  	if s.npages*pageSize != userArenaChunkBytes {
   872  		throw("span on userArena.faultList has invalid size")
   873  	}
   874  
   875  	// Update the span class to be noscan. What we want to happen is that
   876  	// any pointer into the span keeps it from getting recycled, so we want
   877  	// the mark bit to get set, but we're about to set the address space to fault,
   878  	// so we have to prevent the GC from scanning this memory.
   879  	//
   880  	// It's OK to set it here because (1) a GC isn't in progress, so the scanning code
   881  	// won't make a bad decision, (2) we're currently non-preemptible and in the runtime,
   882  	// so a GC is blocked from starting. We might race with sweeping, which could
   883  	// put it on the "wrong" sweep list, but really don't care because the chunk is
   884  	// treated as a large object span and there's no meaningful difference between scan
   885  	// and noscan large objects in the sweeper. The STW at the start of the GC acts as a
   886  	// barrier for this update.
   887  	s.spanclass = makeSpanClass(0, true)
   888  
   889  	// Actually set the arena chunk to fault, so we'll get dangling pointer errors.
   890  	// sysFault currently uses a method on each OS that forces it to evacuate all
   891  	// memory backing the chunk.
   892  	sysFault(unsafe.Pointer(s.base()), s.npages*pageSize)
   893  
   894  	// Everything on the list is counted as in-use, however sysFault transitions to
   895  	// Reserved, not Prepared, so we skip updating heapFree or heapReleased and just
   896  	// remove the memory from the total altogether; it's just address space now.
   897  	gcController.heapInUse.add(-int64(s.npages * pageSize))
   898  
   899  	// Count this as a free of an object right now as opposed to when
   900  	// the span gets off the quarantine list. The main reason is so that the
   901  	// amount of bytes allocated doesn't exceed how much is counted as
   902  	// "mapped ready," which could cause a deadlock in the pacer.
   903  	gcController.totalFree.Add(int64(s.elemsize))
   904  
   905  	// Update consistent stats to match.
   906  	//
   907  	// We're non-preemptible, so it's safe to update consistent stats (our P
   908  	// won't change out from under us).
   909  	stats := memstats.heapStats.acquire()
   910  	atomic.Xaddint64(&stats.committed, -int64(s.npages*pageSize))
   911  	atomic.Xaddint64(&stats.inHeap, -int64(s.npages*pageSize))
   912  	atomic.Xadd64(&stats.largeFreeCount, 1)
   913  	atomic.Xadd64(&stats.largeFree, int64(s.elemsize))
   914  	memstats.heapStats.release()
   915  
   916  	// This counts as a free, so update heapLive.
   917  	gcController.update(-int64(s.elemsize), 0)
   918  
   919  	// Mark it as free for the race detector.
   920  	if raceenabled {
   921  		racefree(unsafe.Pointer(s.base()), s.elemsize)
   922  	}
   923  
   924  	systemstack(func() {
   925  		// Add the user arena to the quarantine list.
   926  		lock(&mheap_.lock)
   927  		mheap_.userArena.quarantineList.insert(s)
   928  		unlock(&mheap_.lock)
   929  	})
   930  }
   931  
   932  // inUserArenaChunk returns true if p points to a user arena chunk.
   933  func inUserArenaChunk(p uintptr) bool {
   934  	s := spanOf(p)
   935  	if s == nil {
   936  		return false
   937  	}
   938  	return s.isUserArenaChunk
   939  }
   940  
   941  // freeUserArenaChunk releases the user arena represented by s back to the runtime.
   942  //
   943  // x must be a live pointer within s.
   944  //
   945  // The runtime will set the user arena to fault once it's safe (the GC is no longer running)
   946  // and then once the user arena is no longer referenced by the application, will allow it to
   947  // be reused.
   948  func freeUserArenaChunk(s *mspan, x unsafe.Pointer) {
   949  	if !s.isUserArenaChunk {
   950  		throw("span is not for a user arena")
   951  	}
   952  	if s.npages*pageSize != userArenaChunkBytes {
   953  		throw("invalid user arena span size")
   954  	}
   955  
   956  	// Mark the region as free to various sanitizers immediately instead
   957  	// of handling them at sweep time.
   958  	if raceenabled {
   959  		racefree(unsafe.Pointer(s.base()), s.elemsize)
   960  	}
   961  	if msanenabled {
   962  		msanfree(unsafe.Pointer(s.base()), s.elemsize)
   963  	}
   964  	if asanenabled {
   965  		asanpoison(unsafe.Pointer(s.base()), s.elemsize)
   966  	}
   967  
   968  	// Make ourselves non-preemptible as we manipulate state and statistics.
   969  	//
    970  	// Also required by setUserArenaChunkToFault.
   971  	mp := acquirem()
   972  
   973  	// We can only set user arenas to fault if we're in the _GCoff phase.
   974  	if gcphase == _GCoff {
   975  		lock(&userArenaState.lock)
   976  		faultList := userArenaState.fault
   977  		userArenaState.fault = nil
   978  		unlock(&userArenaState.lock)
   979  
   980  		s.setUserArenaChunkToFault()
   981  		for _, lc := range faultList {
   982  			lc.mspan.setUserArenaChunkToFault()
   983  		}
   984  
   985  		// Until the chunks are set to fault, keep them alive via the fault list.
   986  		KeepAlive(x)
   987  		KeepAlive(faultList)
   988  	} else {
   989  		// Put the user arena on the fault list.
   990  		lock(&userArenaState.lock)
   991  		userArenaState.fault = append(userArenaState.fault, liveUserArenaChunk{s, x})
   992  		unlock(&userArenaState.lock)
   993  	}
   994  	releasem(mp)
   995  }
   996  
    997  // allocUserArenaChunk returns a user arena chunk represented as a span, reusing
    998  // one from the ready list if possible and otherwise mapping new memory.
   999  //
  1000  // Must be in a non-preemptible state to ensure the consistency of statistics
  1001  // exported to MemStats.
  1002  //
  1003  // Acquires the heap lock. Must run on the system stack for that reason.
  1004  //
  1005  //go:systemstack
  1006  func (h *mheap) allocUserArenaChunk() *mspan {
  1007  	var s *mspan
  1008  	var base uintptr
  1009  
  1010  	// First check the free list.
  1011  	lock(&h.lock)
  1012  	if !h.userArena.readyList.isEmpty() {
  1013  		s = h.userArena.readyList.first
  1014  		h.userArena.readyList.remove(s)
  1015  		base = s.base()
  1016  	} else {
  1017  		// Free list was empty, so allocate a new arena.
  1018  		hintList := &h.userArena.arenaHints
  1019  		if raceenabled {
  1020  			// In race mode just use the regular heap hints. We might fragment
  1021  			// the address space, but the race detector requires that the heap
  1022  			// is mapped contiguously.
  1023  			hintList = &h.arenaHints
  1024  		}
  1025  		v, size := h.sysAlloc(userArenaChunkBytes, hintList, false)
  1026  		if size%userArenaChunkBytes != 0 {
  1027  			throw("sysAlloc size is not divisible by userArenaChunkBytes")
  1028  		}
  1029  		if size > userArenaChunkBytes {
  1030  			// We got more than we asked for. This can happen if
   1031  			// heapArenaBytes > userArenaChunkBytes, or if sysAlloc just returns
  1032  			// some extra as a result of trying to find an aligned region.
  1033  			//
  1034  			// Divide it up and put it on the ready list.
  1035  			for i := userArenaChunkBytes; i < size; i += userArenaChunkBytes {
  1036  				s := h.allocMSpanLocked()
  1037  				s.init(uintptr(v)+i, userArenaChunkPages)
  1038  				h.userArena.readyList.insertBack(s)
  1039  			}
  1040  			size = userArenaChunkBytes
  1041  		}
  1042  		base = uintptr(v)
  1043  		if base == 0 {
  1044  			// Out of memory.
  1045  			unlock(&h.lock)
  1046  			return nil
  1047  		}
  1048  		s = h.allocMSpanLocked()
  1049  	}
  1050  	unlock(&h.lock)
  1051  
  1052  	// sysAlloc returns Reserved address space, and any span we're
  1053  	// reusing is set to fault (so, also Reserved), so transition
  1054  	// it to Prepared and then Ready.
  1055  	//
  1056  	// Unlike (*mheap).grow, just map in everything that we
  1057  	// asked for. We're likely going to use it all.
  1058  	sysMap(unsafe.Pointer(base), userArenaChunkBytes, &gcController.heapReleased)
  1059  	sysUsed(unsafe.Pointer(base), userArenaChunkBytes, userArenaChunkBytes)
  1060  
  1061  	// Model the user arena as a heap span for a large object.
  1062  	spc := makeSpanClass(0, false)
  1063  	h.initSpan(s, spanAllocHeap, spc, base, userArenaChunkPages)
  1064  	s.isUserArenaChunk = true
  1065  	s.elemsize -= userArenaChunkReserveBytes()
  1066  	s.limit = s.base() + s.elemsize
  1067  	s.freeindex = 1
  1068  	s.allocCount = 1
  1069  
  1070  	// Account for this new arena chunk memory.
  1071  	gcController.heapInUse.add(int64(userArenaChunkBytes))
  1072  	gcController.heapReleased.add(-int64(userArenaChunkBytes))
  1073  
  1074  	stats := memstats.heapStats.acquire()
  1075  	atomic.Xaddint64(&stats.inHeap, int64(userArenaChunkBytes))
  1076  	atomic.Xaddint64(&stats.committed, int64(userArenaChunkBytes))
  1077  
  1078  	// Model the arena as a single large malloc.
  1079  	atomic.Xadd64(&stats.largeAlloc, int64(s.elemsize))
  1080  	atomic.Xadd64(&stats.largeAllocCount, 1)
  1081  	memstats.heapStats.release()
  1082  
  1083  	// Count the alloc in inconsistent, internal stats.
  1084  	gcController.totalAlloc.Add(int64(s.elemsize))
  1085  
  1086  	// Update heapLive.
  1087  	gcController.update(int64(s.elemsize), 0)
  1088  
  1089  	// This must clear the entire heap bitmap so that it's safe
  1090  	// to allocate noscan data without writing anything out.
  1091  	s.initHeapBits(true)
  1092  
  1093  	// Clear the span preemptively. It's an arena chunk, so let's assume
  1094  	// everything is going to be used.
  1095  	//
  1096  	// This also seems to make a massive difference as to whether or
  1097  	// not Linux decides to back this memory with transparent huge
  1098  	// pages. There's latency involved in this zeroing, but the hugepage
  1099  	// gains are almost always worth it. Note: it's important that we
  1100  	// clear even if it's freshly mapped and we know there's no point
  1101  	// to zeroing as *that* is the critical signal to use huge pages.
  1102  	memclrNoHeapPointers(unsafe.Pointer(s.base()), s.elemsize)
  1103  	s.needzero = 0
  1104  
  1105  	s.freeIndexForScan = 1
  1106  
  1107  	// Set up the range for allocation.
  1108  	s.userArenaChunkFree = makeAddrRange(base, base+s.elemsize)
  1109  
  1110  	// Put the large span in the mcentral swept list so that it's
  1111  	// visible to the background sweeper.
  1112  	h.central[spc].mcentral.fullSwept(h.sweepgen).push(s)
  1113  
  1114  	// Set up an allocation header. Avoid write barriers here because this type
  1115  	// is not a real type, and it exists in an invalid location.
  1116  	*(*uintptr)(unsafe.Pointer(&s.largeType)) = uintptr(unsafe.Pointer(s.limit))
  1117  	*(*uintptr)(unsafe.Pointer(&s.largeType.GCData)) = s.limit + unsafe.Sizeof(_type{})
  1118  	s.largeType.PtrBytes = 0
  1119  	s.largeType.Size_ = s.elemsize
  1120  
  1121  	return s
  1122  }
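
         // As a worked example of the splitting above (assuming 64 MiB heap arenas on a
         // 64-bit Linux system, where userArenaChunkBytes works out to 8 MiB): a fresh
         // sysAlloc returns a whole 64 MiB heap arena, so the loop carves the extra
         // 56 MiB into seven more 8 MiB chunks and pushes them onto the ready list for
         // future allocUserArenaChunk calls.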
  1123  
