Source file src/runtime/string.go

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package runtime
     6  
     7  import (
     8  	"internal/abi"
     9  	"internal/bytealg"
    10  	"internal/goarch"
    11  	"unsafe"
    12  )
    13  
// tmpStringBufSize is the length, in bytes, of a tmpBuf.
// The constant is known to the compiler.
// There is no fundamental theory behind this number.
const tmpStringBufSize = 32

// tmpBuf is the fixed-size byte array that functions in this file
// accept (as buf *tmpBuf) as optional scratch storage for a result.
type tmpBuf [tmpStringBufSize]byte
    19  
    20  // concatstrings implements a Go string concatenation x+y+z+...
    21  // The operands are passed in the slice a.
    22  // If buf != nil, the compiler has determined that the result does not
    23  // escape the calling function, so the string data can be stored in buf
    24  // if small enough.
    25  func concatstrings(buf *tmpBuf, a []string) string {
    26  	idx := 0
    27  	l := 0
    28  	count := 0
    29  	for i, x := range a {
    30  		n := len(x)
    31  		if n == 0 {
    32  			continue
    33  		}
    34  		if l+n < l {
    35  			throw("string concatenation too long")
    36  		}
    37  		l += n
    38  		count++
    39  		idx = i
    40  	}
    41  	if count == 0 {
    42  		return ""
    43  	}
    44  
    45  	// If there is just one string and either it is not on the stack
    46  	// or our result does not escape the calling frame (buf != nil),
    47  	// then we can return that string directly.
    48  	if count == 1 && (buf != nil || !stringDataOnStack(a[idx])) {
    49  		return a[idx]
    50  	}
    51  	s, b := rawstringtmp(buf, l)
    52  	for _, x := range a {
    53  		copy(b, x)
    54  		b = b[len(x):]
    55  	}
    56  	return s
    57  }
    58  
    59  func concatstring2(buf *tmpBuf, a0, a1 string) string {
    60  	return concatstrings(buf, []string{a0, a1})
    61  }
    62  
    63  func concatstring3(buf *tmpBuf, a0, a1, a2 string) string {
    64  	return concatstrings(buf, []string{a0, a1, a2})
    65  }
    66  
    67  func concatstring4(buf *tmpBuf, a0, a1, a2, a3 string) string {
    68  	return concatstrings(buf, []string{a0, a1, a2, a3})
    69  }
    70  
    71  func concatstring5(buf *tmpBuf, a0, a1, a2, a3, a4 string) string {
    72  	return concatstrings(buf, []string{a0, a1, a2, a3, a4})
    73  }
    74  
    75  // slicebytetostring converts a byte slice to a string.
    76  // It is inserted by the compiler into generated code.
    77  // ptr is a pointer to the first element of the slice;
    78  // n is the length of the slice.
    79  // Buf is a fixed-size buffer for the result,
    80  // it is not nil if the result does not escape.
    81  //
    82  // slicebytetostring should be an internal detail,
    83  // but widely used packages access it using linkname.
    84  // Notable members of the hall of shame include:
    85  //   - github.com/cloudwego/frugal
    86  //
    87  // Do not remove or change the type signature.
    88  // See go.dev/issue/67401.
    89  //
    90  //go:linkname slicebytetostring
    91  func slicebytetostring(buf *tmpBuf, ptr *byte, n int) string {
    92  	if n == 0 {
    93  		// Turns out to be a relatively common case.
    94  		// Consider that you want to parse out data between parens in "foo()bar",
    95  		// you find the indices and convert the subslice to string.
    96  		return ""
    97  	}
    98  	if raceenabled {
    99  		racereadrangepc(unsafe.Pointer(ptr),
   100  			uintptr(n),
   101  			getcallerpc(),
   102  			abi.FuncPCABIInternal(slicebytetostring))
   103  	}
   104  	if msanenabled {
   105  		msanread(unsafe.Pointer(ptr), uintptr(n))
   106  	}
   107  	if asanenabled {
   108  		asanread(unsafe.Pointer(ptr), uintptr(n))
   109  	}
   110  	if n == 1 {
   111  		p := unsafe.Pointer(&staticuint64s[*ptr])
   112  		if goarch.BigEndian {
   113  			p = add(p, 7)
   114  		}
   115  		return unsafe.String((*byte)(p), 1)
   116  	}
   117  
   118  	var p unsafe.Pointer
   119  	if buf != nil && n <= len(buf) {
   120  		p = unsafe.Pointer(buf)
   121  	} else {
   122  		p = mallocgc(uintptr(n), nil, false)
   123  	}
   124  	memmove(p, unsafe.Pointer(ptr), uintptr(n))
   125  	return unsafe.String((*byte)(p), n)
   126  }
   127  
   128  // stringDataOnStack reports whether the string's data is
   129  // stored on the current goroutine's stack.
   130  func stringDataOnStack(s string) bool {
   131  	ptr := uintptr(unsafe.Pointer(unsafe.StringData(s)))
   132  	stk := getg().stack
   133  	return stk.lo <= ptr && ptr < stk.hi
   134  }
   135  
   136  func rawstringtmp(buf *tmpBuf, l int) (s string, b []byte) {
   137  	if buf != nil && l <= len(buf) {
   138  		b = buf[:l]
   139  		s = slicebytetostringtmp(&b[0], len(b))
   140  	} else {
   141  		s, b = rawstring(l)
   142  	}
   143  	return
   144  }
   145  
   146  // slicebytetostringtmp returns a "string" referring to the actual []byte bytes.
   147  //
   148  // Callers need to ensure that the returned string will not be used after
   149  // the calling goroutine modifies the original slice or synchronizes with
   150  // another goroutine.
   151  //
   152  // The function is only called when instrumenting
   153  // and otherwise intrinsified by the compiler.
   154  //
   155  // Some internal compiler optimizations use this function.
   156  //   - Used for m[T1{... Tn{..., string(k), ...} ...}] and m[string(k)]
   157  //     where k is []byte, T1 to Tn is a nesting of struct and array literals.
   158  //   - Used for "<"+string(b)+">" concatenation where b is []byte.
   159  //   - Used for string(b)=="foo" comparison where b is []byte.
   160  func slicebytetostringtmp(ptr *byte, n int) string {
   161  	if raceenabled && n > 0 {
   162  		racereadrangepc(unsafe.Pointer(ptr),
   163  			uintptr(n),
   164  			getcallerpc(),
   165  			abi.FuncPCABIInternal(slicebytetostringtmp))
   166  	}
   167  	if msanenabled && n > 0 {
   168  		msanread(unsafe.Pointer(ptr), uintptr(n))
   169  	}
   170  	if asanenabled && n > 0 {
   171  		asanread(unsafe.Pointer(ptr), uintptr(n))
   172  	}
   173  	return unsafe.String(ptr, n)
   174  }
   175  
   176  func stringtoslicebyte(buf *tmpBuf, s string) []byte {
   177  	var b []byte
   178  	if buf != nil && len(s) <= len(buf) {
   179  		*buf = tmpBuf{}
   180  		b = buf[:len(s)]
   181  	} else {
   182  		b = rawbyteslice(len(s))
   183  	}
   184  	copy(b, s)
   185  	return b
   186  }
   187  
   188  func stringtoslicerune(buf *[tmpStringBufSize]rune, s string) []rune {
   189  	// two passes.
   190  	// unlike slicerunetostring, no race because strings are immutable.
   191  	n := 0
   192  	for range s {
   193  		n++
   194  	}
   195  
   196  	var a []rune
   197  	if buf != nil && n <= len(buf) {
   198  		*buf = [tmpStringBufSize]rune{}
   199  		a = buf[:n]
   200  	} else {
   201  		a = rawruneslice(n)
   202  	}
   203  
   204  	n = 0
   205  	for _, r := range s {
   206  		a[n] = r
   207  		n++
   208  	}
   209  	return a
   210  }
   211  
   212  func slicerunetostring(buf *tmpBuf, a []rune) string {
   213  	if raceenabled && len(a) > 0 {
   214  		racereadrangepc(unsafe.Pointer(&a[0]),
   215  			uintptr(len(a))*unsafe.Sizeof(a[0]),
   216  			getcallerpc(),
   217  			abi.FuncPCABIInternal(slicerunetostring))
   218  	}
   219  	if msanenabled && len(a) > 0 {
   220  		msanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
   221  	}
   222  	if asanenabled && len(a) > 0 {
   223  		asanread(unsafe.Pointer(&a[0]), uintptr(len(a))*unsafe.Sizeof(a[0]))
   224  	}
   225  	var dum [4]byte
   226  	size1 := 0
   227  	for _, r := range a {
   228  		size1 += encoderune(dum[:], r)
   229  	}
   230  	s, b := rawstringtmp(buf, size1+3)
   231  	size2 := 0
   232  	for _, r := range a {
   233  		// check for race
   234  		if size2 >= size1 {
   235  			break
   236  		}
   237  		size2 += encoderune(b[size2:], r)
   238  	}
   239  	return s[:size2]
   240  }
   241  
   242  type stringStruct struct {
   243  	str unsafe.Pointer
   244  	len int
   245  }
   246  
   247  // Variant with *byte pointer type for DWARF debugging.
   248  type stringStructDWARF struct {
   249  	str *byte
   250  	len int
   251  }
   252  
   253  func stringStructOf(sp *string) *stringStruct {
   254  	return (*stringStruct)(unsafe.Pointer(sp))
   255  }
   256  
   257  func intstring(buf *[4]byte, v int64) (s string) {
   258  	var b []byte
   259  	if buf != nil {
   260  		b = buf[:]
   261  		s = slicebytetostringtmp(&b[0], len(b))
   262  	} else {
   263  		s, b = rawstring(4)
   264  	}
   265  	if int64(rune(v)) != v {
   266  		v = runeError
   267  	}
   268  	n := encoderune(b, rune(v))
   269  	return s[:n]
   270  }
   271  
   272  // rawstring allocates storage for a new string. The returned
   273  // string and byte slice both refer to the same storage.
   274  // The storage is not zeroed. Callers should use
   275  // b to set the string contents and then drop b.
   276  func rawstring(size int) (s string, b []byte) {
   277  	p := mallocgc(uintptr(size), nil, false)
   278  	return unsafe.String((*byte)(p), size), unsafe.Slice((*byte)(p), size)
   279  }
   280  
   281  // rawbyteslice allocates a new byte slice. The byte slice is not zeroed.
   282  func rawbyteslice(size int) (b []byte) {
   283  	cap := roundupsize(uintptr(size), true)
   284  	p := mallocgc(cap, nil, false)
   285  	if cap != uintptr(size) {
   286  		memclrNoHeapPointers(add(p, uintptr(size)), cap-uintptr(size))
   287  	}
   288  
   289  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(cap)}
   290  	return
   291  }
   292  
   293  // rawruneslice allocates a new rune slice. The rune slice is not zeroed.
   294  func rawruneslice(size int) (b []rune) {
   295  	if uintptr(size) > maxAlloc/4 {
   296  		throw("out of memory")
   297  	}
   298  	mem := roundupsize(uintptr(size)*4, true)
   299  	p := mallocgc(mem, nil, false)
   300  	if mem != uintptr(size)*4 {
   301  		memclrNoHeapPointers(add(p, uintptr(size)*4), mem-uintptr(size)*4)
   302  	}
   303  
   304  	*(*slice)(unsafe.Pointer(&b)) = slice{p, size, int(mem / 4)}
   305  	return
   306  }
   307  
   308  // used by cmd/cgo
   309  func gobytes(p *byte, n int) (b []byte) {
   310  	if n == 0 {
   311  		return make([]byte, 0)
   312  	}
   313  
   314  	if n < 0 || uintptr(n) > maxAlloc {
   315  		panic(errorString("gobytes: length out of range"))
   316  	}
   317  
   318  	bp := mallocgc(uintptr(n), nil, false)
   319  	memmove(bp, unsafe.Pointer(p), uintptr(n))
   320  
   321  	*(*slice)(unsafe.Pointer(&b)) = slice{bp, n, n}
   322  	return
   323  }
   324  
   325  // This is exported via linkname to assembly in syscall (for Plan9) and cgo.
   326  //
   327  //go:linkname gostring
   328  func gostring(p *byte) string {
   329  	l := findnull(p)
   330  	if l == 0 {
   331  		return ""
   332  	}
   333  	s, b := rawstring(l)
   334  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   335  	return s
   336  }
   337  
   338  // internal_syscall_gostring is a version of gostring for internal/syscall/unix.
   339  //
   340  //go:linkname internal_syscall_gostring internal/syscall/unix.gostring
   341  func internal_syscall_gostring(p *byte) string {
   342  	return gostring(p)
   343  }
   344  
   345  func gostringn(p *byte, l int) string {
   346  	if l == 0 {
   347  		return ""
   348  	}
   349  	s, b := rawstring(l)
   350  	memmove(unsafe.Pointer(&b[0]), unsafe.Pointer(p), uintptr(l))
   351  	return s
   352  }
   353  
   354  const (
   355  	maxUint64 = ^uint64(0)
   356  	maxInt64  = int64(maxUint64 >> 1)
   357  )
   358  
   359  // atoi64 parses an int64 from a string s.
   360  // The bool result reports whether s is a number
   361  // representable by a value of type int64.
   362  func atoi64(s string) (int64, bool) {
   363  	if s == "" {
   364  		return 0, false
   365  	}
   366  
   367  	neg := false
   368  	if s[0] == '-' {
   369  		neg = true
   370  		s = s[1:]
   371  	}
   372  
   373  	un := uint64(0)
   374  	for i := 0; i < len(s); i++ {
   375  		c := s[i]
   376  		if c < '0' || c > '9' {
   377  			return 0, false
   378  		}
   379  		if un > maxUint64/10 {
   380  			// overflow
   381  			return 0, false
   382  		}
   383  		un *= 10
   384  		un1 := un + uint64(c) - '0'
   385  		if un1 < un {
   386  			// overflow
   387  			return 0, false
   388  		}
   389  		un = un1
   390  	}
   391  
   392  	if !neg && un > uint64(maxInt64) {
   393  		return 0, false
   394  	}
   395  	if neg && un > uint64(maxInt64)+1 {
   396  		return 0, false
   397  	}
   398  
   399  	n := int64(un)
   400  	if neg {
   401  		n = -n
   402  	}
   403  
   404  	return n, true
   405  }
   406  
   407  // atoi is like atoi64 but for integers
   408  // that fit into an int.
   409  func atoi(s string) (int, bool) {
   410  	if n, ok := atoi64(s); n == int64(int(n)) {
   411  		return int(n), ok
   412  	}
   413  	return 0, false
   414  }
   415  
   416  // atoi32 is like atoi but for integers
   417  // that fit into an int32.
   418  func atoi32(s string) (int32, bool) {
   419  	if n, ok := atoi64(s); n == int64(int32(n)) {
   420  		return int32(n), ok
   421  	}
   422  	return 0, false
   423  }
   424  
   425  // parseByteCount parses a string that represents a count of bytes.
   426  //
   427  // s must match the following regular expression:
   428  //
   429  //	^[0-9]+(([KMGT]i)?B)?$
   430  //
   431  // In other words, an integer byte count with an optional unit
   432  // suffix. Acceptable suffixes include one of
   433  // - KiB, MiB, GiB, TiB which represent binary IEC/ISO 80000 units, or
   434  // - B, which just represents bytes.
   435  //
   436  // Returns an int64 because that's what its callers want and receive,
   437  // but the result is always non-negative.
   438  func parseByteCount(s string) (int64, bool) {
   439  	// The empty string is not valid.
   440  	if s == "" {
   441  		return 0, false
   442  	}
   443  	// Handle the easy non-suffix case.
   444  	last := s[len(s)-1]
   445  	if last >= '0' && last <= '9' {
   446  		n, ok := atoi64(s)
   447  		if !ok || n < 0 {
   448  			return 0, false
   449  		}
   450  		return n, ok
   451  	}
   452  	// Failing a trailing digit, this must always end in 'B'.
   453  	// Also at this point there must be at least one digit before
   454  	// that B.
   455  	if last != 'B' || len(s) < 2 {
   456  		return 0, false
   457  	}
   458  	// The one before that must always be a digit or 'i'.
   459  	if c := s[len(s)-2]; c >= '0' && c <= '9' {
   460  		// Trivial 'B' suffix.
   461  		n, ok := atoi64(s[:len(s)-1])
   462  		if !ok || n < 0 {
   463  			return 0, false
   464  		}
   465  		return n, ok
   466  	} else if c != 'i' {
   467  		return 0, false
   468  	}
   469  	// Finally, we need at least 4 characters now, for the unit
   470  	// prefix and at least one digit.
   471  	if len(s) < 4 {
   472  		return 0, false
   473  	}
   474  	power := 0
   475  	switch s[len(s)-3] {
   476  	case 'K':
   477  		power = 1
   478  	case 'M':
   479  		power = 2
   480  	case 'G':
   481  		power = 3
   482  	case 'T':
   483  		power = 4
   484  	default:
   485  		// Invalid suffix.
   486  		return 0, false
   487  	}
   488  	m := uint64(1)
   489  	for i := 0; i < power; i++ {
   490  		m *= 1024
   491  	}
   492  	n, ok := atoi64(s[:len(s)-3])
   493  	if !ok || n < 0 {
   494  		return 0, false
   495  	}
   496  	un := uint64(n)
   497  	if un > maxUint64/m {
   498  		// Overflow.
   499  		return 0, false
   500  	}
   501  	un *= m
   502  	if un > uint64(maxInt64) {
   503  		// Overflow.
   504  		return 0, false
   505  	}
   506  	return int64(un), true
   507  }
   508  
   509  //go:nosplit
   510  func findnull(s *byte) int {
   511  	if s == nil {
   512  		return 0
   513  	}
   514  
   515  	// Avoid IndexByteString on Plan 9 because it uses SSE instructions
   516  	// on x86 machines, and those are classified as floating point instructions,
   517  	// which are illegal in a note handler.
   518  	if GOOS == "plan9" {
   519  		p := (*[maxAlloc/2 - 1]byte)(unsafe.Pointer(s))
   520  		l := 0
   521  		for p[l] != 0 {
   522  			l++
   523  		}
   524  		return l
   525  	}
   526  
   527  	// pageSize is the unit we scan at a time looking for NULL.
   528  	// It must be the minimum page size for any architecture Go
   529  	// runs on. It's okay (just a minor performance loss) if the
   530  	// actual system page size is larger than this value.
   531  	const pageSize = 4096
   532  
   533  	offset := 0
   534  	ptr := unsafe.Pointer(s)
   535  	// IndexByteString uses wide reads, so we need to be careful
   536  	// with page boundaries. Call IndexByteString on
   537  	// [ptr, endOfPage) interval.
   538  	safeLen := int(pageSize - uintptr(ptr)%pageSize)
   539  
   540  	for {
   541  		t := *(*string)(unsafe.Pointer(&stringStruct{ptr, safeLen}))
   542  		// Check one page at a time.
   543  		if i := bytealg.IndexByteString(t, 0); i != -1 {
   544  			return offset + i
   545  		}
   546  		// Move to next page
   547  		ptr = unsafe.Pointer(uintptr(ptr) + uintptr(safeLen))
   548  		offset += safeLen
   549  		safeLen = pageSize
   550  	}
   551  }
   552  
   553  func findnullw(s *uint16) int {
   554  	if s == nil {
   555  		return 0
   556  	}
   557  	p := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(s))
   558  	l := 0
   559  	for p[l] != 0 {
   560  		l++
   561  	}
   562  	return l
   563  }
   564  
   565  //go:nosplit
   566  func gostringnocopy(str *byte) string {
   567  	ss := stringStruct{str: unsafe.Pointer(str), len: findnull(str)}
   568  	s := *(*string)(unsafe.Pointer(&ss))
   569  	return s
   570  }
   571  
   572  func gostringw(strw *uint16) string {
   573  	var buf [8]byte
   574  	str := (*[maxAlloc/2/2 - 1]uint16)(unsafe.Pointer(strw))
   575  	n1 := 0
   576  	for i := 0; str[i] != 0; i++ {
   577  		n1 += encoderune(buf[:], rune(str[i]))
   578  	}
   579  	s, b := rawstring(n1 + 4)
   580  	n2 := 0
   581  	for i := 0; str[i] != 0; i++ {
   582  		// check for race
   583  		if n2 >= n1 {
   584  			break
   585  		}
   586  		n2 += encoderune(b[n2:], rune(str[i]))
   587  	}
   588  	b[n2] = 0 // for luck
   589  	return s[:n2]
   590  }
   591  

View as plain text