Source file src/cmd/compile/internal/ssagen/intrinsics.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package ssagen
     6  
     7  import (
     8  	"fmt"
     9  	"internal/buildcfg"
    10  
    11  	"cmd/compile/internal/base"
    12  	"cmd/compile/internal/ir"
    13  	"cmd/compile/internal/ssa"
    14  	"cmd/compile/internal/types"
    15  	"cmd/internal/sys"
    16  )
    17  
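        // intrinsics maps (architecture, package, function) to the builder that
        // intrinsifies that function; it is populated by initIntrinsics.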
    18  var intrinsics intrinsicBuilders
    19  
    20  // An intrinsicBuilder converts a call node n into an ssa value that
    21  // implements that call as an intrinsic. args is a list of arguments to the func.
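        // A builder may return nil when the intrinsic produces no result value
        // (for example, when it only updates the memory state).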
    22  type intrinsicBuilder func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value
    23  
    24  type intrinsicKey struct {
    25  	arch *sys.Arch
    26  	pkg  string
    27  	fn   string
    28  }
    29  
    30  // intrinsicBuildConfig specifies the config to use for intrinsic building.
    31  type intrinsicBuildConfig struct {
    32  	instrumenting bool
    33  
    34  	go386     string
    35  	goamd64   int
    36  	goarm     buildcfg.GoarmFeatures
    37  	goarm64   buildcfg.Goarm64Features
    38  	gomips    string
    39  	gomips64  string
    40  	goppc64   int
    41  	goriscv64 int
    42  }
    43  
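        // intrinsicBuilders records the set of registered intrinsics, keyed by
        // architecture and fully qualified function name.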
    44  type intrinsicBuilders map[intrinsicKey]intrinsicBuilder
    45  
    46  // add adds the intrinsic builder b for pkg.fn for the given architecture.
    47  func (ib intrinsicBuilders) add(arch *sys.Arch, pkg, fn string, b intrinsicBuilder) {
    48  	if _, found := ib[intrinsicKey{arch, pkg, fn}]; found {
    49  		panic(fmt.Sprintf("intrinsic already exists for %v.%v on %v", pkg, fn, arch.Name))
    50  	}
    51  	ib[intrinsicKey{arch, pkg, fn}] = b
    52  }
    53  
    54  // addForArchs adds the intrinsic builder b for pkg.fn for the given architectures.
    55  func (ib intrinsicBuilders) addForArchs(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) {
    56  	for _, arch := range archs {
    57  		ib.add(arch, pkg, fn, b)
    58  	}
    59  }
    60  
    61  // addForFamilies does the same as addForArchs but operates on architecture families.
    62  func (ib intrinsicBuilders) addForFamilies(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) {
    63  	for _, arch := range sys.Archs {
    64  		if arch.InFamily(archFamilies...) {
    65  			ib.add(arch, pkg, fn, b)
    66  		}
    67  	}
    68  }
    69  
    70  // alias aliases pkg.fn to targetPkg.targetFn for all architectures in archs
    71  // for which targetPkg.targetFn already exists.
    72  func (ib intrinsicBuilders) alias(pkg, fn, targetPkg, targetFn string, archs ...*sys.Arch) {
    73  	// TODO(jsing): Consider making this work even if the alias is added
    74  	// before the intrinsic.
    75  	aliased := false
    76  	for _, arch := range archs {
    77  		if b := ib.lookup(arch, targetPkg, targetFn); b != nil {
    78  			ib.add(arch, pkg, fn, b)
    79  			aliased = true
    80  		}
    81  	}
    82  	if !aliased {
    83  		panic(fmt.Sprintf("attempted to alias %s.%s to undefined intrinsic %s.%s", pkg, fn, targetPkg, targetFn))
    84  	}
    85  }
    86  
    87  // lookup looks up the intrinsic for a pkg.fn on the specified architecture.
    88  func (ib intrinsicBuilders) lookup(arch *sys.Arch, pkg, fn string) intrinsicBuilder {
    89  	return ib[intrinsicKey{arch, pkg, fn}]
    90  }
    91  
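        // initIntrinsics (re)builds the global intrinsics table. If cfg is nil, the
        // configuration is taken from the global build configuration and flags.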
    92  func initIntrinsics(cfg *intrinsicBuildConfig) {
    93  	if cfg == nil {
    94  		cfg = &intrinsicBuildConfig{
    95  			instrumenting: base.Flag.Cfg.Instrumenting,
    96  			go386:         buildcfg.GO386,
    97  			goamd64:       buildcfg.GOAMD64,
    98  			goarm:         buildcfg.GOARM,
    99  			goarm64:       buildcfg.GOARM64,
   100  			gomips:        buildcfg.GOMIPS,
   101  			gomips64:      buildcfg.GOMIPS64,
   102  			goppc64:       buildcfg.GOPPC64,
   103  			goriscv64:     buildcfg.GORISCV64,
   104  		}
   105  	}
   106  	intrinsics = intrinsicBuilders{}
   107  
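        	// Group architectures by pointer size. lwatomics collects the
        	// architectures (everything except PPC64) on which the acquire/release
        	// variants simply alias the plain atomic operations registered below.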
   108  	var p4 []*sys.Arch
   109  	var p8 []*sys.Arch
   110  	var lwatomics []*sys.Arch
   111  	for _, a := range sys.Archs {
   112  		if a.PtrSize == 4 {
   113  			p4 = append(p4, a)
   114  		} else {
   115  			p8 = append(p8, a)
   116  		}
   117  		if a.Family != sys.PPC64 {
   118  			lwatomics = append(lwatomics, a)
   119  		}
   120  	}
   121  	all := sys.Archs[:]
   122  
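        	// Local shorthands to keep the registrations below compact.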
   123  	add := func(pkg, fn string, b intrinsicBuilder, archs ...*sys.Arch) {
   124  		intrinsics.addForArchs(pkg, fn, b, archs...)
   125  	}
   126  	addF := func(pkg, fn string, b intrinsicBuilder, archFamilies ...sys.ArchFamily) {
   127  		intrinsics.addForFamilies(pkg, fn, b, archFamilies...)
   128  	}
   129  	alias := func(pkg, fn, pkg2, fn2 string, archs ...*sys.Arch) {
   130  		intrinsics.alias(pkg, fn, pkg2, fn2, archs...)
   131  	}
   132  
   133  	/******** runtime ********/
   134  	if !cfg.instrumenting {
   135  		add("runtime", "slicebytetostringtmp",
   136  			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   137  				// Compiler frontend optimizations emit OBYTES2STRTMP nodes
   138  				// for the backend instead of slicebytetostringtmp calls
   139  				// when not instrumenting.
   140  				return s.newValue2(ssa.OpStringMake, n.Type(), args[0], args[1])
   141  			},
   142  			all...)
   143  	}
   144  	addF("internal/runtime/math", "MulUintptr",
   145  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   146  			if s.config.PtrSize == 4 {
   147  				return s.newValue2(ssa.OpMul32uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
   148  			}
   149  			return s.newValue2(ssa.OpMul64uover, types.NewTuple(types.Types[types.TUINT], types.Types[types.TUINT]), args[0], args[1])
   150  		},
   151  		sys.AMD64, sys.I386, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.ARM64)
   152  	add("runtime", "KeepAlive",
   153  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   154  			data := s.newValue1(ssa.OpIData, s.f.Config.Types.BytePtr, args[0])
   155  			s.vars[memVar] = s.newValue2(ssa.OpKeepAlive, types.TypeMem, data, s.mem())
   156  			return nil
   157  		},
   158  		all...)
   159  
   160  	addF("runtime", "publicationBarrier",
   161  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   162  			s.vars[memVar] = s.newValue1(ssa.OpPubBarrier, types.TypeMem, s.mem())
   163  			return nil
   164  		},
   165  		sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64)
   166  
   167  	/******** internal/runtime/sys ********/
   168  	add("internal/runtime/sys", "GetCallerPC",
   169  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   170  			return s.newValue0(ssa.OpGetCallerPC, s.f.Config.Types.Uintptr)
   171  		},
   172  		all...)
   173  
   174  	add("internal/runtime/sys", "GetCallerSP",
   175  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   176  			return s.newValue1(ssa.OpGetCallerSP, s.f.Config.Types.Uintptr, s.mem())
   177  		},
   178  		all...)
   179  
   180  	add("internal/runtime/sys", "GetClosurePtr",
   181  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   182  			return s.newValue0(ssa.OpGetClosurePtr, s.f.Config.Types.Uintptr)
   183  		},
   184  		all...)
   185  
   186  	brev_arch := []sys.ArchFamily{sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X}
   187  	if cfg.goppc64 >= 10 {
   188  		// Use only on Power10, as the byte-reverse instructions that Power10
   189  		// provides make this worthwhile as an intrinsic.
   190  		brev_arch = append(brev_arch, sys.PPC64)
   191  	}
   192  	addF("internal/runtime/sys", "Bswap32",
   193  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   194  			return s.newValue1(ssa.OpBswap32, types.Types[types.TUINT32], args[0])
   195  		},
   196  		brev_arch...)
   197  	addF("internal/runtime/sys", "Bswap64",
   198  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   199  			return s.newValue1(ssa.OpBswap64, types.Types[types.TUINT64], args[0])
   200  		},
   201  		brev_arch...)
   202  
   203  	/****** Prefetch ******/
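        	// makePrefetchFunc returns a builder that emits op for the address in
        	// args[0] and threads the updated memory state through s.vars[memVar].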
   204  	makePrefetchFunc := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   205  		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   206  			s.vars[memVar] = s.newValue2(op, types.TypeMem, args[0], s.mem())
   207  			return nil
   208  		}
   209  	}
   210  
   211  	// Make Prefetch intrinsics for supported platforms.
   212  	// On unsupported platforms, the stub function will be eliminated.
   213  	addF("internal/runtime/sys", "Prefetch", makePrefetchFunc(ssa.OpPrefetchCache),
   214  		sys.AMD64, sys.ARM64, sys.PPC64)
   215  	addF("internal/runtime/sys", "PrefetchStreamed", makePrefetchFunc(ssa.OpPrefetchCacheStreamed),
   216  		sys.AMD64, sys.ARM64, sys.PPC64)
   217  
   218  	/******** internal/runtime/atomic ********/
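        	// An atomicOpEmitter emits the SSA for a single atomic operation op of
        	// kind typ; when needReturn is set it also records the operation's
        	// result in s.vars[n] so the surrounding builder can return it.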
   219  	type atomicOpEmitter func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool)
   220  
   221  	addF("internal/runtime/atomic", "Load",
   222  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   223  			v := s.newValue2(ssa.OpAtomicLoad32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
   224  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   225  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
   226  		},
   227  		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   228  	addF("internal/runtime/atomic", "Load8",
   229  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   230  			v := s.newValue2(ssa.OpAtomicLoad8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], s.mem())
   231  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   232  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v)
   233  		},
   234  		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   235  	addF("internal/runtime/atomic", "Load64",
   236  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   237  			v := s.newValue2(ssa.OpAtomicLoad64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
   238  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   239  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
   240  		},
   241  		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   242  	addF("internal/runtime/atomic", "LoadAcq",
   243  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   244  			v := s.newValue2(ssa.OpAtomicLoadAcq32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], s.mem())
   245  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   246  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
   247  		},
   248  		sys.PPC64)
   249  	addF("internal/runtime/atomic", "LoadAcq64",
   250  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   251  			v := s.newValue2(ssa.OpAtomicLoadAcq64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], s.mem())
   252  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   253  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
   254  		},
   255  		sys.PPC64)
   256  	addF("internal/runtime/atomic", "Loadp",
   257  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   258  			v := s.newValue2(ssa.OpAtomicLoadPtr, types.NewTuple(s.f.Config.Types.BytePtr, types.TypeMem), args[0], s.mem())
   259  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   260  			return s.newValue1(ssa.OpSelect0, s.f.Config.Types.BytePtr, v)
   261  		},
   262  		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   263  
   264  	addF("internal/runtime/atomic", "Store",
   265  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   266  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32, types.TypeMem, args[0], args[1], s.mem())
   267  			return nil
   268  		},
   269  		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   270  	addF("internal/runtime/atomic", "Store8",
   271  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   272  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore8, types.TypeMem, args[0], args[1], s.mem())
   273  			return nil
   274  		},
   275  		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   276  	addF("internal/runtime/atomic", "Store64",
   277  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   278  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64, types.TypeMem, args[0], args[1], s.mem())
   279  			return nil
   280  		},
   281  		sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   282  	addF("internal/runtime/atomic", "StorepNoWB",
   283  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   284  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStorePtrNoWB, types.TypeMem, args[0], args[1], s.mem())
   285  			return nil
   286  		},
   287  		sys.AMD64, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.RISCV64, sys.S390X)
   288  	addF("internal/runtime/atomic", "StoreRel",
   289  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   290  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel32, types.TypeMem, args[0], args[1], s.mem())
   291  			return nil
   292  		},
   293  		sys.PPC64)
   294  	addF("internal/runtime/atomic", "StoreRel64",
   295  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   296  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStoreRel64, types.TypeMem, args[0], args[1], s.mem())
   297  			return nil
   298  		},
   299  		sys.PPC64)
   300  
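        	// makeAtomicGuardedIntrinsicLoong64common returns a builder that branches
        	// at run time on Loong64HasLAM_BH, emitting op1 when the LAM_BH
        	// instructions are available and op0 otherwise.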
   301  	makeAtomicGuardedIntrinsicLoong64common := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter, needReturn bool) intrinsicBuilder {
   302  		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   303  			// Whether the target provides the LAM_BH atomic instructions is detected dynamically.
   304  			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLAM_BH, s.sb)
   305  			v := s.load(types.Types[types.TBOOL], addr)
   306  			b := s.endBlock()
   307  			b.Kind = ssa.BlockIf
   308  			b.SetControl(v)
   309  			bTrue := s.f.NewBlock(ssa.BlockPlain)
   310  			bFalse := s.f.NewBlock(ssa.BlockPlain)
   311  			bEnd := s.f.NewBlock(ssa.BlockPlain)
   312  			b.AddEdgeTo(bTrue)
   313  			b.AddEdgeTo(bFalse)
   314  			b.Likely = ssa.BranchLikely
   315  
   316  			// We have the atomic instructions - use them directly.
   317  			s.startBlock(bTrue)
   318  			emit(s, n, args, op1, typ, needReturn)
   319  			s.endBlock().AddEdgeTo(bEnd)
   320  
   321  			// Use original instruction sequence.
   322  			s.startBlock(bFalse)
   323  			emit(s, n, args, op0, typ, needReturn)
   324  			s.endBlock().AddEdgeTo(bEnd)
   325  
   326  			// Merge results.
   327  			s.startBlock(bEnd)
   328  
   329  			if needReturn {
   330  				return s.variable(n, types.Types[typ])
   331  			} else {
   332  				return nil
   333  			}
   334  		}
   335  	}
   336  
   337  	makeAtomicStoreGuardedIntrinsicLoong64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
   338  		return makeAtomicGuardedIntrinsicLoong64common(op0, op1, typ, emit, false)
   339  	}
   340  
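        	// atomicStoreEmitterLoong64 emits an atomic store op, updating the memory
        	// state and, if needReturn, recording the returned value in s.vars[n].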
   341  	atomicStoreEmitterLoong64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
   342  		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
   343  		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   344  		if needReturn {
   345  			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
   346  		}
   347  	}
   348  
   349  	addF("internal/runtime/atomic", "Store8",
   350  		makeAtomicStoreGuardedIntrinsicLoong64(ssa.OpAtomicStore8, ssa.OpAtomicStore8Variant, types.TUINT8, atomicStoreEmitterLoong64),
   351  		sys.Loong64)
   352  	addF("internal/runtime/atomic", "Store",
   353  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   354  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore32Variant, types.TypeMem, args[0], args[1], s.mem())
   355  			return nil
   356  		},
   357  		sys.Loong64)
   358  	addF("internal/runtime/atomic", "Store64",
   359  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   360  			s.vars[memVar] = s.newValue3(ssa.OpAtomicStore64Variant, types.TypeMem, args[0], args[1], s.mem())
   361  			return nil
   362  		},
   363  		sys.Loong64)
   364  
   365  	addF("internal/runtime/atomic", "Xchg8",
   366  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   367  			v := s.newValue3(ssa.OpAtomicExchange8, types.NewTuple(types.Types[types.TUINT8], types.TypeMem), args[0], args[1], s.mem())
   368  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   369  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT8], v)
   370  		},
   371  		sys.AMD64, sys.PPC64)
   372  	addF("internal/runtime/atomic", "Xchg",
   373  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   374  			v := s.newValue3(ssa.OpAtomicExchange32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
   375  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   376  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
   377  		},
   378  		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   379  	addF("internal/runtime/atomic", "Xchg64",
   380  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   381  			v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
   382  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   383  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
   384  		},
   385  		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   386  
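        	// makeAtomicGuardedIntrinsicARM64common returns a builder that uses the LSE
        	// form op1 unconditionally when GOARM64 guarantees LSE, and otherwise
        	// branches at run time on ARM64HasATOMICS between op0 and op1.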
   387  	makeAtomicGuardedIntrinsicARM64common := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter, needReturn bool) intrinsicBuilder {
   388  
   389  		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   390  			if cfg.goarm64.LSE {
   391  				emit(s, n, args, op1, typ, needReturn)
   392  			} else {
   393  				// Whether the target provides the LSE atomic instructions is detected dynamically.
   394  				addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARM64HasATOMICS, s.sb)
   395  				v := s.load(types.Types[types.TBOOL], addr)
   396  				b := s.endBlock()
   397  				b.Kind = ssa.BlockIf
   398  				b.SetControl(v)
   399  				bTrue := s.f.NewBlock(ssa.BlockPlain)
   400  				bFalse := s.f.NewBlock(ssa.BlockPlain)
   401  				bEnd := s.f.NewBlock(ssa.BlockPlain)
   402  				b.AddEdgeTo(bTrue)
   403  				b.AddEdgeTo(bFalse)
   404  				b.Likely = ssa.BranchLikely
   405  
   406  				// We have the atomic instructions - use them directly.
   407  				s.startBlock(bTrue)
   408  				emit(s, n, args, op1, typ, needReturn)
   409  				s.endBlock().AddEdgeTo(bEnd)
   410  
   411  				// Use original instruction sequence.
   412  				s.startBlock(bFalse)
   413  				emit(s, n, args, op0, typ, needReturn)
   414  				s.endBlock().AddEdgeTo(bEnd)
   415  
   416  				// Merge results.
   417  				s.startBlock(bEnd)
   418  			}
   419  			if needReturn {
   420  				return s.variable(n, types.Types[typ])
   421  			} else {
   422  				return nil
   423  			}
   424  		}
   425  	}
   426  	makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
   427  		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, true)
   428  	}
   429  	makeAtomicGuardedIntrinsicARM64old := func(op0, op1 ssa.Op, typ types.Kind, emit atomicOpEmitter) intrinsicBuilder {
   430  		return makeAtomicGuardedIntrinsicARM64common(op0, op1, typ, emit, false)
   431  	}
   432  
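        	// atomicEmitterARM64 emits a read-modify-write atomic op on args[0] with
        	// operand args[1], updating the memory state and, if needReturn,
        	// recording the operation's result value in s.vars[n].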
   433  	atomicEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
   434  		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
   435  		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   436  		if needReturn {
   437  			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
   438  		}
   439  	}
   440  	addF("internal/runtime/atomic", "Xchg8",
   441  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange8, ssa.OpAtomicExchange8Variant, types.TUINT8, atomicEmitterARM64),
   442  		sys.ARM64)
   443  	addF("internal/runtime/atomic", "Xchg",
   444  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, types.TUINT32, atomicEmitterARM64),
   445  		sys.ARM64)
   446  	addF("internal/runtime/atomic", "Xchg64",
   447  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, types.TUINT64, atomicEmitterARM64),
   448  		sys.ARM64)
   449  
   450  	addF("internal/runtime/atomic", "Xadd",
   451  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   452  			v := s.newValue3(ssa.OpAtomicAdd32, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
   453  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   454  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT32], v)
   455  		},
   456  		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   457  	addF("internal/runtime/atomic", "Xadd64",
   458  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   459  			v := s.newValue3(ssa.OpAtomicAdd64, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
   460  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   461  			return s.newValue1(ssa.OpSelect0, types.Types[types.TUINT64], v)
   462  		},
   463  		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   464  
   465  	addF("internal/runtime/atomic", "Xadd",
   466  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, types.TUINT32, atomicEmitterARM64),
   467  		sys.ARM64)
   468  	addF("internal/runtime/atomic", "Xadd64",
   469  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, types.TUINT64, atomicEmitterARM64),
   470  		sys.ARM64)
   471  
   472  	addF("internal/runtime/atomic", "Cas",
   473  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   474  			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
   475  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   476  			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
   477  		},
   478  		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   479  	addF("internal/runtime/atomic", "Cas64",
   480  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   481  			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
   482  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   483  			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
   484  		},
   485  		sys.AMD64, sys.Loong64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   486  	addF("internal/runtime/atomic", "CasRel",
   487  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   488  			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
   489  			s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   490  			return s.newValue1(ssa.OpSelect0, types.Types[types.TBOOL], v)
   491  		},
   492  		sys.PPC64)
   493  
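        	// atomicCasEmitterARM64 emits a compare-and-swap op, updating the memory
        	// state and, if needReturn, recording the reported success in s.vars[n].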
   494  	atomicCasEmitterARM64 := func(s *state, n *ir.CallExpr, args []*ssa.Value, op ssa.Op, typ types.Kind, needReturn bool) {
   495  		v := s.newValue4(op, types.NewTuple(types.Types[types.TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
   496  		s.vars[memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
   497  		if needReturn {
   498  			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
   499  		}
   500  	}
   501  
   502  	addF("internal/runtime/atomic", "Cas",
   503  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, types.TBOOL, atomicCasEmitterARM64),
   504  		sys.ARM64)
   505  	addF("internal/runtime/atomic", "Cas64",
   506  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, types.TBOOL, atomicCasEmitterARM64),
   507  		sys.ARM64)
   508  
   509  	// Old-style atomic logical operation API (all supported archs except arm64).
   510  	addF("internal/runtime/atomic", "And8",
   511  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   512  			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
   513  			return nil
   514  		},
   515  		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   516  	addF("internal/runtime/atomic", "And",
   517  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   518  			s.vars[memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
   519  			return nil
   520  		},
   521  		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   522  	addF("internal/runtime/atomic", "Or8",
   523  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   524  			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
   525  			return nil
   526  		},
   527  		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   528  	addF("internal/runtime/atomic", "Or",
   529  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   530  			s.vars[memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
   531  			return nil
   532  		},
   533  		sys.AMD64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
   534  
   535  	// arm64 always uses the new-style atomic logical operations, for both the
   536  	// old- and new-style APIs.
   537  	addF("internal/runtime/atomic", "And8",
   538  		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd8value, ssa.OpAtomicAnd8valueVariant, types.TUINT8, atomicEmitterARM64),
   539  		sys.ARM64)
   540  	addF("internal/runtime/atomic", "Or8",
   541  		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr8value, ssa.OpAtomicOr8valueVariant, types.TUINT8, atomicEmitterARM64),
   542  		sys.ARM64)
   543  	addF("internal/runtime/atomic", "And64",
   544  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd64value, ssa.OpAtomicAnd64valueVariant, types.TUINT64, atomicEmitterARM64),
   545  		sys.ARM64)
   546  	addF("internal/runtime/atomic", "And32",
   547  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64),
   548  		sys.ARM64)
   549  	addF("internal/runtime/atomic", "And",
   550  		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicAnd32value, ssa.OpAtomicAnd32valueVariant, types.TUINT32, atomicEmitterARM64),
   551  		sys.ARM64)
   552  	addF("internal/runtime/atomic", "Or64",
   553  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr64value, ssa.OpAtomicOr64valueVariant, types.TUINT64, atomicEmitterARM64),
   554  		sys.ARM64)
   555  	addF("internal/runtime/atomic", "Or32",
   556  		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64),
   557  		sys.ARM64)
   558  	addF("internal/runtime/atomic", "Or",
   559  		makeAtomicGuardedIntrinsicARM64old(ssa.OpAtomicOr32value, ssa.OpAtomicOr32valueVariant, types.TUINT32, atomicEmitterARM64),
   560  		sys.ARM64)
   561  
   562  	// New-style atomic logical operations, which return the old memory value.
   563  	addF("internal/runtime/atomic", "And64",
   564  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   565  			v := s.newValue3(ssa.OpAtomicAnd64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
   566  			p0, p1 := s.split(v)
   567  			s.vars[memVar] = p1
   568  			return p0
   569  		},
   570  		sys.AMD64, sys.Loong64)
   571  	addF("internal/runtime/atomic", "And32",
   572  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   573  			v := s.newValue3(ssa.OpAtomicAnd32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
   574  			p0, p1 := s.split(v)
   575  			s.vars[memVar] = p1
   576  			return p0
   577  		},
   578  		sys.AMD64, sys.Loong64)
   579  	addF("internal/runtime/atomic", "Or64",
   580  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   581  			v := s.newValue3(ssa.OpAtomicOr64value, types.NewTuple(types.Types[types.TUINT64], types.TypeMem), args[0], args[1], s.mem())
   582  			p0, p1 := s.split(v)
   583  			s.vars[memVar] = p1
   584  			return p0
   585  		},
   586  		sys.AMD64, sys.Loong64)
   587  	addF("internal/runtime/atomic", "Or32",
   588  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   589  			v := s.newValue3(ssa.OpAtomicOr32value, types.NewTuple(types.Types[types.TUINT32], types.TypeMem), args[0], args[1], s.mem())
   590  			p0, p1 := s.split(v)
   591  			s.vars[memVar] = p1
   592  			return p0
   593  		},
   594  		sys.AMD64, sys.Loong64)
   595  
   596  	// Aliases for atomic load operations
   597  	alias("internal/runtime/atomic", "Loadint32", "internal/runtime/atomic", "Load", all...)
   598  	alias("internal/runtime/atomic", "Loadint64", "internal/runtime/atomic", "Load64", all...)
   599  	alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load", p4...)
   600  	alias("internal/runtime/atomic", "Loaduintptr", "internal/runtime/atomic", "Load64", p8...)
   601  	alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load", p4...)
   602  	alias("internal/runtime/atomic", "Loaduint", "internal/runtime/atomic", "Load64", p8...)
   603  	alias("internal/runtime/atomic", "LoadAcq", "internal/runtime/atomic", "Load", lwatomics...)
   604  	alias("internal/runtime/atomic", "LoadAcq64", "internal/runtime/atomic", "Load64", lwatomics...)
   605  	alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...)
   606  	alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq", p4...) // linknamed
   607  	alias("internal/runtime/atomic", "LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...)
   608  	alias("sync", "runtime_LoadAcquintptr", "internal/runtime/atomic", "LoadAcq64", p8...) // linknamed
   609  
   610  	// Aliases for atomic store operations
   611  	alias("internal/runtime/atomic", "Storeint32", "internal/runtime/atomic", "Store", all...)
   612  	alias("internal/runtime/atomic", "Storeint64", "internal/runtime/atomic", "Store64", all...)
   613  	alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store", p4...)
   614  	alias("internal/runtime/atomic", "Storeuintptr", "internal/runtime/atomic", "Store64", p8...)
   615  	alias("internal/runtime/atomic", "StoreRel", "internal/runtime/atomic", "Store", lwatomics...)
   616  	alias("internal/runtime/atomic", "StoreRel64", "internal/runtime/atomic", "Store64", lwatomics...)
   617  	alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...)
   618  	alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel", p4...) // linknamed
   619  	alias("internal/runtime/atomic", "StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...)
   620  	alias("sync", "runtime_StoreReluintptr", "internal/runtime/atomic", "StoreRel64", p8...) // linknamed
   621  
   622  	// Aliases for atomic swap operations
   623  	alias("internal/runtime/atomic", "Xchgint32", "internal/runtime/atomic", "Xchg", all...)
   624  	alias("internal/runtime/atomic", "Xchgint64", "internal/runtime/atomic", "Xchg64", all...)
   625  	alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg", p4...)
   626  	alias("internal/runtime/atomic", "Xchguintptr", "internal/runtime/atomic", "Xchg64", p8...)
   627  
   628  	// Aliases for atomic add operations
   629  	alias("internal/runtime/atomic", "Xaddint32", "internal/runtime/atomic", "Xadd", all...)
   630  	alias("internal/runtime/atomic", "Xaddint64", "internal/runtime/atomic", "Xadd64", all...)
   631  	alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd", p4...)
   632  	alias("internal/runtime/atomic", "Xadduintptr", "internal/runtime/atomic", "Xadd64", p8...)
   633  
   634  	// Aliases for atomic CAS operations
   635  	alias("internal/runtime/atomic", "Casint32", "internal/runtime/atomic", "Cas", all...)
   636  	alias("internal/runtime/atomic", "Casint64", "internal/runtime/atomic", "Cas64", all...)
   637  	alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas", p4...)
   638  	alias("internal/runtime/atomic", "Casuintptr", "internal/runtime/atomic", "Cas64", p8...)
   639  	alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas", p4...)
   640  	alias("internal/runtime/atomic", "Casp1", "internal/runtime/atomic", "Cas64", p8...)
   641  	alias("internal/runtime/atomic", "CasRel", "internal/runtime/atomic", "Cas", lwatomics...)
   642  
   643  	// Aliases for atomic And/Or operations
   644  	alias("internal/runtime/atomic", "Anduintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchLoong64)
   645  	alias("internal/runtime/atomic", "Oruintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchLoong64)
   646  
   647  	/******** math ********/
   648  	addF("math", "sqrt",
   649  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   650  			return s.newValue1(ssa.OpSqrt, types.Types[types.TFLOAT64], args[0])
   651  		},
   652  		sys.I386, sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
   653  	addF("math", "Trunc",
   654  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   655  			return s.newValue1(ssa.OpTrunc, types.Types[types.TFLOAT64], args[0])
   656  		},
   657  		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
   658  	addF("math", "Ceil",
   659  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   660  			return s.newValue1(ssa.OpCeil, types.Types[types.TFLOAT64], args[0])
   661  		},
   662  		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
   663  	addF("math", "Floor",
   664  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   665  			return s.newValue1(ssa.OpFloor, types.Types[types.TFLOAT64], args[0])
   666  		},
   667  		sys.ARM64, sys.PPC64, sys.S390X, sys.Wasm)
   668  	addF("math", "Round",
   669  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   670  			return s.newValue1(ssa.OpRound, types.Types[types.TFLOAT64], args[0])
   671  		},
   672  		sys.ARM64, sys.PPC64, sys.S390X)
   673  	addF("math", "RoundToEven",
   674  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   675  			return s.newValue1(ssa.OpRoundToEven, types.Types[types.TFLOAT64], args[0])
   676  		},
   677  		sys.ARM64, sys.S390X, sys.Wasm)
   678  	addF("math", "Abs",
   679  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   680  			return s.newValue1(ssa.OpAbs, types.Types[types.TFLOAT64], args[0])
   681  		},
   682  		sys.ARM64, sys.ARM, sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm, sys.MIPS, sys.MIPS64)
   683  	addF("math", "Copysign",
   684  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   685  			return s.newValue2(ssa.OpCopysign, types.Types[types.TFLOAT64], args[0], args[1])
   686  		},
   687  		sys.Loong64, sys.PPC64, sys.RISCV64, sys.Wasm)
   688  	addF("math", "FMA",
   689  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   690  			return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
   691  		},
   692  		sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X)
   693  	addF("math", "FMA",
   694  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   695  			if !s.config.UseFMA {
   696  				s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
   697  				return s.variable(n, types.Types[types.TFLOAT64])
   698  			}
   699  
   700  			if cfg.goamd64 >= 3 {
   701  				return s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
   702  			}
   703  
   704  			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasFMA)
   705  			b := s.endBlock()
   706  			b.Kind = ssa.BlockIf
   707  			b.SetControl(v)
   708  			bTrue := s.f.NewBlock(ssa.BlockPlain)
   709  			bFalse := s.f.NewBlock(ssa.BlockPlain)
   710  			bEnd := s.f.NewBlock(ssa.BlockPlain)
   711  			b.AddEdgeTo(bTrue)
   712  			b.AddEdgeTo(bFalse)
   713  			b.Likely = ssa.BranchLikely // Haswell and newer CPUs are common
   714  
   715  			// We have the intrinsic - use it directly.
   716  			s.startBlock(bTrue)
   717  			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
   718  			s.endBlock().AddEdgeTo(bEnd)
   719  
   720  			// Call the pure Go version.
   721  			s.startBlock(bFalse)
   722  			s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
   723  			s.endBlock().AddEdgeTo(bEnd)
   724  
   725  			// Merge results.
   726  			s.startBlock(bEnd)
   727  			return s.variable(n, types.Types[types.TFLOAT64])
   728  		},
   729  		sys.AMD64)
   730  	addF("math", "FMA",
   731  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   732  			if !s.config.UseFMA {
   733  				s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
   734  				return s.variable(n, types.Types[types.TFLOAT64])
   735  			}
   736  			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.ARMHasVFPv4, s.sb)
   737  			v := s.load(types.Types[types.TBOOL], addr)
   738  			b := s.endBlock()
   739  			b.Kind = ssa.BlockIf
   740  			b.SetControl(v)
   741  			bTrue := s.f.NewBlock(ssa.BlockPlain)
   742  			bFalse := s.f.NewBlock(ssa.BlockPlain)
   743  			bEnd := s.f.NewBlock(ssa.BlockPlain)
   744  			b.AddEdgeTo(bTrue)
   745  			b.AddEdgeTo(bFalse)
   746  			b.Likely = ssa.BranchLikely
   747  
   748  			// We have the intrinsic - use it directly.
   749  			s.startBlock(bTrue)
   750  			s.vars[n] = s.newValue3(ssa.OpFMA, types.Types[types.TFLOAT64], args[0], args[1], args[2])
   751  			s.endBlock().AddEdgeTo(bEnd)
   752  
   753  			// Call the pure Go version.
   754  			s.startBlock(bFalse)
   755  			s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
   756  			s.endBlock().AddEdgeTo(bEnd)
   757  
   758  			// Merge results.
   759  			s.startBlock(bEnd)
   760  			return s.variable(n, types.Types[types.TFLOAT64])
   761  		},
   762  		sys.ARM)
   763  
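        	// makeRoundAMD64 returns a builder for the SSE4.1 rounding ops: the op is
        	// used directly when GOAMD64 >= v2 guarantees SSE4.1, and is otherwise
        	// guarded by a runtime X86HasSSE41 check with a fallback call to the
        	// pure Go implementation.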
   764  	makeRoundAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   765  		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   766  			if cfg.goamd64 >= 2 {
   767  				return s.newValue1(op, types.Types[types.TFLOAT64], args[0])
   768  			}
   769  
   770  			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasSSE41)
   771  			b := s.endBlock()
   772  			b.Kind = ssa.BlockIf
   773  			b.SetControl(v)
   774  			bTrue := s.f.NewBlock(ssa.BlockPlain)
   775  			bFalse := s.f.NewBlock(ssa.BlockPlain)
   776  			bEnd := s.f.NewBlock(ssa.BlockPlain)
   777  			b.AddEdgeTo(bTrue)
   778  			b.AddEdgeTo(bFalse)
   779  			b.Likely = ssa.BranchLikely // most machines have SSE4.1 nowadays
   780  
   781  			// We have the intrinsic - use it directly.
   782  			s.startBlock(bTrue)
   783  			s.vars[n] = s.newValue1(op, types.Types[types.TFLOAT64], args[0])
   784  			s.endBlock().AddEdgeTo(bEnd)
   785  
   786  			// Call the pure Go version.
   787  			s.startBlock(bFalse)
   788  			s.vars[n] = s.callResult(n, callNormal) // types.Types[TFLOAT64]
   789  			s.endBlock().AddEdgeTo(bEnd)
   790  
   791  			// Merge results.
   792  			s.startBlock(bEnd)
   793  			return s.variable(n, types.Types[types.TFLOAT64])
   794  		}
   795  	}
   796  	addF("math", "RoundToEven",
   797  		makeRoundAMD64(ssa.OpRoundToEven),
   798  		sys.AMD64)
   799  	addF("math", "Floor",
   800  		makeRoundAMD64(ssa.OpFloor),
   801  		sys.AMD64)
   802  	addF("math", "Ceil",
   803  		makeRoundAMD64(ssa.OpCeil),
   804  		sys.AMD64)
   805  	addF("math", "Trunc",
   806  		makeRoundAMD64(ssa.OpTrunc),
   807  		sys.AMD64)
   808  
   809  	/******** math/bits ********/
   810  	addF("math/bits", "TrailingZeros64",
   811  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   812  			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], args[0])
   813  		},
   814  		sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
   815  	addF("math/bits", "TrailingZeros64",
   816  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   817  			lo := s.newValue1(ssa.OpInt64Lo, types.Types[types.TUINT32], args[0])
   818  			hi := s.newValue1(ssa.OpInt64Hi, types.Types[types.TUINT32], args[0])
   819  			return s.newValue2(ssa.OpCtz64On32, types.Types[types.TINT], lo, hi)
   820  		},
   821  		sys.I386)
   822  	addF("math/bits", "TrailingZeros32",
   823  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   824  			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], args[0])
   825  		},
   826  		sys.AMD64, sys.I386, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
   827  	addF("math/bits", "TrailingZeros16",
   828  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   829  			x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0])
   830  			c := s.constInt32(types.Types[types.TUINT32], 1<<16)
   831  			y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c)
   832  			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y)
   833  		},
   834  		sys.MIPS)
   835  	addF("math/bits", "TrailingZeros16",
   836  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   837  			return s.newValue1(ssa.OpCtz16, types.Types[types.TINT], args[0])
   838  		},
   839  		sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm)
   840  	addF("math/bits", "TrailingZeros16",
   841  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   842  			x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0])
   843  			c := s.constInt64(types.Types[types.TUINT64], 1<<16)
   844  			y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c)
   845  			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y)
   846  		},
   847  		sys.Loong64, sys.S390X, sys.PPC64)
   848  	addF("math/bits", "TrailingZeros8",
   849  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   850  			x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0])
   851  			c := s.constInt32(types.Types[types.TUINT32], 1<<8)
   852  			y := s.newValue2(ssa.OpOr32, types.Types[types.TUINT32], x, c)
   853  			return s.newValue1(ssa.OpCtz32, types.Types[types.TINT], y)
   854  		},
   855  		sys.MIPS)
   856  	addF("math/bits", "TrailingZeros8",
   857  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   858  			return s.newValue1(ssa.OpCtz8, types.Types[types.TINT], args[0])
   859  		},
   860  		sys.AMD64, sys.I386, sys.ARM, sys.ARM64, sys.Wasm)
   861  	addF("math/bits", "TrailingZeros8",
   862  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   863  			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
   864  			c := s.constInt64(types.Types[types.TUINT64], 1<<8)
   865  			y := s.newValue2(ssa.OpOr64, types.Types[types.TUINT64], x, c)
   866  			return s.newValue1(ssa.OpCtz64, types.Types[types.TINT], y)
   867  		},
   868  		sys.Loong64, sys.S390X)
   869  	alias("math/bits", "ReverseBytes64", "internal/runtime/sys", "Bswap64", all...)
   870  	alias("math/bits", "ReverseBytes32", "internal/runtime/sys", "Bswap32", all...)
   871  	addF("math/bits", "ReverseBytes16",
   872  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   873  			return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
   874  		},
   875  		sys.Loong64)
   876  	// ReverseBytes inlines correctly; no need to intrinsify it.
   877  	// Nothing special is needed for targets where ReverseBytes16 lowers to a rotate.
   878  	// On Power10, a 16-bit rotate is not available, so use the BRH instruction.
   879  	if cfg.goppc64 >= 10 {
   880  		addF("math/bits", "ReverseBytes16",
   881  			func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   882  				return s.newValue1(ssa.OpBswap16, types.Types[types.TUINT16], args[0])
   883  			},
   884  			sys.PPC64)
   885  	}
   886  
   887  	addF("math/bits", "Len64",
   888  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   889  			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
   890  		},
   891  		sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
   892  	addF("math/bits", "Len32",
   893  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   894  			return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
   895  		},
   896  		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64)
   897  	addF("math/bits", "Len32",
   898  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   899  			if s.config.PtrSize == 4 {
   900  				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
   901  			}
   902  			x := s.newValue1(ssa.OpZeroExt32to64, types.Types[types.TUINT64], args[0])
   903  			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
   904  		},
   905  		sys.ARM, sys.S390X, sys.MIPS, sys.Wasm)
   906  	addF("math/bits", "Len16",
   907  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   908  			if s.config.PtrSize == 4 {
   909  				x := s.newValue1(ssa.OpZeroExt16to32, types.Types[types.TUINT32], args[0])
   910  				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x)
   911  			}
   912  			x := s.newValue1(ssa.OpZeroExt16to64, types.Types[types.TUINT64], args[0])
   913  			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
   914  		},
   915  		sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
   916  	addF("math/bits", "Len16",
   917  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   918  			return s.newValue1(ssa.OpBitLen16, types.Types[types.TINT], args[0])
   919  		},
   920  		sys.AMD64)
   921  	addF("math/bits", "Len8",
   922  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   923  			if s.config.PtrSize == 4 {
   924  				x := s.newValue1(ssa.OpZeroExt8to32, types.Types[types.TUINT32], args[0])
   925  				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], x)
   926  			}
   927  			x := s.newValue1(ssa.OpZeroExt8to64, types.Types[types.TUINT64], args[0])
   928  			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], x)
   929  		},
   930  		sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
   931  	addF("math/bits", "Len8",
   932  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   933  			return s.newValue1(ssa.OpBitLen8, types.Types[types.TINT], args[0])
   934  		},
   935  		sys.AMD64)
   936  	addF("math/bits", "Len",
   937  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   938  			if s.config.PtrSize == 4 {
   939  				return s.newValue1(ssa.OpBitLen32, types.Types[types.TINT], args[0])
   940  			}
   941  			return s.newValue1(ssa.OpBitLen64, types.Types[types.TINT], args[0])
   942  		},
   943  		sys.AMD64, sys.ARM64, sys.ARM, sys.Loong64, sys.S390X, sys.MIPS, sys.PPC64, sys.Wasm)
   944  	// LeadingZeros is handled because it trivially calls Len.
   945  	addF("math/bits", "Reverse64",
   946  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   947  			return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
   948  		},
   949  		sys.ARM64, sys.Loong64)
   950  	addF("math/bits", "Reverse32",
   951  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   952  			return s.newValue1(ssa.OpBitRev32, types.Types[types.TINT], args[0])
   953  		},
   954  		sys.ARM64, sys.Loong64)
   955  	addF("math/bits", "Reverse16",
   956  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   957  			return s.newValue1(ssa.OpBitRev16, types.Types[types.TINT], args[0])
   958  		},
   959  		sys.ARM64, sys.Loong64)
   960  	addF("math/bits", "Reverse8",
   961  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   962  			return s.newValue1(ssa.OpBitRev8, types.Types[types.TINT], args[0])
   963  		},
   964  		sys.ARM64, sys.Loong64)
   965  	addF("math/bits", "Reverse",
   966  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   967  			return s.newValue1(ssa.OpBitRev64, types.Types[types.TINT], args[0])
   968  		},
   969  		sys.ARM64, sys.Loong64)
   970  	addF("math/bits", "RotateLeft8",
   971  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   972  			return s.newValue2(ssa.OpRotateLeft8, types.Types[types.TUINT8], args[0], args[1])
   973  		},
   974  		sys.AMD64, sys.RISCV64)
   975  	addF("math/bits", "RotateLeft16",
   976  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   977  			return s.newValue2(ssa.OpRotateLeft16, types.Types[types.TUINT16], args[0], args[1])
   978  		},
   979  		sys.AMD64, sys.RISCV64)
   980  	addF("math/bits", "RotateLeft32",
   981  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   982  			return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1])
   983  		},
   984  		sys.AMD64, sys.ARM, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
   985  	addF("math/bits", "RotateLeft64",
   986  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   987  			return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1])
   988  		},
   989  		sys.AMD64, sys.ARM64, sys.Loong64, sys.PPC64, sys.RISCV64, sys.S390X, sys.Wasm)
   990  	alias("math/bits", "RotateLeft", "math/bits", "RotateLeft64", p8...)
   991  
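        	// makeOnesCountAMD64 returns a builder that emits POPCNT directly when
        	// GOAMD64 >= v2, and otherwise guards it with a runtime X86HasPOPCNT
        	// check, falling back to the pure Go implementation.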
   992  	makeOnesCountAMD64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   993  		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
   994  			if cfg.goamd64 >= 2 {
   995  				return s.newValue1(op, types.Types[types.TINT], args[0])
   996  			}
   997  
   998  			v := s.entryNewValue0A(ssa.OpHasCPUFeature, types.Types[types.TBOOL], ir.Syms.X86HasPOPCNT)
   999  			b := s.endBlock()
  1000  			b.Kind = ssa.BlockIf
  1001  			b.SetControl(v)
  1002  			bTrue := s.f.NewBlock(ssa.BlockPlain)
  1003  			bFalse := s.f.NewBlock(ssa.BlockPlain)
  1004  			bEnd := s.f.NewBlock(ssa.BlockPlain)
  1005  			b.AddEdgeTo(bTrue)
  1006  			b.AddEdgeTo(bFalse)
  1007  			b.Likely = ssa.BranchLikely // most machines have POPCNT nowadays
  1008  
  1009  			// We have the intrinsic - use it directly.
  1010  			s.startBlock(bTrue)
  1011  			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
  1012  			s.endBlock().AddEdgeTo(bEnd)
  1013  
  1014  			// Call the pure Go version.
  1015  			s.startBlock(bFalse)
  1016  			s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT]
  1017  			s.endBlock().AddEdgeTo(bEnd)
  1018  
  1019  			// Merge results.
  1020  			s.startBlock(bEnd)
  1021  			return s.variable(n, types.Types[types.TINT])
  1022  		}
  1023  	}
  1024  
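        	// makeOnesCountLoong64 returns a builder that emits the LSX popcount op
        	// when the runtime-detected Loong64HasLSX flag is set, falling back to
        	// the pure Go implementation otherwise.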
  1025  	makeOnesCountLoong64 := func(op ssa.Op) func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1026  		return func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1027  			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[types.TBOOL].PtrTo(), ir.Syms.Loong64HasLSX, s.sb)
  1028  			v := s.load(types.Types[types.TBOOL], addr)
  1029  			b := s.endBlock()
  1030  			b.Kind = ssa.BlockIf
  1031  			b.SetControl(v)
  1032  			bTrue := s.f.NewBlock(ssa.BlockPlain)
  1033  			bFalse := s.f.NewBlock(ssa.BlockPlain)
  1034  			bEnd := s.f.NewBlock(ssa.BlockPlain)
  1035  			b.AddEdgeTo(bTrue)
  1036  			b.AddEdgeTo(bFalse)
  1037  			b.Likely = ssa.BranchLikely // most loong64 machines support LSX
  1038  
  1039  			// We have the intrinsic - use it directly.
  1040  			s.startBlock(bTrue)
  1041  			s.vars[n] = s.newValue1(op, types.Types[types.TINT], args[0])
  1042  			s.endBlock().AddEdgeTo(bEnd)
  1043  
  1044  			// Call the pure Go version.
  1045  			s.startBlock(bFalse)
  1046  			s.vars[n] = s.callResult(n, callNormal) // types.Types[TINT]
  1047  			s.endBlock().AddEdgeTo(bEnd)
  1048  
  1049  			// Merge results.
  1050  			s.startBlock(bEnd)
  1051  			return s.variable(n, types.Types[types.TINT])
  1052  		}
  1053  	}
  1054  
  1055  	addF("math/bits", "OnesCount64",
  1056  		makeOnesCountAMD64(ssa.OpPopCount64),
  1057  		sys.AMD64)
  1058  	addF("math/bits", "OnesCount64",
  1059  		makeOnesCountLoong64(ssa.OpPopCount64),
  1060  		sys.Loong64)
  1061  	addF("math/bits", "OnesCount64",
  1062  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1063  			return s.newValue1(ssa.OpPopCount64, types.Types[types.TINT], args[0])
  1064  		},
  1065  		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
  1066  	addF("math/bits", "OnesCount32",
  1067  		makeOnesCountAMD64(ssa.OpPopCount32),
  1068  		sys.AMD64)
  1069  	addF("math/bits", "OnesCount32",
  1070  		makeOnesCountLoong64(ssa.OpPopCount32),
  1071  		sys.Loong64)
  1072  	addF("math/bits", "OnesCount32",
  1073  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1074  			return s.newValue1(ssa.OpPopCount32, types.Types[types.TINT], args[0])
  1075  		},
  1076  		sys.PPC64, sys.ARM64, sys.S390X, sys.Wasm)
  1077  	addF("math/bits", "OnesCount16",
  1078  		makeOnesCountAMD64(ssa.OpPopCount16),
  1079  		sys.AMD64)
  1080  	addF("math/bits", "OnesCount16",
  1081  		makeOnesCountLoong64(ssa.OpPopCount16),
  1082  		sys.Loong64)
  1083  	addF("math/bits", "OnesCount16",
  1084  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1085  			return s.newValue1(ssa.OpPopCount16, types.Types[types.TINT], args[0])
  1086  		},
  1087  		sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
  1088  	addF("math/bits", "OnesCount8",
  1089  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1090  			return s.newValue1(ssa.OpPopCount8, types.Types[types.TINT], args[0])
  1091  		},
  1092  		sys.S390X, sys.PPC64, sys.Wasm)
  1093  	addF("math/bits", "OnesCount",
  1094  		makeOnesCountAMD64(ssa.OpPopCount64),
  1095  		sys.AMD64)
  1096  	addF("math/bits", "Mul64",
  1097  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1098  			return s.newValue2(ssa.OpMul64uhilo, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1])
  1099  		},
  1100  		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.MIPS64, sys.RISCV64, sys.Loong64)
  1101  	alias("math/bits", "Mul", "math/bits", "Mul64", p8...)
  1102  	alias("internal/runtime/math", "Mul64", "math/bits", "Mul64", p8...)
  1103  	addF("math/bits", "Add64",
  1104  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1105  			return s.newValue3(ssa.OpAdd64carry, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
  1106  		},
  1107  		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
  1108  	alias("math/bits", "Add", "math/bits", "Add64", p8...)
  1109  	alias("internal/runtime/math", "Add64", "math/bits", "Add64", all...)
  1110  	addF("math/bits", "Sub64",
  1111  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1112  			return s.newValue3(ssa.OpSub64borrow, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
  1113  		},
  1114  		sys.AMD64, sys.ARM64, sys.PPC64, sys.S390X, sys.RISCV64, sys.Loong64, sys.MIPS64)
  1115  	alias("math/bits", "Sub", "math/bits", "Sub64", p8...)
  1116  	addF("math/bits", "Div64",
  1117  		func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
  1118  			// check for divide-by-zero/overflow and panic with appropriate message
  1119  			cmpZero := s.newValue2(s.ssaOp(ir.ONE, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[2], s.zeroVal(types.Types[types.TUINT64]))
  1120  			s.check(cmpZero, ir.Syms.Panicdivide)
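        			// math/bits.Div64 requires hi < y; otherwise the quotient would not
        			// fit in 64 bits, so panic with overflow.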
  1121  			cmpOverflow := s.newValue2(s.ssaOp(ir.OLT, types.Types[types.TUINT64]), types.Types[types.TBOOL], args[0], args[2])
  1122  			s.check(cmpOverflow, ir.Syms.Panicoverflow)
  1123  			return s.newValue3(ssa.OpDiv128u, types.NewTuple(types.Types[types.TUINT64], types.Types[types.TUINT64]), args[0], args[1], args[2])
  1124  		},
  1125  		sys.AMD64)
  1126  	alias("math/bits", "Div", "math/bits", "Div64", sys.ArchAMD64)
  1127  
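        	// The internal/runtime/sys copies of these bit operations reuse the
        	// math/bits intrinsics.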
  1128  	alias("internal/runtime/sys", "TrailingZeros8", "math/bits", "TrailingZeros8", all...)
  1129  	alias("internal/runtime/sys", "TrailingZeros32", "math/bits", "TrailingZeros32", all...)
  1130  	alias("internal/runtime/sys", "TrailingZeros64", "math/bits", "TrailingZeros64", all...)
  1131  	alias("internal/runtime/sys", "Len8", "math/bits", "Len8", all...)
  1132  	alias("internal/runtime/sys", "Len64", "math/bits", "Len64", all...)
  1133  	alias("internal/runtime/sys", "OnesCount64", "math/bits", "OnesCount64", all...)
  1134  
  1135  	/******** sync/atomic ********/
  1136  
  1137  	// Note: these are disabled by flag_race in findIntrinsic below.
  1138  	alias("sync/atomic", "LoadInt32", "internal/runtime/atomic", "Load", all...)
  1139  	alias("sync/atomic", "LoadInt64", "internal/runtime/atomic", "Load64", all...)
  1140  	alias("sync/atomic", "LoadPointer", "internal/runtime/atomic", "Loadp", all...)
  1141  	alias("sync/atomic", "LoadUint32", "internal/runtime/atomic", "Load", all...)
  1142  	alias("sync/atomic", "LoadUint64", "internal/runtime/atomic", "Load64", all...)
  1143  	alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load", p4...)
  1144  	alias("sync/atomic", "LoadUintptr", "internal/runtime/atomic", "Load64", p8...)
  1145  
  1146  	alias("sync/atomic", "StoreInt32", "internal/runtime/atomic", "Store", all...)
  1147  	alias("sync/atomic", "StoreInt64", "internal/runtime/atomic", "Store64", all...)
  1148  	// Note: not StorePointer, that needs a write barrier.  Same below for {CompareAnd}Swap.
  1149  	alias("sync/atomic", "StoreUint32", "internal/runtime/atomic", "Store", all...)
  1150  	alias("sync/atomic", "StoreUint64", "internal/runtime/atomic", "Store64", all...)
  1151  	alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store", p4...)
  1152  	alias("sync/atomic", "StoreUintptr", "internal/runtime/atomic", "Store64", p8...)
  1153  
  1154  	alias("sync/atomic", "SwapInt32", "internal/runtime/atomic", "Xchg", all...)
  1155  	alias("sync/atomic", "SwapInt64", "internal/runtime/atomic", "Xchg64", all...)
  1156  	alias("sync/atomic", "SwapUint32", "internal/runtime/atomic", "Xchg", all...)
  1157  	alias("sync/atomic", "SwapUint64", "internal/runtime/atomic", "Xchg64", all...)
  1158  	alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg", p4...)
  1159  	alias("sync/atomic", "SwapUintptr", "internal/runtime/atomic", "Xchg64", p8...)
  1160  
  1161  	alias("sync/atomic", "CompareAndSwapInt32", "internal/runtime/atomic", "Cas", all...)
  1162  	alias("sync/atomic", "CompareAndSwapInt64", "internal/runtime/atomic", "Cas64", all...)
  1163  	alias("sync/atomic", "CompareAndSwapUint32", "internal/runtime/atomic", "Cas", all...)
  1164  	alias("sync/atomic", "CompareAndSwapUint64", "internal/runtime/atomic", "Cas64", all...)
  1165  	alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas", p4...)
  1166  	alias("sync/atomic", "CompareAndSwapUintptr", "internal/runtime/atomic", "Cas64", p8...)
  1167  
  1168  	alias("sync/atomic", "AddInt32", "internal/runtime/atomic", "Xadd", all...)
  1169  	alias("sync/atomic", "AddInt64", "internal/runtime/atomic", "Xadd64", all...)
  1170  	alias("sync/atomic", "AddUint32", "internal/runtime/atomic", "Xadd", all...)
  1171  	alias("sync/atomic", "AddUint64", "internal/runtime/atomic", "Xadd64", all...)
  1172  	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd", p4...)
  1173  	alias("sync/atomic", "AddUintptr", "internal/runtime/atomic", "Xadd64", p8...)
  1174  
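        	// sync/atomic's typed And/Or operations map onto the internal/runtime/atomic
        	// And32/And64/Or32/Or64 intrinsics on the architectures below.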
  1175  	alias("sync/atomic", "AndInt32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1176  	alias("sync/atomic", "AndUint32", "internal/runtime/atomic", "And32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1177  	alias("sync/atomic", "AndInt64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1178  	alias("sync/atomic", "AndUint64", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1179  	alias("sync/atomic", "AndUintptr", "internal/runtime/atomic", "And64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1180  	alias("sync/atomic", "OrInt32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1181  	alias("sync/atomic", "OrUint32", "internal/runtime/atomic", "Or32", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1182  	alias("sync/atomic", "OrInt64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1183  	alias("sync/atomic", "OrUint64", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1184  	alias("sync/atomic", "OrUintptr", "internal/runtime/atomic", "Or64", sys.ArchARM64, sys.ArchAMD64, sys.ArchLoong64)
  1185  
  1186  	/******** math/big ********/
  1187  	alias("math/big", "mulWW", "math/bits", "Mul64", p8...)
  1188  }
  1189  
  1190  // findIntrinsic returns a function which builds the SSA equivalent of the
  1191  // function identified by the symbol sym.  If sym is not an intrinsic call, returns nil.
  1192  func findIntrinsic(sym *types.Sym) intrinsicBuilder {
  1193  	if sym == nil || sym.Pkg == nil {
  1194  		return nil
  1195  	}
  1196  	pkg := sym.Pkg.Path
  1197  	if sym.Pkg == ir.Pkgs.Runtime {
  1198  		pkg = "runtime"
  1199  	}
  1200  	if base.Flag.Race && pkg == "sync/atomic" {
  1201  		// The race detector needs to be able to intercept these calls.
  1202  		// We can't intrinsify them.
  1203  		return nil
  1204  	}
  1205  	// Skip intrinsifying math functions (which may contain hard-float
  1206  	// instructions) when soft-float
  1207  	if Arch.SoftFloat && pkg == "math" {
  1208  		return nil
  1209  	}
  1210  
  1211  	fn := sym.Name
  1212  	if ssa.IntrinsicsDisable {
  1213  		if pkg == "internal/runtime/sys" && (fn == "GetCallerPC" || fn == "GetCallerSP" || fn == "GetClosurePtr") {
  1214  			// These runtime functions don't have definitions, must be intrinsics.
  1215  		} else {
  1216  			return nil
  1217  		}
  1218  	}
  1219  	return intrinsics.lookup(Arch.LinkArch.Arch, pkg, fn)
  1220  }
  1221  
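        // IsIntrinsicCall reports whether n is a call that will be compiled as an intrinsic.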
  1222  func IsIntrinsicCall(n *ir.CallExpr) bool {
  1223  	if n == nil {
  1224  		return false
  1225  	}
  1226  	name, ok := n.Fun.(*ir.Name)
  1227  	if !ok {
  1228  		return false
  1229  	}
  1230  	return findIntrinsic(name.Sym()) != nil
  1231  }
  1232  