Source file src/cmd/compile/internal/arm64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/arm64"
    19  	"internal/abi"
    20  )
    21  
    22  // loadByType returns the load instruction of the given type.
    23  func loadByType(t *types.Type) obj.As {
    24  	if t.IsFloat() {
    25  		switch t.Size() {
    26  		case 4:
    27  			return arm64.AFMOVS
    28  		case 8:
    29  			return arm64.AFMOVD
    30  		}
    31  	} else {
    32  		switch t.Size() {
    33  		case 1:
    34  			if t.IsSigned() {
    35  				return arm64.AMOVB
    36  			} else {
    37  				return arm64.AMOVBU
    38  			}
    39  		case 2:
    40  			if t.IsSigned() {
    41  				return arm64.AMOVH
    42  			} else {
    43  				return arm64.AMOVHU
    44  			}
    45  		case 4:
    46  			if t.IsSigned() {
    47  				return arm64.AMOVW
    48  			} else {
    49  				return arm64.AMOVWU
    50  			}
    51  		case 8:
    52  			return arm64.AMOVD
    53  		}
    54  	}
    55  	panic("bad load type")
    56  }
    57  
    58  // storeByType returns the store instruction of the given type.
    59  func storeByType(t *types.Type) obj.As {
    60  	if t.IsFloat() {
    61  		switch t.Size() {
    62  		case 4:
    63  			return arm64.AFMOVS
    64  		case 8:
    65  			return arm64.AFMOVD
    66  		}
    67  	} else {
    68  		switch t.Size() {
    69  		case 1:
    70  			return arm64.AMOVB
    71  		case 2:
    72  			return arm64.AMOVH
    73  		case 4:
    74  			return arm64.AMOVW
    75  		case 8:
    76  			return arm64.AMOVD
    77  		}
    78  	}
    79  	panic("bad store type")
    80  }
    81  
    82  // loadByType2 returns an opcode that can load consecutive memory locations into 2 registers with type t.
    83  // returns obj.AXXX if no such opcode exists.
    84  func loadByType2(t *types.Type) obj.As {
    85  	if t.IsFloat() {
    86  		switch t.Size() {
    87  		case 4:
    88  			return arm64.AFLDPS
    89  		case 8:
    90  			return arm64.AFLDPD
    91  		}
    92  	} else {
    93  		switch t.Size() {
    94  		case 4:
    95  			return arm64.ALDPW
    96  		case 8:
    97  			return arm64.ALDP
    98  		}
    99  	}
   100  	return obj.AXXX
   101  }
   102  
   103  // storeByType2 returns an opcode that can store registers with type t into 2 consecutive memory locations.
   104  // returns obj.AXXX if no such opcode exists.
   105  func storeByType2(t *types.Type) obj.As {
   106  	if t.IsFloat() {
   107  		switch t.Size() {
   108  		case 4:
   109  			return arm64.AFSTPS
   110  		case 8:
   111  			return arm64.AFSTPD
   112  		}
   113  	} else {
   114  		switch t.Size() {
   115  		case 4:
   116  			return arm64.ASTPW
   117  		case 8:
   118  			return arm64.ASTP
   119  		}
   120  	}
   121  	return obj.AXXX
   122  }
   123  
   124  // makeshift encodes a register shifted by a constant, used as an Offset in Prog.
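         // The result packs the register number into bits 16-20 and the shift amount
         // into bits 10-15; typ supplies the shift-type bits (e.g. arm64.SHIFT_LL).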
   125  func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 {
   126  	if s < 0 || s >= 64 {
   127  		v.Fatalf("shift out of range: %d", s)
   128  	}
   129  	return int64(reg&31)<<16 | typ | (s&63)<<10
   130  }
   131  
   132  // genshift generates a Prog for r = r0 op (r1 shifted by n).
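         // If r is 0, no destination register is set; the flag-setting comparison
         // forms (CMPshift/CMNshift/TSTshift) use this.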
   133  func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
   134  	p := s.Prog(as)
   135  	p.From.Type = obj.TYPE_SHIFT
   136  	p.From.Offset = makeshift(v, r1, typ, n)
   137  	p.Reg = r0
   138  	if r != 0 {
   139  		p.To.Type = obj.TYPE_REG
   140  		p.To.Reg = r
   141  	}
   142  	return p
   143  }
   144  
    145  // genIndexedOperand generates the memory operand for the indexed load/store instructions.
   146  // base and idx are registers.
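         // For the scaled variants (e.g. MOVDloadidx8) the index register is encoded
         // with an LSL by log2 of the element size, giving an address of base + idx<<shift.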
   147  func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
   148  	// Reg: base register, Index: (shifted) index register
   149  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
   150  	switch op {
   151  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8,
   152  		ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
   153  		mop.Index = arm64.REG_LSL | 3<<5 | idx&31
   154  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4,
   155  		ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
   156  		mop.Index = arm64.REG_LSL | 2<<5 | idx&31
   157  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2:
   158  		mop.Index = arm64.REG_LSL | 1<<5 | idx&31
   159  	default: // not shifted
   160  		mop.Index = idx
   161  	}
   162  	return mop
   163  }
   164  
   165  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   166  	switch v.Op {
   167  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   168  		if v.Type.IsMemory() {
   169  			return
   170  		}
   171  		x := v.Args[0].Reg()
   172  		y := v.Reg()
   173  		if x == y {
   174  			return
   175  		}
   176  		as := arm64.AMOVD
   177  		if v.Type.IsFloat() {
   178  			switch v.Type.Size() {
   179  			case 4:
   180  				as = arm64.AFMOVS
   181  			case 8:
   182  				as = arm64.AFMOVD
   183  			default:
   184  				panic("bad float size")
   185  			}
   186  		}
   187  		p := s.Prog(as)
   188  		p.From.Type = obj.TYPE_REG
   189  		p.From.Reg = x
   190  		p.To.Type = obj.TYPE_REG
   191  		p.To.Reg = y
   192  	case ssa.OpARM64MOVDnop, ssa.OpARM64ZERO:
   193  		// nothing to do
   194  	case ssa.OpLoadReg:
   195  		if v.Type.IsFlags() {
   196  			v.Fatalf("load flags not implemented: %v", v.LongString())
   197  			return
   198  		}
   199  		p := s.Prog(loadByType(v.Type))
   200  		ssagen.AddrAuto(&p.From, v.Args[0])
   201  		p.To.Type = obj.TYPE_REG
   202  		p.To.Reg = v.Reg()
   203  	case ssa.OpStoreReg:
   204  		if v.Type.IsFlags() {
   205  			v.Fatalf("store flags not implemented: %v", v.LongString())
   206  			return
   207  		}
   208  		p := s.Prog(storeByType(v.Type))
   209  		p.From.Type = obj.TYPE_REG
   210  		p.From.Reg = v.Args[0].Reg()
   211  		ssagen.AddrAuto(&p.To, v)
   212  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   213  		ssagen.CheckArgReg(v)
   214  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   215  		// The loop only runs once.
   216  		args := v.Block.Func.RegArgs
   217  		if len(args) == 0 {
   218  			break
   219  		}
   220  		v.Block.Func.RegArgs = nil // prevent from running again
   221  
   222  		for i := 0; i < len(args); i++ {
   223  			a := args[i]
   224  			// Offset by size of the saved LR slot.
   225  			addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   226  			// Look for double-register operations if we can.
   227  			if i < len(args)-1 {
   228  				b := args[i+1]
   229  				if a.Type.Size() == b.Type.Size() &&
   230  					a.Type.IsFloat() == b.Type.IsFloat() &&
   231  					b.Offset == a.Offset+a.Type.Size() {
   232  					ld := loadByType2(a.Type)
   233  					st := storeByType2(a.Type)
   234  					if ld != obj.AXXX && st != obj.AXXX {
   235  						s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Reg2: b.Reg, Addr: addr, Unspill: ld, Spill: st})
   236  						i++ // b is done also, skip it.
   237  						continue
   238  					}
   239  				}
   240  			}
   241  			// Pass the spill/unspill information along to the assembler.
   242  			s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   243  		}
   244  
   245  	case ssa.OpARM64ADD,
   246  		ssa.OpARM64SUB,
   247  		ssa.OpARM64AND,
   248  		ssa.OpARM64OR,
   249  		ssa.OpARM64XOR,
   250  		ssa.OpARM64BIC,
   251  		ssa.OpARM64EON,
   252  		ssa.OpARM64ORN,
   253  		ssa.OpARM64MUL,
   254  		ssa.OpARM64MULW,
   255  		ssa.OpARM64MNEG,
   256  		ssa.OpARM64MNEGW,
   257  		ssa.OpARM64MULH,
   258  		ssa.OpARM64UMULH,
   259  		ssa.OpARM64MULL,
   260  		ssa.OpARM64UMULL,
   261  		ssa.OpARM64DIV,
   262  		ssa.OpARM64UDIV,
   263  		ssa.OpARM64DIVW,
   264  		ssa.OpARM64UDIVW,
   265  		ssa.OpARM64MOD,
   266  		ssa.OpARM64UMOD,
   267  		ssa.OpARM64MODW,
   268  		ssa.OpARM64UMODW,
   269  		ssa.OpARM64SLL,
   270  		ssa.OpARM64SRL,
   271  		ssa.OpARM64SRA,
   272  		ssa.OpARM64FADDS,
   273  		ssa.OpARM64FADDD,
   274  		ssa.OpARM64FSUBS,
   275  		ssa.OpARM64FSUBD,
   276  		ssa.OpARM64FMULS,
   277  		ssa.OpARM64FMULD,
   278  		ssa.OpARM64FNMULS,
   279  		ssa.OpARM64FNMULD,
   280  		ssa.OpARM64FDIVS,
   281  		ssa.OpARM64FDIVD,
   282  		ssa.OpARM64FMINS,
   283  		ssa.OpARM64FMIND,
   284  		ssa.OpARM64FMAXS,
   285  		ssa.OpARM64FMAXD,
   286  		ssa.OpARM64ROR,
   287  		ssa.OpARM64RORW:
   288  		r := v.Reg()
   289  		r1 := v.Args[0].Reg()
   290  		r2 := v.Args[1].Reg()
   291  		p := s.Prog(v.Op.Asm())
   292  		p.From.Type = obj.TYPE_REG
   293  		p.From.Reg = r2
   294  		p.Reg = r1
   295  		p.To.Type = obj.TYPE_REG
   296  		p.To.Reg = r
   297  	case ssa.OpARM64FMADDS,
   298  		ssa.OpARM64FMADDD,
   299  		ssa.OpARM64FNMADDS,
   300  		ssa.OpARM64FNMADDD,
   301  		ssa.OpARM64FMSUBS,
   302  		ssa.OpARM64FMSUBD,
   303  		ssa.OpARM64FNMSUBS,
   304  		ssa.OpARM64FNMSUBD,
   305  		ssa.OpARM64MADD,
   306  		ssa.OpARM64MADDW,
   307  		ssa.OpARM64MSUB,
   308  		ssa.OpARM64MSUBW:
   309  		rt := v.Reg()
   310  		ra := v.Args[0].Reg()
   311  		rm := v.Args[1].Reg()
   312  		rn := v.Args[2].Reg()
   313  		p := s.Prog(v.Op.Asm())
   314  		p.Reg = ra
   315  		p.From.Type = obj.TYPE_REG
   316  		p.From.Reg = rm
   317  		p.AddRestSourceReg(rn)
   318  		p.To.Type = obj.TYPE_REG
   319  		p.To.Reg = rt
   320  	case ssa.OpARM64ADDconst,
   321  		ssa.OpARM64SUBconst,
   322  		ssa.OpARM64ANDconst,
   323  		ssa.OpARM64ORconst,
   324  		ssa.OpARM64XORconst,
   325  		ssa.OpARM64SLLconst,
   326  		ssa.OpARM64SRLconst,
   327  		ssa.OpARM64SRAconst,
   328  		ssa.OpARM64RORconst,
   329  		ssa.OpARM64RORWconst:
   330  		p := s.Prog(v.Op.Asm())
   331  		p.From.Type = obj.TYPE_CONST
   332  		p.From.Offset = v.AuxInt
   333  		p.Reg = v.Args[0].Reg()
   334  		p.To.Type = obj.TYPE_REG
   335  		p.To.Reg = v.Reg()
   336  	case ssa.OpARM64ADDSconstflags:
   337  		p := s.Prog(v.Op.Asm())
   338  		p.From.Type = obj.TYPE_CONST
   339  		p.From.Offset = v.AuxInt
   340  		p.Reg = v.Args[0].Reg()
   341  		p.To.Type = obj.TYPE_REG
   342  		p.To.Reg = v.Reg0()
   343  	case ssa.OpARM64ADCzerocarry:
   344  		p := s.Prog(v.Op.Asm())
   345  		p.From.Type = obj.TYPE_REG
   346  		p.From.Reg = arm64.REGZERO
   347  		p.Reg = arm64.REGZERO
   348  		p.To.Type = obj.TYPE_REG
   349  		p.To.Reg = v.Reg()
   350  	case ssa.OpARM64ADCSflags,
   351  		ssa.OpARM64ADDSflags,
   352  		ssa.OpARM64SBCSflags,
   353  		ssa.OpARM64SUBSflags:
   354  		r := v.Reg0()
   355  		r1 := v.Args[0].Reg()
   356  		r2 := v.Args[1].Reg()
   357  		p := s.Prog(v.Op.Asm())
   358  		p.From.Type = obj.TYPE_REG
   359  		p.From.Reg = r2
   360  		p.Reg = r1
   361  		p.To.Type = obj.TYPE_REG
   362  		p.To.Reg = r
   363  	case ssa.OpARM64NEGSflags:
   364  		p := s.Prog(v.Op.Asm())
   365  		p.From.Type = obj.TYPE_REG
   366  		p.From.Reg = v.Args[0].Reg()
   367  		p.To.Type = obj.TYPE_REG
   368  		p.To.Reg = v.Reg0()
   369  	case ssa.OpARM64NGCzerocarry:
   370  		p := s.Prog(v.Op.Asm())
   371  		p.From.Type = obj.TYPE_REG
   372  		p.From.Reg = arm64.REGZERO
   373  		p.To.Type = obj.TYPE_REG
   374  		p.To.Reg = v.Reg()
   375  	case ssa.OpARM64EXTRconst,
   376  		ssa.OpARM64EXTRWconst:
   377  		p := s.Prog(v.Op.Asm())
   378  		p.From.Type = obj.TYPE_CONST
   379  		p.From.Offset = v.AuxInt
   380  		p.AddRestSourceReg(v.Args[0].Reg())
   381  		p.Reg = v.Args[1].Reg()
   382  		p.To.Type = obj.TYPE_REG
   383  		p.To.Reg = v.Reg()
   384  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   385  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   386  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   387  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   388  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   389  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   390  	case ssa.OpARM64MVNshiftRO:
   391  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   392  	case ssa.OpARM64ADDshiftLL,
   393  		ssa.OpARM64SUBshiftLL,
   394  		ssa.OpARM64ANDshiftLL,
   395  		ssa.OpARM64ORshiftLL,
   396  		ssa.OpARM64XORshiftLL,
   397  		ssa.OpARM64EONshiftLL,
   398  		ssa.OpARM64ORNshiftLL,
   399  		ssa.OpARM64BICshiftLL:
   400  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   401  	case ssa.OpARM64ADDshiftRL,
   402  		ssa.OpARM64SUBshiftRL,
   403  		ssa.OpARM64ANDshiftRL,
   404  		ssa.OpARM64ORshiftRL,
   405  		ssa.OpARM64XORshiftRL,
   406  		ssa.OpARM64EONshiftRL,
   407  		ssa.OpARM64ORNshiftRL,
   408  		ssa.OpARM64BICshiftRL:
   409  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   410  	case ssa.OpARM64ADDshiftRA,
   411  		ssa.OpARM64SUBshiftRA,
   412  		ssa.OpARM64ANDshiftRA,
   413  		ssa.OpARM64ORshiftRA,
   414  		ssa.OpARM64XORshiftRA,
   415  		ssa.OpARM64EONshiftRA,
   416  		ssa.OpARM64ORNshiftRA,
   417  		ssa.OpARM64BICshiftRA:
   418  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   419  	case ssa.OpARM64ANDshiftRO,
   420  		ssa.OpARM64ORshiftRO,
   421  		ssa.OpARM64XORshiftRO,
   422  		ssa.OpARM64EONshiftRO,
   423  		ssa.OpARM64ORNshiftRO,
   424  		ssa.OpARM64BICshiftRO:
   425  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   426  	case ssa.OpARM64MOVDconst:
   427  		p := s.Prog(v.Op.Asm())
   428  		p.From.Type = obj.TYPE_CONST
   429  		p.From.Offset = v.AuxInt
   430  		p.To.Type = obj.TYPE_REG
   431  		p.To.Reg = v.Reg()
   432  	case ssa.OpARM64FMOVSconst,
   433  		ssa.OpARM64FMOVDconst:
   434  		p := s.Prog(v.Op.Asm())
   435  		p.From.Type = obj.TYPE_FCONST
   436  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   437  		p.To.Type = obj.TYPE_REG
   438  		p.To.Reg = v.Reg()
   439  	case ssa.OpARM64FCMPS0,
   440  		ssa.OpARM64FCMPD0:
   441  		p := s.Prog(v.Op.Asm())
   442  		p.From.Type = obj.TYPE_FCONST
   443  		p.From.Val = math.Float64frombits(0)
   444  		p.Reg = v.Args[0].Reg()
   445  	case ssa.OpARM64CMP,
   446  		ssa.OpARM64CMPW,
   447  		ssa.OpARM64CMN,
   448  		ssa.OpARM64CMNW,
   449  		ssa.OpARM64TST,
   450  		ssa.OpARM64TSTW,
   451  		ssa.OpARM64FCMPS,
   452  		ssa.OpARM64FCMPD:
   453  		p := s.Prog(v.Op.Asm())
   454  		p.From.Type = obj.TYPE_REG
   455  		p.From.Reg = v.Args[1].Reg()
   456  		p.Reg = v.Args[0].Reg()
   457  	case ssa.OpARM64CMPconst,
   458  		ssa.OpARM64CMPWconst,
   459  		ssa.OpARM64CMNconst,
   460  		ssa.OpARM64CMNWconst,
   461  		ssa.OpARM64TSTconst,
   462  		ssa.OpARM64TSTWconst:
   463  		p := s.Prog(v.Op.Asm())
   464  		p.From.Type = obj.TYPE_CONST
   465  		p.From.Offset = v.AuxInt
   466  		p.Reg = v.Args[0].Reg()
   467  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   468  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   469  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   470  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   471  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   472  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   473  	case ssa.OpARM64TSTshiftRO:
   474  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_ROR, v.AuxInt)
   475  	case ssa.OpARM64MOVDaddr:
   476  		p := s.Prog(arm64.AMOVD)
   477  		p.From.Type = obj.TYPE_ADDR
   478  		p.From.Reg = v.Args[0].Reg()
   479  		p.To.Type = obj.TYPE_REG
   480  		p.To.Reg = v.Reg()
   481  
   482  		var wantreg string
   483  		// MOVD $sym+off(base), R
    484  		// the assembler expands it as follows:
    485  		// - base is SP: add constant offset to SP
    486  		//               when the constant is large, the tmp register (REGTMP, R27) may be used
   487  		// - base is SB: load external address from constant pool (use relocation)
   488  		switch v.Aux.(type) {
   489  		default:
   490  			v.Fatalf("aux is of unknown type %T", v.Aux)
   491  		case *obj.LSym:
   492  			wantreg = "SB"
   493  			ssagen.AddAux(&p.From, v)
   494  		case *ir.Name:
   495  			wantreg = "SP"
   496  			ssagen.AddAux(&p.From, v)
   497  		case nil:
   498  			// No sym, just MOVD $off(SP), R
   499  			wantreg = "SP"
   500  			p.From.Offset = v.AuxInt
   501  		}
   502  		if reg := v.Args[0].RegName(); reg != wantreg {
   503  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   504  		}
   505  	case ssa.OpARM64MOVBload,
   506  		ssa.OpARM64MOVBUload,
   507  		ssa.OpARM64MOVHload,
   508  		ssa.OpARM64MOVHUload,
   509  		ssa.OpARM64MOVWload,
   510  		ssa.OpARM64MOVWUload,
   511  		ssa.OpARM64MOVDload,
   512  		ssa.OpARM64FMOVSload,
   513  		ssa.OpARM64FMOVDload:
   514  		p := s.Prog(v.Op.Asm())
   515  		p.From.Type = obj.TYPE_MEM
   516  		p.From.Reg = v.Args[0].Reg()
   517  		ssagen.AddAux(&p.From, v)
   518  		p.To.Type = obj.TYPE_REG
   519  		p.To.Reg = v.Reg()
   520  	case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS:
   521  		p := s.Prog(v.Op.Asm())
   522  		p.From.Type = obj.TYPE_MEM
   523  		p.From.Reg = v.Args[0].Reg()
   524  		ssagen.AddAux(&p.From, v)
   525  		p.To.Type = obj.TYPE_REGREG
   526  		p.To.Reg = v.Reg0()
   527  		p.To.Offset = int64(v.Reg1())
   528  	case ssa.OpARM64MOVBloadidx,
   529  		ssa.OpARM64MOVBUloadidx,
   530  		ssa.OpARM64MOVHloadidx,
   531  		ssa.OpARM64MOVHUloadidx,
   532  		ssa.OpARM64MOVWloadidx,
   533  		ssa.OpARM64MOVWUloadidx,
   534  		ssa.OpARM64MOVDloadidx,
   535  		ssa.OpARM64FMOVSloadidx,
   536  		ssa.OpARM64FMOVDloadidx,
   537  		ssa.OpARM64MOVHloadidx2,
   538  		ssa.OpARM64MOVHUloadidx2,
   539  		ssa.OpARM64MOVWloadidx4,
   540  		ssa.OpARM64MOVWUloadidx4,
   541  		ssa.OpARM64MOVDloadidx8,
   542  		ssa.OpARM64FMOVDloadidx8,
   543  		ssa.OpARM64FMOVSloadidx4:
   544  		p := s.Prog(v.Op.Asm())
   545  		p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   546  		p.To.Type = obj.TYPE_REG
   547  		p.To.Reg = v.Reg()
   548  	case ssa.OpARM64LDAR,
   549  		ssa.OpARM64LDARB,
   550  		ssa.OpARM64LDARW:
   551  		p := s.Prog(v.Op.Asm())
   552  		p.From.Type = obj.TYPE_MEM
   553  		p.From.Reg = v.Args[0].Reg()
   554  		ssagen.AddAux(&p.From, v)
   555  		p.To.Type = obj.TYPE_REG
   556  		p.To.Reg = v.Reg0()
   557  	case ssa.OpARM64MOVBstore,
   558  		ssa.OpARM64MOVHstore,
   559  		ssa.OpARM64MOVWstore,
   560  		ssa.OpARM64MOVDstore,
   561  		ssa.OpARM64FMOVSstore,
   562  		ssa.OpARM64FMOVDstore,
   563  		ssa.OpARM64STLRB,
   564  		ssa.OpARM64STLR,
   565  		ssa.OpARM64STLRW:
   566  		p := s.Prog(v.Op.Asm())
   567  		p.From.Type = obj.TYPE_REG
   568  		p.From.Reg = v.Args[1].Reg()
   569  		p.To.Type = obj.TYPE_MEM
   570  		p.To.Reg = v.Args[0].Reg()
   571  		ssagen.AddAux(&p.To, v)
   572  	case ssa.OpARM64MOVBstoreidx,
   573  		ssa.OpARM64MOVHstoreidx,
   574  		ssa.OpARM64MOVWstoreidx,
   575  		ssa.OpARM64MOVDstoreidx,
   576  		ssa.OpARM64FMOVSstoreidx,
   577  		ssa.OpARM64FMOVDstoreidx,
   578  		ssa.OpARM64MOVHstoreidx2,
   579  		ssa.OpARM64MOVWstoreidx4,
   580  		ssa.OpARM64FMOVSstoreidx4,
   581  		ssa.OpARM64MOVDstoreidx8,
   582  		ssa.OpARM64FMOVDstoreidx8:
   583  		p := s.Prog(v.Op.Asm())
   584  		p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   585  		p.From.Type = obj.TYPE_REG
   586  		p.From.Reg = v.Args[2].Reg()
   587  	case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS:
   588  		p := s.Prog(v.Op.Asm())
   589  		p.From.Type = obj.TYPE_REGREG
   590  		p.From.Reg = v.Args[1].Reg()
   591  		p.From.Offset = int64(v.Args[2].Reg())
   592  		p.To.Type = obj.TYPE_MEM
   593  		p.To.Reg = v.Args[0].Reg()
   594  		ssagen.AddAux(&p.To, v)
   595  	case ssa.OpARM64BFI,
   596  		ssa.OpARM64BFXIL:
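         		// AuxInt packs the bitfield spec: the lsb is in the high bits (AuxInt>>8)
         		// and the width is in the low 8 bits. The SBFIZ/SBFX/UBFIZ/UBFX case below
         		// uses the same encoding.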
   597  		p := s.Prog(v.Op.Asm())
   598  		p.From.Type = obj.TYPE_CONST
   599  		p.From.Offset = v.AuxInt >> 8
   600  		p.AddRestSourceConst(v.AuxInt & 0xff)
   601  		p.Reg = v.Args[1].Reg()
   602  		p.To.Type = obj.TYPE_REG
   603  		p.To.Reg = v.Reg()
   604  	case ssa.OpARM64SBFIZ,
   605  		ssa.OpARM64SBFX,
   606  		ssa.OpARM64UBFIZ,
   607  		ssa.OpARM64UBFX:
   608  		p := s.Prog(v.Op.Asm())
   609  		p.From.Type = obj.TYPE_CONST
   610  		p.From.Offset = v.AuxInt >> 8
   611  		p.AddRestSourceConst(v.AuxInt & 0xff)
   612  		p.Reg = v.Args[0].Reg()
   613  		p.To.Type = obj.TYPE_REG
   614  		p.To.Reg = v.Reg()
   615  	case ssa.OpARM64LoweredAtomicExchange64,
   616  		ssa.OpARM64LoweredAtomicExchange32,
   617  		ssa.OpARM64LoweredAtomicExchange8:
   618  		// LDAXR	(Rarg0), Rout
   619  		// STLXR	Rarg1, (Rarg0), Rtmp
   620  		// CBNZ		Rtmp, -2(PC)
   621  		var ld, st obj.As
   622  		switch v.Op {
   623  		case ssa.OpARM64LoweredAtomicExchange8:
   624  			ld = arm64.ALDAXRB
   625  			st = arm64.ASTLXRB
   626  		case ssa.OpARM64LoweredAtomicExchange32:
   627  			ld = arm64.ALDAXRW
   628  			st = arm64.ASTLXRW
   629  		case ssa.OpARM64LoweredAtomicExchange64:
   630  			ld = arm64.ALDAXR
   631  			st = arm64.ASTLXR
   632  		}
   633  		r0 := v.Args[0].Reg()
   634  		r1 := v.Args[1].Reg()
   635  		out := v.Reg0()
   636  		p := s.Prog(ld)
   637  		p.From.Type = obj.TYPE_MEM
   638  		p.From.Reg = r0
   639  		p.To.Type = obj.TYPE_REG
   640  		p.To.Reg = out
   641  		p1 := s.Prog(st)
   642  		p1.From.Type = obj.TYPE_REG
   643  		p1.From.Reg = r1
   644  		p1.To.Type = obj.TYPE_MEM
   645  		p1.To.Reg = r0
   646  		p1.RegTo2 = arm64.REGTMP
   647  		p2 := s.Prog(arm64.ACBNZ)
   648  		p2.From.Type = obj.TYPE_REG
   649  		p2.From.Reg = arm64.REGTMP
   650  		p2.To.Type = obj.TYPE_BRANCH
   651  		p2.To.SetTarget(p)
   652  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   653  		ssa.OpARM64LoweredAtomicExchange32Variant,
   654  		ssa.OpARM64LoweredAtomicExchange8Variant:
   655  		var swap obj.As
   656  		switch v.Op {
   657  		case ssa.OpARM64LoweredAtomicExchange8Variant:
   658  			swap = arm64.ASWPALB
   659  		case ssa.OpARM64LoweredAtomicExchange32Variant:
   660  			swap = arm64.ASWPALW
   661  		case ssa.OpARM64LoweredAtomicExchange64Variant:
   662  			swap = arm64.ASWPALD
   663  		}
   664  		r0 := v.Args[0].Reg()
   665  		r1 := v.Args[1].Reg()
   666  		out := v.Reg0()
   667  
   668  		// SWPALD	Rarg1, (Rarg0), Rout
   669  		p := s.Prog(swap)
   670  		p.From.Type = obj.TYPE_REG
   671  		p.From.Reg = r1
   672  		p.To.Type = obj.TYPE_MEM
   673  		p.To.Reg = r0
   674  		p.RegTo2 = out
   675  
   676  	case ssa.OpARM64LoweredAtomicAdd64,
   677  		ssa.OpARM64LoweredAtomicAdd32:
   678  		// LDAXR	(Rarg0), Rout
   679  		// ADD		Rarg1, Rout
   680  		// STLXR	Rout, (Rarg0), Rtmp
   681  		// CBNZ		Rtmp, -3(PC)
   682  		ld := arm64.ALDAXR
   683  		st := arm64.ASTLXR
   684  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   685  			ld = arm64.ALDAXRW
   686  			st = arm64.ASTLXRW
   687  		}
   688  		r0 := v.Args[0].Reg()
   689  		r1 := v.Args[1].Reg()
   690  		out := v.Reg0()
   691  		p := s.Prog(ld)
   692  		p.From.Type = obj.TYPE_MEM
   693  		p.From.Reg = r0
   694  		p.To.Type = obj.TYPE_REG
   695  		p.To.Reg = out
   696  		p1 := s.Prog(arm64.AADD)
   697  		p1.From.Type = obj.TYPE_REG
   698  		p1.From.Reg = r1
   699  		p1.To.Type = obj.TYPE_REG
   700  		p1.To.Reg = out
   701  		p2 := s.Prog(st)
   702  		p2.From.Type = obj.TYPE_REG
   703  		p2.From.Reg = out
   704  		p2.To.Type = obj.TYPE_MEM
   705  		p2.To.Reg = r0
   706  		p2.RegTo2 = arm64.REGTMP
   707  		p3 := s.Prog(arm64.ACBNZ)
   708  		p3.From.Type = obj.TYPE_REG
   709  		p3.From.Reg = arm64.REGTMP
   710  		p3.To.Type = obj.TYPE_BRANCH
   711  		p3.To.SetTarget(p)
   712  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   713  		ssa.OpARM64LoweredAtomicAdd32Variant:
   714  		// LDADDAL	Rarg1, (Rarg0), Rout
   715  		// ADD		Rarg1, Rout
   716  		op := arm64.ALDADDALD
   717  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   718  			op = arm64.ALDADDALW
   719  		}
   720  		r0 := v.Args[0].Reg()
   721  		r1 := v.Args[1].Reg()
   722  		out := v.Reg0()
   723  		p := s.Prog(op)
   724  		p.From.Type = obj.TYPE_REG
   725  		p.From.Reg = r1
   726  		p.To.Type = obj.TYPE_MEM
   727  		p.To.Reg = r0
   728  		p.RegTo2 = out
   729  		p1 := s.Prog(arm64.AADD)
   730  		p1.From.Type = obj.TYPE_REG
   731  		p1.From.Reg = r1
   732  		p1.To.Type = obj.TYPE_REG
   733  		p1.To.Reg = out
   734  	case ssa.OpARM64LoweredAtomicCas64,
   735  		ssa.OpARM64LoweredAtomicCas32:
   736  		// LDAXR	(Rarg0), Rtmp
   737  		// CMP		Rarg1, Rtmp
   738  		// BNE		3(PC)
   739  		// STLXR	Rarg2, (Rarg0), Rtmp
   740  		// CBNZ		Rtmp, -4(PC)
   741  		// CSET		EQ, Rout
   742  		ld := arm64.ALDAXR
   743  		st := arm64.ASTLXR
   744  		cmp := arm64.ACMP
   745  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   746  			ld = arm64.ALDAXRW
   747  			st = arm64.ASTLXRW
   748  			cmp = arm64.ACMPW
   749  		}
   750  		r0 := v.Args[0].Reg()
   751  		r1 := v.Args[1].Reg()
   752  		r2 := v.Args[2].Reg()
   753  		out := v.Reg0()
   754  		p := s.Prog(ld)
   755  		p.From.Type = obj.TYPE_MEM
   756  		p.From.Reg = r0
   757  		p.To.Type = obj.TYPE_REG
   758  		p.To.Reg = arm64.REGTMP
   759  		p1 := s.Prog(cmp)
   760  		p1.From.Type = obj.TYPE_REG
   761  		p1.From.Reg = r1
   762  		p1.Reg = arm64.REGTMP
   763  		p2 := s.Prog(arm64.ABNE)
   764  		p2.To.Type = obj.TYPE_BRANCH
   765  		p3 := s.Prog(st)
   766  		p3.From.Type = obj.TYPE_REG
   767  		p3.From.Reg = r2
   768  		p3.To.Type = obj.TYPE_MEM
   769  		p3.To.Reg = r0
   770  		p3.RegTo2 = arm64.REGTMP
   771  		p4 := s.Prog(arm64.ACBNZ)
   772  		p4.From.Type = obj.TYPE_REG
   773  		p4.From.Reg = arm64.REGTMP
   774  		p4.To.Type = obj.TYPE_BRANCH
   775  		p4.To.SetTarget(p)
   776  		p5 := s.Prog(arm64.ACSET)
   777  		p5.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   778  		p5.From.Offset = int64(arm64.SPOP_EQ)
   779  		p5.To.Type = obj.TYPE_REG
   780  		p5.To.Reg = out
   781  		p2.To.SetTarget(p5)
   782  	case ssa.OpARM64LoweredAtomicCas64Variant,
   783  		ssa.OpARM64LoweredAtomicCas32Variant:
   784  		// Rarg0: ptr
   785  		// Rarg1: old
   786  		// Rarg2: new
   787  		// MOV  	Rarg1, Rtmp
   788  		// CASAL	Rtmp, (Rarg0), Rarg2
   789  		// CMP  	Rarg1, Rtmp
   790  		// CSET 	EQ, Rout
   791  		cas := arm64.ACASALD
   792  		cmp := arm64.ACMP
   793  		mov := arm64.AMOVD
   794  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   795  			cas = arm64.ACASALW
   796  			cmp = arm64.ACMPW
   797  			mov = arm64.AMOVW
   798  		}
   799  		r0 := v.Args[0].Reg()
   800  		r1 := v.Args[1].Reg()
   801  		r2 := v.Args[2].Reg()
   802  		out := v.Reg0()
   803  
   804  		// MOV  	Rarg1, Rtmp
   805  		p := s.Prog(mov)
   806  		p.From.Type = obj.TYPE_REG
   807  		p.From.Reg = r1
   808  		p.To.Type = obj.TYPE_REG
   809  		p.To.Reg = arm64.REGTMP
   810  
   811  		// CASAL	Rtmp, (Rarg0), Rarg2
   812  		p1 := s.Prog(cas)
   813  		p1.From.Type = obj.TYPE_REG
   814  		p1.From.Reg = arm64.REGTMP
   815  		p1.To.Type = obj.TYPE_MEM
   816  		p1.To.Reg = r0
   817  		p1.RegTo2 = r2
   818  
   819  		// CMP  	Rarg1, Rtmp
   820  		p2 := s.Prog(cmp)
   821  		p2.From.Type = obj.TYPE_REG
   822  		p2.From.Reg = r1
   823  		p2.Reg = arm64.REGTMP
   824  
   825  		// CSET 	EQ, Rout
   826  		p3 := s.Prog(arm64.ACSET)
   827  		p3.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   828  		p3.From.Offset = int64(arm64.SPOP_EQ)
   829  		p3.To.Type = obj.TYPE_REG
   830  		p3.To.Reg = out
   831  
   832  	case ssa.OpARM64LoweredAtomicAnd64,
   833  		ssa.OpARM64LoweredAtomicOr64,
   834  		ssa.OpARM64LoweredAtomicAnd32,
   835  		ssa.OpARM64LoweredAtomicOr32,
   836  		ssa.OpARM64LoweredAtomicAnd8,
   837  		ssa.OpARM64LoweredAtomicOr8:
   838  		// LDAXR[BW] (Rarg0), Rout
   839  		// AND/OR	Rarg1, Rout, tmp1
   840  		// STLXR[BW] tmp1, (Rarg0), Rtmp
   841  		// CBNZ		Rtmp, -3(PC)
   842  		ld := arm64.ALDAXR
   843  		st := arm64.ASTLXR
   844  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   845  			ld = arm64.ALDAXRW
   846  			st = arm64.ASTLXRW
   847  		}
   848  		if v.Op == ssa.OpARM64LoweredAtomicAnd8 || v.Op == ssa.OpARM64LoweredAtomicOr8 {
   849  			ld = arm64.ALDAXRB
   850  			st = arm64.ASTLXRB
   851  		}
   852  		r0 := v.Args[0].Reg()
   853  		r1 := v.Args[1].Reg()
   854  		out := v.Reg0()
   855  		tmp := v.RegTmp()
   856  		p := s.Prog(ld)
   857  		p.From.Type = obj.TYPE_MEM
   858  		p.From.Reg = r0
   859  		p.To.Type = obj.TYPE_REG
   860  		p.To.Reg = out
   861  		p1 := s.Prog(v.Op.Asm())
   862  		p1.From.Type = obj.TYPE_REG
   863  		p1.From.Reg = r1
   864  		p1.Reg = out
   865  		p1.To.Type = obj.TYPE_REG
   866  		p1.To.Reg = tmp
   867  		p2 := s.Prog(st)
   868  		p2.From.Type = obj.TYPE_REG
   869  		p2.From.Reg = tmp
   870  		p2.To.Type = obj.TYPE_MEM
   871  		p2.To.Reg = r0
   872  		p2.RegTo2 = arm64.REGTMP
   873  		p3 := s.Prog(arm64.ACBNZ)
   874  		p3.From.Type = obj.TYPE_REG
   875  		p3.From.Reg = arm64.REGTMP
   876  		p3.To.Type = obj.TYPE_BRANCH
   877  		p3.To.SetTarget(p)
   878  
   879  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   880  		ssa.OpARM64LoweredAtomicAnd32Variant,
   881  		ssa.OpARM64LoweredAtomicAnd64Variant:
   882  		atomic_clear := arm64.ALDCLRALD
   883  		if v.Op == ssa.OpARM64LoweredAtomicAnd32Variant {
   884  			atomic_clear = arm64.ALDCLRALW
   885  		}
   886  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   887  			atomic_clear = arm64.ALDCLRALB
   888  		}
   889  		r0 := v.Args[0].Reg()
   890  		r1 := v.Args[1].Reg()
   891  		out := v.Reg0()
   892  
    893  		// MVN       Rarg1, Rtemp
   894  		p := s.Prog(arm64.AMVN)
   895  		p.From.Type = obj.TYPE_REG
   896  		p.From.Reg = r1
   897  		p.To.Type = obj.TYPE_REG
   898  		p.To.Reg = arm64.REGTMP
   899  
   900  		// LDCLRAL[BDW]  Rtemp, (Rarg0), Rout
   901  		p1 := s.Prog(atomic_clear)
   902  		p1.From.Type = obj.TYPE_REG
   903  		p1.From.Reg = arm64.REGTMP
   904  		p1.To.Type = obj.TYPE_MEM
   905  		p1.To.Reg = r0
   906  		p1.RegTo2 = out
   907  
   908  	case ssa.OpARM64LoweredAtomicOr8Variant,
   909  		ssa.OpARM64LoweredAtomicOr32Variant,
   910  		ssa.OpARM64LoweredAtomicOr64Variant:
   911  		atomic_or := arm64.ALDORALD
   912  		if v.Op == ssa.OpARM64LoweredAtomicOr32Variant {
   913  			atomic_or = arm64.ALDORALW
   914  		}
   915  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   916  			atomic_or = arm64.ALDORALB
   917  		}
   918  		r0 := v.Args[0].Reg()
   919  		r1 := v.Args[1].Reg()
   920  		out := v.Reg0()
   921  
   922  		// LDORAL[BDW]  Rarg1, (Rarg0), Rout
   923  		p := s.Prog(atomic_or)
   924  		p.From.Type = obj.TYPE_REG
   925  		p.From.Reg = r1
   926  		p.To.Type = obj.TYPE_MEM
   927  		p.To.Reg = r0
   928  		p.RegTo2 = out
   929  
   930  	case ssa.OpARM64MOVBreg,
   931  		ssa.OpARM64MOVBUreg,
   932  		ssa.OpARM64MOVHreg,
   933  		ssa.OpARM64MOVHUreg,
   934  		ssa.OpARM64MOVWreg,
   935  		ssa.OpARM64MOVWUreg:
   936  		a := v.Args[0]
   937  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   938  			a = a.Args[0]
   939  		}
   940  		if a.Op == ssa.OpLoadReg {
   941  			t := a.Type
   942  			switch {
   943  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   944  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   945  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   946  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   947  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   948  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   949  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   950  				if v.Reg() == v.Args[0].Reg() {
   951  					return
   952  				}
   953  				p := s.Prog(arm64.AMOVD)
   954  				p.From.Type = obj.TYPE_REG
   955  				p.From.Reg = v.Args[0].Reg()
   956  				p.To.Type = obj.TYPE_REG
   957  				p.To.Reg = v.Reg()
   958  				return
   959  			default:
   960  			}
   961  		}
   962  		fallthrough
   963  	case ssa.OpARM64MVN,
   964  		ssa.OpARM64NEG,
   965  		ssa.OpARM64FABSD,
   966  		ssa.OpARM64FMOVDfpgp,
   967  		ssa.OpARM64FMOVDgpfp,
   968  		ssa.OpARM64FMOVSfpgp,
   969  		ssa.OpARM64FMOVSgpfp,
   970  		ssa.OpARM64FNEGS,
   971  		ssa.OpARM64FNEGD,
   972  		ssa.OpARM64FSQRTS,
   973  		ssa.OpARM64FSQRTD,
   974  		ssa.OpARM64FCVTZSSW,
   975  		ssa.OpARM64FCVTZSDW,
   976  		ssa.OpARM64FCVTZUSW,
   977  		ssa.OpARM64FCVTZUDW,
   978  		ssa.OpARM64FCVTZSS,
   979  		ssa.OpARM64FCVTZSD,
   980  		ssa.OpARM64FCVTZUS,
   981  		ssa.OpARM64FCVTZUD,
   982  		ssa.OpARM64SCVTFWS,
   983  		ssa.OpARM64SCVTFWD,
   984  		ssa.OpARM64SCVTFS,
   985  		ssa.OpARM64SCVTFD,
   986  		ssa.OpARM64UCVTFWS,
   987  		ssa.OpARM64UCVTFWD,
   988  		ssa.OpARM64UCVTFS,
   989  		ssa.OpARM64UCVTFD,
   990  		ssa.OpARM64FCVTSD,
   991  		ssa.OpARM64FCVTDS,
   992  		ssa.OpARM64REV,
   993  		ssa.OpARM64REVW,
   994  		ssa.OpARM64REV16,
   995  		ssa.OpARM64REV16W,
   996  		ssa.OpARM64RBIT,
   997  		ssa.OpARM64RBITW,
   998  		ssa.OpARM64CLZ,
   999  		ssa.OpARM64CLZW,
  1000  		ssa.OpARM64FRINTAD,
  1001  		ssa.OpARM64FRINTMD,
  1002  		ssa.OpARM64FRINTND,
  1003  		ssa.OpARM64FRINTPD,
  1004  		ssa.OpARM64FRINTZD:
  1005  		p := s.Prog(v.Op.Asm())
  1006  		p.From.Type = obj.TYPE_REG
  1007  		p.From.Reg = v.Args[0].Reg()
  1008  		p.To.Type = obj.TYPE_REG
  1009  		p.To.Reg = v.Reg()
  1010  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
  1011  		// input is already rounded
  1012  	case ssa.OpARM64VCNT:
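         		// VCNT counts the set bits in each byte of the vector. Build the operands
         		// as V registers with an 8B arrangement from the F register numbers.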
  1013  		p := s.Prog(v.Op.Asm())
  1014  		p.From.Type = obj.TYPE_REG
  1015  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1016  		p.To.Type = obj.TYPE_REG
  1017  		p.To.Reg = (v.Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1018  	case ssa.OpARM64VUADDLV:
  1019  		p := s.Prog(v.Op.Asm())
  1020  		p.From.Type = obj.TYPE_REG
  1021  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1022  		p.To.Type = obj.TYPE_REG
  1023  		p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
  1024  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
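         		// CSEL0 has only one register argument; its second ("false") operand is
         		// the zero register.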
  1025  		r1 := int16(arm64.REGZERO)
  1026  		if v.Op != ssa.OpARM64CSEL0 {
  1027  			r1 = v.Args[1].Reg()
  1028  		}
  1029  		p := s.Prog(v.Op.Asm())
  1030  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1031  		condCode := condBits[ssa.Op(v.AuxInt)]
  1032  		p.From.Offset = int64(condCode)
  1033  		p.Reg = v.Args[0].Reg()
  1034  		p.AddRestSourceReg(r1)
  1035  		p.To.Type = obj.TYPE_REG
  1036  		p.To.Reg = v.Reg()
  1037  	case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
  1038  		p := s.Prog(v.Op.Asm())
  1039  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1040  		condCode := condBits[ssa.Op(v.AuxInt)]
  1041  		p.From.Offset = int64(condCode)
  1042  		p.Reg = v.Args[0].Reg()
  1043  		p.AddRestSourceReg(v.Args[1].Reg())
  1044  		p.To.Type = obj.TYPE_REG
  1045  		p.To.Reg = v.Reg()
  1046  	case ssa.OpARM64CSETM:
  1047  		p := s.Prog(arm64.ACSETM)
  1048  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1049  		condCode := condBits[ssa.Op(v.AuxInt)]
  1050  		p.From.Offset = int64(condCode)
  1051  		p.To.Type = obj.TYPE_REG
  1052  		p.To.Reg = v.Reg()
  1053  	case ssa.OpARM64CCMP,
  1054  		ssa.OpARM64CCMN,
  1055  		ssa.OpARM64CCMPconst,
  1056  		ssa.OpARM64CCMNconst,
  1057  		ssa.OpARM64CCMPW,
  1058  		ssa.OpARM64CCMNW,
  1059  		ssa.OpARM64CCMPWconst,
  1060  		ssa.OpARM64CCMNWconst:
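         		// From.Offset holds the condition; To.Offset holds the NZCV immediate
         		// that the flags are set to when the condition does not hold.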
  1061  		p := s.Prog(v.Op.Asm())
  1062  		p.Reg = v.Args[0].Reg()
  1063  		params := v.AuxArm64ConditionalParams()
  1064  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1065  		p.From.Offset = int64(condBits[params.Cond()])
  1066  		constValue, ok := params.ConstValue()
  1067  		if ok {
  1068  			p.AddRestSourceConst(constValue)
  1069  		} else {
  1070  			p.AddRestSourceReg(v.Args[1].Reg())
  1071  		}
  1072  		p.To.Type = obj.TYPE_CONST
  1073  		p.To.Offset = params.Nzcv()
  1074  	case ssa.OpARM64LoweredZero:
  1075  		ptrReg := v.Args[0].Reg()
  1076  		n := v.AuxInt
  1077  		if n < 16 {
  1078  			v.Fatalf("Zero too small %d", n)
  1079  		}
  1080  
  1081  		// Generate zeroing instructions.
  1082  		var off int64
  1083  		for n >= 16 {
  1084  			//  STP     (ZR, ZR), off(ptrReg)
  1085  			zero16(s, ptrReg, off, false)
  1086  			off += 16
  1087  			n -= 16
  1088  		}
  1089  		// Write any fractional portion.
  1090  		// An overlapping 16-byte write can't be used here
  1091  		// because STP's offsets must be a multiple of 8.
  1092  		if n > 8 {
  1093  			//  MOVD    ZR, off(ptrReg)
  1094  			zero8(s, ptrReg, off)
  1095  			off += 8
  1096  			n -= 8
  1097  		}
  1098  		if n != 0 {
  1099  			//  MOVD    ZR, off+n-8(ptrReg)
  1100  			// TODO: for n<=4 we could use a smaller write.
  1101  			zero8(s, ptrReg, off+n-8)
  1102  		}
  1103  	case ssa.OpARM64LoweredZeroLoop:
  1104  		ptrReg := v.Args[0].Reg()
  1105  		countReg := v.RegTmp()
  1106  		n := v.AuxInt
  1107  		loopSize := int64(64)
  1108  		if n < 3*loopSize {
  1109  			// - a loop count of 0 won't work.
  1110  			// - a loop count of 1 is useless.
  1111  			// - a loop count of 2 is a code size ~tie
  1112  			//     3 instructions to implement the loop
  1113  			//     4 instructions in the loop body
  1114  			//   vs
  1115  			//     8 instructions in the straightline code
  1116  			//   Might as well use straightline code.
  1117  			v.Fatalf("ZeroLoop size too small %d", n)
  1118  		}
  1119  
  1120  		// Put iteration count in a register.
   1121  		//   MOVD    $(n/loopSize), countReg
  1122  		p := s.Prog(arm64.AMOVD)
  1123  		p.From.Type = obj.TYPE_CONST
  1124  		p.From.Offset = n / loopSize
  1125  		p.To.Type = obj.TYPE_REG
  1126  		p.To.Reg = countReg
  1127  		cntInit := p
  1128  
  1129  		// Zero loopSize bytes starting at ptrReg.
  1130  		// Increment ptrReg by loopSize as a side effect.
  1131  		for range loopSize / 16 {
  1132  			//  STP.P   (ZR, ZR), 16(ptrReg)
  1133  			zero16(s, ptrReg, 0, true)
  1134  			// TODO: should we use the postincrement form,
  1135  			// or use a separate += 64 instruction?
  1136  			// postincrement saves an instruction, but maybe
  1137  			// it requires more integer units to do the +=16s.
  1138  		}
  1139  		// Decrement loop count.
  1140  		//   SUB     $1, countReg
  1141  		p = s.Prog(arm64.ASUB)
  1142  		p.From.Type = obj.TYPE_CONST
  1143  		p.From.Offset = 1
  1144  		p.To.Type = obj.TYPE_REG
  1145  		p.To.Reg = countReg
  1146  		// Jump to loop header if we're not done yet.
  1147  		//   CBNZ    head
  1148  		p = s.Prog(arm64.ACBNZ)
  1149  		p.From.Type = obj.TYPE_REG
  1150  		p.From.Reg = countReg
  1151  		p.To.Type = obj.TYPE_BRANCH
  1152  		p.To.SetTarget(cntInit.Link)
  1153  
  1154  		// Multiples of the loop size are now done.
  1155  		n %= loopSize
  1156  
  1157  		// Write any fractional portion.
  1158  		var off int64
  1159  		for n >= 16 {
  1160  			//  STP     (ZR, ZR), off(ptrReg)
  1161  			zero16(s, ptrReg, off, false)
  1162  			off += 16
  1163  			n -= 16
  1164  		}
  1165  		if n > 8 {
  1166  			// Note: an overlapping 16-byte write can't be used
  1167  			// here because STP's offsets must be a multiple of 8.
  1168  			//  MOVD    ZR, off(ptrReg)
  1169  			zero8(s, ptrReg, off)
  1170  			off += 8
  1171  			n -= 8
  1172  		}
  1173  		if n != 0 {
  1174  			//  MOVD    ZR, off+n-8(ptrReg)
  1175  			// TODO: for n<=4 we could use a smaller write.
  1176  			zero8(s, ptrReg, off+n-8)
  1177  		}
  1178  		// TODO: maybe we should use the count register to instead
  1179  		// hold an end pointer and compare against that?
  1180  		//   ADD $n, ptrReg, endReg
  1181  		// then
  1182  		//   CMP ptrReg, endReg
  1183  		//   BNE loop
   1184  		// There's a past-the-end pointer here; any problem with that?
  1185  
  1186  	case ssa.OpARM64LoweredMove:
  1187  		dstReg := v.Args[0].Reg()
  1188  		srcReg := v.Args[1].Reg()
  1189  		if dstReg == srcReg {
  1190  			break
  1191  		}
  1192  		tmpReg1 := int16(arm64.REG_R25)
  1193  		tmpFReg1 := int16(arm64.REG_F16)
  1194  		tmpFReg2 := int16(arm64.REG_F17)
  1195  		n := v.AuxInt
  1196  		if n < 16 {
  1197  			v.Fatalf("Move too small %d", n)
  1198  		}
  1199  
  1200  		// Generate copying instructions.
  1201  		var off int64
  1202  		for n >= 32 {
  1203  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1204  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1205  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1206  			off += 32
  1207  			n -= 32
  1208  		}
  1209  		for n >= 16 {
  1210  			//  FMOVQ   off(src), tmpFReg1
  1211  			//  FMOVQ   tmpFReg1, off(dst)
  1212  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1213  			off += 16
  1214  			n -= 16
  1215  		}
  1216  		if n > 8 {
  1217  			//  MOVD    off(srcReg), tmpReg1
  1218  			//  MOVD    tmpReg1, off(dstReg)
  1219  			move8(s, srcReg, dstReg, tmpReg1, off)
  1220  			off += 8
  1221  			n -= 8
  1222  		}
  1223  		if n != 0 {
  1224  			//  MOVD    off+n-8(srcReg), tmpReg1
  1225  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1226  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1227  		}
  1228  	case ssa.OpARM64LoweredMoveLoop:
  1229  		dstReg := v.Args[0].Reg()
  1230  		srcReg := v.Args[1].Reg()
  1231  		if dstReg == srcReg {
  1232  			break
  1233  		}
  1234  		countReg := int16(arm64.REG_R24)
  1235  		tmpReg1 := int16(arm64.REG_R25)
  1236  		tmpFReg1 := int16(arm64.REG_F16)
  1237  		tmpFReg2 := int16(arm64.REG_F17)
  1238  		n := v.AuxInt
  1239  		loopSize := int64(64)
  1240  		if n < 3*loopSize {
  1241  			// - a loop count of 0 won't work.
  1242  			// - a loop count of 1 is useless.
  1243  			// - a loop count of 2 is a code size ~tie
  1244  			//     3 instructions to implement the loop
  1245  			//     4 instructions in the loop body
  1246  			//   vs
  1247  			//     8 instructions in the straightline code
  1248  			//   Might as well use straightline code.
  1249  			v.Fatalf("ZeroLoop size too small %d", n)
  1250  		}
  1251  
  1252  		// Put iteration count in a register.
   1253  		//   MOVD    $(n/loopSize), countReg
  1254  		p := s.Prog(arm64.AMOVD)
  1255  		p.From.Type = obj.TYPE_CONST
  1256  		p.From.Offset = n / loopSize
  1257  		p.To.Type = obj.TYPE_REG
  1258  		p.To.Reg = countReg
  1259  		cntInit := p
  1260  
  1261  		// Move loopSize bytes starting at srcReg to dstReg.
   1262  		// Increment srcReg and dstReg by loopSize as a side effect.
  1263  		for range loopSize / 32 {
  1264  			// FLDPQ.P 32(srcReg), (tmpFReg1, tmpFReg2)
  1265  			// FSTPQ.P (tmpFReg1, tmpFReg2), 32(dstReg)
  1266  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, 0, true)
  1267  		}
  1268  		// Decrement loop count.
  1269  		//   SUB     $1, countReg
  1270  		p = s.Prog(arm64.ASUB)
  1271  		p.From.Type = obj.TYPE_CONST
  1272  		p.From.Offset = 1
  1273  		p.To.Type = obj.TYPE_REG
  1274  		p.To.Reg = countReg
  1275  		// Jump to loop header if we're not done yet.
  1276  		//   CBNZ    head
  1277  		p = s.Prog(arm64.ACBNZ)
  1278  		p.From.Type = obj.TYPE_REG
  1279  		p.From.Reg = countReg
  1280  		p.To.Type = obj.TYPE_BRANCH
  1281  		p.To.SetTarget(cntInit.Link)
  1282  
  1283  		// Multiples of the loop size are now done.
  1284  		n %= loopSize
  1285  
  1286  		// Copy any fractional portion.
  1287  		var off int64
  1288  		for n >= 32 {
  1289  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1290  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1291  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1292  			off += 32
  1293  			n -= 32
  1294  		}
  1295  		for n >= 16 {
  1296  			//  FMOVQ   off(src), tmpFReg1
  1297  			//  FMOVQ   tmpFReg1, off(dst)
  1298  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1299  			off += 16
  1300  			n -= 16
  1301  		}
  1302  		if n > 8 {
  1303  			//  MOVD    off(srcReg), tmpReg1
  1304  			//  MOVD    tmpReg1, off(dstReg)
  1305  			move8(s, srcReg, dstReg, tmpReg1, off)
  1306  			off += 8
  1307  			n -= 8
  1308  		}
  1309  		if n != 0 {
  1310  			//  MOVD    off+n-8(srcReg), tmpReg1
  1311  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1312  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1313  		}
  1314  
  1315  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1316  		s.Call(v)
  1317  	case ssa.OpARM64CALLtail:
  1318  		s.TailCall(v)
  1319  	case ssa.OpARM64LoweredWB:
  1320  		p := s.Prog(obj.ACALL)
  1321  		p.To.Type = obj.TYPE_MEM
  1322  		p.To.Name = obj.NAME_EXTERN
  1323  		// AuxInt encodes how many buffer entries we need.
  1324  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1325  
  1326  	case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
  1327  		// Compute the constant we put in the PCData entry for this call.
  1328  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
  1329  		xIsReg := false
  1330  		yIsReg := false
  1331  		xVal := 0
  1332  		yVal := 0
  1333  		switch v.Op {
  1334  		case ssa.OpARM64LoweredPanicBoundsRR:
  1335  			xIsReg = true
  1336  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1337  			yIsReg = true
  1338  			yVal = int(v.Args[1].Reg() - arm64.REG_R0)
  1339  		case ssa.OpARM64LoweredPanicBoundsRC:
  1340  			xIsReg = true
  1341  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1342  			c := v.Aux.(ssa.PanicBoundsC).C
  1343  			if c >= 0 && c <= abi.BoundsMaxConst {
  1344  				yVal = int(c)
  1345  			} else {
  1346  				// Move constant to a register
  1347  				yIsReg = true
  1348  				if yVal == xVal {
  1349  					yVal = 1
  1350  				}
  1351  				p := s.Prog(arm64.AMOVD)
  1352  				p.From.Type = obj.TYPE_CONST
  1353  				p.From.Offset = c
  1354  				p.To.Type = obj.TYPE_REG
  1355  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1356  			}
  1357  		case ssa.OpARM64LoweredPanicBoundsCR:
  1358  			yIsReg = true
  1359  			yVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1360  			c := v.Aux.(ssa.PanicBoundsC).C
  1361  			if c >= 0 && c <= abi.BoundsMaxConst {
  1362  				xVal = int(c)
  1363  			} else {
   1364  				// Move constant to a register
         				xIsReg = true
  1365  				if xVal == yVal {
  1366  					xVal = 1
  1367  				}
  1368  				p := s.Prog(arm64.AMOVD)
  1369  				p.From.Type = obj.TYPE_CONST
  1370  				p.From.Offset = c
  1371  				p.To.Type = obj.TYPE_REG
  1372  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1373  			}
  1374  		case ssa.OpARM64LoweredPanicBoundsCC:
  1375  			c := v.Aux.(ssa.PanicBoundsCC).Cx
  1376  			if c >= 0 && c <= abi.BoundsMaxConst {
  1377  				xVal = int(c)
  1378  			} else {
  1379  				// Move constant to a register
  1380  				xIsReg = true
  1381  				p := s.Prog(arm64.AMOVD)
  1382  				p.From.Type = obj.TYPE_CONST
  1383  				p.From.Offset = c
  1384  				p.To.Type = obj.TYPE_REG
  1385  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1386  			}
  1387  			c = v.Aux.(ssa.PanicBoundsCC).Cy
  1388  			if c >= 0 && c <= abi.BoundsMaxConst {
  1389  				yVal = int(c)
  1390  			} else {
  1391  				// Move constant to a register
  1392  				yIsReg = true
  1393  				yVal = 1
  1394  				p := s.Prog(arm64.AMOVD)
  1395  				p.From.Type = obj.TYPE_CONST
  1396  				p.From.Offset = c
  1397  				p.To.Type = obj.TYPE_REG
  1398  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1399  			}
  1400  		}
  1401  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
  1402  
  1403  		p := s.Prog(obj.APCDATA)
  1404  		p.From.SetConst(abi.PCDATA_PanicBounds)
  1405  		p.To.SetConst(int64(c))
  1406  		p = s.Prog(obj.ACALL)
  1407  		p.To.Type = obj.TYPE_MEM
  1408  		p.To.Name = obj.NAME_EXTERN
  1409  		p.To.Sym = ir.Syms.PanicBounds
  1410  
  1411  	case ssa.OpARM64LoweredNilCheck:
  1412  		// Issue a load which will fault if arg is nil.
  1413  		p := s.Prog(arm64.AMOVB)
  1414  		p.From.Type = obj.TYPE_MEM
  1415  		p.From.Reg = v.Args[0].Reg()
  1416  		ssagen.AddAux(&p.From, v)
  1417  		p.To.Type = obj.TYPE_REG
  1418  		p.To.Reg = arm64.REGTMP
  1419  		if logopt.Enabled() {
  1420  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1421  		}
  1422  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Line==1 in generated wrappers
  1423  			base.WarnfAt(v.Pos, "generated nil check")
  1424  		}
  1425  	case ssa.OpARM64Equal,
  1426  		ssa.OpARM64NotEqual,
  1427  		ssa.OpARM64LessThan,
  1428  		ssa.OpARM64LessEqual,
  1429  		ssa.OpARM64GreaterThan,
  1430  		ssa.OpARM64GreaterEqual,
  1431  		ssa.OpARM64LessThanU,
  1432  		ssa.OpARM64LessEqualU,
  1433  		ssa.OpARM64GreaterThanU,
  1434  		ssa.OpARM64GreaterEqualU,
  1435  		ssa.OpARM64LessThanF,
  1436  		ssa.OpARM64LessEqualF,
  1437  		ssa.OpARM64GreaterThanF,
  1438  		ssa.OpARM64GreaterEqualF,
  1439  		ssa.OpARM64NotLessThanF,
  1440  		ssa.OpARM64NotLessEqualF,
  1441  		ssa.OpARM64NotGreaterThanF,
  1442  		ssa.OpARM64NotGreaterEqualF,
  1443  		ssa.OpARM64LessThanNoov,
  1444  		ssa.OpARM64GreaterEqualNoov:
  1445  		// generate boolean values using CSET
  1446  		p := s.Prog(arm64.ACSET)
  1447  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1448  		condCode := condBits[v.Op]
  1449  		p.From.Offset = int64(condCode)
  1450  		p.To.Type = obj.TYPE_REG
  1451  		p.To.Reg = v.Reg()
  1452  	case ssa.OpARM64PRFM:
  1453  		p := s.Prog(v.Op.Asm())
  1454  		p.From.Type = obj.TYPE_MEM
  1455  		p.From.Reg = v.Args[0].Reg()
  1456  		p.To.Type = obj.TYPE_CONST
  1457  		p.To.Offset = v.AuxInt
  1458  	case ssa.OpARM64LoweredGetClosurePtr:
  1459  		// Closure pointer is R26 (arm64.REGCTXT).
  1460  		ssagen.CheckLoweredGetClosurePtr(v)
  1461  	case ssa.OpARM64LoweredGetCallerSP:
  1462  		// caller's SP is FixedFrameSize below the address of the first arg
  1463  		p := s.Prog(arm64.AMOVD)
  1464  		p.From.Type = obj.TYPE_ADDR
  1465  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1466  		p.From.Name = obj.NAME_PARAM
  1467  		p.To.Type = obj.TYPE_REG
  1468  		p.To.Reg = v.Reg()
  1469  	case ssa.OpARM64LoweredGetCallerPC:
  1470  		p := s.Prog(obj.AGETCALLERPC)
  1471  		p.To.Type = obj.TYPE_REG
  1472  		p.To.Reg = v.Reg()
  1473  	case ssa.OpARM64DMB:
  1474  		p := s.Prog(v.Op.Asm())
  1475  		p.From.Type = obj.TYPE_CONST
  1476  		p.From.Offset = v.AuxInt
  1477  	case ssa.OpARM64FlagConstant:
  1478  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1479  	case ssa.OpARM64InvertFlags:
  1480  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1481  	case ssa.OpClobber:
  1482  		// MOVW	$0xdeaddead, REGTMP
  1483  		// MOVW	REGTMP, (slot)
  1484  		// MOVW	REGTMP, 4(slot)
  1485  		p := s.Prog(arm64.AMOVW)
  1486  		p.From.Type = obj.TYPE_CONST
  1487  		p.From.Offset = 0xdeaddead
  1488  		p.To.Type = obj.TYPE_REG
  1489  		p.To.Reg = arm64.REGTMP
  1490  		p = s.Prog(arm64.AMOVW)
  1491  		p.From.Type = obj.TYPE_REG
  1492  		p.From.Reg = arm64.REGTMP
  1493  		p.To.Type = obj.TYPE_MEM
  1494  		p.To.Reg = arm64.REGSP
  1495  		ssagen.AddAux(&p.To, v)
  1496  		p = s.Prog(arm64.AMOVW)
  1497  		p.From.Type = obj.TYPE_REG
  1498  		p.From.Reg = arm64.REGTMP
  1499  		p.To.Type = obj.TYPE_MEM
  1500  		p.To.Reg = arm64.REGSP
  1501  		ssagen.AddAux2(&p.To, v, v.AuxInt+4)
  1502  	case ssa.OpClobberReg:
  1503  		x := uint64(0xdeaddeaddeaddead)
  1504  		p := s.Prog(arm64.AMOVD)
  1505  		p.From.Type = obj.TYPE_CONST
  1506  		p.From.Offset = int64(x)
  1507  		p.To.Type = obj.TYPE_REG
  1508  		p.To.Reg = v.Reg()
  1509  	default:
  1510  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1511  	}
  1512  }
  1513  
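         // condBits maps an SSA comparison/condition op to the arm64 condition code
         // used when emitting CSET, CSEL, CCMP, and friends.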
  1514  var condBits = map[ssa.Op]arm64.SpecialOperand{
  1515  	ssa.OpARM64Equal:         arm64.SPOP_EQ,
  1516  	ssa.OpARM64NotEqual:      arm64.SPOP_NE,
  1517  	ssa.OpARM64LessThan:      arm64.SPOP_LT,
  1518  	ssa.OpARM64LessThanU:     arm64.SPOP_LO,
  1519  	ssa.OpARM64LessEqual:     arm64.SPOP_LE,
  1520  	ssa.OpARM64LessEqualU:    arm64.SPOP_LS,
  1521  	ssa.OpARM64GreaterThan:   arm64.SPOP_GT,
  1522  	ssa.OpARM64GreaterThanU:  arm64.SPOP_HI,
  1523  	ssa.OpARM64GreaterEqual:  arm64.SPOP_GE,
  1524  	ssa.OpARM64GreaterEqualU: arm64.SPOP_HS,
  1525  	ssa.OpARM64LessThanF:     arm64.SPOP_MI, // Less than
  1526  	ssa.OpARM64LessEqualF:    arm64.SPOP_LS, // Less than or equal to
  1527  	ssa.OpARM64GreaterThanF:  arm64.SPOP_GT, // Greater than
  1528  	ssa.OpARM64GreaterEqualF: arm64.SPOP_GE, // Greater than or equal to
  1529  
   1530  	// The following condition codes include the unordered case, to handle comparisons involving NaN.
  1531  	ssa.OpARM64NotLessThanF:     arm64.SPOP_PL, // Greater than, equal to, or unordered
  1532  	ssa.OpARM64NotLessEqualF:    arm64.SPOP_HI, // Greater than or unordered
  1533  	ssa.OpARM64NotGreaterThanF:  arm64.SPOP_LE, // Less than, equal to or unordered
  1534  	ssa.OpARM64NotGreaterEqualF: arm64.SPOP_LT, // Less than or unordered
  1535  
  1536  	ssa.OpARM64LessThanNoov:     arm64.SPOP_MI, // Less than but without honoring overflow
  1537  	ssa.OpARM64GreaterEqualNoov: arm64.SPOP_PL, // Greater than or equal to but without honoring overflow
  1538  }
  1539  
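         // blockJump maps a conditional block kind to its branch instructions.
         // asm branches to Succs[0] when the condition holds; invasm is the inverted
         // branch, used when Succs[0] is the fallthrough successor.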
  1540  var blockJump = map[ssa.BlockKind]struct {
  1541  	asm, invasm obj.As
  1542  }{
  1543  	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
  1544  	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
  1545  	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
  1546  	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
  1547  	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
  1548  	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
  1549  	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
  1550  	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
  1551  	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
  1552  	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
  1553  	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
  1554  	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
  1555  	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
  1556  	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
  1557  	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
  1558  	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
  1559  	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
  1560  	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
  1561  	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
  1562  	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
  1563  	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
  1564  	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
  1565  }
  1566  
   1567  // To model a 'LEnoov' ('<=' without overflow checking) branch.
  1568  var leJumps = [2][2]ssagen.IndexJump{
  1569  	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
  1570  	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
  1571  }
  1572  
   1573  // To model a 'GTnoov' ('>' without overflow checking) branch.
  1574  var gtJumps = [2][2]ssagen.IndexJump{
  1575  	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
  1576  	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
  1577  }
  1578  
  1579  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1580  	switch b.Kind {
  1581  	case ssa.BlockPlain, ssa.BlockDefer:
  1582  		if b.Succs[0].Block() != next {
  1583  			p := s.Prog(obj.AJMP)
  1584  			p.To.Type = obj.TYPE_BRANCH
  1585  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1586  		}
  1587  
  1588  	case ssa.BlockExit, ssa.BlockRetJmp:
  1589  
  1590  	case ssa.BlockRet:
  1591  		s.Prog(obj.ARET)
  1592  
  1593  	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
  1594  		ssa.BlockARM64LT, ssa.BlockARM64GE,
  1595  		ssa.BlockARM64LE, ssa.BlockARM64GT,
  1596  		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
  1597  		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
  1598  		ssa.BlockARM64Z, ssa.BlockARM64NZ,
  1599  		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
  1600  		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
  1601  		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
  1602  		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
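        		// Emit one conditional branch: invert the condition when the true
        		// successor is the fallthrough block; when neither successor follows,
        		// also emit an unconditional jump, ordered by branch likelihood.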
  1603  		jmp := blockJump[b.Kind]
  1604  		var p *obj.Prog
  1605  		switch next {
  1606  		case b.Succs[0].Block():
  1607  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1608  		case b.Succs[1].Block():
  1609  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1610  		default:
  1611  			if b.Likely != ssa.BranchUnlikely {
  1612  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1613  				s.Br(obj.AJMP, b.Succs[1].Block())
  1614  			} else {
  1615  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1616  				s.Br(obj.AJMP, b.Succs[0].Block())
  1617  			}
  1618  		}
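        		// CBZ/CBNZ-style blocks (Z, NZ, ZW, NZW) test a register rather than
        		// the flags, so attach the control register as the source operand.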
  1619  		if !b.Controls[0].Type.IsFlags() {
  1620  			p.From.Type = obj.TYPE_REG
  1621  			p.From.Reg = b.Controls[0].Reg()
  1622  		}
  1623  	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
  1624  		jmp := blockJump[b.Kind]
  1625  		var p *obj.Prog
  1626  		switch next {
  1627  		case b.Succs[0].Block():
  1628  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1629  		case b.Succs[1].Block():
  1630  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1631  		default:
  1632  			if b.Likely != ssa.BranchUnlikely {
  1633  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1634  				s.Br(obj.AJMP, b.Succs[1].Block())
  1635  			} else {
  1636  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1637  				s.Br(obj.AJMP, b.Succs[0].Block())
  1638  			}
  1639  		}
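        		// TBZ/TBNZ test bit AuxInt of the control register: the bit number
        		// goes in From and the register being tested in Reg.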
  1640  		p.From.Offset = b.AuxInt
  1641  		p.From.Type = obj.TYPE_CONST
  1642  		p.Reg = b.Controls[0].Reg()
  1643  
  1644  	case ssa.BlockARM64LEnoov:
  1645  		s.CombJump(b, next, &leJumps)
  1646  	case ssa.BlockARM64GTnoov:
  1647  		s.CombJump(b, next, &gtJumps)
  1648  
  1649  	case ssa.BlockARM64JUMPTABLE:
  1650  		// MOVD	(TABLE)(IDX<<3), Rtmp
  1651  		// JMP	(Rtmp)
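        		// Controls[0] is the index and Controls[1] is the address of the
        		// jump table.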
  1652  		p := s.Prog(arm64.AMOVD)
  1653  		p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
  1654  		p.To.Type = obj.TYPE_REG
  1655  		p.To.Reg = arm64.REGTMP
  1656  		p = s.Prog(obj.AJMP)
  1657  		p.To.Type = obj.TYPE_MEM
  1658  		p.To.Reg = arm64.REGTMP
  1659  		// Save jump tables for later resolution of the target blocks.
  1660  		s.JumpTables = append(s.JumpTables, b)
  1661  
  1662  	default:
  1663  		b.Fatalf("branch not implemented: %s", b.LongString())
  1664  	}
  1665  }
  1666  
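        // loadRegResult returns a Prog that loads the value of type t stored in
        // the stack slot of n (at offset off) into register reg.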
  1667  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1668  	p := s.Prog(loadByType(t))
  1669  	p.From.Type = obj.TYPE_MEM
  1670  	p.From.Name = obj.NAME_AUTO
  1671  	p.From.Sym = n.Linksym()
  1672  	p.From.Offset = n.FrameOffset() + off
  1673  	p.To.Type = obj.TYPE_REG
  1674  	p.To.Reg = reg
  1675  	return p
  1676  }
  1677  
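        // spillArgReg appends a store after p that spills argument register reg,
        // holding a value of type t, into the stack slot of parameter n at
        // offset off. The spill is marked as not being a statement.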
  1678  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1679  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1680  	p.To.Name = obj.NAME_PARAM
  1681  	p.To.Sym = n.Linksym()
  1682  	p.Pos = p.Pos.WithNotStmt()
  1683  	return p
  1684  }
  1685  
  1686  // zero16 zeroes 16 bytes at reg+off.
  1687  // If postInc is true, increment reg by 16.
  1688  func zero16(s *ssagen.State, reg int16, off int64, postInc bool) {
  1689  	//   STP     (ZR, ZR), off(reg)
  1690  	p := s.Prog(arm64.ASTP)
  1691  	p.From.Type = obj.TYPE_REGREG
  1692  	p.From.Reg = arm64.REGZERO
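        	// with TYPE_REGREG, the second ZR of the (ZR, ZR) pair is carried in From.Offset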
  1693  	p.From.Offset = int64(arm64.REGZERO)
  1694  	p.To.Type = obj.TYPE_MEM
  1695  	p.To.Reg = reg
  1696  	p.To.Offset = off
  1697  	if postInc {
  1698  		if off != 0 {
  1699  			panic("can't postinc with non-zero offset")
  1700  		}
  1701  		//   STP.P  (ZR, ZR), 16(reg)
  1702  		p.Scond = arm64.C_XPOST
  1703  		p.To.Offset = 16
  1704  	}
  1705  }
  1706  
  1707  // zero8 zeroes 8 bytes at reg+off.
  1708  func zero8(s *ssagen.State, reg int16, off int64) {
  1709  	//   MOVD     ZR, off(reg)
  1710  	p := s.Prog(arm64.AMOVD)
  1711  	p.From.Type = obj.TYPE_REG
  1712  	p.From.Reg = arm64.REGZERO
  1713  	p.To.Type = obj.TYPE_MEM
  1714  	p.To.Reg = reg
  1715  	p.To.Offset = off
  1716  }
  1717  
  1718  // move32 copies 32 bytes at src+off to dst+off.
  1719  // Uses registers tmp1 and tmp2.
  1720  // If postInc is true, increment src and dst by 32.
  1721  func move32(s *ssagen.State, src, dst, tmp1, tmp2 int16, off int64, postInc bool) {
  1722  	// FLDPQ   off(src), (tmp1, tmp2)
  1723  	ld := s.Prog(arm64.AFLDPQ)
  1724  	ld.From.Type = obj.TYPE_MEM
  1725  	ld.From.Reg = src
  1726  	ld.From.Offset = off
  1727  	ld.To.Type = obj.TYPE_REGREG
  1728  	ld.To.Reg = tmp1
  1729  	ld.To.Offset = int64(tmp2)
  1730  	// FSTPQ   (tmp1, tmp2), off(dst)
  1731  	st := s.Prog(arm64.AFSTPQ)
  1732  	st.From.Type = obj.TYPE_REGREG
  1733  	st.From.Reg = tmp1
  1734  	st.From.Offset = int64(tmp2)
  1735  	st.To.Type = obj.TYPE_MEM
  1736  	st.To.Reg = dst
  1737  	st.To.Offset = off
  1738  	if postInc {
  1739  		if off != 0 {
  1740  			panic("can't postinc with non-zero offset")
  1741  		}
  1742  		ld.Scond = arm64.C_XPOST
  1743  		st.Scond = arm64.C_XPOST
  1744  		ld.From.Offset = 32
  1745  		st.To.Offset = 32
  1746  	}
  1747  }
  1748  
  1749  // move16 copies 16 bytes at src+off to dst+off.
  1750  	// Uses register tmp1.
  1751  // If postInc is true, increment src and dst by 16.
  1752  func move16(s *ssagen.State, src, dst, tmp1 int16, off int64, postInc bool) {
  1753  	// FMOVQ     off(src), tmp1
  1754  	ld := s.Prog(arm64.AFMOVQ)
  1755  	ld.From.Type = obj.TYPE_MEM
  1756  	ld.From.Reg = src
  1757  	ld.From.Offset = off
  1758  	ld.To.Type = obj.TYPE_REG
  1759  	ld.To.Reg = tmp1
  1760  	// FMOVQ     tmp1, off(dst)
  1761  	st := s.Prog(arm64.AFMOVQ)
  1762  	st.From.Type = obj.TYPE_REG
  1763  	st.From.Reg = tmp1
  1764  	st.To.Type = obj.TYPE_MEM
  1765  	st.To.Reg = dst
  1766  	st.To.Offset = off
  1767  	if postInc {
  1768  		if off != 0 {
  1769  			panic("can't postinc with non-zero offset")
  1770  		}
  1771  		ld.Scond = arm64.C_XPOST
  1772  		st.Scond = arm64.C_XPOST
  1773  		ld.From.Offset = 16
  1774  		st.To.Offset = 16
  1775  	}
  1776  }
  1777  
  1778  // move8 copies 8 bytes at src+off to dst+off.
  1779  // Uses register tmp.
  1780  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1781  	// MOVD    off(src), tmp
  1782  	ld := s.Prog(arm64.AMOVD)
  1783  	ld.From.Type = obj.TYPE_MEM
  1784  	ld.From.Reg = src
  1785  	ld.From.Offset = off
  1786  	ld.To.Type = obj.TYPE_REG
  1787  	ld.To.Reg = tmp
  1788  	// MOVD    tmp, off(dst)
  1789  	st := s.Prog(arm64.AMOVD)
  1790  	st.From.Type = obj.TYPE_REG
  1791  	st.From.Reg = tmp
  1792  	st.To.Type = obj.TYPE_MEM
  1793  	st.To.Reg = dst
  1794  	st.To.Offset = off
  1795  }
  1796  
