Source file src/cmd/compile/internal/arm64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/arm64"
    19  	"internal/abi"
    20  )
    21  
    22  // loadByType returns the load instruction of the given type.
    23  func loadByType(t *types.Type) obj.As {
    24  	if t.IsFloat() {
    25  		switch t.Size() {
    26  		case 4:
    27  			return arm64.AFMOVS
    28  		case 8:
    29  			return arm64.AFMOVD
    30  		}
    31  	} else {
    32  		switch t.Size() {
    33  		case 1:
    34  			if t.IsSigned() {
    35  				return arm64.AMOVB
    36  			} else {
    37  				return arm64.AMOVBU
    38  			}
    39  		case 2:
    40  			if t.IsSigned() {
    41  				return arm64.AMOVH
    42  			} else {
    43  				return arm64.AMOVHU
    44  			}
    45  		case 4:
    46  			if t.IsSigned() {
    47  				return arm64.AMOVW
    48  			} else {
    49  				return arm64.AMOVWU
    50  			}
    51  		case 8:
    52  			return arm64.AMOVD
    53  		}
    54  	}
    55  	panic("bad load type")
    56  }
    57  
    58  // storeByType returns the store instruction of the given type.
    59  func storeByType(t *types.Type) obj.As {
    60  	if t.IsFloat() {
    61  		switch t.Size() {
    62  		case 4:
    63  			return arm64.AFMOVS
    64  		case 8:
    65  			return arm64.AFMOVD
    66  		}
    67  	} else {
    68  		switch t.Size() {
    69  		case 1:
    70  			return arm64.AMOVB
    71  		case 2:
    72  			return arm64.AMOVH
    73  		case 4:
    74  			return arm64.AMOVW
    75  		case 8:
    76  			return arm64.AMOVD
    77  		}
    78  	}
    79  	panic("bad store type")
    80  }
    81  
     83  // loadByType2 returns an opcode that can load 2 consecutive memory locations into 2 registers with type t.
     84  // It returns obj.AXXX if no such opcode exists.
    84  func loadByType2(t *types.Type) obj.As {
    85  	if t.IsFloat() {
    86  		switch t.Size() {
    87  		case 4:
    88  			return arm64.AFLDPS
    89  		case 8:
    90  			return arm64.AFLDPD
    91  		}
    92  	} else {
    93  		switch t.Size() {
    94  		case 4:
    95  			return arm64.ALDPW
    96  		case 8:
    97  			return arm64.ALDP
    98  		}
    99  	}
   100  	return obj.AXXX
   101  }
   102  
    103  // storeByType2 returns an opcode that can store 2 registers with type t into 2 consecutive memory locations.
    104  // It returns obj.AXXX if no such opcode exists.
   105  func storeByType2(t *types.Type) obj.As {
   106  	if t.IsFloat() {
   107  		switch t.Size() {
   108  		case 4:
   109  			return arm64.AFSTPS
   110  		case 8:
   111  			return arm64.AFSTPD
   112  		}
   113  	} else {
   114  		switch t.Size() {
   115  		case 4:
   116  			return arm64.ASTPW
   117  		case 8:
   118  			return arm64.ASTP
   119  		}
   120  	}
   121  	return obj.AXXX
   122  }
   123  
   124  // makeshift encodes a register shifted by a constant, used as an Offset in Prog.
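         // The operand packs the register number into bits 16-20 and the shift amount
         // into bits 10-15; typ supplies the shift-type bits (e.g. arm64.SHIFT_LL).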
   125  func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 {
   126  	if s < 0 || s >= 64 {
   127  		v.Fatalf("shift out of range: %d", s)
   128  	}
   129  	return int64(reg&31)<<16 | typ | (s&63)<<10
   130  }
   131  
   132  // genshift generates a Prog for r = r0 op (r1 shifted by n).
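         // For example, with r0=R1, r1=R2, r=R3, typ=arm64.SHIFT_LL and n=4, this emits
         // ADD R2<<4, R1, R3, i.e. R3 = R1 + R2<<4 (register names here are illustrative).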
   133  func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
   134  	p := s.Prog(as)
   135  	p.From.Type = obj.TYPE_SHIFT
   136  	p.From.Offset = makeshift(v, r1, typ, n)
   137  	p.Reg = r0
   138  	if r != 0 {
   139  		p.To.Type = obj.TYPE_REG
   140  		p.To.Reg = r
   141  	}
   142  	return p
   143  }
   144  
    145  // genIndexedOperand generates the memory operand for the indexed load/store instructions.
    146  // base and idx are registers.
   147  func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
   148  	// Reg: base register, Index: (shifted) index register
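         	// A scaled index is encoded as REG_LSL | shift<<5 | (register number),
         	// where the shift matches the element size of the access.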
   149  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
   150  	switch op {
   151  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8,
   152  		ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
   153  		mop.Index = arm64.REG_LSL | 3<<5 | idx&31
   154  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4,
   155  		ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
   156  		mop.Index = arm64.REG_LSL | 2<<5 | idx&31
   157  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2:
   158  		mop.Index = arm64.REG_LSL | 1<<5 | idx&31
   159  	default: // not shifted
   160  		mop.Index = idx
   161  	}
   162  	return mop
   163  }
   164  
   165  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   166  	switch v.Op {
   167  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   168  		if v.Type.IsMemory() {
   169  			return
   170  		}
   171  		x := v.Args[0].Reg()
   172  		y := v.Reg()
   173  		if x == y {
   174  			return
   175  		}
   176  		as := arm64.AMOVD
   177  		if v.Type.IsFloat() {
   178  			switch v.Type.Size() {
   179  			case 4:
   180  				as = arm64.AFMOVS
   181  			case 8:
   182  				as = arm64.AFMOVD
   183  			default:
   184  				panic("bad float size")
   185  			}
   186  		}
   187  		p := s.Prog(as)
   188  		p.From.Type = obj.TYPE_REG
   189  		p.From.Reg = x
   190  		p.To.Type = obj.TYPE_REG
   191  		p.To.Reg = y
   192  	case ssa.OpARM64MOVDnop, ssa.OpARM64ZERO:
   193  		// nothing to do
   194  	case ssa.OpLoadReg:
   195  		if v.Type.IsFlags() {
   196  			v.Fatalf("load flags not implemented: %v", v.LongString())
   197  			return
   198  		}
   199  		p := s.Prog(loadByType(v.Type))
   200  		ssagen.AddrAuto(&p.From, v.Args[0])
   201  		p.To.Type = obj.TYPE_REG
   202  		p.To.Reg = v.Reg()
   203  	case ssa.OpStoreReg:
   204  		if v.Type.IsFlags() {
   205  			v.Fatalf("store flags not implemented: %v", v.LongString())
   206  			return
   207  		}
   208  		p := s.Prog(storeByType(v.Type))
   209  		p.From.Type = obj.TYPE_REG
   210  		p.From.Reg = v.Args[0].Reg()
   211  		ssagen.AddrAuto(&p.To, v)
   212  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   213  		ssagen.CheckArgReg(v)
    214  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
    215  		// of the register arguments. The loop only runs once.
   216  		args := v.Block.Func.RegArgs
   217  		if len(args) == 0 {
   218  			break
   219  		}
   220  		v.Block.Func.RegArgs = nil // prevent from running again
   221  
   222  		for i := 0; i < len(args); i++ {
   223  			a := args[i]
   224  			// Offset by size of the saved LR slot.
   225  			addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   226  			// Look for double-register operations if we can.
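         			// Two consecutive args of the same size and class can be spilled
         			// and unspilled with a single STP/LDP pair.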
   227  			if i < len(args)-1 {
   228  				b := args[i+1]
   229  				if a.Type.Size() == b.Type.Size() &&
   230  					a.Type.IsFloat() == b.Type.IsFloat() &&
   231  					b.Offset == a.Offset+a.Type.Size() {
   232  					ld := loadByType2(a.Type)
   233  					st := storeByType2(a.Type)
   234  					if ld != obj.AXXX && st != obj.AXXX {
   235  						s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Reg2: b.Reg, Addr: addr, Unspill: ld, Spill: st})
   236  						i++ // b is done also, skip it.
   237  						continue
   238  					}
   239  				}
   240  			}
   241  			// Pass the spill/unspill information along to the assembler.
   242  			s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   243  		}
   244  
   245  	case ssa.OpARM64ADD,
   246  		ssa.OpARM64SUB,
   247  		ssa.OpARM64AND,
   248  		ssa.OpARM64OR,
   249  		ssa.OpARM64XOR,
   250  		ssa.OpARM64BIC,
   251  		ssa.OpARM64EON,
   252  		ssa.OpARM64ORN,
   253  		ssa.OpARM64MUL,
   254  		ssa.OpARM64MULW,
   255  		ssa.OpARM64MNEG,
   256  		ssa.OpARM64MNEGW,
   257  		ssa.OpARM64MULH,
   258  		ssa.OpARM64UMULH,
   259  		ssa.OpARM64MULL,
   260  		ssa.OpARM64UMULL,
   261  		ssa.OpARM64DIV,
   262  		ssa.OpARM64UDIV,
   263  		ssa.OpARM64DIVW,
   264  		ssa.OpARM64UDIVW,
   265  		ssa.OpARM64MOD,
   266  		ssa.OpARM64UMOD,
   267  		ssa.OpARM64MODW,
   268  		ssa.OpARM64UMODW,
   269  		ssa.OpARM64SLL,
   270  		ssa.OpARM64SRL,
   271  		ssa.OpARM64SRA,
   272  		ssa.OpARM64FADDS,
   273  		ssa.OpARM64FADDD,
   274  		ssa.OpARM64FSUBS,
   275  		ssa.OpARM64FSUBD,
   276  		ssa.OpARM64FMULS,
   277  		ssa.OpARM64FMULD,
   278  		ssa.OpARM64FNMULS,
   279  		ssa.OpARM64FNMULD,
   280  		ssa.OpARM64FDIVS,
   281  		ssa.OpARM64FDIVD,
   282  		ssa.OpARM64FMINS,
   283  		ssa.OpARM64FMIND,
   284  		ssa.OpARM64FMAXS,
   285  		ssa.OpARM64FMAXD,
   286  		ssa.OpARM64ROR,
   287  		ssa.OpARM64RORW:
   288  		r := v.Reg()
   289  		r1 := v.Args[0].Reg()
   290  		r2 := v.Args[1].Reg()
   291  		p := s.Prog(v.Op.Asm())
   292  		p.From.Type = obj.TYPE_REG
   293  		p.From.Reg = r2
   294  		p.Reg = r1
   295  		p.To.Type = obj.TYPE_REG
   296  		p.To.Reg = r
   297  	case ssa.OpARM64FMADDS,
   298  		ssa.OpARM64FMADDD,
   299  		ssa.OpARM64FNMADDS,
   300  		ssa.OpARM64FNMADDD,
   301  		ssa.OpARM64FMSUBS,
   302  		ssa.OpARM64FMSUBD,
   303  		ssa.OpARM64FNMSUBS,
   304  		ssa.OpARM64FNMSUBD,
   305  		ssa.OpARM64MADD,
   306  		ssa.OpARM64MADDW,
   307  		ssa.OpARM64MSUB,
   308  		ssa.OpARM64MSUBW:
   309  		rt := v.Reg()
   310  		ra := v.Args[0].Reg()
   311  		rm := v.Args[1].Reg()
   312  		rn := v.Args[2].Reg()
   313  		p := s.Prog(v.Op.Asm())
   314  		p.Reg = ra
   315  		p.From.Type = obj.TYPE_REG
   316  		p.From.Reg = rm
   317  		p.AddRestSourceReg(rn)
   318  		p.To.Type = obj.TYPE_REG
   319  		p.To.Reg = rt
   320  	case ssa.OpARM64ADDconst,
   321  		ssa.OpARM64SUBconst,
   322  		ssa.OpARM64ANDconst,
   323  		ssa.OpARM64ORconst,
   324  		ssa.OpARM64XORconst,
   325  		ssa.OpARM64SLLconst,
   326  		ssa.OpARM64SRLconst,
   327  		ssa.OpARM64SRAconst,
   328  		ssa.OpARM64RORconst,
   329  		ssa.OpARM64RORWconst:
   330  		p := s.Prog(v.Op.Asm())
   331  		p.From.Type = obj.TYPE_CONST
   332  		p.From.Offset = v.AuxInt
   333  		p.Reg = v.Args[0].Reg()
   334  		p.To.Type = obj.TYPE_REG
   335  		p.To.Reg = v.Reg()
   336  	case ssa.OpARM64ADDSconstflags:
   337  		p := s.Prog(v.Op.Asm())
   338  		p.From.Type = obj.TYPE_CONST
   339  		p.From.Offset = v.AuxInt
   340  		p.Reg = v.Args[0].Reg()
   341  		p.To.Type = obj.TYPE_REG
   342  		p.To.Reg = v.Reg0()
   343  	case ssa.OpARM64ADCzerocarry:
   344  		p := s.Prog(v.Op.Asm())
   345  		p.From.Type = obj.TYPE_REG
   346  		p.From.Reg = arm64.REGZERO
   347  		p.Reg = arm64.REGZERO
   348  		p.To.Type = obj.TYPE_REG
   349  		p.To.Reg = v.Reg()
   350  	case ssa.OpARM64ADCSflags,
   351  		ssa.OpARM64ADDSflags,
   352  		ssa.OpARM64SBCSflags,
   353  		ssa.OpARM64SUBSflags:
   354  		r := v.Reg0()
   355  		r1 := v.Args[0].Reg()
   356  		r2 := v.Args[1].Reg()
   357  		p := s.Prog(v.Op.Asm())
   358  		p.From.Type = obj.TYPE_REG
   359  		p.From.Reg = r2
   360  		p.Reg = r1
   361  		p.To.Type = obj.TYPE_REG
   362  		p.To.Reg = r
   363  	case ssa.OpARM64NEGSflags:
   364  		p := s.Prog(v.Op.Asm())
   365  		p.From.Type = obj.TYPE_REG
   366  		p.From.Reg = v.Args[0].Reg()
   367  		p.To.Type = obj.TYPE_REG
   368  		p.To.Reg = v.Reg0()
   369  	case ssa.OpARM64NGCzerocarry:
   370  		p := s.Prog(v.Op.Asm())
   371  		p.From.Type = obj.TYPE_REG
   372  		p.From.Reg = arm64.REGZERO
   373  		p.To.Type = obj.TYPE_REG
   374  		p.To.Reg = v.Reg()
   375  	case ssa.OpARM64EXTRconst,
   376  		ssa.OpARM64EXTRWconst:
   377  		p := s.Prog(v.Op.Asm())
   378  		p.From.Type = obj.TYPE_CONST
   379  		p.From.Offset = v.AuxInt
   380  		p.AddRestSourceReg(v.Args[0].Reg())
   381  		p.Reg = v.Args[1].Reg()
   382  		p.To.Type = obj.TYPE_REG
   383  		p.To.Reg = v.Reg()
   384  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   385  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   386  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   387  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   388  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   389  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   390  	case ssa.OpARM64MVNshiftRO:
   391  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   392  	case ssa.OpARM64ADDshiftLL,
   393  		ssa.OpARM64SUBshiftLL,
   394  		ssa.OpARM64ANDshiftLL,
   395  		ssa.OpARM64ORshiftLL,
   396  		ssa.OpARM64XORshiftLL,
   397  		ssa.OpARM64EONshiftLL,
   398  		ssa.OpARM64ORNshiftLL,
   399  		ssa.OpARM64BICshiftLL:
   400  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   401  	case ssa.OpARM64ADDshiftRL,
   402  		ssa.OpARM64SUBshiftRL,
   403  		ssa.OpARM64ANDshiftRL,
   404  		ssa.OpARM64ORshiftRL,
   405  		ssa.OpARM64XORshiftRL,
   406  		ssa.OpARM64EONshiftRL,
   407  		ssa.OpARM64ORNshiftRL,
   408  		ssa.OpARM64BICshiftRL:
   409  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   410  	case ssa.OpARM64ADDshiftRA,
   411  		ssa.OpARM64SUBshiftRA,
   412  		ssa.OpARM64ANDshiftRA,
   413  		ssa.OpARM64ORshiftRA,
   414  		ssa.OpARM64XORshiftRA,
   415  		ssa.OpARM64EONshiftRA,
   416  		ssa.OpARM64ORNshiftRA,
   417  		ssa.OpARM64BICshiftRA:
   418  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   419  	case ssa.OpARM64ANDshiftRO,
   420  		ssa.OpARM64ORshiftRO,
   421  		ssa.OpARM64XORshiftRO,
   422  		ssa.OpARM64EONshiftRO,
   423  		ssa.OpARM64ORNshiftRO,
   424  		ssa.OpARM64BICshiftRO:
   425  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   426  	case ssa.OpARM64MOVDconst:
   427  		p := s.Prog(v.Op.Asm())
   428  		p.From.Type = obj.TYPE_CONST
   429  		p.From.Offset = v.AuxInt
   430  		p.To.Type = obj.TYPE_REG
   431  		p.To.Reg = v.Reg()
   432  	case ssa.OpARM64FMOVSconst,
   433  		ssa.OpARM64FMOVDconst:
   434  		p := s.Prog(v.Op.Asm())
   435  		p.From.Type = obj.TYPE_FCONST
   436  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   437  		p.To.Type = obj.TYPE_REG
   438  		p.To.Reg = v.Reg()
   439  	case ssa.OpARM64FCMPS0,
   440  		ssa.OpARM64FCMPD0:
   441  		p := s.Prog(v.Op.Asm())
   442  		p.From.Type = obj.TYPE_FCONST
   443  		p.From.Val = math.Float64frombits(0)
   444  		p.Reg = v.Args[0].Reg()
   445  	case ssa.OpARM64CMP,
   446  		ssa.OpARM64CMPW,
   447  		ssa.OpARM64CMN,
   448  		ssa.OpARM64CMNW,
   449  		ssa.OpARM64TST,
   450  		ssa.OpARM64TSTW,
   451  		ssa.OpARM64FCMPS,
   452  		ssa.OpARM64FCMPD:
   453  		p := s.Prog(v.Op.Asm())
   454  		p.From.Type = obj.TYPE_REG
   455  		p.From.Reg = v.Args[1].Reg()
   456  		p.Reg = v.Args[0].Reg()
   457  	case ssa.OpARM64CMPconst,
   458  		ssa.OpARM64CMPWconst,
   459  		ssa.OpARM64CMNconst,
   460  		ssa.OpARM64CMNWconst,
   461  		ssa.OpARM64TSTconst,
   462  		ssa.OpARM64TSTWconst:
   463  		p := s.Prog(v.Op.Asm())
   464  		p.From.Type = obj.TYPE_CONST
   465  		p.From.Offset = v.AuxInt
   466  		p.Reg = v.Args[0].Reg()
   467  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   468  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   469  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   470  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   471  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   472  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   473  	case ssa.OpARM64TSTshiftRO:
   474  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_ROR, v.AuxInt)
   475  	case ssa.OpARM64MOVDaddr:
   476  		p := s.Prog(arm64.AMOVD)
   477  		p.From.Type = obj.TYPE_ADDR
   478  		p.From.Reg = v.Args[0].Reg()
   479  		p.To.Type = obj.TYPE_REG
   480  		p.To.Reg = v.Reg()
   481  
   482  		var wantreg string
   483  		// MOVD $sym+off(base), R
   484  		// the assembler expands it as the following:
    485  		// - base is SP: add constant offset to SP
    486  		//               when constant is large, the assembler's tmp register may be used
   487  		// - base is SB: load external address from constant pool (use relocation)
   488  		switch v.Aux.(type) {
   489  		default:
   490  			v.Fatalf("aux is of unknown type %T", v.Aux)
   491  		case *obj.LSym:
   492  			wantreg = "SB"
   493  			ssagen.AddAux(&p.From, v)
   494  		case *ir.Name:
   495  			wantreg = "SP"
   496  			ssagen.AddAux(&p.From, v)
   497  		case nil:
   498  			// No sym, just MOVD $off(SP), R
   499  			wantreg = "SP"
   500  			p.From.Offset = v.AuxInt
   501  		}
   502  		if reg := v.Args[0].RegName(); reg != wantreg {
   503  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   504  		}
   505  	case ssa.OpARM64MOVBload,
   506  		ssa.OpARM64MOVBUload,
   507  		ssa.OpARM64MOVHload,
   508  		ssa.OpARM64MOVHUload,
   509  		ssa.OpARM64MOVWload,
   510  		ssa.OpARM64MOVWUload,
   511  		ssa.OpARM64MOVDload,
   512  		ssa.OpARM64FMOVSload,
   513  		ssa.OpARM64FMOVDload:
   514  		p := s.Prog(v.Op.Asm())
   515  		p.From.Type = obj.TYPE_MEM
   516  		p.From.Reg = v.Args[0].Reg()
   517  		ssagen.AddAux(&p.From, v)
   518  		p.To.Type = obj.TYPE_REG
   519  		p.To.Reg = v.Reg()
   520  	case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS:
   521  		p := s.Prog(v.Op.Asm())
   522  		p.From.Type = obj.TYPE_MEM
   523  		p.From.Reg = v.Args[0].Reg()
   524  		ssagen.AddAux(&p.From, v)
   525  		p.To.Type = obj.TYPE_REGREG
   526  		p.To.Reg = v.Reg0()
   527  		p.To.Offset = int64(v.Reg1())
   528  	case ssa.OpARM64MOVBloadidx,
   529  		ssa.OpARM64MOVBUloadidx,
   530  		ssa.OpARM64MOVHloadidx,
   531  		ssa.OpARM64MOVHUloadidx,
   532  		ssa.OpARM64MOVWloadidx,
   533  		ssa.OpARM64MOVWUloadidx,
   534  		ssa.OpARM64MOVDloadidx,
   535  		ssa.OpARM64FMOVSloadidx,
   536  		ssa.OpARM64FMOVDloadidx,
   537  		ssa.OpARM64MOVHloadidx2,
   538  		ssa.OpARM64MOVHUloadidx2,
   539  		ssa.OpARM64MOVWloadidx4,
   540  		ssa.OpARM64MOVWUloadidx4,
   541  		ssa.OpARM64MOVDloadidx8,
   542  		ssa.OpARM64FMOVDloadidx8,
   543  		ssa.OpARM64FMOVSloadidx4:
   544  		p := s.Prog(v.Op.Asm())
   545  		p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   546  		p.To.Type = obj.TYPE_REG
   547  		p.To.Reg = v.Reg()
   548  	case ssa.OpARM64LDAR,
   549  		ssa.OpARM64LDARB,
   550  		ssa.OpARM64LDARW:
   551  		p := s.Prog(v.Op.Asm())
   552  		p.From.Type = obj.TYPE_MEM
   553  		p.From.Reg = v.Args[0].Reg()
   554  		ssagen.AddAux(&p.From, v)
   555  		p.To.Type = obj.TYPE_REG
   556  		p.To.Reg = v.Reg0()
   557  	case ssa.OpARM64MOVBstore,
   558  		ssa.OpARM64MOVHstore,
   559  		ssa.OpARM64MOVWstore,
   560  		ssa.OpARM64MOVDstore,
   561  		ssa.OpARM64FMOVSstore,
   562  		ssa.OpARM64FMOVDstore,
   563  		ssa.OpARM64STLRB,
   564  		ssa.OpARM64STLR,
   565  		ssa.OpARM64STLRW:
   566  		p := s.Prog(v.Op.Asm())
   567  		p.From.Type = obj.TYPE_REG
   568  		p.From.Reg = v.Args[1].Reg()
   569  		p.To.Type = obj.TYPE_MEM
   570  		p.To.Reg = v.Args[0].Reg()
   571  		ssagen.AddAux(&p.To, v)
   572  	case ssa.OpARM64MOVBstoreidx,
   573  		ssa.OpARM64MOVHstoreidx,
   574  		ssa.OpARM64MOVWstoreidx,
   575  		ssa.OpARM64MOVDstoreidx,
   576  		ssa.OpARM64FMOVSstoreidx,
   577  		ssa.OpARM64FMOVDstoreidx,
   578  		ssa.OpARM64MOVHstoreidx2,
   579  		ssa.OpARM64MOVWstoreidx4,
   580  		ssa.OpARM64FMOVSstoreidx4,
   581  		ssa.OpARM64MOVDstoreidx8,
   582  		ssa.OpARM64FMOVDstoreidx8:
   583  		p := s.Prog(v.Op.Asm())
   584  		p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   585  		p.From.Type = obj.TYPE_REG
   586  		p.From.Reg = v.Args[2].Reg()
   587  	case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS:
   588  		p := s.Prog(v.Op.Asm())
   589  		p.From.Type = obj.TYPE_REGREG
   590  		p.From.Reg = v.Args[1].Reg()
   591  		p.From.Offset = int64(v.Args[2].Reg())
   592  		p.To.Type = obj.TYPE_MEM
   593  		p.To.Reg = v.Args[0].Reg()
   594  		ssagen.AddAux(&p.To, v)
   595  	case ssa.OpARM64BFI,
   596  		ssa.OpARM64BFXIL:
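         		// AuxInt packs the bitfield spec: the lsb in AuxInt>>8 and the width in the low byte.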
   597  		p := s.Prog(v.Op.Asm())
   598  		p.From.Type = obj.TYPE_CONST
   599  		p.From.Offset = v.AuxInt >> 8
   600  		p.AddRestSourceConst(v.AuxInt & 0xff)
   601  		p.Reg = v.Args[1].Reg()
   602  		p.To.Type = obj.TYPE_REG
   603  		p.To.Reg = v.Reg()
   604  	case ssa.OpARM64SBFIZ,
   605  		ssa.OpARM64SBFX,
   606  		ssa.OpARM64UBFIZ,
   607  		ssa.OpARM64UBFX:
   608  		p := s.Prog(v.Op.Asm())
   609  		p.From.Type = obj.TYPE_CONST
   610  		p.From.Offset = v.AuxInt >> 8
   611  		p.AddRestSourceConst(v.AuxInt & 0xff)
   612  		p.Reg = v.Args[0].Reg()
   613  		p.To.Type = obj.TYPE_REG
   614  		p.To.Reg = v.Reg()
   615  	case ssa.OpARM64LoweredAtomicExchange64,
   616  		ssa.OpARM64LoweredAtomicExchange32,
   617  		ssa.OpARM64LoweredAtomicExchange8:
   618  		// LDAXR	(Rarg0), Rout
   619  		// STLXR	Rarg1, (Rarg0), Rtmp
   620  		// CBNZ		Rtmp, -2(PC)
   621  		var ld, st obj.As
   622  		switch v.Op {
   623  		case ssa.OpARM64LoweredAtomicExchange8:
   624  			ld = arm64.ALDAXRB
   625  			st = arm64.ASTLXRB
   626  		case ssa.OpARM64LoweredAtomicExchange32:
   627  			ld = arm64.ALDAXRW
   628  			st = arm64.ASTLXRW
   629  		case ssa.OpARM64LoweredAtomicExchange64:
   630  			ld = arm64.ALDAXR
   631  			st = arm64.ASTLXR
   632  		}
   633  		r0 := v.Args[0].Reg()
   634  		r1 := v.Args[1].Reg()
   635  		out := v.Reg0()
   636  		p := s.Prog(ld)
   637  		p.From.Type = obj.TYPE_MEM
   638  		p.From.Reg = r0
   639  		p.To.Type = obj.TYPE_REG
   640  		p.To.Reg = out
   641  		p1 := s.Prog(st)
   642  		p1.From.Type = obj.TYPE_REG
   643  		p1.From.Reg = r1
   644  		p1.To.Type = obj.TYPE_MEM
   645  		p1.To.Reg = r0
   646  		p1.RegTo2 = arm64.REGTMP
   647  		p2 := s.Prog(arm64.ACBNZ)
   648  		p2.From.Type = obj.TYPE_REG
   649  		p2.From.Reg = arm64.REGTMP
   650  		p2.To.Type = obj.TYPE_BRANCH
   651  		p2.To.SetTarget(p)
   652  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   653  		ssa.OpARM64LoweredAtomicExchange32Variant,
   654  		ssa.OpARM64LoweredAtomicExchange8Variant:
   655  		var swap obj.As
   656  		switch v.Op {
   657  		case ssa.OpARM64LoweredAtomicExchange8Variant:
   658  			swap = arm64.ASWPALB
   659  		case ssa.OpARM64LoweredAtomicExchange32Variant:
   660  			swap = arm64.ASWPALW
   661  		case ssa.OpARM64LoweredAtomicExchange64Variant:
   662  			swap = arm64.ASWPALD
   663  		}
   664  		r0 := v.Args[0].Reg()
   665  		r1 := v.Args[1].Reg()
   666  		out := v.Reg0()
   667  
   668  		// SWPALD	Rarg1, (Rarg0), Rout
   669  		p := s.Prog(swap)
   670  		p.From.Type = obj.TYPE_REG
   671  		p.From.Reg = r1
   672  		p.To.Type = obj.TYPE_MEM
   673  		p.To.Reg = r0
   674  		p.RegTo2 = out
   675  
   676  	case ssa.OpARM64LoweredAtomicAdd64,
   677  		ssa.OpARM64LoweredAtomicAdd32:
   678  		// LDAXR	(Rarg0), Rout
   679  		// ADD		Rarg1, Rout
   680  		// STLXR	Rout, (Rarg0), Rtmp
   681  		// CBNZ		Rtmp, -3(PC)
   682  		ld := arm64.ALDAXR
   683  		st := arm64.ASTLXR
   684  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   685  			ld = arm64.ALDAXRW
   686  			st = arm64.ASTLXRW
   687  		}
   688  		r0 := v.Args[0].Reg()
   689  		r1 := v.Args[1].Reg()
   690  		out := v.Reg0()
   691  		p := s.Prog(ld)
   692  		p.From.Type = obj.TYPE_MEM
   693  		p.From.Reg = r0
   694  		p.To.Type = obj.TYPE_REG
   695  		p.To.Reg = out
   696  		p1 := s.Prog(arm64.AADD)
   697  		p1.From.Type = obj.TYPE_REG
   698  		p1.From.Reg = r1
   699  		p1.To.Type = obj.TYPE_REG
   700  		p1.To.Reg = out
   701  		p2 := s.Prog(st)
   702  		p2.From.Type = obj.TYPE_REG
   703  		p2.From.Reg = out
   704  		p2.To.Type = obj.TYPE_MEM
   705  		p2.To.Reg = r0
   706  		p2.RegTo2 = arm64.REGTMP
   707  		p3 := s.Prog(arm64.ACBNZ)
   708  		p3.From.Type = obj.TYPE_REG
   709  		p3.From.Reg = arm64.REGTMP
   710  		p3.To.Type = obj.TYPE_BRANCH
   711  		p3.To.SetTarget(p)
   712  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   713  		ssa.OpARM64LoweredAtomicAdd32Variant:
   714  		// LDADDAL	Rarg1, (Rarg0), Rout
   715  		// ADD		Rarg1, Rout
   716  		op := arm64.ALDADDALD
   717  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   718  			op = arm64.ALDADDALW
   719  		}
   720  		r0 := v.Args[0].Reg()
   721  		r1 := v.Args[1].Reg()
   722  		out := v.Reg0()
   723  		p := s.Prog(op)
   724  		p.From.Type = obj.TYPE_REG
   725  		p.From.Reg = r1
   726  		p.To.Type = obj.TYPE_MEM
   727  		p.To.Reg = r0
   728  		p.RegTo2 = out
   729  		p1 := s.Prog(arm64.AADD)
   730  		p1.From.Type = obj.TYPE_REG
   731  		p1.From.Reg = r1
   732  		p1.To.Type = obj.TYPE_REG
   733  		p1.To.Reg = out
   734  	case ssa.OpARM64LoweredAtomicCas64,
   735  		ssa.OpARM64LoweredAtomicCas32:
   736  		// LDAXR	(Rarg0), Rtmp
   737  		// CMP		Rarg1, Rtmp
   738  		// BNE		3(PC)
   739  		// STLXR	Rarg2, (Rarg0), Rtmp
   740  		// CBNZ		Rtmp, -4(PC)
   741  		// CSET		EQ, Rout
   742  		ld := arm64.ALDAXR
   743  		st := arm64.ASTLXR
   744  		cmp := arm64.ACMP
   745  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   746  			ld = arm64.ALDAXRW
   747  			st = arm64.ASTLXRW
   748  			cmp = arm64.ACMPW
   749  		}
   750  		r0 := v.Args[0].Reg()
   751  		r1 := v.Args[1].Reg()
   752  		r2 := v.Args[2].Reg()
   753  		out := v.Reg0()
   754  		p := s.Prog(ld)
   755  		p.From.Type = obj.TYPE_MEM
   756  		p.From.Reg = r0
   757  		p.To.Type = obj.TYPE_REG
   758  		p.To.Reg = arm64.REGTMP
   759  		p1 := s.Prog(cmp)
   760  		p1.From.Type = obj.TYPE_REG
   761  		p1.From.Reg = r1
   762  		p1.Reg = arm64.REGTMP
   763  		p2 := s.Prog(arm64.ABNE)
   764  		p2.To.Type = obj.TYPE_BRANCH
   765  		p3 := s.Prog(st)
   766  		p3.From.Type = obj.TYPE_REG
   767  		p3.From.Reg = r2
   768  		p3.To.Type = obj.TYPE_MEM
   769  		p3.To.Reg = r0
   770  		p3.RegTo2 = arm64.REGTMP
   771  		p4 := s.Prog(arm64.ACBNZ)
   772  		p4.From.Type = obj.TYPE_REG
   773  		p4.From.Reg = arm64.REGTMP
   774  		p4.To.Type = obj.TYPE_BRANCH
   775  		p4.To.SetTarget(p)
   776  		p5 := s.Prog(arm64.ACSET)
   777  		p5.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   778  		p5.From.Offset = int64(arm64.SPOP_EQ)
   779  		p5.To.Type = obj.TYPE_REG
   780  		p5.To.Reg = out
   781  		p2.To.SetTarget(p5)
   782  	case ssa.OpARM64LoweredAtomicCas64Variant,
   783  		ssa.OpARM64LoweredAtomicCas32Variant:
   784  		// Rarg0: ptr
   785  		// Rarg1: old
   786  		// Rarg2: new
   787  		// MOV  	Rarg1, Rtmp
   788  		// CASAL	Rtmp, (Rarg0), Rarg2
   789  		// CMP  	Rarg1, Rtmp
   790  		// CSET 	EQ, Rout
   791  		cas := arm64.ACASALD
   792  		cmp := arm64.ACMP
   793  		mov := arm64.AMOVD
   794  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   795  			cas = arm64.ACASALW
   796  			cmp = arm64.ACMPW
   797  			mov = arm64.AMOVW
   798  		}
   799  		r0 := v.Args[0].Reg()
   800  		r1 := v.Args[1].Reg()
   801  		r2 := v.Args[2].Reg()
   802  		out := v.Reg0()
   803  
   804  		// MOV  	Rarg1, Rtmp
   805  		p := s.Prog(mov)
   806  		p.From.Type = obj.TYPE_REG
   807  		p.From.Reg = r1
   808  		p.To.Type = obj.TYPE_REG
   809  		p.To.Reg = arm64.REGTMP
   810  
   811  		// CASAL	Rtmp, (Rarg0), Rarg2
   812  		p1 := s.Prog(cas)
   813  		p1.From.Type = obj.TYPE_REG
   814  		p1.From.Reg = arm64.REGTMP
   815  		p1.To.Type = obj.TYPE_MEM
   816  		p1.To.Reg = r0
   817  		p1.RegTo2 = r2
   818  
   819  		// CMP  	Rarg1, Rtmp
   820  		p2 := s.Prog(cmp)
   821  		p2.From.Type = obj.TYPE_REG
   822  		p2.From.Reg = r1
   823  		p2.Reg = arm64.REGTMP
   824  
   825  		// CSET 	EQ, Rout
   826  		p3 := s.Prog(arm64.ACSET)
   827  		p3.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   828  		p3.From.Offset = int64(arm64.SPOP_EQ)
   829  		p3.To.Type = obj.TYPE_REG
   830  		p3.To.Reg = out
   831  
   832  	case ssa.OpARM64LoweredAtomicAnd64,
   833  		ssa.OpARM64LoweredAtomicOr64,
   834  		ssa.OpARM64LoweredAtomicAnd32,
   835  		ssa.OpARM64LoweredAtomicOr32,
   836  		ssa.OpARM64LoweredAtomicAnd8,
   837  		ssa.OpARM64LoweredAtomicOr8:
   838  		// LDAXR[BW] (Rarg0), Rout
   839  		// AND/OR	Rarg1, Rout, tmp1
   840  		// STLXR[BW] tmp1, (Rarg0), Rtmp
   841  		// CBNZ		Rtmp, -3(PC)
   842  		ld := arm64.ALDAXR
   843  		st := arm64.ASTLXR
   844  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   845  			ld = arm64.ALDAXRW
   846  			st = arm64.ASTLXRW
   847  		}
   848  		if v.Op == ssa.OpARM64LoweredAtomicAnd8 || v.Op == ssa.OpARM64LoweredAtomicOr8 {
   849  			ld = arm64.ALDAXRB
   850  			st = arm64.ASTLXRB
   851  		}
   852  		r0 := v.Args[0].Reg()
   853  		r1 := v.Args[1].Reg()
   854  		out := v.Reg0()
   855  		tmp := v.RegTmp()
   856  		p := s.Prog(ld)
   857  		p.From.Type = obj.TYPE_MEM
   858  		p.From.Reg = r0
   859  		p.To.Type = obj.TYPE_REG
   860  		p.To.Reg = out
   861  		p1 := s.Prog(v.Op.Asm())
   862  		p1.From.Type = obj.TYPE_REG
   863  		p1.From.Reg = r1
   864  		p1.Reg = out
   865  		p1.To.Type = obj.TYPE_REG
   866  		p1.To.Reg = tmp
   867  		p2 := s.Prog(st)
   868  		p2.From.Type = obj.TYPE_REG
   869  		p2.From.Reg = tmp
   870  		p2.To.Type = obj.TYPE_MEM
   871  		p2.To.Reg = r0
   872  		p2.RegTo2 = arm64.REGTMP
   873  		p3 := s.Prog(arm64.ACBNZ)
   874  		p3.From.Type = obj.TYPE_REG
   875  		p3.From.Reg = arm64.REGTMP
   876  		p3.To.Type = obj.TYPE_BRANCH
   877  		p3.To.SetTarget(p)
   878  
   879  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   880  		ssa.OpARM64LoweredAtomicAnd32Variant,
   881  		ssa.OpARM64LoweredAtomicAnd64Variant:
   882  		atomic_clear := arm64.ALDCLRALD
   883  		if v.Op == ssa.OpARM64LoweredAtomicAnd32Variant {
   884  			atomic_clear = arm64.ALDCLRALW
   885  		}
   886  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   887  			atomic_clear = arm64.ALDCLRALB
   888  		}
   889  		r0 := v.Args[0].Reg()
   890  		r1 := v.Args[1].Reg()
   891  		out := v.Reg0()
   892  
    893  		// MVN       Rarg1, Rtmp
   894  		p := s.Prog(arm64.AMVN)
   895  		p.From.Type = obj.TYPE_REG
   896  		p.From.Reg = r1
   897  		p.To.Type = obj.TYPE_REG
   898  		p.To.Reg = arm64.REGTMP
   899  
    900  		// LDCLRAL[BDW]  Rtmp, (Rarg0), Rout
   901  		p1 := s.Prog(atomic_clear)
   902  		p1.From.Type = obj.TYPE_REG
   903  		p1.From.Reg = arm64.REGTMP
   904  		p1.To.Type = obj.TYPE_MEM
   905  		p1.To.Reg = r0
   906  		p1.RegTo2 = out
   907  
   908  	case ssa.OpARM64LoweredAtomicOr8Variant,
   909  		ssa.OpARM64LoweredAtomicOr32Variant,
   910  		ssa.OpARM64LoweredAtomicOr64Variant:
   911  		atomic_or := arm64.ALDORALD
   912  		if v.Op == ssa.OpARM64LoweredAtomicOr32Variant {
   913  			atomic_or = arm64.ALDORALW
   914  		}
   915  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   916  			atomic_or = arm64.ALDORALB
   917  		}
   918  		r0 := v.Args[0].Reg()
   919  		r1 := v.Args[1].Reg()
   920  		out := v.Reg0()
   921  
   922  		// LDORAL[BDW]  Rarg1, (Rarg0), Rout
   923  		p := s.Prog(atomic_or)
   924  		p.From.Type = obj.TYPE_REG
   925  		p.From.Reg = r1
   926  		p.To.Type = obj.TYPE_MEM
   927  		p.To.Reg = r0
   928  		p.RegTo2 = out
   929  
   930  	case ssa.OpARM64MOVBreg,
   931  		ssa.OpARM64MOVBUreg,
   932  		ssa.OpARM64MOVHreg,
   933  		ssa.OpARM64MOVHUreg,
   934  		ssa.OpARM64MOVWreg,
   935  		ssa.OpARM64MOVWUreg:
   936  		a := v.Args[0]
   937  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   938  			a = a.Args[0]
   939  		}
   940  		if a.Op == ssa.OpLoadReg {
   941  			t := a.Type
   942  			switch {
   943  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   944  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   945  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   946  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   947  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   948  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   949  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   950  				if v.Reg() == v.Args[0].Reg() {
   951  					return
   952  				}
   953  				p := s.Prog(arm64.AMOVD)
   954  				p.From.Type = obj.TYPE_REG
   955  				p.From.Reg = v.Args[0].Reg()
   956  				p.To.Type = obj.TYPE_REG
   957  				p.To.Reg = v.Reg()
   958  				return
   959  			default:
   960  			}
   961  		}
   962  		fallthrough
   963  	case ssa.OpARM64MVN,
   964  		ssa.OpARM64NEG,
   965  		ssa.OpARM64FABSD,
   966  		ssa.OpARM64FMOVDfpgp,
   967  		ssa.OpARM64FMOVDgpfp,
   968  		ssa.OpARM64FMOVSfpgp,
   969  		ssa.OpARM64FMOVSgpfp,
   970  		ssa.OpARM64FNEGS,
   971  		ssa.OpARM64FNEGD,
   972  		ssa.OpARM64FSQRTS,
   973  		ssa.OpARM64FSQRTD,
   974  		ssa.OpARM64FCVTZSSW,
   975  		ssa.OpARM64FCVTZSDW,
   976  		ssa.OpARM64FCVTZUSW,
   977  		ssa.OpARM64FCVTZUDW,
   978  		ssa.OpARM64FCVTZSS,
   979  		ssa.OpARM64FCVTZSD,
   980  		ssa.OpARM64FCVTZUS,
   981  		ssa.OpARM64FCVTZUD,
   982  		ssa.OpARM64SCVTFWS,
   983  		ssa.OpARM64SCVTFWD,
   984  		ssa.OpARM64SCVTFS,
   985  		ssa.OpARM64SCVTFD,
   986  		ssa.OpARM64UCVTFWS,
   987  		ssa.OpARM64UCVTFWD,
   988  		ssa.OpARM64UCVTFS,
   989  		ssa.OpARM64UCVTFD,
   990  		ssa.OpARM64FCVTSD,
   991  		ssa.OpARM64FCVTDS,
   992  		ssa.OpARM64REV,
   993  		ssa.OpARM64REVW,
   994  		ssa.OpARM64REV16,
   995  		ssa.OpARM64REV16W,
   996  		ssa.OpARM64RBIT,
   997  		ssa.OpARM64RBITW,
   998  		ssa.OpARM64CLZ,
   999  		ssa.OpARM64CLZW,
  1000  		ssa.OpARM64FRINTAD,
  1001  		ssa.OpARM64FRINTMD,
  1002  		ssa.OpARM64FRINTND,
  1003  		ssa.OpARM64FRINTPD,
  1004  		ssa.OpARM64FRINTZD:
  1005  		p := s.Prog(v.Op.Asm())
  1006  		p.From.Type = obj.TYPE_REG
  1007  		p.From.Reg = v.Args[0].Reg()
  1008  		p.To.Type = obj.TYPE_REG
  1009  		p.To.Reg = v.Reg()
  1010  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
  1011  		// input is already rounded
  1012  	case ssa.OpARM64VCNT:
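         		// Reinterpret the F-register operands as V registers with an 8B arrangement.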
  1013  		p := s.Prog(v.Op.Asm())
  1014  		p.From.Type = obj.TYPE_REG
  1015  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1016  		p.To.Type = obj.TYPE_REG
  1017  		p.To.Reg = (v.Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1018  	case ssa.OpARM64VUADDLV:
  1019  		p := s.Prog(v.Op.Asm())
  1020  		p.From.Type = obj.TYPE_REG
  1021  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1022  		p.To.Type = obj.TYPE_REG
  1023  		p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
  1024  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
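         		// AuxInt carries the ssa comparison op that supplies the condition code;
         		// for CSEL0 the second operand is the zero register.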
  1025  		r1 := int16(arm64.REGZERO)
  1026  		if v.Op != ssa.OpARM64CSEL0 {
  1027  			r1 = v.Args[1].Reg()
  1028  		}
  1029  		p := s.Prog(v.Op.Asm())
  1030  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1031  		condCode := condBits[ssa.Op(v.AuxInt)]
  1032  		p.From.Offset = int64(condCode)
  1033  		p.Reg = v.Args[0].Reg()
  1034  		p.AddRestSourceReg(r1)
  1035  		p.To.Type = obj.TYPE_REG
  1036  		p.To.Reg = v.Reg()
  1037  	case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
  1038  		p := s.Prog(v.Op.Asm())
  1039  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1040  		condCode := condBits[ssa.Op(v.AuxInt)]
  1041  		p.From.Offset = int64(condCode)
  1042  		p.Reg = v.Args[0].Reg()
  1043  		p.AddRestSourceReg(v.Args[1].Reg())
  1044  		p.To.Type = obj.TYPE_REG
  1045  		p.To.Reg = v.Reg()
  1046  	case ssa.OpARM64CSETM:
  1047  		p := s.Prog(arm64.ACSETM)
  1048  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1049  		condCode := condBits[ssa.Op(v.AuxInt)]
  1050  		p.From.Offset = int64(condCode)
  1051  		p.To.Type = obj.TYPE_REG
  1052  		p.To.Reg = v.Reg()
  1053  	case ssa.OpARM64CCMP,
  1054  		ssa.OpARM64CCMN,
  1055  		ssa.OpARM64CCMPconst,
  1056  		ssa.OpARM64CCMNconst,
  1057  		ssa.OpARM64CCMPW,
  1058  		ssa.OpARM64CCMNW,
  1059  		ssa.OpARM64CCMPWconst,
  1060  		ssa.OpARM64CCMNWconst:
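         		// A conditional compare performs the comparison only when the condition holds;
         		// otherwise it sets the flags directly to the NZCV immediate.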
  1061  		p := s.Prog(v.Op.Asm())
  1062  		p.Reg = v.Args[0].Reg()
  1063  		params := v.AuxArm64ConditionalParams()
  1064  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1065  		p.From.Offset = int64(condBits[params.Cond()])
  1066  		constValue, ok := params.ConstValue()
  1067  		if ok {
  1068  			p.AddRestSourceConst(constValue)
  1069  		} else {
  1070  			p.AddRestSourceReg(v.Args[1].Reg())
  1071  		}
  1072  		p.To.Type = obj.TYPE_CONST
  1073  		p.To.Offset = params.Nzcv()
  1074  	case ssa.OpARM64LoweredZero:
  1075  		ptrReg := v.Args[0].Reg()
  1076  		n := v.AuxInt
  1077  		if n < 16 {
  1078  			v.Fatalf("Zero too small %d", n)
  1079  		}
  1080  
  1081  		// Generate zeroing instructions.
  1082  		var off int64
  1083  		for n >= 16 {
  1084  			//  STP     (ZR, ZR), off(ptrReg)
  1085  			zero16(s, ptrReg, off, false)
  1086  			off += 16
  1087  			n -= 16
  1088  		}
  1089  		// Write any fractional portion.
  1090  		// An overlapping 16-byte write can't be used here
  1091  		// because STP's offsets must be a multiple of 8.
  1092  		if n > 8 {
  1093  			//  MOVD    ZR, off(ptrReg)
  1094  			zero8(s, ptrReg, off)
  1095  			off += 8
  1096  			n -= 8
  1097  		}
  1098  		if n != 0 {
  1099  			//  MOVD    ZR, off+n-8(ptrReg)
  1100  			// TODO: for n<=4 we could use a smaller write.
  1101  			zero8(s, ptrReg, off+n-8)
  1102  		}
  1103  	case ssa.OpARM64LoweredZeroLoop:
  1104  		ptrReg := v.Args[0].Reg()
  1105  		countReg := v.RegTmp()
  1106  		n := v.AuxInt
  1107  		loopSize := int64(64)
  1108  		if n < 3*loopSize {
  1109  			// - a loop count of 0 won't work.
  1110  			// - a loop count of 1 is useless.
  1111  			// - a loop count of 2 is a code size ~tie
  1112  			//     3 instructions to implement the loop
  1113  			//     4 instructions in the loop body
  1114  			//   vs
  1115  			//     8 instructions in the straightline code
  1116  			//   Might as well use straightline code.
  1117  			v.Fatalf("ZeroLoop size too small %d", n)
  1118  		}
  1119  
  1120  		// Put iteration count in a register.
  1121  		//   MOVD    $n, countReg
  1122  		p := s.Prog(arm64.AMOVD)
  1123  		p.From.Type = obj.TYPE_CONST
  1124  		p.From.Offset = n / loopSize
  1125  		p.To.Type = obj.TYPE_REG
  1126  		p.To.Reg = countReg
  1127  		cntInit := p
  1128  
  1129  		// Zero loopSize bytes starting at ptrReg.
  1130  		// Increment ptrReg by loopSize as a side effect.
  1131  		for range loopSize / 16 {
  1132  			//  STP.P   (ZR, ZR), 16(ptrReg)
  1133  			zero16(s, ptrReg, 0, true)
  1134  			// TODO: should we use the postincrement form,
  1135  			// or use a separate += 64 instruction?
  1136  			// postincrement saves an instruction, but maybe
  1137  			// it requires more integer units to do the +=16s.
  1138  		}
  1139  		// Decrement loop count.
  1140  		//   SUB     $1, countReg
  1141  		p = s.Prog(arm64.ASUB)
  1142  		p.From.Type = obj.TYPE_CONST
  1143  		p.From.Offset = 1
  1144  		p.To.Type = obj.TYPE_REG
  1145  		p.To.Reg = countReg
  1146  		// Jump to loop header if we're not done yet.
  1147  		//   CBNZ    head
  1148  		p = s.Prog(arm64.ACBNZ)
  1149  		p.From.Type = obj.TYPE_REG
  1150  		p.From.Reg = countReg
  1151  		p.To.Type = obj.TYPE_BRANCH
  1152  		p.To.SetTarget(cntInit.Link)
  1153  
  1154  		// Multiples of the loop size are now done.
  1155  		n %= loopSize
  1156  
  1157  		// Write any fractional portion.
  1158  		var off int64
  1159  		for n >= 16 {
  1160  			//  STP     (ZR, ZR), off(ptrReg)
  1161  			zero16(s, ptrReg, off, false)
  1162  			off += 16
  1163  			n -= 16
  1164  		}
  1165  		if n > 8 {
  1166  			// Note: an overlapping 16-byte write can't be used
  1167  			// here because STP's offsets must be a multiple of 8.
  1168  			//  MOVD    ZR, off(ptrReg)
  1169  			zero8(s, ptrReg, off)
  1170  			off += 8
  1171  			n -= 8
  1172  		}
  1173  		if n != 0 {
  1174  			//  MOVD    ZR, off+n-8(ptrReg)
  1175  			// TODO: for n<=4 we could use a smaller write.
  1176  			zero8(s, ptrReg, off+n-8)
  1177  		}
  1178  		// TODO: maybe we should use the count register to instead
  1179  		// hold an end pointer and compare against that?
  1180  		//   ADD $n, ptrReg, endReg
  1181  		// then
  1182  		//   CMP ptrReg, endReg
  1183  		//   BNE loop
  1184  		// There's a past-the-end pointer here, any problem with that?
  1185  
  1186  	case ssa.OpARM64LoweredMove:
  1187  		dstReg := v.Args[0].Reg()
  1188  		srcReg := v.Args[1].Reg()
  1189  		if dstReg == srcReg {
  1190  			break
  1191  		}
  1192  		tmpReg1 := int16(arm64.REG_R24)
  1193  		tmpReg2 := int16(arm64.REG_R25)
  1194  		n := v.AuxInt
  1195  		if n < 16 {
  1196  			v.Fatalf("Move too small %d", n)
  1197  		}
  1198  
  1199  		// Generate copying instructions.
  1200  		var off int64
  1201  		for n >= 16 {
  1202  			// LDP     off(srcReg), (tmpReg1, tmpReg2)
  1203  			// STP     (tmpReg1, tmpReg2), off(dstReg)
  1204  			move16(s, srcReg, dstReg, tmpReg1, tmpReg2, off, false)
  1205  			off += 16
  1206  			n -= 16
  1207  		}
  1208  		if n > 8 {
  1209  			//  MOVD    off(srcReg), tmpReg1
  1210  			//  MOVD    tmpReg1, off(dstReg)
  1211  			move8(s, srcReg, dstReg, tmpReg1, off)
  1212  			off += 8
  1213  			n -= 8
  1214  		}
  1215  		if n != 0 {
  1216  			//  MOVD    off+n-8(srcReg), tmpReg1
  1217  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1218  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1219  		}
  1220  	case ssa.OpARM64LoweredMoveLoop:
  1221  		dstReg := v.Args[0].Reg()
  1222  		srcReg := v.Args[1].Reg()
  1223  		if dstReg == srcReg {
  1224  			break
  1225  		}
  1226  		countReg := int16(arm64.REG_R23)
  1227  		tmpReg1 := int16(arm64.REG_R24)
  1228  		tmpReg2 := int16(arm64.REG_R25)
  1229  		n := v.AuxInt
  1230  		loopSize := int64(64)
  1231  		if n < 3*loopSize {
  1232  			// - a loop count of 0 won't work.
  1233  			// - a loop count of 1 is useless.
  1234  			// - a loop count of 2 is a code size ~tie
  1235  			//     3 instructions to implement the loop
  1236  			//     4 instructions in the loop body
  1237  			//   vs
  1238  			//     8 instructions in the straightline code
  1239  			//   Might as well use straightline code.
   1240  			v.Fatalf("MoveLoop size too small %d", n)
  1241  		}
  1242  
  1243  		// Put iteration count in a register.
  1244  		//   MOVD    $n, countReg
  1245  		p := s.Prog(arm64.AMOVD)
  1246  		p.From.Type = obj.TYPE_CONST
  1247  		p.From.Offset = n / loopSize
  1248  		p.To.Type = obj.TYPE_REG
  1249  		p.To.Reg = countReg
  1250  		cntInit := p
  1251  
  1252  		// Move loopSize bytes starting at srcReg to dstReg.
   1253  		// Increment srcReg and dstReg by loopSize as a side effect.
  1254  		for range loopSize / 16 {
  1255  			// LDP.P  16(srcReg), (tmpReg1, tmpReg2)
  1256  			// STP.P  (tmpReg1, tmpReg2), 16(dstReg)
  1257  			move16(s, srcReg, dstReg, tmpReg1, tmpReg2, 0, true)
  1258  		}
  1259  		// Decrement loop count.
  1260  		//   SUB     $1, countReg
  1261  		p = s.Prog(arm64.ASUB)
  1262  		p.From.Type = obj.TYPE_CONST
  1263  		p.From.Offset = 1
  1264  		p.To.Type = obj.TYPE_REG
  1265  		p.To.Reg = countReg
  1266  		// Jump to loop header if we're not done yet.
  1267  		//   CBNZ    head
  1268  		p = s.Prog(arm64.ACBNZ)
  1269  		p.From.Type = obj.TYPE_REG
  1270  		p.From.Reg = countReg
  1271  		p.To.Type = obj.TYPE_BRANCH
  1272  		p.To.SetTarget(cntInit.Link)
  1273  
  1274  		// Multiples of the loop size are now done.
  1275  		n %= loopSize
  1276  
  1277  		// Copy any fractional portion.
  1278  		var off int64
  1279  		for n >= 16 {
  1280  			//  LDP     off(srcReg), (tmpReg1, tmpReg2)
  1281  			//  STP     (tmpReg1, tmpReg2), off(dstReg)
  1282  			move16(s, srcReg, dstReg, tmpReg1, tmpReg2, off, false)
  1283  			off += 16
  1284  			n -= 16
  1285  		}
  1286  		if n > 8 {
  1287  			//  MOVD    off(srcReg), tmpReg1
  1288  			//  MOVD    tmpReg1, off(dstReg)
  1289  			move8(s, srcReg, dstReg, tmpReg1, off)
  1290  			off += 8
  1291  			n -= 8
  1292  		}
  1293  		if n != 0 {
  1294  			//  MOVD    off+n-8(srcReg), tmpReg1
  1295  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1296  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1297  		}
  1298  
  1299  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1300  		s.Call(v)
  1301  	case ssa.OpARM64CALLtail:
  1302  		s.TailCall(v)
  1303  	case ssa.OpARM64LoweredWB:
  1304  		p := s.Prog(obj.ACALL)
  1305  		p.To.Type = obj.TYPE_MEM
  1306  		p.To.Name = obj.NAME_EXTERN
  1307  		// AuxInt encodes how many buffer entries we need.
  1308  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1309  
  1310  	case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
  1311  		// Compute the constant we put in the PCData entry for this call.
  1312  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
  1313  		xIsReg := false
  1314  		yIsReg := false
  1315  		xVal := 0
  1316  		yVal := 0
  1317  		switch v.Op {
  1318  		case ssa.OpARM64LoweredPanicBoundsRR:
  1319  			xIsReg = true
  1320  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1321  			yIsReg = true
  1322  			yVal = int(v.Args[1].Reg() - arm64.REG_R0)
  1323  		case ssa.OpARM64LoweredPanicBoundsRC:
  1324  			xIsReg = true
  1325  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1326  			c := v.Aux.(ssa.PanicBoundsC).C
  1327  			if c >= 0 && c <= abi.BoundsMaxConst {
  1328  				yVal = int(c)
  1329  			} else {
  1330  				// Move constant to a register
  1331  				yIsReg = true
  1332  				if yVal == xVal {
  1333  					yVal = 1
  1334  				}
  1335  				p := s.Prog(arm64.AMOVD)
  1336  				p.From.Type = obj.TYPE_CONST
  1337  				p.From.Offset = c
  1338  				p.To.Type = obj.TYPE_REG
  1339  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1340  			}
  1341  		case ssa.OpARM64LoweredPanicBoundsCR:
  1342  			yIsReg = true
  1343  			yVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1344  			c := v.Aux.(ssa.PanicBoundsC).C
  1345  			if c >= 0 && c <= abi.BoundsMaxConst {
  1346  				xVal = int(c)
  1347  			} else {
  1348  				// Move constant to a register
  1349  				if xVal == yVal {
  1350  					xVal = 1
  1351  				}
  1352  				p := s.Prog(arm64.AMOVD)
  1353  				p.From.Type = obj.TYPE_CONST
  1354  				p.From.Offset = c
  1355  				p.To.Type = obj.TYPE_REG
  1356  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1357  			}
  1358  		case ssa.OpARM64LoweredPanicBoundsCC:
  1359  			c := v.Aux.(ssa.PanicBoundsCC).Cx
  1360  			if c >= 0 && c <= abi.BoundsMaxConst {
  1361  				xVal = int(c)
  1362  			} else {
  1363  				// Move constant to a register
  1364  				xIsReg = true
  1365  				p := s.Prog(arm64.AMOVD)
  1366  				p.From.Type = obj.TYPE_CONST
  1367  				p.From.Offset = c
  1368  				p.To.Type = obj.TYPE_REG
  1369  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1370  			}
  1371  			c = v.Aux.(ssa.PanicBoundsCC).Cy
  1372  			if c >= 0 && c <= abi.BoundsMaxConst {
  1373  				yVal = int(c)
  1374  			} else {
  1375  				// Move constant to a register
  1376  				yIsReg = true
  1377  				yVal = 1
  1378  				p := s.Prog(arm64.AMOVD)
  1379  				p.From.Type = obj.TYPE_CONST
  1380  				p.From.Offset = c
  1381  				p.To.Type = obj.TYPE_REG
  1382  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1383  			}
  1384  		}
  1385  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
  1386  
  1387  		p := s.Prog(obj.APCDATA)
  1388  		p.From.SetConst(abi.PCDATA_PanicBounds)
  1389  		p.To.SetConst(int64(c))
  1390  		p = s.Prog(obj.ACALL)
  1391  		p.To.Type = obj.TYPE_MEM
  1392  		p.To.Name = obj.NAME_EXTERN
  1393  		p.To.Sym = ir.Syms.PanicBounds
  1394  
  1395  	case ssa.OpARM64LoweredNilCheck:
  1396  		// Issue a load which will fault if arg is nil.
  1397  		p := s.Prog(arm64.AMOVB)
  1398  		p.From.Type = obj.TYPE_MEM
  1399  		p.From.Reg = v.Args[0].Reg()
  1400  		ssagen.AddAux(&p.From, v)
  1401  		p.To.Type = obj.TYPE_REG
  1402  		p.To.Reg = arm64.REGTMP
  1403  		if logopt.Enabled() {
  1404  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1405  		}
  1406  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Line==1 in generated wrappers
  1407  			base.WarnfAt(v.Pos, "generated nil check")
  1408  		}
  1409  	case ssa.OpARM64Equal,
  1410  		ssa.OpARM64NotEqual,
  1411  		ssa.OpARM64LessThan,
  1412  		ssa.OpARM64LessEqual,
  1413  		ssa.OpARM64GreaterThan,
  1414  		ssa.OpARM64GreaterEqual,
  1415  		ssa.OpARM64LessThanU,
  1416  		ssa.OpARM64LessEqualU,
  1417  		ssa.OpARM64GreaterThanU,
  1418  		ssa.OpARM64GreaterEqualU,
  1419  		ssa.OpARM64LessThanF,
  1420  		ssa.OpARM64LessEqualF,
  1421  		ssa.OpARM64GreaterThanF,
  1422  		ssa.OpARM64GreaterEqualF,
  1423  		ssa.OpARM64NotLessThanF,
  1424  		ssa.OpARM64NotLessEqualF,
  1425  		ssa.OpARM64NotGreaterThanF,
  1426  		ssa.OpARM64NotGreaterEqualF,
  1427  		ssa.OpARM64LessThanNoov,
  1428  		ssa.OpARM64GreaterEqualNoov:
  1429  		// generate boolean values using CSET
  1430  		p := s.Prog(arm64.ACSET)
  1431  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1432  		condCode := condBits[v.Op]
  1433  		p.From.Offset = int64(condCode)
  1434  		p.To.Type = obj.TYPE_REG
  1435  		p.To.Reg = v.Reg()
  1436  	case ssa.OpARM64PRFM:
  1437  		p := s.Prog(v.Op.Asm())
  1438  		p.From.Type = obj.TYPE_MEM
  1439  		p.From.Reg = v.Args[0].Reg()
  1440  		p.To.Type = obj.TYPE_CONST
  1441  		p.To.Offset = v.AuxInt
  1442  	case ssa.OpARM64LoweredGetClosurePtr:
  1443  		// Closure pointer is R26 (arm64.REGCTXT).
  1444  		ssagen.CheckLoweredGetClosurePtr(v)
  1445  	case ssa.OpARM64LoweredGetCallerSP:
  1446  		// caller's SP is FixedFrameSize below the address of the first arg
  1447  		p := s.Prog(arm64.AMOVD)
  1448  		p.From.Type = obj.TYPE_ADDR
  1449  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1450  		p.From.Name = obj.NAME_PARAM
  1451  		p.To.Type = obj.TYPE_REG
  1452  		p.To.Reg = v.Reg()
  1453  	case ssa.OpARM64LoweredGetCallerPC:
  1454  		p := s.Prog(obj.AGETCALLERPC)
  1455  		p.To.Type = obj.TYPE_REG
  1456  		p.To.Reg = v.Reg()
  1457  	case ssa.OpARM64DMB:
  1458  		p := s.Prog(v.Op.Asm())
  1459  		p.From.Type = obj.TYPE_CONST
  1460  		p.From.Offset = v.AuxInt
  1461  	case ssa.OpARM64FlagConstant:
  1462  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1463  	case ssa.OpARM64InvertFlags:
  1464  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1465  	case ssa.OpClobber:
  1466  		// MOVW	$0xdeaddead, REGTMP
  1467  		// MOVW	REGTMP, (slot)
  1468  		// MOVW	REGTMP, 4(slot)
  1469  		p := s.Prog(arm64.AMOVW)
  1470  		p.From.Type = obj.TYPE_CONST
  1471  		p.From.Offset = 0xdeaddead
  1472  		p.To.Type = obj.TYPE_REG
  1473  		p.To.Reg = arm64.REGTMP
  1474  		p = s.Prog(arm64.AMOVW)
  1475  		p.From.Type = obj.TYPE_REG
  1476  		p.From.Reg = arm64.REGTMP
  1477  		p.To.Type = obj.TYPE_MEM
  1478  		p.To.Reg = arm64.REGSP
  1479  		ssagen.AddAux(&p.To, v)
  1480  		p = s.Prog(arm64.AMOVW)
  1481  		p.From.Type = obj.TYPE_REG
  1482  		p.From.Reg = arm64.REGTMP
  1483  		p.To.Type = obj.TYPE_MEM
  1484  		p.To.Reg = arm64.REGSP
  1485  		ssagen.AddAux2(&p.To, v, v.AuxInt+4)
  1486  	case ssa.OpClobberReg:
  1487  		x := uint64(0xdeaddeaddeaddead)
  1488  		p := s.Prog(arm64.AMOVD)
  1489  		p.From.Type = obj.TYPE_CONST
  1490  		p.From.Offset = int64(x)
  1491  		p.To.Type = obj.TYPE_REG
  1492  		p.To.Reg = v.Reg()
  1493  	default:
  1494  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1495  	}
  1496  }
  1497  
  1498  var condBits = map[ssa.Op]arm64.SpecialOperand{
  1499  	ssa.OpARM64Equal:         arm64.SPOP_EQ,
  1500  	ssa.OpARM64NotEqual:      arm64.SPOP_NE,
  1501  	ssa.OpARM64LessThan:      arm64.SPOP_LT,
  1502  	ssa.OpARM64LessThanU:     arm64.SPOP_LO,
  1503  	ssa.OpARM64LessEqual:     arm64.SPOP_LE,
  1504  	ssa.OpARM64LessEqualU:    arm64.SPOP_LS,
  1505  	ssa.OpARM64GreaterThan:   arm64.SPOP_GT,
  1506  	ssa.OpARM64GreaterThanU:  arm64.SPOP_HI,
  1507  	ssa.OpARM64GreaterEqual:  arm64.SPOP_GE,
  1508  	ssa.OpARM64GreaterEqualU: arm64.SPOP_HS,
  1509  	ssa.OpARM64LessThanF:     arm64.SPOP_MI, // Less than
  1510  	ssa.OpARM64LessEqualF:    arm64.SPOP_LS, // Less than or equal to
  1511  	ssa.OpARM64GreaterThanF:  arm64.SPOP_GT, // Greater than
  1512  	ssa.OpARM64GreaterEqualF: arm64.SPOP_GE, // Greater than or equal to
  1513  
   1514  	// The following condition codes include the unordered case, to handle comparisons involving NaN.
  1515  	ssa.OpARM64NotLessThanF:     arm64.SPOP_PL, // Greater than, equal to, or unordered
  1516  	ssa.OpARM64NotLessEqualF:    arm64.SPOP_HI, // Greater than or unordered
  1517  	ssa.OpARM64NotGreaterThanF:  arm64.SPOP_LE, // Less than, equal to or unordered
  1518  	ssa.OpARM64NotGreaterEqualF: arm64.SPOP_LT, // Less than or unordered
  1519  
  1520  	ssa.OpARM64LessThanNoov:     arm64.SPOP_MI, // Less than but without honoring overflow
  1521  	ssa.OpARM64GreaterEqualNoov: arm64.SPOP_PL, // Greater than or equal to but without honoring overflow
  1522  }
  1523  
  1524  var blockJump = map[ssa.BlockKind]struct {
  1525  	asm, invasm obj.As
  1526  }{
  1527  	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
  1528  	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
  1529  	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
  1530  	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
  1531  	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
  1532  	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
  1533  	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
  1534  	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
  1535  	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
  1536  	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
  1537  	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
  1538  	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
  1539  	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
  1540  	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
  1541  	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
  1542  	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
  1543  	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
  1544  	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
  1545  	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
  1546  	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
  1547  	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
  1548  	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
  1549  }
  1550  
  1551  // To model a 'LEnoov' ('<=' without overflow checking) branching.
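         // LEnoov is taken when Z or N is set, so it is synthesized from an EQ branch
         // plus an MI/PL branch; the pair used depends on which successor is the fallthrough.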
  1552  var leJumps = [2][2]ssagen.IndexJump{
  1553  	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
  1554  	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
  1555  }
  1556  
  1557  // To model a 'GTnoov' ('>' without overflow checking) branching.
  1558  var gtJumps = [2][2]ssagen.IndexJump{
  1559  	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
  1560  	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
  1561  }
  1562  
  1563  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1564  	switch b.Kind {
  1565  	case ssa.BlockPlain, ssa.BlockDefer:
  1566  		if b.Succs[0].Block() != next {
  1567  			p := s.Prog(obj.AJMP)
  1568  			p.To.Type = obj.TYPE_BRANCH
  1569  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1570  		}
  1571  
  1572  	case ssa.BlockExit, ssa.BlockRetJmp:
  1573  
  1574  	case ssa.BlockRet:
  1575  		s.Prog(obj.ARET)
  1576  
  1577  	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
  1578  		ssa.BlockARM64LT, ssa.BlockARM64GE,
  1579  		ssa.BlockARM64LE, ssa.BlockARM64GT,
  1580  		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
  1581  		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
  1582  		ssa.BlockARM64Z, ssa.BlockARM64NZ,
  1583  		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
  1584  		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
  1585  		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
  1586  		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
  1587  		jmp := blockJump[b.Kind]
  1588  		var p *obj.Prog
  1589  		switch next {
  1590  		case b.Succs[0].Block():
  1591  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1592  		case b.Succs[1].Block():
  1593  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1594  		default:
  1595  			if b.Likely != ssa.BranchUnlikely {
  1596  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1597  				s.Br(obj.AJMP, b.Succs[1].Block())
  1598  			} else {
  1599  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1600  				s.Br(obj.AJMP, b.Succs[0].Block())
  1601  			}
  1602  		}
  1603  		if !b.Controls[0].Type.IsFlags() {
  1604  			p.From.Type = obj.TYPE_REG
  1605  			p.From.Reg = b.Controls[0].Reg()
  1606  		}
  1607  	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
  1608  		jmp := blockJump[b.Kind]
  1609  		var p *obj.Prog
  1610  		switch next {
  1611  		case b.Succs[0].Block():
  1612  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1613  		case b.Succs[1].Block():
  1614  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1615  		default:
  1616  			if b.Likely != ssa.BranchUnlikely {
  1617  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1618  				s.Br(obj.AJMP, b.Succs[1].Block())
  1619  			} else {
  1620  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1621  				s.Br(obj.AJMP, b.Succs[0].Block())
  1622  			}
  1623  		}
  1624  		p.From.Offset = b.AuxInt
  1625  		p.From.Type = obj.TYPE_CONST
  1626  		p.Reg = b.Controls[0].Reg()
  1627  
  1628  	case ssa.BlockARM64LEnoov:
  1629  		s.CombJump(b, next, &leJumps)
  1630  	case ssa.BlockARM64GTnoov:
  1631  		s.CombJump(b, next, &gtJumps)
  1632  
  1633  	case ssa.BlockARM64JUMPTABLE:
  1634  		// MOVD	(TABLE)(IDX<<3), Rtmp
  1635  		// JMP	(Rtmp)
  1636  		p := s.Prog(arm64.AMOVD)
  1637  		p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
  1638  		p.To.Type = obj.TYPE_REG
  1639  		p.To.Reg = arm64.REGTMP
  1640  		p = s.Prog(obj.AJMP)
  1641  		p.To.Type = obj.TYPE_MEM
  1642  		p.To.Reg = arm64.REGTMP
  1643  		// Save jump tables for later resolution of the target blocks.
  1644  		s.JumpTables = append(s.JumpTables, b)
  1645  
  1646  	default:
  1647  		b.Fatalf("branch not implemented: %s", b.LongString())
  1648  	}
  1649  }
  1650  
  1651  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1652  	p := s.Prog(loadByType(t))
  1653  	p.From.Type = obj.TYPE_MEM
  1654  	p.From.Name = obj.NAME_AUTO
  1655  	p.From.Sym = n.Linksym()
  1656  	p.From.Offset = n.FrameOffset() + off
  1657  	p.To.Type = obj.TYPE_REG
  1658  	p.To.Reg = reg
  1659  	return p
  1660  }
  1661  
  1662  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1663  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1664  	p.To.Name = obj.NAME_PARAM
  1665  	p.To.Sym = n.Linksym()
  1666  	p.Pos = p.Pos.WithNotStmt()
  1667  	return p
  1668  }
  1669  
  1670  // zero16 zeroes 16 bytes at reg+off.
  1671  // If postInc is true, increment reg by 16.
  1672  func zero16(s *ssagen.State, reg int16, off int64, postInc bool) {
  1673  	//   STP     (ZR, ZR), off(reg)
  1674  	p := s.Prog(arm64.ASTP)
  1675  	p.From.Type = obj.TYPE_REGREG
  1676  	p.From.Reg = arm64.REGZERO
  1677  	p.From.Offset = int64(arm64.REGZERO)
  1678  	p.To.Type = obj.TYPE_MEM
  1679  	p.To.Reg = reg
  1680  	p.To.Offset = off
  1681  	if postInc {
  1682  		if off != 0 {
  1683  			panic("can't postinc with non-zero offset")
  1684  		}
  1685  		//   STP.P  (ZR, ZR), 16(reg)
  1686  		p.Scond = arm64.C_XPOST
  1687  		p.To.Offset = 16
  1688  	}
  1689  }
  1690  
  1691  // zero8 zeroes 8 bytes at reg+off.
  1692  func zero8(s *ssagen.State, reg int16, off int64) {
  1693  	//   MOVD     ZR, off(reg)
  1694  	p := s.Prog(arm64.AMOVD)
  1695  	p.From.Type = obj.TYPE_REG
  1696  	p.From.Reg = arm64.REGZERO
  1697  	p.To.Type = obj.TYPE_MEM
  1698  	p.To.Reg = reg
  1699  	p.To.Offset = off
  1700  }
  1701  
  1702  // move16 copies 16 bytes at src+off to dst+off.
  1703  // Uses registers tmp1 and tmp2.
  1704  // If postInc is true, increment src and dst by 16.
  1705  func move16(s *ssagen.State, src, dst, tmp1, tmp2 int16, off int64, postInc bool) {
  1706  	// LDP     off(src), (tmp1, tmp2)
  1707  	ld := s.Prog(arm64.ALDP)
  1708  	ld.From.Type = obj.TYPE_MEM
  1709  	ld.From.Reg = src
  1710  	ld.From.Offset = off
  1711  	ld.To.Type = obj.TYPE_REGREG
  1712  	ld.To.Reg = tmp1
  1713  	ld.To.Offset = int64(tmp2)
  1714  	// STP     (tmp1, tmp2), off(dst)
  1715  	st := s.Prog(arm64.ASTP)
  1716  	st.From.Type = obj.TYPE_REGREG
  1717  	st.From.Reg = tmp1
  1718  	st.From.Offset = int64(tmp2)
  1719  	st.To.Type = obj.TYPE_MEM
  1720  	st.To.Reg = dst
  1721  	st.To.Offset = off
  1722  	if postInc {
  1723  		if off != 0 {
  1724  			panic("can't postinc with non-zero offset")
  1725  		}
  1726  		ld.Scond = arm64.C_XPOST
  1727  		st.Scond = arm64.C_XPOST
  1728  		ld.From.Offset = 16
  1729  		st.To.Offset = 16
  1730  	}
  1731  }
  1732  
  1733  // move8 copies 8 bytes at src+off to dst+off.
  1734  // Uses register tmp.
  1735  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1736  	// MOVD    off(src), tmp
  1737  	ld := s.Prog(arm64.AMOVD)
  1738  	ld.From.Type = obj.TYPE_MEM
  1739  	ld.From.Reg = src
  1740  	ld.From.Offset = off
  1741  	ld.To.Type = obj.TYPE_REG
  1742  	ld.To.Reg = tmp
  1743  	// MOVD    tmp, off(dst)
  1744  	st := s.Prog(arm64.AMOVD)
  1745  	st.From.Type = obj.TYPE_REG
  1746  	st.From.Reg = tmp
  1747  	st.To.Type = obj.TYPE_MEM
  1748  	st.To.Reg = dst
  1749  	st.To.Offset = off
  1750  }
  1751  
