Source file src/cmd/compile/internal/arm64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/arm64"
    19  	"internal/abi"
    20  )
    21  
    22  // loadByType returns the load instruction of the given type.
    23  func loadByType(t *types.Type) obj.As {
    24  	if t.IsFloat() {
    25  		switch t.Size() {
    26  		case 4:
    27  			return arm64.AFMOVS
    28  		case 8:
    29  			return arm64.AFMOVD
    30  		}
    31  	} else {
    32  		switch t.Size() {
    33  		case 1:
    34  			if t.IsSigned() {
    35  				return arm64.AMOVB
    36  			} else {
    37  				return arm64.AMOVBU
    38  			}
    39  		case 2:
    40  			if t.IsSigned() {
    41  				return arm64.AMOVH
    42  			} else {
    43  				return arm64.AMOVHU
    44  			}
    45  		case 4:
    46  			if t.IsSigned() {
    47  				return arm64.AMOVW
    48  			} else {
    49  				return arm64.AMOVWU
    50  			}
    51  		case 8:
    52  			return arm64.AMOVD
    53  		}
    54  	}
    55  	panic("bad load type")
    56  }
    57  
    58  // storeByType returns the store instruction of the given type.
    59  func storeByType(t *types.Type) obj.As {
    60  	if t.IsFloat() {
    61  		switch t.Size() {
    62  		case 4:
    63  			return arm64.AFMOVS
    64  		case 8:
    65  			return arm64.AFMOVD
    66  		}
    67  	} else {
    68  		switch t.Size() {
    69  		case 1:
    70  			return arm64.AMOVB
    71  		case 2:
    72  			return arm64.AMOVH
    73  		case 4:
    74  			return arm64.AMOVW
    75  		case 8:
    76  			return arm64.AMOVD
    77  		}
    78  	}
    79  	panic("bad store type")
    80  }
    81  
     82  // loadByType2 returns an opcode that loads 2 consecutive values of type t from memory into 2 registers.
     83  // It returns obj.AXXX if no such opcode exists.
    84  func loadByType2(t *types.Type) obj.As {
    85  	if t.IsFloat() {
    86  		switch t.Size() {
    87  		case 4:
    88  			return arm64.AFLDPS
    89  		case 8:
    90  			return arm64.AFLDPD
    91  		}
    92  	} else {
    93  		switch t.Size() {
    94  		case 4:
    95  			return arm64.ALDPW
    96  		case 8:
    97  			return arm64.ALDP
    98  		}
    99  	}
   100  	return obj.AXXX
   101  }
   102  
    103  // storeByType2 returns an opcode that stores 2 registers of type t into 2 consecutive memory locations.
    104  // It returns obj.AXXX if no such opcode exists.
   105  func storeByType2(t *types.Type) obj.As {
   106  	if t.IsFloat() {
   107  		switch t.Size() {
   108  		case 4:
   109  			return arm64.AFSTPS
   110  		case 8:
   111  			return arm64.AFSTPD
   112  		}
   113  	} else {
   114  		switch t.Size() {
   115  		case 4:
   116  			return arm64.ASTPW
   117  		case 8:
   118  			return arm64.ASTP
   119  		}
   120  	}
   121  	return obj.AXXX
   122  }
   123  
   124  // makeshift encodes a register shifted by a constant, used as an Offset in Prog.
   125  func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 {
   126  	if s < 0 || s >= 64 {
   127  		v.Fatalf("shift out of range: %d", s)
   128  	}
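         	// The shifted-register operand is packed into a single Offset value:
         	// register number in bits 16-20, shift amount in bits 10-15, plus the
         	// pre-positioned shift-type constant (SHIFT_LL, SHIFT_LR, SHIFT_AR, SHIFT_ROR).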
   129  	return int64(reg&31)<<16 | typ | (s&63)<<10
   130  }
   131  
   132  // genshift generates a Prog for r = r0 op (r1 shifted by n).
   133  func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
   134  	p := s.Prog(as)
   135  	p.From.Type = obj.TYPE_SHIFT
   136  	p.From.Offset = makeshift(v, r1, typ, n)
   137  	p.Reg = r0
   138  	if r != 0 {
   139  		p.To.Type = obj.TYPE_REG
   140  		p.To.Reg = r
   141  	}
   142  	return p
   143  }
   144  
    145  // genIndexedOperand generates the memory operand for the indexed load/store instructions.
    146  // base and idx are registers.
   147  func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
   148  	// Reg: base register, Index: (shifted) index register
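         	// For the scaled forms below, the Index field packs the index register (low 5 bits)
         	// with REG_LSL and the shift amount (log2 of the element size) starting at bit 5.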
   149  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
   150  	switch op {
   151  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8,
   152  		ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
   153  		mop.Index = arm64.REG_LSL | 3<<5 | idx&31
   154  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4,
   155  		ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
   156  		mop.Index = arm64.REG_LSL | 2<<5 | idx&31
   157  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2:
   158  		mop.Index = arm64.REG_LSL | 1<<5 | idx&31
   159  	default: // not shifted
   160  		mop.Index = idx
   161  	}
   162  	return mop
   163  }
   164  
   165  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   166  	switch v.Op {
   167  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   168  		if v.Type.IsMemory() {
   169  			return
   170  		}
   171  		x := v.Args[0].Reg()
   172  		y := v.Reg()
   173  		if x == y {
   174  			return
   175  		}
   176  		as := arm64.AMOVD
   177  		if v.Type.IsFloat() {
   178  			switch v.Type.Size() {
   179  			case 4:
   180  				as = arm64.AFMOVS
   181  			case 8:
   182  				as = arm64.AFMOVD
   183  			default:
   184  				panic("bad float size")
   185  			}
   186  		}
   187  		p := s.Prog(as)
   188  		p.From.Type = obj.TYPE_REG
   189  		p.From.Reg = x
   190  		p.To.Type = obj.TYPE_REG
   191  		p.To.Reg = y
   192  	case ssa.OpARM64MOVDnop, ssa.OpARM64ZERO:
   193  		// nothing to do
   194  	case ssa.OpLoadReg:
   195  		if v.Type.IsFlags() {
   196  			v.Fatalf("load flags not implemented: %v", v.LongString())
   197  			return
   198  		}
   199  		p := s.Prog(loadByType(v.Type))
   200  		ssagen.AddrAuto(&p.From, v.Args[0])
   201  		p.To.Type = obj.TYPE_REG
   202  		p.To.Reg = v.Reg()
   203  	case ssa.OpStoreReg:
   204  		if v.Type.IsFlags() {
   205  			v.Fatalf("store flags not implemented: %v", v.LongString())
   206  			return
   207  		}
   208  		p := s.Prog(storeByType(v.Type))
   209  		p.From.Type = obj.TYPE_REG
   210  		p.From.Reg = v.Args[0].Reg()
   211  		ssagen.AddrAuto(&p.To, v)
   212  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   213  		ssagen.CheckArgReg(v)
    214  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill of the register args.
    215  		// This only runs once per function; RegArgs is cleared below so later Arg values skip it.
   216  		args := v.Block.Func.RegArgs
   217  		if len(args) == 0 {
   218  			break
   219  		}
    220  		v.Block.Func.RegArgs = nil // prevent this from running again
   221  
   222  		for i := 0; i < len(args); i++ {
   223  			a := args[i]
   224  			// Offset by size of the saved LR slot.
   225  			addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   226  			// Look for double-register operations if we can.
   227  			if i < len(args)-1 {
   228  				b := args[i+1]
   229  				if a.Type.Size() == b.Type.Size() &&
   230  					a.Type.IsFloat() == b.Type.IsFloat() &&
   231  					b.Offset == a.Offset+a.Type.Size() {
   232  					ld := loadByType2(a.Type)
   233  					st := storeByType2(a.Type)
   234  					if ld != obj.AXXX && st != obj.AXXX {
   235  						s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Reg2: b.Reg, Addr: addr, Unspill: ld, Spill: st})
   236  						i++ // b is done also, skip it.
   237  						continue
   238  					}
   239  				}
   240  			}
   241  			// Pass the spill/unspill information along to the assembler.
   242  			s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   243  		}
   244  
   245  	case ssa.OpARM64ADD,
   246  		ssa.OpARM64SUB,
   247  		ssa.OpARM64AND,
   248  		ssa.OpARM64OR,
   249  		ssa.OpARM64XOR,
   250  		ssa.OpARM64BIC,
   251  		ssa.OpARM64EON,
   252  		ssa.OpARM64ORN,
   253  		ssa.OpARM64MUL,
   254  		ssa.OpARM64MULW,
   255  		ssa.OpARM64MNEG,
   256  		ssa.OpARM64MNEGW,
   257  		ssa.OpARM64MULH,
   258  		ssa.OpARM64UMULH,
   259  		ssa.OpARM64MULL,
   260  		ssa.OpARM64UMULL,
   261  		ssa.OpARM64DIV,
   262  		ssa.OpARM64UDIV,
   263  		ssa.OpARM64DIVW,
   264  		ssa.OpARM64UDIVW,
   265  		ssa.OpARM64MOD,
   266  		ssa.OpARM64UMOD,
   267  		ssa.OpARM64MODW,
   268  		ssa.OpARM64UMODW,
   269  		ssa.OpARM64SLL,
   270  		ssa.OpARM64SRL,
   271  		ssa.OpARM64SRA,
   272  		ssa.OpARM64FADDS,
   273  		ssa.OpARM64FADDD,
   274  		ssa.OpARM64FSUBS,
   275  		ssa.OpARM64FSUBD,
   276  		ssa.OpARM64FMULS,
   277  		ssa.OpARM64FMULD,
   278  		ssa.OpARM64FNMULS,
   279  		ssa.OpARM64FNMULD,
   280  		ssa.OpARM64FDIVS,
   281  		ssa.OpARM64FDIVD,
   282  		ssa.OpARM64FMINS,
   283  		ssa.OpARM64FMIND,
   284  		ssa.OpARM64FMAXS,
   285  		ssa.OpARM64FMAXD,
   286  		ssa.OpARM64ROR,
   287  		ssa.OpARM64RORW:
   288  		r := v.Reg()
   289  		r1 := v.Args[0].Reg()
   290  		r2 := v.Args[1].Reg()
   291  		p := s.Prog(v.Op.Asm())
   292  		p.From.Type = obj.TYPE_REG
   293  		p.From.Reg = r2
   294  		p.Reg = r1
   295  		p.To.Type = obj.TYPE_REG
   296  		p.To.Reg = r
   297  	case ssa.OpARM64FMADDS,
   298  		ssa.OpARM64FMADDD,
   299  		ssa.OpARM64FNMADDS,
   300  		ssa.OpARM64FNMADDD,
   301  		ssa.OpARM64FMSUBS,
   302  		ssa.OpARM64FMSUBD,
   303  		ssa.OpARM64FNMSUBS,
   304  		ssa.OpARM64FNMSUBD,
   305  		ssa.OpARM64MADD,
   306  		ssa.OpARM64MADDW,
   307  		ssa.OpARM64MSUB,
   308  		ssa.OpARM64MSUBW:
   309  		rt := v.Reg()
   310  		ra := v.Args[0].Reg()
   311  		rm := v.Args[1].Reg()
   312  		rn := v.Args[2].Reg()
   313  		p := s.Prog(v.Op.Asm())
   314  		p.Reg = ra
   315  		p.From.Type = obj.TYPE_REG
   316  		p.From.Reg = rm
   317  		p.AddRestSourceReg(rn)
   318  		p.To.Type = obj.TYPE_REG
   319  		p.To.Reg = rt
   320  	case ssa.OpARM64ADDconst,
   321  		ssa.OpARM64SUBconst,
   322  		ssa.OpARM64ANDconst,
   323  		ssa.OpARM64ORconst,
   324  		ssa.OpARM64XORconst,
   325  		ssa.OpARM64SLLconst,
   326  		ssa.OpARM64SRLconst,
   327  		ssa.OpARM64SRAconst,
   328  		ssa.OpARM64RORconst,
   329  		ssa.OpARM64RORWconst:
   330  		p := s.Prog(v.Op.Asm())
   331  		p.From.Type = obj.TYPE_CONST
   332  		p.From.Offset = v.AuxInt
   333  		p.Reg = v.Args[0].Reg()
   334  		p.To.Type = obj.TYPE_REG
   335  		p.To.Reg = v.Reg()
   336  	case ssa.OpARM64ADDSconstflags:
   337  		p := s.Prog(v.Op.Asm())
   338  		p.From.Type = obj.TYPE_CONST
   339  		p.From.Offset = v.AuxInt
   340  		p.Reg = v.Args[0].Reg()
   341  		p.To.Type = obj.TYPE_REG
   342  		p.To.Reg = v.Reg0()
   343  	case ssa.OpARM64ADCzerocarry:
   344  		p := s.Prog(v.Op.Asm())
   345  		p.From.Type = obj.TYPE_REG
   346  		p.From.Reg = arm64.REGZERO
   347  		p.Reg = arm64.REGZERO
   348  		p.To.Type = obj.TYPE_REG
   349  		p.To.Reg = v.Reg()
   350  	case ssa.OpARM64ADCSflags,
   351  		ssa.OpARM64ADDSflags,
   352  		ssa.OpARM64SBCSflags,
   353  		ssa.OpARM64SUBSflags:
   354  		r := v.Reg0()
   355  		r1 := v.Args[0].Reg()
   356  		r2 := v.Args[1].Reg()
   357  		p := s.Prog(v.Op.Asm())
   358  		p.From.Type = obj.TYPE_REG
   359  		p.From.Reg = r2
   360  		p.Reg = r1
   361  		p.To.Type = obj.TYPE_REG
   362  		p.To.Reg = r
   363  	case ssa.OpARM64NEGSflags:
   364  		p := s.Prog(v.Op.Asm())
   365  		p.From.Type = obj.TYPE_REG
   366  		p.From.Reg = v.Args[0].Reg()
   367  		p.To.Type = obj.TYPE_REG
   368  		p.To.Reg = v.Reg0()
   369  	case ssa.OpARM64NGCzerocarry:
   370  		p := s.Prog(v.Op.Asm())
   371  		p.From.Type = obj.TYPE_REG
   372  		p.From.Reg = arm64.REGZERO
   373  		p.To.Type = obj.TYPE_REG
   374  		p.To.Reg = v.Reg()
   375  	case ssa.OpARM64EXTRconst,
   376  		ssa.OpARM64EXTRWconst:
   377  		p := s.Prog(v.Op.Asm())
   378  		p.From.Type = obj.TYPE_CONST
   379  		p.From.Offset = v.AuxInt
   380  		p.AddRestSourceReg(v.Args[0].Reg())
   381  		p.Reg = v.Args[1].Reg()
   382  		p.To.Type = obj.TYPE_REG
   383  		p.To.Reg = v.Reg()
   384  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   385  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   386  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   387  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   388  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   389  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   390  	case ssa.OpARM64MVNshiftRO:
   391  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   392  	case ssa.OpARM64ADDshiftLL,
   393  		ssa.OpARM64SUBshiftLL,
   394  		ssa.OpARM64ANDshiftLL,
   395  		ssa.OpARM64ORshiftLL,
   396  		ssa.OpARM64XORshiftLL,
   397  		ssa.OpARM64EONshiftLL,
   398  		ssa.OpARM64ORNshiftLL,
   399  		ssa.OpARM64BICshiftLL:
   400  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   401  	case ssa.OpARM64ADDshiftRL,
   402  		ssa.OpARM64SUBshiftRL,
   403  		ssa.OpARM64ANDshiftRL,
   404  		ssa.OpARM64ORshiftRL,
   405  		ssa.OpARM64XORshiftRL,
   406  		ssa.OpARM64EONshiftRL,
   407  		ssa.OpARM64ORNshiftRL,
   408  		ssa.OpARM64BICshiftRL:
   409  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   410  	case ssa.OpARM64ADDshiftRA,
   411  		ssa.OpARM64SUBshiftRA,
   412  		ssa.OpARM64ANDshiftRA,
   413  		ssa.OpARM64ORshiftRA,
   414  		ssa.OpARM64XORshiftRA,
   415  		ssa.OpARM64EONshiftRA,
   416  		ssa.OpARM64ORNshiftRA,
   417  		ssa.OpARM64BICshiftRA:
   418  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   419  	case ssa.OpARM64ANDshiftRO,
   420  		ssa.OpARM64ORshiftRO,
   421  		ssa.OpARM64XORshiftRO,
   422  		ssa.OpARM64EONshiftRO,
   423  		ssa.OpARM64ORNshiftRO,
   424  		ssa.OpARM64BICshiftRO:
   425  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   426  	case ssa.OpARM64MOVDconst:
   427  		p := s.Prog(v.Op.Asm())
   428  		p.From.Type = obj.TYPE_CONST
   429  		p.From.Offset = v.AuxInt
   430  		p.To.Type = obj.TYPE_REG
   431  		p.To.Reg = v.Reg()
   432  	case ssa.OpARM64FMOVSconst,
   433  		ssa.OpARM64FMOVDconst:
   434  		p := s.Prog(v.Op.Asm())
   435  		p.From.Type = obj.TYPE_FCONST
   436  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   437  		p.To.Type = obj.TYPE_REG
   438  		p.To.Reg = v.Reg()
   439  	case ssa.OpARM64FCMPS0,
   440  		ssa.OpARM64FCMPD0:
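         		// FCMPS/FCMPD $(0.0), Rarg0: compare the argument against floating-point zero.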
   441  		p := s.Prog(v.Op.Asm())
   442  		p.From.Type = obj.TYPE_FCONST
   443  		p.From.Val = math.Float64frombits(0)
   444  		p.Reg = v.Args[0].Reg()
   445  	case ssa.OpARM64CMP,
   446  		ssa.OpARM64CMPW,
   447  		ssa.OpARM64CMN,
   448  		ssa.OpARM64CMNW,
   449  		ssa.OpARM64TST,
   450  		ssa.OpARM64TSTW,
   451  		ssa.OpARM64FCMPS,
   452  		ssa.OpARM64FCMPD:
   453  		p := s.Prog(v.Op.Asm())
   454  		p.From.Type = obj.TYPE_REG
   455  		p.From.Reg = v.Args[1].Reg()
   456  		p.Reg = v.Args[0].Reg()
   457  	case ssa.OpARM64CMPconst,
   458  		ssa.OpARM64CMPWconst,
   459  		ssa.OpARM64CMNconst,
   460  		ssa.OpARM64CMNWconst,
   461  		ssa.OpARM64TSTconst,
   462  		ssa.OpARM64TSTWconst:
   463  		p := s.Prog(v.Op.Asm())
   464  		p.From.Type = obj.TYPE_CONST
   465  		p.From.Offset = v.AuxInt
   466  		p.Reg = v.Args[0].Reg()
   467  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   468  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   469  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   470  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   471  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   472  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   473  	case ssa.OpARM64TSTshiftRO:
   474  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_ROR, v.AuxInt)
   475  	case ssa.OpARM64MOVDaddr:
   476  		p := s.Prog(arm64.AMOVD)
   477  		p.From.Type = obj.TYPE_ADDR
   478  		p.From.Reg = v.Args[0].Reg()
   479  		p.To.Type = obj.TYPE_REG
   480  		p.To.Reg = v.Reg()
   481  
   482  		var wantreg string
   483  		// MOVD $sym+off(base), R
   484  		// the assembler expands it as the following:
    485  		// - base is SP: add constant offset to SP
    486  		//               when constant is large, the tmp register (REGTMP) may be used
   487  		// - base is SB: load external address from constant pool (use relocation)
   488  		switch v.Aux.(type) {
   489  		default:
   490  			v.Fatalf("aux is of unknown type %T", v.Aux)
   491  		case *obj.LSym:
   492  			wantreg = "SB"
   493  			ssagen.AddAux(&p.From, v)
   494  		case *ir.Name:
   495  			wantreg = "SP"
   496  			ssagen.AddAux(&p.From, v)
   497  		case nil:
   498  			// No sym, just MOVD $off(SP), R
   499  			wantreg = "SP"
   500  			p.From.Offset = v.AuxInt
   501  		}
   502  		if reg := v.Args[0].RegName(); reg != wantreg {
   503  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   504  		}
   505  	case ssa.OpARM64MOVBload,
   506  		ssa.OpARM64MOVBUload,
   507  		ssa.OpARM64MOVHload,
   508  		ssa.OpARM64MOVHUload,
   509  		ssa.OpARM64MOVWload,
   510  		ssa.OpARM64MOVWUload,
   511  		ssa.OpARM64MOVDload,
   512  		ssa.OpARM64FMOVSload,
   513  		ssa.OpARM64FMOVDload:
   514  		p := s.Prog(v.Op.Asm())
   515  		p.From.Type = obj.TYPE_MEM
   516  		p.From.Reg = v.Args[0].Reg()
   517  		ssagen.AddAux(&p.From, v)
   518  		p.To.Type = obj.TYPE_REG
   519  		p.To.Reg = v.Reg()
   520  	case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS:
   521  		p := s.Prog(v.Op.Asm())
   522  		p.From.Type = obj.TYPE_MEM
   523  		p.From.Reg = v.Args[0].Reg()
   524  		ssagen.AddAux(&p.From, v)
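         		// The destination register pair is encoded as TYPE_REGREG:
         		// first register in Reg, second register in Offset.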
   525  		p.To.Type = obj.TYPE_REGREG
   526  		p.To.Reg = v.Reg0()
   527  		p.To.Offset = int64(v.Reg1())
   528  	case ssa.OpARM64MOVBloadidx,
   529  		ssa.OpARM64MOVBUloadidx,
   530  		ssa.OpARM64MOVHloadidx,
   531  		ssa.OpARM64MOVHUloadidx,
   532  		ssa.OpARM64MOVWloadidx,
   533  		ssa.OpARM64MOVWUloadidx,
   534  		ssa.OpARM64MOVDloadidx,
   535  		ssa.OpARM64FMOVSloadidx,
   536  		ssa.OpARM64FMOVDloadidx,
   537  		ssa.OpARM64MOVHloadidx2,
   538  		ssa.OpARM64MOVHUloadidx2,
   539  		ssa.OpARM64MOVWloadidx4,
   540  		ssa.OpARM64MOVWUloadidx4,
   541  		ssa.OpARM64MOVDloadidx8,
   542  		ssa.OpARM64FMOVDloadidx8,
   543  		ssa.OpARM64FMOVSloadidx4:
   544  		p := s.Prog(v.Op.Asm())
   545  		p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   546  		p.To.Type = obj.TYPE_REG
   547  		p.To.Reg = v.Reg()
   548  	case ssa.OpARM64LDAR,
   549  		ssa.OpARM64LDARB,
   550  		ssa.OpARM64LDARW:
   551  		p := s.Prog(v.Op.Asm())
   552  		p.From.Type = obj.TYPE_MEM
   553  		p.From.Reg = v.Args[0].Reg()
   554  		ssagen.AddAux(&p.From, v)
   555  		p.To.Type = obj.TYPE_REG
   556  		p.To.Reg = v.Reg0()
   557  	case ssa.OpARM64MOVBstore,
   558  		ssa.OpARM64MOVHstore,
   559  		ssa.OpARM64MOVWstore,
   560  		ssa.OpARM64MOVDstore,
   561  		ssa.OpARM64FMOVSstore,
   562  		ssa.OpARM64FMOVDstore,
   563  		ssa.OpARM64STLRB,
   564  		ssa.OpARM64STLR,
   565  		ssa.OpARM64STLRW:
   566  		p := s.Prog(v.Op.Asm())
   567  		p.From.Type = obj.TYPE_REG
   568  		p.From.Reg = v.Args[1].Reg()
   569  		p.To.Type = obj.TYPE_MEM
   570  		p.To.Reg = v.Args[0].Reg()
   571  		ssagen.AddAux(&p.To, v)
   572  	case ssa.OpARM64MOVBstoreidx,
   573  		ssa.OpARM64MOVHstoreidx,
   574  		ssa.OpARM64MOVWstoreidx,
   575  		ssa.OpARM64MOVDstoreidx,
   576  		ssa.OpARM64FMOVSstoreidx,
   577  		ssa.OpARM64FMOVDstoreidx,
   578  		ssa.OpARM64MOVHstoreidx2,
   579  		ssa.OpARM64MOVWstoreidx4,
   580  		ssa.OpARM64FMOVSstoreidx4,
   581  		ssa.OpARM64MOVDstoreidx8,
   582  		ssa.OpARM64FMOVDstoreidx8:
   583  		p := s.Prog(v.Op.Asm())
   584  		p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   585  		p.From.Type = obj.TYPE_REG
   586  		p.From.Reg = v.Args[2].Reg()
   587  	case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS:
   588  		p := s.Prog(v.Op.Asm())
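         		// The source register pair is encoded as TYPE_REGREG: first register in Reg, second in Offset.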
   589  		p.From.Type = obj.TYPE_REGREG
   590  		p.From.Reg = v.Args[1].Reg()
   591  		p.From.Offset = int64(v.Args[2].Reg())
   592  		p.To.Type = obj.TYPE_MEM
   593  		p.To.Reg = v.Args[0].Reg()
   594  		ssagen.AddAux(&p.To, v)
   595  	case ssa.OpARM64BFI,
   596  		ssa.OpARM64BFXIL:
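         		// AuxInt encodes the bitfield parameters as lsb<<8 | width.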
   597  		p := s.Prog(v.Op.Asm())
   598  		p.From.Type = obj.TYPE_CONST
   599  		p.From.Offset = v.AuxInt >> 8
   600  		p.AddRestSourceConst(v.AuxInt & 0xff)
   601  		p.Reg = v.Args[1].Reg()
   602  		p.To.Type = obj.TYPE_REG
   603  		p.To.Reg = v.Reg()
   604  	case ssa.OpARM64SBFIZ,
   605  		ssa.OpARM64SBFX,
   606  		ssa.OpARM64UBFIZ,
   607  		ssa.OpARM64UBFX:
   608  		p := s.Prog(v.Op.Asm())
   609  		p.From.Type = obj.TYPE_CONST
   610  		p.From.Offset = v.AuxInt >> 8
   611  		p.AddRestSourceConst(v.AuxInt & 0xff)
   612  		p.Reg = v.Args[0].Reg()
   613  		p.To.Type = obj.TYPE_REG
   614  		p.To.Reg = v.Reg()
   615  	case ssa.OpARM64LoweredAtomicExchange64,
   616  		ssa.OpARM64LoweredAtomicExchange32,
   617  		ssa.OpARM64LoweredAtomicExchange8:
   618  		// LDAXR	(Rarg0), Rout
   619  		// STLXR	Rarg1, (Rarg0), Rtmp
   620  		// CBNZ		Rtmp, -2(PC)
   621  		var ld, st obj.As
   622  		switch v.Op {
   623  		case ssa.OpARM64LoweredAtomicExchange8:
   624  			ld = arm64.ALDAXRB
   625  			st = arm64.ASTLXRB
   626  		case ssa.OpARM64LoweredAtomicExchange32:
   627  			ld = arm64.ALDAXRW
   628  			st = arm64.ASTLXRW
   629  		case ssa.OpARM64LoweredAtomicExchange64:
   630  			ld = arm64.ALDAXR
   631  			st = arm64.ASTLXR
   632  		}
   633  		r0 := v.Args[0].Reg()
   634  		r1 := v.Args[1].Reg()
   635  		out := v.Reg0()
   636  		p := s.Prog(ld)
   637  		p.From.Type = obj.TYPE_MEM
   638  		p.From.Reg = r0
   639  		p.To.Type = obj.TYPE_REG
   640  		p.To.Reg = out
   641  		p1 := s.Prog(st)
   642  		p1.From.Type = obj.TYPE_REG
   643  		p1.From.Reg = r1
   644  		p1.To.Type = obj.TYPE_MEM
   645  		p1.To.Reg = r0
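         		// RegTo2 receives the store-exclusive status (0 on success), tested by the CBNZ below.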
   646  		p1.RegTo2 = arm64.REGTMP
   647  		p2 := s.Prog(arm64.ACBNZ)
   648  		p2.From.Type = obj.TYPE_REG
   649  		p2.From.Reg = arm64.REGTMP
   650  		p2.To.Type = obj.TYPE_BRANCH
   651  		p2.To.SetTarget(p)
   652  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   653  		ssa.OpARM64LoweredAtomicExchange32Variant,
   654  		ssa.OpARM64LoweredAtomicExchange8Variant:
   655  		var swap obj.As
   656  		switch v.Op {
   657  		case ssa.OpARM64LoweredAtomicExchange8Variant:
   658  			swap = arm64.ASWPALB
   659  		case ssa.OpARM64LoweredAtomicExchange32Variant:
   660  			swap = arm64.ASWPALW
   661  		case ssa.OpARM64LoweredAtomicExchange64Variant:
   662  			swap = arm64.ASWPALD
   663  		}
   664  		r0 := v.Args[0].Reg()
   665  		r1 := v.Args[1].Reg()
   666  		out := v.Reg0()
   667  
   668  		// SWPALD	Rarg1, (Rarg0), Rout
   669  		p := s.Prog(swap)
   670  		p.From.Type = obj.TYPE_REG
   671  		p.From.Reg = r1
   672  		p.To.Type = obj.TYPE_MEM
   673  		p.To.Reg = r0
   674  		p.RegTo2 = out
   675  
   676  	case ssa.OpARM64LoweredAtomicAdd64,
   677  		ssa.OpARM64LoweredAtomicAdd32:
   678  		// LDAXR	(Rarg0), Rout
   679  		// ADD		Rarg1, Rout
   680  		// STLXR	Rout, (Rarg0), Rtmp
   681  		// CBNZ		Rtmp, -3(PC)
   682  		ld := arm64.ALDAXR
   683  		st := arm64.ASTLXR
   684  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   685  			ld = arm64.ALDAXRW
   686  			st = arm64.ASTLXRW
   687  		}
   688  		r0 := v.Args[0].Reg()
   689  		r1 := v.Args[1].Reg()
   690  		out := v.Reg0()
   691  		p := s.Prog(ld)
   692  		p.From.Type = obj.TYPE_MEM
   693  		p.From.Reg = r0
   694  		p.To.Type = obj.TYPE_REG
   695  		p.To.Reg = out
   696  		p1 := s.Prog(arm64.AADD)
   697  		p1.From.Type = obj.TYPE_REG
   698  		p1.From.Reg = r1
   699  		p1.To.Type = obj.TYPE_REG
   700  		p1.To.Reg = out
   701  		p2 := s.Prog(st)
   702  		p2.From.Type = obj.TYPE_REG
   703  		p2.From.Reg = out
   704  		p2.To.Type = obj.TYPE_MEM
   705  		p2.To.Reg = r0
   706  		p2.RegTo2 = arm64.REGTMP
   707  		p3 := s.Prog(arm64.ACBNZ)
   708  		p3.From.Type = obj.TYPE_REG
   709  		p3.From.Reg = arm64.REGTMP
   710  		p3.To.Type = obj.TYPE_BRANCH
   711  		p3.To.SetTarget(p)
   712  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   713  		ssa.OpARM64LoweredAtomicAdd32Variant:
   714  		// LDADDAL	Rarg1, (Rarg0), Rout
   715  		// ADD		Rarg1, Rout
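         		// LDADDAL leaves the old memory value in Rout; the following ADD turns it
         		// into the new value, which is what the atomic add operations return.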
   716  		op := arm64.ALDADDALD
   717  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   718  			op = arm64.ALDADDALW
   719  		}
   720  		r0 := v.Args[0].Reg()
   721  		r1 := v.Args[1].Reg()
   722  		out := v.Reg0()
   723  		p := s.Prog(op)
   724  		p.From.Type = obj.TYPE_REG
   725  		p.From.Reg = r1
   726  		p.To.Type = obj.TYPE_MEM
   727  		p.To.Reg = r0
   728  		p.RegTo2 = out
   729  		p1 := s.Prog(arm64.AADD)
   730  		p1.From.Type = obj.TYPE_REG
   731  		p1.From.Reg = r1
   732  		p1.To.Type = obj.TYPE_REG
   733  		p1.To.Reg = out
   734  	case ssa.OpARM64LoweredAtomicCas64,
   735  		ssa.OpARM64LoweredAtomicCas32:
   736  		// LDAXR	(Rarg0), Rtmp
   737  		// CMP		Rarg1, Rtmp
   738  		// BNE		3(PC)
   739  		// STLXR	Rarg2, (Rarg0), Rtmp
   740  		// CBNZ		Rtmp, -4(PC)
   741  		// CSET		EQ, Rout
   742  		ld := arm64.ALDAXR
   743  		st := arm64.ASTLXR
   744  		cmp := arm64.ACMP
   745  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   746  			ld = arm64.ALDAXRW
   747  			st = arm64.ASTLXRW
   748  			cmp = arm64.ACMPW
   749  		}
   750  		r0 := v.Args[0].Reg()
   751  		r1 := v.Args[1].Reg()
   752  		r2 := v.Args[2].Reg()
   753  		out := v.Reg0()
   754  		p := s.Prog(ld)
   755  		p.From.Type = obj.TYPE_MEM
   756  		p.From.Reg = r0
   757  		p.To.Type = obj.TYPE_REG
   758  		p.To.Reg = arm64.REGTMP
   759  		p1 := s.Prog(cmp)
   760  		p1.From.Type = obj.TYPE_REG
   761  		p1.From.Reg = r1
   762  		p1.Reg = arm64.REGTMP
   763  		p2 := s.Prog(arm64.ABNE)
   764  		p2.To.Type = obj.TYPE_BRANCH
   765  		p3 := s.Prog(st)
   766  		p3.From.Type = obj.TYPE_REG
   767  		p3.From.Reg = r2
   768  		p3.To.Type = obj.TYPE_MEM
   769  		p3.To.Reg = r0
   770  		p3.RegTo2 = arm64.REGTMP
   771  		p4 := s.Prog(arm64.ACBNZ)
   772  		p4.From.Type = obj.TYPE_REG
   773  		p4.From.Reg = arm64.REGTMP
   774  		p4.To.Type = obj.TYPE_BRANCH
   775  		p4.To.SetTarget(p)
   776  		p5 := s.Prog(arm64.ACSET)
   777  		p5.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   778  		p5.From.Offset = int64(arm64.SPOP_EQ)
   779  		p5.To.Type = obj.TYPE_REG
   780  		p5.To.Reg = out
   781  		p2.To.SetTarget(p5)
   782  	case ssa.OpARM64LoweredAtomicCas64Variant,
   783  		ssa.OpARM64LoweredAtomicCas32Variant:
   784  		// Rarg0: ptr
   785  		// Rarg1: old
   786  		// Rarg2: new
   787  		// MOV  	Rarg1, Rtmp
   788  		// CASAL	Rtmp, (Rarg0), Rarg2
   789  		// CMP  	Rarg1, Rtmp
   790  		// CSET 	EQ, Rout
   791  		cas := arm64.ACASALD
   792  		cmp := arm64.ACMP
   793  		mov := arm64.AMOVD
   794  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   795  			cas = arm64.ACASALW
   796  			cmp = arm64.ACMPW
   797  			mov = arm64.AMOVW
   798  		}
   799  		r0 := v.Args[0].Reg()
   800  		r1 := v.Args[1].Reg()
   801  		r2 := v.Args[2].Reg()
   802  		out := v.Reg0()
   803  
   804  		// MOV  	Rarg1, Rtmp
   805  		p := s.Prog(mov)
   806  		p.From.Type = obj.TYPE_REG
   807  		p.From.Reg = r1
   808  		p.To.Type = obj.TYPE_REG
   809  		p.To.Reg = arm64.REGTMP
   810  
   811  		// CASAL	Rtmp, (Rarg0), Rarg2
   812  		p1 := s.Prog(cas)
   813  		p1.From.Type = obj.TYPE_REG
   814  		p1.From.Reg = arm64.REGTMP
   815  		p1.To.Type = obj.TYPE_MEM
   816  		p1.To.Reg = r0
   817  		p1.RegTo2 = r2
   818  
   819  		// CMP  	Rarg1, Rtmp
   820  		p2 := s.Prog(cmp)
   821  		p2.From.Type = obj.TYPE_REG
   822  		p2.From.Reg = r1
   823  		p2.Reg = arm64.REGTMP
   824  
   825  		// CSET 	EQ, Rout
   826  		p3 := s.Prog(arm64.ACSET)
   827  		p3.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   828  		p3.From.Offset = int64(arm64.SPOP_EQ)
   829  		p3.To.Type = obj.TYPE_REG
   830  		p3.To.Reg = out
   831  
   832  	case ssa.OpARM64LoweredAtomicAnd64,
   833  		ssa.OpARM64LoweredAtomicOr64,
   834  		ssa.OpARM64LoweredAtomicAnd32,
   835  		ssa.OpARM64LoweredAtomicOr32,
   836  		ssa.OpARM64LoweredAtomicAnd8,
   837  		ssa.OpARM64LoweredAtomicOr8:
   838  		// LDAXR[BW] (Rarg0), Rout
   839  		// AND/OR	Rarg1, Rout, tmp1
   840  		// STLXR[BW] tmp1, (Rarg0), Rtmp
   841  		// CBNZ		Rtmp, -3(PC)
   842  		ld := arm64.ALDAXR
   843  		st := arm64.ASTLXR
   844  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   845  			ld = arm64.ALDAXRW
   846  			st = arm64.ASTLXRW
   847  		}
   848  		if v.Op == ssa.OpARM64LoweredAtomicAnd8 || v.Op == ssa.OpARM64LoweredAtomicOr8 {
   849  			ld = arm64.ALDAXRB
   850  			st = arm64.ASTLXRB
   851  		}
   852  		r0 := v.Args[0].Reg()
   853  		r1 := v.Args[1].Reg()
   854  		out := v.Reg0()
   855  		tmp := v.RegTmp()
   856  		p := s.Prog(ld)
   857  		p.From.Type = obj.TYPE_MEM
   858  		p.From.Reg = r0
   859  		p.To.Type = obj.TYPE_REG
   860  		p.To.Reg = out
   861  		p1 := s.Prog(v.Op.Asm())
   862  		p1.From.Type = obj.TYPE_REG
   863  		p1.From.Reg = r1
   864  		p1.Reg = out
   865  		p1.To.Type = obj.TYPE_REG
   866  		p1.To.Reg = tmp
   867  		p2 := s.Prog(st)
   868  		p2.From.Type = obj.TYPE_REG
   869  		p2.From.Reg = tmp
   870  		p2.To.Type = obj.TYPE_MEM
   871  		p2.To.Reg = r0
   872  		p2.RegTo2 = arm64.REGTMP
   873  		p3 := s.Prog(arm64.ACBNZ)
   874  		p3.From.Type = obj.TYPE_REG
   875  		p3.From.Reg = arm64.REGTMP
   876  		p3.To.Type = obj.TYPE_BRANCH
   877  		p3.To.SetTarget(p)
   878  
   879  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   880  		ssa.OpARM64LoweredAtomicAnd32Variant,
   881  		ssa.OpARM64LoweredAtomicAnd64Variant:
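         		// There is no LSE atomic AND instruction, so AND is implemented as an atomic
         		// bit-clear (LDCLRAL) of the complemented operand computed by the MVN below.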
   882  		atomic_clear := arm64.ALDCLRALD
   883  		if v.Op == ssa.OpARM64LoweredAtomicAnd32Variant {
   884  			atomic_clear = arm64.ALDCLRALW
   885  		}
   886  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   887  			atomic_clear = arm64.ALDCLRALB
   888  		}
   889  		r0 := v.Args[0].Reg()
   890  		r1 := v.Args[1].Reg()
   891  		out := v.Reg0()
   892  
    893  		// MVN       Rarg1, Rtemp
   894  		p := s.Prog(arm64.AMVN)
   895  		p.From.Type = obj.TYPE_REG
   896  		p.From.Reg = r1
   897  		p.To.Type = obj.TYPE_REG
   898  		p.To.Reg = arm64.REGTMP
   899  
   900  		// LDCLRAL[BDW]  Rtemp, (Rarg0), Rout
   901  		p1 := s.Prog(atomic_clear)
   902  		p1.From.Type = obj.TYPE_REG
   903  		p1.From.Reg = arm64.REGTMP
   904  		p1.To.Type = obj.TYPE_MEM
   905  		p1.To.Reg = r0
   906  		p1.RegTo2 = out
   907  
   908  	case ssa.OpARM64LoweredAtomicOr8Variant,
   909  		ssa.OpARM64LoweredAtomicOr32Variant,
   910  		ssa.OpARM64LoweredAtomicOr64Variant:
   911  		atomic_or := arm64.ALDORALD
   912  		if v.Op == ssa.OpARM64LoweredAtomicOr32Variant {
   913  			atomic_or = arm64.ALDORALW
   914  		}
   915  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   916  			atomic_or = arm64.ALDORALB
   917  		}
   918  		r0 := v.Args[0].Reg()
   919  		r1 := v.Args[1].Reg()
   920  		out := v.Reg0()
   921  
   922  		// LDORAL[BDW]  Rarg1, (Rarg0), Rout
   923  		p := s.Prog(atomic_or)
   924  		p.From.Type = obj.TYPE_REG
   925  		p.From.Reg = r1
   926  		p.To.Type = obj.TYPE_MEM
   927  		p.To.Reg = r0
   928  		p.RegTo2 = out
   929  
   930  	case ssa.OpARM64MOVBreg,
   931  		ssa.OpARM64MOVBUreg,
   932  		ssa.OpARM64MOVHreg,
   933  		ssa.OpARM64MOVHUreg,
   934  		ssa.OpARM64MOVWreg,
   935  		ssa.OpARM64MOVWUreg:
   936  		a := v.Args[0]
   937  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   938  			a = a.Args[0]
   939  		}
   940  		if a.Op == ssa.OpLoadReg {
   941  			t := a.Type
   942  			switch {
   943  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   944  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   945  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   946  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   947  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   948  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
    949  				// arg is a properly typed load, already zero/sign-extended; don't extend again
   950  				if v.Reg() == v.Args[0].Reg() {
   951  					return
   952  				}
   953  				p := s.Prog(arm64.AMOVD)
   954  				p.From.Type = obj.TYPE_REG
   955  				p.From.Reg = v.Args[0].Reg()
   956  				p.To.Type = obj.TYPE_REG
   957  				p.To.Reg = v.Reg()
   958  				return
   959  			default:
   960  			}
   961  		}
   962  		fallthrough
   963  	case ssa.OpARM64MVN,
   964  		ssa.OpARM64NEG,
   965  		ssa.OpARM64FABSD,
   966  		ssa.OpARM64FMOVDfpgp,
   967  		ssa.OpARM64FMOVDgpfp,
   968  		ssa.OpARM64FMOVSfpgp,
   969  		ssa.OpARM64FMOVSgpfp,
   970  		ssa.OpARM64FNEGS,
   971  		ssa.OpARM64FNEGD,
   972  		ssa.OpARM64FSQRTS,
   973  		ssa.OpARM64FSQRTD,
   974  		ssa.OpARM64FCVTZSSW,
   975  		ssa.OpARM64FCVTZSDW,
   976  		ssa.OpARM64FCVTZUSW,
   977  		ssa.OpARM64FCVTZUDW,
   978  		ssa.OpARM64FCVTZSS,
   979  		ssa.OpARM64FCVTZSD,
   980  		ssa.OpARM64FCVTZUS,
   981  		ssa.OpARM64FCVTZUD,
   982  		ssa.OpARM64SCVTFWS,
   983  		ssa.OpARM64SCVTFWD,
   984  		ssa.OpARM64SCVTFS,
   985  		ssa.OpARM64SCVTFD,
   986  		ssa.OpARM64UCVTFWS,
   987  		ssa.OpARM64UCVTFWD,
   988  		ssa.OpARM64UCVTFS,
   989  		ssa.OpARM64UCVTFD,
   990  		ssa.OpARM64FCVTSD,
   991  		ssa.OpARM64FCVTDS,
   992  		ssa.OpARM64REV,
   993  		ssa.OpARM64REVW,
   994  		ssa.OpARM64REV16,
   995  		ssa.OpARM64REV16W,
   996  		ssa.OpARM64RBIT,
   997  		ssa.OpARM64RBITW,
   998  		ssa.OpARM64CLZ,
   999  		ssa.OpARM64CLZW,
  1000  		ssa.OpARM64FRINTAD,
  1001  		ssa.OpARM64FRINTMD,
  1002  		ssa.OpARM64FRINTND,
  1003  		ssa.OpARM64FRINTPD,
  1004  		ssa.OpARM64FRINTZD:
  1005  		p := s.Prog(v.Op.Asm())
  1006  		p.From.Type = obj.TYPE_REG
  1007  		p.From.Reg = v.Args[0].Reg()
  1008  		p.To.Type = obj.TYPE_REG
  1009  		p.To.Reg = v.Reg()
  1010  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
  1011  		// input is already rounded
  1012  	case ssa.OpARM64VCNT:
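         		// VCNT operates on vector registers; rewrite the F register numbers into
         		// the corresponding V registers with an 8B arrangement.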
  1013  		p := s.Prog(v.Op.Asm())
  1014  		p.From.Type = obj.TYPE_REG
  1015  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1016  		p.To.Type = obj.TYPE_REG
  1017  		p.To.Reg = (v.Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1018  	case ssa.OpARM64VUADDLV:
  1019  		p := s.Prog(v.Op.Asm())
  1020  		p.From.Type = obj.TYPE_REG
  1021  		p.From.Reg = (v.Args[0].Reg()-arm64.REG_F0)&31 + arm64.REG_ARNG + ((arm64.ARNG_8B & 15) << 5)
  1022  		p.To.Type = obj.TYPE_REG
  1023  		p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
  1024  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
  1025  		r1 := int16(arm64.REGZERO)
  1026  		if v.Op != ssa.OpARM64CSEL0 {
  1027  			r1 = v.Args[1].Reg()
  1028  		}
  1029  		p := s.Prog(v.Op.Asm())
  1030  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1031  		condCode := condBits[ssa.Op(v.AuxInt)]
  1032  		p.From.Offset = int64(condCode)
  1033  		p.Reg = v.Args[0].Reg()
  1034  		p.AddRestSourceReg(r1)
  1035  		p.To.Type = obj.TYPE_REG
  1036  		p.To.Reg = v.Reg()
  1037  	case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
  1038  		p := s.Prog(v.Op.Asm())
  1039  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1040  		condCode := condBits[ssa.Op(v.AuxInt)]
  1041  		p.From.Offset = int64(condCode)
  1042  		p.Reg = v.Args[0].Reg()
  1043  		p.AddRestSourceReg(v.Args[1].Reg())
  1044  		p.To.Type = obj.TYPE_REG
  1045  		p.To.Reg = v.Reg()
  1046  	case ssa.OpARM64CSETM:
  1047  		p := s.Prog(arm64.ACSETM)
  1048  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1049  		condCode := condBits[ssa.Op(v.AuxInt)]
  1050  		p.From.Offset = int64(condCode)
  1051  		p.To.Type = obj.TYPE_REG
  1052  		p.To.Reg = v.Reg()
  1053  	case ssa.OpARM64CCMP,
  1054  		ssa.OpARM64CCMN,
  1055  		ssa.OpARM64CCMPconst,
  1056  		ssa.OpARM64CCMNconst,
  1057  		ssa.OpARM64CCMPW,
  1058  		ssa.OpARM64CCMNW,
  1059  		ssa.OpARM64CCMPWconst,
  1060  		ssa.OpARM64CCMNWconst:
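         		// Conditional compare: if the condition from the aux params holds, compare
         		// Args[0] with the second operand; otherwise set the flags to the NZCV immediate.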
  1061  		p := s.Prog(v.Op.Asm())
  1062  		p.Reg = v.Args[0].Reg()
  1063  		params := v.AuxArm64ConditionalParams()
  1064  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1065  		p.From.Offset = int64(condBits[params.Cond()])
  1066  		constValue, ok := params.ConstValue()
  1067  		if ok {
  1068  			p.AddRestSourceConst(constValue)
  1069  		} else {
  1070  			p.AddRestSourceReg(v.Args[1].Reg())
  1071  		}
  1072  		p.To.Type = obj.TYPE_CONST
  1073  		p.To.Offset = params.Nzcv()
  1074  	case ssa.OpARM64LoweredZero:
  1075  		ptrReg := v.Args[0].Reg()
  1076  		n := v.AuxInt
  1077  		if n < 16 {
  1078  			v.Fatalf("Zero too small %d", n)
  1079  		}
  1080  
  1081  		// Generate zeroing instructions.
  1082  		var off int64
  1083  		for n >= 16 {
  1084  			//  STP     (ZR, ZR), off(ptrReg)
  1085  			zero16(s, ptrReg, off, false)
  1086  			off += 16
  1087  			n -= 16
  1088  		}
  1089  		// Write any fractional portion.
  1090  		// An overlapping 16-byte write can't be used here
  1091  		// because STP's offsets must be a multiple of 8.
  1092  		if n > 8 {
  1093  			//  MOVD    ZR, off(ptrReg)
  1094  			zero8(s, ptrReg, off)
  1095  			off += 8
  1096  			n -= 8
  1097  		}
  1098  		if n != 0 {
  1099  			//  MOVD    ZR, off+n-8(ptrReg)
  1100  			// TODO: for n<=4 we could use a smaller write.
  1101  			zero8(s, ptrReg, off+n-8)
  1102  		}
  1103  	case ssa.OpARM64LoweredZeroLoop:
  1104  		ptrReg := v.Args[0].Reg()
  1105  		countReg := v.RegTmp()
  1106  		n := v.AuxInt
  1107  		loopSize := int64(64)
  1108  		if n < 3*loopSize {
  1109  			// - a loop count of 0 won't work.
  1110  			// - a loop count of 1 is useless.
  1111  			// - a loop count of 2 is a code size ~tie
  1112  			//     3 instructions to implement the loop
  1113  			//     4 instructions in the loop body
  1114  			//   vs
  1115  			//     8 instructions in the straightline code
  1116  			//   Might as well use straightline code.
  1117  			v.Fatalf("ZeroLoop size too small %d", n)
  1118  		}
  1119  
  1120  		// Put iteration count in a register.
   1121  		//   MOVD    $(n/loopSize), countReg
  1122  		p := s.Prog(arm64.AMOVD)
  1123  		p.From.Type = obj.TYPE_CONST
  1124  		p.From.Offset = n / loopSize
  1125  		p.To.Type = obj.TYPE_REG
  1126  		p.To.Reg = countReg
  1127  		cntInit := p
  1128  
  1129  		// Zero loopSize bytes starting at ptrReg.
  1130  		// Increment ptrReg by loopSize as a side effect.
  1131  		for range loopSize / 16 {
  1132  			//  STP.P   (ZR, ZR), 16(ptrReg)
  1133  			zero16(s, ptrReg, 0, true)
  1134  			// TODO: should we use the postincrement form,
  1135  			// or use a separate += 64 instruction?
  1136  			// postincrement saves an instruction, but maybe
  1137  			// it requires more integer units to do the +=16s.
  1138  		}
  1139  		// Decrement loop count.
  1140  		//   SUB     $1, countReg
  1141  		p = s.Prog(arm64.ASUB)
  1142  		p.From.Type = obj.TYPE_CONST
  1143  		p.From.Offset = 1
  1144  		p.To.Type = obj.TYPE_REG
  1145  		p.To.Reg = countReg
  1146  		// Jump to loop header if we're not done yet.
  1147  		//   CBNZ    head
  1148  		p = s.Prog(arm64.ACBNZ)
  1149  		p.From.Type = obj.TYPE_REG
  1150  		p.From.Reg = countReg
  1151  		p.To.Type = obj.TYPE_BRANCH
  1152  		p.To.SetTarget(cntInit.Link)
  1153  
  1154  		// Multiples of the loop size are now done.
  1155  		n %= loopSize
  1156  
  1157  		// Write any fractional portion.
  1158  		var off int64
  1159  		for n >= 16 {
  1160  			//  STP     (ZR, ZR), off(ptrReg)
  1161  			zero16(s, ptrReg, off, false)
  1162  			off += 16
  1163  			n -= 16
  1164  		}
  1165  		if n > 8 {
  1166  			// Note: an overlapping 16-byte write can't be used
  1167  			// here because STP's offsets must be a multiple of 8.
  1168  			//  MOVD    ZR, off(ptrReg)
  1169  			zero8(s, ptrReg, off)
  1170  			off += 8
  1171  			n -= 8
  1172  		}
  1173  		if n != 0 {
  1174  			//  MOVD    ZR, off+n-8(ptrReg)
  1175  			// TODO: for n<=4 we could use a smaller write.
  1176  			zero8(s, ptrReg, off+n-8)
  1177  		}
  1178  		// TODO: maybe we should use the count register to instead
  1179  		// hold an end pointer and compare against that?
  1180  		//   ADD $n, ptrReg, endReg
  1181  		// then
  1182  		//   CMP ptrReg, endReg
  1183  		//   BNE loop
   1184  		// There's a past-the-end pointer here; is that a problem?
  1185  
  1186  	case ssa.OpARM64LoweredMove:
  1187  		dstReg := v.Args[0].Reg()
  1188  		srcReg := v.Args[1].Reg()
  1189  		if dstReg == srcReg {
  1190  			break
  1191  		}
  1192  		tmpReg1 := int16(arm64.REG_R25)
  1193  		tmpFReg1 := int16(arm64.REG_F16)
  1194  		tmpFReg2 := int16(arm64.REG_F17)
  1195  		n := v.AuxInt
  1196  		if n < 16 {
  1197  			v.Fatalf("Move too small %d", n)
  1198  		}
  1199  
  1200  		// Generate copying instructions.
  1201  		var off int64
  1202  		for n >= 32 {
  1203  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1204  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1205  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1206  			off += 32
  1207  			n -= 32
  1208  		}
  1209  		for n >= 16 {
  1210  			//  FMOVQ   off(src), tmpFReg1
  1211  			//  FMOVQ   tmpFReg1, off(dst)
  1212  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1213  			off += 16
  1214  			n -= 16
  1215  		}
  1216  		if n > 8 {
  1217  			//  MOVD    off(srcReg), tmpReg1
  1218  			//  MOVD    tmpReg1, off(dstReg)
  1219  			move8(s, srcReg, dstReg, tmpReg1, off)
  1220  			off += 8
  1221  			n -= 8
  1222  		}
  1223  		if n != 0 {
  1224  			//  MOVD    off+n-8(srcReg), tmpReg1
  1225  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1226  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1227  		}
  1228  	case ssa.OpARM64LoweredMoveLoop:
  1229  		dstReg := v.Args[0].Reg()
  1230  		srcReg := v.Args[1].Reg()
  1231  		if dstReg == srcReg {
  1232  			break
  1233  		}
  1234  		countReg := int16(arm64.REG_R24)
  1235  		tmpReg1 := int16(arm64.REG_R25)
  1236  		tmpFReg1 := int16(arm64.REG_F16)
  1237  		tmpFReg2 := int16(arm64.REG_F17)
  1238  		n := v.AuxInt
  1239  		loopSize := int64(64)
  1240  		if n < 3*loopSize {
  1241  			// - a loop count of 0 won't work.
  1242  			// - a loop count of 1 is useless.
  1243  			// - a loop count of 2 is a code size ~tie
  1244  			//     3 instructions to implement the loop
  1245  			//     4 instructions in the loop body
  1246  			//   vs
  1247  			//     8 instructions in the straightline code
  1248  			//   Might as well use straightline code.
   1249  			v.Fatalf("MoveLoop size too small %d", n)
  1250  		}
  1251  
  1252  		// Put iteration count in a register.
   1253  		//   MOVD    $(n/loopSize), countReg
  1254  		p := s.Prog(arm64.AMOVD)
  1255  		p.From.Type = obj.TYPE_CONST
  1256  		p.From.Offset = n / loopSize
  1257  		p.To.Type = obj.TYPE_REG
  1258  		p.To.Reg = countReg
  1259  		cntInit := p
  1260  
  1261  		// Move loopSize bytes starting at srcReg to dstReg.
   1262  		// Increment srcReg and dstReg by loopSize as a side effect.
  1263  		for range loopSize / 32 {
  1264  			// FLDPQ.P 32(srcReg), (tmpFReg1, tmpFReg2)
  1265  			// FSTPQ.P (tmpFReg1, tmpFReg2), 32(dstReg)
  1266  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, 0, true)
  1267  		}
  1268  		// Decrement loop count.
  1269  		//   SUB     $1, countReg
  1270  		p = s.Prog(arm64.ASUB)
  1271  		p.From.Type = obj.TYPE_CONST
  1272  		p.From.Offset = 1
  1273  		p.To.Type = obj.TYPE_REG
  1274  		p.To.Reg = countReg
  1275  		// Jump to loop header if we're not done yet.
  1276  		//   CBNZ    head
  1277  		p = s.Prog(arm64.ACBNZ)
  1278  		p.From.Type = obj.TYPE_REG
  1279  		p.From.Reg = countReg
  1280  		p.To.Type = obj.TYPE_BRANCH
  1281  		p.To.SetTarget(cntInit.Link)
  1282  
  1283  		// Multiples of the loop size are now done.
  1284  		n %= loopSize
  1285  
  1286  		// Copy any fractional portion.
  1287  		var off int64
  1288  		for n >= 32 {
  1289  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1290  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1291  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1292  			off += 32
  1293  			n -= 32
  1294  		}
  1295  		for n >= 16 {
  1296  			//  FMOVQ   off(src), tmpFReg1
  1297  			//  FMOVQ   tmpFReg1, off(dst)
  1298  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1299  			off += 16
  1300  			n -= 16
  1301  		}
  1302  		if n > 8 {
  1303  			//  MOVD    off(srcReg), tmpReg1
  1304  			//  MOVD    tmpReg1, off(dstReg)
  1305  			move8(s, srcReg, dstReg, tmpReg1, off)
  1306  			off += 8
  1307  			n -= 8
  1308  		}
  1309  		if n != 0 {
  1310  			//  MOVD    off+n-8(srcReg), tmpReg1
  1311  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1312  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1313  		}
  1314  
  1315  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1316  		s.Call(v)
  1317  	case ssa.OpARM64CALLtail:
  1318  		s.TailCall(v)
  1319  	case ssa.OpARM64LoweredWB:
  1320  		p := s.Prog(obj.ACALL)
  1321  		p.To.Type = obj.TYPE_MEM
  1322  		p.To.Name = obj.NAME_EXTERN
  1323  		// AuxInt encodes how many buffer entries we need.
  1324  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1325  	case ssa.OpARM64LoweredMemEq:
  1326  		p := s.Prog(obj.ACALL)
  1327  		p.To.Type = obj.TYPE_MEM
  1328  		p.To.Name = obj.NAME_EXTERN
  1329  		p.To.Sym = ir.Syms.Memequal
  1330  
  1331  	case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
  1332  		// Compute the constant we put in the PCData entry for this call.
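         		// Small constants are encoded directly in that constant, register operands by
         		// register number; oversized constants are first materialized into R0 or R1.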
  1333  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
  1334  		xIsReg := false
  1335  		yIsReg := false
  1336  		xVal := 0
  1337  		yVal := 0
  1338  		switch v.Op {
  1339  		case ssa.OpARM64LoweredPanicBoundsRR:
  1340  			xIsReg = true
  1341  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1342  			yIsReg = true
  1343  			yVal = int(v.Args[1].Reg() - arm64.REG_R0)
  1344  		case ssa.OpARM64LoweredPanicBoundsRC:
  1345  			xIsReg = true
  1346  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1347  			c := v.Aux.(ssa.PanicBoundsC).C
  1348  			if c >= 0 && c <= abi.BoundsMaxConst {
  1349  				yVal = int(c)
  1350  			} else {
  1351  				// Move constant to a register
  1352  				yIsReg = true
  1353  				if yVal == xVal {
  1354  					yVal = 1
  1355  				}
  1356  				p := s.Prog(arm64.AMOVD)
  1357  				p.From.Type = obj.TYPE_CONST
  1358  				p.From.Offset = c
  1359  				p.To.Type = obj.TYPE_REG
  1360  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1361  			}
  1362  		case ssa.OpARM64LoweredPanicBoundsCR:
  1363  			yIsReg = true
  1364  			yVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1365  			c := v.Aux.(ssa.PanicBoundsC).C
  1366  			if c >= 0 && c <= abi.BoundsMaxConst {
  1367  				xVal = int(c)
  1368  			} else {
  1369  				// Move constant to a register
  1370  				if xVal == yVal {
  1371  					xVal = 1
  1372  				}
  1373  				p := s.Prog(arm64.AMOVD)
  1374  				p.From.Type = obj.TYPE_CONST
  1375  				p.From.Offset = c
  1376  				p.To.Type = obj.TYPE_REG
  1377  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1378  			}
  1379  		case ssa.OpARM64LoweredPanicBoundsCC:
  1380  			c := v.Aux.(ssa.PanicBoundsCC).Cx
  1381  			if c >= 0 && c <= abi.BoundsMaxConst {
  1382  				xVal = int(c)
  1383  			} else {
  1384  				// Move constant to a register
  1385  				xIsReg = true
  1386  				p := s.Prog(arm64.AMOVD)
  1387  				p.From.Type = obj.TYPE_CONST
  1388  				p.From.Offset = c
  1389  				p.To.Type = obj.TYPE_REG
  1390  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1391  			}
  1392  			c = v.Aux.(ssa.PanicBoundsCC).Cy
  1393  			if c >= 0 && c <= abi.BoundsMaxConst {
  1394  				yVal = int(c)
  1395  			} else {
  1396  				// Move constant to a register
  1397  				yIsReg = true
  1398  				yVal = 1
  1399  				p := s.Prog(arm64.AMOVD)
  1400  				p.From.Type = obj.TYPE_CONST
  1401  				p.From.Offset = c
  1402  				p.To.Type = obj.TYPE_REG
  1403  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1404  			}
  1405  		}
  1406  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
  1407  
  1408  		p := s.Prog(obj.APCDATA)
  1409  		p.From.SetConst(abi.PCDATA_PanicBounds)
  1410  		p.To.SetConst(int64(c))
  1411  		p = s.Prog(obj.ACALL)
  1412  		p.To.Type = obj.TYPE_MEM
  1413  		p.To.Name = obj.NAME_EXTERN
  1414  		p.To.Sym = ir.Syms.PanicBounds
  1415  
  1416  	case ssa.OpARM64LoweredNilCheck:
  1417  		// Issue a load which will fault if arg is nil.
  1418  		p := s.Prog(arm64.AMOVB)
  1419  		p.From.Type = obj.TYPE_MEM
  1420  		p.From.Reg = v.Args[0].Reg()
  1421  		ssagen.AddAux(&p.From, v)
  1422  		p.To.Type = obj.TYPE_REG
  1423  		p.To.Reg = arm64.REGTMP
  1424  		if logopt.Enabled() {
  1425  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1426  		}
  1427  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Line==1 in generated wrappers
  1428  			base.WarnfAt(v.Pos, "generated nil check")
  1429  		}
  1430  	case ssa.OpARM64Equal,
  1431  		ssa.OpARM64NotEqual,
  1432  		ssa.OpARM64LessThan,
  1433  		ssa.OpARM64LessEqual,
  1434  		ssa.OpARM64GreaterThan,
  1435  		ssa.OpARM64GreaterEqual,
  1436  		ssa.OpARM64LessThanU,
  1437  		ssa.OpARM64LessEqualU,
  1438  		ssa.OpARM64GreaterThanU,
  1439  		ssa.OpARM64GreaterEqualU,
  1440  		ssa.OpARM64LessThanF,
  1441  		ssa.OpARM64LessEqualF,
  1442  		ssa.OpARM64GreaterThanF,
  1443  		ssa.OpARM64GreaterEqualF,
  1444  		ssa.OpARM64NotLessThanF,
  1445  		ssa.OpARM64NotLessEqualF,
  1446  		ssa.OpARM64NotGreaterThanF,
  1447  		ssa.OpARM64NotGreaterEqualF,
  1448  		ssa.OpARM64LessThanNoov,
  1449  		ssa.OpARM64GreaterEqualNoov:
  1450  		// generate boolean values using CSET
  1451  		p := s.Prog(arm64.ACSET)
  1452  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1453  		condCode := condBits[v.Op]
  1454  		p.From.Offset = int64(condCode)
  1455  		p.To.Type = obj.TYPE_REG
  1456  		p.To.Reg = v.Reg()
  1457  	case ssa.OpARM64PRFM:
  1458  		p := s.Prog(v.Op.Asm())
  1459  		p.From.Type = obj.TYPE_MEM
  1460  		p.From.Reg = v.Args[0].Reg()
  1461  		p.To.Type = obj.TYPE_CONST
  1462  		p.To.Offset = v.AuxInt
  1463  	case ssa.OpARM64LoweredGetClosurePtr:
  1464  		// Closure pointer is R26 (arm64.REGCTXT).
  1465  		ssagen.CheckLoweredGetClosurePtr(v)
  1466  	case ssa.OpARM64LoweredGetCallerSP:
  1467  		// caller's SP is FixedFrameSize below the address of the first arg
  1468  		p := s.Prog(arm64.AMOVD)
  1469  		p.From.Type = obj.TYPE_ADDR
  1470  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1471  		p.From.Name = obj.NAME_PARAM
  1472  		p.To.Type = obj.TYPE_REG
  1473  		p.To.Reg = v.Reg()
  1474  	case ssa.OpARM64LoweredGetCallerPC:
  1475  		p := s.Prog(obj.AGETCALLERPC)
  1476  		p.To.Type = obj.TYPE_REG
  1477  		p.To.Reg = v.Reg()
  1478  	case ssa.OpARM64DMB:
  1479  		p := s.Prog(v.Op.Asm())
  1480  		p.From.Type = obj.TYPE_CONST
  1481  		p.From.Offset = v.AuxInt
  1482  	case ssa.OpARM64FlagConstant:
  1483  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1484  	case ssa.OpARM64InvertFlags:
  1485  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1486  	case ssa.OpClobber:
  1487  		// MOVW	$0xdeaddead, REGTMP
  1488  		// MOVW	REGTMP, (slot)
  1489  		// MOVW	REGTMP, 4(slot)
  1490  		p := s.Prog(arm64.AMOVW)
  1491  		p.From.Type = obj.TYPE_CONST
  1492  		p.From.Offset = 0xdeaddead
  1493  		p.To.Type = obj.TYPE_REG
  1494  		p.To.Reg = arm64.REGTMP
  1495  		p = s.Prog(arm64.AMOVW)
  1496  		p.From.Type = obj.TYPE_REG
  1497  		p.From.Reg = arm64.REGTMP
  1498  		p.To.Type = obj.TYPE_MEM
  1499  		p.To.Reg = arm64.REGSP
  1500  		ssagen.AddAux(&p.To, v)
  1501  		p = s.Prog(arm64.AMOVW)
  1502  		p.From.Type = obj.TYPE_REG
  1503  		p.From.Reg = arm64.REGTMP
  1504  		p.To.Type = obj.TYPE_MEM
  1505  		p.To.Reg = arm64.REGSP
  1506  		ssagen.AddAux2(&p.To, v, v.AuxInt+4)
  1507  	case ssa.OpClobberReg:
  1508  		x := uint64(0xdeaddeaddeaddead)
  1509  		p := s.Prog(arm64.AMOVD)
  1510  		p.From.Type = obj.TYPE_CONST
  1511  		p.From.Offset = int64(x)
  1512  		p.To.Type = obj.TYPE_REG
  1513  		p.To.Reg = v.Reg()
  1514  	default:
  1515  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1516  	}
  1517  }
  1518  
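         // condBits maps an SSA comparison op to the arm64 condition code it tests,
         // as used by CSET, CSEL, CCMP and related instructions.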
  1519  var condBits = map[ssa.Op]arm64.SpecialOperand{
  1520  	ssa.OpARM64Equal:         arm64.SPOP_EQ,
  1521  	ssa.OpARM64NotEqual:      arm64.SPOP_NE,
  1522  	ssa.OpARM64LessThan:      arm64.SPOP_LT,
  1523  	ssa.OpARM64LessThanU:     arm64.SPOP_LO,
  1524  	ssa.OpARM64LessEqual:     arm64.SPOP_LE,
  1525  	ssa.OpARM64LessEqualU:    arm64.SPOP_LS,
  1526  	ssa.OpARM64GreaterThan:   arm64.SPOP_GT,
  1527  	ssa.OpARM64GreaterThanU:  arm64.SPOP_HI,
  1528  	ssa.OpARM64GreaterEqual:  arm64.SPOP_GE,
  1529  	ssa.OpARM64GreaterEqualU: arm64.SPOP_HS,
  1530  	ssa.OpARM64LessThanF:     arm64.SPOP_MI, // Less than
  1531  	ssa.OpARM64LessEqualF:    arm64.SPOP_LS, // Less than or equal to
  1532  	ssa.OpARM64GreaterThanF:  arm64.SPOP_GT, // Greater than
  1533  	ssa.OpARM64GreaterEqualF: arm64.SPOP_GE, // Greater than or equal to
  1534  
   1535  	// The following condition codes include the unordered case, to handle comparisons involving NaN.
  1536  	ssa.OpARM64NotLessThanF:     arm64.SPOP_PL, // Greater than, equal to, or unordered
  1537  	ssa.OpARM64NotLessEqualF:    arm64.SPOP_HI, // Greater than or unordered
  1538  	ssa.OpARM64NotGreaterThanF:  arm64.SPOP_LE, // Less than, equal to or unordered
  1539  	ssa.OpARM64NotGreaterEqualF: arm64.SPOP_LT, // Less than or unordered
  1540  
  1541  	ssa.OpARM64LessThanNoov:     arm64.SPOP_MI, // Less than but without honoring overflow
  1542  	ssa.OpARM64GreaterEqualNoov: arm64.SPOP_PL, // Greater than or equal to but without honoring overflow
  1543  }
  1544  
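         // blockJump maps each conditional block kind to the branch instruction used to
         // jump to its first successor (asm) and the inverted branch used to jump to its
         // second successor (invasm).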
  1545  var blockJump = map[ssa.BlockKind]struct {
  1546  	asm, invasm obj.As
  1547  }{
  1548  	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
  1549  	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
  1550  	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
  1551  	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
  1552  	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
  1553  	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
  1554  	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
  1555  	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
  1556  	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
  1557  	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
  1558  	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
  1559  	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
  1560  	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
  1561  	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
  1562  	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
  1563  	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
  1564  	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
  1565  	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
  1566  	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
  1567  	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
  1568  	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
  1569  	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
  1570  }
  1571  
   1572  // leJumps models a 'LEnoov' ('<=' without overflow checking) branch as a pair of conditional jumps.
  1573  var leJumps = [2][2]ssagen.IndexJump{
  1574  	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
  1575  	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
  1576  }
  1577  
   1578  // gtJumps models a 'GTnoov' ('>' without overflow checking) branch as a pair of conditional jumps.
  1579  var gtJumps = [2][2]ssagen.IndexJump{
  1580  	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
  1581  	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
  1582  }
  1583  
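        // ssaGenBlock generates the control-flow code that ends block b:
        // conditional branches, returns, and jump tables. next is the block laid
        // out immediately after b, so an explicit jump to it can be omitted.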
  1584  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1585  	switch b.Kind {
  1586  	case ssa.BlockPlain, ssa.BlockDefer:
  1587  		if b.Succs[0].Block() != next {
  1588  			p := s.Prog(obj.AJMP)
  1589  			p.To.Type = obj.TYPE_BRANCH
  1590  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1591  		}
  1592  
  1593  	case ssa.BlockExit, ssa.BlockRetJmp:
  1594  
  1595  	case ssa.BlockRet:
  1596  		s.Prog(obj.ARET)
  1597  
  1598  	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
  1599  		ssa.BlockARM64LT, ssa.BlockARM64GE,
  1600  		ssa.BlockARM64LE, ssa.BlockARM64GT,
  1601  		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
  1602  		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
  1603  		ssa.BlockARM64Z, ssa.BlockARM64NZ,
  1604  		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
  1605  		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
  1606  		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
  1607  		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
  1608  		jmp := blockJump[b.Kind]
  1609  		var p *obj.Prog
  1610  		switch next {
  1611  		case b.Succs[0].Block():
  1612  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1613  		case b.Succs[1].Block():
  1614  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1615  		default:
  1616  			if b.Likely != ssa.BranchUnlikely {
  1617  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1618  				s.Br(obj.AJMP, b.Succs[1].Block())
  1619  			} else {
  1620  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1621  				s.Br(obj.AJMP, b.Succs[0].Block())
  1622  			}
  1623  		}
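        		// These kinds are either flag-based branches or compare-and-branch
        		// instructions (CBZ/CBNZ and variants); the latter take the control
        		// value's register as their source operand.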
  1624  		if !b.Controls[0].Type.IsFlags() {
  1625  			p.From.Type = obj.TYPE_REG
  1626  			p.From.Reg = b.Controls[0].Reg()
  1627  		}
  1628  	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
  1629  		jmp := blockJump[b.Kind]
  1630  		var p *obj.Prog
  1631  		switch next {
  1632  		case b.Succs[0].Block():
  1633  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1634  		case b.Succs[1].Block():
  1635  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1636  		default:
  1637  			if b.Likely != ssa.BranchUnlikely {
  1638  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1639  				s.Br(obj.AJMP, b.Succs[1].Block())
  1640  			} else {
  1641  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1642  				s.Br(obj.AJMP, b.Succs[0].Block())
  1643  			}
  1644  		}
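        		// TBZ and TBNZ test bit AuxInt of the control value's register.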
  1645  		p.From.Offset = b.AuxInt
  1646  		p.From.Type = obj.TYPE_CONST
  1647  		p.Reg = b.Controls[0].Reg()
  1648  
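        	// LEnoov and GTnoov have no single ARM64 branch instruction; emit the
        	// two-jump sequences defined by leJumps and gtJumps above.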
  1649  	case ssa.BlockARM64LEnoov:
  1650  		s.CombJump(b, next, &leJumps)
  1651  	case ssa.BlockARM64GTnoov:
  1652  		s.CombJump(b, next, &gtJumps)
  1653  
  1654  	case ssa.BlockARM64JUMPTABLE:
  1655  		// MOVD	(TABLE)(IDX<<3), Rtmp
  1656  		// JMP	(Rtmp)
  1657  		p := s.Prog(arm64.AMOVD)
  1658  		p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
  1659  		p.To.Type = obj.TYPE_REG
  1660  		p.To.Reg = arm64.REGTMP
  1661  		p = s.Prog(obj.AJMP)
  1662  		p.To.Type = obj.TYPE_MEM
  1663  		p.To.Reg = arm64.REGTMP
  1664  		// Save jump tables for later resolution of the target blocks.
  1665  		s.JumpTables = append(s.JumpTables, b)
  1666  
  1667  	default:
  1668  		b.Fatalf("branch not implemented: %s", b.LongString())
  1669  	}
  1670  }
  1671  
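        // loadRegResult loads the result value n of type t from its stack slot
        // (frame offset plus off) into register reg and returns the Prog.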
  1672  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1673  	p := s.Prog(loadByType(t))
  1674  	p.From.Type = obj.TYPE_MEM
  1675  	p.From.Name = obj.NAME_AUTO
  1676  	p.From.Sym = n.Linksym()
  1677  	p.From.Offset = n.FrameOffset() + off
  1678  	p.To.Type = obj.TYPE_REG
  1679  	p.To.Reg = reg
  1680  	return p
  1681  }
  1682  
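        // spillArgReg appends, after p, a store of register reg (holding the
        // argument n of type t) to n's parameter stack slot at offset off.
        // The spill is marked as not a statement.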
  1683  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1684  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1685  	p.To.Name = obj.NAME_PARAM
  1686  	p.To.Sym = n.Linksym()
  1687  	p.Pos = p.Pos.WithNotStmt()
  1688  	return p
  1689  }
  1690  
  1691  // zero16 zeroes 16 bytes at reg+off.
  1692  // If postInc is true, increment reg by 16.
  1693  func zero16(s *ssagen.State, reg int16, off int64, postInc bool) {
  1694  	//   STP     (ZR, ZR), off(reg)
  1695  	p := s.Prog(arm64.ASTP)
  1696  	p.From.Type = obj.TYPE_REGREG
  1697  	p.From.Reg = arm64.REGZERO
  1698  	p.From.Offset = int64(arm64.REGZERO)
  1699  	p.To.Type = obj.TYPE_MEM
  1700  	p.To.Reg = reg
  1701  	p.To.Offset = off
  1702  	if postInc {
  1703  		if off != 0 {
  1704  			panic("can't postinc with non-zero offset")
  1705  		}
  1706  		//   STP.P  (ZR, ZR), 16(reg)
  1707  		p.Scond = arm64.C_XPOST
  1708  		p.To.Offset = 16
  1709  	}
  1710  }
  1711  
  1712  // zero8 zeroes 8 bytes at reg+off.
  1713  func zero8(s *ssagen.State, reg int16, off int64) {
  1714  	//   MOVD     ZR, off(reg)
  1715  	p := s.Prog(arm64.AMOVD)
  1716  	p.From.Type = obj.TYPE_REG
  1717  	p.From.Reg = arm64.REGZERO
  1718  	p.To.Type = obj.TYPE_MEM
  1719  	p.To.Reg = reg
  1720  	p.To.Offset = off
  1721  }
  1722  
  1723  // move32 copies 32 bytes at src+off to dst+off.
  1724  // Uses registers tmp1 and tmp2.
  1725  // If postInc is true, increment src and dst by 32.
  1726  func move32(s *ssagen.State, src, dst, tmp1, tmp2 int16, off int64, postInc bool) {
  1727  	// FLDPQ   off(src), (tmp1, tmp2)
  1728  	ld := s.Prog(arm64.AFLDPQ)
  1729  	ld.From.Type = obj.TYPE_MEM
  1730  	ld.From.Reg = src
  1731  	ld.From.Offset = off
  1732  	ld.To.Type = obj.TYPE_REGREG
  1733  	ld.To.Reg = tmp1
  1734  	ld.To.Offset = int64(tmp2)
  1735  	// FSTPQ   (tmp1, tmp2), off(dst)
  1736  	st := s.Prog(arm64.AFSTPQ)
  1737  	st.From.Type = obj.TYPE_REGREG
  1738  	st.From.Reg = tmp1
  1739  	st.From.Offset = int64(tmp2)
  1740  	st.To.Type = obj.TYPE_MEM
  1741  	st.To.Reg = dst
  1742  	st.To.Offset = off
  1743  	if postInc {
  1744  		if off != 0 {
  1745  			panic("can't postinc with non-zero offset")
  1746  		}
  1747  		ld.Scond = arm64.C_XPOST
  1748  		st.Scond = arm64.C_XPOST
  1749  		ld.From.Offset = 32
  1750  		st.To.Offset = 32
  1751  	}
  1752  }
  1753  
  1754  // move16 copies 16 bytes at src+off to dst+off.
  1755  // Uses register tmp1.
  1756  // If postInc is true, increment src and dst by 16.
  1757  func move16(s *ssagen.State, src, dst, tmp1 int16, off int64, postInc bool) {
  1758  	// FMOVQ     off(src), tmp1
  1759  	ld := s.Prog(arm64.AFMOVQ)
  1760  	ld.From.Type = obj.TYPE_MEM
  1761  	ld.From.Reg = src
  1762  	ld.From.Offset = off
  1763  	ld.To.Type = obj.TYPE_REG
  1764  	ld.To.Reg = tmp1
  1765  	// FMOVQ     tmp1, off(dst)
  1766  	st := s.Prog(arm64.AFMOVQ)
  1767  	st.From.Type = obj.TYPE_REG
  1768  	st.From.Reg = tmp1
  1769  	st.To.Type = obj.TYPE_MEM
  1770  	st.To.Reg = dst
  1771  	st.To.Offset = off
  1772  	if postInc {
  1773  		if off != 0 {
  1774  			panic("can't postinc with non-zero offset")
  1775  		}
  1776  		ld.Scond = arm64.C_XPOST
  1777  		st.Scond = arm64.C_XPOST
  1778  		ld.From.Offset = 16
  1779  		st.To.Offset = 16
  1780  	}
  1781  }
  1782  
  1783  // move8 copies 8 bytes at src+off to dst+off.
  1784  // Uses register tmp.
  1785  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1786  	// MOVD    off(src), tmp
  1787  	ld := s.Prog(arm64.AMOVD)
  1788  	ld.From.Type = obj.TYPE_MEM
  1789  	ld.From.Reg = src
  1790  	ld.From.Offset = off
  1791  	ld.To.Type = obj.TYPE_REG
  1792  	ld.To.Reg = tmp
  1793  	// MOVD    tmp, off(dst)
  1794  	st := s.Prog(arm64.AMOVD)
  1795  	st.From.Type = obj.TYPE_REG
  1796  	st.From.Reg = tmp
  1797  	st.To.Type = obj.TYPE_MEM
  1798  	st.To.Reg = dst
  1799  	st.To.Offset = off
  1800  }
  1801  
