Source file src/cmd/compile/internal/arm64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package arm64
     6  
     7  import (
     8  	"math"
     9  
    10  	"cmd/compile/internal/base"
    11  	"cmd/compile/internal/ir"
    12  	"cmd/compile/internal/logopt"
    13  	"cmd/compile/internal/objw"
    14  	"cmd/compile/internal/ssa"
    15  	"cmd/compile/internal/ssagen"
    16  	"cmd/compile/internal/types"
    17  	"cmd/internal/obj"
    18  	"cmd/internal/obj/arm64"
    19  	"internal/abi"
    20  )
    21  
    22  // loadByType returns the load instruction of the given type.
    23  func loadByType(t *types.Type) obj.As {
    24  	if t.IsFloat() {
    25  		switch t.Size() {
    26  		case 4:
    27  			return arm64.AFMOVS
    28  		case 8:
    29  			return arm64.AFMOVD
    30  		}
    31  	} else {
    32  		switch t.Size() {
    33  		case 1:
    34  			if t.IsSigned() {
    35  				return arm64.AMOVB
    36  			} else {
    37  				return arm64.AMOVBU
    38  			}
    39  		case 2:
    40  			if t.IsSigned() {
    41  				return arm64.AMOVH
    42  			} else {
    43  				return arm64.AMOVHU
    44  			}
    45  		case 4:
    46  			if t.IsSigned() {
    47  				return arm64.AMOVW
    48  			} else {
    49  				return arm64.AMOVWU
    50  			}
    51  		case 8:
    52  			return arm64.AMOVD
    53  		}
    54  	}
    55  	panic("bad load type")
    56  }
    57  
    58  // storeByType returns the store instruction of the given type.
    59  func storeByType(t *types.Type) obj.As {
    60  	if t.IsFloat() {
    61  		switch t.Size() {
    62  		case 4:
    63  			return arm64.AFMOVS
    64  		case 8:
    65  			return arm64.AFMOVD
    66  		}
    67  	} else {
    68  		switch t.Size() {
    69  		case 1:
    70  			return arm64.AMOVB
    71  		case 2:
    72  			return arm64.AMOVH
    73  		case 4:
    74  			return arm64.AMOVW
    75  		case 8:
    76  			return arm64.AMOVD
    77  		}
    78  	}
    79  	panic("bad store type")
    80  }
    81  
    82  // loadByType2 returns an opcode that can load consecutive memory locations into 2 registers with type t.
    83  // returns obj.AXXX if no such opcode exists.
    84  func loadByType2(t *types.Type) obj.As {
    85  	if t.IsFloat() {
    86  		switch t.Size() {
    87  		case 4:
    88  			return arm64.AFLDPS
    89  		case 8:
    90  			return arm64.AFLDPD
    91  		}
    92  	} else {
    93  		switch t.Size() {
    94  		case 4:
    95  			return arm64.ALDPW
    96  		case 8:
    97  			return arm64.ALDP
    98  		}
    99  	}
   100  	return obj.AXXX
   101  }
   102  
   103  // storeByType2 returns an opcode that can store registers with type t into 2 consecutive memory locations.
   104  // returns obj.AXXX if no such opcode exists.
   105  func storeByType2(t *types.Type) obj.As {
   106  	if t.IsFloat() {
   107  		switch t.Size() {
   108  		case 4:
   109  			return arm64.AFSTPS
   110  		case 8:
   111  			return arm64.AFSTPD
   112  		}
   113  	} else {
   114  		switch t.Size() {
   115  		case 4:
   116  			return arm64.ASTPW
   117  		case 8:
   118  			return arm64.ASTP
   119  		}
   120  	}
   121  	return obj.AXXX
   122  }
   123  
   124  // makeshift encodes a register shifted by a constant, used as an Offset in Prog.
   125  func makeshift(v *ssa.Value, reg int16, typ int64, s int64) int64 {
   126  	if s < 0 || s >= 64 {
   127  		v.Fatalf("shift out of range: %d", s)
   128  	}
   129  	return int64(reg&31)<<16 | typ | (s&63)<<10
   130  }
   131  
   132  // genshift generates a Prog for r = r0 op (r1 shifted by n).
   133  func genshift(s *ssagen.State, v *ssa.Value, as obj.As, r0, r1, r int16, typ int64, n int64) *obj.Prog {
   134  	p := s.Prog(as)
   135  	p.From.Type = obj.TYPE_SHIFT
   136  	p.From.Offset = makeshift(v, r1, typ, n)
   137  	p.Reg = r0
   138  	if r != 0 {
   139  		p.To.Type = obj.TYPE_REG
   140  		p.To.Reg = r
   141  	}
   142  	return p
   143  }
   144  
   145  // generate the memory operand for the indexed load/store instructions.
   146  // base and idx are registers.
   147  func genIndexedOperand(op ssa.Op, base, idx int16) obj.Addr {
   148  	// Reg: base register, Index: (shifted) index register
   149  	mop := obj.Addr{Type: obj.TYPE_MEM, Reg: base}
   150  	switch op {
   151  	case ssa.OpARM64MOVDloadidx8, ssa.OpARM64MOVDstoreidx8,
   152  		ssa.OpARM64FMOVDloadidx8, ssa.OpARM64FMOVDstoreidx8:
   153  		mop.Index = arm64.REG_LSL | 3<<5 | idx&31
   154  	case ssa.OpARM64MOVWloadidx4, ssa.OpARM64MOVWUloadidx4, ssa.OpARM64MOVWstoreidx4,
   155  		ssa.OpARM64FMOVSloadidx4, ssa.OpARM64FMOVSstoreidx4:
   156  		mop.Index = arm64.REG_LSL | 2<<5 | idx&31
   157  	case ssa.OpARM64MOVHloadidx2, ssa.OpARM64MOVHUloadidx2, ssa.OpARM64MOVHstoreidx2:
   158  		mop.Index = arm64.REG_LSL | 1<<5 | idx&31
   159  	default: // not shifted
   160  		mop.Index = idx
   161  	}
   162  	return mop
   163  }
   164  
   165  // simdRegArng encodes ssa value's register with specified simd arrangement
   166  func simdRegArng(reg int16, arng int16) int16 {
   167  	if reg < arm64.REG_F0 || arm64.REG_F31 < reg {
   168  		base.Fatalf("expected fp register: r%d", reg)
   169  	}
   170  	var err error
   171  	if reg, err = arm64.RegisterArrangement(reg, arng, false); err != nil {
   172  		base.Fatalf("bad simd register arrangement: %v", err)
   173  	}
   174  	return reg
   175  }
   176  
   177  // simdV11 generates element-wise unary vector operations, e.g. VCNT V1.B8, V0.B8
   178  func simdV11(s *ssagen.State, v *ssa.Value, arrangement int16) *obj.Prog {
   179  	p := s.Prog(v.Op.Asm())
   180  	p.From.Type = obj.TYPE_REG
   181  	p.From.Reg = simdRegArng(v.Args[0].Reg(), arrangement)
   182  	p.To.Type = obj.TYPE_REG
   183  	p.To.Reg = simdRegArng(v.Reg(), arrangement)
   184  	return p
   185  }
   186  
   187  // simdV11Scalar generates vector-to-scalar reduction operations, e.g. VUADDLV V1.B8, V0
   188  func simdV11Scalar(s *ssagen.State, v *ssa.Value, arrangement int16) *obj.Prog {
   189  	p := s.Prog(v.Op.Asm())
   190  	p.From.Type = obj.TYPE_REG
   191  	p.From.Reg = simdRegArng(v.Args[0].Reg(), arrangement)
   192  	p.To.Type = obj.TYPE_REG
   193  	p.To.Reg = v.Reg() - arm64.REG_F0 + arm64.REG_V0
   194  	return p
   195  }
   196  
   197  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   198  	switch v.Op {
   199  	case ssa.OpCopy, ssa.OpARM64MOVDreg:
   200  		if v.Type.IsMemory() {
   201  			return
   202  		}
   203  		x := v.Args[0].Reg()
   204  		y := v.Reg()
   205  		if x == y {
   206  			return
   207  		}
   208  		as := arm64.AMOVD
   209  		if v.Type.IsFloat() {
   210  			switch v.Type.Size() {
   211  			case 4:
   212  				as = arm64.AFMOVS
   213  			case 8:
   214  				as = arm64.AFMOVD
   215  			default:
   216  				panic("bad float size")
   217  			}
   218  		}
   219  		p := s.Prog(as)
   220  		p.From.Type = obj.TYPE_REG
   221  		p.From.Reg = x
   222  		p.To.Type = obj.TYPE_REG
   223  		p.To.Reg = y
   224  	case ssa.OpARM64MOVDnop, ssa.OpARM64ZERO:
   225  		// nothing to do
   226  	case ssa.OpLoadReg:
   227  		if v.Type.IsFlags() {
   228  			v.Fatalf("load flags not implemented: %v", v.LongString())
   229  			return
   230  		}
   231  		p := s.Prog(loadByType(v.Type))
   232  		ssagen.AddrAuto(&p.From, v.Args[0])
   233  		p.To.Type = obj.TYPE_REG
   234  		p.To.Reg = v.Reg()
   235  	case ssa.OpStoreReg:
   236  		if v.Type.IsFlags() {
   237  			v.Fatalf("store flags not implemented: %v", v.LongString())
   238  			return
   239  		}
   240  		p := s.Prog(storeByType(v.Type))
   241  		p.From.Type = obj.TYPE_REG
   242  		p.From.Reg = v.Args[0].Reg()
   243  		ssagen.AddrAuto(&p.To, v)
   244  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
   245  		ssagen.CheckArgReg(v)
   246  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill
   247  		// The loop only runs once.
   248  		args := v.Block.Func.RegArgs
   249  		if len(args) == 0 {
   250  			break
   251  		}
   252  		v.Block.Func.RegArgs = nil // prevent from running again
   253  
   254  		for i := 0; i < len(args); i++ {
   255  			a := args[i]
   256  			// Offset by size of the saved LR slot.
   257  			addr := ssagen.SpillSlotAddr(a, arm64.REGSP, base.Ctxt.Arch.FixedFrameSize)
   258  			// Look for double-register operations if we can.
   259  			if i < len(args)-1 {
   260  				b := args[i+1]
   261  				if a.Type.Size() == b.Type.Size() &&
   262  					a.Type.IsFloat() == b.Type.IsFloat() &&
   263  					b.Offset == a.Offset+a.Type.Size() {
   264  					ld := loadByType2(a.Type)
   265  					st := storeByType2(a.Type)
   266  					if ld != obj.AXXX && st != obj.AXXX {
   267  						s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Reg2: b.Reg, Addr: addr, Unspill: ld, Spill: st})
   268  						i++ // b is done also, skip it.
   269  						continue
   270  					}
   271  				}
   272  			}
   273  			// Pass the spill/unspill information along to the assembler.
   274  			s.FuncInfo().AddSpill(obj.RegSpill{Reg: a.Reg, Addr: addr, Unspill: loadByType(a.Type), Spill: storeByType(a.Type)})
   275  		}
   276  
   277  	case ssa.OpARM64ADD,
   278  		ssa.OpARM64SUB,
   279  		ssa.OpARM64AND,
   280  		ssa.OpARM64OR,
   281  		ssa.OpARM64XOR,
   282  		ssa.OpARM64BIC,
   283  		ssa.OpARM64EON,
   284  		ssa.OpARM64ORN,
   285  		ssa.OpARM64MUL,
   286  		ssa.OpARM64MULW,
   287  		ssa.OpARM64MNEG,
   288  		ssa.OpARM64MNEGW,
   289  		ssa.OpARM64MULH,
   290  		ssa.OpARM64UMULH,
   291  		ssa.OpARM64MULL,
   292  		ssa.OpARM64UMULL,
   293  		ssa.OpARM64DIV,
   294  		ssa.OpARM64UDIV,
   295  		ssa.OpARM64DIVW,
   296  		ssa.OpARM64UDIVW,
   297  		ssa.OpARM64MOD,
   298  		ssa.OpARM64UMOD,
   299  		ssa.OpARM64MODW,
   300  		ssa.OpARM64UMODW,
   301  		ssa.OpARM64SLL,
   302  		ssa.OpARM64SRL,
   303  		ssa.OpARM64SRA,
   304  		ssa.OpARM64FADDS,
   305  		ssa.OpARM64FADDD,
   306  		ssa.OpARM64FSUBS,
   307  		ssa.OpARM64FSUBD,
   308  		ssa.OpARM64FMULS,
   309  		ssa.OpARM64FMULD,
   310  		ssa.OpARM64FNMULS,
   311  		ssa.OpARM64FNMULD,
   312  		ssa.OpARM64FDIVS,
   313  		ssa.OpARM64FDIVD,
   314  		ssa.OpARM64FMINS,
   315  		ssa.OpARM64FMIND,
   316  		ssa.OpARM64FMAXS,
   317  		ssa.OpARM64FMAXD,
   318  		ssa.OpARM64ROR,
   319  		ssa.OpARM64RORW:
   320  		r := v.Reg()
   321  		r1 := v.Args[0].Reg()
   322  		r2 := v.Args[1].Reg()
   323  		p := s.Prog(v.Op.Asm())
   324  		p.From.Type = obj.TYPE_REG
   325  		p.From.Reg = r2
   326  		p.Reg = r1
   327  		p.To.Type = obj.TYPE_REG
   328  		p.To.Reg = r
   329  	case ssa.OpARM64FMADDS,
   330  		ssa.OpARM64FMADDD,
   331  		ssa.OpARM64FNMADDS,
   332  		ssa.OpARM64FNMADDD,
   333  		ssa.OpARM64FMSUBS,
   334  		ssa.OpARM64FMSUBD,
   335  		ssa.OpARM64FNMSUBS,
   336  		ssa.OpARM64FNMSUBD,
   337  		ssa.OpARM64MADD,
   338  		ssa.OpARM64MADDW,
   339  		ssa.OpARM64MSUB,
   340  		ssa.OpARM64MSUBW:
   341  		rt := v.Reg()
   342  		ra := v.Args[0].Reg()
   343  		rm := v.Args[1].Reg()
   344  		rn := v.Args[2].Reg()
   345  		p := s.Prog(v.Op.Asm())
   346  		p.Reg = ra
   347  		p.From.Type = obj.TYPE_REG
   348  		p.From.Reg = rm
   349  		p.AddRestSourceReg(rn)
   350  		p.To.Type = obj.TYPE_REG
   351  		p.To.Reg = rt
   352  	case ssa.OpARM64ADDconst,
   353  		ssa.OpARM64SUBconst,
   354  		ssa.OpARM64ANDconst,
   355  		ssa.OpARM64ORconst,
   356  		ssa.OpARM64XORconst,
   357  		ssa.OpARM64SLLconst,
   358  		ssa.OpARM64SRLconst,
   359  		ssa.OpARM64SRAconst,
   360  		ssa.OpARM64RORconst,
   361  		ssa.OpARM64RORWconst:
   362  		p := s.Prog(v.Op.Asm())
   363  		p.From.Type = obj.TYPE_CONST
   364  		p.From.Offset = v.AuxInt
   365  		p.Reg = v.Args[0].Reg()
   366  		p.To.Type = obj.TYPE_REG
   367  		p.To.Reg = v.Reg()
   368  	case ssa.OpARM64ADDSconstflags:
   369  		p := s.Prog(v.Op.Asm())
   370  		p.From.Type = obj.TYPE_CONST
   371  		p.From.Offset = v.AuxInt
   372  		p.Reg = v.Args[0].Reg()
   373  		p.To.Type = obj.TYPE_REG
   374  		p.To.Reg = v.Reg0()
   375  	case ssa.OpARM64ADCzerocarry:
   376  		p := s.Prog(v.Op.Asm())
   377  		p.From.Type = obj.TYPE_REG
   378  		p.From.Reg = arm64.REGZERO
   379  		p.Reg = arm64.REGZERO
   380  		p.To.Type = obj.TYPE_REG
   381  		p.To.Reg = v.Reg()
   382  	case ssa.OpARM64ADCSflags,
   383  		ssa.OpARM64ADDSflags,
   384  		ssa.OpARM64SBCSflags,
   385  		ssa.OpARM64SUBSflags:
   386  		r := v.Reg0()
   387  		r1 := v.Args[0].Reg()
   388  		r2 := v.Args[1].Reg()
   389  		p := s.Prog(v.Op.Asm())
   390  		p.From.Type = obj.TYPE_REG
   391  		p.From.Reg = r2
   392  		p.Reg = r1
   393  		p.To.Type = obj.TYPE_REG
   394  		p.To.Reg = r
   395  	case ssa.OpARM64NEGSflags:
   396  		p := s.Prog(v.Op.Asm())
   397  		p.From.Type = obj.TYPE_REG
   398  		p.From.Reg = v.Args[0].Reg()
   399  		p.To.Type = obj.TYPE_REG
   400  		p.To.Reg = v.Reg0()
   401  	case ssa.OpARM64NGCzerocarry:
   402  		p := s.Prog(v.Op.Asm())
   403  		p.From.Type = obj.TYPE_REG
   404  		p.From.Reg = arm64.REGZERO
   405  		p.To.Type = obj.TYPE_REG
   406  		p.To.Reg = v.Reg()
   407  	case ssa.OpARM64EXTRconst,
   408  		ssa.OpARM64EXTRWconst:
   409  		p := s.Prog(v.Op.Asm())
   410  		p.From.Type = obj.TYPE_CONST
   411  		p.From.Offset = v.AuxInt
   412  		p.AddRestSourceReg(v.Args[0].Reg())
   413  		p.Reg = v.Args[1].Reg()
   414  		p.To.Type = obj.TYPE_REG
   415  		p.To.Reg = v.Reg()
   416  	case ssa.OpARM64MVNshiftLL, ssa.OpARM64NEGshiftLL:
   417  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   418  	case ssa.OpARM64MVNshiftRL, ssa.OpARM64NEGshiftRL:
   419  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   420  	case ssa.OpARM64MVNshiftRA, ssa.OpARM64NEGshiftRA:
   421  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   422  	case ssa.OpARM64MVNshiftRO:
   423  		genshift(s, v, v.Op.Asm(), 0, v.Args[0].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   424  	case ssa.OpARM64ADDshiftLL,
   425  		ssa.OpARM64SUBshiftLL,
   426  		ssa.OpARM64ANDshiftLL,
   427  		ssa.OpARM64ORshiftLL,
   428  		ssa.OpARM64XORshiftLL,
   429  		ssa.OpARM64EONshiftLL,
   430  		ssa.OpARM64ORNshiftLL,
   431  		ssa.OpARM64BICshiftLL:
   432  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LL, v.AuxInt)
   433  	case ssa.OpARM64ADDshiftRL,
   434  		ssa.OpARM64SUBshiftRL,
   435  		ssa.OpARM64ANDshiftRL,
   436  		ssa.OpARM64ORshiftRL,
   437  		ssa.OpARM64XORshiftRL,
   438  		ssa.OpARM64EONshiftRL,
   439  		ssa.OpARM64ORNshiftRL,
   440  		ssa.OpARM64BICshiftRL:
   441  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_LR, v.AuxInt)
   442  	case ssa.OpARM64ADDshiftRA,
   443  		ssa.OpARM64SUBshiftRA,
   444  		ssa.OpARM64ANDshiftRA,
   445  		ssa.OpARM64ORshiftRA,
   446  		ssa.OpARM64XORshiftRA,
   447  		ssa.OpARM64EONshiftRA,
   448  		ssa.OpARM64ORNshiftRA,
   449  		ssa.OpARM64BICshiftRA:
   450  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_AR, v.AuxInt)
   451  	case ssa.OpARM64ANDshiftRO,
   452  		ssa.OpARM64ORshiftRO,
   453  		ssa.OpARM64XORshiftRO,
   454  		ssa.OpARM64EONshiftRO,
   455  		ssa.OpARM64ORNshiftRO,
   456  		ssa.OpARM64BICshiftRO:
   457  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), v.Reg(), arm64.SHIFT_ROR, v.AuxInt)
   458  	case ssa.OpARM64MOVDconst:
   459  		p := s.Prog(v.Op.Asm())
   460  		p.From.Type = obj.TYPE_CONST
   461  		p.From.Offset = v.AuxInt
   462  		p.To.Type = obj.TYPE_REG
   463  		p.To.Reg = v.Reg()
   464  	case ssa.OpARM64FMOVSconst,
   465  		ssa.OpARM64FMOVDconst:
   466  		p := s.Prog(v.Op.Asm())
   467  		p.From.Type = obj.TYPE_FCONST
   468  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   469  		p.To.Type = obj.TYPE_REG
   470  		p.To.Reg = v.Reg()
   471  	case ssa.OpARM64FCMPS0,
   472  		ssa.OpARM64FCMPD0:
   473  		p := s.Prog(v.Op.Asm())
   474  		p.From.Type = obj.TYPE_FCONST
   475  		p.From.Val = math.Float64frombits(0)
   476  		p.Reg = v.Args[0].Reg()
   477  	case ssa.OpARM64CMP,
   478  		ssa.OpARM64CMPW,
   479  		ssa.OpARM64CMN,
   480  		ssa.OpARM64CMNW,
   481  		ssa.OpARM64TST,
   482  		ssa.OpARM64TSTW,
   483  		ssa.OpARM64FCMPS,
   484  		ssa.OpARM64FCMPD:
   485  		p := s.Prog(v.Op.Asm())
   486  		p.From.Type = obj.TYPE_REG
   487  		p.From.Reg = v.Args[1].Reg()
   488  		p.Reg = v.Args[0].Reg()
   489  	case ssa.OpARM64CMPconst,
   490  		ssa.OpARM64CMPWconst,
   491  		ssa.OpARM64CMNconst,
   492  		ssa.OpARM64CMNWconst,
   493  		ssa.OpARM64TSTconst,
   494  		ssa.OpARM64TSTWconst:
   495  		p := s.Prog(v.Op.Asm())
   496  		p.From.Type = obj.TYPE_CONST
   497  		p.From.Offset = v.AuxInt
   498  		p.Reg = v.Args[0].Reg()
   499  	case ssa.OpARM64CMPshiftLL, ssa.OpARM64CMNshiftLL, ssa.OpARM64TSTshiftLL:
   500  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LL, v.AuxInt)
   501  	case ssa.OpARM64CMPshiftRL, ssa.OpARM64CMNshiftRL, ssa.OpARM64TSTshiftRL:
   502  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_LR, v.AuxInt)
   503  	case ssa.OpARM64CMPshiftRA, ssa.OpARM64CMNshiftRA, ssa.OpARM64TSTshiftRA:
   504  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_AR, v.AuxInt)
   505  	case ssa.OpARM64TSTshiftRO:
   506  		genshift(s, v, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg(), 0, arm64.SHIFT_ROR, v.AuxInt)
   507  	case ssa.OpARM64MOVDaddr:
   508  		p := s.Prog(arm64.AMOVD)
   509  		p.From.Type = obj.TYPE_ADDR
   510  		p.From.Reg = v.Args[0].Reg()
   511  		p.To.Type = obj.TYPE_REG
   512  		p.To.Reg = v.Reg()
   513  
   514  		var wantreg string
   515  		// MOVD $sym+off(base), R
   516  		// the assembler expands it as the following:
   517  		// - base is SP: add constant offset to SP (R13)
   518  		//               when constant is large, tmp register (R11) may be used
   519  		// - base is SB: load external address from constant pool (use relocation)
   520  		switch v.Aux.(type) {
   521  		default:
   522  			v.Fatalf("aux is of unknown type %T", v.Aux)
   523  		case *obj.LSym:
   524  			wantreg = "SB"
   525  			ssagen.AddAux(&p.From, v)
   526  		case *ir.Name:
   527  			wantreg = "SP"
   528  			ssagen.AddAux(&p.From, v)
   529  		case nil:
   530  			// No sym, just MOVD $off(SP), R
   531  			wantreg = "SP"
   532  			p.From.Offset = v.AuxInt
   533  		}
   534  		if reg := v.Args[0].RegName(); reg != wantreg {
   535  			v.Fatalf("bad reg %s for symbol type %T, want %s", reg, v.Aux, wantreg)
   536  		}
   537  	case ssa.OpARM64MOVBload,
   538  		ssa.OpARM64MOVBUload,
   539  		ssa.OpARM64MOVHload,
   540  		ssa.OpARM64MOVHUload,
   541  		ssa.OpARM64MOVWload,
   542  		ssa.OpARM64MOVWUload,
   543  		ssa.OpARM64MOVDload,
   544  		ssa.OpARM64FMOVSload,
   545  		ssa.OpARM64FMOVDload,
   546  		ssa.OpARM64FMOVQload:
   547  		p := s.Prog(v.Op.Asm())
   548  		p.From.Type = obj.TYPE_MEM
   549  		p.From.Reg = v.Args[0].Reg()
   550  		ssagen.AddAux(&p.From, v)
   551  		p.To.Type = obj.TYPE_REG
   552  		p.To.Reg = v.Reg()
   553  	case ssa.OpARM64LDP, ssa.OpARM64LDPW, ssa.OpARM64LDPSW, ssa.OpARM64FLDPD, ssa.OpARM64FLDPS, ssa.OpARM64FLDPQ:
   554  		p := s.Prog(v.Op.Asm())
   555  		p.From.Type = obj.TYPE_MEM
   556  		p.From.Reg = v.Args[0].Reg()
   557  		ssagen.AddAux(&p.From, v)
   558  		p.To.Type = obj.TYPE_REGREG
   559  		p.To.Reg = v.Reg0()
   560  		p.To.Offset = int64(v.Reg1())
   561  	case ssa.OpARM64MOVBloadidx,
   562  		ssa.OpARM64MOVBUloadidx,
   563  		ssa.OpARM64MOVHloadidx,
   564  		ssa.OpARM64MOVHUloadidx,
   565  		ssa.OpARM64MOVWloadidx,
   566  		ssa.OpARM64MOVWUloadidx,
   567  		ssa.OpARM64MOVDloadidx,
   568  		ssa.OpARM64FMOVSloadidx,
   569  		ssa.OpARM64FMOVDloadidx,
   570  		ssa.OpARM64MOVHloadidx2,
   571  		ssa.OpARM64MOVHUloadidx2,
   572  		ssa.OpARM64MOVWloadidx4,
   573  		ssa.OpARM64MOVWUloadidx4,
   574  		ssa.OpARM64MOVDloadidx8,
   575  		ssa.OpARM64FMOVDloadidx8,
   576  		ssa.OpARM64FMOVSloadidx4:
   577  		p := s.Prog(v.Op.Asm())
   578  		p.From = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   579  		p.To.Type = obj.TYPE_REG
   580  		p.To.Reg = v.Reg()
   581  	case ssa.OpARM64LDAR,
   582  		ssa.OpARM64LDARB,
   583  		ssa.OpARM64LDARW:
   584  		p := s.Prog(v.Op.Asm())
   585  		p.From.Type = obj.TYPE_MEM
   586  		p.From.Reg = v.Args[0].Reg()
   587  		ssagen.AddAux(&p.From, v)
   588  		p.To.Type = obj.TYPE_REG
   589  		p.To.Reg = v.Reg0()
   590  	case ssa.OpARM64MOVBstore,
   591  		ssa.OpARM64MOVHstore,
   592  		ssa.OpARM64MOVWstore,
   593  		ssa.OpARM64MOVDstore,
   594  		ssa.OpARM64FMOVSstore,
   595  		ssa.OpARM64FMOVDstore,
   596  		ssa.OpARM64FMOVQstore,
   597  		ssa.OpARM64STLRB,
   598  		ssa.OpARM64STLR,
   599  		ssa.OpARM64STLRW:
   600  		p := s.Prog(v.Op.Asm())
   601  		p.From.Type = obj.TYPE_REG
   602  		p.From.Reg = v.Args[1].Reg()
   603  		p.To.Type = obj.TYPE_MEM
   604  		p.To.Reg = v.Args[0].Reg()
   605  		ssagen.AddAux(&p.To, v)
   606  	case ssa.OpARM64MOVBstoreidx,
   607  		ssa.OpARM64MOVHstoreidx,
   608  		ssa.OpARM64MOVWstoreidx,
   609  		ssa.OpARM64MOVDstoreidx,
   610  		ssa.OpARM64FMOVSstoreidx,
   611  		ssa.OpARM64FMOVDstoreidx,
   612  		ssa.OpARM64MOVHstoreidx2,
   613  		ssa.OpARM64MOVWstoreidx4,
   614  		ssa.OpARM64FMOVSstoreidx4,
   615  		ssa.OpARM64MOVDstoreidx8,
   616  		ssa.OpARM64FMOVDstoreidx8:
   617  		p := s.Prog(v.Op.Asm())
   618  		p.To = genIndexedOperand(v.Op, v.Args[0].Reg(), v.Args[1].Reg())
   619  		p.From.Type = obj.TYPE_REG
   620  		p.From.Reg = v.Args[2].Reg()
   621  	case ssa.OpARM64STP, ssa.OpARM64STPW, ssa.OpARM64FSTPD, ssa.OpARM64FSTPS, ssa.OpARM64FSTPQ:
   622  		p := s.Prog(v.Op.Asm())
   623  		p.From.Type = obj.TYPE_REGREG
   624  		p.From.Reg = v.Args[1].Reg()
   625  		p.From.Offset = int64(v.Args[2].Reg())
   626  		p.To.Type = obj.TYPE_MEM
   627  		p.To.Reg = v.Args[0].Reg()
   628  		ssagen.AddAux(&p.To, v)
   629  	case ssa.OpARM64BFI,
   630  		ssa.OpARM64BFXIL:
   631  		p := s.Prog(v.Op.Asm())
   632  		p.From.Type = obj.TYPE_CONST
   633  		p.From.Offset = v.AuxInt >> 8
   634  		p.AddRestSourceConst(v.AuxInt & 0xff)
   635  		p.Reg = v.Args[1].Reg()
   636  		p.To.Type = obj.TYPE_REG
   637  		p.To.Reg = v.Reg()
   638  	case ssa.OpARM64SBFIZ,
   639  		ssa.OpARM64SBFX,
   640  		ssa.OpARM64UBFIZ,
   641  		ssa.OpARM64UBFX:
   642  		p := s.Prog(v.Op.Asm())
   643  		p.From.Type = obj.TYPE_CONST
   644  		p.From.Offset = v.AuxInt >> 8
   645  		p.AddRestSourceConst(v.AuxInt & 0xff)
   646  		p.Reg = v.Args[0].Reg()
   647  		p.To.Type = obj.TYPE_REG
   648  		p.To.Reg = v.Reg()
   649  	case ssa.OpARM64LoweredAtomicExchange64,
   650  		ssa.OpARM64LoweredAtomicExchange32,
   651  		ssa.OpARM64LoweredAtomicExchange8:
   652  		// LDAXR	(Rarg0), Rout
   653  		// STLXR	Rarg1, (Rarg0), Rtmp
   654  		// CBNZ		Rtmp, -2(PC)
   655  		var ld, st obj.As
   656  		switch v.Op {
   657  		case ssa.OpARM64LoweredAtomicExchange8:
   658  			ld = arm64.ALDAXRB
   659  			st = arm64.ASTLXRB
   660  		case ssa.OpARM64LoweredAtomicExchange32:
   661  			ld = arm64.ALDAXRW
   662  			st = arm64.ASTLXRW
   663  		case ssa.OpARM64LoweredAtomicExchange64:
   664  			ld = arm64.ALDAXR
   665  			st = arm64.ASTLXR
   666  		}
   667  		r0 := v.Args[0].Reg()
   668  		r1 := v.Args[1].Reg()
   669  		out := v.Reg0()
   670  		p := s.Prog(ld)
   671  		p.From.Type = obj.TYPE_MEM
   672  		p.From.Reg = r0
   673  		p.To.Type = obj.TYPE_REG
   674  		p.To.Reg = out
   675  		p1 := s.Prog(st)
   676  		p1.From.Type = obj.TYPE_REG
   677  		p1.From.Reg = r1
   678  		p1.To.Type = obj.TYPE_MEM
   679  		p1.To.Reg = r0
   680  		p1.RegTo2 = arm64.REGTMP
   681  		p2 := s.Prog(arm64.ACBNZ)
   682  		p2.From.Type = obj.TYPE_REG
   683  		p2.From.Reg = arm64.REGTMP
   684  		p2.To.Type = obj.TYPE_BRANCH
   685  		p2.To.SetTarget(p)
   686  	case ssa.OpARM64LoweredAtomicExchange64Variant,
   687  		ssa.OpARM64LoweredAtomicExchange32Variant,
   688  		ssa.OpARM64LoweredAtomicExchange8Variant:
   689  		var swap obj.As
   690  		switch v.Op {
   691  		case ssa.OpARM64LoweredAtomicExchange8Variant:
   692  			swap = arm64.ASWPALB
   693  		case ssa.OpARM64LoweredAtomicExchange32Variant:
   694  			swap = arm64.ASWPALW
   695  		case ssa.OpARM64LoweredAtomicExchange64Variant:
   696  			swap = arm64.ASWPALD
   697  		}
   698  		r0 := v.Args[0].Reg()
   699  		r1 := v.Args[1].Reg()
   700  		out := v.Reg0()
   701  
   702  		// SWPALD	Rarg1, (Rarg0), Rout
   703  		p := s.Prog(swap)
   704  		p.From.Type = obj.TYPE_REG
   705  		p.From.Reg = r1
   706  		p.To.Type = obj.TYPE_MEM
   707  		p.To.Reg = r0
   708  		p.RegTo2 = out
   709  
   710  	case ssa.OpARM64LoweredAtomicAdd64,
   711  		ssa.OpARM64LoweredAtomicAdd32:
   712  		// LDAXR	(Rarg0), Rout
   713  		// ADD		Rarg1, Rout
   714  		// STLXR	Rout, (Rarg0), Rtmp
   715  		// CBNZ		Rtmp, -3(PC)
   716  		ld := arm64.ALDAXR
   717  		st := arm64.ASTLXR
   718  		if v.Op == ssa.OpARM64LoweredAtomicAdd32 {
   719  			ld = arm64.ALDAXRW
   720  			st = arm64.ASTLXRW
   721  		}
   722  		r0 := v.Args[0].Reg()
   723  		r1 := v.Args[1].Reg()
   724  		out := v.Reg0()
   725  		p := s.Prog(ld)
   726  		p.From.Type = obj.TYPE_MEM
   727  		p.From.Reg = r0
   728  		p.To.Type = obj.TYPE_REG
   729  		p.To.Reg = out
   730  		p1 := s.Prog(arm64.AADD)
   731  		p1.From.Type = obj.TYPE_REG
   732  		p1.From.Reg = r1
   733  		p1.To.Type = obj.TYPE_REG
   734  		p1.To.Reg = out
   735  		p2 := s.Prog(st)
   736  		p2.From.Type = obj.TYPE_REG
   737  		p2.From.Reg = out
   738  		p2.To.Type = obj.TYPE_MEM
   739  		p2.To.Reg = r0
   740  		p2.RegTo2 = arm64.REGTMP
   741  		p3 := s.Prog(arm64.ACBNZ)
   742  		p3.From.Type = obj.TYPE_REG
   743  		p3.From.Reg = arm64.REGTMP
   744  		p3.To.Type = obj.TYPE_BRANCH
   745  		p3.To.SetTarget(p)
   746  	case ssa.OpARM64LoweredAtomicAdd64Variant,
   747  		ssa.OpARM64LoweredAtomicAdd32Variant:
   748  		// LDADDAL	Rarg1, (Rarg0), Rout
   749  		// ADD		Rarg1, Rout
   750  		op := arm64.ALDADDALD
   751  		if v.Op == ssa.OpARM64LoweredAtomicAdd32Variant {
   752  			op = arm64.ALDADDALW
   753  		}
   754  		r0 := v.Args[0].Reg()
   755  		r1 := v.Args[1].Reg()
   756  		out := v.Reg0()
   757  		p := s.Prog(op)
   758  		p.From.Type = obj.TYPE_REG
   759  		p.From.Reg = r1
   760  		p.To.Type = obj.TYPE_MEM
   761  		p.To.Reg = r0
   762  		p.RegTo2 = out
   763  		p1 := s.Prog(arm64.AADD)
   764  		p1.From.Type = obj.TYPE_REG
   765  		p1.From.Reg = r1
   766  		p1.To.Type = obj.TYPE_REG
   767  		p1.To.Reg = out
   768  	case ssa.OpARM64LoweredAtomicCas64,
   769  		ssa.OpARM64LoweredAtomicCas32:
   770  		// LDAXR	(Rarg0), Rtmp
   771  		// CMP		Rarg1, Rtmp
   772  		// BNE		3(PC)
   773  		// STLXR	Rarg2, (Rarg0), Rtmp
   774  		// CBNZ		Rtmp, -4(PC)
   775  		// CSET		EQ, Rout
   776  		ld := arm64.ALDAXR
   777  		st := arm64.ASTLXR
   778  		cmp := arm64.ACMP
   779  		if v.Op == ssa.OpARM64LoweredAtomicCas32 {
   780  			ld = arm64.ALDAXRW
   781  			st = arm64.ASTLXRW
   782  			cmp = arm64.ACMPW
   783  		}
   784  		r0 := v.Args[0].Reg()
   785  		r1 := v.Args[1].Reg()
   786  		r2 := v.Args[2].Reg()
   787  		out := v.Reg0()
   788  		p := s.Prog(ld)
   789  		p.From.Type = obj.TYPE_MEM
   790  		p.From.Reg = r0
   791  		p.To.Type = obj.TYPE_REG
   792  		p.To.Reg = arm64.REGTMP
   793  		p1 := s.Prog(cmp)
   794  		p1.From.Type = obj.TYPE_REG
   795  		p1.From.Reg = r1
   796  		p1.Reg = arm64.REGTMP
   797  		p2 := s.Prog(arm64.ABNE)
   798  		p2.To.Type = obj.TYPE_BRANCH
   799  		p3 := s.Prog(st)
   800  		p3.From.Type = obj.TYPE_REG
   801  		p3.From.Reg = r2
   802  		p3.To.Type = obj.TYPE_MEM
   803  		p3.To.Reg = r0
   804  		p3.RegTo2 = arm64.REGTMP
   805  		p4 := s.Prog(arm64.ACBNZ)
   806  		p4.From.Type = obj.TYPE_REG
   807  		p4.From.Reg = arm64.REGTMP
   808  		p4.To.Type = obj.TYPE_BRANCH
   809  		p4.To.SetTarget(p)
   810  		p5 := s.Prog(arm64.ACSET)
   811  		p5.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   812  		p5.From.Offset = int64(arm64.SPOP_EQ)
   813  		p5.To.Type = obj.TYPE_REG
   814  		p5.To.Reg = out
   815  		p2.To.SetTarget(p5)
   816  	case ssa.OpARM64LoweredAtomicCas64Variant,
   817  		ssa.OpARM64LoweredAtomicCas32Variant:
   818  		// Rarg0: ptr
   819  		// Rarg1: old
   820  		// Rarg2: new
   821  		// MOV  	Rarg1, Rtmp
   822  		// CASAL	Rtmp, (Rarg0), Rarg2
   823  		// CMP  	Rarg1, Rtmp
   824  		// CSET 	EQ, Rout
   825  		cas := arm64.ACASALD
   826  		cmp := arm64.ACMP
   827  		mov := arm64.AMOVD
   828  		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
   829  			cas = arm64.ACASALW
   830  			cmp = arm64.ACMPW
   831  			mov = arm64.AMOVW
   832  		}
   833  		r0 := v.Args[0].Reg()
   834  		r1 := v.Args[1].Reg()
   835  		r2 := v.Args[2].Reg()
   836  		out := v.Reg0()
   837  
   838  		// MOV  	Rarg1, Rtmp
   839  		p := s.Prog(mov)
   840  		p.From.Type = obj.TYPE_REG
   841  		p.From.Reg = r1
   842  		p.To.Type = obj.TYPE_REG
   843  		p.To.Reg = arm64.REGTMP
   844  
   845  		// CASAL	Rtmp, (Rarg0), Rarg2
   846  		p1 := s.Prog(cas)
   847  		p1.From.Type = obj.TYPE_REG
   848  		p1.From.Reg = arm64.REGTMP
   849  		p1.To.Type = obj.TYPE_MEM
   850  		p1.To.Reg = r0
   851  		p1.RegTo2 = r2
   852  
   853  		// CMP  	Rarg1, Rtmp
   854  		p2 := s.Prog(cmp)
   855  		p2.From.Type = obj.TYPE_REG
   856  		p2.From.Reg = r1
   857  		p2.Reg = arm64.REGTMP
   858  
   859  		// CSET 	EQ, Rout
   860  		p3 := s.Prog(arm64.ACSET)
   861  		p3.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
   862  		p3.From.Offset = int64(arm64.SPOP_EQ)
   863  		p3.To.Type = obj.TYPE_REG
   864  		p3.To.Reg = out
   865  
   866  	case ssa.OpARM64LoweredAtomicAnd64,
   867  		ssa.OpARM64LoweredAtomicOr64,
   868  		ssa.OpARM64LoweredAtomicAnd32,
   869  		ssa.OpARM64LoweredAtomicOr32,
   870  		ssa.OpARM64LoweredAtomicAnd8,
   871  		ssa.OpARM64LoweredAtomicOr8:
   872  		// LDAXR[BW] (Rarg0), Rout
   873  		// AND/OR	Rarg1, Rout, tmp1
   874  		// STLXR[BW] tmp1, (Rarg0), Rtmp
   875  		// CBNZ		Rtmp, -3(PC)
   876  		ld := arm64.ALDAXR
   877  		st := arm64.ASTLXR
   878  		if v.Op == ssa.OpARM64LoweredAtomicAnd32 || v.Op == ssa.OpARM64LoweredAtomicOr32 {
   879  			ld = arm64.ALDAXRW
   880  			st = arm64.ASTLXRW
   881  		}
   882  		if v.Op == ssa.OpARM64LoweredAtomicAnd8 || v.Op == ssa.OpARM64LoweredAtomicOr8 {
   883  			ld = arm64.ALDAXRB
   884  			st = arm64.ASTLXRB
   885  		}
   886  		r0 := v.Args[0].Reg()
   887  		r1 := v.Args[1].Reg()
   888  		out := v.Reg0()
   889  		tmp := v.RegTmp()
   890  		p := s.Prog(ld)
   891  		p.From.Type = obj.TYPE_MEM
   892  		p.From.Reg = r0
   893  		p.To.Type = obj.TYPE_REG
   894  		p.To.Reg = out
   895  		p1 := s.Prog(v.Op.Asm())
   896  		p1.From.Type = obj.TYPE_REG
   897  		p1.From.Reg = r1
   898  		p1.Reg = out
   899  		p1.To.Type = obj.TYPE_REG
   900  		p1.To.Reg = tmp
   901  		p2 := s.Prog(st)
   902  		p2.From.Type = obj.TYPE_REG
   903  		p2.From.Reg = tmp
   904  		p2.To.Type = obj.TYPE_MEM
   905  		p2.To.Reg = r0
   906  		p2.RegTo2 = arm64.REGTMP
   907  		p3 := s.Prog(arm64.ACBNZ)
   908  		p3.From.Type = obj.TYPE_REG
   909  		p3.From.Reg = arm64.REGTMP
   910  		p3.To.Type = obj.TYPE_BRANCH
   911  		p3.To.SetTarget(p)
   912  
   913  	case ssa.OpARM64LoweredAtomicAnd8Variant,
   914  		ssa.OpARM64LoweredAtomicAnd32Variant,
   915  		ssa.OpARM64LoweredAtomicAnd64Variant:
   916  		atomic_clear := arm64.ALDCLRALD
   917  		if v.Op == ssa.OpARM64LoweredAtomicAnd32Variant {
   918  			atomic_clear = arm64.ALDCLRALW
   919  		}
   920  		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
   921  			atomic_clear = arm64.ALDCLRALB
   922  		}
   923  		r0 := v.Args[0].Reg()
   924  		r1 := v.Args[1].Reg()
   925  		out := v.Reg0()
   926  
   927  		// MNV       Rarg1 Rtemp
   928  		p := s.Prog(arm64.AMVN)
   929  		p.From.Type = obj.TYPE_REG
   930  		p.From.Reg = r1
   931  		p.To.Type = obj.TYPE_REG
   932  		p.To.Reg = arm64.REGTMP
   933  
   934  		// LDCLRAL[BDW]  Rtemp, (Rarg0), Rout
   935  		p1 := s.Prog(atomic_clear)
   936  		p1.From.Type = obj.TYPE_REG
   937  		p1.From.Reg = arm64.REGTMP
   938  		p1.To.Type = obj.TYPE_MEM
   939  		p1.To.Reg = r0
   940  		p1.RegTo2 = out
   941  
   942  	case ssa.OpARM64LoweredAtomicOr8Variant,
   943  		ssa.OpARM64LoweredAtomicOr32Variant,
   944  		ssa.OpARM64LoweredAtomicOr64Variant:
   945  		atomic_or := arm64.ALDORALD
   946  		if v.Op == ssa.OpARM64LoweredAtomicOr32Variant {
   947  			atomic_or = arm64.ALDORALW
   948  		}
   949  		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
   950  			atomic_or = arm64.ALDORALB
   951  		}
   952  		r0 := v.Args[0].Reg()
   953  		r1 := v.Args[1].Reg()
   954  		out := v.Reg0()
   955  
   956  		// LDORAL[BDW]  Rarg1, (Rarg0), Rout
   957  		p := s.Prog(atomic_or)
   958  		p.From.Type = obj.TYPE_REG
   959  		p.From.Reg = r1
   960  		p.To.Type = obj.TYPE_MEM
   961  		p.To.Reg = r0
   962  		p.RegTo2 = out
   963  
   964  	case ssa.OpARM64MOVBreg,
   965  		ssa.OpARM64MOVBUreg,
   966  		ssa.OpARM64MOVHreg,
   967  		ssa.OpARM64MOVHUreg,
   968  		ssa.OpARM64MOVWreg,
   969  		ssa.OpARM64MOVWUreg:
   970  		a := v.Args[0]
   971  		for a.Op == ssa.OpCopy || a.Op == ssa.OpARM64MOVDreg {
   972  			a = a.Args[0]
   973  		}
   974  		if a.Op == ssa.OpLoadReg {
   975  			t := a.Type
   976  			switch {
   977  			case v.Op == ssa.OpARM64MOVBreg && t.Size() == 1 && t.IsSigned(),
   978  				v.Op == ssa.OpARM64MOVBUreg && t.Size() == 1 && !t.IsSigned(),
   979  				v.Op == ssa.OpARM64MOVHreg && t.Size() == 2 && t.IsSigned(),
   980  				v.Op == ssa.OpARM64MOVHUreg && t.Size() == 2 && !t.IsSigned(),
   981  				v.Op == ssa.OpARM64MOVWreg && t.Size() == 4 && t.IsSigned(),
   982  				v.Op == ssa.OpARM64MOVWUreg && t.Size() == 4 && !t.IsSigned():
   983  				// arg is a proper-typed load, already zero/sign-extended, don't extend again
   984  				if v.Reg() == v.Args[0].Reg() {
   985  					return
   986  				}
   987  				p := s.Prog(arm64.AMOVD)
   988  				p.From.Type = obj.TYPE_REG
   989  				p.From.Reg = v.Args[0].Reg()
   990  				p.To.Type = obj.TYPE_REG
   991  				p.To.Reg = v.Reg()
   992  				return
   993  			default:
   994  			}
   995  		}
   996  		fallthrough
   997  	case ssa.OpARM64MVN,
   998  		ssa.OpARM64NEG,
   999  		ssa.OpARM64FABSD,
  1000  		ssa.OpARM64FABSS,
  1001  		ssa.OpARM64FMOVDfpgp,
  1002  		ssa.OpARM64FMOVDgpfp,
  1003  		ssa.OpARM64FMOVSfpgp,
  1004  		ssa.OpARM64FMOVSgpfp,
  1005  		ssa.OpARM64FNEGS,
  1006  		ssa.OpARM64FNEGD,
  1007  		ssa.OpARM64FSQRTS,
  1008  		ssa.OpARM64FSQRTD,
  1009  		ssa.OpARM64FCVTZSSW,
  1010  		ssa.OpARM64FCVTZSDW,
  1011  		ssa.OpARM64FCVTZUSW,
  1012  		ssa.OpARM64FCVTZUDW,
  1013  		ssa.OpARM64FCVTZSS,
  1014  		ssa.OpARM64FCVTZSD,
  1015  		ssa.OpARM64FCVTZUS,
  1016  		ssa.OpARM64FCVTZUD,
  1017  		ssa.OpARM64SCVTFWS,
  1018  		ssa.OpARM64SCVTFWD,
  1019  		ssa.OpARM64SCVTFS,
  1020  		ssa.OpARM64SCVTFD,
  1021  		ssa.OpARM64UCVTFWS,
  1022  		ssa.OpARM64UCVTFWD,
  1023  		ssa.OpARM64UCVTFS,
  1024  		ssa.OpARM64UCVTFD,
  1025  		ssa.OpARM64FCVTSD,
  1026  		ssa.OpARM64FCVTDS,
  1027  		ssa.OpARM64REV,
  1028  		ssa.OpARM64REVW,
  1029  		ssa.OpARM64REV16,
  1030  		ssa.OpARM64REV16W,
  1031  		ssa.OpARM64RBIT,
  1032  		ssa.OpARM64RBITW,
  1033  		ssa.OpARM64CLZ,
  1034  		ssa.OpARM64CLZW,
  1035  		ssa.OpARM64FRINTAD,
  1036  		ssa.OpARM64FRINTMD,
  1037  		ssa.OpARM64FRINTND,
  1038  		ssa.OpARM64FRINTPD,
  1039  		ssa.OpARM64FRINTZD,
  1040  		ssa.OpARM64FRINTAS,
  1041  		ssa.OpARM64FRINTMS,
  1042  		ssa.OpARM64FRINTNS,
  1043  		ssa.OpARM64FRINTPS,
  1044  		ssa.OpARM64FRINTZS:
  1045  		p := s.Prog(v.Op.Asm())
  1046  		p.From.Type = obj.TYPE_REG
  1047  		p.From.Reg = v.Args[0].Reg()
  1048  		p.To.Type = obj.TYPE_REG
  1049  		p.To.Reg = v.Reg()
  1050  	case ssa.OpARM64LoweredRound32F, ssa.OpARM64LoweredRound64F:
  1051  		// input is already rounded
  1052  	case ssa.OpARM64VCNT:
  1053  		simdV11(s, v, arm64.ARNG_8B)
  1054  	case ssa.OpARM64VUADDLV:
  1055  		simdV11Scalar(s, v, arm64.ARNG_8B)
  1056  	case ssa.OpARM64CSEL, ssa.OpARM64CSEL0:
  1057  		r1 := int16(arm64.REGZERO)
  1058  		if v.Op != ssa.OpARM64CSEL0 {
  1059  			r1 = v.Args[1].Reg()
  1060  		}
  1061  		p := s.Prog(v.Op.Asm())
  1062  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1063  		condCode := condBits[ssa.Op(v.AuxInt)]
  1064  		p.From.Offset = int64(condCode)
  1065  		p.Reg = v.Args[0].Reg()
  1066  		p.AddRestSourceReg(r1)
  1067  		p.To.Type = obj.TYPE_REG
  1068  		p.To.Reg = v.Reg()
  1069  	case ssa.OpARM64CSINC, ssa.OpARM64CSINV, ssa.OpARM64CSNEG:
  1070  		p := s.Prog(v.Op.Asm())
  1071  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1072  		condCode := condBits[ssa.Op(v.AuxInt)]
  1073  		p.From.Offset = int64(condCode)
  1074  		p.Reg = v.Args[0].Reg()
  1075  		p.AddRestSourceReg(v.Args[1].Reg())
  1076  		p.To.Type = obj.TYPE_REG
  1077  		p.To.Reg = v.Reg()
  1078  	case ssa.OpARM64CSETM:
  1079  		p := s.Prog(arm64.ACSETM)
  1080  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1081  		condCode := condBits[ssa.Op(v.AuxInt)]
  1082  		p.From.Offset = int64(condCode)
  1083  		p.To.Type = obj.TYPE_REG
  1084  		p.To.Reg = v.Reg()
  1085  	case ssa.OpARM64CCMP,
  1086  		ssa.OpARM64CCMN,
  1087  		ssa.OpARM64CCMPconst,
  1088  		ssa.OpARM64CCMNconst,
  1089  		ssa.OpARM64CCMPW,
  1090  		ssa.OpARM64CCMNW,
  1091  		ssa.OpARM64CCMPWconst,
  1092  		ssa.OpARM64CCMNWconst:
  1093  		p := s.Prog(v.Op.Asm())
  1094  		p.Reg = v.Args[0].Reg()
  1095  		params := v.AuxArm64ConditionalParams()
  1096  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1097  		p.From.Offset = int64(condBits[params.Cond()])
  1098  		constValue, ok := params.ConstValue()
  1099  		if ok {
  1100  			p.AddRestSourceConst(constValue)
  1101  		} else {
  1102  			p.AddRestSourceReg(v.Args[1].Reg())
  1103  		}
  1104  		p.To.Type = obj.TYPE_CONST
  1105  		p.To.Offset = params.Nzcv()
  1106  	case ssa.OpARM64LoweredZero:
  1107  		ptrReg := v.Args[0].Reg()
  1108  		n := v.AuxInt
  1109  		if n < 16 {
  1110  			v.Fatalf("Zero too small %d", n)
  1111  		}
  1112  
  1113  		// Generate zeroing instructions.
  1114  		var off int64
  1115  		for n >= 16 {
  1116  			//  STP     (ZR, ZR), off(ptrReg)
  1117  			zero16(s, ptrReg, off, false)
  1118  			off += 16
  1119  			n -= 16
  1120  		}
  1121  		// Write any fractional portion.
  1122  		// An overlapping 16-byte write can't be used here
  1123  		// because STP's offsets must be a multiple of 8.
  1124  		if n > 8 {
  1125  			//  MOVD    ZR, off(ptrReg)
  1126  			zero8(s, ptrReg, off)
  1127  			off += 8
  1128  			n -= 8
  1129  		}
  1130  		if n != 0 {
  1131  			//  MOVD    ZR, off+n-8(ptrReg)
  1132  			// TODO: for n<=4 we could use a smaller write.
  1133  			zero8(s, ptrReg, off+n-8)
  1134  		}
  1135  	case ssa.OpARM64LoweredZeroLoop:
  1136  		ptrReg := v.Args[0].Reg()
  1137  		countReg := v.RegTmp()
  1138  		n := v.AuxInt
  1139  		loopSize := int64(64)
  1140  		if n < 3*loopSize {
  1141  			// - a loop count of 0 won't work.
  1142  			// - a loop count of 1 is useless.
  1143  			// - a loop count of 2 is a code size ~tie
  1144  			//     3 instructions to implement the loop
  1145  			//     4 instructions in the loop body
  1146  			//   vs
  1147  			//     8 instructions in the straightline code
  1148  			//   Might as well use straightline code.
  1149  			v.Fatalf("ZeroLoop size too small %d", n)
  1150  		}
  1151  
  1152  		// Put iteration count in a register.
  1153  		//   MOVD    $n, countReg
  1154  		p := s.Prog(arm64.AMOVD)
  1155  		p.From.Type = obj.TYPE_CONST
  1156  		p.From.Offset = n / loopSize
  1157  		p.To.Type = obj.TYPE_REG
  1158  		p.To.Reg = countReg
  1159  		cntInit := p
  1160  
  1161  		// Zero loopSize bytes starting at ptrReg.
  1162  		// Increment ptrReg by loopSize as a side effect.
  1163  		for range loopSize / 16 {
  1164  			//  STP.P   (ZR, ZR), 16(ptrReg)
  1165  			zero16(s, ptrReg, 0, true)
  1166  			// TODO: should we use the postincrement form,
  1167  			// or use a separate += 64 instruction?
  1168  			// postincrement saves an instruction, but maybe
  1169  			// it requires more integer units to do the +=16s.
  1170  		}
  1171  		// Decrement loop count.
  1172  		//   SUB     $1, countReg
  1173  		p = s.Prog(arm64.ASUB)
  1174  		p.From.Type = obj.TYPE_CONST
  1175  		p.From.Offset = 1
  1176  		p.To.Type = obj.TYPE_REG
  1177  		p.To.Reg = countReg
  1178  		// Jump to loop header if we're not done yet.
  1179  		//   CBNZ    head
  1180  		p = s.Prog(arm64.ACBNZ)
  1181  		p.From.Type = obj.TYPE_REG
  1182  		p.From.Reg = countReg
  1183  		p.To.Type = obj.TYPE_BRANCH
  1184  		p.To.SetTarget(cntInit.Link)
  1185  
  1186  		// Multiples of the loop size are now done.
  1187  		n %= loopSize
  1188  
  1189  		// Write any fractional portion.
  1190  		var off int64
  1191  		for n >= 16 {
  1192  			//  STP     (ZR, ZR), off(ptrReg)
  1193  			zero16(s, ptrReg, off, false)
  1194  			off += 16
  1195  			n -= 16
  1196  		}
  1197  		if n > 8 {
  1198  			// Note: an overlapping 16-byte write can't be used
  1199  			// here because STP's offsets must be a multiple of 8.
  1200  			//  MOVD    ZR, off(ptrReg)
  1201  			zero8(s, ptrReg, off)
  1202  			off += 8
  1203  			n -= 8
  1204  		}
  1205  		if n != 0 {
  1206  			//  MOVD    ZR, off+n-8(ptrReg)
  1207  			// TODO: for n<=4 we could use a smaller write.
  1208  			zero8(s, ptrReg, off+n-8)
  1209  		}
  1210  		// TODO: maybe we should use the count register to instead
  1211  		// hold an end pointer and compare against that?
  1212  		//   ADD $n, ptrReg, endReg
  1213  		// then
  1214  		//   CMP ptrReg, endReg
  1215  		//   BNE loop
  1216  		// There's a past-the-end pointer here, any problem with that?
  1217  
  1218  	case ssa.OpARM64LoweredMove:
  1219  		dstReg := v.Args[0].Reg()
  1220  		srcReg := v.Args[1].Reg()
  1221  		if dstReg == srcReg {
  1222  			break
  1223  		}
  1224  		tmpReg1 := int16(arm64.REG_R25)
  1225  		tmpFReg1 := int16(arm64.REG_F16)
  1226  		tmpFReg2 := int16(arm64.REG_F17)
  1227  		n := v.AuxInt
  1228  		if n < 16 {
  1229  			v.Fatalf("Move too small %d", n)
  1230  		}
  1231  
  1232  		// Generate copying instructions.
  1233  		var off int64
  1234  		for n >= 32 {
  1235  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1236  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1237  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1238  			off += 32
  1239  			n -= 32
  1240  		}
  1241  		for n >= 16 {
  1242  			//  FMOVQ   off(src), tmpFReg1
  1243  			//  FMOVQ   tmpFReg1, off(dst)
  1244  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1245  			off += 16
  1246  			n -= 16
  1247  		}
  1248  		if n > 8 {
  1249  			//  MOVD    off(srcReg), tmpReg1
  1250  			//  MOVD    tmpReg1, off(dstReg)
  1251  			move8(s, srcReg, dstReg, tmpReg1, off)
  1252  			off += 8
  1253  			n -= 8
  1254  		}
  1255  		if n != 0 {
  1256  			//  MOVD    off+n-8(srcReg), tmpReg1
  1257  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1258  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1259  		}
  1260  	case ssa.OpARM64LoweredMoveLoop:
  1261  		dstReg := v.Args[0].Reg()
  1262  		srcReg := v.Args[1].Reg()
  1263  		if dstReg == srcReg {
  1264  			break
  1265  		}
  1266  		countReg := int16(arm64.REG_R24)
  1267  		tmpReg1 := int16(arm64.REG_R25)
  1268  		tmpFReg1 := int16(arm64.REG_F16)
  1269  		tmpFReg2 := int16(arm64.REG_F17)
  1270  		n := v.AuxInt
  1271  		loopSize := int64(64)
  1272  		if n < 3*loopSize {
  1273  			// - a loop count of 0 won't work.
  1274  			// - a loop count of 1 is useless.
  1275  			// - a loop count of 2 is a code size ~tie
  1276  			//     3 instructions to implement the loop
  1277  			//     4 instructions in the loop body
  1278  			//   vs
  1279  			//     8 instructions in the straightline code
  1280  			//   Might as well use straightline code.
  1281  			v.Fatalf("ZeroLoop size too small %d", n)
  1282  		}
  1283  
  1284  		// Put iteration count in a register.
  1285  		//   MOVD    $n, countReg
  1286  		p := s.Prog(arm64.AMOVD)
  1287  		p.From.Type = obj.TYPE_CONST
  1288  		p.From.Offset = n / loopSize
  1289  		p.To.Type = obj.TYPE_REG
  1290  		p.To.Reg = countReg
  1291  		cntInit := p
  1292  
  1293  		// Move loopSize bytes starting at srcReg to dstReg.
  1294  		// Increment srcReg and destReg by loopSize as a side effect.
  1295  		for range loopSize / 32 {
  1296  			// FLDPQ.P 32(srcReg), (tmpFReg1, tmpFReg2)
  1297  			// FSTPQ.P (tmpFReg1, tmpFReg2), 32(dstReg)
  1298  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, 0, true)
  1299  		}
  1300  		// Decrement loop count.
  1301  		//   SUB     $1, countReg
  1302  		p = s.Prog(arm64.ASUB)
  1303  		p.From.Type = obj.TYPE_CONST
  1304  		p.From.Offset = 1
  1305  		p.To.Type = obj.TYPE_REG
  1306  		p.To.Reg = countReg
  1307  		// Jump to loop header if we're not done yet.
  1308  		//   CBNZ    head
  1309  		p = s.Prog(arm64.ACBNZ)
  1310  		p.From.Type = obj.TYPE_REG
  1311  		p.From.Reg = countReg
  1312  		p.To.Type = obj.TYPE_BRANCH
  1313  		p.To.SetTarget(cntInit.Link)
  1314  
  1315  		// Multiples of the loop size are now done.
  1316  		n %= loopSize
  1317  
  1318  		// Copy any fractional portion.
  1319  		var off int64
  1320  		for n >= 32 {
  1321  			//  FLDPQ   off(srcReg), (tmpFReg1, tmpFReg2)
  1322  			//  FSTPQ   (tmpFReg1, tmpFReg2), off(dstReg)
  1323  			move32(s, srcReg, dstReg, tmpFReg1, tmpFReg2, off, false)
  1324  			off += 32
  1325  			n -= 32
  1326  		}
  1327  		for n >= 16 {
  1328  			//  FMOVQ   off(src), tmpFReg1
  1329  			//  FMOVQ   tmpFReg1, off(dst)
  1330  			move16(s, srcReg, dstReg, tmpFReg1, off, false)
  1331  			off += 16
  1332  			n -= 16
  1333  		}
  1334  		if n > 8 {
  1335  			//  MOVD    off(srcReg), tmpReg1
  1336  			//  MOVD    tmpReg1, off(dstReg)
  1337  			move8(s, srcReg, dstReg, tmpReg1, off)
  1338  			off += 8
  1339  			n -= 8
  1340  		}
  1341  		if n != 0 {
  1342  			//  MOVD    off+n-8(srcReg), tmpReg1
  1343  			//  MOVD    tmpReg1, off+n-8(dstReg)
  1344  			move8(s, srcReg, dstReg, tmpReg1, off+n-8)
  1345  		}
  1346  
  1347  	case ssa.OpARM64CALLstatic, ssa.OpARM64CALLclosure, ssa.OpARM64CALLinter:
  1348  		s.Call(v)
  1349  	case ssa.OpARM64CALLtail, ssa.OpARM64CALLtailinter:
  1350  		s.TailCall(v)
  1351  	case ssa.OpARM64LoweredWB:
  1352  		p := s.Prog(obj.ACALL)
  1353  		p.To.Type = obj.TYPE_MEM
  1354  		p.To.Name = obj.NAME_EXTERN
  1355  		// AuxInt encodes how many buffer entries we need.
  1356  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1357  	case ssa.OpARM64LoweredMemEq:
  1358  		p := s.Prog(obj.ACALL)
  1359  		p.To.Type = obj.TYPE_MEM
  1360  		p.To.Name = obj.NAME_EXTERN
  1361  		p.To.Sym = ir.Syms.Memequal
  1362  
  1363  	case ssa.OpARM64LoweredPanicBoundsRR, ssa.OpARM64LoweredPanicBoundsRC, ssa.OpARM64LoweredPanicBoundsCR, ssa.OpARM64LoweredPanicBoundsCC:
  1364  		// Compute the constant we put in the PCData entry for this call.
  1365  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
  1366  		xIsReg := false
  1367  		yIsReg := false
  1368  		xVal := 0
  1369  		yVal := 0
  1370  		switch v.Op {
  1371  		case ssa.OpARM64LoweredPanicBoundsRR:
  1372  			xIsReg = true
  1373  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1374  			yIsReg = true
  1375  			yVal = int(v.Args[1].Reg() - arm64.REG_R0)
  1376  		case ssa.OpARM64LoweredPanicBoundsRC:
  1377  			xIsReg = true
  1378  			xVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1379  			c := v.Aux.(ssa.PanicBoundsC).C
  1380  			if c >= 0 && c <= abi.BoundsMaxConst {
  1381  				yVal = int(c)
  1382  			} else {
  1383  				// Move constant to a register
  1384  				yIsReg = true
  1385  				if yVal == xVal {
  1386  					yVal = 1
  1387  				}
  1388  				p := s.Prog(arm64.AMOVD)
  1389  				p.From.Type = obj.TYPE_CONST
  1390  				p.From.Offset = c
  1391  				p.To.Type = obj.TYPE_REG
  1392  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1393  			}
  1394  		case ssa.OpARM64LoweredPanicBoundsCR:
  1395  			yIsReg = true
  1396  			yVal = int(v.Args[0].Reg() - arm64.REG_R0)
  1397  			c := v.Aux.(ssa.PanicBoundsC).C
  1398  			if c >= 0 && c <= abi.BoundsMaxConst {
  1399  				xVal = int(c)
  1400  			} else {
  1401  				// Move constant to a register
  1402  				if xVal == yVal {
  1403  					xVal = 1
  1404  				}
  1405  				p := s.Prog(arm64.AMOVD)
  1406  				p.From.Type = obj.TYPE_CONST
  1407  				p.From.Offset = c
  1408  				p.To.Type = obj.TYPE_REG
  1409  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1410  			}
  1411  		case ssa.OpARM64LoweredPanicBoundsCC:
  1412  			c := v.Aux.(ssa.PanicBoundsCC).Cx
  1413  			if c >= 0 && c <= abi.BoundsMaxConst {
  1414  				xVal = int(c)
  1415  			} else {
  1416  				// Move constant to a register
  1417  				xIsReg = true
  1418  				p := s.Prog(arm64.AMOVD)
  1419  				p.From.Type = obj.TYPE_CONST
  1420  				p.From.Offset = c
  1421  				p.To.Type = obj.TYPE_REG
  1422  				p.To.Reg = arm64.REG_R0 + int16(xVal)
  1423  			}
  1424  			c = v.Aux.(ssa.PanicBoundsCC).Cy
  1425  			if c >= 0 && c <= abi.BoundsMaxConst {
  1426  				yVal = int(c)
  1427  			} else {
  1428  				// Move constant to a register
  1429  				yIsReg = true
  1430  				yVal = 1
  1431  				p := s.Prog(arm64.AMOVD)
  1432  				p.From.Type = obj.TYPE_CONST
  1433  				p.From.Offset = c
  1434  				p.To.Type = obj.TYPE_REG
  1435  				p.To.Reg = arm64.REG_R0 + int16(yVal)
  1436  			}
  1437  		}
  1438  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
  1439  
  1440  		p := s.Prog(obj.APCDATA)
  1441  		p.From.SetConst(abi.PCDATA_PanicBounds)
  1442  		p.To.SetConst(int64(c))
  1443  		p = s.Prog(obj.ACALL)
  1444  		p.To.Type = obj.TYPE_MEM
  1445  		p.To.Name = obj.NAME_EXTERN
  1446  		p.To.Sym = ir.Syms.PanicBounds
  1447  
  1448  	case ssa.OpARM64LoweredNilCheck:
  1449  		// Issue a load which will fault if arg is nil.
  1450  		p := s.Prog(arm64.AMOVB)
  1451  		p.From.Type = obj.TYPE_MEM
  1452  		p.From.Reg = v.Args[0].Reg()
  1453  		ssagen.AddAux(&p.From, v)
  1454  		p.To.Type = obj.TYPE_REG
  1455  		p.To.Reg = arm64.REGTMP
  1456  		if logopt.Enabled() {
  1457  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1458  		}
  1459  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Line==1 in generated wrappers
  1460  			base.WarnfAt(v.Pos, "generated nil check")
  1461  		}
  1462  	case ssa.OpARM64Equal,
  1463  		ssa.OpARM64NotEqual,
  1464  		ssa.OpARM64LessThan,
  1465  		ssa.OpARM64LessEqual,
  1466  		ssa.OpARM64GreaterThan,
  1467  		ssa.OpARM64GreaterEqual,
  1468  		ssa.OpARM64LessThanU,
  1469  		ssa.OpARM64LessEqualU,
  1470  		ssa.OpARM64GreaterThanU,
  1471  		ssa.OpARM64GreaterEqualU,
  1472  		ssa.OpARM64LessThanF,
  1473  		ssa.OpARM64LessEqualF,
  1474  		ssa.OpARM64GreaterThanF,
  1475  		ssa.OpARM64GreaterEqualF,
  1476  		ssa.OpARM64NotLessThanF,
  1477  		ssa.OpARM64NotLessEqualF,
  1478  		ssa.OpARM64NotGreaterThanF,
  1479  		ssa.OpARM64NotGreaterEqualF,
  1480  		ssa.OpARM64LessThanNoov,
  1481  		ssa.OpARM64GreaterEqualNoov:
  1482  		// generate boolean values using CSET
  1483  		p := s.Prog(arm64.ACSET)
  1484  		p.From.Type = obj.TYPE_SPECIAL // assembler encodes conditional bits in Offset
  1485  		condCode := condBits[v.Op]
  1486  		p.From.Offset = int64(condCode)
  1487  		p.To.Type = obj.TYPE_REG
  1488  		p.To.Reg = v.Reg()
  1489  	case ssa.OpARM64PRFM:
  1490  		p := s.Prog(v.Op.Asm())
  1491  		p.From.Type = obj.TYPE_MEM
  1492  		p.From.Reg = v.Args[0].Reg()
  1493  		p.To.Type = obj.TYPE_CONST
  1494  		p.To.Offset = v.AuxInt
  1495  	case ssa.OpARM64LoweredGetClosurePtr:
  1496  		// Closure pointer is R26 (arm64.REGCTXT).
  1497  		ssagen.CheckLoweredGetClosurePtr(v)
  1498  	case ssa.OpARM64LoweredGetCallerSP:
  1499  		// caller's SP is FixedFrameSize below the address of the first arg
  1500  		p := s.Prog(arm64.AMOVD)
  1501  		p.From.Type = obj.TYPE_ADDR
  1502  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize
  1503  		p.From.Name = obj.NAME_PARAM
  1504  		p.To.Type = obj.TYPE_REG
  1505  		p.To.Reg = v.Reg()
  1506  	case ssa.OpARM64LoweredGetCallerPC:
  1507  		p := s.Prog(obj.AGETCALLERPC)
  1508  		p.To.Type = obj.TYPE_REG
  1509  		p.To.Reg = v.Reg()
  1510  	case ssa.OpARM64DMB:
  1511  		p := s.Prog(v.Op.Asm())
  1512  		p.From.Type = obj.TYPE_CONST
  1513  		p.From.Offset = v.AuxInt
  1514  	case ssa.OpARM64FlagConstant:
  1515  		v.Fatalf("FlagConstant op should never make it to codegen %v", v.LongString())
  1516  	case ssa.OpARM64InvertFlags:
  1517  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1518  	case ssa.OpClobber:
  1519  		// MOVW	$0xdeaddead, REGTMP
  1520  		// MOVW	REGTMP, (slot)
  1521  		// MOVW	REGTMP, 4(slot)
  1522  		p := s.Prog(arm64.AMOVW)
  1523  		p.From.Type = obj.TYPE_CONST
  1524  		p.From.Offset = 0xdeaddead
  1525  		p.To.Type = obj.TYPE_REG
  1526  		p.To.Reg = arm64.REGTMP
  1527  		p = s.Prog(arm64.AMOVW)
  1528  		p.From.Type = obj.TYPE_REG
  1529  		p.From.Reg = arm64.REGTMP
  1530  		p.To.Type = obj.TYPE_MEM
  1531  		p.To.Reg = arm64.REGSP
  1532  		ssagen.AddAux(&p.To, v)
  1533  		p = s.Prog(arm64.AMOVW)
  1534  		p.From.Type = obj.TYPE_REG
  1535  		p.From.Reg = arm64.REGTMP
  1536  		p.To.Type = obj.TYPE_MEM
  1537  		p.To.Reg = arm64.REGSP
  1538  		ssagen.AddAux2(&p.To, v, v.AuxInt+4)
  1539  	case ssa.OpClobberReg:
  1540  		x := uint64(0xdeaddeaddeaddead)
  1541  		p := s.Prog(arm64.AMOVD)
  1542  		p.From.Type = obj.TYPE_CONST
  1543  		p.From.Offset = int64(x)
  1544  		p.To.Type = obj.TYPE_REG
  1545  		p.To.Reg = v.Reg()
  1546  	default:
  1547  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1548  	}
  1549  }
  1550  
  1551  var condBits = map[ssa.Op]arm64.SpecialOperand{
  1552  	ssa.OpARM64Equal:         arm64.SPOP_EQ,
  1553  	ssa.OpARM64NotEqual:      arm64.SPOP_NE,
  1554  	ssa.OpARM64LessThan:      arm64.SPOP_LT,
  1555  	ssa.OpARM64LessThanU:     arm64.SPOP_LO,
  1556  	ssa.OpARM64LessEqual:     arm64.SPOP_LE,
  1557  	ssa.OpARM64LessEqualU:    arm64.SPOP_LS,
  1558  	ssa.OpARM64GreaterThan:   arm64.SPOP_GT,
  1559  	ssa.OpARM64GreaterThanU:  arm64.SPOP_HI,
  1560  	ssa.OpARM64GreaterEqual:  arm64.SPOP_GE,
  1561  	ssa.OpARM64GreaterEqualU: arm64.SPOP_HS,
  1562  	ssa.OpARM64LessThanF:     arm64.SPOP_MI, // Less than
  1563  	ssa.OpARM64LessEqualF:    arm64.SPOP_LS, // Less than or equal to
  1564  	ssa.OpARM64GreaterThanF:  arm64.SPOP_GT, // Greater than
  1565  	ssa.OpARM64GreaterEqualF: arm64.SPOP_GE, // Greater than or equal to
  1566  
  1567  	// The following condition codes have unordered to handle comparisons related to NaN.
  1568  	ssa.OpARM64NotLessThanF:     arm64.SPOP_PL, // Greater than, equal to, or unordered
  1569  	ssa.OpARM64NotLessEqualF:    arm64.SPOP_HI, // Greater than or unordered
  1570  	ssa.OpARM64NotGreaterThanF:  arm64.SPOP_LE, // Less than, equal to or unordered
  1571  	ssa.OpARM64NotGreaterEqualF: arm64.SPOP_LT, // Less than or unordered
  1572  
  1573  	ssa.OpARM64LessThanNoov:     arm64.SPOP_MI, // Less than but without honoring overflow
  1574  	ssa.OpARM64GreaterEqualNoov: arm64.SPOP_PL, // Greater than or equal to but without honoring overflow
  1575  }
  1576  
  1577  var blockJump = map[ssa.BlockKind]struct {
  1578  	asm, invasm obj.As
  1579  }{
  1580  	ssa.BlockARM64EQ:     {arm64.ABEQ, arm64.ABNE},
  1581  	ssa.BlockARM64NE:     {arm64.ABNE, arm64.ABEQ},
  1582  	ssa.BlockARM64LT:     {arm64.ABLT, arm64.ABGE},
  1583  	ssa.BlockARM64GE:     {arm64.ABGE, arm64.ABLT},
  1584  	ssa.BlockARM64LE:     {arm64.ABLE, arm64.ABGT},
  1585  	ssa.BlockARM64GT:     {arm64.ABGT, arm64.ABLE},
  1586  	ssa.BlockARM64ULT:    {arm64.ABLO, arm64.ABHS},
  1587  	ssa.BlockARM64UGE:    {arm64.ABHS, arm64.ABLO},
  1588  	ssa.BlockARM64UGT:    {arm64.ABHI, arm64.ABLS},
  1589  	ssa.BlockARM64ULE:    {arm64.ABLS, arm64.ABHI},
  1590  	ssa.BlockARM64Z:      {arm64.ACBZ, arm64.ACBNZ},
  1591  	ssa.BlockARM64NZ:     {arm64.ACBNZ, arm64.ACBZ},
  1592  	ssa.BlockARM64ZW:     {arm64.ACBZW, arm64.ACBNZW},
  1593  	ssa.BlockARM64NZW:    {arm64.ACBNZW, arm64.ACBZW},
  1594  	ssa.BlockARM64TBZ:    {arm64.ATBZ, arm64.ATBNZ},
  1595  	ssa.BlockARM64TBNZ:   {arm64.ATBNZ, arm64.ATBZ},
  1596  	ssa.BlockARM64FLT:    {arm64.ABMI, arm64.ABPL},
  1597  	ssa.BlockARM64FGE:    {arm64.ABGE, arm64.ABLT},
  1598  	ssa.BlockARM64FLE:    {arm64.ABLS, arm64.ABHI},
  1599  	ssa.BlockARM64FGT:    {arm64.ABGT, arm64.ABLE},
  1600  	ssa.BlockARM64LTnoov: {arm64.ABMI, arm64.ABPL},
  1601  	ssa.BlockARM64GEnoov: {arm64.ABPL, arm64.ABMI},
  1602  }
  1603  
  1604  // To model a 'LEnoov' ('<=' without overflow checking) branching.
  1605  var leJumps = [2][2]ssagen.IndexJump{
  1606  	{{Jump: arm64.ABEQ, Index: 0}, {Jump: arm64.ABPL, Index: 1}}, // next == b.Succs[0]
  1607  	{{Jump: arm64.ABMI, Index: 0}, {Jump: arm64.ABEQ, Index: 0}}, // next == b.Succs[1]
  1608  }
  1609  
  1610  // To model a 'GTnoov' ('>' without overflow checking) branching.
  1611  var gtJumps = [2][2]ssagen.IndexJump{
  1612  	{{Jump: arm64.ABMI, Index: 1}, {Jump: arm64.ABEQ, Index: 1}}, // next == b.Succs[0]
  1613  	{{Jump: arm64.ABEQ, Index: 1}, {Jump: arm64.ABPL, Index: 0}}, // next == b.Succs[1]
  1614  }
  1615  
  1616  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1617  	switch b.Kind {
  1618  	case ssa.BlockPlain, ssa.BlockDefer:
  1619  		if b.Succs[0].Block() != next {
  1620  			p := s.Prog(obj.AJMP)
  1621  			p.To.Type = obj.TYPE_BRANCH
  1622  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1623  		}
  1624  
  1625  	case ssa.BlockExit, ssa.BlockRetJmp:
  1626  
  1627  	case ssa.BlockRet:
  1628  		s.Prog(obj.ARET)
  1629  
  1630  	case ssa.BlockARM64EQ, ssa.BlockARM64NE,
  1631  		ssa.BlockARM64LT, ssa.BlockARM64GE,
  1632  		ssa.BlockARM64LE, ssa.BlockARM64GT,
  1633  		ssa.BlockARM64ULT, ssa.BlockARM64UGT,
  1634  		ssa.BlockARM64ULE, ssa.BlockARM64UGE,
  1635  		ssa.BlockARM64Z, ssa.BlockARM64NZ,
  1636  		ssa.BlockARM64ZW, ssa.BlockARM64NZW,
  1637  		ssa.BlockARM64FLT, ssa.BlockARM64FGE,
  1638  		ssa.BlockARM64FLE, ssa.BlockARM64FGT,
  1639  		ssa.BlockARM64LTnoov, ssa.BlockARM64GEnoov:
  1640  		jmp := blockJump[b.Kind]
  1641  		var p *obj.Prog
  1642  		switch next {
  1643  		case b.Succs[0].Block():
  1644  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1645  		case b.Succs[1].Block():
  1646  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1647  		default:
  1648  			if b.Likely != ssa.BranchUnlikely {
  1649  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1650  				s.Br(obj.AJMP, b.Succs[1].Block())
  1651  			} else {
  1652  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1653  				s.Br(obj.AJMP, b.Succs[0].Block())
  1654  			}
  1655  		}
  1656  		if !b.Controls[0].Type.IsFlags() {
  1657  			p.From.Type = obj.TYPE_REG
  1658  			p.From.Reg = b.Controls[0].Reg()
  1659  		}
  1660  	case ssa.BlockARM64TBZ, ssa.BlockARM64TBNZ:
  1661  		jmp := blockJump[b.Kind]
  1662  		var p *obj.Prog
  1663  		switch next {
  1664  		case b.Succs[0].Block():
  1665  			p = s.Br(jmp.invasm, b.Succs[1].Block())
  1666  		case b.Succs[1].Block():
  1667  			p = s.Br(jmp.asm, b.Succs[0].Block())
  1668  		default:
  1669  			if b.Likely != ssa.BranchUnlikely {
  1670  				p = s.Br(jmp.asm, b.Succs[0].Block())
  1671  				s.Br(obj.AJMP, b.Succs[1].Block())
  1672  			} else {
  1673  				p = s.Br(jmp.invasm, b.Succs[1].Block())
  1674  				s.Br(obj.AJMP, b.Succs[0].Block())
  1675  			}
  1676  		}
  1677  		p.From.Offset = b.AuxInt
  1678  		p.From.Type = obj.TYPE_CONST
  1679  		p.Reg = b.Controls[0].Reg()
  1680  
  1681  	case ssa.BlockARM64LEnoov:
  1682  		s.CombJump(b, next, &leJumps)
  1683  	case ssa.BlockARM64GTnoov:
  1684  		s.CombJump(b, next, &gtJumps)
  1685  
  1686  	case ssa.BlockARM64JUMPTABLE:
  1687  		// MOVD	(TABLE)(IDX<<3), Rtmp
  1688  		// JMP	(Rtmp)
  1689  		p := s.Prog(arm64.AMOVD)
  1690  		p.From = genIndexedOperand(ssa.OpARM64MOVDloadidx8, b.Controls[1].Reg(), b.Controls[0].Reg())
  1691  		p.To.Type = obj.TYPE_REG
  1692  		p.To.Reg = arm64.REGTMP
  1693  		p = s.Prog(obj.AJMP)
  1694  		p.To.Type = obj.TYPE_MEM
  1695  		p.To.Reg = arm64.REGTMP
  1696  		// Save jump tables for later resolution of the target blocks.
  1697  		s.JumpTables = append(s.JumpTables, b)
  1698  
  1699  	default:
  1700  		b.Fatalf("branch not implemented: %s", b.LongString())
  1701  	}
  1702  }
  1703  
  1704  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1705  	p := s.Prog(loadByType(t))
  1706  	p.From.Type = obj.TYPE_MEM
  1707  	p.From.Name = obj.NAME_AUTO
  1708  	p.From.Sym = n.Linksym()
  1709  	p.From.Offset = n.FrameOffset() + off
  1710  	p.To.Type = obj.TYPE_REG
  1711  	p.To.Reg = reg
  1712  	return p
  1713  }
  1714  
  1715  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1716  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1717  	p.To.Name = obj.NAME_PARAM
  1718  	p.To.Sym = n.Linksym()
  1719  	p.Pos = p.Pos.WithNotStmt()
  1720  	return p
  1721  }
  1722  
  1723  // zero16 zeroes 16 bytes at reg+off.
  1724  // If postInc is true, increment reg by 16.
  1725  func zero16(s *ssagen.State, reg int16, off int64, postInc bool) {
  1726  	//   STP     (ZR, ZR), off(reg)
  1727  	p := s.Prog(arm64.ASTP)
  1728  	p.From.Type = obj.TYPE_REGREG
  1729  	p.From.Reg = arm64.REGZERO
  1730  	p.From.Offset = int64(arm64.REGZERO)
  1731  	p.To.Type = obj.TYPE_MEM
  1732  	p.To.Reg = reg
  1733  	p.To.Offset = off
  1734  	if postInc {
  1735  		if off != 0 {
  1736  			panic("can't postinc with non-zero offset")
  1737  		}
  1738  		//   STP.P  (ZR, ZR), 16(reg)
  1739  		p.Scond = arm64.C_XPOST
  1740  		p.To.Offset = 16
  1741  	}
  1742  }
  1743  
  1744  // zero8 zeroes 8 bytes at reg+off.
  1745  func zero8(s *ssagen.State, reg int16, off int64) {
  1746  	//   MOVD     ZR, off(reg)
  1747  	p := s.Prog(arm64.AMOVD)
  1748  	p.From.Type = obj.TYPE_REG
  1749  	p.From.Reg = arm64.REGZERO
  1750  	p.To.Type = obj.TYPE_MEM
  1751  	p.To.Reg = reg
  1752  	p.To.Offset = off
  1753  }
  1754  
  1755  // move32 copies 32 bytes at src+off to dst+off.
  1756  // Uses registers tmp1 and tmp2.
  1757  // If postInc is true, increment src and dst by 32.
  1758  func move32(s *ssagen.State, src, dst, tmp1, tmp2 int16, off int64, postInc bool) {
  1759  	// FLDPQ   off(src), (tmp1, tmp2)
  1760  	ld := s.Prog(arm64.AFLDPQ)
  1761  	ld.From.Type = obj.TYPE_MEM
  1762  	ld.From.Reg = src
  1763  	ld.From.Offset = off
  1764  	ld.To.Type = obj.TYPE_REGREG
  1765  	ld.To.Reg = tmp1
  1766  	ld.To.Offset = int64(tmp2)
  1767  	// FSTPQ   (tmp1, tmp2), off(dst)
  1768  	st := s.Prog(arm64.AFSTPQ)
  1769  	st.From.Type = obj.TYPE_REGREG
  1770  	st.From.Reg = tmp1
  1771  	st.From.Offset = int64(tmp2)
  1772  	st.To.Type = obj.TYPE_MEM
  1773  	st.To.Reg = dst
  1774  	st.To.Offset = off
  1775  	if postInc {
  1776  		if off != 0 {
  1777  			panic("can't postinc with non-zero offset")
  1778  		}
  1779  		ld.Scond = arm64.C_XPOST
  1780  		st.Scond = arm64.C_XPOST
  1781  		ld.From.Offset = 32
  1782  		st.To.Offset = 32
  1783  	}
  1784  }
  1785  
  1786  // move16 copies 16 bytes at src+off to dst+off.
  1787  // Uses register tmp1
  1788  // If postInc is true, increment src and dst by 16.
  1789  func move16(s *ssagen.State, src, dst, tmp1 int16, off int64, postInc bool) {
  1790  	// FMOVQ     off(src), tmp1
  1791  	ld := s.Prog(arm64.AFMOVQ)
  1792  	ld.From.Type = obj.TYPE_MEM
  1793  	ld.From.Reg = src
  1794  	ld.From.Offset = off
  1795  	ld.To.Type = obj.TYPE_REG
  1796  	ld.To.Reg = tmp1
  1797  	// FMOVQ     tmp1, off(dst)
  1798  	st := s.Prog(arm64.AFMOVQ)
  1799  	st.From.Type = obj.TYPE_REG
  1800  	st.From.Reg = tmp1
  1801  	st.To.Type = obj.TYPE_MEM
  1802  	st.To.Reg = dst
  1803  	st.To.Offset = off
  1804  	if postInc {
  1805  		if off != 0 {
  1806  			panic("can't postinc with non-zero offset")
  1807  		}
  1808  		ld.Scond = arm64.C_XPOST
  1809  		st.Scond = arm64.C_XPOST
  1810  		ld.From.Offset = 16
  1811  		st.To.Offset = 16
  1812  	}
  1813  }
  1814  
  1815  // move8 copies 8 bytes at src+off to dst+off.
  1816  // Uses register tmp.
  1817  func move8(s *ssagen.State, src, dst, tmp int16, off int64) {
  1818  	// MOVD    off(src), tmp
  1819  	ld := s.Prog(arm64.AMOVD)
  1820  	ld.From.Type = obj.TYPE_MEM
  1821  	ld.From.Reg = src
  1822  	ld.From.Offset = off
  1823  	ld.To.Type = obj.TYPE_REG
  1824  	ld.To.Reg = tmp
  1825  	// MOVD    tmp, off(dst)
  1826  	st := s.Prog(arm64.AMOVD)
  1827  	st.From.Type = obj.TYPE_REG
  1828  	st.From.Reg = tmp
  1829  	st.To.Type = obj.TYPE_MEM
  1830  	st.To.Reg = dst
  1831  	st.To.Offset = off
  1832  }
  1833  

View as plain text