Source file src/cmd/compile/internal/amd64/ssa.go

     1  // Copyright 2016 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package amd64
     6  
     7  import (
     8  	"fmt"
     9  	"math"
    10  
    11  	"cmd/compile/internal/base"
    12  	"cmd/compile/internal/ir"
    13  	"cmd/compile/internal/logopt"
    14  	"cmd/compile/internal/objw"
    15  	"cmd/compile/internal/ssa"
    16  	"cmd/compile/internal/ssagen"
    17  	"cmd/compile/internal/types"
    18  	"cmd/internal/obj"
    19  	"cmd/internal/obj/x86"
    20  	"internal/abi"
    21  )
    22  
    23  // ssaMarkMoves marks any MOVXconst ops that need to avoid clobbering flags.
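        // A marked constant (any non-nil Aux) is later materialized by ssaGenValue
        // with a real MOV instead of the usual XOR-zeroing idiom, since XOR would
        // clobber the live flags.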
    24  func ssaMarkMoves(s *ssagen.State, b *ssa.Block) {
    25  	flive := b.FlagsLiveAtEnd
    26  	for _, c := range b.ControlValues() {
    27  		flive = c.Type.IsFlags() || flive
    28  	}
    29  	for i := len(b.Values) - 1; i >= 0; i-- {
    30  		v := b.Values[i]
    31  		if flive && (v.Op == ssa.OpAMD64MOVLconst || v.Op == ssa.OpAMD64MOVQconst) {
    32  			// The "mark" is any non-nil Aux value.
    33  			v.Aux = ssa.AuxMark
    34  		}
    35  		if v.Type.IsFlags() {
    36  			flive = false
    37  		}
    38  		for _, a := range v.Args {
    39  			if a.Type.IsFlags() {
    40  				flive = true
    41  			}
    42  		}
    43  	}
    44  }
    45  
    46  // loadByType returns the load instruction of the given type.
    47  func loadByType(t *types.Type) obj.As {
    48  	// Avoid partial register write
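        	// Loading 1 or 2 bytes with a zero-extending move into a 32-bit
        	// register writes the whole destination register, so no
        	// partial-register stall can occur.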
    49  	if !t.IsFloat() {
    50  		switch t.Size() {
    51  		case 1:
    52  			return x86.AMOVBLZX
    53  		case 2:
    54  			return x86.AMOVWLZX
    55  		}
    56  	}
    57  	// Otherwise, there's no difference between load and store opcodes.
    58  	return storeByType(t)
    59  }
    60  
    61  // storeByType returns the store instruction of the given type.
    62  func storeByType(t *types.Type) obj.As {
    63  	width := t.Size()
    64  	if t.IsFloat() {
    65  		switch width {
    66  		case 4:
    67  			return x86.AMOVSS
    68  		case 8:
    69  			return x86.AMOVSD
    70  		}
    71  	} else {
    72  		switch width {
    73  		case 1:
    74  			return x86.AMOVB
    75  		case 2:
    76  			return x86.AMOVW
    77  		case 4:
    78  			return x86.AMOVL
    79  		case 8:
    80  			return x86.AMOVQ
    81  		case 16:
    82  			return x86.AMOVUPS
    83  		}
    84  	}
    85  	panic(fmt.Sprintf("bad store type %v", t))
    86  }
    87  
    88  // moveByType returns the reg->reg move instruction of the given type.
    89  func moveByType(t *types.Type) obj.As {
    90  	if t.IsFloat() {
    91  		// Moving the whole sse2 register is faster
    92  		// than moving just the correct low portion of it.
    93  		// There is no xmm->xmm move with a 1-byte opcode,
    94  		// so use movups, which has a 2-byte opcode.
    95  		return x86.AMOVUPS
    96  	} else {
    97  		switch t.Size() {
    98  		case 1:
    99  			// Avoids partial register write
   100  			return x86.AMOVL
   101  		case 2:
   102  			return x86.AMOVL
   103  		case 4:
   104  			return x86.AMOVL
   105  		case 8:
   106  			return x86.AMOVQ
   107  		case 16:
   108  			return x86.AMOVUPS // int128s are in SSE registers
   109  		default:
   110  			panic(fmt.Sprintf("bad int register width %d:%v", t.Size(), t))
   111  		}
   112  	}
   113  }
   114  
   115  // opregreg emits instructions for
   116  //
   117  //	dest := dest(To) op src(From)
   118  //
   119  // and also returns the created obj.Prog so it
   120  // may be further adjusted (offset, scale, etc).
   121  func opregreg(s *ssagen.State, op obj.As, dest, src int16) *obj.Prog {
   122  	p := s.Prog(op)
   123  	p.From.Type = obj.TYPE_REG
   124  	p.To.Type = obj.TYPE_REG
   125  	p.To.Reg = dest
   126  	p.From.Reg = src
   127  	return p
   128  }
   129  
   130  // memIdx fills out a as an indexed memory reference for v.
   131  // It assumes that the base register and the index register
   132  // are v.Args[0].Reg() and v.Args[1].Reg(), respectively.
   133  // The caller must still use gc.AddAux/gc.AddAux2 to handle v.Aux as necessary.
   134  func memIdx(a *obj.Addr, v *ssa.Value) {
   135  	r, i := v.Args[0].Reg(), v.Args[1].Reg()
   136  	a.Type = obj.TYPE_MEM
   137  	a.Scale = v.Op.Scale()
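        	// SP can't be encoded as an index register. With a scale of 1 the
        	// base and index are interchangeable, so swap them if the index is SP.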
   138  	if a.Scale == 1 && i == x86.REG_SP {
   139  		r, i = i, r
   140  	}
   141  	a.Reg = r
   142  	a.Index = i
   143  }
   144  
   145  func getgFromTLS(s *ssagen.State, r int16) {
   146  	// See the comments in cmd/internal/obj/x86/obj6.go
   147  	// near CanUse1InsnTLS for a detailed explanation of these instructions.
   148  	if x86.CanUse1InsnTLS(base.Ctxt) {
   149  		// MOVQ (TLS), r
   150  		p := s.Prog(x86.AMOVQ)
   151  		p.From.Type = obj.TYPE_MEM
   152  		p.From.Reg = x86.REG_TLS
   153  		p.To.Type = obj.TYPE_REG
   154  		p.To.Reg = r
   155  	} else {
   156  		// MOVQ TLS, r
   157  		// MOVQ (r)(TLS*1), r
   158  		p := s.Prog(x86.AMOVQ)
   159  		p.From.Type = obj.TYPE_REG
   160  		p.From.Reg = x86.REG_TLS
   161  		p.To.Type = obj.TYPE_REG
   162  		p.To.Reg = r
   163  		q := s.Prog(x86.AMOVQ)
   164  		q.From.Type = obj.TYPE_MEM
   165  		q.From.Reg = r
   166  		q.From.Index = x86.REG_TLS
   167  		q.From.Scale = 1
   168  		q.To.Type = obj.TYPE_REG
   169  		q.To.Reg = r
   170  	}
   171  }
   172  
   173  func ssaGenValue(s *ssagen.State, v *ssa.Value) {
   174  	switch v.Op {
   175  	case ssa.OpAMD64VFMADD231SD, ssa.OpAMD64VFMADD231SS:
   176  		p := s.Prog(v.Op.Asm())
   177  		p.From = obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[2].Reg()}
   178  		p.To = obj.Addr{Type: obj.TYPE_REG, Reg: v.Reg()}
   179  		p.AddRestSourceReg(v.Args[1].Reg())
   180  	case ssa.OpAMD64ADDQ, ssa.OpAMD64ADDL:
   181  		r := v.Reg()
   182  		r1 := v.Args[0].Reg()
   183  		r2 := v.Args[1].Reg()
   184  		switch {
   185  		case r == r1:
   186  			p := s.Prog(v.Op.Asm())
   187  			p.From.Type = obj.TYPE_REG
   188  			p.From.Reg = r2
   189  			p.To.Type = obj.TYPE_REG
   190  			p.To.Reg = r
   191  		case r == r2:
   192  			p := s.Prog(v.Op.Asm())
   193  			p.From.Type = obj.TYPE_REG
   194  			p.From.Reg = r1
   195  			p.To.Type = obj.TYPE_REG
   196  			p.To.Reg = r
   197  		default:
   198  			var asm obj.As
   199  			if v.Op == ssa.OpAMD64ADDQ {
   200  				asm = x86.ALEAQ
   201  			} else {
   202  				asm = x86.ALEAL
   203  			}
   204  			p := s.Prog(asm)
   205  			p.From.Type = obj.TYPE_MEM
   206  			p.From.Reg = r1
   207  			p.From.Scale = 1
   208  			p.From.Index = r2
   209  			p.To.Type = obj.TYPE_REG
   210  			p.To.Reg = r
   211  		}
   212  	// 2-address opcode arithmetic
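        	// For these ops the register allocator has already placed the result
        	// in the same register as the first argument (resultInArg0), so only
        	// the second operand needs to be emitted.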
   213  	case ssa.OpAMD64SUBQ, ssa.OpAMD64SUBL,
   214  		ssa.OpAMD64MULQ, ssa.OpAMD64MULL,
   215  		ssa.OpAMD64ANDQ, ssa.OpAMD64ANDL,
   216  		ssa.OpAMD64ORQ, ssa.OpAMD64ORL,
   217  		ssa.OpAMD64XORQ, ssa.OpAMD64XORL,
   218  		ssa.OpAMD64SHLQ, ssa.OpAMD64SHLL,
   219  		ssa.OpAMD64SHRQ, ssa.OpAMD64SHRL, ssa.OpAMD64SHRW, ssa.OpAMD64SHRB,
   220  		ssa.OpAMD64SARQ, ssa.OpAMD64SARL, ssa.OpAMD64SARW, ssa.OpAMD64SARB,
   221  		ssa.OpAMD64ROLQ, ssa.OpAMD64ROLL, ssa.OpAMD64ROLW, ssa.OpAMD64ROLB,
   222  		ssa.OpAMD64RORQ, ssa.OpAMD64RORL, ssa.OpAMD64RORW, ssa.OpAMD64RORB,
   223  		ssa.OpAMD64ADDSS, ssa.OpAMD64ADDSD, ssa.OpAMD64SUBSS, ssa.OpAMD64SUBSD,
   224  		ssa.OpAMD64MULSS, ssa.OpAMD64MULSD, ssa.OpAMD64DIVSS, ssa.OpAMD64DIVSD,
   225  		ssa.OpAMD64MINSS, ssa.OpAMD64MINSD,
   226  		ssa.OpAMD64POR, ssa.OpAMD64PXOR,
   227  		ssa.OpAMD64BTSL, ssa.OpAMD64BTSQ,
   228  		ssa.OpAMD64BTCL, ssa.OpAMD64BTCQ,
   229  		ssa.OpAMD64BTRL, ssa.OpAMD64BTRQ,
   230  		ssa.OpAMD64PCMPEQB, ssa.OpAMD64PSIGNB,
   231  		ssa.OpAMD64PUNPCKLBW:
   232  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   233  
   234  	case ssa.OpAMD64PSHUFLW:
   235  		p := s.Prog(v.Op.Asm())
   236  		imm := v.AuxInt
   237  		if imm < 0 || imm > 255 {
   238  			v.Fatalf("Invalid source selection immediate")
   239  		}
   240  		p.From.Offset = imm
   241  		p.From.Type = obj.TYPE_CONST
   242  		p.AddRestSourceReg(v.Args[0].Reg())
   243  		p.To.Type = obj.TYPE_REG
   244  		p.To.Reg = v.Reg()
   245  
   246  	case ssa.OpAMD64PSHUFBbroadcast:
   247  		// PSHUFB with a control mask of zero copies byte 0 to all
   248  		// bytes in the register.
   249  		//
   250  		// X15 is always zero with ABIInternal.
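        		// Under other ABIs there is no such guarantee, so the register is
        		// cleared explicitly below before it is used as the control mask.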
   251  		if s.ABI != obj.ABIInternal {
   252  			// zero X15 manually
   253  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
   254  		}
   255  
   256  		p := s.Prog(v.Op.Asm())
   257  		p.From.Type = obj.TYPE_REG
   258  		p.To.Type = obj.TYPE_REG
   259  		p.To.Reg = v.Reg()
   260  		p.From.Reg = x86.REG_X15
   261  
   262  	case ssa.OpAMD64SHRDQ, ssa.OpAMD64SHLDQ:
   263  		p := s.Prog(v.Op.Asm())
   264  		lo, hi, bits := v.Args[0].Reg(), v.Args[1].Reg(), v.Args[2].Reg()
   265  		p.From.Type = obj.TYPE_REG
   266  		p.From.Reg = bits
   267  		p.To.Type = obj.TYPE_REG
   268  		p.To.Reg = lo
   269  		p.AddRestSourceReg(hi)
   270  
   271  	case ssa.OpAMD64BLSIQ, ssa.OpAMD64BLSIL,
   272  		ssa.OpAMD64BLSMSKQ, ssa.OpAMD64BLSMSKL,
   273  		ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
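        		// BLSRQ/BLSRL produce a <value, flags> tuple, so their value result
        		// lives in Reg0(); the other ops produce a single value in Reg().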
   274  		p := s.Prog(v.Op.Asm())
   275  		p.From.Type = obj.TYPE_REG
   276  		p.From.Reg = v.Args[0].Reg()
   277  		p.To.Type = obj.TYPE_REG
   278  		switch v.Op {
   279  		case ssa.OpAMD64BLSRQ, ssa.OpAMD64BLSRL:
   280  			p.To.Reg = v.Reg0()
   281  		default:
   282  			p.To.Reg = v.Reg()
   283  		}
   284  
   285  	case ssa.OpAMD64ANDNQ, ssa.OpAMD64ANDNL:
   286  		p := s.Prog(v.Op.Asm())
   287  		p.From.Type = obj.TYPE_REG
   288  		p.From.Reg = v.Args[0].Reg()
   289  		p.To.Type = obj.TYPE_REG
   290  		p.To.Reg = v.Reg()
   291  		p.AddRestSourceReg(v.Args[1].Reg())
   292  
   293  	case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
   294  		ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
   295  		ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
   296  		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   297  		p.AddRestSourceReg(v.Args[0].Reg())
   298  
   299  	case ssa.OpAMD64SHLXLload, ssa.OpAMD64SHLXQload,
   300  		ssa.OpAMD64SHRXLload, ssa.OpAMD64SHRXQload,
   301  		ssa.OpAMD64SARXLload, ssa.OpAMD64SARXQload:
   302  		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
   303  		m := obj.Addr{Type: obj.TYPE_MEM, Reg: v.Args[0].Reg()}
   304  		ssagen.AddAux(&m, v)
   305  		p.AddRestSource(m)
   306  
   307  	case ssa.OpAMD64SHLXLloadidx1, ssa.OpAMD64SHLXLloadidx4, ssa.OpAMD64SHLXLloadidx8,
   308  		ssa.OpAMD64SHRXLloadidx1, ssa.OpAMD64SHRXLloadidx4, ssa.OpAMD64SHRXLloadidx8,
   309  		ssa.OpAMD64SARXLloadidx1, ssa.OpAMD64SARXLloadidx4, ssa.OpAMD64SARXLloadidx8,
   310  		ssa.OpAMD64SHLXQloadidx1, ssa.OpAMD64SHLXQloadidx8,
   311  		ssa.OpAMD64SHRXQloadidx1, ssa.OpAMD64SHRXQloadidx8,
   312  		ssa.OpAMD64SARXQloadidx1, ssa.OpAMD64SARXQloadidx8:
   313  		p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[2].Reg())
   314  		m := obj.Addr{Type: obj.TYPE_MEM}
   315  		memIdx(&m, v)
   316  		ssagen.AddAux(&m, v)
   317  		p.AddRestSource(m)
   318  
   319  	case ssa.OpAMD64DIVQU, ssa.OpAMD64DIVLU, ssa.OpAMD64DIVWU:
   320  		// Arg[0] (the dividend) is in AX.
   321  		// Arg[1] (the divisor) can be in any other register.
   322  		// Result[0] (the quotient) is in AX.
   323  		// Result[1] (the remainder) is in DX.
   324  		r := v.Args[1].Reg()
   325  
   326  		// Zero extend dividend.
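        		// DIV divides the double-width value DX:AX by the operand, so DX
        		// must be zero for an unsigned dividend that fits in AX.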
   327  		opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
   328  
   329  		// Issue divide.
   330  		p := s.Prog(v.Op.Asm())
   331  		p.From.Type = obj.TYPE_REG
   332  		p.From.Reg = r
   333  
   334  	case ssa.OpAMD64DIVQ, ssa.OpAMD64DIVL, ssa.OpAMD64DIVW:
   335  		// Arg[0] (the dividend) is in AX.
   336  		// Arg[1] (the divisor) can be in any other register.
   337  		// Result[0] (the quotient) is in AX.
   338  		// Result[1] (the remainder) is in DX.
   339  		r := v.Args[1].Reg()
   340  
   341  		var opCMP, opNEG, opSXD obj.As
   342  		switch v.Op {
   343  		case ssa.OpAMD64DIVQ:
   344  			opCMP, opNEG, opSXD = x86.ACMPQ, x86.ANEGQ, x86.ACQO
   345  		case ssa.OpAMD64DIVL:
   346  			opCMP, opNEG, opSXD = x86.ACMPL, x86.ANEGL, x86.ACDQ
   347  		case ssa.OpAMD64DIVW:
   348  			opCMP, opNEG, opSXD = x86.ACMPW, x86.ANEGW, x86.ACWD
   349  		}
   350  
   351  		// CPU faults upon signed overflow, which occurs when the most
   352  		// negative int is divided by -1. Handle divide by -1 as a special case.
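        		// For DIVQ the emitted sequence is roughly:
        		//   CMPQ  divisor, $-1
        		//   JNE   div
        		//   NEGQ  AX          // quotient: n / -1 == -n
        		//   XORL  DX, DX      // remainder: n % -1 == 0
        		//   JMP   done
        		// div:
        		//   CQO
        		//   IDIVQ divisor
        		// done: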
   353  		var j1, j2 *obj.Prog
   354  		if ssa.DivisionNeedsFixUp(v) {
   355  			c := s.Prog(opCMP)
   356  			c.From.Type = obj.TYPE_REG
   357  			c.From.Reg = r
   358  			c.To.Type = obj.TYPE_CONST
   359  			c.To.Offset = -1
   360  
   361  			// Divisor is not -1, proceed with normal division.
   362  			j1 = s.Prog(x86.AJNE)
   363  			j1.To.Type = obj.TYPE_BRANCH
   364  
   365  			// Divisor is -1, manually compute quotient and remainder via fixup code.
   366  			// n / -1 = -n
   367  			n1 := s.Prog(opNEG)
   368  			n1.To.Type = obj.TYPE_REG
   369  			n1.To.Reg = x86.REG_AX
   370  
   371  			// n % -1 == 0
   372  			opregreg(s, x86.AXORL, x86.REG_DX, x86.REG_DX)
   373  
   374  			// TODO(khr): issue only the -1 fixup code we need.
   375  			// For instance, if only the quotient is used, no point in zeroing the remainder.
   376  
   377  			// Skip over normal division.
   378  			j2 = s.Prog(obj.AJMP)
   379  			j2.To.Type = obj.TYPE_BRANCH
   380  		}
   381  
   382  		// Sign extend dividend and perform division.
   383  		p := s.Prog(opSXD)
   384  		if j1 != nil {
   385  			j1.To.SetTarget(p)
   386  		}
   387  		p = s.Prog(v.Op.Asm())
   388  		p.From.Type = obj.TYPE_REG
   389  		p.From.Reg = r
   390  
   391  		if j2 != nil {
   392  			j2.To.SetTarget(s.Pc())
   393  		}
   394  
   395  	case ssa.OpAMD64HMULQ, ssa.OpAMD64HMULL, ssa.OpAMD64HMULQU, ssa.OpAMD64HMULLU:
   396  		// The frontend rewrites constant division by 8/16/32-bit integers
   397  		// into HMUL by a constant.
   398  		// SSA rewrites generate the 64-bit versions.
   399  
   400  		// Arg[0] is already in AX as it's the only register we allow,
   401  		// and DX is the only output we care about (the high bits).
   402  		p := s.Prog(v.Op.Asm())
   403  		p.From.Type = obj.TYPE_REG
   404  		p.From.Reg = v.Args[1].Reg()
   405  
   406  		// IMULB puts the high portion in AH instead of DL,
   407  		// so move it to DL for consistency
   408  		if v.Type.Size() == 1 {
   409  			m := s.Prog(x86.AMOVB)
   410  			m.From.Type = obj.TYPE_REG
   411  			m.From.Reg = x86.REG_AH
   412  			m.To.Type = obj.TYPE_REG
   413  			m.To.Reg = x86.REG_DX
   414  		}
   415  
   416  	case ssa.OpAMD64MULQU, ssa.OpAMD64MULLU:
   417  		// Arg[0] is already in AX as it's the only register we allow
   418  		// results lo in AX
   419  		p := s.Prog(v.Op.Asm())
   420  		p.From.Type = obj.TYPE_REG
   421  		p.From.Reg = v.Args[1].Reg()
   422  
   423  	case ssa.OpAMD64MULQU2:
   424  		// Arg[0] is already in AX as it's the only register we allow
   425  		// results hi in DX, lo in AX
   426  		p := s.Prog(v.Op.Asm())
   427  		p.From.Type = obj.TYPE_REG
   428  		p.From.Reg = v.Args[1].Reg()
   429  
   430  	case ssa.OpAMD64DIVQU2:
   431  		// Arg[0], Arg[1] are already in DX, AX, as they're the only registers we allow
   432  		// results q in AX, r in DX
   433  		p := s.Prog(v.Op.Asm())
   434  		p.From.Type = obj.TYPE_REG
   435  		p.From.Reg = v.Args[2].Reg()
   436  
   437  	case ssa.OpAMD64AVGQU:
   438  		// Compute (x+y)/2 unsigned.
   439  		// Do a 64-bit add; the overflow goes into the carry bit.
   440  		// Shift right once and pull the carry back into the 63rd bit.
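        		//   ADDQ y, x    // CF holds the carry out of the 64-bit sum
        		//   RCRQ $1, x   // rotate CF back in as bit 63 while shifting right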
   441  		p := s.Prog(x86.AADDQ)
   442  		p.From.Type = obj.TYPE_REG
   443  		p.To.Type = obj.TYPE_REG
   444  		p.To.Reg = v.Reg()
   445  		p.From.Reg = v.Args[1].Reg()
   446  		p = s.Prog(x86.ARCRQ)
   447  		p.From.Type = obj.TYPE_CONST
   448  		p.From.Offset = 1
   449  		p.To.Type = obj.TYPE_REG
   450  		p.To.Reg = v.Reg()
   451  
   452  	case ssa.OpAMD64ADDQcarry, ssa.OpAMD64ADCQ:
   453  		r := v.Reg0()
   454  		r0 := v.Args[0].Reg()
   455  		r1 := v.Args[1].Reg()
   456  		switch r {
   457  		case r0:
   458  			p := s.Prog(v.Op.Asm())
   459  			p.From.Type = obj.TYPE_REG
   460  			p.From.Reg = r1
   461  			p.To.Type = obj.TYPE_REG
   462  			p.To.Reg = r
   463  		case r1:
   464  			p := s.Prog(v.Op.Asm())
   465  			p.From.Type = obj.TYPE_REG
   466  			p.From.Reg = r0
   467  			p.To.Type = obj.TYPE_REG
   468  			p.To.Reg = r
   469  		default:
   470  			v.Fatalf("output not in same register as an input %s", v.LongString())
   471  		}
   472  
   473  	case ssa.OpAMD64SUBQborrow, ssa.OpAMD64SBBQ:
   474  		p := s.Prog(v.Op.Asm())
   475  		p.From.Type = obj.TYPE_REG
   476  		p.From.Reg = v.Args[1].Reg()
   477  		p.To.Type = obj.TYPE_REG
   478  		p.To.Reg = v.Reg0()
   479  
   480  	case ssa.OpAMD64ADDQconstcarry, ssa.OpAMD64ADCQconst, ssa.OpAMD64SUBQconstborrow, ssa.OpAMD64SBBQconst:
   481  		p := s.Prog(v.Op.Asm())
   482  		p.From.Type = obj.TYPE_CONST
   483  		p.From.Offset = v.AuxInt
   484  		p.To.Type = obj.TYPE_REG
   485  		p.To.Reg = v.Reg0()
   486  
   487  	case ssa.OpAMD64ADDQconst, ssa.OpAMD64ADDLconst:
   488  		r := v.Reg()
   489  		a := v.Args[0].Reg()
   490  		if r == a {
   491  			switch v.AuxInt {
   492  			case 1:
   493  				var asm obj.As
   494  				// The software optimization manual recommends add $1,reg.
   495  				// But inc/dec is 1 byte smaller. ICC always uses inc;
   496  				// Clang/GCC choose depending on flags, but prefer add.
   497  				// Experiments show that inc/dec is both a little faster
   498  				// and makes the binary a little smaller.
   499  				if v.Op == ssa.OpAMD64ADDQconst {
   500  					asm = x86.AINCQ
   501  				} else {
   502  					asm = x86.AINCL
   503  				}
   504  				p := s.Prog(asm)
   505  				p.To.Type = obj.TYPE_REG
   506  				p.To.Reg = r
   507  				return
   508  			case -1:
   509  				var asm obj.As
   510  				if v.Op == ssa.OpAMD64ADDQconst {
   511  					asm = x86.ADECQ
   512  				} else {
   513  					asm = x86.ADECL
   514  				}
   515  				p := s.Prog(asm)
   516  				p.To.Type = obj.TYPE_REG
   517  				p.To.Reg = r
   518  				return
   519  			case 0x80:
   520  				// 'SUBQ $-0x80, r' is shorter to encode than
   521  				// and functionally equivalent to 'ADDQ $0x80, r'.
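        				// ($-0x80 fits in a sign-extended 8-bit immediate, while
        				// $0x80 does not, so the SUB form gets the short encoding.)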
   522  				asm := x86.ASUBL
   523  				if v.Op == ssa.OpAMD64ADDQconst {
   524  					asm = x86.ASUBQ
   525  				}
   526  				p := s.Prog(asm)
   527  				p.From.Type = obj.TYPE_CONST
   528  				p.From.Offset = -0x80
   529  				p.To.Type = obj.TYPE_REG
   530  				p.To.Reg = r
   531  				return
   532  
   533  			}
   534  			p := s.Prog(v.Op.Asm())
   535  			p.From.Type = obj.TYPE_CONST
   536  			p.From.Offset = v.AuxInt
   537  			p.To.Type = obj.TYPE_REG
   538  			p.To.Reg = r
   539  			return
   540  		}
   541  		var asm obj.As
   542  		if v.Op == ssa.OpAMD64ADDQconst {
   543  			asm = x86.ALEAQ
   544  		} else {
   545  			asm = x86.ALEAL
   546  		}
   547  		p := s.Prog(asm)
   548  		p.From.Type = obj.TYPE_MEM
   549  		p.From.Reg = a
   550  		p.From.Offset = v.AuxInt
   551  		p.To.Type = obj.TYPE_REG
   552  		p.To.Reg = r
   553  
   554  	case ssa.OpAMD64CMOVQEQ, ssa.OpAMD64CMOVLEQ, ssa.OpAMD64CMOVWEQ,
   555  		ssa.OpAMD64CMOVQLT, ssa.OpAMD64CMOVLLT, ssa.OpAMD64CMOVWLT,
   556  		ssa.OpAMD64CMOVQNE, ssa.OpAMD64CMOVLNE, ssa.OpAMD64CMOVWNE,
   557  		ssa.OpAMD64CMOVQGT, ssa.OpAMD64CMOVLGT, ssa.OpAMD64CMOVWGT,
   558  		ssa.OpAMD64CMOVQLE, ssa.OpAMD64CMOVLLE, ssa.OpAMD64CMOVWLE,
   559  		ssa.OpAMD64CMOVQGE, ssa.OpAMD64CMOVLGE, ssa.OpAMD64CMOVWGE,
   560  		ssa.OpAMD64CMOVQHI, ssa.OpAMD64CMOVLHI, ssa.OpAMD64CMOVWHI,
   561  		ssa.OpAMD64CMOVQLS, ssa.OpAMD64CMOVLLS, ssa.OpAMD64CMOVWLS,
   562  		ssa.OpAMD64CMOVQCC, ssa.OpAMD64CMOVLCC, ssa.OpAMD64CMOVWCC,
   563  		ssa.OpAMD64CMOVQCS, ssa.OpAMD64CMOVLCS, ssa.OpAMD64CMOVWCS,
   564  		ssa.OpAMD64CMOVQGTF, ssa.OpAMD64CMOVLGTF, ssa.OpAMD64CMOVWGTF,
   565  		ssa.OpAMD64CMOVQGEF, ssa.OpAMD64CMOVLGEF, ssa.OpAMD64CMOVWGEF:
   566  		p := s.Prog(v.Op.Asm())
   567  		p.From.Type = obj.TYPE_REG
   568  		p.From.Reg = v.Args[1].Reg()
   569  		p.To.Type = obj.TYPE_REG
   570  		p.To.Reg = v.Reg()
   571  
   572  	case ssa.OpAMD64CMOVQNEF, ssa.OpAMD64CMOVLNEF, ssa.OpAMD64CMOVWNEF:
   573  		// Flag condition: ^ZERO || PARITY
   574  		// Generate:
   575  		//   CMOV*NE  SRC,DST
   576  		//   CMOV*PS  SRC,DST
   577  		p := s.Prog(v.Op.Asm())
   578  		p.From.Type = obj.TYPE_REG
   579  		p.From.Reg = v.Args[1].Reg()
   580  		p.To.Type = obj.TYPE_REG
   581  		p.To.Reg = v.Reg()
   582  		var q *obj.Prog
   583  		if v.Op == ssa.OpAMD64CMOVQNEF {
   584  			q = s.Prog(x86.ACMOVQPS)
   585  		} else if v.Op == ssa.OpAMD64CMOVLNEF {
   586  			q = s.Prog(x86.ACMOVLPS)
   587  		} else {
   588  			q = s.Prog(x86.ACMOVWPS)
   589  		}
   590  		q.From.Type = obj.TYPE_REG
   591  		q.From.Reg = v.Args[1].Reg()
   592  		q.To.Type = obj.TYPE_REG
   593  		q.To.Reg = v.Reg()
   594  
   595  	case ssa.OpAMD64CMOVQEQF, ssa.OpAMD64CMOVLEQF, ssa.OpAMD64CMOVWEQF:
   596  		// Flag condition: ZERO && !PARITY
   597  		// Generate:
   598  		//   MOV      SRC,TMP
   599  		//   CMOV*NE  DST,TMP
   600  		//   CMOV*PC  TMP,DST
   601  		//
   602  		// TODO(rasky): we could generate:
   603  		//   CMOV*NE  DST,SRC
   604  		//   CMOV*PC  SRC,DST
   605  		// But this requires a way for regalloc to know that SRC might be
   606  		// clobbered by this instruction.
   607  		t := v.RegTmp()
   608  		opregreg(s, moveByType(v.Type), t, v.Args[1].Reg())
   609  
   610  		p := s.Prog(v.Op.Asm())
   611  		p.From.Type = obj.TYPE_REG
   612  		p.From.Reg = v.Reg()
   613  		p.To.Type = obj.TYPE_REG
   614  		p.To.Reg = t
   615  		var q *obj.Prog
   616  		if v.Op == ssa.OpAMD64CMOVQEQF {
   617  			q = s.Prog(x86.ACMOVQPC)
   618  		} else if v.Op == ssa.OpAMD64CMOVLEQF {
   619  			q = s.Prog(x86.ACMOVLPC)
   620  		} else {
   621  			q = s.Prog(x86.ACMOVWPC)
   622  		}
   623  		q.From.Type = obj.TYPE_REG
   624  		q.From.Reg = t
   625  		q.To.Type = obj.TYPE_REG
   626  		q.To.Reg = v.Reg()
   627  
   628  	case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst:
   629  		r := v.Reg()
   630  		p := s.Prog(v.Op.Asm())
   631  		p.From.Type = obj.TYPE_CONST
   632  		p.From.Offset = v.AuxInt
   633  		p.To.Type = obj.TYPE_REG
   634  		p.To.Reg = r
   635  		p.AddRestSourceReg(v.Args[0].Reg())
   636  
   637  	case ssa.OpAMD64ANDQconst:
   638  		asm := v.Op.Asm()
   639  		// If the constant is positive and fits into 32 bits, use ANDL.
   640  		// This saves a few bytes of encoding.
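        		// This is safe: writing a 32-bit register zero-extends into the
        		// full 64-bit register, and a mask with zero upper bits clears
        		// those bits anyway, so ANDL and ANDQ compute the same result here.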
   641  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   642  			asm = x86.AANDL
   643  		}
   644  		p := s.Prog(asm)
   645  		p.From.Type = obj.TYPE_CONST
   646  		p.From.Offset = v.AuxInt
   647  		p.To.Type = obj.TYPE_REG
   648  		p.To.Reg = v.Reg()
   649  
   650  	case ssa.OpAMD64SUBQconst, ssa.OpAMD64SUBLconst,
   651  		ssa.OpAMD64ANDLconst,
   652  		ssa.OpAMD64ORQconst, ssa.OpAMD64ORLconst,
   653  		ssa.OpAMD64XORQconst, ssa.OpAMD64XORLconst,
   654  		ssa.OpAMD64SHLQconst, ssa.OpAMD64SHLLconst,
   655  		ssa.OpAMD64SHRQconst, ssa.OpAMD64SHRLconst, ssa.OpAMD64SHRWconst, ssa.OpAMD64SHRBconst,
   656  		ssa.OpAMD64SARQconst, ssa.OpAMD64SARLconst, ssa.OpAMD64SARWconst, ssa.OpAMD64SARBconst,
   657  		ssa.OpAMD64ROLQconst, ssa.OpAMD64ROLLconst, ssa.OpAMD64ROLWconst, ssa.OpAMD64ROLBconst:
   658  		p := s.Prog(v.Op.Asm())
   659  		p.From.Type = obj.TYPE_CONST
   660  		p.From.Offset = v.AuxInt
   661  		p.To.Type = obj.TYPE_REG
   662  		p.To.Reg = v.Reg()
   663  	case ssa.OpAMD64SBBQcarrymask, ssa.OpAMD64SBBLcarrymask:
   664  		r := v.Reg()
   665  		p := s.Prog(v.Op.Asm())
   666  		p.From.Type = obj.TYPE_REG
   667  		p.From.Reg = r
   668  		p.To.Type = obj.TYPE_REG
   669  		p.To.Reg = r
   670  	case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8,
   671  		ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8,
   672  		ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   673  		p := s.Prog(v.Op.Asm())
   674  		memIdx(&p.From, v)
   675  		o := v.Reg()
   676  		p.To.Type = obj.TYPE_REG
   677  		p.To.Reg = o
   678  		if v.AuxInt != 0 && v.Aux == nil {
   679  			// Emit an additional LEA to add the displacement instead of creating a slow 3-operand LEA.
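        			// e.g. LEAQ1 with AuxInt c and nil Aux becomes:
        			//   LEAQ (r1)(r2*1), o
        			//   LEAQ c(o), o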
   680  			switch v.Op {
   681  			case ssa.OpAMD64LEAQ1, ssa.OpAMD64LEAQ2, ssa.OpAMD64LEAQ4, ssa.OpAMD64LEAQ8:
   682  				p = s.Prog(x86.ALEAQ)
   683  			case ssa.OpAMD64LEAL1, ssa.OpAMD64LEAL2, ssa.OpAMD64LEAL4, ssa.OpAMD64LEAL8:
   684  				p = s.Prog(x86.ALEAL)
   685  			case ssa.OpAMD64LEAW1, ssa.OpAMD64LEAW2, ssa.OpAMD64LEAW4, ssa.OpAMD64LEAW8:
   686  				p = s.Prog(x86.ALEAW)
   687  			}
   688  			p.From.Type = obj.TYPE_MEM
   689  			p.From.Reg = o
   690  			p.To.Type = obj.TYPE_REG
   691  			p.To.Reg = o
   692  		}
   693  		ssagen.AddAux(&p.From, v)
   694  	case ssa.OpAMD64LEAQ, ssa.OpAMD64LEAL, ssa.OpAMD64LEAW:
   695  		p := s.Prog(v.Op.Asm())
   696  		p.From.Type = obj.TYPE_MEM
   697  		p.From.Reg = v.Args[0].Reg()
   698  		ssagen.AddAux(&p.From, v)
   699  		p.To.Type = obj.TYPE_REG
   700  		p.To.Reg = v.Reg()
   701  	case ssa.OpAMD64CMPQ, ssa.OpAMD64CMPL, ssa.OpAMD64CMPW, ssa.OpAMD64CMPB,
   702  		ssa.OpAMD64TESTQ, ssa.OpAMD64TESTL, ssa.OpAMD64TESTW, ssa.OpAMD64TESTB,
   703  		ssa.OpAMD64BTL, ssa.OpAMD64BTQ:
   704  		opregreg(s, v.Op.Asm(), v.Args[1].Reg(), v.Args[0].Reg())
   705  	case ssa.OpAMD64UCOMISS, ssa.OpAMD64UCOMISD:
   706  		// The Go assembler has swapped operands for UCOMISx relative to CMP,
   707  		// so we must account for that right here.
   708  		opregreg(s, v.Op.Asm(), v.Args[0].Reg(), v.Args[1].Reg())
   709  	case ssa.OpAMD64CMPQconst, ssa.OpAMD64CMPLconst, ssa.OpAMD64CMPWconst, ssa.OpAMD64CMPBconst:
   710  		p := s.Prog(v.Op.Asm())
   711  		p.From.Type = obj.TYPE_REG
   712  		p.From.Reg = v.Args[0].Reg()
   713  		p.To.Type = obj.TYPE_CONST
   714  		p.To.Offset = v.AuxInt
   715  	case ssa.OpAMD64BTLconst, ssa.OpAMD64BTQconst,
   716  		ssa.OpAMD64TESTQconst, ssa.OpAMD64TESTLconst, ssa.OpAMD64TESTWconst, ssa.OpAMD64TESTBconst,
   717  		ssa.OpAMD64BTSQconst,
   718  		ssa.OpAMD64BTCQconst,
   719  		ssa.OpAMD64BTRQconst:
   720  		op := v.Op
   721  		if op == ssa.OpAMD64BTQconst && v.AuxInt < 32 {
   722  			// Emit 32-bit version because it's shorter
   723  			op = ssa.OpAMD64BTLconst
   724  		}
   725  		p := s.Prog(op.Asm())
   726  		p.From.Type = obj.TYPE_CONST
   727  		p.From.Offset = v.AuxInt
   728  		p.To.Type = obj.TYPE_REG
   729  		p.To.Reg = v.Args[0].Reg()
   730  	case ssa.OpAMD64CMPQload, ssa.OpAMD64CMPLload, ssa.OpAMD64CMPWload, ssa.OpAMD64CMPBload:
   731  		p := s.Prog(v.Op.Asm())
   732  		p.From.Type = obj.TYPE_MEM
   733  		p.From.Reg = v.Args[0].Reg()
   734  		ssagen.AddAux(&p.From, v)
   735  		p.To.Type = obj.TYPE_REG
   736  		p.To.Reg = v.Args[1].Reg()
   737  	case ssa.OpAMD64CMPQconstload, ssa.OpAMD64CMPLconstload, ssa.OpAMD64CMPWconstload, ssa.OpAMD64CMPBconstload:
   738  		sc := v.AuxValAndOff()
   739  		p := s.Prog(v.Op.Asm())
   740  		p.From.Type = obj.TYPE_MEM
   741  		p.From.Reg = v.Args[0].Reg()
   742  		ssagen.AddAux2(&p.From, v, sc.Off64())
   743  		p.To.Type = obj.TYPE_CONST
   744  		p.To.Offset = sc.Val64()
   745  	case ssa.OpAMD64CMPQloadidx8, ssa.OpAMD64CMPQloadidx1, ssa.OpAMD64CMPLloadidx4, ssa.OpAMD64CMPLloadidx1, ssa.OpAMD64CMPWloadidx2, ssa.OpAMD64CMPWloadidx1, ssa.OpAMD64CMPBloadidx1:
   746  		p := s.Prog(v.Op.Asm())
   747  		memIdx(&p.From, v)
   748  		ssagen.AddAux(&p.From, v)
   749  		p.To.Type = obj.TYPE_REG
   750  		p.To.Reg = v.Args[2].Reg()
   751  	case ssa.OpAMD64CMPQconstloadidx8, ssa.OpAMD64CMPQconstloadidx1, ssa.OpAMD64CMPLconstloadidx4, ssa.OpAMD64CMPLconstloadidx1, ssa.OpAMD64CMPWconstloadidx2, ssa.OpAMD64CMPWconstloadidx1, ssa.OpAMD64CMPBconstloadidx1:
   752  		sc := v.AuxValAndOff()
   753  		p := s.Prog(v.Op.Asm())
   754  		memIdx(&p.From, v)
   755  		ssagen.AddAux2(&p.From, v, sc.Off64())
   756  		p.To.Type = obj.TYPE_CONST
   757  		p.To.Offset = sc.Val64()
   758  	case ssa.OpAMD64MOVLconst, ssa.OpAMD64MOVQconst:
   759  		x := v.Reg()
   760  
   761  		// If flags aren't live (indicated by v.Aux == nil),
   762  		// then we can rewrite MOV $0, AX into XOR AX, AX.
   763  		if v.AuxInt == 0 && v.Aux == nil {
   764  			opregreg(s, x86.AXORL, x, x)
   765  			break
   766  		}
   767  
   768  		asm := v.Op.Asm()
   769  		// Use MOVL to move a small constant into a register
   770  		// when the constant is positive and fits into 32 bits.
   771  		if 0 <= v.AuxInt && v.AuxInt <= (1<<32-1) {
   772  			// The upper 32 bits are zeroed automatically when using MOVL.
   773  			asm = x86.AMOVL
   774  		}
   775  		p := s.Prog(asm)
   776  		p.From.Type = obj.TYPE_CONST
   777  		p.From.Offset = v.AuxInt
   778  		p.To.Type = obj.TYPE_REG
   779  		p.To.Reg = x
   780  	case ssa.OpAMD64MOVSSconst, ssa.OpAMD64MOVSDconst:
   781  		x := v.Reg()
   782  		p := s.Prog(v.Op.Asm())
   783  		p.From.Type = obj.TYPE_FCONST
   784  		p.From.Val = math.Float64frombits(uint64(v.AuxInt))
   785  		p.To.Type = obj.TYPE_REG
   786  		p.To.Reg = x
   787  	case ssa.OpAMD64MOVQload, ssa.OpAMD64MOVLload, ssa.OpAMD64MOVWload, ssa.OpAMD64MOVBload, ssa.OpAMD64MOVOload,
   788  		ssa.OpAMD64MOVSSload, ssa.OpAMD64MOVSDload, ssa.OpAMD64MOVBQSXload, ssa.OpAMD64MOVWQSXload, ssa.OpAMD64MOVLQSXload,
   789  		ssa.OpAMD64MOVBEQload, ssa.OpAMD64MOVBELload:
   790  		p := s.Prog(v.Op.Asm())
   791  		p.From.Type = obj.TYPE_MEM
   792  		p.From.Reg = v.Args[0].Reg()
   793  		ssagen.AddAux(&p.From, v)
   794  		p.To.Type = obj.TYPE_REG
   795  		p.To.Reg = v.Reg()
   796  	case ssa.OpAMD64MOVBloadidx1, ssa.OpAMD64MOVWloadidx1, ssa.OpAMD64MOVLloadidx1, ssa.OpAMD64MOVQloadidx1, ssa.OpAMD64MOVSSloadidx1, ssa.OpAMD64MOVSDloadidx1,
   797  		ssa.OpAMD64MOVQloadidx8, ssa.OpAMD64MOVSDloadidx8, ssa.OpAMD64MOVLloadidx8, ssa.OpAMD64MOVLloadidx4, ssa.OpAMD64MOVSSloadidx4, ssa.OpAMD64MOVWloadidx2,
   798  		ssa.OpAMD64MOVBELloadidx1, ssa.OpAMD64MOVBELloadidx4, ssa.OpAMD64MOVBELloadidx8, ssa.OpAMD64MOVBEQloadidx1, ssa.OpAMD64MOVBEQloadidx8:
   799  		p := s.Prog(v.Op.Asm())
   800  		memIdx(&p.From, v)
   801  		ssagen.AddAux(&p.From, v)
   802  		p.To.Type = obj.TYPE_REG
   803  		p.To.Reg = v.Reg()
   804  	case ssa.OpAMD64MOVQstore, ssa.OpAMD64MOVSSstore, ssa.OpAMD64MOVSDstore, ssa.OpAMD64MOVLstore, ssa.OpAMD64MOVWstore, ssa.OpAMD64MOVBstore, ssa.OpAMD64MOVOstore,
   805  		ssa.OpAMD64ADDQmodify, ssa.OpAMD64SUBQmodify, ssa.OpAMD64ANDQmodify, ssa.OpAMD64ORQmodify, ssa.OpAMD64XORQmodify,
   806  		ssa.OpAMD64ADDLmodify, ssa.OpAMD64SUBLmodify, ssa.OpAMD64ANDLmodify, ssa.OpAMD64ORLmodify, ssa.OpAMD64XORLmodify,
   807  		ssa.OpAMD64MOVBEQstore, ssa.OpAMD64MOVBELstore, ssa.OpAMD64MOVBEWstore:
   808  		p := s.Prog(v.Op.Asm())
   809  		p.From.Type = obj.TYPE_REG
   810  		p.From.Reg = v.Args[1].Reg()
   811  		p.To.Type = obj.TYPE_MEM
   812  		p.To.Reg = v.Args[0].Reg()
   813  		ssagen.AddAux(&p.To, v)
   814  	case ssa.OpAMD64MOVBstoreidx1, ssa.OpAMD64MOVWstoreidx1, ssa.OpAMD64MOVLstoreidx1, ssa.OpAMD64MOVQstoreidx1, ssa.OpAMD64MOVSSstoreidx1, ssa.OpAMD64MOVSDstoreidx1,
   815  		ssa.OpAMD64MOVQstoreidx8, ssa.OpAMD64MOVSDstoreidx8, ssa.OpAMD64MOVLstoreidx8, ssa.OpAMD64MOVSSstoreidx4, ssa.OpAMD64MOVLstoreidx4, ssa.OpAMD64MOVWstoreidx2,
   816  		ssa.OpAMD64ADDLmodifyidx1, ssa.OpAMD64ADDLmodifyidx4, ssa.OpAMD64ADDLmodifyidx8, ssa.OpAMD64ADDQmodifyidx1, ssa.OpAMD64ADDQmodifyidx8,
   817  		ssa.OpAMD64SUBLmodifyidx1, ssa.OpAMD64SUBLmodifyidx4, ssa.OpAMD64SUBLmodifyidx8, ssa.OpAMD64SUBQmodifyidx1, ssa.OpAMD64SUBQmodifyidx8,
   818  		ssa.OpAMD64ANDLmodifyidx1, ssa.OpAMD64ANDLmodifyidx4, ssa.OpAMD64ANDLmodifyidx8, ssa.OpAMD64ANDQmodifyidx1, ssa.OpAMD64ANDQmodifyidx8,
   819  		ssa.OpAMD64ORLmodifyidx1, ssa.OpAMD64ORLmodifyidx4, ssa.OpAMD64ORLmodifyidx8, ssa.OpAMD64ORQmodifyidx1, ssa.OpAMD64ORQmodifyidx8,
   820  		ssa.OpAMD64XORLmodifyidx1, ssa.OpAMD64XORLmodifyidx4, ssa.OpAMD64XORLmodifyidx8, ssa.OpAMD64XORQmodifyidx1, ssa.OpAMD64XORQmodifyidx8,
   821  		ssa.OpAMD64MOVBEWstoreidx1, ssa.OpAMD64MOVBEWstoreidx2, ssa.OpAMD64MOVBELstoreidx1, ssa.OpAMD64MOVBELstoreidx4, ssa.OpAMD64MOVBELstoreidx8, ssa.OpAMD64MOVBEQstoreidx1, ssa.OpAMD64MOVBEQstoreidx8:
   822  		p := s.Prog(v.Op.Asm())
   823  		p.From.Type = obj.TYPE_REG
   824  		p.From.Reg = v.Args[2].Reg()
   825  		memIdx(&p.To, v)
   826  		ssagen.AddAux(&p.To, v)
   827  	case ssa.OpAMD64ADDQconstmodify, ssa.OpAMD64ADDLconstmodify:
   828  		sc := v.AuxValAndOff()
   829  		off := sc.Off64()
   830  		val := sc.Val()
   831  		if val == 1 || val == -1 {
   832  			var asm obj.As
   833  			if v.Op == ssa.OpAMD64ADDQconstmodify {
   834  				if val == 1 {
   835  					asm = x86.AINCQ
   836  				} else {
   837  					asm = x86.ADECQ
   838  				}
   839  			} else {
   840  				if val == 1 {
   841  					asm = x86.AINCL
   842  				} else {
   843  					asm = x86.ADECL
   844  				}
   845  			}
   846  			p := s.Prog(asm)
   847  			p.To.Type = obj.TYPE_MEM
   848  			p.To.Reg = v.Args[0].Reg()
   849  			ssagen.AddAux2(&p.To, v, off)
   850  			break
   851  		}
   852  		fallthrough
   853  	case ssa.OpAMD64ANDQconstmodify, ssa.OpAMD64ANDLconstmodify, ssa.OpAMD64ORQconstmodify, ssa.OpAMD64ORLconstmodify,
   854  		ssa.OpAMD64XORQconstmodify, ssa.OpAMD64XORLconstmodify,
   855  		ssa.OpAMD64BTSQconstmodify, ssa.OpAMD64BTRQconstmodify, ssa.OpAMD64BTCQconstmodify:
   856  		sc := v.AuxValAndOff()
   857  		off := sc.Off64()
   858  		val := sc.Val64()
   859  		p := s.Prog(v.Op.Asm())
   860  		p.From.Type = obj.TYPE_CONST
   861  		p.From.Offset = val
   862  		p.To.Type = obj.TYPE_MEM
   863  		p.To.Reg = v.Args[0].Reg()
   864  		ssagen.AddAux2(&p.To, v, off)
   865  
   866  	case ssa.OpAMD64MOVQstoreconst, ssa.OpAMD64MOVLstoreconst, ssa.OpAMD64MOVWstoreconst, ssa.OpAMD64MOVBstoreconst:
   867  		p := s.Prog(v.Op.Asm())
   868  		p.From.Type = obj.TYPE_CONST
   869  		sc := v.AuxValAndOff()
   870  		p.From.Offset = sc.Val64()
   871  		p.To.Type = obj.TYPE_MEM
   872  		p.To.Reg = v.Args[0].Reg()
   873  		ssagen.AddAux2(&p.To, v, sc.Off64())
   874  	case ssa.OpAMD64MOVOstoreconst:
   875  		sc := v.AuxValAndOff()
   876  		if sc.Val() != 0 {
   877  			v.Fatalf("MOVO for non zero constants not implemented: %s", v.LongString())
   878  		}
   879  
   880  		if s.ABI != obj.ABIInternal {
   881  			// zero X15 manually
   882  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
   883  		}
   884  		p := s.Prog(v.Op.Asm())
   885  		p.From.Type = obj.TYPE_REG
   886  		p.From.Reg = x86.REG_X15
   887  		p.To.Type = obj.TYPE_MEM
   888  		p.To.Reg = v.Args[0].Reg()
   889  		ssagen.AddAux2(&p.To, v, sc.Off64())
   890  
   891  	case ssa.OpAMD64MOVQstoreconstidx1, ssa.OpAMD64MOVQstoreconstidx8, ssa.OpAMD64MOVLstoreconstidx1, ssa.OpAMD64MOVLstoreconstidx4, ssa.OpAMD64MOVWstoreconstidx1, ssa.OpAMD64MOVWstoreconstidx2, ssa.OpAMD64MOVBstoreconstidx1,
   892  		ssa.OpAMD64ADDLconstmodifyidx1, ssa.OpAMD64ADDLconstmodifyidx4, ssa.OpAMD64ADDLconstmodifyidx8, ssa.OpAMD64ADDQconstmodifyidx1, ssa.OpAMD64ADDQconstmodifyidx8,
   893  		ssa.OpAMD64ANDLconstmodifyidx1, ssa.OpAMD64ANDLconstmodifyidx4, ssa.OpAMD64ANDLconstmodifyidx8, ssa.OpAMD64ANDQconstmodifyidx1, ssa.OpAMD64ANDQconstmodifyidx8,
   894  		ssa.OpAMD64ORLconstmodifyidx1, ssa.OpAMD64ORLconstmodifyidx4, ssa.OpAMD64ORLconstmodifyidx8, ssa.OpAMD64ORQconstmodifyidx1, ssa.OpAMD64ORQconstmodifyidx8,
   895  		ssa.OpAMD64XORLconstmodifyidx1, ssa.OpAMD64XORLconstmodifyidx4, ssa.OpAMD64XORLconstmodifyidx8, ssa.OpAMD64XORQconstmodifyidx1, ssa.OpAMD64XORQconstmodifyidx8:
   896  		p := s.Prog(v.Op.Asm())
   897  		p.From.Type = obj.TYPE_CONST
   898  		sc := v.AuxValAndOff()
   899  		p.From.Offset = sc.Val64()
   900  		switch {
   901  		case p.As == x86.AADDQ && p.From.Offset == 1:
   902  			p.As = x86.AINCQ
   903  			p.From.Type = obj.TYPE_NONE
   904  		case p.As == x86.AADDQ && p.From.Offset == -1:
   905  			p.As = x86.ADECQ
   906  			p.From.Type = obj.TYPE_NONE
   907  		case p.As == x86.AADDL && p.From.Offset == 1:
   908  			p.As = x86.AINCL
   909  			p.From.Type = obj.TYPE_NONE
   910  		case p.As == x86.AADDL && p.From.Offset == -1:
   911  			p.As = x86.ADECL
   912  			p.From.Type = obj.TYPE_NONE
   913  		}
   914  		memIdx(&p.To, v)
   915  		ssagen.AddAux2(&p.To, v, sc.Off64())
   916  	case ssa.OpAMD64MOVLQSX, ssa.OpAMD64MOVWQSX, ssa.OpAMD64MOVBQSX, ssa.OpAMD64MOVLQZX, ssa.OpAMD64MOVWQZX, ssa.OpAMD64MOVBQZX,
   917  		ssa.OpAMD64CVTTSS2SL, ssa.OpAMD64CVTTSD2SL, ssa.OpAMD64CVTTSS2SQ, ssa.OpAMD64CVTTSD2SQ,
   918  		ssa.OpAMD64CVTSS2SD, ssa.OpAMD64CVTSD2SS, ssa.OpAMD64VPBROADCASTB, ssa.OpAMD64PMOVMSKB:
   919  		opregreg(s, v.Op.Asm(), v.Reg(), v.Args[0].Reg())
   920  	case ssa.OpAMD64CVTSL2SD, ssa.OpAMD64CVTSQ2SD, ssa.OpAMD64CVTSQ2SS, ssa.OpAMD64CVTSL2SS:
   921  		r := v.Reg()
   922  		// Break false dependency on destination register.
   923  		opregreg(s, x86.AXORPS, r, r)
   924  		opregreg(s, v.Op.Asm(), r, v.Args[0].Reg())
   925  	case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i, ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   926  		var p *obj.Prog
   927  		switch v.Op {
   928  		case ssa.OpAMD64MOVQi2f, ssa.OpAMD64MOVQf2i:
   929  			p = s.Prog(x86.AMOVQ)
   930  		case ssa.OpAMD64MOVLi2f, ssa.OpAMD64MOVLf2i:
   931  			p = s.Prog(x86.AMOVL)
   932  		}
   933  		p.From.Type = obj.TYPE_REG
   934  		p.From.Reg = v.Args[0].Reg()
   935  		p.To.Type = obj.TYPE_REG
   936  		p.To.Reg = v.Reg()
   937  	case ssa.OpAMD64ADDQload, ssa.OpAMD64ADDLload, ssa.OpAMD64SUBQload, ssa.OpAMD64SUBLload,
   938  		ssa.OpAMD64ANDQload, ssa.OpAMD64ANDLload, ssa.OpAMD64ORQload, ssa.OpAMD64ORLload,
   939  		ssa.OpAMD64XORQload, ssa.OpAMD64XORLload, ssa.OpAMD64ADDSDload, ssa.OpAMD64ADDSSload,
   940  		ssa.OpAMD64SUBSDload, ssa.OpAMD64SUBSSload, ssa.OpAMD64MULSDload, ssa.OpAMD64MULSSload,
   941  		ssa.OpAMD64DIVSDload, ssa.OpAMD64DIVSSload:
   942  		p := s.Prog(v.Op.Asm())
   943  		p.From.Type = obj.TYPE_MEM
   944  		p.From.Reg = v.Args[1].Reg()
   945  		ssagen.AddAux(&p.From, v)
   946  		p.To.Type = obj.TYPE_REG
   947  		p.To.Reg = v.Reg()
   948  	case ssa.OpAMD64ADDLloadidx1, ssa.OpAMD64ADDLloadidx4, ssa.OpAMD64ADDLloadidx8, ssa.OpAMD64ADDQloadidx1, ssa.OpAMD64ADDQloadidx8,
   949  		ssa.OpAMD64SUBLloadidx1, ssa.OpAMD64SUBLloadidx4, ssa.OpAMD64SUBLloadidx8, ssa.OpAMD64SUBQloadidx1, ssa.OpAMD64SUBQloadidx8,
   950  		ssa.OpAMD64ANDLloadidx1, ssa.OpAMD64ANDLloadidx4, ssa.OpAMD64ANDLloadidx8, ssa.OpAMD64ANDQloadidx1, ssa.OpAMD64ANDQloadidx8,
   951  		ssa.OpAMD64ORLloadidx1, ssa.OpAMD64ORLloadidx4, ssa.OpAMD64ORLloadidx8, ssa.OpAMD64ORQloadidx1, ssa.OpAMD64ORQloadidx8,
   952  		ssa.OpAMD64XORLloadidx1, ssa.OpAMD64XORLloadidx4, ssa.OpAMD64XORLloadidx8, ssa.OpAMD64XORQloadidx1, ssa.OpAMD64XORQloadidx8,
   953  		ssa.OpAMD64ADDSSloadidx1, ssa.OpAMD64ADDSSloadidx4, ssa.OpAMD64ADDSDloadidx1, ssa.OpAMD64ADDSDloadidx8,
   954  		ssa.OpAMD64SUBSSloadidx1, ssa.OpAMD64SUBSSloadidx4, ssa.OpAMD64SUBSDloadidx1, ssa.OpAMD64SUBSDloadidx8,
   955  		ssa.OpAMD64MULSSloadidx1, ssa.OpAMD64MULSSloadidx4, ssa.OpAMD64MULSDloadidx1, ssa.OpAMD64MULSDloadidx8,
   956  		ssa.OpAMD64DIVSSloadidx1, ssa.OpAMD64DIVSSloadidx4, ssa.OpAMD64DIVSDloadidx1, ssa.OpAMD64DIVSDloadidx8:
   957  		p := s.Prog(v.Op.Asm())
   958  
   959  		r, i := v.Args[1].Reg(), v.Args[2].Reg()
   960  		p.From.Type = obj.TYPE_MEM
   961  		p.From.Scale = v.Op.Scale()
   962  		if p.From.Scale == 1 && i == x86.REG_SP {
   963  			r, i = i, r
   964  		}
   965  		p.From.Reg = r
   966  		p.From.Index = i
   967  
   968  		ssagen.AddAux(&p.From, v)
   969  		p.To.Type = obj.TYPE_REG
   970  		p.To.Reg = v.Reg()
   971  
   972  	case ssa.OpAMD64LoweredZero:
   973  		if s.ABI != obj.ABIInternal {
   974  			// zero X15 manually
   975  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
   976  		}
   977  		ptrReg := v.Args[0].Reg()
   978  		n := v.AuxInt
   979  		if n < 16 {
   980  			v.Fatalf("Zero too small %d", n)
   981  		}
   982  		zero16 := func(off int64) {
   983  			zero16(s, ptrReg, off)
   984  		}
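        		// Note: inside the function literal, zero16 refers to the
        		// package-level zero16 helper (defined elsewhere in this package),
        		// not to the local variable being declared, which is not in scope
        		// until after this statement. The closure simply fixes s and ptrReg.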
   985  
   986  		// Generate zeroing instructions.
   987  		var off int64
   988  		for n >= 16 {
   989  			zero16(off)
   990  			off += 16
   991  			n -= 16
   992  		}
   993  		if n != 0 {
   994  			// Use a partially overlapping write.
   995  			// TODO: n <= 8, use smaller write?
   996  			zero16(off + n - 16)
   997  		}
   998  
   999  	case ssa.OpAMD64LoweredZeroLoop:
  1000  		if s.ABI != obj.ABIInternal {
  1001  			// zero X15 manually
  1002  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1003  		}
  1004  		ptrReg := v.Args[0].Reg()
  1005  		countReg := v.RegTmp()
  1006  		n := v.AuxInt
  1007  		loopSize := int64(64)
  1008  		if n < 3*loopSize {
  1009  			// - a loop count of 0 won't work.
  1010  			// - a loop count of 1 is useless.
  1011  			// - a loop count of 2 is a code size ~tie
  1012  			//     4 instructions to implement the loop
  1013  			//     4 instructions in the loop body
  1014  			//   vs
  1015  			//     8 instructions in the straightline code
  1016  			//   Might as well use straightline code.
  1017  			v.Fatalf("ZeroLoop size too small %d", n)
  1018  		}
  1019  		zero16 := func(off int64) {
  1020  			zero16(s, ptrReg, off)
  1021  		}
  1022  
  1023  		// Put iteration count in a register.
  1024  		//   MOVL    $n, countReg
  1025  		p := s.Prog(x86.AMOVL)
  1026  		p.From.Type = obj.TYPE_CONST
  1027  		p.From.Offset = n / loopSize
  1028  		p.To.Type = obj.TYPE_REG
  1029  		p.To.Reg = countReg
  1030  		cntInit := p
  1031  
  1032  		// Zero loopSize bytes starting at ptrReg.
  1033  		for i := range loopSize / 16 {
  1034  			zero16(i * 16)
  1035  		}
  1036  		//   ADDQ    $loopSize, ptrReg
  1037  		p = s.Prog(x86.AADDQ)
  1038  		p.From.Type = obj.TYPE_CONST
  1039  		p.From.Offset = loopSize
  1040  		p.To.Type = obj.TYPE_REG
  1041  		p.To.Reg = ptrReg
  1042  		//   DECL    countReg
  1043  		p = s.Prog(x86.ADECL)
  1044  		p.To.Type = obj.TYPE_REG
  1045  		p.To.Reg = countReg
  1046  		// Jump to first instruction in loop if we're not done yet.
  1047  		//   JNE     head
  1048  		p = s.Prog(x86.AJNE)
  1049  		p.To.Type = obj.TYPE_BRANCH
  1050  		p.To.SetTarget(cntInit.Link)
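        		// cntInit.Link is the instruction emitted immediately after the
        		// count initialization, i.e. the first 16-byte zeroing write
        		// (the loop head).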
  1051  
  1052  		// Multiples of the loop size are now done.
  1053  		n %= loopSize
  1054  
  1055  		// Write any fractional portion.
  1056  		var off int64
  1057  		for n >= 16 {
  1058  			zero16(off)
  1059  			off += 16
  1060  			n -= 16
  1061  		}
  1062  		if n != 0 {
  1063  			// Use partially-overlapping write.
  1064  			// TODO: n <= 8, use smaller write?
  1065  			zero16(off + n - 16)
  1066  		}
  1067  
  1068  	case ssa.OpAMD64LoweredMove:
  1069  		dstReg := v.Args[0].Reg()
  1070  		srcReg := v.Args[1].Reg()
  1071  		if dstReg == srcReg {
  1072  			break
  1073  		}
  1074  		tmpReg := int16(x86.REG_X14)
  1075  		n := v.AuxInt
  1076  		if n < 16 {
  1077  			v.Fatalf("Move too small %d", n)
  1078  		}
  1079  		// move 16 bytes from srcReg+off to dstReg+off.
  1080  		move16 := func(off int64) {
  1081  			move16(s, srcReg, dstReg, tmpReg, off)
  1082  		}
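        		// As with zero16 in LoweredZero above, move16 inside the literal
        		// refers to the package-level move16 helper; the closure fixes
        		// s, srcReg, dstReg, and tmpReg.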
  1083  
  1084  		// Generate copying instructions.
  1085  		var off int64
  1086  		for n >= 16 {
  1087  			move16(off)
  1088  			off += 16
  1089  			n -= 16
  1090  		}
  1091  		if n != 0 {
  1092  			// Use a partially overlapping read/write.
  1093  			// TODO: use smaller operations when we can?
  1094  			move16(off + n - 16)
  1095  		}
  1096  
  1097  	case ssa.OpAMD64LoweredMoveLoop:
  1098  		dstReg := v.Args[0].Reg()
  1099  		srcReg := v.Args[1].Reg()
  1100  		if dstReg == srcReg {
  1101  			break
  1102  		}
  1103  		countReg := v.RegTmp()
  1104  		tmpReg := int16(x86.REG_X14)
  1105  		n := v.AuxInt
  1106  		loopSize := int64(64)
  1107  		if n < 3*loopSize {
  1108  			// - a loop count of 0 won't work.
  1109  			// - a loop count of 1 is useless.
  1110  			// - a loop count of 2 is a code size ~tie
  1111  			//     4 instructions to implement the loop
  1112  			//     4 instructions in the loop body
  1113  			//   vs
  1114  			//     8 instructions in the straightline code
  1115  			//   Might as well use straightline code.
  1116  			v.Fatalf("ZeroLoop size too small %d", n)
  1117  		}
  1118  		// move 16 bytes from srcReg+off to dstReg+off.
  1119  		move16 := func(off int64) {
  1120  			move16(s, srcReg, dstReg, tmpReg, off)
  1121  		}
  1122  
  1123  		// Put iteration count in a register.
  1124  		//   MOVL    $n, countReg
  1125  		p := s.Prog(x86.AMOVL)
  1126  		p.From.Type = obj.TYPE_CONST
  1127  		p.From.Offset = n / loopSize
  1128  		p.To.Type = obj.TYPE_REG
  1129  		p.To.Reg = countReg
  1130  		cntInit := p
  1131  
  1132  		// Copy loopSize bytes starting at srcReg to dstReg.
  1133  		for i := range loopSize / 16 {
  1134  			move16(i * 16)
  1135  		}
  1136  		//   ADDQ    $loopSize, srcReg
  1137  		p = s.Prog(x86.AADDQ)
  1138  		p.From.Type = obj.TYPE_CONST
  1139  		p.From.Offset = loopSize
  1140  		p.To.Type = obj.TYPE_REG
  1141  		p.To.Reg = srcReg
  1142  		//   ADDQ    $loopSize, dstReg
  1143  		p = s.Prog(x86.AADDQ)
  1144  		p.From.Type = obj.TYPE_CONST
  1145  		p.From.Offset = loopSize
  1146  		p.To.Type = obj.TYPE_REG
  1147  		p.To.Reg = dstReg
  1148  		//   DECL    countReg
  1149  		p = s.Prog(x86.ADECL)
  1150  		p.To.Type = obj.TYPE_REG
  1151  		p.To.Reg = countReg
  1152  		// Jump to loop header if we're not done yet.
  1153  		//   JNE     head
  1154  		p = s.Prog(x86.AJNE)
  1155  		p.To.Type = obj.TYPE_BRANCH
  1156  		p.To.SetTarget(cntInit.Link)
  1157  
  1158  		// Multiples of the loop size are now done.
  1159  		n %= loopSize
  1160  
  1161  		// Copy any fractional portion.
  1162  		var off int64
  1163  		for n >= 16 {
  1164  			move16(off)
  1165  			off += 16
  1166  			n -= 16
  1167  		}
  1168  		if n != 0 {
  1169  			// Use partially-overlapping copy.
  1170  			move16(off + n - 16)
  1171  		}
  1172  
  1173  	case ssa.OpCopy: // TODO: use MOVQreg for reg->reg copies instead of OpCopy?
  1174  		if v.Type.IsMemory() {
  1175  			return
  1176  		}
  1177  		x := v.Args[0].Reg()
  1178  		y := v.Reg()
  1179  		if x != y {
  1180  			opregreg(s, moveByType(v.Type), y, x)
  1181  		}
  1182  	case ssa.OpLoadReg:
  1183  		if v.Type.IsFlags() {
  1184  			v.Fatalf("load flags not implemented: %v", v.LongString())
  1185  			return
  1186  		}
  1187  		p := s.Prog(loadByType(v.Type))
  1188  		ssagen.AddrAuto(&p.From, v.Args[0])
  1189  		p.To.Type = obj.TYPE_REG
  1190  		p.To.Reg = v.Reg()
  1191  
  1192  	case ssa.OpStoreReg:
  1193  		if v.Type.IsFlags() {
  1194  			v.Fatalf("store flags not implemented: %v", v.LongString())
  1195  			return
  1196  		}
  1197  		p := s.Prog(storeByType(v.Type))
  1198  		p.From.Type = obj.TYPE_REG
  1199  		p.From.Reg = v.Args[0].Reg()
  1200  		ssagen.AddrAuto(&p.To, v)
  1201  	case ssa.OpAMD64LoweredHasCPUFeature:
  1202  		p := s.Prog(x86.AMOVBLZX)
  1203  		p.From.Type = obj.TYPE_MEM
  1204  		ssagen.AddAux(&p.From, v)
  1205  		p.To.Type = obj.TYPE_REG
  1206  		p.To.Reg = v.Reg()
  1207  	case ssa.OpArgIntReg, ssa.OpArgFloatReg:
  1208  		// The assembler needs to wrap the entry safepoint/stack growth code with spill/unspill.
  1209  		// The loop only runs once.
  1210  		for _, ap := range v.Block.Func.RegArgs {
  1211  			// Pass the spill/unspill information along to the assembler, offset by size of return PC pushed on stack.
  1212  			addr := ssagen.SpillSlotAddr(ap, x86.REG_SP, v.Block.Func.Config.PtrSize)
  1213  			s.FuncInfo().AddSpill(
  1214  				obj.RegSpill{Reg: ap.Reg, Addr: addr, Unspill: loadByType(ap.Type), Spill: storeByType(ap.Type)})
  1215  		}
  1216  		v.Block.Func.RegArgs = nil
  1217  		ssagen.CheckArgReg(v)
  1218  	case ssa.OpAMD64LoweredGetClosurePtr:
  1219  		// Closure pointer is DX.
  1220  		ssagen.CheckLoweredGetClosurePtr(v)
  1221  	case ssa.OpAMD64LoweredGetG:
  1222  		if s.ABI == obj.ABIInternal {
  1223  			v.Fatalf("LoweredGetG should not appear in ABIInternal")
  1224  		}
  1225  		r := v.Reg()
  1226  		getgFromTLS(s, r)
  1227  	case ssa.OpAMD64CALLstatic, ssa.OpAMD64CALLtail:
  1228  		if s.ABI == obj.ABI0 && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABIInternal {
  1229  			// zeroing X15 when entering ABIInternal from ABI0
  1230  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1231  			// set G register from TLS
  1232  			getgFromTLS(s, x86.REG_R14)
  1233  		}
  1234  		if v.Op == ssa.OpAMD64CALLtail {
  1235  			s.TailCall(v)
  1236  			break
  1237  		}
  1238  		s.Call(v)
  1239  		if s.ABI == obj.ABIInternal && v.Aux.(*ssa.AuxCall).Fn.ABI() == obj.ABI0 {
  1240  			// restore the zero value of X15 when returning to ABIInternal code from an ABI0 call
  1241  			opregreg(s, x86.AXORPS, x86.REG_X15, x86.REG_X15)
  1242  			// set G register from TLS
  1243  			getgFromTLS(s, x86.REG_R14)
  1244  		}
  1245  	case ssa.OpAMD64CALLclosure, ssa.OpAMD64CALLinter:
  1246  		s.Call(v)
  1247  
  1248  	case ssa.OpAMD64LoweredGetCallerPC:
  1249  		p := s.Prog(x86.AMOVQ)
  1250  		p.From.Type = obj.TYPE_MEM
  1251  		p.From.Offset = -8 // PC is stored 8 bytes below first parameter.
  1252  		p.From.Name = obj.NAME_PARAM
  1253  		p.To.Type = obj.TYPE_REG
  1254  		p.To.Reg = v.Reg()
  1255  
  1256  	case ssa.OpAMD64LoweredGetCallerSP:
  1257  		// caller's SP is the address of the first arg
  1258  		mov := x86.AMOVQ
  1259  		if types.PtrSize == 4 {
  1260  			mov = x86.AMOVL
  1261  		}
  1262  		p := s.Prog(mov)
  1263  		p.From.Type = obj.TYPE_ADDR
  1264  		p.From.Offset = -base.Ctxt.Arch.FixedFrameSize // 0 on amd64, just to be consistent with other architectures
  1265  		p.From.Name = obj.NAME_PARAM
  1266  		p.To.Type = obj.TYPE_REG
  1267  		p.To.Reg = v.Reg()
  1268  
  1269  	case ssa.OpAMD64LoweredWB:
  1270  		p := s.Prog(obj.ACALL)
  1271  		p.To.Type = obj.TYPE_MEM
  1272  		p.To.Name = obj.NAME_EXTERN
  1273  		// AuxInt encodes how many buffer entries we need.
  1274  		p.To.Sym = ir.Syms.GCWriteBarrier[v.AuxInt-1]
  1275  
  1276  	case ssa.OpAMD64LoweredPanicBoundsRR, ssa.OpAMD64LoweredPanicBoundsRC, ssa.OpAMD64LoweredPanicBoundsCR, ssa.OpAMD64LoweredPanicBoundsCC:
  1277  		// Compute the constant we put in the PCData entry for this call.
  1278  		code, signed := ssa.BoundsKind(v.AuxInt).Code()
  1279  		xIsReg := false
  1280  		yIsReg := false
  1281  		xVal := 0
  1282  		yVal := 0
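        		// x and y are either small constants or register numbers (offsets
        		// from AX). BoundsEncode packs them, together with the bounds-check
        		// kind, into a single PCDATA value so the runtime's bounds panic
        		// handler can reconstruct the operands.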
  1283  		switch v.Op {
  1284  		case ssa.OpAMD64LoweredPanicBoundsRR:
  1285  			xIsReg = true
  1286  			xVal = int(v.Args[0].Reg() - x86.REG_AX)
  1287  			yIsReg = true
  1288  			yVal = int(v.Args[1].Reg() - x86.REG_AX)
  1289  		case ssa.OpAMD64LoweredPanicBoundsRC:
  1290  			xIsReg = true
  1291  			xVal = int(v.Args[0].Reg() - x86.REG_AX)
  1292  			c := v.Aux.(ssa.PanicBoundsC).C
  1293  			if c >= 0 && c <= abi.BoundsMaxConst {
  1294  				yVal = int(c)
  1295  			} else {
  1296  				// Move constant to a register
  1297  				yIsReg = true
  1298  				if yVal == xVal {
  1299  					yVal = 1
  1300  				}
  1301  				p := s.Prog(x86.AMOVQ)
  1302  				p.From.Type = obj.TYPE_CONST
  1303  				p.From.Offset = c
  1304  				p.To.Type = obj.TYPE_REG
  1305  				p.To.Reg = x86.REG_AX + int16(yVal)
  1306  			}
  1307  		case ssa.OpAMD64LoweredPanicBoundsCR:
  1308  			yIsReg = true
  1309  			yVal = int(v.Args[0].Reg() - x86.REG_AX)
  1310  			c := v.Aux.(ssa.PanicBoundsC).C
  1311  			if c >= 0 && c <= abi.BoundsMaxConst {
  1312  				xVal = int(c)
  1313  			} else {
  1314  				// Move constant to a register
  1315  				xIsReg = true
  1316  				if xVal == yVal {
  1317  					xVal = 1
  1318  				}
  1319  				p := s.Prog(x86.AMOVQ)
  1320  				p.From.Type = obj.TYPE_CONST
  1321  				p.From.Offset = c
  1322  				p.To.Type = obj.TYPE_REG
  1323  				p.To.Reg = x86.REG_AX + int16(xVal)
  1324  			}
  1325  		case ssa.OpAMD64LoweredPanicBoundsCC:
  1326  			c := v.Aux.(ssa.PanicBoundsCC).Cx
  1327  			if c >= 0 && c <= abi.BoundsMaxConst {
  1328  				xVal = int(c)
  1329  			} else {
  1330  				// Move constant to a register
  1331  				xIsReg = true
  1332  				p := s.Prog(x86.AMOVQ)
  1333  				p.From.Type = obj.TYPE_CONST
  1334  				p.From.Offset = c
  1335  				p.To.Type = obj.TYPE_REG
  1336  				p.To.Reg = x86.REG_AX + int16(xVal)
  1337  			}
  1338  			c = v.Aux.(ssa.PanicBoundsCC).Cy
  1339  			if c >= 0 && c <= abi.BoundsMaxConst {
  1340  				yVal = int(c)
  1341  			} else {
  1342  				// Move constant to a register
  1343  				yIsReg = true
  1344  				yVal = 1
  1345  				p := s.Prog(x86.AMOVQ)
  1346  				p.From.Type = obj.TYPE_CONST
  1347  				p.From.Offset = c
  1348  				p.To.Type = obj.TYPE_REG
  1349  				p.To.Reg = x86.REG_AX + int16(yVal)
  1350  			}
  1351  		}
  1352  		c := abi.BoundsEncode(code, signed, xIsReg, yIsReg, xVal, yVal)
  1353  
  1354  		p := s.Prog(obj.APCDATA)
  1355  		p.From.SetConst(abi.PCDATA_PanicBounds)
  1356  		p.To.SetConst(int64(c))
  1357  		p = s.Prog(obj.ACALL)
  1358  		p.To.Type = obj.TYPE_MEM
  1359  		p.To.Name = obj.NAME_EXTERN
  1360  		p.To.Sym = ir.Syms.PanicBounds
  1361  
  1362  	case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL,
  1363  		ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
  1364  		ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL:
  1365  		p := s.Prog(v.Op.Asm())
  1366  		p.To.Type = obj.TYPE_REG
  1367  		p.To.Reg = v.Reg()
  1368  
  1369  	case ssa.OpAMD64NEGLflags:
  1370  		p := s.Prog(v.Op.Asm())
  1371  		p.To.Type = obj.TYPE_REG
  1372  		p.To.Reg = v.Reg0()
  1373  
  1374  	case ssa.OpAMD64ADDQconstflags, ssa.OpAMD64ADDLconstflags:
  1375  		p := s.Prog(v.Op.Asm())
  1376  		p.From.Type = obj.TYPE_CONST
  1377  		p.From.Offset = v.AuxInt
  1378  		// Note: the inc/dec instructions do not modify
  1379  		// the carry flag like add$1 / sub$1 do.
  1380  		// We currently never use the CF/OF flags from
  1381  		// these instructions, so that is ok.
  1382  		switch {
  1383  		case p.As == x86.AADDQ && p.From.Offset == 1:
  1384  			p.As = x86.AINCQ
  1385  			p.From.Type = obj.TYPE_NONE
  1386  		case p.As == x86.AADDQ && p.From.Offset == -1:
  1387  			p.As = x86.ADECQ
  1388  			p.From.Type = obj.TYPE_NONE
  1389  		case p.As == x86.AADDL && p.From.Offset == 1:
  1390  			p.As = x86.AINCL
  1391  			p.From.Type = obj.TYPE_NONE
  1392  		case p.As == x86.AADDL && p.From.Offset == -1:
  1393  			p.As = x86.ADECL
  1394  			p.From.Type = obj.TYPE_NONE
  1395  		}
  1396  		p.To.Type = obj.TYPE_REG
  1397  		p.To.Reg = v.Reg0()
  1398  
  1399  	case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
  1400  		p := s.Prog(v.Op.Asm())
  1401  		p.From.Type = obj.TYPE_REG
  1402  		p.From.Reg = v.Args[0].Reg()
  1403  		p.To.Type = obj.TYPE_REG
  1404  		switch v.Op {
  1405  		case ssa.OpAMD64BSFQ, ssa.OpAMD64BSRQ:
  1406  			p.To.Reg = v.Reg0()
  1407  		case ssa.OpAMD64BSFL, ssa.OpAMD64BSRL, ssa.OpAMD64SQRTSD, ssa.OpAMD64SQRTSS:
  1408  			p.To.Reg = v.Reg()
  1409  		}
  1410  	case ssa.OpAMD64LoweredRound32F, ssa.OpAMD64LoweredRound64F:
  1411  		// input is already rounded
  1412  	case ssa.OpAMD64ROUNDSD:
  1413  		p := s.Prog(v.Op.Asm())
  1414  		val := v.AuxInt
  1415  		// 0 means math.RoundToEven, 1 Floor, 2 Ceil, 3 Trunc
  1416  		if val < 0 || val > 3 {
  1417  			v.Fatalf("Invalid rounding mode")
  1418  		}
  1419  		p.From.Offset = val
  1420  		p.From.Type = obj.TYPE_CONST
  1421  		p.AddRestSourceReg(v.Args[0].Reg())
  1422  		p.To.Type = obj.TYPE_REG
  1423  		p.To.Reg = v.Reg()
  1424  	case ssa.OpAMD64POPCNTQ, ssa.OpAMD64POPCNTL,
  1425  		ssa.OpAMD64TZCNTQ, ssa.OpAMD64TZCNTL,
  1426  		ssa.OpAMD64LZCNTQ, ssa.OpAMD64LZCNTL:
  1427  		if v.Args[0].Reg() != v.Reg() {
  1428  			// POPCNT/TZCNT/LZCNT have a false dependency on the destination register on Intel CPUs.
  1429  			// The TZCNT/LZCNT problem affects pre-Skylake models. See the discussion at https://gcc.gnu.org/bugzilla/show_bug.cgi?id=62011#c7.
  1430  			// Xor register with itself to break the dependency.
  1431  			opregreg(s, x86.AXORL, v.Reg(), v.Reg())
  1432  		}
  1433  		p := s.Prog(v.Op.Asm())
  1434  		p.From.Type = obj.TYPE_REG
  1435  		p.From.Reg = v.Args[0].Reg()
  1436  		p.To.Type = obj.TYPE_REG
  1437  		p.To.Reg = v.Reg()
  1438  
  1439  	case ssa.OpAMD64SETEQ, ssa.OpAMD64SETNE,
  1440  		ssa.OpAMD64SETL, ssa.OpAMD64SETLE,
  1441  		ssa.OpAMD64SETG, ssa.OpAMD64SETGE,
  1442  		ssa.OpAMD64SETGF, ssa.OpAMD64SETGEF,
  1443  		ssa.OpAMD64SETB, ssa.OpAMD64SETBE,
  1444  		ssa.OpAMD64SETORD, ssa.OpAMD64SETNAN,
  1445  		ssa.OpAMD64SETA, ssa.OpAMD64SETAE,
  1446  		ssa.OpAMD64SETO:
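        		// SETcc materializes the flag condition as a 0/1 value in the
        		// low byte of the destination register.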
  1447  		p := s.Prog(v.Op.Asm())
  1448  		p.To.Type = obj.TYPE_REG
  1449  		p.To.Reg = v.Reg()
  1450  
  1451  	case ssa.OpAMD64SETEQstore, ssa.OpAMD64SETNEstore,
  1452  		ssa.OpAMD64SETLstore, ssa.OpAMD64SETLEstore,
  1453  		ssa.OpAMD64SETGstore, ssa.OpAMD64SETGEstore,
  1454  		ssa.OpAMD64SETBstore, ssa.OpAMD64SETBEstore,
  1455  		ssa.OpAMD64SETAstore, ssa.OpAMD64SETAEstore:
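        		// Same as above, but the 0/1 byte is written directly to memory.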
  1456  		p := s.Prog(v.Op.Asm())
  1457  		p.To.Type = obj.TYPE_MEM
  1458  		p.To.Reg = v.Args[0].Reg()
  1459  		ssagen.AddAux(&p.To, v)
  1460  
  1461  	case ssa.OpAMD64SETEQstoreidx1, ssa.OpAMD64SETNEstoreidx1,
  1462  		ssa.OpAMD64SETLstoreidx1, ssa.OpAMD64SETLEstoreidx1,
  1463  		ssa.OpAMD64SETGstoreidx1, ssa.OpAMD64SETGEstoreidx1,
  1464  		ssa.OpAMD64SETBstoreidx1, ssa.OpAMD64SETBEstoreidx1,
  1465  		ssa.OpAMD64SETAstoreidx1, ssa.OpAMD64SETAEstoreidx1:
  1466  		p := s.Prog(v.Op.Asm())
  1467  		memIdx(&p.To, v)
  1468  		ssagen.AddAux(&p.To, v)
  1469  
  1470  	case ssa.OpAMD64SETNEF:
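        		// Floating-point != must also be true when the comparison is
        		// unordered (a NaN operand), so OR the SETNE result with SETPS
        		// (PF is set when the comparison was unordered).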
  1471  		t := v.RegTmp()
  1472  		p := s.Prog(v.Op.Asm())
  1473  		p.To.Type = obj.TYPE_REG
  1474  		p.To.Reg = v.Reg()
  1475  		q := s.Prog(x86.ASETPS)
  1476  		q.To.Type = obj.TYPE_REG
  1477  		q.To.Reg = t
  1478  		// ORL avoids a partial register write and is smaller than the ORQ that the old compiler used.
  1479  		opregreg(s, x86.AORL, v.Reg(), t)
  1480  
  1481  	case ssa.OpAMD64SETEQF:
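        		// Floating-point == must be false when the comparison is
        		// unordered, so AND the SETEQ result with SETPC
        		// (PF is clear only for an ordered comparison).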
  1482  		t := v.RegTmp()
  1483  		p := s.Prog(v.Op.Asm())
  1484  		p.To.Type = obj.TYPE_REG
  1485  		p.To.Reg = v.Reg()
  1486  		q := s.Prog(x86.ASETPC)
  1487  		q.To.Type = obj.TYPE_REG
  1488  		q.To.Reg = t
  1489  		// ANDL avoids a partial register write and is smaller than the ANDQ that the old compiler used.
  1490  		opregreg(s, x86.AANDL, v.Reg(), t)
  1491  
  1492  	case ssa.OpAMD64InvertFlags:
  1493  		v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
  1494  	case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
  1495  		v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
  1496  	case ssa.OpAMD64AddTupleFirst32, ssa.OpAMD64AddTupleFirst64:
  1497  		v.Fatalf("AddTupleFirst* should never make it to codegen %v", v.LongString())
  1498  	case ssa.OpAMD64REPSTOSQ:
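        		// REP; STOSQ stores RAX into CX successive quadwords starting at DI.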
  1499  		s.Prog(x86.AREP)
  1500  		s.Prog(x86.ASTOSQ)
  1501  	case ssa.OpAMD64REPMOVSQ:
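        		// REP; MOVSQ copies CX quadwords from SI to DI.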
  1502  		s.Prog(x86.AREP)
  1503  		s.Prog(x86.AMOVSQ)
  1504  	case ssa.OpAMD64LoweredNilCheck:
  1505  		// Issue a load which will fault if the input is nil.
  1506  		// TODO: We currently use the 2-byte instruction TESTB AX, (reg).
  1507  		// Should we use the 3-byte TESTB $0, (reg) instead? It is larger
  1508  		// but it doesn't have false dependency on AX.
  1509  		// Or maybe allocate an output register and use MOVL (reg),reg2 ?
  1510  		// That trades clobbering flags for clobbering a register.
  1511  		p := s.Prog(x86.ATESTB)
  1512  		p.From.Type = obj.TYPE_REG
  1513  		p.From.Reg = x86.REG_AX
  1514  		p.To.Type = obj.TYPE_MEM
  1515  		p.To.Reg = v.Args[0].Reg()
  1516  		if logopt.Enabled() {
  1517  			logopt.LogOpt(v.Pos, "nilcheck", "genssa", v.Block.Func.Name)
  1518  		}
  1519  		if base.Debug.Nil != 0 && v.Pos.Line() > 1 { // v.Pos.Line()==1 in generated wrappers
  1520  			base.WarnfAt(v.Pos, "generated nil check")
  1521  		}
  1522  	case ssa.OpAMD64MOVBatomicload, ssa.OpAMD64MOVLatomicload, ssa.OpAMD64MOVQatomicload:
  1523  		p := s.Prog(v.Op.Asm())
  1524  		p.From.Type = obj.TYPE_MEM
  1525  		p.From.Reg = v.Args[0].Reg()
  1526  		ssagen.AddAux(&p.From, v)
  1527  		p.To.Type = obj.TYPE_REG
  1528  		p.To.Reg = v.Reg0()
  1529  	case ssa.OpAMD64XCHGB, ssa.OpAMD64XCHGL, ssa.OpAMD64XCHGQ:
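        		// XCHG with a memory operand is implicitly locked, so no LOCK
        		// prefix is needed; the register (Reg0) receives the old memory value.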
  1530  		p := s.Prog(v.Op.Asm())
  1531  		p.From.Type = obj.TYPE_REG
  1532  		p.From.Reg = v.Reg0()
  1533  		p.To.Type = obj.TYPE_MEM
  1534  		p.To.Reg = v.Args[1].Reg()
  1535  		ssagen.AddAux(&p.To, v)
  1536  	case ssa.OpAMD64XADDLlock, ssa.OpAMD64XADDQlock:
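        		// LOCK XADD atomically adds the register to the memory operand
        		// and leaves the old memory value in the register (Reg0).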
  1537  		s.Prog(x86.ALOCK)
  1538  		p := s.Prog(v.Op.Asm())
  1539  		p.From.Type = obj.TYPE_REG
  1540  		p.From.Reg = v.Reg0()
  1541  		p.To.Type = obj.TYPE_MEM
  1542  		p.To.Reg = v.Args[1].Reg()
  1543  		ssagen.AddAux(&p.To, v)
  1544  	case ssa.OpAMD64CMPXCHGLlock, ssa.OpAMD64CMPXCHGQlock:
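        		// LOCK CMPXCHG compares AX with the memory operand; if they are
        		// equal, the new value (Args[2]) is stored and ZF is set, otherwise
        		// AX is loaded from memory. SETEQ then converts ZF into the
        		// boolean result in Reg0.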
  1545  		if v.Args[1].Reg() != x86.REG_AX {
  1546  			v.Fatalf("input[1] not in AX %s", v.LongString())
  1547  		}
  1548  		s.Prog(x86.ALOCK)
  1549  		p := s.Prog(v.Op.Asm())
  1550  		p.From.Type = obj.TYPE_REG
  1551  		p.From.Reg = v.Args[2].Reg()
  1552  		p.To.Type = obj.TYPE_MEM
  1553  		p.To.Reg = v.Args[0].Reg()
  1554  		ssagen.AddAux(&p.To, v)
  1555  		p = s.Prog(x86.ASETEQ)
  1556  		p.To.Type = obj.TYPE_REG
  1557  		p.To.Reg = v.Reg0()
  1558  	case ssa.OpAMD64ANDBlock, ssa.OpAMD64ANDLlock, ssa.OpAMD64ANDQlock, ssa.OpAMD64ORBlock, ssa.OpAMD64ORLlock, ssa.OpAMD64ORQlock:
  1559  		// Atomic memory operations that don't need to return the old value.
  1560  		s.Prog(x86.ALOCK)
  1561  		p := s.Prog(v.Op.Asm())
  1562  		p.From.Type = obj.TYPE_REG
  1563  		p.From.Reg = v.Args[1].Reg()
  1564  		p.To.Type = obj.TYPE_MEM
  1565  		p.To.Reg = v.Args[0].Reg()
  1566  		ssagen.AddAux(&p.To, v)
  1567  	case ssa.OpAMD64LoweredAtomicAnd64, ssa.OpAMD64LoweredAtomicOr64, ssa.OpAMD64LoweredAtomicAnd32, ssa.OpAMD64LoweredAtomicOr32:
  1568  		// Atomic memory operations that need to return the old value.
  1569  		// We need to do these with compare-and-exchange to get access to the old value.
  1570  		// loop:
  1571  		// MOVQ mask, tmp
  1572  		// MOVQ (addr), AX
  1573  		// ANDQ AX, tmp
  1574  		// LOCK CMPXCHGQ tmp, (addr) : note that AX is the implicit old value to compare against
  1575  		// JNE loop
  1576  		// : result in AX
  1577  		mov := x86.AMOVQ
  1578  		op := x86.AANDQ
  1579  		cmpxchg := x86.ACMPXCHGQ
  1580  		switch v.Op {
  1581  		case ssa.OpAMD64LoweredAtomicOr64:
  1582  			op = x86.AORQ
  1583  		case ssa.OpAMD64LoweredAtomicAnd32:
  1584  			mov = x86.AMOVL
  1585  			op = x86.AANDL
  1586  			cmpxchg = x86.ACMPXCHGL
  1587  		case ssa.OpAMD64LoweredAtomicOr32:
  1588  			mov = x86.AMOVL
  1589  			op = x86.AORL
  1590  			cmpxchg = x86.ACMPXCHGL
  1591  		}
  1592  		addr := v.Args[0].Reg()
  1593  		mask := v.Args[1].Reg()
  1594  		tmp := v.RegTmp()
  1595  		p1 := s.Prog(mov)
  1596  		p1.From.Type = obj.TYPE_REG
  1597  		p1.From.Reg = mask
  1598  		p1.To.Type = obj.TYPE_REG
  1599  		p1.To.Reg = tmp
  1600  		p2 := s.Prog(mov)
  1601  		p2.From.Type = obj.TYPE_MEM
  1602  		p2.From.Reg = addr
  1603  		ssagen.AddAux(&p2.From, v)
  1604  		p2.To.Type = obj.TYPE_REG
  1605  		p2.To.Reg = x86.REG_AX
  1606  		p3 := s.Prog(op)
  1607  		p3.From.Type = obj.TYPE_REG
  1608  		p3.From.Reg = x86.REG_AX
  1609  		p3.To.Type = obj.TYPE_REG
  1610  		p3.To.Reg = tmp
  1611  		s.Prog(x86.ALOCK)
  1612  		p5 := s.Prog(cmpxchg)
  1613  		p5.From.Type = obj.TYPE_REG
  1614  		p5.From.Reg = tmp
  1615  		p5.To.Type = obj.TYPE_MEM
  1616  		p5.To.Reg = addr
  1617  		ssagen.AddAux(&p5.To, v)
  1618  		p6 := s.Prog(x86.AJNE)
  1619  		p6.To.Type = obj.TYPE_BRANCH
  1620  		p6.To.SetTarget(p1)
  1621  	case ssa.OpAMD64PrefetchT0, ssa.OpAMD64PrefetchNTA:
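        		// Prefetch instructions are hints to the hardware prefetcher and
        		// have no architectural effect; they take only a memory operand.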
  1622  		p := s.Prog(v.Op.Asm())
  1623  		p.From.Type = obj.TYPE_MEM
  1624  		p.From.Reg = v.Args[0].Reg()
  1625  	case ssa.OpClobber:
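        		// Overwrite a dead stack slot with 0xdeaddead, as two 32-bit stores,
        		// so that accidental uses of dead values are easier to catch
        		// (clobberdead debugging mode).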
  1626  		p := s.Prog(x86.AMOVL)
  1627  		p.From.Type = obj.TYPE_CONST
  1628  		p.From.Offset = 0xdeaddead
  1629  		p.To.Type = obj.TYPE_MEM
  1630  		p.To.Reg = x86.REG_SP
  1631  		ssagen.AddAux(&p.To, v)
  1632  		p = s.Prog(x86.AMOVL)
  1633  		p.From.Type = obj.TYPE_CONST
  1634  		p.From.Offset = 0xdeaddead
  1635  		p.To.Type = obj.TYPE_MEM
  1636  		p.To.Reg = x86.REG_SP
  1637  		ssagen.AddAux(&p.To, v)
  1638  		p.To.Offset += 4
  1639  	case ssa.OpClobberReg:
  1640  		x := uint64(0xdeaddeaddeaddead)
  1641  		p := s.Prog(x86.AMOVQ)
  1642  		p.From.Type = obj.TYPE_CONST
  1643  		p.From.Offset = int64(x)
  1644  		p.To.Type = obj.TYPE_REG
  1645  		p.To.Reg = v.Reg()
  1646  	default:
  1647  		v.Fatalf("genValue not implemented: %s", v.LongString())
  1648  	}
  1649  }
  1650  
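        // blockJump maps each conditional block kind to the jump instruction
        // that branches to Succs[0] (asm) and the inverted jump that branches
        // to Succs[1] (invasm).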
  1651  var blockJump = [...]struct {
  1652  	asm, invasm obj.As
  1653  }{
  1654  	ssa.BlockAMD64EQ:  {x86.AJEQ, x86.AJNE},
  1655  	ssa.BlockAMD64NE:  {x86.AJNE, x86.AJEQ},
  1656  	ssa.BlockAMD64LT:  {x86.AJLT, x86.AJGE},
  1657  	ssa.BlockAMD64GE:  {x86.AJGE, x86.AJLT},
  1658  	ssa.BlockAMD64LE:  {x86.AJLE, x86.AJGT},
  1659  	ssa.BlockAMD64GT:  {x86.AJGT, x86.AJLE},
  1660  	ssa.BlockAMD64OS:  {x86.AJOS, x86.AJOC},
  1661  	ssa.BlockAMD64OC:  {x86.AJOC, x86.AJOS},
  1662  	ssa.BlockAMD64ULT: {x86.AJCS, x86.AJCC},
  1663  	ssa.BlockAMD64UGE: {x86.AJCC, x86.AJCS},
  1664  	ssa.BlockAMD64UGT: {x86.AJHI, x86.AJLS},
  1665  	ssa.BlockAMD64ULE: {x86.AJLS, x86.AJHI},
  1666  	ssa.BlockAMD64ORD: {x86.AJPC, x86.AJPS},
  1667  	ssa.BlockAMD64NAN: {x86.AJPS, x86.AJPC},
  1668  }
  1669  
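        // eqfJumps and nefJumps give the two-jump sequences for the
        // floating-point EQF and NEF blocks, which must test both ZF and PF
        // (PF is set when the comparison was unordered).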
  1670  var eqfJumps = [2][2]ssagen.IndexJump{
  1671  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPS, Index: 1}}, // next == b.Succs[0]
  1672  	{{Jump: x86.AJNE, Index: 1}, {Jump: x86.AJPC, Index: 0}}, // next == b.Succs[1]
  1673  }
  1674  var nefJumps = [2][2]ssagen.IndexJump{
  1675  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPC, Index: 1}}, // next == b.Succs[0]
  1676  	{{Jump: x86.AJNE, Index: 0}, {Jump: x86.AJPS, Index: 0}}, // next == b.Succs[1]
  1677  }
  1678  
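        // ssaGenBlock generates the control-flow instructions at the end of
        // block b. next is the block laid out immediately after b, so a jump
        // to next can be omitted.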
  1679  func ssaGenBlock(s *ssagen.State, b, next *ssa.Block) {
  1680  	switch b.Kind {
  1681  	case ssa.BlockPlain, ssa.BlockDefer:
  1682  		if b.Succs[0].Block() != next {
  1683  			p := s.Prog(obj.AJMP)
  1684  			p.To.Type = obj.TYPE_BRANCH
  1685  			s.Branches = append(s.Branches, ssagen.Branch{P: p, B: b.Succs[0].Block()})
  1686  		}
  1687  	case ssa.BlockExit, ssa.BlockRetJmp:
  1688  	case ssa.BlockRet:
  1689  		s.Prog(obj.ARET)
  1690  
  1691  	case ssa.BlockAMD64EQF:
  1692  		s.CombJump(b, next, &eqfJumps)
  1693  
  1694  	case ssa.BlockAMD64NEF:
  1695  		s.CombJump(b, next, &nefJumps)
  1696  
  1697  	case ssa.BlockAMD64EQ, ssa.BlockAMD64NE,
  1698  		ssa.BlockAMD64LT, ssa.BlockAMD64GE,
  1699  		ssa.BlockAMD64LE, ssa.BlockAMD64GT,
  1700  		ssa.BlockAMD64OS, ssa.BlockAMD64OC,
  1701  		ssa.BlockAMD64ULT, ssa.BlockAMD64UGT,
  1702  		ssa.BlockAMD64ULE, ssa.BlockAMD64UGE:
  1703  		jmp := blockJump[b.Kind]
  1704  		switch next {
  1705  		case b.Succs[0].Block():
  1706  			s.Br(jmp.invasm, b.Succs[1].Block())
  1707  		case b.Succs[1].Block():
  1708  			s.Br(jmp.asm, b.Succs[0].Block())
  1709  		default:
  1710  			if b.Likely != ssa.BranchUnlikely {
  1711  				s.Br(jmp.asm, b.Succs[0].Block())
  1712  				s.Br(obj.AJMP, b.Succs[1].Block())
  1713  			} else {
  1714  				s.Br(jmp.invasm, b.Succs[1].Block())
  1715  				s.Br(obj.AJMP, b.Succs[0].Block())
  1716  			}
  1717  		}
  1718  
  1719  	case ssa.BlockAMD64JUMPTABLE:
  1720  		// JMP      *(TABLE)(INDEX*8)
  1721  		p := s.Prog(obj.AJMP)
  1722  		p.To.Type = obj.TYPE_MEM
  1723  		p.To.Reg = b.Controls[1].Reg()
  1724  		p.To.Index = b.Controls[0].Reg()
  1725  		p.To.Scale = 8
  1726  		// Save jump tables for later resolution of the target blocks.
  1727  		s.JumpTables = append(s.JumpTables, b)
  1728  
  1729  	default:
  1730  		b.Fatalf("branch not implemented: %s", b.LongString())
  1731  	}
  1732  }
  1733  
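        // loadRegResult loads the value of n at frame offset off from its
        // stack slot into register reg and returns the generated instruction.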
  1734  func loadRegResult(s *ssagen.State, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1735  	p := s.Prog(loadByType(t))
  1736  	p.From.Type = obj.TYPE_MEM
  1737  	p.From.Name = obj.NAME_AUTO
  1738  	p.From.Sym = n.Linksym()
  1739  	p.From.Offset = n.FrameOffset() + off
  1740  	p.To.Type = obj.TYPE_REG
  1741  	p.To.Reg = reg
  1742  	return p
  1743  }
  1744  
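        // spillArgReg appends a store of argument register reg to the
        // parameter slot for n at offset off. The spill is marked as not a
        // statement so it does not affect debugger line information.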
  1745  func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg int16, n *ir.Name, off int64) *obj.Prog {
  1746  	p = pp.Append(p, storeByType(t), obj.TYPE_REG, reg, 0, obj.TYPE_MEM, 0, n.FrameOffset()+off)
  1747  	p.To.Name = obj.NAME_PARAM
  1748  	p.To.Sym = n.Linksym()
  1749  	p.Pos = p.Pos.WithNotStmt()
  1750  	return p
  1751  }
  1752  
  1753  // zero 16 bytes at reg+off.
  1754  func zero16(s *ssagen.State, reg int16, off int64) {
  1755  	//   MOVUPS  X15, off(ptrReg)
  1756  	p := s.Prog(x86.AMOVUPS)
  1757  	p.From.Type = obj.TYPE_REG
  1758  	p.From.Reg = x86.REG_X15
  1759  	p.To.Type = obj.TYPE_MEM
  1760  	p.To.Reg = reg
  1761  	p.To.Offset = off
  1762  }
  1763  
  1764  // move 16 bytes from src+off to dst+off using temporary register tmp.
  1765  func move16(s *ssagen.State, src, dst, tmp int16, off int64) {
  1766  	//   MOVUPS  off(srcReg), tmpReg
  1767  	//   MOVUPS  tmpReg, off(dstReg)
  1768  	p := s.Prog(x86.AMOVUPS)
  1769  	p.From.Type = obj.TYPE_MEM
  1770  	p.From.Reg = src
  1771  	p.From.Offset = off
  1772  	p.To.Type = obj.TYPE_REG
  1773  	p.To.Reg = tmp
  1774  	p = s.Prog(x86.AMOVUPS)
  1775  	p.From.Type = obj.TYPE_REG
  1776  	p.From.Reg = tmp
  1777  	p.To.Type = obj.TYPE_MEM
  1778  	p.To.Reg = dst
  1779  	p.To.Offset = off
  1780  }
  1781  