Source file src/cmd/internal/obj/x86/asm6.go

     1  // Inferno utils/6l/span.c
     2  // https://bitbucket.org/inferno-os/inferno-os/src/master/utils/6l/span.c
     3  //
     4  //	Copyright © 1994-1999 Lucent Technologies Inc.  All rights reserved.
     5  //	Portions Copyright © 1995-1997 C H Forsyth (forsyth@terzarima.net)
     6  //	Portions Copyright © 1997-1999 Vita Nuova Limited
     7  //	Portions Copyright © 2000-2007 Vita Nuova Holdings Limited (www.vitanuova.com)
     8  //	Portions Copyright © 2004,2006 Bruce Ellis
     9  //	Portions Copyright © 2005-2007 C H Forsyth (forsyth@terzarima.net)
    10  //	Revisions Copyright © 2000-2007 Lucent Technologies Inc. and others
    11  //	Portions Copyright © 2009 The Go Authors. All rights reserved.
    12  //
    13  // Permission is hereby granted, free of charge, to any person obtaining a copy
    14  // of this software and associated documentation files (the "Software"), to deal
    15  // in the Software without restriction, including without limitation the rights
    16  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
    17  // copies of the Software, and to permit persons to whom the Software is
    18  // furnished to do so, subject to the following conditions:
    19  //
    20  // The above copyright notice and this permission notice shall be included in
    21  // all copies or substantial portions of the Software.
    22  //
    23  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    24  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    25  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL THE
    26  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    27  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    28  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    29  // THE SOFTWARE.
    30  
    31  package x86
    32  
    33  import (
    34  	"cmd/internal/obj"
    35  	"cmd/internal/objabi"
    36  	"cmd/internal/sys"
    37  	"encoding/binary"
    38  	"fmt"
    39  	"internal/buildcfg"
    40  	"log"
    41  	"strings"
    42  )
    43  
    44  var (
    45  	plan9privates *obj.LSym // NOTE(review): neither assigned nor read in this part of the file; presumably a Plan 9-specific symbol — confirm at its use sites
    46  )
    47  
    48  // Instruction layout.
    49  
    50  // Loop alignment constants:
    51  // want to align loop entry to loopAlign-byte boundary,
    52  // and willing to insert at most maxLoopPad bytes of NOP to do so.
    53  // We define a loop entry as the target of a backward jump.
    54  //
    55  // gcc uses maxLoopPad = 10 for its 'generic x86-64' config,
    56  // and it aligns all jump targets, not just backward jump targets.
    57  //
    58  // As of 6/1/2012, the effect of setting maxLoopPad = 10 here
    59  // is very slight but negative, so the alignment is disabled by
    60  // setting maxLoopPad = 0. The code is here for reference and
    61  // for future experiments.
    62  const (
    63  	loopAlign  = 16 // boundary, in bytes, that loop entries would be aligned to
    64  	maxLoopPad = 0  // maximum bytes of NOP padding per loop entry; 0 disables alignment
    65  )
    66  
    67  // Bit flags that are used to express jump target properties.
    68  const (
    69  	// branchBackwards marks targets that are located behind.
    70  	// Used to express jumps to loop headers.
    71  	branchBackwards = (1 << iota) // 1
    72  	// branchShort marks branches whose target is close,
    73  	// with the offset in the -128..127 range.
    74  	branchShort // 2
    75  	// branchLoopHead marks loop entry.
    76  	// Used to insert padding for misaligned loops.
    77  	branchLoopHead // 4
    78  )
    79  
    80  // opBytes holds the encoding bytes of one optab entry.
    81  // Each ytab row reserves a fixed number of bytes in this array.
    82  //
    83  // The size should be the minimal number of bytes that
    84  // is enough to hold the biggest optab op lines.
    85  type opBytes [31]uint8
    86  
    87  type Optab struct { // one entry of the optab instruction table
    88  	as     obj.As  // instruction this entry describes; looked up by p.As
    89  	ytab   []ytab  // accepted operand forms, scanned in order for the first match
    90  	prefix uint8   // one of the P* constants; controls the prefix bytes to emit
    91  	op     opBytes // encoding bytes; each ytab row owns a slice of them
    92  }
    93  
    94  type movtab struct { // NOTE(review): no values of this type appear in this part of the file; the field notes are inferred from names and types — confirm against the table that uses it
    95  	as   obj.As   // instruction
    96  	ft   uint8    // presumably the class of the "from" operand — TODO confirm
    97  	f3t  uint8    // presumably the class of a third operand — TODO confirm
    98  	tt   uint8    // presumably the class of the "to" operand — TODO confirm
    99  	code uint8    // TODO document; meaning is not visible here
   100  	op   [4]uint8 // four bytes; presumably encoding bytes — TODO confirm
   101  }
   102  
   103  const (
        	// Operand classes ("Ytypes"). oclass computes the most specific class of
        	// an operand, ycover records which more general classes it also counts
        	// as, and ytab rows list the classes each instruction form accepts.
        	// Values come from iota, so inserting or reordering renumbers every class.
   104  	Yxxx = iota
   105  	Ynone
   106  	Yi0 // $0
   107  	Yi1 // $1
   108  	Yu2 // $x, x fits in uint2
   109  	Yi8 // $x, x fits in int8
   110  	Yu8 // $x, x fits in uint8
   111  	Yu7 // $x, x in 0..127 (fits in both int8 and uint8)
   112  	Ys32
   113  	Yi32
   114  	Yi64
   115  	Yiauto
   116  	Yal
   117  	Ycl
   118  	Yax
   119  	Ycx
   120  	Yrb
   121  	Yrl
   122  	Yrl32 // Yrl on 32-bit system
   123  	Yrf
   124  	Yf0
   125  	Yrx
   126  	Ymb
   127  	Yml
   128  	Ym
   129  	Ybr
   130  	Ycs
   131  	Yss
   132  	Yds
   133  	Yes
   134  	Yfs
   135  	Ygs
   136  	Ygdtr
   137  	Yidtr
   138  	Yldtr
   139  	Ymsw
   140  	Ytask
   141  	Ycr0
   142  	Ycr1
   143  	Ycr2
   144  	Ycr3
   145  	Ycr4
   146  	Ycr5
   147  	Ycr6
   148  	Ycr7
   149  	Ycr8
   150  	Ydr0
   151  	Ydr1
   152  	Ydr2
   153  	Ydr3
   154  	Ydr4
   155  	Ydr5
   156  	Ydr6
   157  	Ydr7
   158  	Ytr0
   159  	Ytr1
   160  	Ytr2
   161  	Ytr3
   162  	Ytr4
   163  	Ytr5
   164  	Ytr6
   165  	Ytr7
   166  	Ymr
   167  	Ymm
   168  	Yxr0          // X0 only. "<XMM0>" notation in Intel manual.
   169  	YxrEvexMulti4 // [ X<n> - X<n+3> ]; multisource YxrEvex
   170  	Yxr           // X0..X15
   171  	YxrEvex       // X0..X31
   172  	Yxm
   173  	YxmEvex       // YxrEvex+Ym
   174  	Yxvm          // VSIB vector array; vm32x/vm64x
   175  	YxvmEvex      // Yxvm which permits High-16 X register as index.
   176  	YyrEvexMulti4 // [ Y<n> - Y<n+3> ]; multisource YyrEvex
   177  	Yyr           // Y0..Y15
   178  	YyrEvex       // Y0..Y31
   179  	Yym
   180  	YymEvex   // YyrEvex+Ym
   181  	Yyvm      // VSIB vector array; vm32y/vm64y
   182  	YyvmEvex  // Yyvm which permits High-16 Y register as index.
   183  	YzrMulti4 // [ Z<n> - Z<n+3> ]; multisource YzrEvex
   184  	Yzr       // Z0..Z31
   185  	Yzm       // Yzr+Ym
   186  	Yzvm      // VSIB vector array; vm32z/vm64z
   187  	Yk0       // K0
   188  	Yknot0    // K1..K7; write mask
   189  	Yk        // K0..K7; used for KOP
   190  	Ykm       // Yk+Ym; used for KOP
   191  	Ytls
   192  	Ytextsize
   193  	Yindir
   194  	Ymax // one past the last class; ycover is sized Ymax*Ymax
   195  )
   196  
   197  const (
        	// Encoding cases ("Ztypes"): the first field of every ytab row.
        	// The switch on yt.zcase in doasm implements each case.
        	// Values come from iota, so the order of this list is significant.
   198  	Zxxx = iota
   199  	Zlit
   200  	Zlitm_r
   201  	Zlitr_m
   202  	Zlit_m_r
   203  	Z_rp
   204  	Zbr
   205  	Zcall
   206  	Zcallcon
   207  	Zcallduff
   208  	Zcallind
   209  	Zcallindreg
   210  	Zib_
   211  	Zib_rp
   212  	Zibo_m // opcode byte, then the encoded memory/register operand, then one immediate byte
   213  	Zibo_m_xm
   214  	Zil_
   215  	Zil_rp
   216  	Ziq_rp
   217  	Zilo_m // like Zibo_m but with a long (32-bit) immediate
   218  	Zjmp
   219  	Zjmpcon
   220  	Zloop
   221  	Zo_iw
   222  	Zm_o
   223  	Zm_r
   224  	Z_m_r
   225  	Zm2_r
   226  	Zm_r_xm
   227  	Zm_r_i_xm
   228  	Zm_r_xm_nr
   229  	Zr_m_xm_nr
   230  	Zibm_r // mmx1,mmx2/mem64,imm8
   231  	Zibr_m
   232  	Zmb_r
   233  	Zaut_r
   234  	Zo_m
   235  	Zo_m64
   236  	Zpseudo
   237  	Zr_m
   238  	Zr_m_xm
   239  	Zrp_
   240  	Z_ib
   241  	Z_il
   242  	Zm_ibo
   243  	Zm_ilo
   244  	Zib_rr
   245  	Zil_rr
   246  	Zbyte
   247  
   248  	Zvex_rm_v_r
   249  	Zvex_rm_v_ro
   250  	Zvex_r_v_rm
   251  	Zvex_i_rm_vo
   252  	Zvex_v_rm_r
   253  	Zvex_i_rm_r
   254  	Zvex_i_r_v
   255  	Zvex_i_rm_v_r
   256  	Zvex
   257  	Zvex_rm_r_vo
   258  	Zvex_i_r_rm
   259  	Zvex_hr_rm_v_r
   260  
   261  	Zevex_first // NOTE(review): looks like a range marker opening the EVEX cases — confirm at use sites
   262  	Zevex_i_r_k_rm
   263  	Zevex_i_r_rm
   264  	Zevex_i_rm_k_r
   265  	Zevex_i_rm_k_vo
   266  	Zevex_i_rm_r
   267  	Zevex_i_rm_v_k_r
   268  	Zevex_i_rm_v_r
   269  	Zevex_i_rm_vo
   270  	Zevex_k_rmo
   271  	Zevex_r_k_rm
   272  	Zevex_r_v_k_rm
   273  	Zevex_r_v_rm
   274  	Zevex_rm_k_r
   275  	Zevex_rm_v_k_r
   276  	Zevex_rm_v_r
   277  	Zevex_last // NOTE(review): looks like the matching marker closing the EVEX cases — confirm at use sites
   278  
   279  	Zmax // one past the last encoding case
   280  )
   281  
   282  const (
        	// P* values go in Optab.prefix and control the prefix bytes to emit.
        	// Those marked "symbolic" only need to be distinct from the others.
   283  	Px   = 0
   284  	Px1  = 1    // symbolic; exact value doesn't matter
   285  	P32  = 0x32 // 32-bit only
   286  	Pe   = 0x66 // operand escape
   287  	Pm   = 0x0f // 2byte opcode escape
   288  	Pq   = 0xff // both escapes: 66 0f
   289  	Pb   = 0xfe // byte operands
   290  	Pf2  = 0xf2 // xmm escape 1: f2 0f
   291  	Pf3  = 0xf3 // xmm escape 2: f3 0f
   292  	Pef3 = 0xf5 // xmm escape 2 with 16-bit prefix: 66 f3 0f
   293  	Pq3  = 0x67 // xmm escape 3: 66 48 0f
   294  	Pq4  = 0x68 // xmm escape 4: 66 0F 38
   295  	Pq4w = 0x69 // Pq4 with Rex.w 66 0F 38
   296  	Pq5  = 0x6a // xmm escape 5: F3 0F 38
   297  	Pq5w = 0x6b // Pq5 with Rex.w F3 0F 38
   298  	Pfw  = 0xf4 // Pf3 with Rex.w: f3 48 0f
   299  	Pw   = 0x48 // Rex.w
   300  	Pw8  = 0x90 // symbolic; exact value doesn't matter
   301  	Py   = 0x80 // defaults to 64-bit mode
   302  	Py1  = 0x81 // symbolic; exact value doesn't matter
   303  	Py3  = 0x83 // symbolic; exact value doesn't matter
   304  	Pavx = 0x84 // symbolic; exact value doesn't matter
   305  
        	// Rx* are single-bit flags, one bit each from bit 4 down to bit 0.
   306  	RxrEvex = 1 << 4 // AVX512 extension to REX.R/VEX.R
   307  	Rxw     = 1 << 3 // =1, 64-bit operand size
   308  	Rxr     = 1 << 2 // extend modrm reg
   309  	Rxx     = 1 << 1 // extend sib index
   310  	Rxb     = 1 << 0 // extend modrm r/m, sib base, or opcode reg
   311  )
   312  
   313  const (
   314  	// Encoding for VEX prefix in tables.
   315  	// The P, L, and W fields are chosen to match
   316  	// their eventual locations in the VEX prefix bytes.
   317  
   318  	// Bit layout of the table byte, as defined below:
   319  	// P in bits 0-1, L in bit 2, M in bits 3-4,
   320  	// avxEscape in bit 6, and W in bit 7.
   321  
   322  	// Using spare bit to make leading [E]VEX encoding byte different from
   323  	// 0x0f even if all other VEX fields are 0.
   324  	avxEscape = 1 << 6
   325  
   326  	// P field - 2 bits
   327  	vex66 = 1 << 0
   328  	vexF3 = 2 << 0
   329  	vexF2 = 3 << 0
   330  	// L field - 1 bit
        	// vexLZ, vexLIG and vex128 all share the value 0.
   331  	vexLZ  = 0 << 2
   332  	vexLIG = 0 << 2
   333  	vex128 = 0 << 2
   334  	vex256 = 1 << 2
   335  	// W field - 1 bit
        	// vexWIG and vexW0 share the value 0.
   336  	vexWIG = 0 << 7
   337  	vexW0  = 0 << 7
   338  	vexW1  = 1 << 7
   339  	// M field - 5 bits, but mostly reserved; we can store up to 3
   340  	vex0F   = 1 << 3
   341  	vex0F38 = 2 << 3
   342  	vex0F3A = 3 << 3
   343  )
   344  
   345  var ycover [Ymax * Ymax]uint8 // ycover[a*Ymax+b] = 1 means class a also counts as class b; set up in instinit
   346  
   347  var reg [MAXREG]int // NOTE(review): sized by register count and filled in outside this part of the file — confirm what each entry holds
   348  
   349  var regrex [MAXREG + 1]int // NOTE(review): presumably per-register Rx* bits; one entry longer than reg — confirm why
   350  
   351  var ynone = []ytab{
        	// Each ytab row is {Ztype, zoffset, operand classes}: Ztype picks the
        	// encoding case and zoffset is how many Optab.op bytes the row owns.
        	// This table has a single row that takes no operands.
   352  	{Zlit, 1, argList{}},
   353  }
   354  
   355  var ytext = []ytab{
   356  	{Zpseudo, 0, argList{Ymb, Ytextsize}},
   357  	{Zpseudo, 1, argList{Ymb, Yi32, Ytextsize}},
   358  }
   359  
   360  var ynop = []ytab{
        	// NOTE(review): rows 6-9 repeat the operand classes of rows 2-5. Rows are
        	// tried in order, so the repeats look unreachable — confirm before
        	// removing, because the last row carries the only non-zero zoffset.
   361  	{Zpseudo, 0, argList{}},
   362  	{Zpseudo, 0, argList{Yiauto}},
   363  	{Zpseudo, 0, argList{Yml}},
   364  	{Zpseudo, 0, argList{Yrf}},
   365  	{Zpseudo, 0, argList{Yxr}},
   366  	{Zpseudo, 0, argList{Yiauto}},
   367  	{Zpseudo, 0, argList{Yml}},
   368  	{Zpseudo, 0, argList{Yrf}},
   369  	{Zpseudo, 1, argList{Yxr}},
   370  }
   371  
   372  var yfuncdata = []ytab{
   373  	{Zpseudo, 0, argList{Yi32, Ym}},
   374  }
   375  
   376  var ypcdata = []ytab{
   377  	{Zpseudo, 0, argList{Yi32, Yi32}},
   378  }
   379  
   380  var yxorb = []ytab{ // rows: {Ztype, op bytes owned by the row, operand classes}
   381  	{Zib_, 1, argList{Yi32, Yal}},
   382  	{Zibo_m, 2, argList{Yi32, Ymb}},
   383  	{Zr_m, 1, argList{Yrb, Ymb}},
   384  	{Zm_r, 1, argList{Ymb, Yrb}},
   385  }
   386  
   387  var yaddl = []ytab{
        	// Rows are tried in order, so the Yi8 row (single immediate byte)
        	// is considered before the Yi32 rows (32-bit immediate).
   388  	{Zibo_m, 2, argList{Yi8, Yml}},
   389  	{Zil_, 1, argList{Yi32, Yax}},
   390  	{Zilo_m, 2, argList{Yi32, Yml}},
   391  	{Zr_m, 1, argList{Yrl, Yml}},
   392  	{Zm_r, 1, argList{Yml, Yrl}},
   393  }
   394  
   395  var yincl = []ytab{
   396  	{Z_rp, 1, argList{Yrl}},
   397  	{Zo_m, 2, argList{Yml}},
   398  }
   399  
   400  var yincq = []ytab{ // the Zo_m row of yincl on its own, without the Z_rp row
   401  	{Zo_m, 2, argList{Yml}},
   402  }
   403  
   404  var ycmpb = []ytab{
   405  	{Z_ib, 1, argList{Yal, Yi32}},
   406  	{Zm_ibo, 2, argList{Ymb, Yi32}},
   407  	{Zm_r, 1, argList{Ymb, Yrb}},
   408  	{Zr_m, 1, argList{Yrb, Ymb}},
   409  }
   410  
   411  var ycmpl = []ytab{
   412  	{Zm_ibo, 2, argList{Yml, Yi8}},
   413  	{Z_il, 1, argList{Yax, Yi32}},
   414  	{Zm_ilo, 2, argList{Yml, Yi32}},
   415  	{Zm_r, 1, argList{Yml, Yrl}},
   416  	{Zr_m, 1, argList{Yrl, Yml}},
   417  }
   418  
   419  var yshb = []ytab{
   420  	{Zo_m, 2, argList{Yi1, Ymb}}, // first operand is the constant $1
   421  	{Zibo_m, 2, argList{Yu8, Ymb}},
   422  	{Zo_m, 2, argList{Ycx, Ymb}},
   423  }
   424  
   425  var yshl = []ytab{
   426  	{Zo_m, 2, argList{Yi1, Yml}}, // first operand is the constant $1
   427  	{Zibo_m, 2, argList{Yu8, Yml}},
   428  	{Zo_m, 2, argList{Ycl, Yml}},
   429  	{Zo_m, 2, argList{Ycx, Yml}},
   430  }
   431  
   432  var ytestl = []ytab{
   433  	{Zil_, 1, argList{Yi32, Yax}},
   434  	{Zilo_m, 2, argList{Yi32, Yml}},
   435  	{Zr_m, 1, argList{Yrl, Yml}},
   436  	{Zm_r, 1, argList{Yml, Yrl}},
   437  }
   438  
   439  var ymovb = []ytab{
   440  	{Zr_m, 1, argList{Yrb, Ymb}},
   441  	{Zm_r, 1, argList{Ymb, Yrb}},
   442  	{Zib_rp, 1, argList{Yi32, Yrb}},
   443  	{Zibo_m, 2, argList{Yi32, Ymb}},
   444  }
   445  
   446  var ybtl = []ytab{
   447  	{Zibo_m, 2, argList{Yi8, Yml}},
   448  	{Zr_m, 1, argList{Yrl, Yml}},
   449  }
   450  
   451  var ymovw = []ytab{
   452  	{Zr_m, 1, argList{Yrl, Yml}},
   453  	{Zm_r, 1, argList{Yml, Yrl}},
   454  	{Zil_rp, 1, argList{Yi32, Yrl}},
   455  	{Zilo_m, 2, argList{Yi32, Yml}},
   456  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   457  }
   458  
   459  var ymovl = []ytab{
   460  	{Zr_m, 1, argList{Yrl, Yml}},
   461  	{Zm_r, 1, argList{Yml, Yrl}},
   462  	{Zil_rp, 1, argList{Yi32, Yrl}},
   463  	{Zilo_m, 2, argList{Yi32, Yml}},
   464  	{Zm_r_xm, 1, argList{Yml, Ymr}}, // MMX MOVD
   465  	{Zr_m_xm, 1, argList{Ymr, Yml}}, // MMX MOVD
   466  	{Zm_r_xm, 2, argList{Yml, Yxr}}, // XMM MOVD (32 bit)
   467  	{Zr_m_xm, 2, argList{Yxr, Yml}}, // XMM MOVD (32 bit)
   468  	{Zaut_r, 2, argList{Yiauto, Yrl}},
   469  }
   470  
   471  var yret = []ytab{ // rows: {Ztype, op bytes owned by the row, operand classes}
   472  	{Zo_iw, 1, argList{}},
   473  	{Zo_iw, 1, argList{Yi32}},
   474  }
   475  
   476  var ymovq = []ytab{
        	// The trailing comment on each row names the prefix and opcode byte
        	// that the row is expected to pair with.
   477  	// valid in 32-bit mode
   478  	{Zm_r_xm_nr, 1, argList{Ym, Ymr}},  // 0x6f MMX MOVQ (shorter encoding)
   479  	{Zr_m_xm_nr, 1, argList{Ymr, Ym}},  // 0x7f MMX MOVQ
   480  	{Zm_r_xm_nr, 2, argList{Yxr, Ymr}}, // Pf2, 0xd6 MOVDQ2Q
   481  	{Zm_r_xm_nr, 2, argList{Yxm, Yxr}}, // Pf3, 0x7e MOVQ xmm1/m64 -> xmm2
   482  	{Zr_m_xm_nr, 2, argList{Yxr, Yxm}}, // Pe, 0xd6 MOVQ xmm1 -> xmm2/m64
   483  
   484  	// valid only in 64-bit mode, usually with 64-bit prefix
   485  	{Zr_m, 1, argList{Yrl, Yml}},      // 0x89
   486  	{Zm_r, 1, argList{Yml, Yrl}},      // 0x8b
   487  	{Zilo_m, 2, argList{Ys32, Yrl}},   // 32 bit signed 0xc7,(0)
   488  	{Ziq_rp, 1, argList{Yi64, Yrl}},   // 0xb8 -- 32/64 bit immediate
   489  	{Zilo_m, 2, argList{Yi32, Yml}},   // 0xc7,(0)
   490  	{Zm_r_xm, 1, argList{Ymm, Ymr}},   // 0x6e MMX MOVD
   491  	{Zr_m_xm, 1, argList{Ymr, Ymm}},   // 0x7e MMX MOVD
   492  	{Zm_r_xm, 2, argList{Yml, Yxr}},   // Pe, 0x6e MOVD xmm load
   493  	{Zr_m_xm, 2, argList{Yxr, Yml}},   // Pe, 0x7e MOVD xmm store
   494  	{Zaut_r, 1, argList{Yiauto, Yrl}}, // 0 built-in LEAQ
   495  }
   496  
   497  var ymovbe = []ytab{
   498  	{Zlitm_r, 3, argList{Ym, Yrl}},
   499  	{Zlitr_m, 3, argList{Yrl, Ym}},
   500  }
   501  
   502  var ym_rl = []ytab{ // rows: {Ztype, op bytes owned by the row, operand classes}
   503  	{Zm_r, 1, argList{Ym, Yrl}},
   504  }
   505  
   506  var yrl_m = []ytab{
   507  	{Zr_m, 1, argList{Yrl, Ym}},
   508  }
   509  
   510  var ymb_rl = []ytab{
   511  	{Zmb_r, 1, argList{Ymb, Yrl}},
   512  }
   513  
   514  var yml_rl = []ytab{
   515  	{Zm_r, 1, argList{Yml, Yrl}},
   516  }
   517  
   518  var yrl_ml = []ytab{
   519  	{Zr_m, 1, argList{Yrl, Yml}},
   520  }
   521  
   522  var yml_mb = []ytab{ // NOTE(review): despite the name, both rows use the byte classes Yrb/Ymb
   523  	{Zr_m, 1, argList{Yrb, Ymb}},
   524  	{Zm_r, 1, argList{Ymb, Yrb}},
   525  }
   526  
   527  var yrb_mb = []ytab{
   528  	{Zr_m, 1, argList{Yrb, Ymb}},
   529  }
   530  
   531  var yxchg = []ytab{
   532  	{Z_rp, 1, argList{Yax, Yrl}},
   533  	{Zrp_, 1, argList{Yrl, Yax}},
   534  	{Zr_m, 1, argList{Yrl, Yml}},
   535  	{Zm_r, 1, argList{Yml, Yrl}},
   536  }
   537  
   538  var ydivl = []ytab{
   539  	{Zm_o, 2, argList{Yml}},
   540  }
   541  
   542  var ydivb = []ytab{
   543  	{Zm_o, 2, argList{Ymb}},
   544  }
   545  
   546  var yimul = []ytab{
   547  	{Zm_o, 2, argList{Yml}},
   548  	{Zib_rr, 1, argList{Yi8, Yrl}},
   549  	{Zil_rr, 1, argList{Yi32, Yrl}},
   550  	{Zm_r, 2, argList{Yml, Yrl}},
   551  }
   552  
   553  var yimul3 = []ytab{ // three-operand rows: immediate, source, destination register
   554  	{Zibm_r, 2, argList{Yi8, Yml, Yrl}},
   555  	{Zibm_r, 2, argList{Yi32, Yml, Yrl}},
   556  }
   557  
   558  var ybyte = []ytab{
   559  	{Zbyte, 1, argList{Yi64}},
   560  }
   561  
   562  var yin = []ytab{
   563  	{Zib_, 1, argList{Yi32}},
   564  	{Zlit, 1, argList{}},
   565  }
   566  
   567  var yint = []ytab{
   568  	{Zib_, 1, argList{Yi32}},
   569  }
   570  
   571  var ypushl = []ytab{
   572  	{Zrp_, 1, argList{Yrl}},
   573  	{Zm_o, 2, argList{Ym}},
   574  	{Zib_, 1, argList{Yi8}},
   575  	{Zil_, 1, argList{Yi32}},
   576  }
   577  
   578  var ypopl = []ytab{
   579  	{Z_rp, 1, argList{Yrl}},
   580  	{Zo_m, 2, argList{Ym}},
   581  }
   582  
   583  var ywrfsbase = []ytab{
   584  	{Zm_o, 2, argList{Yrl}},
   585  }
   586  
   587  var yrdrand = []ytab{
   588  	{Zo_m, 2, argList{Yrl}},
   589  }
   590  
   591  var yclflush = []ytab{
   592  	{Zo_m, 2, argList{Ym}},
   593  }
   594  
   595  var ybswap = []ytab{
   596  	{Z_rp, 2, argList{Yrl}},
   597  }
   598  
   599  var yscond = []ytab{ // rows: {Ztype, op bytes owned by the row, operand classes}
   600  	{Zo_m, 2, argList{Ymb}},
   601  }
   602  
   603  var yjcond = []ytab{
        	// NOTE(review): the second and third rows accept a leading $0 or $1
        	// operand before the branch target; its meaning is not visible here
        	// (presumably a branch hint) — confirm in doasm.
   604  	{Zbr, 0, argList{Ybr}},
   605  	{Zbr, 0, argList{Yi0, Ybr}},
   606  	{Zbr, 1, argList{Yi1, Ybr}},
   607  }
   608  
   609  var yloop = []ytab{
   610  	{Zloop, 1, argList{Ybr}},
   611  }
   612  
   613  var ycall = []ytab{
   614  	{Zcallindreg, 0, argList{Yml}},
   615  	{Zcallindreg, 2, argList{Yrx, Yrx}},
   616  	{Zcallind, 2, argList{Yindir}},
   617  	{Zcall, 0, argList{Ybr}},
   618  	{Zcallcon, 1, argList{Yi32}},
   619  }
   620  
   621  var yduff = []ytab{
   622  	{Zcallduff, 1, argList{Yi32}},
   623  }
   624  
   625  var yjmp = []ytab{
   626  	{Zo_m64, 2, argList{Yml}},
   627  	{Zjmp, 0, argList{Ybr}},
   628  	{Zjmpcon, 1, argList{Yi32}},
   629  }
   630  
   631  var yfmvd = []ytab{ // rows: {Ztype, op bytes owned by the row, operand classes}
   632  	{Zm_o, 2, argList{Ym, Yf0}},
   633  	{Zo_m, 2, argList{Yf0, Ym}},
   634  	{Zm_o, 2, argList{Yrf, Yf0}},
   635  	{Zo_m, 2, argList{Yf0, Yrf}},
   636  }
   637  
   638  var yfmvdp = []ytab{
   639  	{Zo_m, 2, argList{Yf0, Ym}},
   640  	{Zo_m, 2, argList{Yf0, Yrf}},
   641  }
   642  
   643  var yfmvf = []ytab{
   644  	{Zm_o, 2, argList{Ym, Yf0}},
   645  	{Zo_m, 2, argList{Yf0, Ym}},
   646  }
   647  
   648  var yfmvx = []ytab{
   649  	{Zm_o, 2, argList{Ym, Yf0}},
   650  }
   651  
   652  var yfmvp = []ytab{
   653  	{Zo_m, 2, argList{Yf0, Ym}},
   654  }
   655  
   656  var yfcmv = []ytab{
   657  	{Zm_o, 2, argList{Yrf, Yf0}},
   658  }
   659  
   660  var yfadd = []ytab{
   661  	{Zm_o, 2, argList{Ym, Yf0}},
   662  	{Zm_o, 2, argList{Yrf, Yf0}},
   663  	{Zo_m, 2, argList{Yf0, Yrf}},
   664  }
   665  
   666  var yfxch = []ytab{
   667  	{Zo_m, 2, argList{Yf0, Yrf}},
   668  	{Zm_o, 2, argList{Yrf, Yf0}},
   669  }
   670  
   671  var ycompp = []ytab{
   672  	{Zo_m, 2, argList{Yf0, Yrf}}, // botch is really f0,f1
   673  }
   674  
   675  var ystsw = []ytab{
   676  	{Zo_m, 2, argList{Ym}},
   677  	{Zlit, 1, argList{Yax}},
   678  }
   679  
   680  var ysvrs_mo = []ytab{
   681  	{Zm_o, 2, argList{Ym}},
   682  }
   683  
   684  // unaryDst version of "ysvrs_mo": same operand class, Zo_m instead of Zm_o.
   685  var ysvrs_om = []ytab{
   686  	{Zo_m, 2, argList{Ym}},
   687  }
   688  
   689  var ymm = []ytab{ // rows: {Ztype, op bytes owned by the row, operand classes}
   690  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   691  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   692  }
   693  
   694  var yxm = []ytab{
   695  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   696  }
   697  
   698  var yxm_q4 = []ytab{ // same operands as yxm, but Zm_r rather than Zm_r_xm
   699  	{Zm_r, 1, argList{Yxm, Yxr}},
   700  }
   701  
   702  var yxcvm1 = []ytab{
   703  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   704  	{Zm_r_xm, 2, argList{Yxm, Ymr}},
   705  }
   706  
   707  var yxcvm2 = []ytab{
   708  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   709  	{Zm_r_xm, 2, argList{Ymm, Yxr}},
   710  }
   711  
   712  var yxr = []ytab{
   713  	{Zm_r_xm, 1, argList{Yxr, Yxr}},
   714  }
   715  
   716  var yxr_ml = []ytab{
   717  	{Zr_m_xm, 1, argList{Yxr, Yml}},
   718  }
   719  
   720  var ymr = []ytab{
   721  	{Zm_r, 1, argList{Ymr, Ymr}},
   722  }
   723  
   724  var ymr_ml = []ytab{
   725  	{Zr_m_xm, 1, argList{Ymr, Yml}},
   726  }
   727  
   728  var yxcmpi = []ytab{ // the immediate (Yi8) is the last operand here, not the first
   729  	{Zm_r_i_xm, 2, argList{Yxm, Yxr, Yi8}},
   730  }
   731  
   732  var yxmov = []ytab{
   733  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   734  	{Zr_m_xm, 1, argList{Yxr, Yxm}},
   735  }
   736  
   737  var yxcvfl = []ytab{
   738  	{Zm_r_xm, 1, argList{Yxm, Yrl}},
   739  }
   740  
   741  var yxcvlf = []ytab{
   742  	{Zm_r_xm, 1, argList{Yml, Yxr}},
   743  }
   744  
   745  var yxcvfq = []ytab{
   746  	{Zm_r_xm, 2, argList{Yxm, Yrl}},
   747  }
   748  
   749  var yxcvqf = []ytab{
   750  	{Zm_r_xm, 2, argList{Yml, Yxr}},
   751  }
   752  
   753  var yps = []ytab{
   754  	{Zm_r_xm, 1, argList{Ymm, Ymr}},
   755  	{Zibo_m_xm, 2, argList{Yi8, Ymr}},
   756  	{Zm_r_xm, 2, argList{Yxm, Yxr}},
   757  	{Zibo_m_xm, 3, argList{Yi8, Yxr}},
   758  }
   759  
   760  var yxrrl = []ytab{
   761  	{Zm_r, 1, argList{Yxr, Yrl}},
   762  }
   763  
   764  var ymrxr = []ytab{
   765  	{Zm_r, 1, argList{Ymr, Yxr}},
   766  	{Zm_r_xm, 1, argList{Yxm, Yxr}},
   767  }
   768  
   769  var ymshuf = []ytab{
   770  	{Zibm_r, 2, argList{Yi8, Ymm, Ymr}},
   771  }
   772  
   773  var ymshufb = []ytab{
   774  	{Zm2_r, 2, argList{Yxm, Yxr}},
   775  }
   776  
   777  // yxshuf should never have more than 1 entry,
   778  // because some optab entries that use it have opcode sequences
   779  // longer than 2 bytes (zoffset=2 here):
   780  // ROUNDPD and ROUNDPS and recently added BLENDPD,
   781  // to name a few.
   782  var yxshuf = []ytab{
   783  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   784  }
   785  
   786  var yextrw = []ytab{
   787  	{Zibm_r, 2, argList{Yu8, Yxr, Yrl}},
   788  	{Zibr_m, 2, argList{Yu8, Yxr, Yml}},
   789  }
   790  
   791  var yextr = []ytab{
   792  	{Zibr_m, 3, argList{Yu8, Yxr, Ymm}},
   793  }
   794  
   795  var yinsrw = []ytab{
   796  	{Zibm_r, 2, argList{Yu8, Yml, Yxr}},
   797  }
   798  
   799  var yinsr = []ytab{
   800  	{Zibm_r, 3, argList{Yu8, Ymm, Yxr}},
   801  }
   802  
   803  var ypsdq = []ytab{
   804  	{Zibo_m, 2, argList{Yi8, Yxr}},
   805  }
   806  
   807  var ymskb = []ytab{
   808  	{Zm_r_xm, 2, argList{Yxr, Yrl}},
   809  	{Zm_r_xm, 1, argList{Ymr, Yrl}},
   810  }
   811  
   812  var ycrc32l = []ytab{
   813  	{Zlitm_r, 0, argList{Yml, Yrl}},
   814  }
   815  
   816  var ycrc32b = []ytab{
   817  	{Zlitm_r, 0, argList{Ymb, Yrl}},
   818  }
   819  
   820  var yprefetch = []ytab{
   821  	{Zm_o, 2, argList{Ym}},
   822  }
   823  
   824  var yaes = []ytab{
   825  	{Zlitm_r, 2, argList{Yxm, Yxr}},
   826  }
   827  
   828  var yxbegin = []ytab{
   829  	{Zjmp, 1, argList{Ybr}},
   830  }
   831  
   832  var yxabort = []ytab{
   833  	{Zib_, 1, argList{Yu8}},
   834  }
   835  
   836  var ylddqu = []ytab{
   837  	{Zm_r, 1, argList{Ym, Yxr}},
   838  }
   839  
   840  var ypalignr = []ytab{
   841  	{Zibm_r, 2, argList{Yu8, Yxm, Yxr}},
   842  }
   843  
   844  var ysha256rnds2 = []ytab{
   845  	{Zlit_m_r, 0, argList{Yxr0, Yxm, Yxr}}, // first operand must be X0 (Yxr0)
   846  }
   847  
   848  var yblendvpd = []ytab{
   849  	{Z_m_r, 1, argList{Yxr0, Yxm, Yxr}}, // first operand must be X0 (Yxr0)
   850  }
   851  
   852  var ymmxmm0f38 = []ytab{
   853  	{Zlitm_r, 3, argList{Ymm, Ymr}},
   854  	{Zlitm_r, 5, argList{Yxm, Yxr}},
   855  }
   856  
   857  var yextractps = []ytab{
   858  	{Zibr_m, 2, argList{Yu2, Yxr, Yml}}, // immediate must fit in 2 bits (Yu2)
   859  }
   860  
   861  var ysha1rnds4 = []ytab{
   862  	{Zibm_r, 2, argList{Yu2, Yxm, Yxr}}, // immediate must fit in 2 bits (Yu2)
   863  }
   864  
   865  // You are doasm, holding in your hand a *obj.Prog with p.As set to, say,
   866  // ACRC32, and p.From and p.To as operands (obj.Addr).  The linker scans optab
   867  // to find the entry with the given p.As and then looks through the ytable for
   868  // that instruction (the second field in the optab struct) for a line whose
   869  // argList entries match the Ytypes of the instruction's operands.  The
   870  // function oclass computes the specific Ytype of an operand and then the set
   871  // of more general Ytypes that it satisfies is implied by the ycover table, set
   872  // up in instinit.  For example, oclass distinguishes the constants 0 and 1
   873  // from the more general 8-bit constants, but instinit says
   874  //
   875  //	ycover[Yi0*Ymax+Ys32] = 1
   876  //	ycover[Yi1*Ymax+Ys32] = 1
   877  //	ycover[Yi8*Ymax+Ys32] = 1
   878  //
   879  // which means that Yi0, Yi1, and Yi8 all count as Ys32 (signed 32)
   880  // if that's what an instruction can handle.
   881  //
   882  // In parallel with the scan through the ytable for the appropriate line, there
   883  // is a z pointer that starts out pointing at the strange magic byte list in
   884  // the Optab struct.  With each step past a non-matching ytable line, z
   885  // advances by the 2nd entry in the line (the zoffset).  When a matching line is found, that
   886  // z pointer has the extra data to use in laying down the instruction bytes.
   887  // The actual bytes laid down are a function of the 1st entry in the line (that
   888  // is, the Ztype) and the z bytes.
   889  //
   890  // For example, let's look at AADDL.  The optab line says:
   891  //
   892  //	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   893  //
   894  // and yaddl says
   895  //
   896  //	var yaddl = []ytab{
   897  //	        {Zibo_m, 2, argList{Yi8, Yml}},
   898  //	        {Zil_, 1, argList{Yi32, Yax}},
   899  //	        {Zilo_m, 2, argList{Yi32, Yml}},
   900  //	        {Zr_m, 1, argList{Yrl, Yml}},
   901  //	        {Zm_r, 1, argList{Yml, Yrl}},
   902  //	}
   903  //
   904  // so there are 5 possible types of ADDL instruction that can be laid down, and
   905  // possible states used to lay them down (Ztype and z pointer, assuming z
   906  // points at opBytes{0x83, 00, 0x05,0x81, 00, 0x01, 0x03}) are:
   907  //
   908  //	Yi8, Yml -> Zibo_m, z (0x83, 00)
   909  //	Yi32, Yax -> Zil_, z+2 (0x05)
   910  //	Yi32, Yml -> Zilo_m, z+2+1 (0x81, 0x00)
   911  //	Yrl, Yml -> Zr_m, z+2+1+2 (0x01)
   912  //	Yml, Yrl -> Zm_r, z+2+1+2+1 (0x03)
   913  //
   914  // The Pconstant in the optab line controls the prefix bytes to emit.  That's
   915  // relatively straightforward as this program goes.
   916  //
   917  // The switch on yt.zcase in doasm implements the various Z cases.  Zibo_m, for
   918  // example, is an opcode byte (z[0]) then an asmando (which is some kind of
   919  // encoded addressing mode for the Yml arg), and then a single immediate byte.
   920  // Zilo_m is the same but a long (32-bit) immediate.
   921  var optab =
   922  // as, ytab, andproto, opcode
   923  [...]Optab{
   924  	{obj.AXXX, nil, 0, opBytes{}},
   925  	{AAAA, ynone, P32, opBytes{0x37}},
   926  	{AAAD, ynone, P32, opBytes{0xd5, 0x0a}},
   927  	{AAAM, ynone, P32, opBytes{0xd4, 0x0a}},
   928  	{AAAS, ynone, P32, opBytes{0x3f}},
   929  	{AADCB, yxorb, Pb, opBytes{0x14, 0x80, 02, 0x10, 0x12}},
   930  	{AADCL, yaddl, Px, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   931  	{AADCQ, yaddl, Pw, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   932  	{AADCW, yaddl, Pe, opBytes{0x83, 02, 0x15, 0x81, 02, 0x11, 0x13}},
   933  	{AADCXL, yml_rl, Pq4, opBytes{0xf6}},
   934  	{AADCXQ, yml_rl, Pq4w, opBytes{0xf6}},
   935  	{AADDB, yxorb, Pb, opBytes{0x04, 0x80, 00, 0x00, 0x02}},
   936  	{AADDL, yaddl, Px, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   937  	{AADDPD, yxm, Pq, opBytes{0x58}},
   938  	{AADDPS, yxm, Pm, opBytes{0x58}},
   939  	{AADDQ, yaddl, Pw, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   940  	{AADDSD, yxm, Pf2, opBytes{0x58}},
   941  	{AADDSS, yxm, Pf3, opBytes{0x58}},
   942  	{AADDSUBPD, yxm, Pq, opBytes{0xd0}},
   943  	{AADDSUBPS, yxm, Pf2, opBytes{0xd0}},
   944  	{AADDW, yaddl, Pe, opBytes{0x83, 00, 0x05, 0x81, 00, 0x01, 0x03}},
   945  	{AADOXL, yml_rl, Pq5, opBytes{0xf6}},
   946  	{AADOXQ, yml_rl, Pq5w, opBytes{0xf6}},
   947  	{AADJSP, nil, 0, opBytes{}},
   948  	{AANDB, yxorb, Pb, opBytes{0x24, 0x80, 04, 0x20, 0x22}},
   949  	{AANDL, yaddl, Px, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   950  	{AANDNPD, yxm, Pq, opBytes{0x55}},
   951  	{AANDNPS, yxm, Pm, opBytes{0x55}},
   952  	{AANDPD, yxm, Pq, opBytes{0x54}},
   953  	{AANDPS, yxm, Pm, opBytes{0x54}},
   954  	{AANDQ, yaddl, Pw, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   955  	{AANDW, yaddl, Pe, opBytes{0x83, 04, 0x25, 0x81, 04, 0x21, 0x23}},
   956  	{AARPL, yrl_ml, P32, opBytes{0x63}},
   957  	{ABOUNDL, yrl_m, P32, opBytes{0x62}},
   958  	{ABOUNDW, yrl_m, Pe, opBytes{0x62}},
   959  	{ABSFL, yml_rl, Pm, opBytes{0xbc}},
   960  	{ABSFQ, yml_rl, Pw, opBytes{0x0f, 0xbc}},
   961  	{ABSFW, yml_rl, Pq, opBytes{0xbc}},
   962  	{ABSRL, yml_rl, Pm, opBytes{0xbd}},
   963  	{ABSRQ, yml_rl, Pw, opBytes{0x0f, 0xbd}},
   964  	{ABSRW, yml_rl, Pq, opBytes{0xbd}},
   965  	{ABSWAPL, ybswap, Px, opBytes{0x0f, 0xc8}},
   966  	{ABSWAPQ, ybswap, Pw, opBytes{0x0f, 0xc8}},
   967  	{ABTCL, ybtl, Pm, opBytes{0xba, 07, 0xbb}},
   968  	{ABTCQ, ybtl, Pw, opBytes{0x0f, 0xba, 07, 0x0f, 0xbb}},
   969  	{ABTCW, ybtl, Pq, opBytes{0xba, 07, 0xbb}},
   970  	{ABTL, ybtl, Pm, opBytes{0xba, 04, 0xa3}},
   971  	{ABTQ, ybtl, Pw, opBytes{0x0f, 0xba, 04, 0x0f, 0xa3}},
   972  	{ABTRL, ybtl, Pm, opBytes{0xba, 06, 0xb3}},
   973  	{ABTRQ, ybtl, Pw, opBytes{0x0f, 0xba, 06, 0x0f, 0xb3}},
   974  	{ABTRW, ybtl, Pq, opBytes{0xba, 06, 0xb3}},
   975  	{ABTSL, ybtl, Pm, opBytes{0xba, 05, 0xab}},
   976  	{ABTSQ, ybtl, Pw, opBytes{0x0f, 0xba, 05, 0x0f, 0xab}},
   977  	{ABTSW, ybtl, Pq, opBytes{0xba, 05, 0xab}},
   978  	{ABTW, ybtl, Pq, opBytes{0xba, 04, 0xa3}},
   979  	{ABYTE, ybyte, Px, opBytes{1}},
   980  	{obj.ACALL, ycall, Px, opBytes{0xff, 02, 0xff, 0x15, 0xe8}},
   981  	{ACBW, ynone, Pe, opBytes{0x98}},
   982  	{ACDQ, ynone, Px, opBytes{0x99}},
   983  	{ACDQE, ynone, Pw, opBytes{0x98}},
   984  	{ACLAC, ynone, Pm, opBytes{01, 0xca}},
   985  	{ACLC, ynone, Px, opBytes{0xf8}},
   986  	{ACLD, ynone, Px, opBytes{0xfc}},
   987  	{ACLDEMOTE, yclflush, Pm, opBytes{0x1c, 00}},
   988  	{ACLFLUSH, yclflush, Pm, opBytes{0xae, 07}},
   989  	{ACLFLUSHOPT, yclflush, Pq, opBytes{0xae, 07}},
   990  	{ACLI, ynone, Px, opBytes{0xfa}},
   991  	{ACLTS, ynone, Pm, opBytes{0x06}},
   992  	{ACLWB, yclflush, Pq, opBytes{0xae, 06}},
   993  	{ACMC, ynone, Px, opBytes{0xf5}},
   994  	{ACMOVLCC, yml_rl, Pm, opBytes{0x43}},
   995  	{ACMOVLCS, yml_rl, Pm, opBytes{0x42}},
   996  	{ACMOVLEQ, yml_rl, Pm, opBytes{0x44}},
   997  	{ACMOVLGE, yml_rl, Pm, opBytes{0x4d}},
   998  	{ACMOVLGT, yml_rl, Pm, opBytes{0x4f}},
   999  	{ACMOVLHI, yml_rl, Pm, opBytes{0x47}},
  1000  	{ACMOVLLE, yml_rl, Pm, opBytes{0x4e}},
  1001  	{ACMOVLLS, yml_rl, Pm, opBytes{0x46}},
  1002  	{ACMOVLLT, yml_rl, Pm, opBytes{0x4c}},
  1003  	{ACMOVLMI, yml_rl, Pm, opBytes{0x48}},
  1004  	{ACMOVLNE, yml_rl, Pm, opBytes{0x45}},
  1005  	{ACMOVLOC, yml_rl, Pm, opBytes{0x41}},
  1006  	{ACMOVLOS, yml_rl, Pm, opBytes{0x40}},
  1007  	{ACMOVLPC, yml_rl, Pm, opBytes{0x4b}},
  1008  	{ACMOVLPL, yml_rl, Pm, opBytes{0x49}},
  1009  	{ACMOVLPS, yml_rl, Pm, opBytes{0x4a}},
  1010  	{ACMOVQCC, yml_rl, Pw, opBytes{0x0f, 0x43}},
  1011  	{ACMOVQCS, yml_rl, Pw, opBytes{0x0f, 0x42}},
  1012  	{ACMOVQEQ, yml_rl, Pw, opBytes{0x0f, 0x44}},
  1013  	{ACMOVQGE, yml_rl, Pw, opBytes{0x0f, 0x4d}},
  1014  	{ACMOVQGT, yml_rl, Pw, opBytes{0x0f, 0x4f}},
  1015  	{ACMOVQHI, yml_rl, Pw, opBytes{0x0f, 0x47}},
  1016  	{ACMOVQLE, yml_rl, Pw, opBytes{0x0f, 0x4e}},
  1017  	{ACMOVQLS, yml_rl, Pw, opBytes{0x0f, 0x46}},
  1018  	{ACMOVQLT, yml_rl, Pw, opBytes{0x0f, 0x4c}},
  1019  	{ACMOVQMI, yml_rl, Pw, opBytes{0x0f, 0x48}},
  1020  	{ACMOVQNE, yml_rl, Pw, opBytes{0x0f, 0x45}},
  1021  	{ACMOVQOC, yml_rl, Pw, opBytes{0x0f, 0x41}},
  1022  	{ACMOVQOS, yml_rl, Pw, opBytes{0x0f, 0x40}},
  1023  	{ACMOVQPC, yml_rl, Pw, opBytes{0x0f, 0x4b}},
  1024  	{ACMOVQPL, yml_rl, Pw, opBytes{0x0f, 0x49}},
  1025  	{ACMOVQPS, yml_rl, Pw, opBytes{0x0f, 0x4a}},
  1026  	{ACMOVWCC, yml_rl, Pq, opBytes{0x43}},
  1027  	{ACMOVWCS, yml_rl, Pq, opBytes{0x42}},
  1028  	{ACMOVWEQ, yml_rl, Pq, opBytes{0x44}},
  1029  	{ACMOVWGE, yml_rl, Pq, opBytes{0x4d}},
  1030  	{ACMOVWGT, yml_rl, Pq, opBytes{0x4f}},
  1031  	{ACMOVWHI, yml_rl, Pq, opBytes{0x47}},
  1032  	{ACMOVWLE, yml_rl, Pq, opBytes{0x4e}},
  1033  	{ACMOVWLS, yml_rl, Pq, opBytes{0x46}},
  1034  	{ACMOVWLT, yml_rl, Pq, opBytes{0x4c}},
  1035  	{ACMOVWMI, yml_rl, Pq, opBytes{0x48}},
  1036  	{ACMOVWNE, yml_rl, Pq, opBytes{0x45}},
  1037  	{ACMOVWOC, yml_rl, Pq, opBytes{0x41}},
  1038  	{ACMOVWOS, yml_rl, Pq, opBytes{0x40}},
  1039  	{ACMOVWPC, yml_rl, Pq, opBytes{0x4b}},
  1040  	{ACMOVWPL, yml_rl, Pq, opBytes{0x49}},
  1041  	{ACMOVWPS, yml_rl, Pq, opBytes{0x4a}},
  1042  	{ACMPB, ycmpb, Pb, opBytes{0x3c, 0x80, 07, 0x38, 0x3a}},
  1043  	{ACMPL, ycmpl, Px, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1044  	{ACMPPD, yxcmpi, Px, opBytes{Pe, 0xc2}},
  1045  	{ACMPPS, yxcmpi, Pm, opBytes{0xc2, 0}},
  1046  	{ACMPQ, ycmpl, Pw, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1047  	{ACMPSB, ynone, Pb, opBytes{0xa6}},
  1048  	{ACMPSD, yxcmpi, Px, opBytes{Pf2, 0xc2}},
  1049  	{ACMPSL, ynone, Px, opBytes{0xa7}},
  1050  	{ACMPSQ, ynone, Pw, opBytes{0xa7}},
  1051  	{ACMPSS, yxcmpi, Px, opBytes{Pf3, 0xc2}},
  1052  	{ACMPSW, ynone, Pe, opBytes{0xa7}},
  1053  	{ACMPW, ycmpl, Pe, opBytes{0x83, 07, 0x3d, 0x81, 07, 0x39, 0x3b}},
  1054  	{ACOMISD, yxm, Pe, opBytes{0x2f}},
  1055  	{ACOMISS, yxm, Pm, opBytes{0x2f}},
  1056  	{ACPUID, ynone, Pm, opBytes{0xa2}},
  1057  	{ACVTPL2PD, yxcvm2, Px, opBytes{Pf3, 0xe6, Pe, 0x2a}},
  1058  	{ACVTPL2PS, yxcvm2, Pm, opBytes{0x5b, 0, 0x2a, 0}},
  1059  	{ACVTPD2PL, yxcvm1, Px, opBytes{Pf2, 0xe6, Pe, 0x2d}},
  1060  	{ACVTPD2PS, yxm, Pe, opBytes{0x5a}},
  1061  	{ACVTPS2PL, yxcvm1, Px, opBytes{Pe, 0x5b, Pm, 0x2d}},
  1062  	{ACVTPS2PD, yxm, Pm, opBytes{0x5a}},
  1063  	{ACVTSD2SL, yxcvfl, Pf2, opBytes{0x2d}},
  1064  	{ACVTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2d}},
  1065  	{ACVTSD2SS, yxm, Pf2, opBytes{0x5a}},
  1066  	{ACVTSL2SD, yxcvlf, Pf2, opBytes{0x2a}},
  1067  	{ACVTSQ2SD, yxcvqf, Pw, opBytes{Pf2, 0x2a}},
  1068  	{ACVTSL2SS, yxcvlf, Pf3, opBytes{0x2a}},
  1069  	{ACVTSQ2SS, yxcvqf, Pw, opBytes{Pf3, 0x2a}},
  1070  	{ACVTSS2SD, yxm, Pf3, opBytes{0x5a}},
  1071  	{ACVTSS2SL, yxcvfl, Pf3, opBytes{0x2d}},
  1072  	{ACVTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2d}},
  1073  	{ACVTTPD2PL, yxcvm1, Px, opBytes{Pe, 0xe6, Pe, 0x2c}},
  1074  	{ACVTTPS2PL, yxcvm1, Px, opBytes{Pf3, 0x5b, Pm, 0x2c}},
  1075  	{ACVTTSD2SL, yxcvfl, Pf2, opBytes{0x2c}},
  1076  	{ACVTTSD2SQ, yxcvfq, Pw, opBytes{Pf2, 0x2c}},
  1077  	{ACVTTSS2SL, yxcvfl, Pf3, opBytes{0x2c}},
  1078  	{ACVTTSS2SQ, yxcvfq, Pw, opBytes{Pf3, 0x2c}},
  1079  	{ACWD, ynone, Pe, opBytes{0x99}},
  1080  	{ACWDE, ynone, Px, opBytes{0x98}},
  1081  	{ACQO, ynone, Pw, opBytes{0x99}},
  1082  	{ADAA, ynone, P32, opBytes{0x27}},
  1083  	{ADAS, ynone, P32, opBytes{0x2f}},
  1084  	{ADECB, yscond, Pb, opBytes{0xfe, 01}},
  1085  	{ADECL, yincl, Px1, opBytes{0x48, 0xff, 01}},
  1086  	{ADECQ, yincq, Pw, opBytes{0xff, 01}},
  1087  	{ADECW, yincq, Pe, opBytes{0xff, 01}},
  1088  	{ADIVB, ydivb, Pb, opBytes{0xf6, 06}},
  1089  	{ADIVL, ydivl, Px, opBytes{0xf7, 06}},
  1090  	{ADIVPD, yxm, Pe, opBytes{0x5e}},
  1091  	{ADIVPS, yxm, Pm, opBytes{0x5e}},
  1092  	{ADIVQ, ydivl, Pw, opBytes{0xf7, 06}},
  1093  	{ADIVSD, yxm, Pf2, opBytes{0x5e}},
  1094  	{ADIVSS, yxm, Pf3, opBytes{0x5e}},
  1095  	{ADIVW, ydivl, Pe, opBytes{0xf7, 06}},
  1096  	{ADPPD, yxshuf, Pq, opBytes{0x3a, 0x41, 0}},
  1097  	{ADPPS, yxshuf, Pq, opBytes{0x3a, 0x40, 0}},
  1098  	{AEMMS, ynone, Pm, opBytes{0x77}},
  1099  	{AENDBR64, ynone, Pf3, opBytes{0x1e, 0xfa}},
  1100  	{AEXTRACTPS, yextractps, Pq, opBytes{0x3a, 0x17, 0}},
  1101  	{AENTER, nil, 0, opBytes{}}, // botch
  1102  	{AFXRSTOR, ysvrs_mo, Pm, opBytes{0xae, 01, 0xae, 01}},
  1103  	{AFXSAVE, ysvrs_om, Pm, opBytes{0xae, 00, 0xae, 00}},
  1104  	{AFXRSTOR64, ysvrs_mo, Pw, opBytes{0x0f, 0xae, 01, 0x0f, 0xae, 01}},
  1105  	{AFXSAVE64, ysvrs_om, Pw, opBytes{0x0f, 0xae, 00, 0x0f, 0xae, 00}},
  1106  	{AHLT, ynone, Px, opBytes{0xf4}},
  1107  	{AIDIVB, ydivb, Pb, opBytes{0xf6, 07}},
  1108  	{AIDIVL, ydivl, Px, opBytes{0xf7, 07}},
  1109  	{AIDIVQ, ydivl, Pw, opBytes{0xf7, 07}},
  1110  	{AIDIVW, ydivl, Pe, opBytes{0xf7, 07}},
  1111  	{AIMULB, ydivb, Pb, opBytes{0xf6, 05}},
  1112  	{AIMULL, yimul, Px, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1113  	{AIMULQ, yimul, Pw, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1114  	{AIMULW, yimul, Pe, opBytes{0xf7, 05, 0x6b, 0x69, Pm, 0xaf}},
  1115  	{AIMUL3W, yimul3, Pe, opBytes{0x6b, 00, 0x69, 00}},
  1116  	{AIMUL3L, yimul3, Px, opBytes{0x6b, 00, 0x69, 00}},
  1117  	{AIMUL3Q, yimul3, Pw, opBytes{0x6b, 00, 0x69, 00}},
  1118  	{AINB, yin, Pb, opBytes{0xe4, 0xec}},
  1119  	{AINW, yin, Pe, opBytes{0xe5, 0xed}},
  1120  	{AINL, yin, Px, opBytes{0xe5, 0xed}},
  1121  	{AINCB, yscond, Pb, opBytes{0xfe, 00}},
  1122  	{AINCL, yincl, Px1, opBytes{0x40, 0xff, 00}},
  1123  	{AINCQ, yincq, Pw, opBytes{0xff, 00}},
  1124  	{AINCW, yincq, Pe, opBytes{0xff, 00}},
  1125  	{AINSB, ynone, Pb, opBytes{0x6c}},
  1126  	{AINSL, ynone, Px, opBytes{0x6d}},
  1127  	{AINSERTPS, yxshuf, Pq, opBytes{0x3a, 0x21, 0}},
  1128  	{AINSW, ynone, Pe, opBytes{0x6d}},
  1129  	{AICEBP, ynone, Px, opBytes{0xf1}},
  1130  	{AINT, yint, Px, opBytes{0xcd}},
  1131  	{AINTO, ynone, P32, opBytes{0xce}},
  1132  	{AIRETL, ynone, Px, opBytes{0xcf}},
  1133  	{AIRETQ, ynone, Pw, opBytes{0xcf}},
  1134  	{AIRETW, ynone, Pe, opBytes{0xcf}},
  1135  	{AJCC, yjcond, Px, opBytes{0x73, 0x83, 00}},
  1136  	{AJCS, yjcond, Px, opBytes{0x72, 0x82}},
  1137  	{AJCXZL, yloop, Px, opBytes{0xe3}},
  1138  	{AJCXZW, yloop, Px, opBytes{0xe3}},
  1139  	{AJCXZQ, yloop, Px, opBytes{0xe3}},
  1140  	{AJEQ, yjcond, Px, opBytes{0x74, 0x84}},
  1141  	{AJGE, yjcond, Px, opBytes{0x7d, 0x8d}},
  1142  	{AJGT, yjcond, Px, opBytes{0x7f, 0x8f}},
  1143  	{AJHI, yjcond, Px, opBytes{0x77, 0x87}},
  1144  	{AJLE, yjcond, Px, opBytes{0x7e, 0x8e}},
  1145  	{AJLS, yjcond, Px, opBytes{0x76, 0x86}},
  1146  	{AJLT, yjcond, Px, opBytes{0x7c, 0x8c}},
  1147  	{AJMI, yjcond, Px, opBytes{0x78, 0x88}},
  1148  	{obj.AJMP, yjmp, Px, opBytes{0xff, 04, 0xeb, 0xe9}},
  1149  	{AJNE, yjcond, Px, opBytes{0x75, 0x85}},
  1150  	{AJOC, yjcond, Px, opBytes{0x71, 0x81, 00}},
  1151  	{AJOS, yjcond, Px, opBytes{0x70, 0x80, 00}},
  1152  	{AJPC, yjcond, Px, opBytes{0x7b, 0x8b}},
  1153  	{AJPL, yjcond, Px, opBytes{0x79, 0x89}},
  1154  	{AJPS, yjcond, Px, opBytes{0x7a, 0x8a}},
  1155  	{AHADDPD, yxm, Pq, opBytes{0x7c}},
  1156  	{AHADDPS, yxm, Pf2, opBytes{0x7c}},
  1157  	{AHSUBPD, yxm, Pq, opBytes{0x7d}},
  1158  	{AHSUBPS, yxm, Pf2, opBytes{0x7d}},
  1159  	{ALAHF, ynone, Px, opBytes{0x9f}},
  1160  	{ALARL, yml_rl, Pm, opBytes{0x02}},
  1161  	{ALARQ, yml_rl, Pw, opBytes{0x0f, 0x02}},
  1162  	{ALARW, yml_rl, Pq, opBytes{0x02}},
  1163  	{ALDDQU, ylddqu, Pf2, opBytes{0xf0}},
  1164  	{ALDMXCSR, ysvrs_mo, Pm, opBytes{0xae, 02, 0xae, 02}},
  1165  	{ALEAL, ym_rl, Px, opBytes{0x8d}},
  1166  	{ALEAQ, ym_rl, Pw, opBytes{0x8d}},
  1167  	{ALEAVEL, ynone, P32, opBytes{0xc9}},
  1168  	{ALEAVEQ, ynone, Py, opBytes{0xc9}},
  1169  	{ALEAVEW, ynone, Pe, opBytes{0xc9}},
  1170  	{ALEAW, ym_rl, Pe, opBytes{0x8d}},
  1171  	{ALOCK, ynone, Px, opBytes{0xf0}},
  1172  	{ALODSB, ynone, Pb, opBytes{0xac}},
  1173  	{ALODSL, ynone, Px, opBytes{0xad}},
  1174  	{ALODSQ, ynone, Pw, opBytes{0xad}},
  1175  	{ALODSW, ynone, Pe, opBytes{0xad}},
  1176  	{ALONG, ybyte, Px, opBytes{4}},
  1177  	{ALOOP, yloop, Px, opBytes{0xe2}},
  1178  	{ALOOPEQ, yloop, Px, opBytes{0xe1}},
  1179  	{ALOOPNE, yloop, Px, opBytes{0xe0}},
  1180  	{ALTR, ydivl, Pm, opBytes{0x00, 03}},
  1181  	{ALZCNTL, yml_rl, Pf3, opBytes{0xbd}},
  1182  	{ALZCNTQ, yml_rl, Pfw, opBytes{0xbd}},
  1183  	{ALZCNTW, yml_rl, Pef3, opBytes{0xbd}},
  1184  	{ALSLL, yml_rl, Pm, opBytes{0x03}},
  1185  	{ALSLW, yml_rl, Pq, opBytes{0x03}},
  1186  	{ALSLQ, yml_rl, Pw, opBytes{0x0f, 0x03}},
  1187  	{AMASKMOVOU, yxr, Pe, opBytes{0xf7}},
  1188  	{AMASKMOVQ, ymr, Pm, opBytes{0xf7}},
  1189  	{AMAXPD, yxm, Pe, opBytes{0x5f}},
  1190  	{AMAXPS, yxm, Pm, opBytes{0x5f}},
  1191  	{AMAXSD, yxm, Pf2, opBytes{0x5f}},
  1192  	{AMAXSS, yxm, Pf3, opBytes{0x5f}},
  1193  	{AMINPD, yxm, Pe, opBytes{0x5d}},
  1194  	{AMINPS, yxm, Pm, opBytes{0x5d}},
  1195  	{AMINSD, yxm, Pf2, opBytes{0x5d}},
  1196  	{AMINSS, yxm, Pf3, opBytes{0x5d}},
  1197  	{AMONITOR, ynone, Px, opBytes{0x0f, 0x01, 0xc8, 0}},
  1198  	{AMWAIT, ynone, Px, opBytes{0x0f, 0x01, 0xc9, 0}},
  1199  	{AMOVAPD, yxmov, Pe, opBytes{0x28, 0x29}},
  1200  	{AMOVAPS, yxmov, Pm, opBytes{0x28, 0x29}},
  1201  	{AMOVB, ymovb, Pb, opBytes{0x88, 0x8a, 0xb0, 0xc6, 00}},
  1202  	{AMOVBLSX, ymb_rl, Pm, opBytes{0xbe}},
  1203  	{AMOVBLZX, ymb_rl, Pm, opBytes{0xb6}},
  1204  	{AMOVBQSX, ymb_rl, Pw, opBytes{0x0f, 0xbe}},
  1205  	{AMOVBQZX, ymb_rl, Pw, opBytes{0x0f, 0xb6}},
  1206  	{AMOVBWSX, ymb_rl, Pq, opBytes{0xbe}},
  1207  	{AMOVSWW, ymb_rl, Pe, opBytes{0x0f, 0xbf}},
  1208  	{AMOVBWZX, ymb_rl, Pq, opBytes{0xb6}},
  1209  	{AMOVZWW, ymb_rl, Pe, opBytes{0x0f, 0xb7}},
  1210  	{AMOVO, yxmov, Pe, opBytes{0x6f, 0x7f}},
  1211  	{AMOVOU, yxmov, Pf3, opBytes{0x6f, 0x7f}},
  1212  	{AMOVHLPS, yxr, Pm, opBytes{0x12}},
  1213  	{AMOVHPD, yxmov, Pe, opBytes{0x16, 0x17}},
  1214  	{AMOVHPS, yxmov, Pm, opBytes{0x16, 0x17}},
  1215  	{AMOVL, ymovl, Px, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1216  	{AMOVLHPS, yxr, Pm, opBytes{0x16}},
  1217  	{AMOVLPD, yxmov, Pe, opBytes{0x12, 0x13}},
  1218  	{AMOVLPS, yxmov, Pm, opBytes{0x12, 0x13}},
  1219  	{AMOVLQSX, yml_rl, Pw, opBytes{0x63}},
  1220  	{AMOVLQZX, yml_rl, Px, opBytes{0x8b}},
  1221  	{AMOVMSKPD, yxrrl, Pq, opBytes{0x50}},
  1222  	{AMOVMSKPS, yxrrl, Pm, opBytes{0x50}},
  1223  	{AMOVNTO, yxr_ml, Pe, opBytes{0xe7}},
  1224  	{AMOVNTDQA, ylddqu, Pq4, opBytes{0x2a}},
  1225  	{AMOVNTPD, yxr_ml, Pe, opBytes{0x2b}},
  1226  	{AMOVNTPS, yxr_ml, Pm, opBytes{0x2b}},
  1227  	{AMOVNTQ, ymr_ml, Pm, opBytes{0xe7}},
  1228  	{AMOVQ, ymovq, Pw8, opBytes{0x6f, 0x7f, Pf2, 0xd6, Pf3, 0x7e, Pe, 0xd6, 0x89, 0x8b, 0xc7, 00, 0xb8, 0xc7, 00, 0x6e, 0x7e, Pe, 0x6e, Pe, 0x7e, 0}},
  1229  	{AMOVQOZX, ymrxr, Pf3, opBytes{0xd6, 0x7e}},
  1230  	{AMOVSB, ynone, Pb, opBytes{0xa4}},
  1231  	{AMOVSD, yxmov, Pf2, opBytes{0x10, 0x11}},
  1232  	{AMOVSL, ynone, Px, opBytes{0xa5}},
  1233  	{AMOVSQ, ynone, Pw, opBytes{0xa5}},
  1234  	{AMOVSS, yxmov, Pf3, opBytes{0x10, 0x11}},
  1235  	{AMOVSW, ynone, Pe, opBytes{0xa5}},
  1236  	{AMOVUPD, yxmov, Pe, opBytes{0x10, 0x11}},
  1237  	{AMOVUPS, yxmov, Pm, opBytes{0x10, 0x11}},
  1238  	{AMOVW, ymovw, Pe, opBytes{0x89, 0x8b, 0xb8, 0xc7, 00, 0}},
  1239  	{AMOVWLSX, yml_rl, Pm, opBytes{0xbf}},
  1240  	{AMOVWLZX, yml_rl, Pm, opBytes{0xb7}},
  1241  	{AMOVWQSX, yml_rl, Pw, opBytes{0x0f, 0xbf}},
  1242  	{AMOVWQZX, yml_rl, Pw, opBytes{0x0f, 0xb7}},
  1243  	{AMPSADBW, yxshuf, Pq, opBytes{0x3a, 0x42, 0}},
  1244  	{AMULB, ydivb, Pb, opBytes{0xf6, 04}},
  1245  	{AMULL, ydivl, Px, opBytes{0xf7, 04}},
  1246  	{AMULPD, yxm, Pe, opBytes{0x59}},
  1247  	{AMULPS, yxm, Ym, opBytes{0x59}},
  1248  	{AMULQ, ydivl, Pw, opBytes{0xf7, 04}},
  1249  	{AMULSD, yxm, Pf2, opBytes{0x59}},
  1250  	{AMULSS, yxm, Pf3, opBytes{0x59}},
  1251  	{AMULW, ydivl, Pe, opBytes{0xf7, 04}},
  1252  	{ANEGB, yscond, Pb, opBytes{0xf6, 03}},
  1253  	{ANEGL, yscond, Px, opBytes{0xf7, 03}},
  1254  	{ANEGQ, yscond, Pw, opBytes{0xf7, 03}},
  1255  	{ANEGW, yscond, Pe, opBytes{0xf7, 03}},
  1256  	{obj.ANOP, ynop, Px, opBytes{0, 0}},
  1257  	{ANOTB, yscond, Pb, opBytes{0xf6, 02}},
  1258  	{ANOTL, yscond, Px, opBytes{0xf7, 02}}, // TODO(rsc): yscond is wrong here.
  1259  	{ANOTQ, yscond, Pw, opBytes{0xf7, 02}},
  1260  	{ANOTW, yscond, Pe, opBytes{0xf7, 02}},
  1261  	{AORB, yxorb, Pb, opBytes{0x0c, 0x80, 01, 0x08, 0x0a}},
  1262  	{AORL, yaddl, Px, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1263  	{AORPD, yxm, Pq, opBytes{0x56}},
  1264  	{AORPS, yxm, Pm, opBytes{0x56}},
  1265  	{AORQ, yaddl, Pw, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1266  	{AORW, yaddl, Pe, opBytes{0x83, 01, 0x0d, 0x81, 01, 0x09, 0x0b}},
  1267  	{AOUTB, yin, Pb, opBytes{0xe6, 0xee}},
  1268  	{AOUTL, yin, Px, opBytes{0xe7, 0xef}},
  1269  	{AOUTW, yin, Pe, opBytes{0xe7, 0xef}},
  1270  	{AOUTSB, ynone, Pb, opBytes{0x6e}},
  1271  	{AOUTSL, ynone, Px, opBytes{0x6f}},
  1272  	{AOUTSW, ynone, Pe, opBytes{0x6f}},
  1273  	{APABSB, yxm_q4, Pq4, opBytes{0x1c}},
  1274  	{APABSD, yxm_q4, Pq4, opBytes{0x1e}},
  1275  	{APABSW, yxm_q4, Pq4, opBytes{0x1d}},
  1276  	{APACKSSLW, ymm, Py1, opBytes{0x6b, Pe, 0x6b}},
  1277  	{APACKSSWB, ymm, Py1, opBytes{0x63, Pe, 0x63}},
  1278  	{APACKUSDW, yxm_q4, Pq4, opBytes{0x2b}},
  1279  	{APACKUSWB, ymm, Py1, opBytes{0x67, Pe, 0x67}},
  1280  	{APADDB, ymm, Py1, opBytes{0xfc, Pe, 0xfc}},
  1281  	{APADDL, ymm, Py1, opBytes{0xfe, Pe, 0xfe}},
  1282  	{APADDQ, yxm, Pe, opBytes{0xd4}},
  1283  	{APADDSB, ymm, Py1, opBytes{0xec, Pe, 0xec}},
  1284  	{APADDSW, ymm, Py1, opBytes{0xed, Pe, 0xed}},
  1285  	{APADDUSB, ymm, Py1, opBytes{0xdc, Pe, 0xdc}},
  1286  	{APADDUSW, ymm, Py1, opBytes{0xdd, Pe, 0xdd}},
  1287  	{APADDW, ymm, Py1, opBytes{0xfd, Pe, 0xfd}},
  1288  	{APALIGNR, ypalignr, Pq, opBytes{0x3a, 0x0f}},
  1289  	{APAND, ymm, Py1, opBytes{0xdb, Pe, 0xdb}},
  1290  	{APANDN, ymm, Py1, opBytes{0xdf, Pe, 0xdf}},
  1291  	{APAUSE, ynone, Px, opBytes{0xf3, 0x90}},
  1292  	{APAVGB, ymm, Py1, opBytes{0xe0, Pe, 0xe0}},
  1293  	{APAVGW, ymm, Py1, opBytes{0xe3, Pe, 0xe3}},
  1294  	{APBLENDW, yxshuf, Pq, opBytes{0x3a, 0x0e, 0}},
  1295  	{APCMPEQB, ymm, Py1, opBytes{0x74, Pe, 0x74}},
  1296  	{APCMPEQL, ymm, Py1, opBytes{0x76, Pe, 0x76}},
  1297  	{APCMPEQQ, yxm_q4, Pq4, opBytes{0x29}},
  1298  	{APCMPEQW, ymm, Py1, opBytes{0x75, Pe, 0x75}},
  1299  	{APCMPGTB, ymm, Py1, opBytes{0x64, Pe, 0x64}},
  1300  	{APCMPGTL, ymm, Py1, opBytes{0x66, Pe, 0x66}},
  1301  	{APCMPGTQ, yxm_q4, Pq4, opBytes{0x37}},
  1302  	{APCMPGTW, ymm, Py1, opBytes{0x65, Pe, 0x65}},
  1303  	{APCMPISTRI, yxshuf, Pq, opBytes{0x3a, 0x63, 0}},
  1304  	{APCMPISTRM, yxshuf, Pq, opBytes{0x3a, 0x62, 0}},
  1305  	{APEXTRW, yextrw, Pq, opBytes{0xc5, 0, 0x3a, 0x15, 0}},
  1306  	{APEXTRB, yextr, Pq, opBytes{0x3a, 0x14, 00}},
  1307  	{APEXTRD, yextr, Pq, opBytes{0x3a, 0x16, 00}},
  1308  	{APEXTRQ, yextr, Pq3, opBytes{0x3a, 0x16, 00}},
  1309  	{APHADDD, ymmxmm0f38, Px, opBytes{0x0F, 0x38, 0x02, 0, 0x66, 0x0F, 0x38, 0x02, 0}},
  1310  	{APHADDSW, yxm_q4, Pq4, opBytes{0x03}},
  1311  	{APHADDW, yxm_q4, Pq4, opBytes{0x01}},
  1312  	{APHMINPOSUW, yxm_q4, Pq4, opBytes{0x41}},
  1313  	{APHSUBD, yxm_q4, Pq4, opBytes{0x06}},
  1314  	{APHSUBSW, yxm_q4, Pq4, opBytes{0x07}},
  1315  	{APHSUBW, yxm_q4, Pq4, opBytes{0x05}},
  1316  	{APINSRW, yinsrw, Pq, opBytes{0xc4, 00}},
  1317  	{APINSRB, yinsr, Pq, opBytes{0x3a, 0x20, 00}},
  1318  	{APINSRD, yinsr, Pq, opBytes{0x3a, 0x22, 00}},
  1319  	{APINSRQ, yinsr, Pq3, opBytes{0x3a, 0x22, 00}},
  1320  	{APMADDUBSW, yxm_q4, Pq4, opBytes{0x04}},
  1321  	{APMADDWL, ymm, Py1, opBytes{0xf5, Pe, 0xf5}},
  1322  	{APMAXSB, yxm_q4, Pq4, opBytes{0x3c}},
  1323  	{APMAXSD, yxm_q4, Pq4, opBytes{0x3d}},
  1324  	{APMAXSW, yxm, Pe, opBytes{0xee}},
  1325  	{APMAXUB, yxm, Pe, opBytes{0xde}},
  1326  	{APMAXUD, yxm_q4, Pq4, opBytes{0x3f}},
  1327  	{APMAXUW, yxm_q4, Pq4, opBytes{0x3e}},
  1328  	{APMINSB, yxm_q4, Pq4, opBytes{0x38}},
  1329  	{APMINSD, yxm_q4, Pq4, opBytes{0x39}},
  1330  	{APMINSW, yxm, Pe, opBytes{0xea}},
  1331  	{APMINUB, yxm, Pe, opBytes{0xda}},
  1332  	{APMINUD, yxm_q4, Pq4, opBytes{0x3b}},
  1333  	{APMINUW, yxm_q4, Pq4, opBytes{0x3a}},
  1334  	{APMOVMSKB, ymskb, Px, opBytes{Pe, 0xd7, 0xd7}},
  1335  	{APMOVSXBD, yxm_q4, Pq4, opBytes{0x21}},
  1336  	{APMOVSXBQ, yxm_q4, Pq4, opBytes{0x22}},
  1337  	{APMOVSXBW, yxm_q4, Pq4, opBytes{0x20}},
  1338  	{APMOVSXDQ, yxm_q4, Pq4, opBytes{0x25}},
  1339  	{APMOVSXWD, yxm_q4, Pq4, opBytes{0x23}},
  1340  	{APMOVSXWQ, yxm_q4, Pq4, opBytes{0x24}},
  1341  	{APMOVZXBD, yxm_q4, Pq4, opBytes{0x31}},
  1342  	{APMOVZXBQ, yxm_q4, Pq4, opBytes{0x32}},
  1343  	{APMOVZXBW, yxm_q4, Pq4, opBytes{0x30}},
  1344  	{APMOVZXDQ, yxm_q4, Pq4, opBytes{0x35}},
  1345  	{APMOVZXWD, yxm_q4, Pq4, opBytes{0x33}},
  1346  	{APMOVZXWQ, yxm_q4, Pq4, opBytes{0x34}},
  1347  	{APMULDQ, yxm_q4, Pq4, opBytes{0x28}},
  1348  	{APMULHRSW, yxm_q4, Pq4, opBytes{0x0b}},
  1349  	{APMULHUW, ymm, Py1, opBytes{0xe4, Pe, 0xe4}},
  1350  	{APMULHW, ymm, Py1, opBytes{0xe5, Pe, 0xe5}},
  1351  	{APMULLD, yxm_q4, Pq4, opBytes{0x40}},
  1352  	{APMULLW, ymm, Py1, opBytes{0xd5, Pe, 0xd5}},
  1353  	{APMULULQ, ymm, Py1, opBytes{0xf4, Pe, 0xf4}},
  1354  	{APOPAL, ynone, P32, opBytes{0x61}},
  1355  	{APOPAW, ynone, Pe, opBytes{0x61}},
  1356  	{APOPCNTW, yml_rl, Pef3, opBytes{0xb8}},
  1357  	{APOPCNTL, yml_rl, Pf3, opBytes{0xb8}},
  1358  	{APOPCNTQ, yml_rl, Pfw, opBytes{0xb8}},
  1359  	{APOPFL, ynone, P32, opBytes{0x9d}},
  1360  	{APOPFQ, ynone, Py, opBytes{0x9d}},
  1361  	{APOPFW, ynone, Pe, opBytes{0x9d}},
  1362  	{APOPL, ypopl, P32, opBytes{0x58, 0x8f, 00}},
  1363  	{APOPQ, ypopl, Py, opBytes{0x58, 0x8f, 00}},
  1364  	{APOPW, ypopl, Pe, opBytes{0x58, 0x8f, 00}},
  1365  	{APOR, ymm, Py1, opBytes{0xeb, Pe, 0xeb}},
  1366  	{APSADBW, yxm, Pq, opBytes{0xf6}},
  1367  	{APSHUFHW, yxshuf, Pf3, opBytes{0x70, 00}},
  1368  	{APSHUFL, yxshuf, Pq, opBytes{0x70, 00}},
  1369  	{APSHUFLW, yxshuf, Pf2, opBytes{0x70, 00}},
  1370  	{APSHUFW, ymshuf, Pm, opBytes{0x70, 00}},
  1371  	{APSHUFB, ymshufb, Pq, opBytes{0x38, 0x00}},
  1372  	{APSIGNB, yxm_q4, Pq4, opBytes{0x08}},
  1373  	{APSIGND, yxm_q4, Pq4, opBytes{0x0a}},
  1374  	{APSIGNW, yxm_q4, Pq4, opBytes{0x09}},
  1375  	{APSLLO, ypsdq, Pq, opBytes{0x73, 07}},
  1376  	{APSLLL, yps, Py3, opBytes{0xf2, 0x72, 06, Pe, 0xf2, Pe, 0x72, 06}},
  1377  	{APSLLQ, yps, Py3, opBytes{0xf3, 0x73, 06, Pe, 0xf3, Pe, 0x73, 06}},
  1378  	{APSLLW, yps, Py3, opBytes{0xf1, 0x71, 06, Pe, 0xf1, Pe, 0x71, 06}},
  1379  	{APSRAL, yps, Py3, opBytes{0xe2, 0x72, 04, Pe, 0xe2, Pe, 0x72, 04}},
  1380  	{APSRAW, yps, Py3, opBytes{0xe1, 0x71, 04, Pe, 0xe1, Pe, 0x71, 04}},
  1381  	{APSRLO, ypsdq, Pq, opBytes{0x73, 03}},
  1382  	{APSRLL, yps, Py3, opBytes{0xd2, 0x72, 02, Pe, 0xd2, Pe, 0x72, 02}},
  1383  	{APSRLQ, yps, Py3, opBytes{0xd3, 0x73, 02, Pe, 0xd3, Pe, 0x73, 02}},
  1384  	{APSRLW, yps, Py3, opBytes{0xd1, 0x71, 02, Pe, 0xd1, Pe, 0x71, 02}},
  1385  	{APSUBB, yxm, Pe, opBytes{0xf8}},
  1386  	{APSUBL, yxm, Pe, opBytes{0xfa}},
  1387  	{APSUBQ, yxm, Pe, opBytes{0xfb}},
  1388  	{APSUBSB, yxm, Pe, opBytes{0xe8}},
  1389  	{APSUBSW, yxm, Pe, opBytes{0xe9}},
  1390  	{APSUBUSB, yxm, Pe, opBytes{0xd8}},
  1391  	{APSUBUSW, yxm, Pe, opBytes{0xd9}},
  1392  	{APSUBW, yxm, Pe, opBytes{0xf9}},
  1393  	{APTEST, yxm_q4, Pq4, opBytes{0x17}},
  1394  	{APUNPCKHBW, ymm, Py1, opBytes{0x68, Pe, 0x68}},
  1395  	{APUNPCKHLQ, ymm, Py1, opBytes{0x6a, Pe, 0x6a}},
  1396  	{APUNPCKHQDQ, yxm, Pe, opBytes{0x6d}},
  1397  	{APUNPCKHWL, ymm, Py1, opBytes{0x69, Pe, 0x69}},
  1398  	{APUNPCKLBW, ymm, Py1, opBytes{0x60, Pe, 0x60}},
  1399  	{APUNPCKLLQ, ymm, Py1, opBytes{0x62, Pe, 0x62}},
  1400  	{APUNPCKLQDQ, yxm, Pe, opBytes{0x6c}},
  1401  	{APUNPCKLWL, ymm, Py1, opBytes{0x61, Pe, 0x61}},
  1402  	{APUSHAL, ynone, P32, opBytes{0x60}},
  1403  	{APUSHAW, ynone, Pe, opBytes{0x60}},
  1404  	{APUSHFL, ynone, P32, opBytes{0x9c}},
  1405  	{APUSHFQ, ynone, Py, opBytes{0x9c}},
  1406  	{APUSHFW, ynone, Pe, opBytes{0x9c}},
  1407  	{APUSHL, ypushl, P32, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1408  	{APUSHQ, ypushl, Py, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1409  	{APUSHW, ypushl, Pe, opBytes{0x50, 0xff, 06, 0x6a, 0x68}},
  1410  	{APXOR, ymm, Py1, opBytes{0xef, Pe, 0xef}},
  1411  	{AQUAD, ybyte, Px, opBytes{8}},
  1412  	{ARCLB, yshb, Pb, opBytes{0xd0, 02, 0xc0, 02, 0xd2, 02}},
  1413  	{ARCLL, yshl, Px, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1414  	{ARCLQ, yshl, Pw, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1415  	{ARCLW, yshl, Pe, opBytes{0xd1, 02, 0xc1, 02, 0xd3, 02, 0xd3, 02}},
  1416  	{ARCPPS, yxm, Pm, opBytes{0x53}},
  1417  	{ARCPSS, yxm, Pf3, opBytes{0x53}},
  1418  	{ARCRB, yshb, Pb, opBytes{0xd0, 03, 0xc0, 03, 0xd2, 03}},
  1419  	{ARCRL, yshl, Px, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1420  	{ARCRQ, yshl, Pw, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1421  	{ARCRW, yshl, Pe, opBytes{0xd1, 03, 0xc1, 03, 0xd3, 03, 0xd3, 03}},
  1422  	{AREP, ynone, Px, opBytes{0xf3}},
  1423  	{AREPN, ynone, Px, opBytes{0xf2}},
  1424  	{obj.ARET, ynone, Px, opBytes{0xc3}},
  1425  	{ARETFW, yret, Pe, opBytes{0xcb, 0xca}},
  1426  	{ARETFL, yret, Px, opBytes{0xcb, 0xca}},
  1427  	{ARETFQ, yret, Pw, opBytes{0xcb, 0xca}},
  1428  	{AROLB, yshb, Pb, opBytes{0xd0, 00, 0xc0, 00, 0xd2, 00}},
  1429  	{AROLL, yshl, Px, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1430  	{AROLQ, yshl, Pw, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1431  	{AROLW, yshl, Pe, opBytes{0xd1, 00, 0xc1, 00, 0xd3, 00, 0xd3, 00}},
  1432  	{ARORB, yshb, Pb, opBytes{0xd0, 01, 0xc0, 01, 0xd2, 01}},
  1433  	{ARORL, yshl, Px, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1434  	{ARORQ, yshl, Pw, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1435  	{ARORW, yshl, Pe, opBytes{0xd1, 01, 0xc1, 01, 0xd3, 01, 0xd3, 01}},
  1436  	{ARSQRTPS, yxm, Pm, opBytes{0x52}},
  1437  	{ARSQRTSS, yxm, Pf3, opBytes{0x52}},
  1438  	{ASAHF, ynone, Px, opBytes{0x9e, 00, 0x86, 0xe0, 0x50, 0x9d}}, // XCHGB AH,AL; PUSH AX; POPFL
  1439  	{ASALB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1440  	{ASALL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1441  	{ASALQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1442  	{ASALW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1443  	{ASARB, yshb, Pb, opBytes{0xd0, 07, 0xc0, 07, 0xd2, 07}},
  1444  	{ASARL, yshl, Px, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1445  	{ASARQ, yshl, Pw, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1446  	{ASARW, yshl, Pe, opBytes{0xd1, 07, 0xc1, 07, 0xd3, 07, 0xd3, 07}},
  1447  	{ASBBB, yxorb, Pb, opBytes{0x1c, 0x80, 03, 0x18, 0x1a}},
  1448  	{ASBBL, yaddl, Px, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1449  	{ASBBQ, yaddl, Pw, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1450  	{ASBBW, yaddl, Pe, opBytes{0x83, 03, 0x1d, 0x81, 03, 0x19, 0x1b}},
  1451  	{ASCASB, ynone, Pb, opBytes{0xae}},
  1452  	{ASCASL, ynone, Px, opBytes{0xaf}},
  1453  	{ASCASQ, ynone, Pw, opBytes{0xaf}},
  1454  	{ASCASW, ynone, Pe, opBytes{0xaf}},
  1455  	{ASETCC, yscond, Pb, opBytes{0x0f, 0x93, 00}},
  1456  	{ASETCS, yscond, Pb, opBytes{0x0f, 0x92, 00}},
  1457  	{ASETEQ, yscond, Pb, opBytes{0x0f, 0x94, 00}},
  1458  	{ASETGE, yscond, Pb, opBytes{0x0f, 0x9d, 00}},
  1459  	{ASETGT, yscond, Pb, opBytes{0x0f, 0x9f, 00}},
  1460  	{ASETHI, yscond, Pb, opBytes{0x0f, 0x97, 00}},
  1461  	{ASETLE, yscond, Pb, opBytes{0x0f, 0x9e, 00}},
  1462  	{ASETLS, yscond, Pb, opBytes{0x0f, 0x96, 00}},
  1463  	{ASETLT, yscond, Pb, opBytes{0x0f, 0x9c, 00}},
  1464  	{ASETMI, yscond, Pb, opBytes{0x0f, 0x98, 00}},
  1465  	{ASETNE, yscond, Pb, opBytes{0x0f, 0x95, 00}},
  1466  	{ASETOC, yscond, Pb, opBytes{0x0f, 0x91, 00}},
  1467  	{ASETOS, yscond, Pb, opBytes{0x0f, 0x90, 00}},
  1468  	{ASETPC, yscond, Pb, opBytes{0x0f, 0x9b, 00}},
  1469  	{ASETPL, yscond, Pb, opBytes{0x0f, 0x99, 00}},
  1470  	{ASETPS, yscond, Pb, opBytes{0x0f, 0x9a, 00}},
  1471  	{ASHLB, yshb, Pb, opBytes{0xd0, 04, 0xc0, 04, 0xd2, 04}},
  1472  	{ASHLL, yshl, Px, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1473  	{ASHLQ, yshl, Pw, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1474  	{ASHLW, yshl, Pe, opBytes{0xd1, 04, 0xc1, 04, 0xd3, 04, 0xd3, 04}},
  1475  	{ASHRB, yshb, Pb, opBytes{0xd0, 05, 0xc0, 05, 0xd2, 05}},
  1476  	{ASHRL, yshl, Px, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1477  	{ASHRQ, yshl, Pw, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1478  	{ASHRW, yshl, Pe, opBytes{0xd1, 05, 0xc1, 05, 0xd3, 05, 0xd3, 05}},
  1479  	{ASHUFPD, yxshuf, Pq, opBytes{0xc6, 00}},
  1480  	{ASHUFPS, yxshuf, Pm, opBytes{0xc6, 00}},
  1481  	{ASQRTPD, yxm, Pe, opBytes{0x51}},
  1482  	{ASQRTPS, yxm, Pm, opBytes{0x51}},
  1483  	{ASQRTSD, yxm, Pf2, opBytes{0x51}},
  1484  	{ASQRTSS, yxm, Pf3, opBytes{0x51}},
  1485  	{ASTC, ynone, Px, opBytes{0xf9}},
  1486  	{ASTD, ynone, Px, opBytes{0xfd}},
  1487  	{ASTI, ynone, Px, opBytes{0xfb}},
  1488  	{ASTMXCSR, ysvrs_om, Pm, opBytes{0xae, 03, 0xae, 03}},
  1489  	{ASTOSB, ynone, Pb, opBytes{0xaa}},
  1490  	{ASTOSL, ynone, Px, opBytes{0xab}},
  1491  	{ASTOSQ, ynone, Pw, opBytes{0xab}},
  1492  	{ASTOSW, ynone, Pe, opBytes{0xab}},
  1493  	{ASUBB, yxorb, Pb, opBytes{0x2c, 0x80, 05, 0x28, 0x2a}},
  1494  	{ASUBL, yaddl, Px, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1495  	{ASUBPD, yxm, Pe, opBytes{0x5c}},
  1496  	{ASUBPS, yxm, Pm, opBytes{0x5c}},
  1497  	{ASUBQ, yaddl, Pw, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1498  	{ASUBSD, yxm, Pf2, opBytes{0x5c}},
  1499  	{ASUBSS, yxm, Pf3, opBytes{0x5c}},
  1500  	{ASUBW, yaddl, Pe, opBytes{0x83, 05, 0x2d, 0x81, 05, 0x29, 0x2b}},
  1501  	{ASWAPGS, ynone, Pm, opBytes{0x01, 0xf8}},
  1502  	{ASYSCALL, ynone, Px, opBytes{0x0f, 0x05}}, // fast syscall
  1503  	{ATESTB, yxorb, Pb, opBytes{0xa8, 0xf6, 00, 0x84, 0x84}},
  1504  	{ATESTL, ytestl, Px, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1505  	{ATESTQ, ytestl, Pw, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1506  	{ATESTW, ytestl, Pe, opBytes{0xa9, 0xf7, 00, 0x85, 0x85}},
  1507  	{ATPAUSE, ywrfsbase, Pq, opBytes{0xae, 06}},
  1508  	{obj.ATEXT, ytext, Px, opBytes{}},
  1509  	{AUCOMISD, yxm, Pe, opBytes{0x2e}},
  1510  	{AUCOMISS, yxm, Pm, opBytes{0x2e}},
  1511  	{AUNPCKHPD, yxm, Pe, opBytes{0x15}},
  1512  	{AUNPCKHPS, yxm, Pm, opBytes{0x15}},
  1513  	{AUNPCKLPD, yxm, Pe, opBytes{0x14}},
  1514  	{AUNPCKLPS, yxm, Pm, opBytes{0x14}},
  1515  	{AUMONITOR, ywrfsbase, Pf3, opBytes{0xae, 06}},
  1516  	{AVERR, ydivl, Pm, opBytes{0x00, 04}},
  1517  	{AVERW, ydivl, Pm, opBytes{0x00, 05}},
  1518  	{AWAIT, ynone, Px, opBytes{0x9b}},
  1519  	{AWORD, ybyte, Px, opBytes{2}},
  1520  	{AXCHGB, yml_mb, Pb, opBytes{0x86, 0x86}},
  1521  	{AXCHGL, yxchg, Px, opBytes{0x90, 0x90, 0x87, 0x87}},
  1522  	{AXCHGQ, yxchg, Pw, opBytes{0x90, 0x90, 0x87, 0x87}},
  1523  	{AXCHGW, yxchg, Pe, opBytes{0x90, 0x90, 0x87, 0x87}},
  1524  	{AXLAT, ynone, Px, opBytes{0xd7}},
  1525  	{AXORB, yxorb, Pb, opBytes{0x34, 0x80, 06, 0x30, 0x32}},
  1526  	{AXORL, yaddl, Px, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1527  	{AXORPD, yxm, Pe, opBytes{0x57}},
  1528  	{AXORPS, yxm, Pm, opBytes{0x57}},
  1529  	{AXORQ, yaddl, Pw, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1530  	{AXORW, yaddl, Pe, opBytes{0x83, 06, 0x35, 0x81, 06, 0x31, 0x33}},
  1531  	{AFMOVB, yfmvx, Px, opBytes{0xdf, 04}},
  1532  	{AFMOVBP, yfmvp, Px, opBytes{0xdf, 06}},
  1533  	{AFMOVD, yfmvd, Px, opBytes{0xdd, 00, 0xdd, 02, 0xd9, 00, 0xdd, 02}},
  1534  	{AFMOVDP, yfmvdp, Px, opBytes{0xdd, 03, 0xdd, 03}},
  1535  	{AFMOVF, yfmvf, Px, opBytes{0xd9, 00, 0xd9, 02}},
  1536  	{AFMOVFP, yfmvp, Px, opBytes{0xd9, 03}},
  1537  	{AFMOVL, yfmvf, Px, opBytes{0xdb, 00, 0xdb, 02}},
  1538  	{AFMOVLP, yfmvp, Px, opBytes{0xdb, 03}},
  1539  	{AFMOVV, yfmvx, Px, opBytes{0xdf, 05}},
  1540  	{AFMOVVP, yfmvp, Px, opBytes{0xdf, 07}},
  1541  	{AFMOVW, yfmvf, Px, opBytes{0xdf, 00, 0xdf, 02}},
  1542  	{AFMOVWP, yfmvp, Px, opBytes{0xdf, 03}},
  1543  	{AFMOVX, yfmvx, Px, opBytes{0xdb, 05}},
  1544  	{AFMOVXP, yfmvp, Px, opBytes{0xdb, 07}},
  1545  	{AFCMOVCC, yfcmv, Px, opBytes{0xdb, 00}},
  1546  	{AFCMOVCS, yfcmv, Px, opBytes{0xda, 00}},
  1547  	{AFCMOVEQ, yfcmv, Px, opBytes{0xda, 01}},
  1548  	{AFCMOVHI, yfcmv, Px, opBytes{0xdb, 02}},
  1549  	{AFCMOVLS, yfcmv, Px, opBytes{0xda, 02}},
  1550  	{AFCMOVB, yfcmv, Px, opBytes{0xda, 00}},
  1551  	{AFCMOVBE, yfcmv, Px, opBytes{0xda, 02}},
  1552  	{AFCMOVNB, yfcmv, Px, opBytes{0xdb, 00}},
  1553  	{AFCMOVNBE, yfcmv, Px, opBytes{0xdb, 02}},
  1554  	{AFCMOVE, yfcmv, Px, opBytes{0xda, 01}},
  1555  	{AFCMOVNE, yfcmv, Px, opBytes{0xdb, 01}},
  1556  	{AFCMOVNU, yfcmv, Px, opBytes{0xdb, 03}},
  1557  	{AFCMOVU, yfcmv, Px, opBytes{0xda, 03}},
  1558  	{AFCMOVUN, yfcmv, Px, opBytes{0xda, 03}},
  1559  	{AFCOMD, yfadd, Px, opBytes{0xdc, 02, 0xd8, 02, 0xdc, 02}},  // botch
  1560  	{AFCOMDP, yfadd, Px, opBytes{0xdc, 03, 0xd8, 03, 0xdc, 03}}, // botch
  1561  	{AFCOMDPP, ycompp, Px, opBytes{0xde, 03}},
  1562  	{AFCOMF, yfmvx, Px, opBytes{0xd8, 02}},
  1563  	{AFCOMFP, yfmvx, Px, opBytes{0xd8, 03}},
  1564  	{AFCOMI, yfcmv, Px, opBytes{0xdb, 06}},
  1565  	{AFCOMIP, yfcmv, Px, opBytes{0xdf, 06}},
  1566  	{AFCOML, yfmvx, Px, opBytes{0xda, 02}},
  1567  	{AFCOMLP, yfmvx, Px, opBytes{0xda, 03}},
  1568  	{AFCOMW, yfmvx, Px, opBytes{0xde, 02}},
  1569  	{AFCOMWP, yfmvx, Px, opBytes{0xde, 03}},
  1570  	{AFUCOM, ycompp, Px, opBytes{0xdd, 04}},
  1571  	{AFUCOMI, ycompp, Px, opBytes{0xdb, 05}},
  1572  	{AFUCOMIP, ycompp, Px, opBytes{0xdf, 05}},
  1573  	{AFUCOMP, ycompp, Px, opBytes{0xdd, 05}},
  1574  	{AFUCOMPP, ycompp, Px, opBytes{0xda, 13}},
  1575  	{AFADDDP, ycompp, Px, opBytes{0xde, 00}},
  1576  	{AFADDW, yfmvx, Px, opBytes{0xde, 00}},
  1577  	{AFADDL, yfmvx, Px, opBytes{0xda, 00}},
  1578  	{AFADDF, yfmvx, Px, opBytes{0xd8, 00}},
  1579  	{AFADDD, yfadd, Px, opBytes{0xdc, 00, 0xd8, 00, 0xdc, 00}},
  1580  	{AFMULDP, ycompp, Px, opBytes{0xde, 01}},
  1581  	{AFMULW, yfmvx, Px, opBytes{0xde, 01}},
  1582  	{AFMULL, yfmvx, Px, opBytes{0xda, 01}},
  1583  	{AFMULF, yfmvx, Px, opBytes{0xd8, 01}},
  1584  	{AFMULD, yfadd, Px, opBytes{0xdc, 01, 0xd8, 01, 0xdc, 01}},
  1585  	{AFSUBDP, ycompp, Px, opBytes{0xde, 05}},
  1586  	{AFSUBW, yfmvx, Px, opBytes{0xde, 04}},
  1587  	{AFSUBL, yfmvx, Px, opBytes{0xda, 04}},
  1588  	{AFSUBF, yfmvx, Px, opBytes{0xd8, 04}},
  1589  	{AFSUBD, yfadd, Px, opBytes{0xdc, 04, 0xd8, 04, 0xdc, 05}},
  1590  	{AFSUBRDP, ycompp, Px, opBytes{0xde, 04}},
  1591  	{AFSUBRW, yfmvx, Px, opBytes{0xde, 05}},
  1592  	{AFSUBRL, yfmvx, Px, opBytes{0xda, 05}},
  1593  	{AFSUBRF, yfmvx, Px, opBytes{0xd8, 05}},
  1594  	{AFSUBRD, yfadd, Px, opBytes{0xdc, 05, 0xd8, 05, 0xdc, 04}},
  1595  	{AFDIVDP, ycompp, Px, opBytes{0xde, 07}},
  1596  	{AFDIVW, yfmvx, Px, opBytes{0xde, 06}},
  1597  	{AFDIVL, yfmvx, Px, opBytes{0xda, 06}},
  1598  	{AFDIVF, yfmvx, Px, opBytes{0xd8, 06}},
  1599  	{AFDIVD, yfadd, Px, opBytes{0xdc, 06, 0xd8, 06, 0xdc, 07}},
  1600  	{AFDIVRDP, ycompp, Px, opBytes{0xde, 06}},
  1601  	{AFDIVRW, yfmvx, Px, opBytes{0xde, 07}},
  1602  	{AFDIVRL, yfmvx, Px, opBytes{0xda, 07}},
  1603  	{AFDIVRF, yfmvx, Px, opBytes{0xd8, 07}},
  1604  	{AFDIVRD, yfadd, Px, opBytes{0xdc, 07, 0xd8, 07, 0xdc, 06}},
  1605  	{AFXCHD, yfxch, Px, opBytes{0xd9, 01, 0xd9, 01}},
  1606  	{AFFREE, nil, 0, opBytes{}},
  1607  	{AFLDCW, ysvrs_mo, Px, opBytes{0xd9, 05, 0xd9, 05}},
  1608  	{AFLDENV, ysvrs_mo, Px, opBytes{0xd9, 04, 0xd9, 04}},
  1609  	{AFRSTOR, ysvrs_mo, Px, opBytes{0xdd, 04, 0xdd, 04}},
  1610  	{AFSAVE, ysvrs_om, Px, opBytes{0xdd, 06, 0xdd, 06}},
  1611  	{AFSTCW, ysvrs_om, Px, opBytes{0xd9, 07, 0xd9, 07}},
  1612  	{AFSTENV, ysvrs_om, Px, opBytes{0xd9, 06, 0xd9, 06}},
  1613  	{AFSTSW, ystsw, Px, opBytes{0xdd, 07, 0xdf, 0xe0}},
  1614  	{AF2XM1, ynone, Px, opBytes{0xd9, 0xf0}},
  1615  	{AFABS, ynone, Px, opBytes{0xd9, 0xe1}},
  1616  	{AFBLD, ysvrs_mo, Px, opBytes{0xdf, 04}},
  1617  	{AFBSTP, yclflush, Px, opBytes{0xdf, 06}},
  1618  	{AFCHS, ynone, Px, opBytes{0xd9, 0xe0}},
  1619  	{AFCLEX, ynone, Px, opBytes{0xdb, 0xe2}},
  1620  	{AFCOS, ynone, Px, opBytes{0xd9, 0xff}},
  1621  	{AFDECSTP, ynone, Px, opBytes{0xd9, 0xf6}},
  1622  	{AFINCSTP, ynone, Px, opBytes{0xd9, 0xf7}},
  1623  	{AFINIT, ynone, Px, opBytes{0xdb, 0xe3}},
  1624  	{AFLD1, ynone, Px, opBytes{0xd9, 0xe8}},
  1625  	{AFLDL2E, ynone, Px, opBytes{0xd9, 0xea}},
  1626  	{AFLDL2T, ynone, Px, opBytes{0xd9, 0xe9}},
  1627  	{AFLDLG2, ynone, Px, opBytes{0xd9, 0xec}},
  1628  	{AFLDLN2, ynone, Px, opBytes{0xd9, 0xed}},
  1629  	{AFLDPI, ynone, Px, opBytes{0xd9, 0xeb}},
  1630  	{AFLDZ, ynone, Px, opBytes{0xd9, 0xee}},
  1631  	{AFNOP, ynone, Px, opBytes{0xd9, 0xd0}},
  1632  	{AFPATAN, ynone, Px, opBytes{0xd9, 0xf3}},
  1633  	{AFPREM, ynone, Px, opBytes{0xd9, 0xf8}},
  1634  	{AFPREM1, ynone, Px, opBytes{0xd9, 0xf5}},
  1635  	{AFPTAN, ynone, Px, opBytes{0xd9, 0xf2}},
  1636  	{AFRNDINT, ynone, Px, opBytes{0xd9, 0xfc}},
  1637  	{AFSCALE, ynone, Px, opBytes{0xd9, 0xfd}},
  1638  	{AFSIN, ynone, Px, opBytes{0xd9, 0xfe}},
  1639  	{AFSINCOS, ynone, Px, opBytes{0xd9, 0xfb}},
  1640  	{AFSQRT, ynone, Px, opBytes{0xd9, 0xfa}},
  1641  	{AFTST, ynone, Px, opBytes{0xd9, 0xe4}},
  1642  	{AFXAM, ynone, Px, opBytes{0xd9, 0xe5}},
  1643  	{AFXTRACT, ynone, Px, opBytes{0xd9, 0xf4}},
  1644  	{AFYL2X, ynone, Px, opBytes{0xd9, 0xf1}},
  1645  	{AFYL2XP1, ynone, Px, opBytes{0xd9, 0xf9}},
  1646  	{ACMPXCHGB, yrb_mb, Pb, opBytes{0x0f, 0xb0}},
  1647  	{ACMPXCHGL, yrl_ml, Px, opBytes{0x0f, 0xb1}},
  1648  	{ACMPXCHGW, yrl_ml, Pe, opBytes{0x0f, 0xb1}},
  1649  	{ACMPXCHGQ, yrl_ml, Pw, opBytes{0x0f, 0xb1}},
  1650  	{ACMPXCHG8B, yscond, Pm, opBytes{0xc7, 01}},
  1651  	{ACMPXCHG16B, yscond, Pw, opBytes{0x0f, 0xc7, 01}},
  1652  	{AINVD, ynone, Pm, opBytes{0x08}},
  1653  	{AINVLPG, ydivb, Pm, opBytes{0x01, 07}},
  1654  	{AINVPCID, ycrc32l, Pe, opBytes{0x0f, 0x38, 0x82, 0}},
  1655  	{ALFENCE, ynone, Pm, opBytes{0xae, 0xe8}},
  1656  	{AMFENCE, ynone, Pm, opBytes{0xae, 0xf0}},
  1657  	{AMOVNTIL, yrl_ml, Pm, opBytes{0xc3}},
  1658  	{AMOVNTIQ, yrl_ml, Pw, opBytes{0x0f, 0xc3}},
  1659  	{ARDPKRU, ynone, Pm, opBytes{0x01, 0xee, 0}},
  1660  	{ARDMSR, ynone, Pm, opBytes{0x32}},
  1661  	{ARDPMC, ynone, Pm, opBytes{0x33}},
  1662  	{ARDTSC, ynone, Pm, opBytes{0x31}},
  1663  	{ARSM, ynone, Pm, opBytes{0xaa}},
  1664  	{ASFENCE, ynone, Pm, opBytes{0xae, 0xf8}},
  1665  	{ASYSRET, ynone, Pm, opBytes{0x07}},
  1666  	{AWBINVD, ynone, Pm, opBytes{0x09}},
  1667  	{AWRMSR, ynone, Pm, opBytes{0x30}},
  1668  	{AWRPKRU, ynone, Pm, opBytes{0x01, 0xef, 0}},
  1669  	{AXADDB, yrb_mb, Pb, opBytes{0x0f, 0xc0}},
  1670  	{AXADDL, yrl_ml, Px, opBytes{0x0f, 0xc1}},
  1671  	{AXADDQ, yrl_ml, Pw, opBytes{0x0f, 0xc1}},
  1672  	{AXADDW, yrl_ml, Pe, opBytes{0x0f, 0xc1}},
  1673  	{ACRC32B, ycrc32b, Px, opBytes{0xf2, 0x0f, 0x38, 0xf0, 0}},
  1674  	{ACRC32L, ycrc32l, Px, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1675  	{ACRC32Q, ycrc32l, Pw, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1676  	{ACRC32W, ycrc32l, Pe, opBytes{0xf2, 0x0f, 0x38, 0xf1, 0}},
  1677  	{APREFETCHT0, yprefetch, Pm, opBytes{0x18, 01}},
  1678  	{APREFETCHT1, yprefetch, Pm, opBytes{0x18, 02}},
  1679  	{APREFETCHT2, yprefetch, Pm, opBytes{0x18, 03}},
  1680  	{APREFETCHNTA, yprefetch, Pm, opBytes{0x18, 00}},
  1681  	{AMOVQL, yrl_ml, Px, opBytes{0x89}},
  1682  	{obj.AUNDEF, ynone, Px, opBytes{0x0f, 0x0b}},
  1683  	{AAESENC, yaes, Pq, opBytes{0x38, 0xdc, 0}},
  1684  	{AAESENCLAST, yaes, Pq, opBytes{0x38, 0xdd, 0}},
  1685  	{AAESDEC, yaes, Pq, opBytes{0x38, 0xde, 0}},
  1686  	{AAESDECLAST, yaes, Pq, opBytes{0x38, 0xdf, 0}},
  1687  	{AAESIMC, yaes, Pq, opBytes{0x38, 0xdb, 0}},
  1688  	{AAESKEYGENASSIST, yxshuf, Pq, opBytes{0x3a, 0xdf, 0}},
  1689  	{AROUNDPD, yxshuf, Pq, opBytes{0x3a, 0x09, 0}},
  1690  	{AROUNDPS, yxshuf, Pq, opBytes{0x3a, 0x08, 0}},
  1691  	{AROUNDSD, yxshuf, Pq, opBytes{0x3a, 0x0b, 0}},
  1692  	{AROUNDSS, yxshuf, Pq, opBytes{0x3a, 0x0a, 0}},
  1693  	{APSHUFD, yxshuf, Pq, opBytes{0x70, 0}},
  1694  	{APCLMULQDQ, yxshuf, Pq, opBytes{0x3a, 0x44, 0}},
  1695  	{APCMPESTRI, yxshuf, Pq, opBytes{0x3a, 0x61, 0}},
  1696  	{APCMPESTRM, yxshuf, Pq, opBytes{0x3a, 0x60, 0}},
  1697  	{AMOVDDUP, yxm, Pf2, opBytes{0x12}},
  1698  	{AMOVSHDUP, yxm, Pf3, opBytes{0x16}},
  1699  	{AMOVSLDUP, yxm, Pf3, opBytes{0x12}},
  1700  	{ARDTSCP, ynone, Pm, opBytes{0x01, 0xf9, 0}},
  1701  	{ASTAC, ynone, Pm, opBytes{0x01, 0xcb, 0}},
  1702  	{AUD1, ynone, Pm, opBytes{0xb9, 0}},
  1703  	{AUD2, ynone, Pm, opBytes{0x0b, 0}},
  1704  	{AUMWAIT, ywrfsbase, Pf2, opBytes{0xae, 06}},
  1705  	{ASYSENTER, ynone, Px, opBytes{0x0f, 0x34, 0}},
  1706  	{ASYSENTER64, ynone, Pw, opBytes{0x0f, 0x34, 0}},
  1707  	{ASYSEXIT, ynone, Px, opBytes{0x0f, 0x35, 0}},
  1708  	{ASYSEXIT64, ynone, Pw, opBytes{0x0f, 0x35, 0}},
  1709  	{ALMSW, ydivl, Pm, opBytes{0x01, 06}},
  1710  	{ALLDT, ydivl, Pm, opBytes{0x00, 02}},
  1711  	{ALIDT, ysvrs_mo, Pm, opBytes{0x01, 03}},
  1712  	{ALGDT, ysvrs_mo, Pm, opBytes{0x01, 02}},
  1713  	{ATZCNTW, ycrc32l, Pe, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1714  	{ATZCNTL, ycrc32l, Px, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1715  	{ATZCNTQ, ycrc32l, Pw, opBytes{0xf3, 0x0f, 0xbc, 0}},
  1716  	{AXRSTOR, ydivl, Px, opBytes{0x0f, 0xae, 05}},
  1717  	{AXRSTOR64, ydivl, Pw, opBytes{0x0f, 0xae, 05}},
  1718  	{AXRSTORS, ydivl, Px, opBytes{0x0f, 0xc7, 03}},
  1719  	{AXRSTORS64, ydivl, Pw, opBytes{0x0f, 0xc7, 03}},
  1720  	{AXSAVE, yclflush, Px, opBytes{0x0f, 0xae, 04}},
  1721  	{AXSAVE64, yclflush, Pw, opBytes{0x0f, 0xae, 04}},
  1722  	{AXSAVEOPT, yclflush, Px, opBytes{0x0f, 0xae, 06}},
  1723  	{AXSAVEOPT64, yclflush, Pw, opBytes{0x0f, 0xae, 06}},
  1724  	{AXSAVEC, yclflush, Px, opBytes{0x0f, 0xc7, 04}},
  1725  	{AXSAVEC64, yclflush, Pw, opBytes{0x0f, 0xc7, 04}},
  1726  	{AXSAVES, yclflush, Px, opBytes{0x0f, 0xc7, 05}},
  1727  	{AXSAVES64, yclflush, Pw, opBytes{0x0f, 0xc7, 05}},
  1728  	{ASGDT, yclflush, Pm, opBytes{0x01, 00}},
  1729  	{ASIDT, yclflush, Pm, opBytes{0x01, 01}},
  1730  	{ARDRANDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 06}},
  1731  	{ARDRANDL, yrdrand, Px, opBytes{0x0f, 0xc7, 06}},
  1732  	{ARDRANDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 06}},
  1733  	{ARDSEEDW, yrdrand, Pe, opBytes{0x0f, 0xc7, 07}},
  1734  	{ARDSEEDL, yrdrand, Px, opBytes{0x0f, 0xc7, 07}},
  1735  	{ARDSEEDQ, yrdrand, Pw, opBytes{0x0f, 0xc7, 07}},
  1736  	{ASTRW, yincq, Pe, opBytes{0x0f, 0x00, 01}},
  1737  	{ASTRL, yincq, Px, opBytes{0x0f, 0x00, 01}},
  1738  	{ASTRQ, yincq, Pw, opBytes{0x0f, 0x00, 01}},
  1739  	{AXSETBV, ynone, Pm, opBytes{0x01, 0xd1, 0}},
  1740  	{AMOVBEW, ymovbe, Pq, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1741  	{AMOVBEL, ymovbe, Pm, opBytes{0x38, 0xf0, 0, 0x38, 0xf1, 0}},
  1742  	{AMOVBEQ, ymovbe, Pw, opBytes{0x0f, 0x38, 0xf0, 0, 0x0f, 0x38, 0xf1, 0}},
  1743  	{ANOPW, ydivl, Pe, opBytes{0x0f, 0x1f, 00}},
  1744  	{ANOPL, ydivl, Px, opBytes{0x0f, 0x1f, 00}},
  1745  	{ASLDTW, yincq, Pe, opBytes{0x0f, 0x00, 00}},
  1746  	{ASLDTL, yincq, Px, opBytes{0x0f, 0x00, 00}},
  1747  	{ASLDTQ, yincq, Pw, opBytes{0x0f, 0x00, 00}},
  1748  	{ASMSWW, yincq, Pe, opBytes{0x0f, 0x01, 04}},
  1749  	{ASMSWL, yincq, Px, opBytes{0x0f, 0x01, 04}},
  1750  	{ASMSWQ, yincq, Pw, opBytes{0x0f, 0x01, 04}},
  1751  	{ABLENDVPS, yblendvpd, Pq4, opBytes{0x14}},
  1752  	{ABLENDVPD, yblendvpd, Pq4, opBytes{0x15}},
  1753  	{APBLENDVB, yblendvpd, Pq4, opBytes{0x10}},
  1754  	{ASHA1MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xc9, 0}},
  1755  	{ASHA1MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xca, 0}},
  1756  	{ASHA1NEXTE, yaes, Px, opBytes{0x0f, 0x38, 0xc8, 0}},
  1757  	{ASHA256MSG1, yaes, Px, opBytes{0x0f, 0x38, 0xcc, 0}},
  1758  	{ASHA256MSG2, yaes, Px, opBytes{0x0f, 0x38, 0xcd, 0}},
  1759  	{ASHA1RNDS4, ysha1rnds4, Pm, opBytes{0x3a, 0xcc, 0}},
  1760  	{ASHA256RNDS2, ysha256rnds2, Px, opBytes{0x0f, 0x38, 0xcb, 0}},
  1761  	{ARDFSBASEL, yrdrand, Pf3, opBytes{0xae, 00}},
  1762  	{ARDFSBASEQ, yrdrand, Pfw, opBytes{0xae, 00}},
  1763  	{ARDGSBASEL, yrdrand, Pf3, opBytes{0xae, 01}},
  1764  	{ARDGSBASEQ, yrdrand, Pfw, opBytes{0xae, 01}},
  1765  	{AWRFSBASEL, ywrfsbase, Pf3, opBytes{0xae, 02}},
  1766  	{AWRFSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 02}},
  1767  	{AWRGSBASEL, ywrfsbase, Pf3, opBytes{0xae, 03}},
  1768  	{AWRGSBASEQ, ywrfsbase, Pfw, opBytes{0xae, 03}},
  1769  	{ALFSW, ym_rl, Pe, opBytes{0x0f, 0xb4}},
  1770  	{ALFSL, ym_rl, Px, opBytes{0x0f, 0xb4}},
  1771  	{ALFSQ, ym_rl, Pw, opBytes{0x0f, 0xb4}},
  1772  	{ALGSW, ym_rl, Pe, opBytes{0x0f, 0xb5}},
  1773  	{ALGSL, ym_rl, Px, opBytes{0x0f, 0xb5}},
  1774  	{ALGSQ, ym_rl, Pw, opBytes{0x0f, 0xb5}},
  1775  	{ALSSW, ym_rl, Pe, opBytes{0x0f, 0xb2}},
  1776  	{ALSSL, ym_rl, Px, opBytes{0x0f, 0xb2}},
  1777  	{ALSSQ, ym_rl, Pw, opBytes{0x0f, 0xb2}},
  1778  	{ARDPID, yrdrand, Pf3, opBytes{0xc7, 07}},
  1779  
  1780  	{ABLENDPD, yxshuf, Pq, opBytes{0x3a, 0x0d, 0}},
  1781  	{ABLENDPS, yxshuf, Pq, opBytes{0x3a, 0x0c, 0}},
  1782  	{AXACQUIRE, ynone, Px, opBytes{0xf2}},
  1783  	{AXRELEASE, ynone, Px, opBytes{0xf3}},
  1784  	{AXBEGIN, yxbegin, Px, opBytes{0xc7, 0xf8}},
  1785  	{AXABORT, yxabort, Px, opBytes{0xc6, 0xf8}},
  1786  	{AXEND, ynone, Px, opBytes{0x0f, 01, 0xd5}},
  1787  	{AXTEST, ynone, Px, opBytes{0x0f, 01, 0xd6}},
  1788  	{AXGETBV, ynone, Pm, opBytes{01, 0xd0}},
  1789  	{obj.AFUNCDATA, yfuncdata, Px, opBytes{0, 0}},
  1790  	{obj.APCDATA, ypcdata, Px, opBytes{0, 0}},
  1791  	{obj.ADUFFCOPY, yduff, Px, opBytes{0xe8}},
  1792  	{obj.ADUFFZERO, yduff, Px, opBytes{0xe8}},
  1793  
  1794  	{obj.AEND, nil, 0, opBytes{}},
  1795  	{0, nil, 0, opBytes{}},
  1796  }
  1797  
// opindex maps an instruction opcode, masked with obj.AMask, to its entry in
// optab or avxOptab. It is filled in by instinit; slots for opcodes that
// appear in neither table stay nil.
var opindex [(ALAST + 1) & obj.AMask]*Optab
  1799  
  1800  // useAbs reports whether s describes a symbol that must avoid pc-relative addressing.
  1801  // This happens on systems like Solaris that call .so functions instead of system calls.
  1802  // It does not seem to be necessary for any other systems. This is probably working
  1803  // around a Solaris-specific bug that should be fixed differently, but we don't know
  1804  // what that bug is. And this does fix it.
  1805  func useAbs(ctxt *obj.Link, s *obj.LSym) bool {
  1806  	if ctxt.Headtype == objabi.Hsolaris {
  1807  		// All the Solaris dynamic imports from libc.so begin with "libc_".
  1808  		return strings.HasPrefix(s.Name, "libc_")
  1809  	}
  1810  	return ctxt.Arch.Family == sys.I386 && !ctxt.Flag_shared
  1811  }
  1812  
  1813  // single-instruction no-ops of various lengths.
  1814  // constructed by hand and disassembled with gdb to verify.
  1815  // see http://www.agner.org/optimize/optimizing_assembly.pdf for discussion.
  1816  var nop = [][16]uint8{
  1817  	{0x90},
  1818  	{0x66, 0x90},
  1819  	{0x0F, 0x1F, 0x00},
  1820  	{0x0F, 0x1F, 0x40, 0x00},
  1821  	{0x0F, 0x1F, 0x44, 0x00, 0x00},
  1822  	{0x66, 0x0F, 0x1F, 0x44, 0x00, 0x00},
  1823  	{0x0F, 0x1F, 0x80, 0x00, 0x00, 0x00, 0x00},
  1824  	{0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1825  	{0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1826  }
  1827  
  1828  // Native Client rejects the repeated 0x66 prefix.
  1829  // {0x66, 0x66, 0x0F, 0x1F, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
  1830  func fillnop(p []byte, n int) {
  1831  	var m int
  1832  
  1833  	for n > 0 {
  1834  		m = n
  1835  		if m > len(nop) {
  1836  			m = len(nop)
  1837  		}
  1838  		copy(p[:m], nop[m-1][:m])
  1839  		p = p[m:]
  1840  		n -= m
  1841  	}
  1842  }
  1843  
  1844  func noppad(ctxt *obj.Link, s *obj.LSym, c int32, pad int32) int32 {
  1845  	s.Grow(int64(c) + int64(pad))
  1846  	fillnop(s.P[c:], int(pad))
  1847  	return c + pad
  1848  }
  1849  
  1850  func spadjop(ctxt *obj.Link, l, q obj.As) obj.As {
  1851  	if ctxt.Arch.Family != sys.AMD64 || ctxt.Arch.PtrSize == 4 {
  1852  		return l
  1853  	}
  1854  	return q
  1855  }
  1856  
  1857  // isJump returns whether p is a jump instruction.
  1858  // It is used to ensure that no standalone or macro-fused jump will straddle
  1859  // or end on a 32 byte boundary by inserting NOPs before the jumps.
  1860  func isJump(p *obj.Prog) bool {
  1861  	return p.To.Target() != nil || p.As == obj.AJMP || p.As == obj.ACALL ||
  1862  		p.As == obj.ARET || p.As == obj.ADUFFCOPY || p.As == obj.ADUFFZERO
  1863  }
  1864  
  1865  // lookForJCC returns the first real instruction starting from p, if that instruction is a conditional
  1866  // jump. Otherwise, nil is returned.
  1867  func lookForJCC(p *obj.Prog) *obj.Prog {
  1868  	// Skip any PCDATA, FUNCDATA or NOP instructions
  1869  	var q *obj.Prog
  1870  	for q = p.Link; q != nil && (q.As == obj.APCDATA || q.As == obj.AFUNCDATA || q.As == obj.ANOP); q = q.Link {
  1871  	}
  1872  
  1873  	if q == nil || q.To.Target() == nil || p.As == obj.AJMP || p.As == obj.ACALL {
  1874  		return nil
  1875  	}
  1876  
  1877  	switch q.As {
  1878  	case AJOS, AJOC, AJCS, AJCC, AJEQ, AJNE, AJLS, AJHI,
  1879  		AJMI, AJPL, AJPS, AJPC, AJLT, AJGE, AJLE, AJGT:
  1880  	default:
  1881  		return nil
  1882  	}
  1883  
  1884  	return q
  1885  }
  1886  
  1887  // fusedJump determines whether p can be fused with a subsequent conditional jump instruction.
  1888  // If it can, we return true followed by the total size of the fused jump. If it can't, we return false.
  1889  // Macro fusion rules are derived from the Intel Optimization Manual (April 2019) section 3.4.2.2.
  1890  func fusedJump(p *obj.Prog) (bool, uint8) {
  1891  	var fusedSize uint8
  1892  
  1893  	// The first instruction in a macro fused pair may be preceded by the LOCK prefix,
  1894  	// or possibly an XACQUIRE/XRELEASE prefix followed by a LOCK prefix. If it is, we
  1895  	// need to be careful to insert any padding before the locks rather than directly after them.
  1896  
  1897  	if p.As == AXRELEASE || p.As == AXACQUIRE {
  1898  		fusedSize += p.Isize
  1899  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1900  		}
  1901  		if p == nil {
  1902  			return false, 0
  1903  		}
  1904  	}
  1905  	if p.As == ALOCK {
  1906  		fusedSize += p.Isize
  1907  		for p = p.Link; p != nil && (p.As == obj.APCDATA || p.As == obj.AFUNCDATA); p = p.Link {
  1908  		}
  1909  		if p == nil {
  1910  			return false, 0
  1911  		}
  1912  	}
  1913  	cmp := p.As == ACMPB || p.As == ACMPL || p.As == ACMPQ || p.As == ACMPW
  1914  
  1915  	cmpAddSub := p.As == AADDB || p.As == AADDL || p.As == AADDW || p.As == AADDQ ||
  1916  		p.As == ASUBB || p.As == ASUBL || p.As == ASUBW || p.As == ASUBQ || cmp
  1917  
  1918  	testAnd := p.As == ATESTB || p.As == ATESTL || p.As == ATESTQ || p.As == ATESTW ||
  1919  		p.As == AANDB || p.As == AANDL || p.As == AANDQ || p.As == AANDW
  1920  
  1921  	incDec := p.As == AINCB || p.As == AINCL || p.As == AINCQ || p.As == AINCW ||
  1922  		p.As == ADECB || p.As == ADECL || p.As == ADECQ || p.As == ADECW
  1923  
  1924  	if !cmpAddSub && !testAnd && !incDec {
  1925  		return false, 0
  1926  	}
  1927  
  1928  	if !incDec {
  1929  		var argOne obj.AddrType
  1930  		var argTwo obj.AddrType
  1931  		if cmp {
  1932  			argOne = p.From.Type
  1933  			argTwo = p.To.Type
  1934  		} else {
  1935  			argOne = p.To.Type
  1936  			argTwo = p.From.Type
  1937  		}
  1938  		if argOne == obj.TYPE_REG {
  1939  			if argTwo != obj.TYPE_REG && argTwo != obj.TYPE_CONST && argTwo != obj.TYPE_MEM {
  1940  				return false, 0
  1941  			}
  1942  		} else if argOne == obj.TYPE_MEM {
  1943  			if argTwo != obj.TYPE_REG {
  1944  				return false, 0
  1945  			}
  1946  		} else {
  1947  			return false, 0
  1948  		}
  1949  	}
  1950  
  1951  	fusedSize += p.Isize
  1952  	jmp := lookForJCC(p)
  1953  	if jmp == nil {
  1954  		return false, 0
  1955  	}
  1956  
  1957  	fusedSize += jmp.Isize
  1958  
  1959  	if testAnd {
  1960  		return true, fusedSize
  1961  	}
  1962  
  1963  	if jmp.As == AJOC || jmp.As == AJOS || jmp.As == AJMI ||
  1964  		jmp.As == AJPL || jmp.As == AJPS || jmp.As == AJPC {
  1965  		return false, 0
  1966  	}
  1967  
  1968  	if cmpAddSub {
  1969  		return true, fusedSize
  1970  	}
  1971  
  1972  	if jmp.As == AJCS || jmp.As == AJCC || jmp.As == AJHI || jmp.As == AJLS {
  1973  		return false, 0
  1974  	}
  1975  
  1976  	return true, fusedSize
  1977  }
  1978  
// padJumpsCtx is the byte boundary that standalone and macro-fused jumps must
// not cross or end on. A value of 0 disables jump padding.
type padJumpsCtx int32
  1980  
  1981  func makePjcCtx(ctxt *obj.Link) padJumpsCtx {
  1982  	// Disable jump padding on 32 bit builds by setting
  1983  	// padJumps to 0.
  1984  	if ctxt.Arch.Family == sys.I386 {
  1985  		return padJumpsCtx(0)
  1986  	}
  1987  
  1988  	// Disable jump padding for hand written assembly code.
  1989  	if ctxt.IsAsm {
  1990  		return padJumpsCtx(0)
  1991  	}
  1992  
  1993  	return padJumpsCtx(32)
  1994  }
  1995  
  1996  // padJump detects whether the instruction being assembled is a standalone or a macro-fused
  1997  // jump that needs to be padded. If it is, NOPs are inserted to ensure that the jump does
  1998  // not cross or end on a 32 byte boundary.
  1999  func (pjc padJumpsCtx) padJump(ctxt *obj.Link, s *obj.LSym, p *obj.Prog, c int32) int32 {
  2000  	if pjc == 0 {
  2001  		return c
  2002  	}
  2003  
  2004  	var toPad int32
  2005  	fj, fjSize := fusedJump(p)
  2006  	mask := int32(pjc - 1)
  2007  	if fj {
  2008  		if (c&mask)+int32(fjSize) >= int32(pjc) {
  2009  			toPad = int32(pjc) - (c & mask)
  2010  		}
  2011  	} else if isJump(p) {
  2012  		if (c&mask)+int32(p.Isize) >= int32(pjc) {
  2013  			toPad = int32(pjc) - (c & mask)
  2014  		}
  2015  	}
  2016  	if toPad <= 0 {
  2017  		return c
  2018  	}
  2019  
  2020  	return noppad(ctxt, s, c, toPad)
  2021  }
  2022  
  2023  // reAssemble is called if an instruction's size changes during assembly. If
  2024  // it does and the instruction is a standalone or a macro-fused jump we need to
  2025  // reassemble.
  2026  func (pjc padJumpsCtx) reAssemble(p *obj.Prog) bool {
  2027  	if pjc == 0 {
  2028  		return false
  2029  	}
  2030  
  2031  	fj, _ := fusedJump(p)
  2032  	return fj || isJump(p)
  2033  }
  2034  
// nopPad records a run of padding bytes that span6 placed in front of an
// instruction, so that it can be spliced into the Prog list as an ANOP once
// layout has settled.
type nopPad struct {
	p *obj.Prog // Instruction before the pad
	n int32     // Size of the pad in bytes
}
  2039  
// span6 assembles the function symbol s. It walks the Prog list starting at
// s.Func().Text, assigns every Prog its Pc and Isize, writes the encoded
// instructions into s.P and sets s.Size to the total code length.
//
// It does nothing if s.P is already non-nil. Layout is iterative: every
// branch starts out short, and the whole function is assembled again whenever
// a short forward branch turns out to need more than 127 bytes of reach or,
// with jump padding enabled, a jump or fused jump changed size. The function
// gives up with a fatal error after 1000 passes, and returns early when a
// pass that requires another one has reported new errors.
//
// After layout it splices ANOP Progs for the recorded padding into the Prog
// list (never for hand-written assembly), marks the two-instruction TLS
// access sequence as an unsafe point when CanUse1InsnTLS reports false, and
// writes the jump table entries. newprog is only passed on to
// obj.MarkUnsafePoints.
func span6(ctxt *obj.Link, s *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline && ctxt.Arch.Family == sys.I386 {
		ctxt.Diag("-spectre=ret not supported on 386")
		ctxt.Retpoline = false // don't keep printing
	}

	pjc := makePjcCtx(ctxt)

	if s.P != nil {
		return
	}

	// instinit sets ycover[0] (the first diagonal entry), so a zero here
	// means the tables were never built.
	if ycover[0] == 0 {
		ctxt.Diag("x86 tables not initialized, call x86.instinit first")
	}

	// Pre-pass: normalize instructions before any layout is attempted.
	for p := s.Func().Text; p != nil; p = p.Link {
		// A branch without a target is made to target itself.
		if p.To.Type == obj.TYPE_BRANCH && p.To.Target() == nil {
			p.To.SetTarget(p)
		}
		if p.As == AADJSP {
			p.To.Type = obj.TYPE_REG
			p.To.Reg = REG_SP
			// Generate 'ADDQ $x, SP' or 'SUBQ $x, SP', with x positive.
			// One exception: It is smaller to encode $-0x80 than $0x80.
			// For that case, flip the sign and the op:
			// Instead of 'ADDQ $0x80, SP', generate 'SUBQ $-0x80, SP'.
			switch v := p.From.Offset; {
			case v == 0:
				p.As = obj.ANOP
			case v == 0x80 || (v < 0 && v != -0x80):
				p.As = spadjop(ctxt, AADDL, AADDQ)
				p.From.Offset *= -1
			default:
				p.As = spadjop(ctxt, ASUBL, ASUBQ)
			}
		}
		// With retpolines enabled, an indirect CALL/JMP through a register is
		// redirected to the runtime.retpoline<reg> symbol; an indirect
		// CALL/JMP through memory cannot be converted and is reported.
		if ctxt.Retpoline && (p.As == obj.ACALL || p.As == obj.AJMP) && (p.To.Type == obj.TYPE_REG || p.To.Type == obj.TYPE_MEM) {
			if p.To.Type != obj.TYPE_REG {
				ctxt.Diag("non-retpoline-compatible: %v", p)
				continue
			}
			p.To.Type = obj.TYPE_BRANCH
			p.To.Name = obj.NAME_EXTERN
			p.To.Sym = ctxt.Lookup("runtime.retpoline" + obj.Rconv(int(p.To.Reg)))
			p.To.Reg = 0
			p.To.Offset = 0
		}
	}

	var count int64 // rough count of number of instructions
	for p := s.Func().Text; p != nil; p = p.Link {
		count++
		p.Back = branchShort // use short branches first time through
		// A target that already carries branchShort was visited earlier in
		// this loop (or is p itself), so the branch goes backwards and the
		// target is a loop head. NOTE(review): this assumes Back is zero on
		// entry — confirm.
		if q := p.To.Target(); q != nil && (q.Back&branchShort != 0) {
			p.Back |= branchBackwards
			q.Back |= branchLoopHead
		}
	}
	s.GrowCap(count * 5) // preallocate roughly 5 bytes per instruction

	var ab AsmBuf
	var n int   // number of assembly passes so far
	var c int32 // current code offset within s.P
	errors := ctxt.Errors
	var nops []nopPad // Padding for a particular assembly (reuse slice storage if multiple assemblies)
	nrelocs0 := len(s.R)
	for {
		// This loop continues while there are reasons to re-assemble
		// whole block, like the presence of long forward jumps.
		reAssemble := false
		// Discard the relocations and code produced by the previous pass.
		for i := range s.R[nrelocs0:] {
			s.R[nrelocs0+i] = obj.Reloc{}
		}
		s.R = s.R[:nrelocs0] // preserve marker relocations generated by the compiler
		s.P = s.P[:0]
		c = 0
		var pPrev *obj.Prog
		nops = nops[:0]
		for p := s.Func().Text; p != nil; p = p.Link {
			c0 := c // offset before any padding for p
			c = pjc.padJump(ctxt, s, p, c)

			if maxLoopPad > 0 && p.Back&branchLoopHead != 0 && c&(loopAlign-1) != 0 {
				// pad with NOPs
				v := -c & (loopAlign - 1)

				if v <= maxLoopPad {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
					c += v
				}
			}

			p.Pc = int64(c)

			// process forward jumps to p
			// NOTE(review): the p.Rel/Forwd list is presumably built while
			// assembling earlier branches that target p — confirm in asmins.
			for q := p.Rel; q != nil; q = q.Forwd {
				// Displacement from the end of the branch q to p.
				v := int32(p.Pc - (q.Pc + int64(q.Isize)))
				if q.Back&branchShort != 0 {
					if v > 127 {
						// Out of reach for a short branch: clear the
						// flag and assemble the function again.
						reAssemble = true
						q.Back ^= branchShort
					}

					// The displacement byte sits one byte further in for
					// JCXZL and XBEGIN than for the other short branches.
					if q.As == AJCXZL || q.As == AXBEGIN {
						s.P[q.Pc+2] = byte(v)
					} else {
						s.P[q.Pc+1] = byte(v)
					}
				} else {
					// Long branch: the 32-bit displacement occupies the last
					// four bytes of the instruction.
					binary.LittleEndian.PutUint32(s.P[q.Pc+int64(q.Isize)-4:], uint32(v))
				}
			}

			// Alignment directives emit only padding, no instruction bytes.
			if p.As == obj.APCALIGN || p.As == obj.APCALIGNMAX {
				v := obj.AlignmentPadding(c, p, ctxt, s)
				if v > 0 {
					s.Grow(int64(c) + int64(v))
					fillnop(s.P[c:], int(v))
				}
				p.Pc = int64(c)
				c += int32(v)
				pPrev = p
				continue
			}

			p.Rel = nil

			p.Pc = int64(c)
			ab.asmins(ctxt, s, p)
			m := ab.Len()
			if int(p.Isize) != m {
				p.Isize = uint8(m)
				if pjc.reAssemble(p) {
					// We need to re-assemble here to check for jumps and fused jumps
					// that span or end on 32 byte boundaries.
					reAssemble = true
				}
			}

			s.Grow(p.Pc + int64(m))
			copy(s.P[p.Pc:], ab.Bytes())
			// If there was padding, remember it.
			if pPrev != nil && !ctxt.IsAsm && c > c0 {
				nops = append(nops, nopPad{p: pPrev, n: c - c0})
			}
			c += int32(m)
			pPrev = p
		}

		n++
		if n > 1000 {
			ctxt.Diag("span must be looping")
			log.Fatalf("loop")
		}
		if !reAssemble {
			break
		}
		// Stop iterating if this pass reported new errors.
		if ctxt.Errors > errors {
			return
		}
	}
	// splice padding nops into Progs
	for _, n := range nops {
		pp := n.p
		np := &obj.Prog{Link: pp.Link, Ctxt: pp.Ctxt, As: obj.ANOP, Pos: pp.Pos.WithNotStmt(), Pc: pp.Pc + int64(pp.Isize), Isize: uint8(n.n)}
		pp.Link = np
	}

	s.Size = int64(c)

	// Disabled debugging dump of the assembled bytes and relocations.
	if false { /* debug['a'] > 1 */
		fmt.Printf("span1 %s %d (%d tries)\n %.6x", s.Name, s.Size, n, 0)
		var i int
		for i = 0; i < len(s.P); i++ {
			fmt.Printf(" %.2x", s.P[i])
			if i%16 == 15 {
				fmt.Printf("\n  %.6x", uint(i+1))
			}
		}

		if i%16 != 0 {
			fmt.Printf("\n")
		}

		for i := 0; i < len(s.R); i++ {
			r := &s.R[i]
			fmt.Printf(" rel %#.4x/%d %s%+d\n", uint32(r.Off), r.Siz, r.Sym.Name, r.Add)
		}
	}

	// Mark nonpreemptible instruction sequences.
	// The 2-instruction TLS access sequence
	//	MOVQ TLS, BX
	//	MOVQ 0(BX)(TLS*1), BX
	// is not async preemptible, as if it is preempted and resumed on
	// a different thread, the TLS address may become invalid.
	if !CanUse1InsnTLS(ctxt) {
		useTLS := func(p *obj.Prog) bool {
			// Only need to mark the second instruction, which has
			// REG_TLS as Index. (It is okay to interrupt and restart
			// the first instruction.)
			return p.From.Index == REG_TLS
		}
		obj.MarkUnsafePoints(ctxt, s.Func().Text, newprog, useTLS, nil)
	}

	// Now that we know byte offsets, we can generate jump table entries.
	// TODO: could this live in obj instead of obj/$ARCH?
	for _, jt := range s.Func().JumpTables {
		for i, p := range jt.Targets {
			// The ith jumptable entry points to the p.Pc'th
			// byte in the function symbol s.
			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, s, p.Pc)
		}
	}
}
  2258  
  2259  func instinit(ctxt *obj.Link) {
  2260  	if ycover[0] != 0 {
  2261  		// Already initialized; stop now.
  2262  		// This happens in the cmd/asm tests,
  2263  		// each of which re-initializes the arch.
  2264  		return
  2265  	}
  2266  
  2267  	switch ctxt.Headtype {
  2268  	case objabi.Hplan9:
  2269  		plan9privates = ctxt.Lookup("_privates")
  2270  	}
  2271  
  2272  	for i := range avxOptab {
  2273  		c := avxOptab[i].as
  2274  		if opindex[c&obj.AMask] != nil {
  2275  			ctxt.Diag("phase error in avxOptab: %d (%v)", i, c)
  2276  		}
  2277  		opindex[c&obj.AMask] = &avxOptab[i]
  2278  	}
  2279  	for i := 1; optab[i].as != 0; i++ {
  2280  		c := optab[i].as
  2281  		if opindex[c&obj.AMask] != nil {
  2282  			ctxt.Diag("phase error in optab: %d (%v)", i, c)
  2283  		}
  2284  		opindex[c&obj.AMask] = &optab[i]
  2285  	}
  2286  
  2287  	for i := 0; i < Ymax; i++ {
  2288  		ycover[i*Ymax+i] = 1
  2289  	}
  2290  
  2291  	ycover[Yi0*Ymax+Yu2] = 1
  2292  	ycover[Yi1*Ymax+Yu2] = 1
  2293  
  2294  	ycover[Yi0*Ymax+Yi8] = 1
  2295  	ycover[Yi1*Ymax+Yi8] = 1
  2296  	ycover[Yu2*Ymax+Yi8] = 1
  2297  	ycover[Yu7*Ymax+Yi8] = 1
  2298  
  2299  	ycover[Yi0*Ymax+Yu7] = 1
  2300  	ycover[Yi1*Ymax+Yu7] = 1
  2301  	ycover[Yu2*Ymax+Yu7] = 1
  2302  
  2303  	ycover[Yi0*Ymax+Yu8] = 1
  2304  	ycover[Yi1*Ymax+Yu8] = 1
  2305  	ycover[Yu2*Ymax+Yu8] = 1
  2306  	ycover[Yu7*Ymax+Yu8] = 1
  2307  
  2308  	ycover[Yi0*Ymax+Ys32] = 1
  2309  	ycover[Yi1*Ymax+Ys32] = 1
  2310  	ycover[Yu2*Ymax+Ys32] = 1
  2311  	ycover[Yu7*Ymax+Ys32] = 1
  2312  	ycover[Yu8*Ymax+Ys32] = 1
  2313  	ycover[Yi8*Ymax+Ys32] = 1
  2314  
  2315  	ycover[Yi0*Ymax+Yi32] = 1
  2316  	ycover[Yi1*Ymax+Yi32] = 1
  2317  	ycover[Yu2*Ymax+Yi32] = 1
  2318  	ycover[Yu7*Ymax+Yi32] = 1
  2319  	ycover[Yu8*Ymax+Yi32] = 1
  2320  	ycover[Yi8*Ymax+Yi32] = 1
  2321  	ycover[Ys32*Ymax+Yi32] = 1
  2322  
  2323  	ycover[Yi0*Ymax+Yi64] = 1
  2324  	ycover[Yi1*Ymax+Yi64] = 1
  2325  	ycover[Yu7*Ymax+Yi64] = 1
  2326  	ycover[Yu2*Ymax+Yi64] = 1
  2327  	ycover[Yu8*Ymax+Yi64] = 1
  2328  	ycover[Yi8*Ymax+Yi64] = 1
  2329  	ycover[Ys32*Ymax+Yi64] = 1
  2330  	ycover[Yi32*Ymax+Yi64] = 1
  2331  
  2332  	ycover[Yal*Ymax+Yrb] = 1
  2333  	ycover[Ycl*Ymax+Yrb] = 1
  2334  	ycover[Yax*Ymax+Yrb] = 1
  2335  	ycover[Ycx*Ymax+Yrb] = 1
  2336  	ycover[Yrx*Ymax+Yrb] = 1
  2337  	ycover[Yrl*Ymax+Yrb] = 1 // but not Yrl32
  2338  
  2339  	ycover[Ycl*Ymax+Ycx] = 1
  2340  
  2341  	ycover[Yax*Ymax+Yrx] = 1
  2342  	ycover[Ycx*Ymax+Yrx] = 1
  2343  
  2344  	ycover[Yax*Ymax+Yrl] = 1
  2345  	ycover[Ycx*Ymax+Yrl] = 1
  2346  	ycover[Yrx*Ymax+Yrl] = 1
  2347  	ycover[Yrl32*Ymax+Yrl] = 1
  2348  
  2349  	ycover[Yf0*Ymax+Yrf] = 1
  2350  
  2351  	ycover[Yal*Ymax+Ymb] = 1
  2352  	ycover[Ycl*Ymax+Ymb] = 1
  2353  	ycover[Yax*Ymax+Ymb] = 1
  2354  	ycover[Ycx*Ymax+Ymb] = 1
  2355  	ycover[Yrx*Ymax+Ymb] = 1
  2356  	ycover[Yrb*Ymax+Ymb] = 1
  2357  	ycover[Yrl*Ymax+Ymb] = 1 // but not Yrl32
  2358  	ycover[Ym*Ymax+Ymb] = 1
  2359  
  2360  	ycover[Yax*Ymax+Yml] = 1
  2361  	ycover[Ycx*Ymax+Yml] = 1
  2362  	ycover[Yrx*Ymax+Yml] = 1
  2363  	ycover[Yrl*Ymax+Yml] = 1
  2364  	ycover[Yrl32*Ymax+Yml] = 1
  2365  	ycover[Ym*Ymax+Yml] = 1
  2366  
  2367  	ycover[Yax*Ymax+Ymm] = 1
  2368  	ycover[Ycx*Ymax+Ymm] = 1
  2369  	ycover[Yrx*Ymax+Ymm] = 1
  2370  	ycover[Yrl*Ymax+Ymm] = 1
  2371  	ycover[Yrl32*Ymax+Ymm] = 1
  2372  	ycover[Ym*Ymax+Ymm] = 1
  2373  	ycover[Ymr*Ymax+Ymm] = 1
  2374  
  2375  	ycover[Yxr0*Ymax+Yxr] = 1
  2376  
  2377  	ycover[Ym*Ymax+Yxm] = 1
  2378  	ycover[Yxr0*Ymax+Yxm] = 1
  2379  	ycover[Yxr*Ymax+Yxm] = 1
  2380  
  2381  	ycover[Ym*Ymax+Yym] = 1
  2382  	ycover[Yyr*Ymax+Yym] = 1
  2383  
  2384  	ycover[Yxr0*Ymax+YxrEvex] = 1
  2385  	ycover[Yxr*Ymax+YxrEvex] = 1
  2386  
  2387  	ycover[Ym*Ymax+YxmEvex] = 1
  2388  	ycover[Yxr0*Ymax+YxmEvex] = 1
  2389  	ycover[Yxr*Ymax+YxmEvex] = 1
  2390  	ycover[YxrEvex*Ymax+YxmEvex] = 1
  2391  
  2392  	ycover[Yyr*Ymax+YyrEvex] = 1
  2393  
  2394  	ycover[Ym*Ymax+YymEvex] = 1
  2395  	ycover[Yyr*Ymax+YymEvex] = 1
  2396  	ycover[YyrEvex*Ymax+YymEvex] = 1
  2397  
  2398  	ycover[Ym*Ymax+Yzm] = 1
  2399  	ycover[Yzr*Ymax+Yzm] = 1
  2400  
  2401  	ycover[Yk0*Ymax+Yk] = 1
  2402  	ycover[Yknot0*Ymax+Yk] = 1
  2403  
  2404  	ycover[Yk0*Ymax+Ykm] = 1
  2405  	ycover[Yknot0*Ymax+Ykm] = 1
  2406  	ycover[Yk*Ymax+Ykm] = 1
  2407  	ycover[Ym*Ymax+Ykm] = 1
  2408  
  2409  	ycover[Yxvm*Ymax+YxvmEvex] = 1
  2410  
  2411  	ycover[Yyvm*Ymax+YyvmEvex] = 1
  2412  
  2413  	for i := 0; i < MAXREG; i++ {
  2414  		reg[i] = -1
  2415  		if i >= REG_AL && i <= REG_R15B {
  2416  			reg[i] = (i - REG_AL) & 7
  2417  			if i >= REG_SPB && i <= REG_DIB {
  2418  				regrex[i] = 0x40
  2419  			}
  2420  			if i >= REG_R8B && i <= REG_R15B {
  2421  				regrex[i] = Rxr | Rxx | Rxb
  2422  			}
  2423  		}
  2424  
  2425  		if i >= REG_AH && i <= REG_BH {
  2426  			reg[i] = 4 + ((i - REG_AH) & 7)
  2427  		}
  2428  		if i >= REG_AX && i <= REG_R15 {
  2429  			reg[i] = (i - REG_AX) & 7
  2430  			if i >= REG_R8 {
  2431  				regrex[i] = Rxr | Rxx | Rxb
  2432  			}
  2433  		}
  2434  
  2435  		if i >= REG_F0 && i <= REG_F0+7 {
  2436  			reg[i] = (i - REG_F0) & 7
  2437  		}
  2438  		if i >= REG_M0 && i <= REG_M0+7 {
  2439  			reg[i] = (i - REG_M0) & 7
  2440  		}
  2441  		if i >= REG_K0 && i <= REG_K0+7 {
  2442  			reg[i] = (i - REG_K0) & 7
  2443  		}
  2444  		if i >= REG_X0 && i <= REG_X0+15 {
  2445  			reg[i] = (i - REG_X0) & 7
  2446  			if i >= REG_X0+8 {
  2447  				regrex[i] = Rxr | Rxx | Rxb
  2448  			}
  2449  		}
  2450  		if i >= REG_X16 && i <= REG_X16+15 {
  2451  			reg[i] = (i - REG_X16) & 7
  2452  			if i >= REG_X16+8 {
  2453  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2454  			} else {
  2455  				regrex[i] = RxrEvex
  2456  			}
  2457  		}
  2458  		if i >= REG_Y0 && i <= REG_Y0+15 {
  2459  			reg[i] = (i - REG_Y0) & 7
  2460  			if i >= REG_Y0+8 {
  2461  				regrex[i] = Rxr | Rxx | Rxb
  2462  			}
  2463  		}
  2464  		if i >= REG_Y16 && i <= REG_Y16+15 {
  2465  			reg[i] = (i - REG_Y16) & 7
  2466  			if i >= REG_Y16+8 {
  2467  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2468  			} else {
  2469  				regrex[i] = RxrEvex
  2470  			}
  2471  		}
  2472  		if i >= REG_Z0 && i <= REG_Z0+15 {
  2473  			reg[i] = (i - REG_Z0) & 7
  2474  			if i > REG_Z0+7 {
  2475  				regrex[i] = Rxr | Rxx | Rxb
  2476  			}
  2477  		}
  2478  		if i >= REG_Z16 && i <= REG_Z16+15 {
  2479  			reg[i] = (i - REG_Z16) & 7
  2480  			if i >= REG_Z16+8 {
  2481  				regrex[i] = Rxr | Rxx | Rxb | RxrEvex
  2482  			} else {
  2483  				regrex[i] = RxrEvex
  2484  			}
  2485  		}
  2486  
  2487  		if i >= REG_CR+8 && i <= REG_CR+15 {
  2488  			regrex[i] = Rxr
  2489  		}
  2490  	}
  2491  }
  2492  
// isAndroid is true when the configured target GOOS is "android".
// prefixof and vaddr consult it when selecting TLS segment prefixes
// and TLS relocations.
var isAndroid = buildcfg.GOOS == "android"
  2494  
  2495  func prefixof(ctxt *obj.Link, a *obj.Addr) int {
  2496  	if a.Reg < REG_CS && a.Index < REG_CS { // fast path
  2497  		return 0
  2498  	}
  2499  	if a.Type == obj.TYPE_MEM && a.Name == obj.NAME_NONE {
  2500  		switch a.Reg {
  2501  		case REG_CS:
  2502  			return 0x2e
  2503  
  2504  		case REG_DS:
  2505  			return 0x3e
  2506  
  2507  		case REG_ES:
  2508  			return 0x26
  2509  
  2510  		case REG_FS:
  2511  			return 0x64
  2512  
  2513  		case REG_GS:
  2514  			return 0x65
  2515  
  2516  		case REG_TLS:
  2517  			// NOTE: Systems listed here should be only systems that
  2518  			// support direct TLS references like 8(TLS) implemented as
  2519  			// direct references from FS or GS. Systems that require
  2520  			// the initial-exec model, where you load the TLS base into
  2521  			// a register and then index from that register, do not reach
  2522  			// this code and should not be listed.
  2523  			if ctxt.Arch.Family == sys.I386 {
  2524  				switch ctxt.Headtype {
  2525  				default:
  2526  					if isAndroid {
  2527  						return 0x65 // GS
  2528  					}
  2529  					log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2530  
  2531  				case objabi.Hdarwin,
  2532  					objabi.Hdragonfly,
  2533  					objabi.Hfreebsd,
  2534  					objabi.Hnetbsd,
  2535  					objabi.Hopenbsd:
  2536  					return 0x65 // GS
  2537  				}
  2538  			}
  2539  
  2540  			switch ctxt.Headtype {
  2541  			default:
  2542  				log.Fatalf("unknown TLS base register for %v", ctxt.Headtype)
  2543  
  2544  			case objabi.Hlinux:
  2545  				if isAndroid {
  2546  					return 0x64 // FS
  2547  				}
  2548  
  2549  				if ctxt.Flag_shared {
  2550  					log.Fatalf("unknown TLS base register for linux with -shared")
  2551  				} else {
  2552  					return 0x64 // FS
  2553  				}
  2554  
  2555  			case objabi.Hdragonfly,
  2556  				objabi.Hfreebsd,
  2557  				objabi.Hnetbsd,
  2558  				objabi.Hopenbsd,
  2559  				objabi.Hsolaris:
  2560  				return 0x64 // FS
  2561  
  2562  			case objabi.Hdarwin:
  2563  				return 0x65 // GS
  2564  			}
  2565  		}
  2566  	}
  2567  
  2568  	switch a.Index {
  2569  	case REG_CS:
  2570  		return 0x2e
  2571  
  2572  	case REG_DS:
  2573  		return 0x3e
  2574  
  2575  	case REG_ES:
  2576  		return 0x26
  2577  
  2578  	case REG_TLS:
  2579  		if ctxt.Flag_shared && ctxt.Headtype != objabi.Hwindows {
  2580  			// When building for inclusion into a shared library, an instruction of the form
  2581  			//     MOV off(CX)(TLS*1), AX
  2582  			// becomes
  2583  			//     mov %gs:off(%ecx), %eax // on i386
  2584  			//     mov %fs:off(%rcx), %rax // on amd64
  2585  			// which assumes that the correct TLS offset has been loaded into CX (today
  2586  			// there is only one TLS variable -- g -- so this is OK). When not building for
  2587  			// a shared library the instruction it becomes
  2588  			//     mov 0x0(%ecx), %eax // on i386
  2589  			//     mov 0x0(%rcx), %rax // on amd64
  2590  			// and a R_TLS_LE relocation, and so does not require a prefix.
  2591  			if ctxt.Arch.Family == sys.I386 {
  2592  				return 0x65 // GS
  2593  			}
  2594  			return 0x64 // FS
  2595  		}
  2596  
  2597  	case REG_FS:
  2598  		return 0x64
  2599  
  2600  	case REG_GS:
  2601  		return 0x65
  2602  	}
  2603  
  2604  	return 0
  2605  }
  2606  
  2607  // oclassRegList returns multisource operand class for addr.
  2608  func oclassRegList(ctxt *obj.Link, addr *obj.Addr) int {
  2609  	// TODO(quasilyte): when oclass register case is refactored into
  2610  	// lookup table, use it here to get register kind more easily.
  2611  	// Helper functions like regIsXmm should go away too (they will become redundant).
  2612  
  2613  	regIsXmm := func(r int) bool { return r >= REG_X0 && r <= REG_X31 }
  2614  	regIsYmm := func(r int) bool { return r >= REG_Y0 && r <= REG_Y31 }
  2615  	regIsZmm := func(r int) bool { return r >= REG_Z0 && r <= REG_Z31 }
  2616  
  2617  	reg0, reg1 := decodeRegisterRange(addr.Offset)
  2618  	low := regIndex(int16(reg0))
  2619  	high := regIndex(int16(reg1))
  2620  
  2621  	if ctxt.Arch.Family == sys.I386 {
  2622  		if low >= 8 || high >= 8 {
  2623  			return Yxxx
  2624  		}
  2625  	}
  2626  
  2627  	switch high - low {
  2628  	case 3:
  2629  		switch {
  2630  		case regIsXmm(reg0) && regIsXmm(reg1):
  2631  			return YxrEvexMulti4
  2632  		case regIsYmm(reg0) && regIsYmm(reg1):
  2633  			return YyrEvexMulti4
  2634  		case regIsZmm(reg0) && regIsZmm(reg1):
  2635  			return YzrMulti4
  2636  		default:
  2637  			return Yxxx
  2638  		}
  2639  	default:
  2640  		return Yxxx
  2641  	}
  2642  }
  2643  
  2644  // oclassVMem returns V-mem (vector memory with VSIB) operand class.
  2645  // For addr that is not V-mem returns (Yxxx, false).
  2646  func oclassVMem(ctxt *obj.Link, addr *obj.Addr) (int, bool) {
  2647  	switch addr.Index {
  2648  	case REG_X0 + 0,
  2649  		REG_X0 + 1,
  2650  		REG_X0 + 2,
  2651  		REG_X0 + 3,
  2652  		REG_X0 + 4,
  2653  		REG_X0 + 5,
  2654  		REG_X0 + 6,
  2655  		REG_X0 + 7:
  2656  		return Yxvm, true
  2657  	case REG_X8 + 0,
  2658  		REG_X8 + 1,
  2659  		REG_X8 + 2,
  2660  		REG_X8 + 3,
  2661  		REG_X8 + 4,
  2662  		REG_X8 + 5,
  2663  		REG_X8 + 6,
  2664  		REG_X8 + 7:
  2665  		if ctxt.Arch.Family == sys.I386 {
  2666  			return Yxxx, true
  2667  		}
  2668  		return Yxvm, true
  2669  	case REG_X16 + 0,
  2670  		REG_X16 + 1,
  2671  		REG_X16 + 2,
  2672  		REG_X16 + 3,
  2673  		REG_X16 + 4,
  2674  		REG_X16 + 5,
  2675  		REG_X16 + 6,
  2676  		REG_X16 + 7,
  2677  		REG_X16 + 8,
  2678  		REG_X16 + 9,
  2679  		REG_X16 + 10,
  2680  		REG_X16 + 11,
  2681  		REG_X16 + 12,
  2682  		REG_X16 + 13,
  2683  		REG_X16 + 14,
  2684  		REG_X16 + 15:
  2685  		if ctxt.Arch.Family == sys.I386 {
  2686  			return Yxxx, true
  2687  		}
  2688  		return YxvmEvex, true
  2689  
  2690  	case REG_Y0 + 0,
  2691  		REG_Y0 + 1,
  2692  		REG_Y0 + 2,
  2693  		REG_Y0 + 3,
  2694  		REG_Y0 + 4,
  2695  		REG_Y0 + 5,
  2696  		REG_Y0 + 6,
  2697  		REG_Y0 + 7:
  2698  		return Yyvm, true
  2699  	case REG_Y8 + 0,
  2700  		REG_Y8 + 1,
  2701  		REG_Y8 + 2,
  2702  		REG_Y8 + 3,
  2703  		REG_Y8 + 4,
  2704  		REG_Y8 + 5,
  2705  		REG_Y8 + 6,
  2706  		REG_Y8 + 7:
  2707  		if ctxt.Arch.Family == sys.I386 {
  2708  			return Yxxx, true
  2709  		}
  2710  		return Yyvm, true
  2711  	case REG_Y16 + 0,
  2712  		REG_Y16 + 1,
  2713  		REG_Y16 + 2,
  2714  		REG_Y16 + 3,
  2715  		REG_Y16 + 4,
  2716  		REG_Y16 + 5,
  2717  		REG_Y16 + 6,
  2718  		REG_Y16 + 7,
  2719  		REG_Y16 + 8,
  2720  		REG_Y16 + 9,
  2721  		REG_Y16 + 10,
  2722  		REG_Y16 + 11,
  2723  		REG_Y16 + 12,
  2724  		REG_Y16 + 13,
  2725  		REG_Y16 + 14,
  2726  		REG_Y16 + 15:
  2727  		if ctxt.Arch.Family == sys.I386 {
  2728  			return Yxxx, true
  2729  		}
  2730  		return YyvmEvex, true
  2731  
  2732  	case REG_Z0 + 0,
  2733  		REG_Z0 + 1,
  2734  		REG_Z0 + 2,
  2735  		REG_Z0 + 3,
  2736  		REG_Z0 + 4,
  2737  		REG_Z0 + 5,
  2738  		REG_Z0 + 6,
  2739  		REG_Z0 + 7:
  2740  		return Yzvm, true
  2741  	case REG_Z8 + 0,
  2742  		REG_Z8 + 1,
  2743  		REG_Z8 + 2,
  2744  		REG_Z8 + 3,
  2745  		REG_Z8 + 4,
  2746  		REG_Z8 + 5,
  2747  		REG_Z8 + 6,
  2748  		REG_Z8 + 7,
  2749  		REG_Z8 + 8,
  2750  		REG_Z8 + 9,
  2751  		REG_Z8 + 10,
  2752  		REG_Z8 + 11,
  2753  		REG_Z8 + 12,
  2754  		REG_Z8 + 13,
  2755  		REG_Z8 + 14,
  2756  		REG_Z8 + 15,
  2757  		REG_Z8 + 16,
  2758  		REG_Z8 + 17,
  2759  		REG_Z8 + 18,
  2760  		REG_Z8 + 19,
  2761  		REG_Z8 + 20,
  2762  		REG_Z8 + 21,
  2763  		REG_Z8 + 22,
  2764  		REG_Z8 + 23:
  2765  		if ctxt.Arch.Family == sys.I386 {
  2766  			return Yxxx, true
  2767  		}
  2768  		return Yzvm, true
  2769  	}
  2770  
  2771  	return Yxxx, false
  2772  }
  2773  
  2774  func oclass(ctxt *obj.Link, p *obj.Prog, a *obj.Addr) int {
  2775  	switch a.Type {
  2776  	case obj.TYPE_REGLIST:
  2777  		return oclassRegList(ctxt, a)
  2778  
  2779  	case obj.TYPE_NONE:
  2780  		return Ynone
  2781  
  2782  	case obj.TYPE_BRANCH:
  2783  		return Ybr
  2784  
  2785  	case obj.TYPE_INDIR:
  2786  		if a.Name != obj.NAME_NONE && a.Reg == REG_NONE && a.Index == REG_NONE && a.Scale == 0 {
  2787  			return Yindir
  2788  		}
  2789  		return Yxxx
  2790  
  2791  	case obj.TYPE_MEM:
  2792  		// Pseudo registers have negative index, but SP is
  2793  		// not pseudo on x86, hence REG_SP check is not redundant.
  2794  		if a.Index == REG_SP || a.Index < 0 {
  2795  			// Can't use FP/SB/PC/SP as the index register.
  2796  			return Yxxx
  2797  		}
  2798  
  2799  		if vmem, ok := oclassVMem(ctxt, a); ok {
  2800  			return vmem
  2801  		}
  2802  
  2803  		if ctxt.Arch.Family == sys.AMD64 {
  2804  			switch a.Name {
  2805  			case obj.NAME_EXTERN, obj.NAME_STATIC, obj.NAME_GOTREF:
  2806  				// Global variables can't use index registers and their
  2807  				// base register is %rip (%rip is encoded as REG_NONE).
  2808  				if a.Reg != REG_NONE || a.Index != REG_NONE || a.Scale != 0 {
  2809  					return Yxxx
  2810  				}
  2811  			case obj.NAME_AUTO, obj.NAME_PARAM:
  2812  				// These names must have a base of SP.  The old compiler
  2813  				// uses 0 for the base register. SSA uses REG_SP.
  2814  				if a.Reg != REG_SP && a.Reg != 0 {
  2815  					return Yxxx
  2816  				}
  2817  			case obj.NAME_NONE:
  2818  				// everything is ok
  2819  			default:
  2820  				// unknown name
  2821  				return Yxxx
  2822  			}
  2823  		}
  2824  		return Ym
  2825  
  2826  	case obj.TYPE_ADDR:
  2827  		switch a.Name {
  2828  		case obj.NAME_GOTREF:
  2829  			ctxt.Diag("unexpected TYPE_ADDR with NAME_GOTREF")
  2830  			return Yxxx
  2831  
  2832  		case obj.NAME_EXTERN,
  2833  			obj.NAME_STATIC:
  2834  			if a.Sym != nil && useAbs(ctxt, a.Sym) {
  2835  				return Yi32
  2836  			}
  2837  			return Yiauto // use pc-relative addressing
  2838  
  2839  		case obj.NAME_AUTO,
  2840  			obj.NAME_PARAM:
  2841  			return Yiauto
  2842  		}
  2843  
  2844  		// TODO(rsc): DUFFZERO/DUFFCOPY encoding forgot to set a->index
  2845  		// and got Yi32 in an earlier version of this code.
  2846  		// Keep doing that until we fix yduff etc.
  2847  		if a.Sym != nil && strings.HasPrefix(a.Sym.Name, "runtime.duff") {
  2848  			return Yi32
  2849  		}
  2850  
  2851  		if a.Sym != nil || a.Name != obj.NAME_NONE {
  2852  			ctxt.Diag("unexpected addr: %v", obj.Dconv(p, a))
  2853  		}
  2854  		fallthrough
  2855  
  2856  	case obj.TYPE_CONST:
  2857  		if a.Sym != nil {
  2858  			ctxt.Diag("TYPE_CONST with symbol: %v", obj.Dconv(p, a))
  2859  		}
  2860  
  2861  		v := a.Offset
  2862  		if ctxt.Arch.Family == sys.I386 {
  2863  			v = int64(int32(v))
  2864  		}
  2865  		switch {
  2866  		case v == 0:
  2867  			return Yi0
  2868  		case v == 1:
  2869  			return Yi1
  2870  		case v >= 0 && v <= 3:
  2871  			return Yu2
  2872  		case v >= 0 && v <= 127:
  2873  			return Yu7
  2874  		case v >= 0 && v <= 255:
  2875  			return Yu8
  2876  		case v >= -128 && v <= 127:
  2877  			return Yi8
  2878  		}
  2879  		if ctxt.Arch.Family == sys.I386 {
  2880  			return Yi32
  2881  		}
  2882  		l := int32(v)
  2883  		if int64(l) == v {
  2884  			return Ys32 // can sign extend
  2885  		}
  2886  		if v>>32 == 0 {
  2887  			return Yi32 // unsigned
  2888  		}
  2889  		return Yi64
  2890  
  2891  	case obj.TYPE_TEXTSIZE:
  2892  		return Ytextsize
  2893  	}
  2894  
  2895  	if a.Type != obj.TYPE_REG {
  2896  		ctxt.Diag("unexpected addr1: type=%d %v", a.Type, obj.Dconv(p, a))
  2897  		return Yxxx
  2898  	}
  2899  
  2900  	switch a.Reg {
  2901  	case REG_AL:
  2902  		return Yal
  2903  
  2904  	case REG_AX:
  2905  		return Yax
  2906  
  2907  		/*
  2908  			case REG_SPB:
  2909  		*/
  2910  	case REG_BPB,
  2911  		REG_SIB,
  2912  		REG_DIB,
  2913  		REG_R8B,
  2914  		REG_R9B,
  2915  		REG_R10B,
  2916  		REG_R11B,
  2917  		REG_R12B,
  2918  		REG_R13B,
  2919  		REG_R14B,
  2920  		REG_R15B:
  2921  		if ctxt.Arch.Family == sys.I386 {
  2922  			return Yxxx
  2923  		}
  2924  		fallthrough
  2925  
  2926  	case REG_DL,
  2927  		REG_BL,
  2928  		REG_AH,
  2929  		REG_CH,
  2930  		REG_DH,
  2931  		REG_BH:
  2932  		return Yrb
  2933  
  2934  	case REG_CL:
  2935  		return Ycl
  2936  
  2937  	case REG_CX:
  2938  		return Ycx
  2939  
  2940  	case REG_DX, REG_BX:
  2941  		return Yrx
  2942  
  2943  	case REG_R8, // not really Yrl
  2944  		REG_R9,
  2945  		REG_R10,
  2946  		REG_R11,
  2947  		REG_R12,
  2948  		REG_R13,
  2949  		REG_R14,
  2950  		REG_R15:
  2951  		if ctxt.Arch.Family == sys.I386 {
  2952  			return Yxxx
  2953  		}
  2954  		fallthrough
  2955  
  2956  	case REG_SP, REG_BP, REG_SI, REG_DI:
  2957  		if ctxt.Arch.Family == sys.I386 {
  2958  			return Yrl32
  2959  		}
  2960  		return Yrl
  2961  
  2962  	case REG_F0 + 0:
  2963  		return Yf0
  2964  
  2965  	case REG_F0 + 1,
  2966  		REG_F0 + 2,
  2967  		REG_F0 + 3,
  2968  		REG_F0 + 4,
  2969  		REG_F0 + 5,
  2970  		REG_F0 + 6,
  2971  		REG_F0 + 7:
  2972  		return Yrf
  2973  
  2974  	case REG_M0 + 0,
  2975  		REG_M0 + 1,
  2976  		REG_M0 + 2,
  2977  		REG_M0 + 3,
  2978  		REG_M0 + 4,
  2979  		REG_M0 + 5,
  2980  		REG_M0 + 6,
  2981  		REG_M0 + 7:
  2982  		return Ymr
  2983  
  2984  	case REG_X0:
  2985  		return Yxr0
  2986  
  2987  	case REG_X0 + 1,
  2988  		REG_X0 + 2,
  2989  		REG_X0 + 3,
  2990  		REG_X0 + 4,
  2991  		REG_X0 + 5,
  2992  		REG_X0 + 6,
  2993  		REG_X0 + 7,
  2994  		REG_X0 + 8,
  2995  		REG_X0 + 9,
  2996  		REG_X0 + 10,
  2997  		REG_X0 + 11,
  2998  		REG_X0 + 12,
  2999  		REG_X0 + 13,
  3000  		REG_X0 + 14,
  3001  		REG_X0 + 15:
  3002  		return Yxr
  3003  
  3004  	case REG_X0 + 16,
  3005  		REG_X0 + 17,
  3006  		REG_X0 + 18,
  3007  		REG_X0 + 19,
  3008  		REG_X0 + 20,
  3009  		REG_X0 + 21,
  3010  		REG_X0 + 22,
  3011  		REG_X0 + 23,
  3012  		REG_X0 + 24,
  3013  		REG_X0 + 25,
  3014  		REG_X0 + 26,
  3015  		REG_X0 + 27,
  3016  		REG_X0 + 28,
  3017  		REG_X0 + 29,
  3018  		REG_X0 + 30,
  3019  		REG_X0 + 31:
  3020  		return YxrEvex
  3021  
  3022  	case REG_Y0 + 0,
  3023  		REG_Y0 + 1,
  3024  		REG_Y0 + 2,
  3025  		REG_Y0 + 3,
  3026  		REG_Y0 + 4,
  3027  		REG_Y0 + 5,
  3028  		REG_Y0 + 6,
  3029  		REG_Y0 + 7,
  3030  		REG_Y0 + 8,
  3031  		REG_Y0 + 9,
  3032  		REG_Y0 + 10,
  3033  		REG_Y0 + 11,
  3034  		REG_Y0 + 12,
  3035  		REG_Y0 + 13,
  3036  		REG_Y0 + 14,
  3037  		REG_Y0 + 15:
  3038  		return Yyr
  3039  
  3040  	case REG_Y0 + 16,
  3041  		REG_Y0 + 17,
  3042  		REG_Y0 + 18,
  3043  		REG_Y0 + 19,
  3044  		REG_Y0 + 20,
  3045  		REG_Y0 + 21,
  3046  		REG_Y0 + 22,
  3047  		REG_Y0 + 23,
  3048  		REG_Y0 + 24,
  3049  		REG_Y0 + 25,
  3050  		REG_Y0 + 26,
  3051  		REG_Y0 + 27,
  3052  		REG_Y0 + 28,
  3053  		REG_Y0 + 29,
  3054  		REG_Y0 + 30,
  3055  		REG_Y0 + 31:
  3056  		return YyrEvex
  3057  
  3058  	case REG_Z0 + 0,
  3059  		REG_Z0 + 1,
  3060  		REG_Z0 + 2,
  3061  		REG_Z0 + 3,
  3062  		REG_Z0 + 4,
  3063  		REG_Z0 + 5,
  3064  		REG_Z0 + 6,
  3065  		REG_Z0 + 7:
  3066  		return Yzr
  3067  
  3068  	case REG_Z0 + 8,
  3069  		REG_Z0 + 9,
  3070  		REG_Z0 + 10,
  3071  		REG_Z0 + 11,
  3072  		REG_Z0 + 12,
  3073  		REG_Z0 + 13,
  3074  		REG_Z0 + 14,
  3075  		REG_Z0 + 15,
  3076  		REG_Z0 + 16,
  3077  		REG_Z0 + 17,
  3078  		REG_Z0 + 18,
  3079  		REG_Z0 + 19,
  3080  		REG_Z0 + 20,
  3081  		REG_Z0 + 21,
  3082  		REG_Z0 + 22,
  3083  		REG_Z0 + 23,
  3084  		REG_Z0 + 24,
  3085  		REG_Z0 + 25,
  3086  		REG_Z0 + 26,
  3087  		REG_Z0 + 27,
  3088  		REG_Z0 + 28,
  3089  		REG_Z0 + 29,
  3090  		REG_Z0 + 30,
  3091  		REG_Z0 + 31:
  3092  		if ctxt.Arch.Family == sys.I386 {
  3093  			return Yxxx
  3094  		}
  3095  		return Yzr
  3096  
  3097  	case REG_K0:
  3098  		return Yk0
  3099  
  3100  	case REG_K0 + 1,
  3101  		REG_K0 + 2,
  3102  		REG_K0 + 3,
  3103  		REG_K0 + 4,
  3104  		REG_K0 + 5,
  3105  		REG_K0 + 6,
  3106  		REG_K0 + 7:
  3107  		return Yknot0
  3108  
  3109  	case REG_CS:
  3110  		return Ycs
  3111  	case REG_SS:
  3112  		return Yss
  3113  	case REG_DS:
  3114  		return Yds
  3115  	case REG_ES:
  3116  		return Yes
  3117  	case REG_FS:
  3118  		return Yfs
  3119  	case REG_GS:
  3120  		return Ygs
  3121  	case REG_TLS:
  3122  		return Ytls
  3123  
  3124  	case REG_GDTR:
  3125  		return Ygdtr
  3126  	case REG_IDTR:
  3127  		return Yidtr
  3128  	case REG_LDTR:
  3129  		return Yldtr
  3130  	case REG_MSW:
  3131  		return Ymsw
  3132  	case REG_TASK:
  3133  		return Ytask
  3134  
  3135  	case REG_CR + 0:
  3136  		return Ycr0
  3137  	case REG_CR + 1:
  3138  		return Ycr1
  3139  	case REG_CR + 2:
  3140  		return Ycr2
  3141  	case REG_CR + 3:
  3142  		return Ycr3
  3143  	case REG_CR + 4:
  3144  		return Ycr4
  3145  	case REG_CR + 5:
  3146  		return Ycr5
  3147  	case REG_CR + 6:
  3148  		return Ycr6
  3149  	case REG_CR + 7:
  3150  		return Ycr7
  3151  	case REG_CR + 8:
  3152  		return Ycr8
  3153  
  3154  	case REG_DR + 0:
  3155  		return Ydr0
  3156  	case REG_DR + 1:
  3157  		return Ydr1
  3158  	case REG_DR + 2:
  3159  		return Ydr2
  3160  	case REG_DR + 3:
  3161  		return Ydr3
  3162  	case REG_DR + 4:
  3163  		return Ydr4
  3164  	case REG_DR + 5:
  3165  		return Ydr5
  3166  	case REG_DR + 6:
  3167  		return Ydr6
  3168  	case REG_DR + 7:
  3169  		return Ydr7
  3170  
  3171  	case REG_TR + 0:
  3172  		return Ytr0
  3173  	case REG_TR + 1:
  3174  		return Ytr1
  3175  	case REG_TR + 2:
  3176  		return Ytr2
  3177  	case REG_TR + 3:
  3178  		return Ytr3
  3179  	case REG_TR + 4:
  3180  		return Ytr4
  3181  	case REG_TR + 5:
  3182  		return Ytr5
  3183  	case REG_TR + 6:
  3184  		return Ytr6
  3185  	case REG_TR + 7:
  3186  		return Ytr7
  3187  	}
  3188  
  3189  	return Yxxx
  3190  }
  3191  
// AsmBuf is a simple buffer to assemble variable-length x86 instructions into
// and hold assembly state.
//
// Encoded bytes are stored in buf; off is the number of bytes written so far
// and therefore also the position of the next write. The Put* methods do no
// explicit capacity checking beyond Go's own array bounds checks.
//
// rexflag collects REX-related bits while operands are being encoded.
// rep, repn and lock presumably record pending REP/REPN/LOCK prefixes;
// confirm against their users elsewhere in the file.
//
// Reset rewinds off only; the flag fields are left untouched.
type AsmBuf struct {
	buf      [100]byte
	off      int
	rexflag  int
	vexflag  bool // Per inst: true for VEX-encoded
	evexflag bool // Per inst: true for EVEX-encoded
	rep      bool
	repn     bool
	lock     bool

	evex evexBits // Initialized when evexflag is true
}
  3206  
  3207  // Put1 appends one byte to the end of the buffer.
  3208  func (ab *AsmBuf) Put1(x byte) {
  3209  	ab.buf[ab.off] = x
  3210  	ab.off++
  3211  }
  3212  
  3213  // Put2 appends two bytes to the end of the buffer.
  3214  func (ab *AsmBuf) Put2(x, y byte) {
  3215  	ab.buf[ab.off+0] = x
  3216  	ab.buf[ab.off+1] = y
  3217  	ab.off += 2
  3218  }
  3219  
  3220  // Put3 appends three bytes to the end of the buffer.
  3221  func (ab *AsmBuf) Put3(x, y, z byte) {
  3222  	ab.buf[ab.off+0] = x
  3223  	ab.buf[ab.off+1] = y
  3224  	ab.buf[ab.off+2] = z
  3225  	ab.off += 3
  3226  }
  3227  
  3228  // Put4 appends four bytes to the end of the buffer.
  3229  func (ab *AsmBuf) Put4(x, y, z, w byte) {
  3230  	ab.buf[ab.off+0] = x
  3231  	ab.buf[ab.off+1] = y
  3232  	ab.buf[ab.off+2] = z
  3233  	ab.buf[ab.off+3] = w
  3234  	ab.off += 4
  3235  }
  3236  
  3237  // PutInt16 writes v into the buffer using little-endian encoding.
  3238  func (ab *AsmBuf) PutInt16(v int16) {
  3239  	ab.buf[ab.off+0] = byte(v)
  3240  	ab.buf[ab.off+1] = byte(v >> 8)
  3241  	ab.off += 2
  3242  }
  3243  
  3244  // PutInt32 writes v into the buffer using little-endian encoding.
  3245  func (ab *AsmBuf) PutInt32(v int32) {
  3246  	ab.buf[ab.off+0] = byte(v)
  3247  	ab.buf[ab.off+1] = byte(v >> 8)
  3248  	ab.buf[ab.off+2] = byte(v >> 16)
  3249  	ab.buf[ab.off+3] = byte(v >> 24)
  3250  	ab.off += 4
  3251  }
  3252  
  3253  // PutInt64 writes v into the buffer using little-endian encoding.
  3254  func (ab *AsmBuf) PutInt64(v int64) {
  3255  	ab.buf[ab.off+0] = byte(v)
  3256  	ab.buf[ab.off+1] = byte(v >> 8)
  3257  	ab.buf[ab.off+2] = byte(v >> 16)
  3258  	ab.buf[ab.off+3] = byte(v >> 24)
  3259  	ab.buf[ab.off+4] = byte(v >> 32)
  3260  	ab.buf[ab.off+5] = byte(v >> 40)
  3261  	ab.buf[ab.off+6] = byte(v >> 48)
  3262  	ab.buf[ab.off+7] = byte(v >> 56)
  3263  	ab.off += 8
  3264  }
  3265  
  3266  // Put copies b into the buffer.
  3267  func (ab *AsmBuf) Put(b []byte) {
  3268  	copy(ab.buf[ab.off:], b)
  3269  	ab.off += len(b)
  3270  }
  3271  
  3272  // PutOpBytesLit writes zero terminated sequence of bytes from op,
  3273  // starting at specified offset (e.g. z counter value).
  3274  // Trailing 0 is not written.
  3275  //
  3276  // Intended to be used for literal Z cases.
  3277  // Literal Z cases usually have "Zlit" in their name (Zlit, Zlitr_m, Zlitm_r).
  3278  func (ab *AsmBuf) PutOpBytesLit(offset int, op *opBytes) {
  3279  	for int(op[offset]) != 0 {
  3280  		ab.Put1(byte(op[offset]))
  3281  		offset++
  3282  	}
  3283  }
  3284  
  3285  // Insert inserts b at offset i.
  3286  func (ab *AsmBuf) Insert(i int, b byte) {
  3287  	ab.off++
  3288  	copy(ab.buf[i+1:ab.off], ab.buf[i:ab.off-1])
  3289  	ab.buf[i] = b
  3290  }
  3291  
  3292  // Last returns the byte at the end of the buffer.
  3293  func (ab *AsmBuf) Last() byte { return ab.buf[ab.off-1] }
  3294  
  3295  // Len returns the length of the buffer.
  3296  func (ab *AsmBuf) Len() int { return ab.off }
  3297  
  3298  // Bytes returns the contents of the buffer.
  3299  func (ab *AsmBuf) Bytes() []byte { return ab.buf[:ab.off] }
  3300  
  3301  // Reset empties the buffer.
  3302  func (ab *AsmBuf) Reset() { ab.off = 0 }
  3303  
  3304  // At returns the byte at offset i.
  3305  func (ab *AsmBuf) At(i int) byte { return ab.buf[i] }
  3306  
  3307  // asmidx emits SIB byte.
  3308  func (ab *AsmBuf) asmidx(ctxt *obj.Link, scale int, index int, base int) {
  3309  	var i int
  3310  
  3311  	// X/Y index register is used in VSIB.
  3312  	switch index {
  3313  	default:
  3314  		goto bad
  3315  
  3316  	case REG_NONE:
  3317  		i = 4 << 3
  3318  		goto bas
  3319  
  3320  	case REG_R8,
  3321  		REG_R9,
  3322  		REG_R10,
  3323  		REG_R11,
  3324  		REG_R12,
  3325  		REG_R13,
  3326  		REG_R14,
  3327  		REG_R15,
  3328  		REG_X8,
  3329  		REG_X9,
  3330  		REG_X10,
  3331  		REG_X11,
  3332  		REG_X12,
  3333  		REG_X13,
  3334  		REG_X14,
  3335  		REG_X15,
  3336  		REG_X16,
  3337  		REG_X17,
  3338  		REG_X18,
  3339  		REG_X19,
  3340  		REG_X20,
  3341  		REG_X21,
  3342  		REG_X22,
  3343  		REG_X23,
  3344  		REG_X24,
  3345  		REG_X25,
  3346  		REG_X26,
  3347  		REG_X27,
  3348  		REG_X28,
  3349  		REG_X29,
  3350  		REG_X30,
  3351  		REG_X31,
  3352  		REG_Y8,
  3353  		REG_Y9,
  3354  		REG_Y10,
  3355  		REG_Y11,
  3356  		REG_Y12,
  3357  		REG_Y13,
  3358  		REG_Y14,
  3359  		REG_Y15,
  3360  		REG_Y16,
  3361  		REG_Y17,
  3362  		REG_Y18,
  3363  		REG_Y19,
  3364  		REG_Y20,
  3365  		REG_Y21,
  3366  		REG_Y22,
  3367  		REG_Y23,
  3368  		REG_Y24,
  3369  		REG_Y25,
  3370  		REG_Y26,
  3371  		REG_Y27,
  3372  		REG_Y28,
  3373  		REG_Y29,
  3374  		REG_Y30,
  3375  		REG_Y31,
  3376  		REG_Z8,
  3377  		REG_Z9,
  3378  		REG_Z10,
  3379  		REG_Z11,
  3380  		REG_Z12,
  3381  		REG_Z13,
  3382  		REG_Z14,
  3383  		REG_Z15,
  3384  		REG_Z16,
  3385  		REG_Z17,
  3386  		REG_Z18,
  3387  		REG_Z19,
  3388  		REG_Z20,
  3389  		REG_Z21,
  3390  		REG_Z22,
  3391  		REG_Z23,
  3392  		REG_Z24,
  3393  		REG_Z25,
  3394  		REG_Z26,
  3395  		REG_Z27,
  3396  		REG_Z28,
  3397  		REG_Z29,
  3398  		REG_Z30,
  3399  		REG_Z31:
  3400  		if ctxt.Arch.Family == sys.I386 {
  3401  			goto bad
  3402  		}
  3403  		fallthrough
  3404  
  3405  	case REG_AX,
  3406  		REG_CX,
  3407  		REG_DX,
  3408  		REG_BX,
  3409  		REG_BP,
  3410  		REG_SI,
  3411  		REG_DI,
  3412  		REG_X0,
  3413  		REG_X1,
  3414  		REG_X2,
  3415  		REG_X3,
  3416  		REG_X4,
  3417  		REG_X5,
  3418  		REG_X6,
  3419  		REG_X7,
  3420  		REG_Y0,
  3421  		REG_Y1,
  3422  		REG_Y2,
  3423  		REG_Y3,
  3424  		REG_Y4,
  3425  		REG_Y5,
  3426  		REG_Y6,
  3427  		REG_Y7,
  3428  		REG_Z0,
  3429  		REG_Z1,
  3430  		REG_Z2,
  3431  		REG_Z3,
  3432  		REG_Z4,
  3433  		REG_Z5,
  3434  		REG_Z6,
  3435  		REG_Z7:
  3436  		i = reg[index] << 3
  3437  	}
  3438  
  3439  	switch scale {
  3440  	default:
  3441  		goto bad
  3442  
  3443  	case 1:
  3444  		break
  3445  
  3446  	case 2:
  3447  		i |= 1 << 6
  3448  
  3449  	case 4:
  3450  		i |= 2 << 6
  3451  
  3452  	case 8:
  3453  		i |= 3 << 6
  3454  	}
  3455  
  3456  bas:
  3457  	switch base {
  3458  	default:
  3459  		goto bad
  3460  
  3461  	case REG_NONE: // must be mod=00
  3462  		i |= 5
  3463  
  3464  	case REG_R8,
  3465  		REG_R9,
  3466  		REG_R10,
  3467  		REG_R11,
  3468  		REG_R12,
  3469  		REG_R13,
  3470  		REG_R14,
  3471  		REG_R15:
  3472  		if ctxt.Arch.Family == sys.I386 {
  3473  			goto bad
  3474  		}
  3475  		fallthrough
  3476  
  3477  	case REG_AX,
  3478  		REG_CX,
  3479  		REG_DX,
  3480  		REG_BX,
  3481  		REG_SP,
  3482  		REG_BP,
  3483  		REG_SI,
  3484  		REG_DI:
  3485  		i |= reg[base]
  3486  	}
  3487  
  3488  	ab.Put1(byte(i))
  3489  	return
  3490  
  3491  bad:
  3492  	ctxt.Diag("asmidx: bad address %d/%s/%s", scale, rconv(index), rconv(base))
  3493  	ab.Put1(0)
  3494  }
  3495  
  3496  func (ab *AsmBuf) relput4(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr) {
  3497  	var rel obj.Reloc
  3498  
  3499  	v := vaddr(ctxt, p, a, &rel)
  3500  	if rel.Siz != 0 {
  3501  		if rel.Siz != 4 {
  3502  			ctxt.Diag("bad reloc")
  3503  		}
  3504  		rel.Off = int32(p.Pc + int64(ab.Len()))
  3505  		cursym.AddRel(ctxt, rel)
  3506  	}
  3507  
  3508  	ab.PutInt32(int32(v))
  3509  }
  3510  
  3511  func vaddr(ctxt *obj.Link, p *obj.Prog, a *obj.Addr, r *obj.Reloc) int64 {
  3512  	if r != nil {
  3513  		*r = obj.Reloc{}
  3514  	}
  3515  
  3516  	switch a.Name {
  3517  	case obj.NAME_STATIC,
  3518  		obj.NAME_GOTREF,
  3519  		obj.NAME_EXTERN:
  3520  		s := a.Sym
  3521  		if r == nil {
  3522  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3523  			log.Fatalf("reloc")
  3524  		}
  3525  
  3526  		if a.Name == obj.NAME_GOTREF {
  3527  			r.Siz = 4
  3528  			r.Type = objabi.R_GOTPCREL
  3529  		} else if useAbs(ctxt, s) {
  3530  			r.Siz = 4
  3531  			r.Type = objabi.R_ADDR
  3532  		} else {
  3533  			r.Siz = 4
  3534  			r.Type = objabi.R_PCREL
  3535  		}
  3536  
  3537  		r.Off = -1 // caller must fill in
  3538  		r.Sym = s
  3539  		r.Add = a.Offset
  3540  
  3541  		return 0
  3542  	}
  3543  
  3544  	if (a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Reg == REG_TLS {
  3545  		if r == nil {
  3546  			ctxt.Diag("need reloc for %v", obj.Dconv(p, a))
  3547  			log.Fatalf("reloc")
  3548  		}
  3549  
  3550  		if !ctxt.Flag_shared || isAndroid || ctxt.Headtype == objabi.Hdarwin {
  3551  			r.Type = objabi.R_TLS_LE
  3552  			r.Siz = 4
  3553  			r.Off = -1 // caller must fill in
  3554  			r.Add = a.Offset
  3555  		}
  3556  		return 0
  3557  	}
  3558  
  3559  	return a.Offset
  3560  }
  3561  
  3562  func (ab *AsmBuf) asmandsz(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, r int, rex int, m64 int) {
  3563  	var base int
  3564  	var rel obj.Reloc
  3565  
  3566  	rex &= 0x40 | Rxr
  3567  	if a.Offset != int64(int32(a.Offset)) {
  3568  		// The rules are slightly different for 386 and AMD64,
  3569  		// mostly for historical reasons. We may unify them later,
  3570  		// but it must be discussed beforehand.
  3571  		//
  3572  		// For 64bit mode only LEAL is allowed to overflow.
  3573  		// It's how https://golang.org/cl/59630 made it.
  3574  		// crypto/sha1/sha1block_amd64.s depends on this feature.
  3575  		//
  3576  		// For 32bit mode rules are more permissive.
  3577  		// If offset fits uint32, it's permitted.
  3578  		// This is allowed for assembly that wants to use 32-bit hex
  3579  		// constants, e.g. LEAL 0x99999999(AX), AX.
  3580  		overflowOK := (ctxt.Arch.Family == sys.AMD64 && p.As == ALEAL) ||
  3581  			(ctxt.Arch.Family != sys.AMD64 &&
  3582  				int64(uint32(a.Offset)) == a.Offset &&
  3583  				ab.rexflag&Rxw == 0)
  3584  		if !overflowOK {
  3585  			ctxt.Diag("offset too large in %s", p)
  3586  		}
  3587  	}
  3588  	v := int32(a.Offset)
  3589  	rel.Siz = 0
  3590  
  3591  	switch a.Type {
  3592  	case obj.TYPE_ADDR:
  3593  		if a.Name == obj.NAME_NONE {
  3594  			ctxt.Diag("unexpected TYPE_ADDR with NAME_NONE")
  3595  		}
  3596  		if a.Index == REG_TLS {
  3597  			ctxt.Diag("unexpected TYPE_ADDR with index==REG_TLS")
  3598  		}
  3599  		goto bad
  3600  
  3601  	case obj.TYPE_REG:
  3602  		const regFirst = REG_AL
  3603  		const regLast = REG_Z31
  3604  		if a.Reg < regFirst || regLast < a.Reg {
  3605  			goto bad
  3606  		}
  3607  		if v != 0 {
  3608  			goto bad
  3609  		}
  3610  		ab.Put1(byte(3<<6 | reg[a.Reg]<<0 | r<<3))
  3611  		ab.rexflag |= regrex[a.Reg]&(0x40|Rxb) | rex
  3612  		return
  3613  	}
  3614  
  3615  	if a.Type != obj.TYPE_MEM {
  3616  		goto bad
  3617  	}
  3618  
  3619  	if a.Index != REG_NONE && a.Index != REG_TLS && !(REG_CS <= a.Index && a.Index <= REG_GS) {
  3620  		base := int(a.Reg)
  3621  		switch a.Name {
  3622  		case obj.NAME_EXTERN,
  3623  			obj.NAME_GOTREF,
  3624  			obj.NAME_STATIC:
  3625  			if !useAbs(ctxt, a.Sym) && ctxt.Arch.Family == sys.AMD64 {
  3626  				goto bad
  3627  			}
  3628  			if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3629  				// The base register has already been set. It holds the PC
  3630  				// of this instruction returned by a PC-reading thunk.
  3631  				// See obj6.go:rewriteToPcrel.
  3632  			} else {
  3633  				base = REG_NONE
  3634  			}
  3635  			v = int32(vaddr(ctxt, p, a, &rel))
  3636  
  3637  		case obj.NAME_AUTO,
  3638  			obj.NAME_PARAM:
  3639  			base = REG_SP
  3640  		}
  3641  
  3642  		ab.rexflag |= regrex[int(a.Index)]&Rxx | regrex[base]&Rxb | rex
  3643  		if base == REG_NONE {
  3644  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3645  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3646  			goto putrelv
  3647  		}
  3648  
  3649  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3650  			ab.Put1(byte(0<<6 | 4<<0 | r<<3))
  3651  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3652  			return
  3653  		}
  3654  
  3655  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3656  			ab.Put1(byte(1<<6 | 4<<0 | r<<3))
  3657  			ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3658  			ab.Put1(disp8)
  3659  			return
  3660  		}
  3661  
  3662  		ab.Put1(byte(2<<6 | 4<<0 | r<<3))
  3663  		ab.asmidx(ctxt, int(a.Scale), int(a.Index), base)
  3664  		goto putrelv
  3665  	}
  3666  
  3667  	base = int(a.Reg)
  3668  	switch a.Name {
  3669  	case obj.NAME_STATIC,
  3670  		obj.NAME_GOTREF,
  3671  		obj.NAME_EXTERN:
  3672  		if a.Sym == nil {
  3673  			ctxt.Diag("bad addr: %v", p)
  3674  		}
  3675  		if ctxt.Arch.Family == sys.I386 && ctxt.Flag_shared {
  3676  			// The base register has already been set. It holds the PC
  3677  			// of this instruction returned by a PC-reading thunk.
  3678  			// See obj6.go:rewriteToPcrel.
  3679  		} else {
  3680  			base = REG_NONE
  3681  		}
  3682  		v = int32(vaddr(ctxt, p, a, &rel))
  3683  
  3684  	case obj.NAME_AUTO,
  3685  		obj.NAME_PARAM:
  3686  		base = REG_SP
  3687  	}
  3688  
  3689  	if base == REG_TLS {
  3690  		v = int32(vaddr(ctxt, p, a, &rel))
  3691  	}
  3692  
  3693  	ab.rexflag |= regrex[base]&Rxb | rex
  3694  	if base == REG_NONE || (REG_CS <= base && base <= REG_GS) || base == REG_TLS {
  3695  		if (a.Sym == nil || !useAbs(ctxt, a.Sym)) && base == REG_NONE && (a.Name == obj.NAME_STATIC || a.Name == obj.NAME_EXTERN || a.Name == obj.NAME_GOTREF) || ctxt.Arch.Family != sys.AMD64 {
  3696  			if a.Name == obj.NAME_GOTREF && (a.Offset != 0 || a.Index != 0 || a.Scale != 0) {
  3697  				ctxt.Diag("%v has offset against gotref", p)
  3698  			}
  3699  			ab.Put1(byte(0<<6 | 5<<0 | r<<3))
  3700  			goto putrelv
  3701  		}
  3702  
  3703  		// temporary
  3704  		ab.Put2(
  3705  			byte(0<<6|4<<0|r<<3), // sib present
  3706  			0<<6|4<<3|5<<0,       // DS:d32
  3707  		)
  3708  		goto putrelv
  3709  	}
  3710  
  3711  	if base == REG_SP || base == REG_R12 {
  3712  		if v == 0 {
  3713  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3714  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3715  			return
  3716  		}
  3717  
  3718  		if disp8, ok := toDisp8(v, p, ab); ok {
  3719  			ab.Put1(byte(1<<6 | reg[base]<<0 | r<<3))
  3720  			ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3721  			ab.Put1(disp8)
  3722  			return
  3723  		}
  3724  
  3725  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3726  		ab.asmidx(ctxt, int(a.Scale), REG_NONE, base)
  3727  		goto putrelv
  3728  	}
  3729  
  3730  	if REG_AX <= base && base <= REG_R15 {
  3731  		if a.Index == REG_TLS && !ctxt.Flag_shared && !isAndroid &&
  3732  			ctxt.Headtype != objabi.Hwindows {
  3733  			rel = obj.Reloc{}
  3734  			rel.Type = objabi.R_TLS_LE
  3735  			rel.Siz = 4
  3736  			rel.Sym = nil
  3737  			rel.Add = int64(v)
  3738  			v = 0
  3739  		}
  3740  
  3741  		if v == 0 && rel.Siz == 0 && base != REG_BP && base != REG_R13 {
  3742  			ab.Put1(byte(0<<6 | reg[base]<<0 | r<<3))
  3743  			return
  3744  		}
  3745  
  3746  		if disp8, ok := toDisp8(v, p, ab); ok && rel.Siz == 0 {
  3747  			ab.Put2(byte(1<<6|reg[base]<<0|r<<3), disp8)
  3748  			return
  3749  		}
  3750  
  3751  		ab.Put1(byte(2<<6 | reg[base]<<0 | r<<3))
  3752  		goto putrelv
  3753  	}
  3754  
  3755  	goto bad
  3756  
  3757  putrelv:
  3758  	if rel.Siz != 0 {
  3759  		if rel.Siz != 4 {
  3760  			ctxt.Diag("bad rel")
  3761  			goto bad
  3762  		}
  3763  
  3764  		rel.Off = int32(p.Pc + int64(ab.Len()))
  3765  		cursym.AddRel(ctxt, rel)
  3766  	}
  3767  
  3768  	ab.PutInt32(v)
  3769  	return
  3770  
  3771  bad:
  3772  	ctxt.Diag("asmand: bad address %v", obj.Dconv(p, a))
  3773  }
  3774  
  3775  func (ab *AsmBuf) asmand(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, ra *obj.Addr) {
  3776  	ab.asmandsz(ctxt, cursym, p, a, reg[ra.Reg], regrex[ra.Reg], 0)
  3777  }
  3778  
  3779  func (ab *AsmBuf) asmando(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog, a *obj.Addr, o int) {
  3780  	ab.asmandsz(ctxt, cursym, p, a, o, 0, 0)
  3781  }
  3782  
  3783  func bytereg(a *obj.Addr, t *uint8) {
  3784  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AX <= a.Reg && a.Reg <= REG_R15) {
  3785  		a.Reg += REG_AL - REG_AX
  3786  		*t = 0
  3787  	}
  3788  }
  3789  
  3790  func unbytereg(a *obj.Addr, t *uint8) {
  3791  	if a.Type == obj.TYPE_REG && a.Index == REG_NONE && (REG_AL <= a.Reg && a.Reg <= REG_R15B) {
  3792  		a.Reg += REG_AX - REG_AL
  3793  		*t = 0
  3794  	}
  3795  }
  3796  
  3797  const (
  3798  	movLit uint8 = iota // Like Zlit
  3799  	movRegMem
  3800  	movMemReg
  3801  	movRegMem2op
  3802  	movMemReg2op
  3803  	movFullPtr // Load full pointer, trash heap (unsupported)
  3804  	movDoubleShift
  3805  	movTLSReg
  3806  )
  3807  
// ymovtab lists special-case encodings, grouped by the section comments
// below. Reading each row left to right: the instruction, three operand
// classes (Ynone where the slot is unused), one of the mov* encoding
// kinds, and up to four bytes whose interpretation depends on that kind.
var ymovtab = []movtab{
	// push
	{APUSHL, Ycs, Ynone, Ynone, movLit, [4]uint8{0x0e, 0}},
	{APUSHL, Yss, Ynone, Ynone, movLit, [4]uint8{0x16, 0}},
	{APUSHL, Yds, Ynone, Ynone, movLit, [4]uint8{0x1e, 0}},
	{APUSHL, Yes, Ynone, Ynone, movLit, [4]uint8{0x06, 0}},
	{APUSHL, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHL, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHQ, Yfs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa0, 0}},
	{APUSHQ, Ygs, Ynone, Ynone, movLit, [4]uint8{0x0f, 0xa8, 0}},
	{APUSHW, Ycs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0e, 0}},
	{APUSHW, Yss, Ynone, Ynone, movLit, [4]uint8{Pe, 0x16, 0}},
	{APUSHW, Yds, Ynone, Ynone, movLit, [4]uint8{Pe, 0x1e, 0}},
	{APUSHW, Yes, Ynone, Ynone, movLit, [4]uint8{Pe, 0x06, 0}},
	{APUSHW, Yfs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa0, 0}},
	{APUSHW, Ygs, Ynone, Ynone, movLit, [4]uint8{Pe, 0x0f, 0xa8, 0}},

	// pop
	{APOPL, Ynone, Ynone, Yds, movLit, [4]uint8{0x1f, 0}},
	{APOPL, Ynone, Ynone, Yes, movLit, [4]uint8{0x07, 0}},
	{APOPL, Ynone, Ynone, Yss, movLit, [4]uint8{0x17, 0}},
	{APOPL, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPL, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPQ, Ynone, Ynone, Yfs, movLit, [4]uint8{0x0f, 0xa1, 0}},
	{APOPQ, Ynone, Ynone, Ygs, movLit, [4]uint8{0x0f, 0xa9, 0}},
	{APOPW, Ynone, Ynone, Yds, movLit, [4]uint8{Pe, 0x1f, 0}},
	{APOPW, Ynone, Ynone, Yes, movLit, [4]uint8{Pe, 0x07, 0}},
	{APOPW, Ynone, Ynone, Yss, movLit, [4]uint8{Pe, 0x17, 0}},
	{APOPW, Ynone, Ynone, Yfs, movLit, [4]uint8{Pe, 0x0f, 0xa1, 0}},
	{APOPW, Ynone, Ynone, Ygs, movLit, [4]uint8{Pe, 0x0f, 0xa9, 0}},

	// mov seg
	{AMOVW, Yes, Ynone, Yml, movRegMem, [4]uint8{0x8c, 0, 0, 0}},
	{AMOVW, Ycs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 1, 0, 0}},
	{AMOVW, Yss, Ynone, Yml, movRegMem, [4]uint8{0x8c, 2, 0, 0}},
	{AMOVW, Yds, Ynone, Yml, movRegMem, [4]uint8{0x8c, 3, 0, 0}},
	{AMOVW, Yfs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 4, 0, 0}},
	{AMOVW, Ygs, Ynone, Yml, movRegMem, [4]uint8{0x8c, 5, 0, 0}},
	{AMOVW, Yml, Ynone, Yes, movMemReg, [4]uint8{0x8e, 0, 0, 0}},
	{AMOVW, Yml, Ynone, Ycs, movMemReg, [4]uint8{0x8e, 1, 0, 0}},
	{AMOVW, Yml, Ynone, Yss, movMemReg, [4]uint8{0x8e, 2, 0, 0}},
	{AMOVW, Yml, Ynone, Yds, movMemReg, [4]uint8{0x8e, 3, 0, 0}},
	{AMOVW, Yml, Ynone, Yfs, movMemReg, [4]uint8{0x8e, 4, 0, 0}},
	{AMOVW, Yml, Ynone, Ygs, movMemReg, [4]uint8{0x8e, 5, 0, 0}},

	// mov cr
	{AMOVL, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVL, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVL, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVL, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVL, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVQ, Ycr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 0, 0}},
	{AMOVQ, Ycr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 2, 0}},
	{AMOVQ, Ycr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 3, 0}},
	{AMOVQ, Ycr4, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 4, 0}},
	{AMOVQ, Ycr8, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x20, 8, 0}},
	{AMOVL, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVL, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVL, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVL, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVL, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},
	{AMOVQ, Yrl, Ynone, Ycr0, movMemReg2op, [4]uint8{0x0f, 0x22, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ycr2, movMemReg2op, [4]uint8{0x0f, 0x22, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ycr3, movMemReg2op, [4]uint8{0x0f, 0x22, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ycr4, movMemReg2op, [4]uint8{0x0f, 0x22, 4, 0}},
	{AMOVQ, Yrl, Ynone, Ycr8, movMemReg2op, [4]uint8{0x0f, 0x22, 8, 0}},

	// mov dr
	{AMOVL, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVL, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVL, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVQ, Ydr0, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 0, 0}},
	{AMOVQ, Ydr2, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 2, 0}},
	{AMOVQ, Ydr3, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 3, 0}},
	{AMOVQ, Ydr6, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 6, 0}},
	{AMOVQ, Ydr7, Ynone, Yrl, movRegMem2op, [4]uint8{0x0f, 0x21, 7, 0}},
	{AMOVL, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVL, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVL, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},
	{AMOVQ, Yrl, Ynone, Ydr0, movMemReg2op, [4]uint8{0x0f, 0x23, 0, 0}},
	{AMOVQ, Yrl, Ynone, Ydr2, movMemReg2op, [4]uint8{0x0f, 0x23, 2, 0}},
	{AMOVQ, Yrl, Ynone, Ydr3, movMemReg2op, [4]uint8{0x0f, 0x23, 3, 0}},
	{AMOVQ, Yrl, Ynone, Ydr6, movMemReg2op, [4]uint8{0x0f, 0x23, 6, 0}},
	{AMOVQ, Yrl, Ynone, Ydr7, movMemReg2op, [4]uint8{0x0f, 0x23, 7, 0}},

	// mov tr
	{AMOVL, Ytr6, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 6, 0}},
	{AMOVL, Ytr7, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x24, 7, 0}},
	{AMOVL, Yml, Ynone, Ytr6, movMemReg2op, [4]uint8{0x0f, 0x26, 6, 0xff}},
	{AMOVL, Yml, Ynone, Ytr7, movMemReg2op, [4]uint8{0x0f, 0x26, 7, 0xff}},

	// lgdt, sgdt, lidt, sidt
	{AMOVL, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVL, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVL, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVL, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},
	{AMOVQ, Ym, Ynone, Ygdtr, movMemReg2op, [4]uint8{0x0f, 0x01, 2, 0}},
	{AMOVQ, Ygdtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 0, 0}},
	{AMOVQ, Ym, Ynone, Yidtr, movMemReg2op, [4]uint8{0x0f, 0x01, 3, 0}},
	{AMOVQ, Yidtr, Ynone, Ym, movRegMem2op, [4]uint8{0x0f, 0x01, 1, 0}},

	// lldt, sldt
	{AMOVW, Yml, Ynone, Yldtr, movMemReg2op, [4]uint8{0x0f, 0x00, 2, 0}},
	{AMOVW, Yldtr, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 0, 0}},

	// lmsw, smsw
	{AMOVW, Yml, Ynone, Ymsw, movMemReg2op, [4]uint8{0x0f, 0x01, 6, 0}},
	{AMOVW, Ymsw, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x01, 4, 0}},

	// ltr, str
	{AMOVW, Yml, Ynone, Ytask, movMemReg2op, [4]uint8{0x0f, 0x00, 3, 0}},
	{AMOVW, Ytask, Ynone, Yml, movRegMem2op, [4]uint8{0x0f, 0x00, 1, 0}},

	/* load full pointer - unsupported
	{AMOVL, Yml, Ycol, movFullPtr, [4]uint8{0, 0, 0, 0}},
	{AMOVW, Yml, Ycol, movFullPtr, [4]uint8{Pe, 0, 0, 0}},
	*/

	// double shift
	{ASHLL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHLL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xa4, 0xa5, 0, 0}},
	{ASHRL, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHRL, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{0xac, 0xad, 0, 0}},
	{ASHLQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHLQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xa4, 0xa5, 0}},
	{ASHRQ, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHRQ, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pw, 0xac, 0xad, 0}},
	{ASHLW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHLW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xa4, 0xa5, 0}},
	{ASHRW, Yi8, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycl, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},
	{ASHRW, Ycx, Yrl, Yml, movDoubleShift, [4]uint8{Pe, 0xac, 0xad, 0}},

	// load TLS base
	{AMOVL, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	{AMOVQ, Ytls, Ynone, Yrl, movTLSReg, [4]uint8{0, 0, 0, 0}},
	// All-zero row; presumably the end-of-table sentinel for the lookup
	// loop (TODO confirm against the code that scans ymovtab).
	{0, 0, 0, 0, 0, [4]uint8{}},
}
  3951  
  3952  func isax(a *obj.Addr) bool {
  3953  	switch a.Reg {
  3954  	case REG_AX, REG_AL, REG_AH:
  3955  		return true
  3956  	}
  3957  
  3958  	return a.Index == REG_AX
  3959  }
  3960  
  3961  func subreg(p *obj.Prog, from int, to int) {
  3962  	if false { /* debug['Q'] */
  3963  		fmt.Printf("\n%v\ts/%v/%v/\n", p, rconv(from), rconv(to))
  3964  	}
  3965  
  3966  	if int(p.From.Reg) == from {
  3967  		p.From.Reg = int16(to)
  3968  		p.Ft = 0
  3969  	}
  3970  
  3971  	if int(p.To.Reg) == from {
  3972  		p.To.Reg = int16(to)
  3973  		p.Tt = 0
  3974  	}
  3975  
  3976  	if int(p.From.Index) == from {
  3977  		p.From.Index = int16(to)
  3978  		p.Ft = 0
  3979  	}
  3980  
  3981  	if int(p.To.Index) == from {
  3982  		p.To.Index = int16(to)
  3983  		p.Tt = 0
  3984  	}
  3985  
  3986  	if false { /* debug['Q'] */
  3987  		fmt.Printf("%v\n", p)
  3988  	}
  3989  }
  3990  
  3991  func (ab *AsmBuf) mediaop(ctxt *obj.Link, o *Optab, op int, osize int, z int) int {
  3992  	switch op {
  3993  	case Pm, Pe, Pf2, Pf3:
  3994  		if osize != 1 {
  3995  			if op != Pm {
  3996  				ab.Put1(byte(op))
  3997  			}
  3998  			ab.Put1(Pm)
  3999  			z++
  4000  			op = int(o.op[z])
  4001  			break
  4002  		}
  4003  		fallthrough
  4004  
  4005  	default:
  4006  		if ab.Len() == 0 || ab.Last() != Pm {
  4007  			ab.Put1(Pm)
  4008  		}
  4009  	}
  4010  
  4011  	ab.Put1(byte(op))
  4012  	return z
  4013  }
  4014  
// asmevex emits the EVEX prefix and opcode byte.
// In addition to the r/m, vvvv and reg operands that asmvex takes, it
// accepts an optional K-masking register k. Any of rm, v, r and k may
// be nil.
//
// Expects ab.evex to be properly initialized.
func (ab *AsmBuf) asmevex(ctxt *obj.Link, p *obj.Prog, rm, v, r, k *obj.Addr) {
	ab.evexflag = true
	evex := ab.evex

	// The R, R', X and B bits start out as 1 and are cleared to 0 when
	// the corresponding register extension bit is present.
	rexR := byte(1)
	evexR := byte(1)
	rexX := byte(1)
	rexB := byte(1)
	if r != nil {
		if regrex[r.Reg]&Rxr != 0 {
			rexR = 0 // "ModR/M.reg" selector 4th bit.
		}
		if regrex[r.Reg]&RxrEvex != 0 {
			evexR = 0 // "ModR/M.reg" selector 5th bit.
		}
	}
	if rm != nil {
		// Without an index register, X is cleared when rm.Reg itself
		// carries the RxrEvex bit; otherwise X follows the index register.
		if rm.Index == REG_NONE && regrex[rm.Reg]&RxrEvex != 0 {
			rexX = 0
		} else if regrex[rm.Index]&Rxx != 0 {
			rexX = 0
		}
		if regrex[rm.Reg]&Rxb != 0 {
			rexB = 0
		}
	}
	// P0 = [R][X][B][R'][00][mm]
	p0 := (rexR << 7) |
		(rexX << 6) |
		(rexB << 5) |
		(evexR << 4) |
		(0 << 2) |
		(evex.M() << 0)

	vexV := byte(0)
	if v != nil {
		// 4bit-wide reg index.
		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
	}
	// The vvvv field is stored with all four bits inverted.
	vexV ^= 0x0F
	// P1 = [W][vvvv][1][pp]
	p1 := (evex.W() << 7) |
		(vexV << 3) |
		(1 << 2) |
		(evex.P() << 0)

	// The instruction suffix selected by p.Scond drives the z, L'L and
	// b fields of P2.
	suffix := evexSuffixMap[p.Scond]
	evexZ := byte(0)
	evexLL := evex.L()
	evexB := byte(0)
	evexV := byte(1)
	evexA := byte(0)
	if suffix.zeroing {
		if !evex.ZeroingEnabled() {
			ctxt.Diag("unsupported zeroing: %v", p)
		}
		if k == nil {
			// When you request zeroing you must specify a mask register.
			// See issue 57952.
			ctxt.Diag("mask register must be specified for .Z instructions: %v", p)
		} else if k.Reg == REG_K0 {
			// The mask register must not be K0. That restriction is already
			// handled by the Yknot0 restriction in the opcode tables, so we
			// won't ever reach here. But put something sensible here just in case.
			ctxt.Diag("mask register must not be K0 for .Z instructions: %v", p)
		}
		evexZ = 1
	}
	// Rounding, broadcast and SAE are checked in this order and only the
	// first matching case applies; each of them sets the b bit. Invalid
	// combinations are reported but encoding continues.
	switch {
	case suffix.rounding != rcUnset:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal rounding with memory argument: %v", p)
		} else if !evex.RoundingEnabled() {
			ctxt.Diag("unsupported rounding: %v", p)
		}
		evexB = 1
		// With rounding, the L'L field carries the rounding mode
		// instead of evex.L().
		evexLL = suffix.rounding
	case suffix.broadcast:
		if rm == nil || rm.Type != obj.TYPE_MEM {
			ctxt.Diag("illegal broadcast without memory argument: %v", p)
		} else if !evex.BroadcastEnabled() {
			ctxt.Diag("unsupported broadcast: %v", p)
		}
		evexB = 1
	case suffix.sae:
		if rm != nil && rm.Type == obj.TYPE_MEM {
			ctxt.Diag("illegal SAE with memory argument: %v", p)
		} else if !evex.SaeEnabled() {
			ctxt.Diag("unsupported SAE: %v", p)
		}
		evexB = 1
	}
	// V' starts as 1 and is cleared when rm's index register, or
	// failing that v's register, carries the RxrEvex bit.
	if rm != nil && regrex[rm.Index]&RxrEvex != 0 {
		evexV = 0
	} else if v != nil && regrex[v.Reg]&RxrEvex != 0 {
		evexV = 0 // VSR selector 5th bit.
	}
	if k != nil {
		evexA = byte(reg[k.Reg])
	}
	// P2 = [z][L'L][b][V'][aaa]
	p2 := (evexZ << 7) |
		(evexLL << 5) |
		(evexB << 4) |
		(evexV << 3) |
		(evexA << 0)

	// Output order: escape byte, the three payload bytes, then the opcode.
	const evexEscapeByte = 0x62
	ab.Put4(evexEscapeByte, p0, p1, p2)
	ab.Put1(evex.opcode)
}
  4131  
  4132  // Emit VEX prefix and opcode byte.
  4133  // The three addresses are the r/m, vvvv, and reg fields.
  4134  // The reg and rm arguments appear in the same order as the
  4135  // arguments to asmand, which typically follows the call to asmvex.
  4136  // The final two arguments are the VEX prefix (see encoding above)
  4137  // and the opcode byte.
  4138  // For details about vex prefix see:
  4139  // https://en.wikipedia.org/wiki/VEX_prefix#Technical_description
  4140  func (ab *AsmBuf) asmvex(ctxt *obj.Link, rm, v, r *obj.Addr, vex, opcode uint8) {
  4141  	ab.vexflag = true
  4142  	rexR := 0
  4143  	if r != nil {
  4144  		rexR = regrex[r.Reg] & Rxr
  4145  	}
  4146  	rexB := 0
  4147  	rexX := 0
  4148  	if rm != nil {
  4149  		rexB = regrex[rm.Reg] & Rxb
  4150  		rexX = regrex[rm.Index] & Rxx
  4151  	}
  4152  	vexM := (vex >> 3) & 0x7
  4153  	vexWLP := vex & 0x87
  4154  	vexV := byte(0)
  4155  	if v != nil {
  4156  		vexV = byte(reg[v.Reg]|(regrex[v.Reg]&Rxr)<<1) & 0xF
  4157  	}
  4158  	vexV ^= 0xF
  4159  	if vexM == 1 && (rexX|rexB) == 0 && vex&vexW1 == 0 {
  4160  		// Can use 2-byte encoding.
  4161  		ab.Put2(0xc5, byte(rexR<<5)^0x80|vexV<<3|vexWLP)
  4162  	} else {
  4163  		// Must use 3-byte encoding.
  4164  		ab.Put3(0xc4,
  4165  			(byte(rexR|rexX|rexB)<<5)^0xE0|vexM,
  4166  			vexV<<3|vexWLP,
  4167  		)
  4168  	}
  4169  	ab.Put1(opcode)
  4170  }
  4171  
  4172  // regIndex returns register index that fits in 5 bits.
  4173  //
  4174  //	R         : 3 bit | legacy instructions     | N/A
  4175  //	[R/V]EX.R : 1 bit | REX / VEX extension bit | Rxr
  4176  //	EVEX.R    : 1 bit | EVEX extension bit      | RxrEvex
  4177  //
  4178  // Examples:
  4179  //
  4180  //	REG_Z30 => 30
  4181  //	REG_X15 => 15
  4182  //	REG_R9  => 9
  4183  //	REG_AX  => 0
  4184  func regIndex(r int16) int {
  4185  	lower3bits := reg[r]
  4186  	high4bit := regrex[r] & Rxr << 1
  4187  	high5bit := regrex[r] & RxrEvex << 0
  4188  	return lower3bits | high4bit | high5bit
  4189  }
  4190  
  4191  // avx2gatherValid reports whether p satisfies AVX2 gather constraints.
  4192  // Reports errors via ctxt.
  4193  func avx2gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4194  	// If any pair of the index, mask, or destination registers
  4195  	// are the same, illegal instruction trap (#UD) is triggered.
  4196  	index := regIndex(p.GetFrom3().Index)
  4197  	mask := regIndex(p.From.Reg)
  4198  	dest := regIndex(p.To.Reg)
  4199  	if dest == mask || dest == index || mask == index {
  4200  		ctxt.Diag("mask, index, and destination registers should be distinct: %v", p)
  4201  		return false
  4202  	}
  4203  
  4204  	return true
  4205  }
  4206  
  4207  // avx512gatherValid reports whether p satisfies AVX512 gather constraints.
  4208  // Reports errors via ctxt.
  4209  func avx512gatherValid(ctxt *obj.Link, p *obj.Prog) bool {
  4210  	// Illegal instruction trap (#UD) is triggered if the destination vector
  4211  	// register is the same as index vector in VSIB.
  4212  	index := regIndex(p.From.Index)
  4213  	dest := regIndex(p.To.Reg)
  4214  	if dest == index {
  4215  		ctxt.Diag("index and destination registers should be distinct: %v", p)
  4216  		return false
  4217  	}
  4218  
  4219  	return true
  4220  }
  4221  
  4222  func (ab *AsmBuf) doasm(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  4223  	o := opindex[p.As&obj.AMask]
  4224  
  4225  	if o == nil {
  4226  		ctxt.Diag("asmins: missing op %v", p)
  4227  		return
  4228  	}
  4229  
  4230  	if pre := prefixof(ctxt, &p.From); pre != 0 {
  4231  		ab.Put1(byte(pre))
  4232  	}
  4233  	if pre := prefixof(ctxt, &p.To); pre != 0 {
  4234  		ab.Put1(byte(pre))
  4235  	}
  4236  
  4237  	// Checks to warn about instruction/arguments combinations that
  4238  	// will unconditionally trigger illegal instruction trap (#UD).
  4239  	switch p.As {
  4240  	case AVGATHERDPD,
  4241  		AVGATHERQPD,
  4242  		AVGATHERDPS,
  4243  		AVGATHERQPS,
  4244  		AVPGATHERDD,
  4245  		AVPGATHERQD,
  4246  		AVPGATHERDQ,
  4247  		AVPGATHERQQ:
  4248  		if p.GetFrom3() == nil {
  4249  			// gathers need a 3rd arg. See issue 58822.
  4250  			ctxt.Diag("need a third arg for gather instruction: %v", p)
  4251  			return
  4252  		}
  4253  		// AVX512 gather requires explicit K mask.
  4254  		if p.GetFrom3().Reg >= REG_K0 && p.GetFrom3().Reg <= REG_K7 {
  4255  			if !avx512gatherValid(ctxt, p) {
  4256  				return
  4257  			}
  4258  		} else {
  4259  			if !avx2gatherValid(ctxt, p) {
  4260  				return
  4261  			}
  4262  		}
  4263  	}
  4264  
  4265  	if p.Ft == 0 {
  4266  		p.Ft = uint8(oclass(ctxt, p, &p.From))
  4267  	}
  4268  	if p.Tt == 0 {
  4269  		p.Tt = uint8(oclass(ctxt, p, &p.To))
  4270  	}
  4271  
  4272  	ft := int(p.Ft) * Ymax
  4273  	tt := int(p.Tt) * Ymax
  4274  
  4275  	xo := obj.Bool2int(o.op[0] == 0x0f)
  4276  	z := 0
  4277  
  4278  	args := make([]int, 0, argListMax)
  4279  	if ft != Ynone*Ymax {
  4280  		args = append(args, ft)
  4281  	}
  4282  	for i := range p.RestArgs {
  4283  		args = append(args, oclass(ctxt, p, &p.RestArgs[i].Addr)*Ymax)
  4284  	}
  4285  	if tt != Ynone*Ymax {
  4286  		args = append(args, tt)
  4287  	}
  4288  
  4289  	var f3t int
  4290  	for _, yt := range o.ytab {
  4291  		// ytab matching is purely args-based,
  4292  		// but AVX512 suffixes like "Z" or "RU_SAE" will
  4293  		// add EVEX-only filter that will reject non-EVEX matches.
  4294  		//
  4295  		// Consider "VADDPD.BCST 2032(DX), X0, X0".
  4296  		// Without this rule, operands will lead to VEX-encoded form
  4297  		// and produce "c5b15813" encoding.
  4298  		if !yt.match(args) {
  4299  			// "xo" is always zero for VEX/EVEX encoded insts.
  4300  			z += int(yt.zoffset) + xo
  4301  		} else {
  4302  			if p.Scond != 0 && !evexZcase(yt.zcase) {
  4303  				// Do not signal error and continue to search
  4304  				// for matching EVEX-encoded form.
  4305  				z += int(yt.zoffset)
  4306  				continue
  4307  			}
  4308  
  4309  			switch o.prefix {
  4310  			case Px1: // first option valid only in 32-bit mode
  4311  				if ctxt.Arch.Family == sys.AMD64 && z == 0 {
  4312  					z += int(yt.zoffset) + xo
  4313  					continue
  4314  				}
  4315  			case Pq: // 16 bit escape and opcode escape
  4316  				ab.Put2(Pe, Pm)
  4317  
  4318  			case Pq3: // 16 bit escape and opcode escape + REX.W
  4319  				ab.rexflag |= Pw
  4320  				ab.Put2(Pe, Pm)
  4321  
  4322  			case Pq4: // 66 0F 38
  4323  				ab.Put3(0x66, 0x0F, 0x38)
  4324  
  4325  			case Pq4w: // 66 0F 38 + REX.W
  4326  				ab.rexflag |= Pw
  4327  				ab.Put3(0x66, 0x0F, 0x38)
  4328  
  4329  			case Pq5: // F3 0F 38
  4330  				ab.Put3(0xF3, 0x0F, 0x38)
  4331  
  4332  			case Pq5w: //  F3 0F 38 + REX.W
  4333  				ab.rexflag |= Pw
  4334  				ab.Put3(0xF3, 0x0F, 0x38)
  4335  
  4336  			case Pf2, // xmm opcode escape
  4337  				Pf3:
  4338  				ab.Put2(o.prefix, Pm)
  4339  
  4340  			case Pef3:
  4341  				ab.Put3(Pe, Pf3, Pm)
  4342  
  4343  			case Pfw: // xmm opcode escape + REX.W
  4344  				ab.rexflag |= Pw
  4345  				ab.Put2(Pf3, Pm)
  4346  
  4347  			case Pm: // opcode escape
  4348  				ab.Put1(Pm)
  4349  
  4350  			case Pe: // 16 bit escape
  4351  				ab.Put1(Pe)
  4352  
  4353  			case Pw: // 64-bit escape
  4354  				if ctxt.Arch.Family != sys.AMD64 {
  4355  					ctxt.Diag("asmins: illegal 64: %v", p)
  4356  				}
  4357  				ab.rexflag |= Pw
  4358  
  4359  			case Pw8: // 64-bit escape if z >= 8
  4360  				if z >= 8 {
  4361  					if ctxt.Arch.Family != sys.AMD64 {
  4362  						ctxt.Diag("asmins: illegal 64: %v", p)
  4363  					}
  4364  					ab.rexflag |= Pw
  4365  				}
  4366  
  4367  			case Pb: // botch
  4368  				if ctxt.Arch.Family != sys.AMD64 && (isbadbyte(&p.From) || isbadbyte(&p.To)) {
  4369  					goto bad
  4370  				}
  4371  				// NOTE(rsc): This is probably safe to do always,
  4372  				// but when enabled it chooses different encodings
  4373  				// than the old cmd/internal/obj/i386 code did,
  4374  				// which breaks our "same bits out" checks.
  4375  				// In particular, CMPB AX, $0 encodes as 80 f8 00
  4376  				// in the original obj/i386, and it would encode
  4377  				// (using a valid, shorter form) as 3c 00 if we enabled
  4378  				// the call to bytereg here.
  4379  				if ctxt.Arch.Family == sys.AMD64 {
  4380  					bytereg(&p.From, &p.Ft)
  4381  					bytereg(&p.To, &p.Tt)
  4382  				}
  4383  
  4384  			case P32: // 32 bit but illegal if 64-bit mode
  4385  				if ctxt.Arch.Family == sys.AMD64 {
  4386  					ctxt.Diag("asmins: illegal in 64-bit mode: %v", p)
  4387  				}
  4388  
  4389  			case Py: // 64-bit only, no prefix
  4390  				if ctxt.Arch.Family != sys.AMD64 {
  4391  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4392  				}
  4393  
  4394  			case Py1: // 64-bit only if z < 1, no prefix
  4395  				if z < 1 && ctxt.Arch.Family != sys.AMD64 {
  4396  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4397  				}
  4398  
  4399  			case Py3: // 64-bit only if z < 3, no prefix
  4400  				if z < 3 && ctxt.Arch.Family != sys.AMD64 {
  4401  					ctxt.Diag("asmins: illegal in %d-bit mode: %v", ctxt.Arch.RegSize*8, p)
  4402  				}
  4403  			}
  4404  
  4405  			if z >= len(o.op) {
  4406  				log.Fatalf("asmins bad table %v", p)
  4407  			}
  4408  			op := int(o.op[z])
  4409  			if op == 0x0f {
  4410  				ab.Put1(byte(op))
  4411  				z++
  4412  				op = int(o.op[z])
  4413  			}
  4414  
  4415  			switch yt.zcase {
  4416  			default:
  4417  				ctxt.Diag("asmins: unknown z %d %v", yt.zcase, p)
  4418  				return
  4419  
  4420  			case Zpseudo:
  4421  				break
  4422  
  4423  			case Zlit:
  4424  				ab.PutOpBytesLit(z, &o.op)
  4425  
  4426  			case Zlitr_m:
  4427  				ab.PutOpBytesLit(z, &o.op)
  4428  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4429  
  4430  			case Zlitm_r:
  4431  				ab.PutOpBytesLit(z, &o.op)
  4432  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4433  
  4434  			case Zlit_m_r:
  4435  				ab.PutOpBytesLit(z, &o.op)
  4436  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4437  
  4438  			case Zmb_r:
  4439  				bytereg(&p.From, &p.Ft)
  4440  				fallthrough
  4441  
  4442  			case Zm_r:
  4443  				ab.Put1(byte(op))
  4444  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4445  
  4446  			case Z_m_r:
  4447  				ab.Put1(byte(op))
  4448  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4449  
  4450  			case Zm2_r:
  4451  				ab.Put2(byte(op), o.op[z+1])
  4452  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4453  
  4454  			case Zm_r_xm:
  4455  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4456  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4457  
  4458  			case Zm_r_xm_nr:
  4459  				ab.rexflag = 0
  4460  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4461  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4462  
  4463  			case Zm_r_i_xm:
  4464  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4465  				ab.asmand(ctxt, cursym, p, &p.From, p.GetFrom3())
  4466  				ab.Put1(byte(p.To.Offset))
  4467  
  4468  			case Zibm_r, Zibr_m:
  4469  				ab.PutOpBytesLit(z, &o.op)
  4470  				if yt.zcase == Zibr_m {
  4471  					ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4472  				} else {
  4473  					ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4474  				}
  4475  				switch {
  4476  				default:
  4477  					ab.Put1(byte(p.From.Offset))
  4478  				case yt.args[0] == Yi32 && o.prefix == Pe:
  4479  					ab.PutInt16(int16(p.From.Offset))
  4480  				case yt.args[0] == Yi32:
  4481  					ab.PutInt32(int32(p.From.Offset))
  4482  				}
  4483  
  4484  			case Zaut_r:
  4485  				ab.Put1(0x8d) // leal
  4486  				if p.From.Type != obj.TYPE_ADDR {
  4487  					ctxt.Diag("asmins: Zaut sb type ADDR")
  4488  				}
  4489  				p.From.Type = obj.TYPE_MEM
  4490  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4491  				p.From.Type = obj.TYPE_ADDR
  4492  
  4493  			case Zm_o:
  4494  				ab.Put1(byte(op))
  4495  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4496  
  4497  			case Zr_m:
  4498  				ab.Put1(byte(op))
  4499  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4500  
  4501  			case Zvex:
  4502  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4503  
  4504  			case Zvex_rm_v_r:
  4505  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4506  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4507  
  4508  			case Zvex_rm_v_ro:
  4509  				ab.asmvex(ctxt, &p.From, p.GetFrom3(), &p.To, o.op[z], o.op[z+1])
  4510  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4511  
  4512  			case Zvex_i_rm_vo:
  4513  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4514  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+2]))
  4515  				ab.Put1(byte(p.From.Offset))
  4516  
  4517  			case Zvex_i_r_v:
  4518  				ab.asmvex(ctxt, p.GetFrom3(), &p.To, nil, o.op[z], o.op[z+1])
  4519  				regnum := byte(0x7)
  4520  				if p.GetFrom3().Reg >= REG_X0 && p.GetFrom3().Reg <= REG_X15 {
  4521  					regnum &= byte(p.GetFrom3().Reg - REG_X0)
  4522  				} else {
  4523  					regnum &= byte(p.GetFrom3().Reg - REG_Y0)
  4524  				}
  4525  				ab.Put1(o.op[z+2] | regnum)
  4526  				ab.Put1(byte(p.From.Offset))
  4527  
  4528  			case Zvex_i_rm_v_r:
  4529  				imm, from, from3, to := unpackOps4(p)
  4530  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4531  				ab.asmand(ctxt, cursym, p, from, to)
  4532  				ab.Put1(byte(imm.Offset))
  4533  
  4534  			case Zvex_i_rm_r:
  4535  				ab.asmvex(ctxt, p.GetFrom3(), nil, &p.To, o.op[z], o.op[z+1])
  4536  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4537  				ab.Put1(byte(p.From.Offset))
  4538  
  4539  			case Zvex_v_rm_r:
  4540  				ab.asmvex(ctxt, p.GetFrom3(), &p.From, &p.To, o.op[z], o.op[z+1])
  4541  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4542  
  4543  			case Zvex_r_v_rm:
  4544  				ab.asmvex(ctxt, &p.To, p.GetFrom3(), &p.From, o.op[z], o.op[z+1])
  4545  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4546  
  4547  			case Zvex_rm_r_vo:
  4548  				ab.asmvex(ctxt, &p.From, &p.To, p.GetFrom3(), o.op[z], o.op[z+1])
  4549  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+2]))
  4550  
  4551  			case Zvex_i_r_rm:
  4552  				ab.asmvex(ctxt, &p.To, nil, p.GetFrom3(), o.op[z], o.op[z+1])
  4553  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4554  				ab.Put1(byte(p.From.Offset))
  4555  
  4556  			case Zvex_hr_rm_v_r:
  4557  				hr, from, from3, to := unpackOps4(p)
  4558  				ab.asmvex(ctxt, from, from3, to, o.op[z], o.op[z+1])
  4559  				ab.asmand(ctxt, cursym, p, from, to)
  4560  				ab.Put1(byte(regIndex(hr.Reg) << 4))
  4561  
  4562  			case Zevex_k_rmo:
  4563  				ab.evex = newEVEXBits(z, &o.op)
  4564  				ab.asmevex(ctxt, p, &p.To, nil, nil, &p.From)
  4565  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+3]))
  4566  
  4567  			case Zevex_i_rm_vo:
  4568  				ab.evex = newEVEXBits(z, &o.op)
  4569  				ab.asmevex(ctxt, p, p.GetFrom3(), &p.To, nil, nil)
  4570  				ab.asmando(ctxt, cursym, p, p.GetFrom3(), int(o.op[z+3]))
  4571  				ab.Put1(byte(p.From.Offset))
  4572  
  4573  			case Zevex_i_rm_k_vo:
  4574  				imm, from, kmask, to := unpackOps4(p)
  4575  				ab.evex = newEVEXBits(z, &o.op)
  4576  				ab.asmevex(ctxt, p, from, to, nil, kmask)
  4577  				ab.asmando(ctxt, cursym, p, from, int(o.op[z+3]))
  4578  				ab.Put1(byte(imm.Offset))
  4579  
  4580  			case Zevex_i_r_rm:
  4581  				ab.evex = newEVEXBits(z, &o.op)
  4582  				ab.asmevex(ctxt, p, &p.To, nil, p.GetFrom3(), nil)
  4583  				ab.asmand(ctxt, cursym, p, &p.To, p.GetFrom3())
  4584  				ab.Put1(byte(p.From.Offset))
  4585  
  4586  			case Zevex_i_r_k_rm:
  4587  				imm, from, kmask, to := unpackOps4(p)
  4588  				ab.evex = newEVEXBits(z, &o.op)
  4589  				ab.asmevex(ctxt, p, to, nil, from, kmask)
  4590  				ab.asmand(ctxt, cursym, p, to, from)
  4591  				ab.Put1(byte(imm.Offset))
  4592  
  4593  			case Zevex_i_rm_r:
  4594  				ab.evex = newEVEXBits(z, &o.op)
  4595  				ab.asmevex(ctxt, p, p.GetFrom3(), nil, &p.To, nil)
  4596  				ab.asmand(ctxt, cursym, p, p.GetFrom3(), &p.To)
  4597  				ab.Put1(byte(p.From.Offset))
  4598  
  4599  			case Zevex_i_rm_k_r:
  4600  				imm, from, kmask, to := unpackOps4(p)
  4601  				ab.evex = newEVEXBits(z, &o.op)
  4602  				ab.asmevex(ctxt, p, from, nil, to, kmask)
  4603  				ab.asmand(ctxt, cursym, p, from, to)
  4604  				ab.Put1(byte(imm.Offset))
  4605  
  4606  			case Zevex_i_rm_v_r:
  4607  				imm, from, from3, to := unpackOps4(p)
  4608  				ab.evex = newEVEXBits(z, &o.op)
  4609  				ab.asmevex(ctxt, p, from, from3, to, nil)
  4610  				ab.asmand(ctxt, cursym, p, from, to)
  4611  				ab.Put1(byte(imm.Offset))
  4612  
  4613  			case Zevex_i_rm_v_k_r:
  4614  				imm, from, from3, kmask, to := unpackOps5(p)
  4615  				ab.evex = newEVEXBits(z, &o.op)
  4616  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4617  				ab.asmand(ctxt, cursym, p, from, to)
  4618  				ab.Put1(byte(imm.Offset))
  4619  
  4620  			case Zevex_r_v_rm:
  4621  				ab.evex = newEVEXBits(z, &o.op)
  4622  				ab.asmevex(ctxt, p, &p.To, p.GetFrom3(), &p.From, nil)
  4623  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4624  
  4625  			case Zevex_rm_v_r:
  4626  				ab.evex = newEVEXBits(z, &o.op)
  4627  				ab.asmevex(ctxt, p, &p.From, p.GetFrom3(), &p.To, nil)
  4628  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4629  
  4630  			case Zevex_rm_k_r:
  4631  				ab.evex = newEVEXBits(z, &o.op)
  4632  				ab.asmevex(ctxt, p, &p.From, nil, &p.To, p.GetFrom3())
  4633  				ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  4634  
  4635  			case Zevex_r_k_rm:
  4636  				ab.evex = newEVEXBits(z, &o.op)
  4637  				ab.asmevex(ctxt, p, &p.To, nil, &p.From, p.GetFrom3())
  4638  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4639  
  4640  			case Zevex_rm_v_k_r:
  4641  				from, from3, kmask, to := unpackOps4(p)
  4642  				ab.evex = newEVEXBits(z, &o.op)
  4643  				ab.asmevex(ctxt, p, from, from3, to, kmask)
  4644  				ab.asmand(ctxt, cursym, p, from, to)
  4645  
  4646  			case Zevex_r_v_k_rm:
  4647  				from, from3, kmask, to := unpackOps4(p)
  4648  				ab.evex = newEVEXBits(z, &o.op)
  4649  				ab.asmevex(ctxt, p, to, from3, from, kmask)
  4650  				ab.asmand(ctxt, cursym, p, to, from)
  4651  
  4652  			case Zr_m_xm:
  4653  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4654  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4655  
  4656  			case Zr_m_xm_nr:
  4657  				ab.rexflag = 0
  4658  				ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4659  				ab.asmand(ctxt, cursym, p, &p.To, &p.From)
  4660  
  4661  			case Zo_m:
  4662  				ab.Put1(byte(op))
  4663  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4664  
  4665  			case Zcallindreg:
  4666  				cursym.AddRel(ctxt, obj.Reloc{
  4667  					Type: objabi.R_CALLIND,
  4668  					Off:  int32(p.Pc),
  4669  				})
  4670  				fallthrough
  4671  
  4672  			case Zo_m64:
  4673  				ab.Put1(byte(op))
  4674  				ab.asmandsz(ctxt, cursym, p, &p.To, int(o.op[z+1]), 0, 1)
  4675  
  4676  			case Zm_ibo:
  4677  				ab.Put1(byte(op))
  4678  				ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4679  				ab.Put1(byte(vaddr(ctxt, p, &p.To, nil)))
  4680  
  4681  			case Zibo_m:
  4682  				ab.Put1(byte(op))
  4683  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4684  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4685  
  4686  			case Zibo_m_xm:
  4687  				z = ab.mediaop(ctxt, o, op, int(yt.zoffset), z)
  4688  				ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4689  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4690  
  4691  			case Z_ib, Zib_:
  4692  				var a *obj.Addr
  4693  				if yt.zcase == Zib_ {
  4694  					a = &p.From
  4695  				} else {
  4696  					a = &p.To
  4697  				}
  4698  				ab.Put1(byte(op))
  4699  				if p.As == AXABORT {
  4700  					ab.Put1(o.op[z+1])
  4701  				}
  4702  				ab.Put1(byte(vaddr(ctxt, p, a, nil)))
  4703  
  4704  			case Zib_rp:
  4705  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4706  				ab.Put2(byte(op+reg[p.To.Reg]), byte(vaddr(ctxt, p, &p.From, nil)))
  4707  
  4708  			case Zil_rp:
  4709  				ab.rexflag |= regrex[p.To.Reg] & Rxb
  4710  				ab.Put1(byte(op + reg[p.To.Reg]))
  4711  				if o.prefix == Pe {
  4712  					v := vaddr(ctxt, p, &p.From, nil)
  4713  					ab.PutInt16(int16(v))
  4714  				} else {
  4715  					ab.relput4(ctxt, cursym, p, &p.From)
  4716  				}
  4717  
  4718  			case Zo_iw:
  4719  				ab.Put1(byte(op))
  4720  				if p.From.Type != obj.TYPE_NONE {
  4721  					v := vaddr(ctxt, p, &p.From, nil)
  4722  					ab.PutInt16(int16(v))
  4723  				}
  4724  
  4725  			case Ziq_rp:
  4726  				var rel obj.Reloc
  4727  				v := vaddr(ctxt, p, &p.From, &rel)
  4728  				l := int(v >> 32)
  4729  				if l == 0 && rel.Siz != 8 {
  4730  					ab.rexflag &^= (0x40 | Rxw)
  4731  
  4732  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4733  					ab.Put1(byte(0xb8 + reg[p.To.Reg]))
  4734  					if rel.Type != 0 {
  4735  						rel.Off = int32(p.Pc + int64(ab.Len()))
  4736  						cursym.AddRel(ctxt, rel)
  4737  					}
  4738  
  4739  					ab.PutInt32(int32(v))
  4740  				} else if l == -1 && uint64(v)&(uint64(1)<<31) != 0 { // sign extend
  4741  					ab.Put1(0xc7)
  4742  					ab.asmando(ctxt, cursym, p, &p.To, 0)
  4743  
  4744  					ab.PutInt32(int32(v)) // need all 8
  4745  				} else {
  4746  					ab.rexflag |= regrex[p.To.Reg] & Rxb
  4747  					ab.Put1(byte(op + reg[p.To.Reg]))
  4748  					if rel.Type != 0 {
  4749  						rel.Off = int32(p.Pc + int64(ab.Len()))
  4750  						cursym.AddRel(ctxt, rel)
  4751  					}
  4752  
  4753  					ab.PutInt64(v)
  4754  				}
  4755  
  4756  			case Zib_rr:
  4757  				ab.Put1(byte(op))
  4758  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4759  				ab.Put1(byte(vaddr(ctxt, p, &p.From, nil)))
  4760  
  4761  			case Z_il, Zil_:
  4762  				var a *obj.Addr
  4763  				if yt.zcase == Zil_ {
  4764  					a = &p.From
  4765  				} else {
  4766  					a = &p.To
  4767  				}
  4768  				ab.Put1(byte(op))
  4769  				if o.prefix == Pe {
  4770  					v := vaddr(ctxt, p, a, nil)
  4771  					ab.PutInt16(int16(v))
  4772  				} else {
  4773  					ab.relput4(ctxt, cursym, p, a)
  4774  				}
  4775  
  4776  			case Zm_ilo, Zilo_m:
  4777  				var a *obj.Addr
  4778  				ab.Put1(byte(op))
  4779  				if yt.zcase == Zilo_m {
  4780  					a = &p.From
  4781  					ab.asmando(ctxt, cursym, p, &p.To, int(o.op[z+1]))
  4782  				} else {
  4783  					a = &p.To
  4784  					ab.asmando(ctxt, cursym, p, &p.From, int(o.op[z+1]))
  4785  				}
  4786  
  4787  				if o.prefix == Pe {
  4788  					v := vaddr(ctxt, p, a, nil)
  4789  					ab.PutInt16(int16(v))
  4790  				} else {
  4791  					ab.relput4(ctxt, cursym, p, a)
  4792  				}
  4793  
  4794  			case Zil_rr:
  4795  				ab.Put1(byte(op))
  4796  				ab.asmand(ctxt, cursym, p, &p.To, &p.To)
  4797  				if o.prefix == Pe {
  4798  					v := vaddr(ctxt, p, &p.From, nil)
  4799  					ab.PutInt16(int16(v))
  4800  				} else {
  4801  					ab.relput4(ctxt, cursym, p, &p.From)
  4802  				}
  4803  
  4804  			case Z_rp:
  4805  				ab.rexflag |= regrex[p.To.Reg] & (Rxb | 0x40)
  4806  				ab.Put1(byte(op + reg[p.To.Reg]))
  4807  
  4808  			case Zrp_:
  4809  				ab.rexflag |= regrex[p.From.Reg] & (Rxb | 0x40)
  4810  				ab.Put1(byte(op + reg[p.From.Reg]))
  4811  
  4812  			case Zcallcon, Zjmpcon:
  4813  				if yt.zcase == Zcallcon {
  4814  					ab.Put1(byte(op))
  4815  				} else {
  4816  					ab.Put1(o.op[z+1])
  4817  				}
  4818  				cursym.AddRel(ctxt, obj.Reloc{
  4819  					Type: objabi.R_PCREL,
  4820  					Off:  int32(p.Pc + int64(ab.Len())),
  4821  					Siz:  4,
  4822  					Add:  p.To.Offset,
  4823  				})
  4824  				ab.PutInt32(0)
  4825  
  4826  			case Zcallind:
  4827  				ab.Put2(byte(op), o.op[z+1])
  4828  				typ := objabi.R_ADDR
  4829  				if ctxt.Arch.Family == sys.AMD64 {
  4830  					typ = objabi.R_PCREL
  4831  				}
  4832  				cursym.AddRel(ctxt, obj.Reloc{
  4833  					Type: typ,
  4834  					Off:  int32(p.Pc + int64(ab.Len())),
  4835  					Siz:  4,
  4836  					Sym:  p.To.Sym,
  4837  					Add:  p.To.Offset,
  4838  				})
  4839  				ab.PutInt32(0)
  4840  
  4841  			case Zcall, Zcallduff:
  4842  				if p.To.Sym == nil {
  4843  					ctxt.Diag("call without target")
  4844  					ctxt.DiagFlush()
  4845  					log.Fatalf("bad code")
  4846  				}
  4847  
  4848  				if yt.zcase == Zcallduff && ctxt.Flag_dynlink {
  4849  					ctxt.Diag("directly calling duff when dynamically linking Go")
  4850  				}
  4851  
  4852  				ab.Put1(byte(op))
  4853  				cursym.AddRel(ctxt, obj.Reloc{
  4854  					Type: objabi.R_CALL,
  4855  					Off:  int32(p.Pc + int64(ab.Len())),
  4856  					Siz:  4,
  4857  					Sym:  p.To.Sym,
  4858  					Add:  p.To.Offset,
  4859  				})
  4860  				ab.PutInt32(0)
  4861  
  4862  			// TODO: jump across functions needs reloc
  4863  			case Zbr, Zjmp, Zloop:
  4864  				if p.As == AXBEGIN {
  4865  					ab.Put1(byte(op))
  4866  				}
  4867  				if p.To.Sym != nil {
  4868  					if yt.zcase != Zjmp {
  4869  						ctxt.Diag("branch to ATEXT")
  4870  						ctxt.DiagFlush()
  4871  						log.Fatalf("bad code")
  4872  					}
  4873  
  4874  					ab.Put1(o.op[z+1])
  4875  					cursym.AddRel(ctxt, obj.Reloc{
  4876  						// Note: R_CALL instead of R_PCREL. R_CALL is more permissive in that
  4877  						// it can point to a trampoline instead of the destination itself.
  4878  						Type: objabi.R_CALL,
  4879  						Off:  int32(p.Pc + int64(ab.Len())),
  4880  						Siz:  4,
  4881  						Sym:  p.To.Sym,
  4882  					})
  4883  					ab.PutInt32(0)
  4884  					break
  4885  				}
  4886  
  4887  				// Assumes q is in this function.
  4888  				// TODO: Check in input, preserve in brchain.
  4889  
  4890  				// Fill in backward jump now.
  4891  				q := p.To.Target()
  4892  
  4893  				if q == nil {
  4894  					ctxt.Diag("jmp/branch/loop without target")
  4895  					ctxt.DiagFlush()
  4896  					log.Fatalf("bad code")
  4897  				}
  4898  
  4899  				if p.Back&branchBackwards != 0 {
  4900  					v := q.Pc - (p.Pc + 2)
  4901  					if v >= -128 && p.As != AXBEGIN {
  4902  						if p.As == AJCXZL {
  4903  							ab.Put1(0x67)
  4904  						}
  4905  						ab.Put2(byte(op), byte(v))
  4906  					} else if yt.zcase == Zloop {
  4907  						ctxt.Diag("loop too far: %v", p)
  4908  					} else {
  4909  						v -= 5 - 2
  4910  						if p.As == AXBEGIN {
  4911  							v--
  4912  						}
  4913  						if yt.zcase == Zbr {
  4914  							ab.Put1(0x0f)
  4915  							v--
  4916  						}
  4917  
  4918  						ab.Put1(o.op[z+1])
  4919  						ab.PutInt32(int32(v))
  4920  					}
  4921  
  4922  					break
  4923  				}
  4924  
  4925  				// Annotate target; will fill in later.
  4926  				p.Forwd = q.Rel
  4927  
  4928  				q.Rel = p
  4929  				if p.Back&branchShort != 0 && p.As != AXBEGIN {
  4930  					if p.As == AJCXZL {
  4931  						ab.Put1(0x67)
  4932  					}
  4933  					ab.Put2(byte(op), 0)
  4934  				} else if yt.zcase == Zloop {
  4935  					ctxt.Diag("loop too far: %v", p)
  4936  				} else {
  4937  					if yt.zcase == Zbr {
  4938  						ab.Put1(0x0f)
  4939  					}
  4940  					ab.Put1(o.op[z+1])
  4941  					ab.PutInt32(0)
  4942  				}
  4943  
  4944  			case Zbyte:
  4945  				var rel obj.Reloc
  4946  				v := vaddr(ctxt, p, &p.From, &rel)
  4947  				if rel.Siz != 0 {
  4948  					rel.Siz = uint8(op)
  4949  					rel.Off = int32(p.Pc + int64(ab.Len()))
  4950  					cursym.AddRel(ctxt, rel)
  4951  				}
  4952  
  4953  				ab.Put1(byte(v))
  4954  				if op > 1 {
  4955  					ab.Put1(byte(v >> 8))
  4956  					if op > 2 {
  4957  						ab.PutInt16(int16(v >> 16))
  4958  						if op > 4 {
  4959  							ab.PutInt32(int32(v >> 32))
  4960  						}
  4961  					}
  4962  				}
  4963  			}
  4964  
  4965  			return
  4966  		}
  4967  	}
  4968  	f3t = Ynone * Ymax
  4969  	if p.GetFrom3() != nil {
  4970  		f3t = oclass(ctxt, p, p.GetFrom3()) * Ymax
  4971  	}
  4972  	for mo := ymovtab; mo[0].as != 0; mo = mo[1:] {
  4973  		var pp obj.Prog
  4974  		var t []byte
  4975  		if p.As == mo[0].as {
  4976  			if ycover[ft+int(mo[0].ft)] != 0 && ycover[f3t+int(mo[0].f3t)] != 0 && ycover[tt+int(mo[0].tt)] != 0 {
  4977  				t = mo[0].op[:]
  4978  				switch mo[0].code {
  4979  				default:
  4980  					ctxt.Diag("asmins: unknown mov %d %v", mo[0].code, p)
  4981  
  4982  				case movLit:
  4983  					for z = 0; t[z] != 0; z++ {
  4984  						ab.Put1(t[z])
  4985  					}
  4986  
  4987  				case movRegMem:
  4988  					ab.Put1(t[0])
  4989  					ab.asmando(ctxt, cursym, p, &p.To, int(t[1]))
  4990  
  4991  				case movMemReg:
  4992  					ab.Put1(t[0])
  4993  					ab.asmando(ctxt, cursym, p, &p.From, int(t[1]))
  4994  
  4995  				case movRegMem2op: // r,m - 2op
  4996  					ab.Put2(t[0], t[1])
  4997  					ab.asmando(ctxt, cursym, p, &p.To, int(t[2]))
  4998  					ab.rexflag |= regrex[p.From.Reg] & (Rxr | 0x40)
  4999  
  5000  				case movMemReg2op:
  5001  					ab.Put2(t[0], t[1])
  5002  					ab.asmando(ctxt, cursym, p, &p.From, int(t[2]))
  5003  					ab.rexflag |= regrex[p.To.Reg] & (Rxr | 0x40)
  5004  
  5005  				case movFullPtr:
  5006  					if t[0] != 0 {
  5007  						ab.Put1(t[0])
  5008  					}
  5009  					switch p.To.Index {
  5010  					default:
  5011  						goto bad
  5012  
  5013  					case REG_DS:
  5014  						ab.Put1(0xc5)
  5015  
  5016  					case REG_SS:
  5017  						ab.Put2(0x0f, 0xb2)
  5018  
  5019  					case REG_ES:
  5020  						ab.Put1(0xc4)
  5021  
  5022  					case REG_FS:
  5023  						ab.Put2(0x0f, 0xb4)
  5024  
  5025  					case REG_GS:
  5026  						ab.Put2(0x0f, 0xb5)
  5027  					}
  5028  
  5029  					ab.asmand(ctxt, cursym, p, &p.From, &p.To)
  5030  
  5031  				case movDoubleShift:
  5032  					if t[0] == Pw {
  5033  						if ctxt.Arch.Family != sys.AMD64 {
  5034  							ctxt.Diag("asmins: illegal 64: %v", p)
  5035  						}
  5036  						ab.rexflag |= Pw
  5037  						t = t[1:]
  5038  					} else if t[0] == Pe {
  5039  						ab.Put1(Pe)
  5040  						t = t[1:]
  5041  					}
  5042  
  5043  					switch p.From.Type {
  5044  					default:
  5045  						goto bad
  5046  
  5047  					case obj.TYPE_CONST:
  5048  						ab.Put2(0x0f, t[0])
  5049  						ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5050  						ab.Put1(byte(p.From.Offset))
  5051  
  5052  					case obj.TYPE_REG:
  5053  						switch p.From.Reg {
  5054  						default:
  5055  							goto bad
  5056  
  5057  						case REG_CL, REG_CX:
  5058  							ab.Put2(0x0f, t[1])
  5059  							ab.asmandsz(ctxt, cursym, p, &p.To, reg[p.GetFrom3().Reg], regrex[p.GetFrom3().Reg], 0)
  5060  						}
  5061  					}
  5062  
  5063  				// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5064  				// where you load the TLS base register into a register and then index off that
  5065  				// register to access the actual TLS variables. Systems that allow direct TLS access
  5066  				// are handled in prefixof above and should not be listed here.
  5067  				case movTLSReg:
  5068  					if ctxt.Arch.Family == sys.AMD64 && p.As != AMOVQ || ctxt.Arch.Family == sys.I386 && p.As != AMOVL {
  5069  						ctxt.Diag("invalid load of TLS: %v", p)
  5070  					}
  5071  
  5072  					if ctxt.Arch.Family == sys.I386 {
  5073  						// NOTE: The systems listed here are the ones that use the "TLS initial exec" model,
  5074  						// where you load the TLS base register into a register and then index off that
  5075  						// register to access the actual TLS variables. Systems that allow direct TLS access
  5076  						// are handled in prefixof above and should not be listed here.
  5077  						switch ctxt.Headtype {
  5078  						default:
  5079  							log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5080  
  5081  						case objabi.Hlinux, objabi.Hfreebsd:
  5082  							if ctxt.Flag_shared {
  5083  								// Note that this is not generating the same insns as the other cases.
  5084  								//     MOV TLS, dst
  5085  								// becomes
  5086  								//     call __x86.get_pc_thunk.dst
  5087  								//     movl (gotpc + g@gotntpoff)(dst), dst
  5088  								// which is encoded as
  5089  								//     call __x86.get_pc_thunk.dst
  5090  								//     movq 0(dst), dst
  5091  								// and R_CALL & R_TLS_IE relocs. This all assumes the only tls variable we access
  5092  								// is g, which we can't check here, but will when we assemble the second
  5093  								// instruction.
  5094  								dst := p.To.Reg
  5095  								ab.Put1(0xe8)
  5096  								cursym.AddRel(ctxt, obj.Reloc{
  5097  									Type: objabi.R_CALL,
  5098  									Off:  int32(p.Pc + int64(ab.Len())),
  5099  									Siz:  4,
  5100  									Sym:  ctxt.Lookup("__x86.get_pc_thunk." + strings.ToLower(rconv(int(dst)))),
  5101  								})
  5102  								ab.PutInt32(0)
  5103  
  5104  								ab.Put2(0x8B, byte(2<<6|reg[dst]|(reg[dst]<<3)))
  5105  								cursym.AddRel(ctxt, obj.Reloc{
  5106  									Type: objabi.R_TLS_IE,
  5107  									Off:  int32(p.Pc + int64(ab.Len())),
  5108  									Siz:  4,
  5109  									Add:  2,
  5110  								})
  5111  								ab.PutInt32(0)
  5112  							} else {
  5113  								// ELF TLS base is 0(GS).
  5114  								pp.From = p.From
  5115  
  5116  								pp.From.Type = obj.TYPE_MEM
  5117  								pp.From.Reg = REG_GS
  5118  								pp.From.Offset = 0
  5119  								pp.From.Index = REG_NONE
  5120  								pp.From.Scale = 0
  5121  								ab.Put2(0x65, // GS
  5122  									0x8B)
  5123  								ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5124  							}
  5125  						case objabi.Hplan9:
  5126  							pp.From = obj.Addr{}
  5127  							pp.From.Type = obj.TYPE_MEM
  5128  							pp.From.Name = obj.NAME_EXTERN
  5129  							pp.From.Sym = plan9privates
  5130  							pp.From.Offset = 0
  5131  							pp.From.Index = REG_NONE
  5132  							ab.Put1(0x8B)
  5133  							ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5134  						}
  5135  						break
  5136  					}
  5137  
  5138  					switch ctxt.Headtype {
  5139  					default:
  5140  						log.Fatalf("unknown TLS base location for %v", ctxt.Headtype)
  5141  
  5142  					case objabi.Hlinux, objabi.Hfreebsd:
  5143  						if !ctxt.Flag_shared {
  5144  							log.Fatalf("unknown TLS base location for linux/freebsd without -shared")
  5145  						}
  5146  						// Note that this is not generating the same insn as the other cases.
  5147  						//     MOV TLS, R_to
  5148  						// becomes
  5149  						//     movq g@gottpoff(%rip), R_to
  5150  						// which is encoded as
  5151  						//     movq 0(%rip), R_to
  5152  						// and a R_TLS_IE reloc. This all assumes the only tls variable we access
  5153  						// is g, which we can't check here, but will when we assemble the second
  5154  						// instruction.
  5155  						ab.rexflag = Pw | (regrex[p.To.Reg] & Rxr)
  5156  
  5157  						ab.Put2(0x8B, byte(0x05|(reg[p.To.Reg]<<3)))
  5158  						cursym.AddRel(ctxt, obj.Reloc{
  5159  							Type: objabi.R_TLS_IE,
  5160  							Off:  int32(p.Pc + int64(ab.Len())),
  5161  							Siz:  4,
  5162  							Add:  -4,
  5163  						})
  5164  						ab.PutInt32(0)
  5165  
  5166  					case objabi.Hplan9:
  5167  						pp.From = obj.Addr{}
  5168  						pp.From.Type = obj.TYPE_MEM
  5169  						pp.From.Name = obj.NAME_EXTERN
  5170  						pp.From.Sym = plan9privates
  5171  						pp.From.Offset = 0
  5172  						pp.From.Index = REG_NONE
  5173  						ab.rexflag |= Pw
  5174  						ab.Put1(0x8B)
  5175  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5176  
  5177  					case objabi.Hsolaris: // TODO(rsc): Delete Hsolaris from list. Should not use this code. See progedit in obj6.c.
  5178  						// TLS base is 0(FS).
  5179  						pp.From = p.From
  5180  
  5181  						pp.From.Type = obj.TYPE_MEM
  5182  						pp.From.Name = obj.NAME_NONE
  5183  						pp.From.Reg = REG_NONE
  5184  						pp.From.Offset = 0
  5185  						pp.From.Index = REG_NONE
  5186  						pp.From.Scale = 0
  5187  						ab.rexflag |= Pw
  5188  						ab.Put2(0x64, // FS
  5189  							0x8B)
  5190  						ab.asmand(ctxt, cursym, p, &pp.From, &p.To)
  5191  					}
  5192  				}
  5193  				return
  5194  			}
  5195  		}
  5196  	}
  5197  	goto bad
  5198  
  5199  bad:
  5200  	if ctxt.Arch.Family != sys.AMD64 {
  5201  		// here, the assembly has failed.
  5202  		// if it's a byte instruction that has
  5203  		// unaddressable registers, try to
  5204  		// exchange registers and reissue the
  5205  		// instruction with the operands renamed.
  5206  		pp := *p
  5207  
  5208  		unbytereg(&pp.From, &pp.Ft)
  5209  		unbytereg(&pp.To, &pp.Tt)
  5210  
  5211  		z := int(p.From.Reg)
  5212  		if p.From.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5213  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5214  			// For now, different to keep bit-for-bit compatibility.
  5215  			if ctxt.Arch.Family == sys.I386 {
  5216  				breg := byteswapreg(ctxt, &p.To)
  5217  				if breg != REG_AX {
  5218  					ab.Put1(0x87) // xchg lhs,bx
  5219  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5220  					subreg(&pp, z, breg)
  5221  					ab.doasm(ctxt, cursym, &pp)
  5222  					ab.Put1(0x87) // xchg lhs,bx
  5223  					ab.asmando(ctxt, cursym, p, &p.From, reg[breg])
  5224  				} else {
  5225  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5226  					subreg(&pp, z, REG_AX)
  5227  					ab.doasm(ctxt, cursym, &pp)
  5228  					ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5229  				}
  5230  				return
  5231  			}
  5232  
  5233  			if isax(&p.To) || p.To.Type == obj.TYPE_NONE {
  5234  				// We certainly don't want to exchange
  5235  				// with AX if the op is MUL or DIV.
  5236  				ab.Put1(0x87) // xchg lhs,bx
  5237  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5238  				subreg(&pp, z, REG_BX)
  5239  				ab.doasm(ctxt, cursym, &pp)
  5240  				ab.Put1(0x87) // xchg lhs,bx
  5241  				ab.asmando(ctxt, cursym, p, &p.From, reg[REG_BX])
  5242  			} else {
  5243  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5244  				subreg(&pp, z, REG_AX)
  5245  				ab.doasm(ctxt, cursym, &pp)
  5246  				ab.Put1(byte(0x90 + reg[z])) // xchg lsh,ax
  5247  			}
  5248  			return
  5249  		}
  5250  
  5251  		z = int(p.To.Reg)
  5252  		if p.To.Type == obj.TYPE_REG && z >= REG_BP && z <= REG_DI {
  5253  			// TODO(rsc): Use this code for x86-64 too. It has bug fixes not present in the amd64 code base.
  5254  			// For now, different to keep bit-for-bit compatibility.
  5255  			if ctxt.Arch.Family == sys.I386 {
  5256  				breg := byteswapreg(ctxt, &p.From)
  5257  				if breg != REG_AX {
  5258  					ab.Put1(0x87) //xchg rhs,bx
  5259  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5260  					subreg(&pp, z, breg)
  5261  					ab.doasm(ctxt, cursym, &pp)
  5262  					ab.Put1(0x87) // xchg rhs,bx
  5263  					ab.asmando(ctxt, cursym, p, &p.To, reg[breg])
  5264  				} else {
  5265  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5266  					subreg(&pp, z, REG_AX)
  5267  					ab.doasm(ctxt, cursym, &pp)
  5268  					ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5269  				}
  5270  				return
  5271  			}
  5272  
  5273  			if isax(&p.From) {
  5274  				ab.Put1(0x87) // xchg rhs,bx
  5275  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5276  				subreg(&pp, z, REG_BX)
  5277  				ab.doasm(ctxt, cursym, &pp)
  5278  				ab.Put1(0x87) // xchg rhs,bx
  5279  				ab.asmando(ctxt, cursym, p, &p.To, reg[REG_BX])
  5280  			} else {
  5281  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5282  				subreg(&pp, z, REG_AX)
  5283  				ab.doasm(ctxt, cursym, &pp)
  5284  				ab.Put1(byte(0x90 + reg[z])) // xchg rsh,ax
  5285  			}
  5286  			return
  5287  		}
  5288  	}
  5289  
  5290  	ctxt.Diag("%s: invalid instruction: %v", cursym.Name, p)
  5291  }
  5292  
  5293  // byteswapreg returns a byte-addressable register (AX, BX, CX, DX)
  5294  // which is not referenced in a.
  5295  // If a is empty, it returns BX to account for MULB-like instructions
  5296  // that might use DX and AX.
  5297  func byteswapreg(ctxt *obj.Link, a *obj.Addr) int {
  5298  	cana, canb, canc, cand := true, true, true, true
  5299  	if a.Type == obj.TYPE_NONE {
  5300  		cana, cand = false, false
  5301  	}
  5302  
  5303  	if a.Type == obj.TYPE_REG || ((a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR) && a.Name == obj.NAME_NONE) {
  5304  		switch a.Reg {
  5305  		case REG_NONE:
  5306  			cana, cand = false, false
  5307  		case REG_AX, REG_AL, REG_AH:
  5308  			cana = false
  5309  		case REG_BX, REG_BL, REG_BH:
  5310  			canb = false
  5311  		case REG_CX, REG_CL, REG_CH:
  5312  			canc = false
  5313  		case REG_DX, REG_DL, REG_DH:
  5314  			cand = false
  5315  		}
  5316  	}
  5317  
  5318  	if a.Type == obj.TYPE_MEM || a.Type == obj.TYPE_ADDR {
  5319  		switch a.Index {
  5320  		case REG_AX:
  5321  			cana = false
  5322  		case REG_BX:
  5323  			canb = false
  5324  		case REG_CX:
  5325  			canc = false
  5326  		case REG_DX:
  5327  			cand = false
  5328  		}
  5329  	}
  5330  
  5331  	switch {
  5332  	case cana:
  5333  		return REG_AX
  5334  	case canb:
  5335  		return REG_BX
  5336  	case canc:
  5337  		return REG_CX
  5338  	case cand:
  5339  		return REG_DX
  5340  	default:
  5341  		ctxt.Diag("impossible byte register")
  5342  		ctxt.DiagFlush()
  5343  		log.Fatalf("bad code")
  5344  		return 0
  5345  	}
  5346  }
  5347  
  5348  func isbadbyte(a *obj.Addr) bool {
  5349  	return a.Type == obj.TYPE_REG && (REG_BP <= a.Reg && a.Reg <= REG_DI || REG_BPB <= a.Reg && a.Reg <= REG_DIB)
  5350  }
  5351  
  5352  func (ab *AsmBuf) asmins(ctxt *obj.Link, cursym *obj.LSym, p *obj.Prog) {
  5353  	ab.Reset()
  5354  
  5355  	ab.rexflag = 0
  5356  	ab.vexflag = false
  5357  	ab.evexflag = false
  5358  	mark := ab.Len()
  5359  	ab.doasm(ctxt, cursym, p)
  5360  	if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5361  		// as befits the whole approach of the architecture,
  5362  		// the rex prefix must appear before the first opcode byte
  5363  		// (and thus after any 66/67/f2/f3/26/2e/3e prefix bytes, but
  5364  		// before the 0f opcode escape!), or it might be ignored.
  5365  		// note that the handbook often misleadingly shows 66/f2/f3 in `opcode'.
  5366  		if ctxt.Arch.Family != sys.AMD64 {
  5367  			ctxt.Diag("asmins: illegal in mode %d: %v (%d %d)", ctxt.Arch.RegSize*8, p, p.Ft, p.Tt)
  5368  		}
  5369  		n := ab.Len()
  5370  		var np int
  5371  		for np = mark; np < n; np++ {
  5372  			c := ab.At(np)
  5373  			if c != 0xf2 && c != 0xf3 && (c < 0x64 || c > 0x67) && c != 0x2e && c != 0x3e && c != 0x26 {
  5374  				break
  5375  			}
  5376  		}
  5377  		ab.Insert(np, byte(0x40|ab.rexflag))
  5378  	}
  5379  
  5380  	n := ab.Len()
  5381  	for i := len(cursym.R) - 1; i >= 0; i-- {
  5382  		r := &cursym.R[i]
  5383  		if int64(r.Off) < p.Pc {
  5384  			break
  5385  		}
  5386  		if ab.rexflag != 0 && !ab.vexflag && !ab.evexflag {
  5387  			r.Off++
  5388  		}
  5389  		if r.Type == objabi.R_PCREL {
  5390  			if ctxt.Arch.Family == sys.AMD64 || p.As == obj.AJMP || p.As == obj.ACALL {
  5391  				// PC-relative addressing is relative to the end of the instruction,
  5392  				// but the relocations applied by the linker are relative to the end
  5393  				// of the relocation. Because immediate instruction
  5394  				// arguments can follow the PC-relative memory reference in the
  5395  				// instruction encoding, the two may not coincide. In this case,
  5396  				// adjust addend so that linker can keep relocating relative to the
  5397  				// end of the relocation.
  5398  				r.Add -= p.Pc + int64(n) - (int64(r.Off) + int64(r.Siz))
  5399  			} else if ctxt.Arch.Family == sys.I386 {
  5400  				// On 386 PC-relative addressing (for non-call/jmp instructions)
  5401  				// assumes that the previous instruction loaded the PC of the end
  5402  				// of that instruction into CX, so the adjustment is relative to
  5403  				// that.
  5404  				r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5405  			}
  5406  		}
  5407  		if r.Type == objabi.R_GOTPCREL && ctxt.Arch.Family == sys.I386 {
  5408  			// On 386, R_GOTPCREL makes the same assumptions as R_PCREL.
  5409  			r.Add += int64(r.Off) - p.Pc + int64(r.Siz)
  5410  		}
  5411  
  5412  	}
  5413  }
  5414  
  5415  // unpackOps4 extracts 4 operands from p.
  5416  func unpackOps4(p *obj.Prog) (arg0, arg1, arg2, dst *obj.Addr) {
  5417  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.To
  5418  }
  5419  
  5420  // unpackOps5 extracts 5 operands from p.
  5421  func unpackOps5(p *obj.Prog) (arg0, arg1, arg2, arg3, dst *obj.Addr) {
  5422  	return &p.From, &p.RestArgs[0].Addr, &p.RestArgs[1].Addr, &p.RestArgs[2].Addr, &p.To
  5423  }
  5424  

View as plain text