Source file src/cmd/internal/obj/loong64/asm.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package loong64
     6  
     7  import (
     8  	"cmd/internal/obj"
     9  	"cmd/internal/objabi"
    10  	"fmt"
    11  	"log"
    12  	"math/bits"
    13  	"slices"
    14  )
    15  
// ctxt0 holds state while assembling a single function.
// Each function gets a fresh ctxt0.
// This allows for multiple functions to be safely concurrently assembled.
type ctxt0 struct {
	ctxt       *obj.Link     // link context handed to span0; used for diagnostics and arch info
	newprog    obj.ProgAlloc // allocator for Progs synthesized while assembling (PCALIGN, long-branch JMPs)
	cursym     *obj.LSym     // symbol of the function being assembled
	autosize   int32         // Text.To.Offset plus the architecture's fixed frame size
	instoffset int64         // NOTE(review): presumably the offset/constant of the most recently classified operand — confirm against aclass
	pc         int64         // pc of the instruction currently being encoded
}
    27  
    28  // Instruction layout.
    29  
const (
	// FuncAlign is the boundary, in bytes, that span0 rounds a
	// function's size up to.
	FuncAlign = 4
	// loopAlign is the alignment, in bytes, of the PCALIGN that span0
	// inserts in front of loop heads whose pc is not already aligned.
	loopAlign = 16
)
    34  
// Optab is one row of the instruction table: an opcode, the operand
// classes (C_* values) it accepts, and how that form is encoded.
//
// NOTE(review): from1/reg/from3/to1/to2 presumably correspond to
// p.From, p.Reg, p.GetFrom3(), p.To and p.RegTo2 — confirm in oplook.
type Optab struct {
	as    obj.As // opcode this row applies to
	from1 uint8  // class of the first source operand
	reg   uint8  // class of the second (register) operand
	from3 uint8  // class of the third source operand
	to1   uint8  // class of the first destination operand
	to2   uint8  // class of the second destination operand
	type_ int8   // encoding case number; span0 treats 6 as a conditional branch
	size  int8   // encoded size in bytes; 0 for rows such as TEXT, PCALIGN, PCDATA, FUNCDATA and NOP
	param int16  // register recorded for the row (REGSP, REGZERO, REGLINK or 0); NOTE(review): presumably the default base register — confirm in asmout
	flag  uint8  // bit flags such as NOTUSETMP
}
    47  
// Flag bits. Both values come from one iota sequence, but they are stored
// in different places: NOTUSETMP appears in Optab.flag, while
// branchLoopHead is set in Prog.Mark by span0.
const (
	NOTUSETMP = 1 << iota // p expands to multiple instructions, but does NOT use REGTMP

	// branchLoopHead marks loop entry.
	// Used to insert padding for under-aligned loops.
	branchLoopHead
)
    55  
// optab is the instruction table. Each row is an Optab literal whose
// columns follow the struct's field order:
//
//	as, from1, reg, from3, to1, to2, type_, size, param, flag
//
// Rows with size 0 (TEXT, PCALIGN, PCDATA, FUNCDATA, NOP) emit no
// instruction words through the table; span0 handles them specially.
var optab = []Optab{
	{obj.ATEXT, C_ADDR, C_NONE, C_NONE, C_TEXTSIZE, C_NONE, 0, 0, 0, 0},

	{ASUB, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASUB, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},

	{AADD, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADD, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADD, C_US12CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADD, C_US12CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADD, C_U12CON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AADD, C_U12CON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AADD, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 24, 12, 0, 0},
	{AADD, C_32CON, C_REG, C_NONE, C_REG, C_NONE, 24, 12, 0, 0},
	{AADD, C_32CON20_0, C_REG, C_NONE, C_REG, C_NONE, 26, 8, 0, 0},
	{AADD, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 26, 8, 0, 0},

	{AADDV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADDV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADDV, C_US12CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADDV, C_US12CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADDV, C_U12CON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AADDV, C_U12CON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AADDV, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 24, 12, 0, 0},
	{AADDV, C_32CON, C_REG, C_NONE, C_REG, C_NONE, 24, 12, 0, 0},
	{AADDV, C_32CON20_0, C_REG, C_NONE, C_REG, C_NONE, 26, 8, 0, 0},
	{AADDV, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 26, 8, 0, 0},
	{AADDV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0},
	{AADDV, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0},
	{AADDV, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 70, 8, 0, 0},
	{AADDV, C_DCON12_0, C_REG, C_NONE, C_REG, C_NONE, 70, 8, 0, 0},
	{AADDV, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 71, 12, 0, 0},
	{AADDV, C_DCON12_20S, C_REG, C_NONE, C_REG, C_NONE, 71, 12, 0, 0},
	{AADDV, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 72, 16, 0, 0},
	{AADDV, C_DCON32_12S, C_REG, C_NONE, C_REG, C_NONE, 72, 16, 0, 0},

	{AAND, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AAND, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AAND, C_UU12CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AAND, C_UU12CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AAND, C_S12CON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AAND, C_S12CON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AAND, C_32CON, C_REG, C_NONE, C_REG, C_NONE, 24, 12, 0, 0},
	{AAND, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 24, 12, 0, 0},
	{AAND, C_32CON20_0, C_REG, C_NONE, C_REG, C_NONE, 26, 8, 0, 0},
	{AAND, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 26, 8, 0, 0},
	{AAND, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0},
	{AAND, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0},
	{AAND, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 70, 8, 0, 0},
	{AAND, C_DCON12_0, C_REG, C_NONE, C_REG, C_NONE, 70, 8, 0, 0},
	{AAND, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 71, 12, 0, 0},
	{AAND, C_DCON12_20S, C_REG, C_NONE, C_REG, C_NONE, 71, 12, 0, 0},
	{AAND, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 72, 16, 0, 0},
	{AAND, C_DCON32_12S, C_REG, C_NONE, C_REG, C_NONE, 72, 16, 0, 0},

	{ASLL, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLL, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLL, C_U5CON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},
	{ASLL, C_U5CON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},
	{ASLLV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLLV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLLV, C_U6CON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},
	{ASLLV, C_U6CON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},

	{AADDV16, C_32CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADDV16, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},

	// memory access
	{AMOVB, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVB, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_SOREG_12, C_NONE, 7, 4, REGZERO, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_LOREG_32, C_NONE, 35, 12, REGZERO, 0},
	{AMOVB, C_SOREG_12, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVB, C_LOREG_32, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVB, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	// variable access
	{AMOVB, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	// TLS access
	{AMOVB, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVB, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVB, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	// moving data between registers
	{AMOVB, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0},

	// memory access
	{AMOVBU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_SOREG_12, C_NONE, 7, 4, REGZERO, 0},
	{AMOVBU, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVBU, C_SOREG_12, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG_32, C_NONE, 35, 12, REGZERO, 0},
	{AMOVBU, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVBU, C_LOREG_32, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVBU, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	// variable access
	{AMOVBU, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVBU, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	// TLS access
	{AMOVBU, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVBU, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVBU, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	// moving data between registers
	{AMOVBU, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0},

	// memory access
	{AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVW, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVW, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_SOREG_12, C_NONE, 7, 4, REGZERO, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_LOREG_32, C_NONE, 35, 12, REGZERO, 0},
	{AMOVW, C_SOREG_12, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVW, C_LOREG_32, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVW, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	// variable access
	{AMOVW, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVW, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	// TLS access
	{AMOVW, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVW, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVW, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	// moving data between registers
	{AMOVW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0},
	{AMOVW, C_FREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	// immediate load
	{AMOVW, C_12CON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0},
	{AMOVW, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP},
	{AMOVW, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 25, 4, 0, 0},
	{AMOVW, C_12CON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0},
	// get a stack address
	{AMOVW, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0},
	{AMOVW, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 27, 12, REGSP, 0},

	// memory access
	{AMOVV, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVV, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVV, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_SOREG_12, C_NONE, 7, 4, REGZERO, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_LOREG_32, C_NONE, 35, 12, REGZERO, 0},
	{AMOVV, C_SOREG_12, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVV, C_LOREG_32, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVV, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	// variable access, need relocation
	{AMOVV, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVV, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	// TLS access
	{AMOVV, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVV, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVV, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	// moving data between registers
	{AMOVV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0},
	{AMOVV, C_FCCREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FCSRREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_FCCREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_FCSRREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FREG, C_NONE, C_NONE, C_FCCREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FCCREG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0},
	// immediate load
	{AMOVV, C_12CON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0},
	{AMOVV, C_32CON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP},
	{AMOVV, C_32CON20_0, C_NONE, C_NONE, C_REG, C_NONE, 25, 4, 0, 0},
	{AMOVV, C_DCON12_0, C_NONE, C_NONE, C_REG, C_NONE, 67, 4, 0, NOTUSETMP},
	{AMOVV, C_DCON12_20S, C_NONE, C_NONE, C_REG, C_NONE, 68, 8, 0, NOTUSETMP},
	{AMOVV, C_DCON32_12S, C_NONE, C_NONE, C_REG, C_NONE, 69, 12, 0, NOTUSETMP},
	{AMOVV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 59, 16, 0, NOTUSETMP},
	// get a stack address
	{AMOVV, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0},
	{AMOVV, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 27, 12, REGSP, 0},
	// get an external address, need relocation
	{AMOVV, C_EXTADDR, C_NONE, C_NONE, C_REG, C_NONE, 52, 8, 0, NOTUSETMP},
	// get a got address, need relocation
	{AMOVV, C_GOTADDR, C_NONE, C_NONE, C_REG, C_NONE, 65, 8, 0, 0},

	// memory access
	{AVMOVQ, C_VREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGZERO, 0},
	{AVMOVQ, C_VREG, C_NONE, C_NONE, C_SOREG_12, C_NONE, 7, 4, REGZERO, 0},
	{AVMOVQ, C_SAUTO, C_NONE, C_NONE, C_VREG, C_NONE, 8, 4, REGZERO, 0},
	{AVMOVQ, C_SOREG_12, C_NONE, C_NONE, C_VREG, C_NONE, 8, 4, REGZERO, 0},
	{AVMOVQ, C_VREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AVMOVQ, C_ROFF, C_NONE, C_NONE, C_VREG, C_NONE, 21, 4, 0, 0},
	{AVMOVQ, C_SOREG_12, C_NONE, C_NONE, C_ARNG, C_NONE, 42, 4, 0, 0}, // vldrepl.{b/h/w/d}
	// moving data between registers
	{AVMOVQ, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 1, 4, 0, 0},
	{AVMOVQ, C_REG, C_NONE, C_NONE, C_ELEM, C_NONE, 39, 4, 0, 0},  // vinsgr2vr.{b/h/w/d}
	{AVMOVQ, C_ELEM, C_NONE, C_NONE, C_REG, C_NONE, 40, 4, 0, 0},  // vpickve2gr.{b/h/w/d}
	{AVMOVQ, C_ELEM, C_NONE, C_NONE, C_ARNG, C_NONE, 40, 4, 0, 0}, // vreplvei.{b/h/w/d}
	{AVMOVQ, C_REG, C_NONE, C_NONE, C_ARNG, C_NONE, 41, 4, 0, 0},  // vreplgr2vr.{b/h/w/d}

	// memory access
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_SOREG_12, C_NONE, 7, 4, REGZERO, 0},
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGZERO, 0},
	{AXVMOVQ, C_SOREG_12, C_NONE, C_NONE, C_XREG, C_NONE, 8, 4, REGZERO, 0},
	{AXVMOVQ, C_SAUTO, C_NONE, C_NONE, C_XREG, C_NONE, 8, 4, REGZERO, 0},
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AXVMOVQ, C_ROFF, C_NONE, C_NONE, C_XREG, C_NONE, 21, 4, 0, 0},
	{AXVMOVQ, C_SOREG_12, C_NONE, C_NONE, C_ARNG, C_NONE, 42, 4, 0, 0}, // xvldrepl.{b/h/w/d}
	// moving data between registers
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 1, 4, 0, 0},
	{AXVMOVQ, C_REG, C_NONE, C_NONE, C_ELEM, C_NONE, 39, 4, 0, 0},  // vinsgr2vr.{b/h/w/d}
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_ELEM, C_NONE, 39, 4, 0, 0}, // xvinsve0.{w/d}
	{AXVMOVQ, C_ELEM, C_NONE, C_NONE, C_REG, C_NONE, 40, 4, 0, 0},  // vpickve2gr.{b/h/w/d}
	{AXVMOVQ, C_ELEM, C_NONE, C_NONE, C_XREG, C_NONE, 40, 4, 0, 0}, // xvpickve.{w/d}
	{AXVMOVQ, C_REG, C_NONE, C_NONE, C_ARNG, C_NONE, 41, 4, 0, 0},  // xvreplgr2vr.{b/h/w/d}
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_ARNG, C_NONE, 41, 4, 0, 0}, // xvreplve0.{b/h/w/d/q}

	// memory access
	{AMOVWP, C_REG, C_NONE, C_NONE, C_SOREG_16, C_NONE, 73, 4, 0, 0},
	{AMOVWP, C_REG, C_NONE, C_NONE, C_LOREG_32, C_NONE, 73, 12, 0, 0},
	{AMOVWP, C_REG, C_NONE, C_NONE, C_LOREG_64, C_NONE, 73, 24, 0, 0},
	{AMOVWP, C_SOREG_16, C_NONE, C_NONE, C_REG, C_NONE, 74, 4, 0, 0},
	{AMOVWP, C_LOREG_32, C_NONE, C_NONE, C_REG, C_NONE, 74, 12, 0, 0},
	{AMOVWP, C_LOREG_64, C_NONE, C_NONE, C_REG, C_NONE, 74, 24, 0, 0},

	// condition branch
	{ABEQ, C_REG, C_REG, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	{ABEQ, C_REG, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	{ABLEZ, C_REG, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	{ABFPT, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	{ABFPT, C_FCCREG, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	// jmp and call
	{AJMP, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 11, 4, 0, 0},        // b
	{AJAL, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 11, 4, 0, 0},        // bl
	{AJMP, C_NONE, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGZERO, 0}, // jirl r0, rj, 0
	{AJAL, C_NONE, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGLINK, 0}, // jirl r1, rj, 0

	{ABSTRPICKW, C_U6CON, C_REG, C_U6CON, C_REG, C_NONE, 17, 4, 0, 0},
	{ABSTRPICKW, C_U6CON, C_REG, C_ZCON, C_REG, C_NONE, 17, 4, 0, 0},
	{ABSTRPICKW, C_ZCON, C_REG, C_ZCON, C_REG, C_NONE, 17, 4, 0, 0},

	// preload
	{APRELD, C_SOREG_12, C_U5CON, C_NONE, C_NONE, C_NONE, 47, 4, 0, 0},
	{APRELDX, C_SOREG_16, C_DCON, C_U5CON, C_NONE, C_NONE, 48, 20, 0, 0},

	{AMASKEQZ, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},

	{ACMPEQF, C_FREG, C_FREG, C_NONE, C_FCCREG, C_NONE, 2, 4, 0, 0},

	{ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0},

	{AALSLV, C_U3CON, C_REG, C_REG, C_REG, C_NONE, 64, 4, 0, 0},

	{AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0},

	{ASCQ, C_REG, C_REG, C_NONE, C_ZOREG, C_NONE, 45, 4, 0, 0},
	{ALLACQW, C_ZOREG, C_NONE, C_NONE, C_REG, C_NONE, 46, 4, 0, 0},
	{ASCRELW, C_REG, C_NONE, C_NONE, C_ZOREG, C_NONE, 46, 4, 0, 0},

	{ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0},
	{ASYSCALL, C_U15CON, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0},

	{AFMADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 37, 4, 0, 0},
	{AFMADDF, C_FREG, C_FREG, C_FREG, C_FREG, C_NONE, 37, 4, 0, 0},

	{AADDF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 2, 4, 0, 0},
	{AADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 2, 4, 0, 0},

	{AFSEL, C_FCCREG, C_FREG, C_FREG, C_FREG, C_NONE, 33, 4, 0, 0},
	{AFSEL, C_FCCREG, C_FREG, C_NONE, C_FREG, C_NONE, 33, 4, 0, 0},

	{ACLOW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0},
	{AABSF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},

	{AVSETEQV, C_VREG, C_NONE, C_NONE, C_FCCREG, C_NONE, 9, 4, 0, 0},
	{AXVSETEQV, C_XREG, C_NONE, C_NONE, C_FCCREG, C_NONE, 9, 4, 0, 0},

	{AVPCNTB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 9, 4, 0, 0},
	{AXVPCNTB, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 9, 4, 0, 0},

	// memory access
	{AMOVF, C_SOREG_12, C_NONE, C_NONE, C_FREG, C_NONE, 28, 4, REGZERO, 0},
	{AMOVF, C_LOREG_32, C_NONE, C_NONE, C_FREG, C_NONE, 28, 12, REGZERO, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_SOREG_12, C_NONE, 29, 4, REGZERO, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_LOREG_32, C_NONE, 29, 12, REGZERO, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVF, C_ROFF, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0},
	// variable access
	{AMOVF, C_FREG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVF, C_ADDR, C_NONE, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0},
	// moving data between registers
	{AMOVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
	// load data from stack
	{AMOVF, C_SAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 28, 4, REGSP, 0},
	{AMOVF, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 28, 12, REGSP, 0},
	// store data to stack
	{AMOVF, C_FREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 29, 4, REGSP, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 29, 12, REGSP, 0},

	{AVSHUFB, C_VREG, C_VREG, C_VREG, C_VREG, C_NONE, 37, 4, 0, 0},
	{AXVSHUFB, C_XREG, C_XREG, C_XREG, C_XREG, C_NONE, 37, 4, 0, 0},

	{AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSEQB, C_S5CON, C_VREG, C_NONE, C_VREG, C_NONE, 22, 4, 0, 0},
	{AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSEQB, C_S5CON, C_XREG, C_NONE, C_XREG, C_NONE, 22, 4, 0, 0},

	{AVSLTBU, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLTBU, C_U5CON, C_VREG, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0},
	{AXVSLTBU, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLTBU, C_U5CON, C_XREG, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0},

	{AVANDB, C_U8CON, C_VREG, C_NONE, C_VREG, C_NONE, 23, 4, 0, 0},
	{AVANDB, C_U8CON, C_NONE, C_NONE, C_VREG, C_NONE, 23, 4, 0, 0},
	{AXVANDB, C_U8CON, C_XREG, C_NONE, C_XREG, C_NONE, 23, 4, 0, 0},
	{AXVANDB, C_U8CON, C_NONE, C_NONE, C_XREG, C_NONE, 23, 4, 0, 0},

	{AVADDB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVADDB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AXVADDB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVADDB, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},

	{AVADDBU, C_U5CON, C_VREG, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0},
	{AVADDBU, C_U5CON, C_NONE, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0},
	{AXVADDBU, C_U5CON, C_XREG, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0},
	{AXVADDBU, C_U5CON, C_NONE, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0},

	{AVSLLB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLB, C_U3CON, C_VREG, C_NONE, C_VREG, C_NONE, 13, 4, 0, 0},
	{AVSLLB, C_U3CON, C_NONE, C_NONE, C_VREG, C_NONE, 13, 4, 0, 0},
	{AXVSLLB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLB, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLB, C_U3CON, C_XREG, C_NONE, C_XREG, C_NONE, 13, 4, 0, 0},
	{AXVSLLB, C_U3CON, C_NONE, C_NONE, C_XREG, C_NONE, 13, 4, 0, 0},

	{AVSLLH, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLH, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLH, C_U4CON, C_VREG, C_NONE, C_VREG, C_NONE, 14, 4, 0, 0},
	{AVSLLH, C_U4CON, C_NONE, C_NONE, C_VREG, C_NONE, 14, 4, 0, 0},
	{AXVSLLH, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLH, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLH, C_U4CON, C_XREG, C_NONE, C_XREG, C_NONE, 14, 4, 0, 0},
	{AXVSLLH, C_U4CON, C_NONE, C_NONE, C_XREG, C_NONE, 14, 4, 0, 0},

	{AVSLLW, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLW, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLW, C_U5CON, C_VREG, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0},
	{AVSLLW, C_U5CON, C_NONE, C_NONE, C_VREG, C_NONE, 31, 4, 0, 0},
	{AXVSLLW, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLW, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLW, C_U5CON, C_XREG, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0},
	{AXVSLLW, C_U5CON, C_NONE, C_NONE, C_XREG, C_NONE, 31, 4, 0, 0},

	{AVSLLV, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLV, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AVSLLV, C_U6CON, C_VREG, C_NONE, C_VREG, C_NONE, 32, 4, 0, 0},
	{AVSLLV, C_U6CON, C_NONE, C_NONE, C_VREG, C_NONE, 32, 4, 0, 0},
	{AXVSLLV, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLV, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},
	{AXVSLLV, C_U6CON, C_XREG, C_NONE, C_XREG, C_NONE, 32, 4, 0, 0},
	{AXVSLLV, C_U6CON, C_NONE, C_NONE, C_XREG, C_NONE, 32, 4, 0, 0},

	{AWORD, C_32CON, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0},
	{AWORD, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0},

	{ANOOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0},

	{ANEGW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ATEQ, C_US12CON, C_REG, C_NONE, C_REG, C_NONE, 15, 8, 0, 0},
	{ATEQ, C_US12CON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0},

	{obj.APCALIGN, C_U12CON, C_NONE, C_NONE, C_NONE, C_NON, 0, 0, 0, 0},
	{obj.APCDATA, C_32CON, C_NONE, C_NONE, C_32CON, C_NONE, 0, 0, 0, 0},
	{obj.APCDATA, C_DCON, C_NONE, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0},
	{obj.AFUNCDATA, C_U12CON, C_NONE, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0},
	{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
	{obj.ANOP, C_32CON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689
	{obj.ANOP, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},  // nop variants, see #40689
	{obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
	{obj.ANOP, C_FREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
}
   440  
   441  func IsAtomicInst(as obj.As) bool {
   442  	_, ok := atomicInst[as]
   443  
   444  	return ok
   445  }
   446  
   447  // pcAlignPadLength returns the number of bytes required to align pc to alignedValue,
   448  // reporting an error if alignedValue is not a power of two or is out of range.
   449  func pcAlignPadLength(ctxt *obj.Link, pc int64, alignedValue int64) int {
   450  	if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) {
   451  		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue)
   452  	}
   453  	return int(-pc & (alignedValue - 1))
   454  }
   455  
// oprange is indexed by opcode masked with obj.AMask. span0 treats a nil
// entry for AOR as a sign that buildop has not been called yet.
// NOTE(review): presumably each entry holds the optab rows for that
// opcode — confirm in buildop.
var oprange [ALAST & obj.AMask][]Optab

// xcmp is a C_NCLASS x C_NCLASS boolean table.
// NOTE(review): presumably it records which operand classes are
// acceptable in place of one another — confirm in buildop and oplook.
var xcmp [C_NCLASS][C_NCLASS]bool
   459  
   460  func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
   461  	if ctxt.Retpoline {
   462  		ctxt.Diag("-spectre=ret not supported on loong64")
   463  		ctxt.Retpoline = false // don't keep printing
   464  	}
   465  
   466  	p := cursym.Func().Text
   467  	if p == nil || p.Link == nil { // handle external functions and ELF section symbols
   468  		return
   469  	}
   470  
   471  	c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset + ctxt.Arch.FixedFrameSize)}
   472  
   473  	if oprange[AOR&obj.AMask] == nil {
   474  		c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
   475  	}
   476  
   477  	pc := int64(0)
   478  	p.Pc = pc
   479  
   480  	var m int
   481  	var o *Optab
   482  	for p = p.Link; p != nil; p = p.Link {
   483  		p.Pc = pc
   484  		o = c.oplook(p)
   485  		m = int(o.size)
   486  		if m == 0 {
   487  			switch p.As {
   488  			case obj.APCALIGN:
   489  				alignedValue := p.From.Offset
   490  				m = pcAlignPadLength(ctxt, pc, alignedValue)
   491  				// Update the current text symbol alignment value.
   492  				if int16(alignedValue) > cursym.Align {
   493  					cursym.Align = int16(alignedValue)
   494  				}
   495  				break
   496  			case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
   497  				continue
   498  			default:
   499  				c.ctxt.Diag("zero-width instruction\n%v", p)
   500  			}
   501  		}
   502  
   503  		pc += int64(m)
   504  	}
   505  
   506  	c.cursym.Size = pc
   507  
   508  	// mark loop entry instructions for padding
   509  	// loop entrances are defined as targets of backward branches
   510  	for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
   511  		if q := p.To.Target(); q != nil && q.Pc < p.Pc {
   512  			q.Mark |= branchLoopHead
   513  		}
   514  	}
   515  
   516  	// Run these passes until convergence.
   517  	for {
   518  		rescan := false
   519  		pc = 0
   520  		prev := c.cursym.Func().Text
   521  		for p = prev.Link; p != nil; prev, p = p, p.Link {
   522  			p.Pc = pc
   523  			o = c.oplook(p)
   524  
   525  			// Prepend a PCALIGN $loopAlign to each of the loop heads
   526  			// that need padding, if not already done so (because this
   527  			// pass may execute more than once).
   528  			//
   529  			// This needs to come before any pass that look at pc,
   530  			// because pc will be adjusted if padding happens.
   531  			if p.Mark&branchLoopHead != 0 && pc&(loopAlign-1) != 0 &&
   532  				!(prev.As == obj.APCALIGN && prev.From.Offset >= loopAlign) {
   533  				q := c.newprog()
   534  				prev.Link = q
   535  				q.Link = p
   536  				q.Pc = pc
   537  				q.As = obj.APCALIGN
   538  				q.From.Type = obj.TYPE_CONST
   539  				q.From.Offset = loopAlign
   540  				// Don't associate the synthesized PCALIGN with
   541  				// the original source position, for deterministic
   542  				// mapping between source and corresponding asm.
   543  				// q.Pos = p.Pos
   544  
   545  				// Manually make the PCALIGN come into effect,
   546  				// since this loop iteration is for p.
   547  				pc += int64(pcAlignPadLength(ctxt, pc, loopAlign))
   548  				p.Pc = pc
   549  				rescan = true
   550  			}
   551  
   552  			// very large conditional branches
   553  			//
   554  			// if any procedure is large enough to generate a large SBRA branch, then
   555  			// generate extra passes putting branches around jmps to fix. this is rare.
   556  			if o.type_ == 6 && p.To.Target() != nil {
   557  				otxt := p.To.Target().Pc - pc
   558  
   559  				// On loong64, the immediate value field of the conditional branch instructions
   560  				// BFPT and BFPT is 21 bits, and the others are 16 bits. The jump target address
   561  				// is to logically shift the immediate value in the instruction code to the left
   562  				// by 2 bits and then sign extend.
   563  				bound := int64(1 << (18 - 1))
   564  
   565  				switch p.As {
   566  				case ABFPT, ABFPF:
   567  					bound = int64(1 << (23 - 1))
   568  				}
   569  
   570  				if otxt < -bound || otxt >= bound {
   571  					q := c.newprog()
   572  					q.Link = p.Link
   573  					p.Link = q
   574  					q.As = AJMP
   575  					q.Pos = p.Pos
   576  					q.To.Type = obj.TYPE_BRANCH
   577  					q.To.SetTarget(p.To.Target())
   578  					p.To.SetTarget(q)
   579  					q = c.newprog()
   580  					q.Link = p.Link
   581  					p.Link = q
   582  					q.As = AJMP
   583  					q.Pos = p.Pos
   584  					q.To.Type = obj.TYPE_BRANCH
   585  					q.To.SetTarget(q.Link.Link)
   586  					rescan = true
   587  				}
   588  			}
   589  
   590  			m = int(o.size)
   591  			if m == 0 {
   592  				switch p.As {
   593  				case obj.APCALIGN:
   594  					alignedValue := p.From.Offset
   595  					m = pcAlignPadLength(ctxt, pc, alignedValue)
   596  					break
   597  				case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
   598  					continue
   599  				default:
   600  					c.ctxt.Diag("zero-width instruction\n%v", p)
   601  				}
   602  			}
   603  
   604  			pc += int64(m)
   605  		}
   606  
   607  		c.cursym.Size = pc
   608  
   609  		if !rescan {
   610  			break
   611  		}
   612  	}
   613  
   614  	pc += -pc & (FuncAlign - 1)
   615  	c.cursym.Size = pc
   616  
   617  	// lay out the code, emitting code and data relocations.
   618  
   619  	c.cursym.Grow(c.cursym.Size)
   620  
   621  	bp := c.cursym.P
   622  	var i int32
   623  	var out [6]uint32
   624  	for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
   625  		c.pc = p.Pc
   626  		o = c.oplook(p)
   627  		if int(o.size) > 4*len(out) {
   628  			log.Fatalf("out array in span0 is too small, need at least %d for %v", o.size/4, p)
   629  		}
   630  		if p.As == obj.APCALIGN {
   631  			alignedValue := p.From.Offset
   632  			v := pcAlignPadLength(c.ctxt, p.Pc, alignedValue)
   633  			for i = 0; i < int32(v/4); i++ {
   634  				// emit ANOOP instruction by the padding size
   635  				c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_12IRR(c.opirr(AAND), 0, 0, 0))
   636  				bp = bp[4:]
   637  			}
   638  			continue
   639  		}
   640  		c.asmout(p, o, out[:])
   641  		for i = 0; i < int32(o.size/4); i++ {
   642  			c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
   643  			bp = bp[4:]
   644  		}
   645  	}
   646  
   647  	// Mark nonpreemptible instruction sequences.
   648  	// We use REGTMP as a scratch register during call injection,
   649  	// so instruction sequences that use REGTMP are unsafe to
   650  	// preempt asynchronously.
   651  	obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
   652  
   653  	// Now that we know byte offsets, we can generate jump table entries.
   654  	for _, jt := range cursym.Func().JumpTables {
   655  		for i, p := range jt.Targets {
   656  			// The ith jumptable entry points to the p.Pc'th
   657  			// byte in the function symbol s.
   658  			jt.Sym.WriteAddr(ctxt, int64(i)*8, 8, cursym, p.Pc)
   659  		}
   660  	}
   661  }
   662  
   663  // isUnsafePoint returns whether p is an unsafe point.
   664  func (c *ctxt0) isUnsafePoint(p *obj.Prog) bool {
   665  	// If p explicitly uses REGTMP, it's unsafe to preempt, because the
   666  	// preemption sequence clobbers REGTMP.
   667  	return p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP
   668  }
   669  
   670  // isRestartable returns whether p is a multi-instruction sequence that,
   671  // if preempted, can be restarted.
   672  func (c *ctxt0) isRestartable(p *obj.Prog) bool {
   673  	if c.isUnsafePoint(p) {
   674  		return false
   675  	}
   676  	// If p is a multi-instruction sequence with uses REGTMP inserted by
   677  	// the assembler in order to materialize a large constant/offset, we
   678  	// can restart p (at the start of the instruction sequence), recompute
   679  	// the content of REGTMP, upon async preemption. Currently, all cases
   680  	// of assembler-inserted REGTMP fall into this category.
   681  	// If p doesn't use REGTMP, it can be simply preempted, so we don't
   682  	// mark it.
   683  	o := c.oplook(p)
   684  	return o.size > 4 && o.flag&NOTUSETMP == 0
   685  }
   686  
   687  func isint32(v int64) bool {
   688  	return int64(int32(v)) == v
   689  }
   690  
   691  func (c *ctxt0) aclass(a *obj.Addr) int {
   692  	switch a.Type {
   693  	case obj.TYPE_NONE:
   694  		return C_NONE
   695  
   696  	case obj.TYPE_REG:
   697  		return c.rclass(a.Reg)
   698  
   699  	case obj.TYPE_MEM:
   700  		switch a.Name {
   701  		case obj.NAME_EXTERN,
   702  			obj.NAME_STATIC:
   703  			if a.Sym == nil {
   704  				break
   705  			}
   706  			c.instoffset = a.Offset
   707  			if a.Sym.Type == objabi.STLSBSS {
   708  				if c.ctxt.Flag_shared {
   709  					return C_TLS_IE
   710  				} else {
   711  					return C_TLS_LE
   712  				}
   713  			}
   714  			return C_ADDR
   715  
   716  		case obj.NAME_AUTO:
   717  			if a.Reg == REGSP {
   718  				// unset base register for better printing, since
   719  				// a.Offset is still relative to pseudo-SP.
   720  				a.Reg = obj.REG_NONE
   721  			}
   722  			c.instoffset = int64(c.autosize) + a.Offset
   723  			if c.instoffset >= -BIG_12 && c.instoffset < BIG_12 {
   724  				return C_SAUTO
   725  			}
   726  			return C_LAUTO
   727  
   728  		case obj.NAME_PARAM:
   729  			if a.Reg == REGSP {
   730  				// unset base register for better printing, since
   731  				// a.Offset is still relative to pseudo-FP.
   732  				a.Reg = obj.REG_NONE
   733  			}
   734  			c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
   735  			if c.instoffset >= -BIG_12 && c.instoffset < BIG_12 {
   736  				return C_SAUTO
   737  			}
   738  			return C_LAUTO
   739  
   740  		case obj.NAME_NONE:
   741  			if a.Index != 0 {
   742  				if a.Offset != 0 {
   743  					return C_GOK
   744  				}
   745  				// register offset
   746  				return C_ROFF
   747  			}
   748  
   749  			c.instoffset = a.Offset
   750  			if c.instoffset == 0 {
   751  				return C_ZOREG
   752  			}
   753  			if c.instoffset >= -BIG_8 && c.instoffset < BIG_8 {
   754  				return C_SOREG_8
   755  			} else if c.instoffset >= -BIG_9 && c.instoffset < BIG_9 {
   756  				return C_SOREG_9
   757  			} else if c.instoffset >= -BIG_10 && c.instoffset < BIG_10 {
   758  				return C_SOREG_10
   759  			} else if c.instoffset >= -BIG_11 && c.instoffset < BIG_11 {
   760  				return C_SOREG_11
   761  			} else if c.instoffset >= -BIG_12 && c.instoffset < BIG_12 {
   762  				return C_SOREG_12
   763  			} else if c.instoffset >= -BIG_16 && c.instoffset < BIG_16 {
   764  				return C_SOREG_16
   765  			} else if c.instoffset >= -BIG_32 && c.instoffset < BIG_32 {
   766  				return C_LOREG_32
   767  			} else {
   768  				return C_LOREG_64
   769  			}
   770  
   771  		case obj.NAME_GOTREF:
   772  			return C_GOTADDR
   773  		}
   774  
   775  		return C_GOK
   776  
   777  	case obj.TYPE_TEXTSIZE:
   778  		return C_TEXTSIZE
   779  
   780  	case obj.TYPE_CONST,
   781  		obj.TYPE_ADDR:
   782  		switch a.Name {
   783  		case obj.NAME_NONE:
   784  			c.instoffset = a.Offset
   785  			if a.Reg != 0 {
   786  				if -BIG_12 <= c.instoffset && c.instoffset <= BIG_12 {
   787  					return C_SACON
   788  				}
   789  				if isint32(c.instoffset) {
   790  					return C_LACON
   791  				}
   792  				return C_DACON
   793  			}
   794  
   795  		case obj.NAME_EXTERN,
   796  			obj.NAME_STATIC:
   797  			s := a.Sym
   798  			if s == nil {
   799  				return C_GOK
   800  			}
   801  
   802  			c.instoffset = a.Offset
   803  			if s.Type == objabi.STLSBSS {
   804  				c.ctxt.Diag("taking address of TLS variable is not supported")
   805  			}
   806  			return C_EXTADDR
   807  
   808  		case obj.NAME_AUTO:
   809  			if a.Reg == REGSP {
   810  				// unset base register for better printing, since
   811  				// a.Offset is still relative to pseudo-SP.
   812  				a.Reg = obj.REG_NONE
   813  			}
   814  			c.instoffset = int64(c.autosize) + a.Offset
   815  			if c.instoffset >= -BIG_12 && c.instoffset < BIG_12 {
   816  				return C_SACON
   817  			}
   818  			return C_LACON
   819  
   820  		case obj.NAME_PARAM:
   821  			if a.Reg == REGSP {
   822  				// unset base register for better printing, since
   823  				// a.Offset is still relative to pseudo-FP.
   824  				a.Reg = obj.REG_NONE
   825  			}
   826  			c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
   827  			if c.instoffset >= -BIG_12 && c.instoffset < BIG_12 {
   828  				return C_SACON
   829  			}
   830  			return C_LACON
   831  
   832  		default:
   833  			return C_GOK
   834  		}
   835  
   836  		if c.instoffset != int64(int32(c.instoffset)) {
   837  			return dconClass(c.instoffset)
   838  		}
   839  
   840  		if c.instoffset >= 0 {
   841  			sbits := bits.Len64(uint64(c.instoffset))
   842  			switch {
   843  			case sbits <= 8:
   844  				return C_ZCON + sbits
   845  			case sbits <= 12:
   846  				if c.instoffset <= 0x7ff {
   847  					return C_US12CON
   848  				}
   849  				return C_U12CON
   850  			case sbits <= 13:
   851  				if c.instoffset&0xfff == 0 {
   852  					return C_U13CON20_0
   853  				}
   854  				return C_U13CON
   855  			case sbits <= 15:
   856  				if c.instoffset&0xfff == 0 {
   857  					return C_U15CON20_0
   858  				}
   859  				return C_U15CON
   860  			}
   861  		} else {
   862  			sbits := bits.Len64(uint64(^c.instoffset))
   863  			switch {
   864  			case sbits < 5:
   865  				return C_S5CON
   866  			case sbits < 12:
   867  				return C_S12CON
   868  			case sbits < 13:
   869  				if c.instoffset&0xfff == 0 {
   870  					return C_S13CON20_0
   871  				}
   872  				return C_S13CON
   873  			}
   874  		}
   875  
   876  		if c.instoffset&0xfff == 0 {
   877  			return C_32CON20_0
   878  		}
   879  		return C_32CON
   880  
   881  	case obj.TYPE_BRANCH:
   882  		return C_BRAN
   883  	}
   884  
   885  	return C_GOK
   886  }
   887  
// The constants here describe the bit pattern found inside a bit field
// of an immediate value, as reported by bitField.
//
//	ALL1: every bit in the field is 1
//	ALL0: every bit in the field is 0
//	ST1: the most significant bit of the field is 1, but the field is not all 1
//	ST0: the most significant bit of the field is 0, but the field is not all 0
const (
	ALL1 = iota
	ALL0
	ST1
	ST0
)
   900  
   901  // mask returns the mask of the specified bit field, which is used to help determine
   902  // the data characteristics of the immediate value at the specified bit.
   903  func mask(suf int8, len int8) (uint64, uint64) {
   904  	if len == 12 {
   905  		if suf == 0 {
   906  			return 0xfff, 0x800
   907  		} else { // suf == 52
   908  			return 0xfff0000000000000, 0x8000000000000000
   909  		}
   910  	} else { // len == 20
   911  		if suf == 12 {
   912  			return 0xfffff000, 0x80000000
   913  		} else { // suf == 32
   914  			return 0xfffff00000000, 0x8000000000000
   915  		}
   916  	}
   917  }
   918  
   919  // bitField return a number represent status of val in bit field
   920  //
   921  //	suf: The starting bit of the bit field
   922  //	len: The length of the bit field
   923  func bitField(val int64, suf int8, len int8) int8 {
   924  	mask1, mask2 := mask(suf, len)
   925  	if uint64(val)&mask1 == mask1 {
   926  		return ALL1
   927  	} else if uint64(val)&mask1 == 0x0 {
   928  		return ALL0
   929  	} else if uint64(val)&mask2 == mask2 {
   930  		return ST1
   931  	} else {
   932  		return ST0
   933  	}
   934  }
   935  
   936  // Loading an immediate value larger than 32 bits requires four instructions
   937  // on loong64 (lu12i.w + ori + lu32i.d + lu52i.d), but in some special cases,
   938  // we can use the sign extension and zero extension features of the instruction
   939  // to fill in the high-order data (all 0 or all 1), which can save one to
   940  // three instructions.
   941  //
   942  //	| 63 ~ 52 | 51 ~ 32 | 31 ~ 12 | 11 ~ 0 |
   943  //	| lu52i.d | lu32i.d | lu12i.w |   ori  |
   944  func dconClass(offset int64) int {
   945  	tzb := bits.TrailingZeros64(uint64(offset))
   946  	hi12 := bitField(offset, 52, 12)
   947  	hi20 := bitField(offset, 32, 20)
   948  	lo20 := bitField(offset, 12, 20)
   949  	lo12 := bitField(offset, 0, 12)
   950  	if tzb >= 52 {
   951  		return C_DCON12_0 // lu52i.d
   952  	}
   953  	if tzb >= 32 {
   954  		if ((hi20 == ALL1 || hi20 == ST1) && hi12 == ALL1) || ((hi20 == ALL0 || hi20 == ST0) && hi12 == ALL0) {
   955  			return C_DCON20S_0 // addi.w + lu32i.d
   956  		}
   957  		return C_DCON32_0 // addi.w + lu32i.d + lu52i.d
   958  	}
   959  	if tzb >= 12 {
   960  		if lo20 == ST1 || lo20 == ALL1 {
   961  			if hi20 == ALL1 {
   962  				return C_DCON12_20S // lu12i.w + lu52i.d
   963  			}
   964  			if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) {
   965  				return C_DCON20S_20 // lu12i.w + lu32i.d
   966  			}
   967  			return C_DCON32_20 // lu12i.w + lu32i.d + lu52i.d
   968  		}
   969  		if hi20 == ALL0 {
   970  			return C_DCON12_20S // lu12i.w + lu52i.d
   971  		}
   972  		if (hi20 == ST0 && hi12 == ALL0) || ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) {
   973  			return C_DCON20S_20 // lu12i.w + lu32i.d
   974  		}
   975  		return C_DCON32_20 // lu12i.w + lu32i.d + lu52i.d
   976  	}
   977  	if lo12 == ST1 || lo12 == ALL1 {
   978  		if lo20 == ALL1 {
   979  			if hi20 == ALL1 {
   980  				return C_DCON12_12S // addi.d + lu52i.d
   981  			}
   982  			if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) {
   983  				return C_DCON20S_12S // addi.w + lu32i.d
   984  			}
   985  			return C_DCON32_12S // addi.w + lu32i.d + lu52i.d
   986  		}
   987  		if lo20 == ST1 {
   988  			if hi20 == ALL1 {
   989  
   990  				return C_DCON12_32S // lu12i.w + ori + lu52i.d
   991  			}
   992  			if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) {
   993  				return C_DCON20S_32 // lu12i.w + ori + lu32i.d
   994  			}
   995  			return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d
   996  		}
   997  		if lo20 == ALL0 {
   998  			if hi20 == ALL0 {
   999  				return C_DCON12_12U // ori + lu52i.d
  1000  			}
  1001  			if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) {
  1002  				return C_DCON20S_12U // ori + lu32i.d
  1003  			}
  1004  			return C_DCON32_12U // ori + lu32i.d + lu52i.d
  1005  		}
  1006  		if hi20 == ALL0 {
  1007  			return C_DCON12_32S // lu12i.w + ori + lu52i.d
  1008  		}
  1009  		if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) {
  1010  			return C_DCON20S_32 // lu12i.w + ori + lu32i.d
  1011  		}
  1012  		return C_DCON // lu12i.w + ori + lu32i.d + lu52i.d
  1013  	}
  1014  	if lo20 == ALL0 {
  1015  		if hi20 == ALL0 {
  1016  			return C_DCON12_12U // ori + lu52i.d
  1017  		}
  1018  		if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) {
  1019  			return C_DCON20S_12U // ori + lu32i.d
  1020  		}
  1021  		return C_DCON32_12U // ori + lu32i.d + lu52i.d
  1022  	}
  1023  	if lo20 == ST1 || lo20 == ALL1 {
  1024  		if hi20 == ALL1 {
  1025  			return C_DCON12_32S // lu12i.w + ori + lu52i.d
  1026  		}
  1027  		if (hi20 == ST1 && hi12 == ALL1) || ((hi20 == ST0 || hi20 == ALL0) && hi12 == ALL0) {
  1028  			return C_DCON20S_32 // lu12i.w + ori + lu32i.d
  1029  		}
  1030  		return C_DCON
  1031  	}
  1032  	if hi20 == ALL0 {
  1033  		return C_DCON12_32S // lu12i.w + ori + lu52i.d
  1034  	}
  1035  	if ((hi20 == ST1 || hi20 == ALL1) && hi12 == ALL1) || (hi20 == ST0 && hi12 == ALL0) {
  1036  		return C_DCON20S_32 // lu12i.w + ori + lu32i.d
  1037  	}
  1038  	return C_DCON
  1039  }
  1040  
  1041  // In Loong64,there are 8 CFRs, denoted as fcc0-fcc7.
  1042  // There are 4 FCSRs, denoted as fcsr0-fcsr3.
  1043  func (c *ctxt0) rclass(r int16) int {
  1044  	switch {
  1045  	case REG_R0 <= r && r <= REG_R31:
  1046  		return C_REG
  1047  	case REG_F0 <= r && r <= REG_F31:
  1048  		return C_FREG
  1049  	case REG_FCC0 <= r && r <= REG_FCC7:
  1050  		return C_FCCREG
  1051  	case REG_FCSR0 <= r && r <= REG_FCSR3:
  1052  		return C_FCSRREG
  1053  	case REG_V0 <= r && r <= REG_V31:
  1054  		return C_VREG
  1055  	case REG_X0 <= r && r <= REG_X31:
  1056  		return C_XREG
  1057  	case r >= REG_ARNG && r < REG_ELEM:
  1058  		return C_ARNG
  1059  	case r >= REG_ELEM && r < REG_ELEM_END:
  1060  		return C_ELEM
  1061  	}
  1062  
  1063  	return C_GOK
  1064  }
  1065  
  1066  func oclass(a *obj.Addr) int {
  1067  	return int(a.Class) - 1
  1068  }
  1069  
  1070  func prasm(p *obj.Prog) {
  1071  	fmt.Printf("%v\n", p)
  1072  }
  1073  
  1074  func (c *ctxt0) oplook(p *obj.Prog) *Optab {
  1075  	if oprange[AOR&obj.AMask] == nil {
  1076  		c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
  1077  	}
  1078  
  1079  	restArgsIndex := 0
  1080  	restArgsLen := len(p.RestArgs)
  1081  	if restArgsLen > 2 {
  1082  		c.ctxt.Diag("too many RestArgs: got %v, maximum is 2\n", restArgsLen)
  1083  		return nil
  1084  	}
  1085  
  1086  	restArgsv := [2]int{C_NONE + 1, C_NONE + 1}
  1087  	for i, ap := range p.RestArgs {
  1088  		restArgsv[i] = int(ap.Addr.Class)
  1089  		if restArgsv[i] == 0 {
  1090  			restArgsv[i] = c.aclass(&ap.Addr) + 1
  1091  			ap.Addr.Class = int8(restArgsv[i])
  1092  		}
  1093  	}
  1094  
  1095  	a1 := int(p.Optab)
  1096  	if a1 != 0 {
  1097  		return &optab[a1-1]
  1098  	}
  1099  
  1100  	// first source operand
  1101  	a1 = int(p.From.Class)
  1102  	if a1 == 0 {
  1103  		a1 = c.aclass(&p.From) + 1
  1104  		p.From.Class = int8(a1)
  1105  	}
  1106  	a1--
  1107  
  1108  	// first destination operand
  1109  	a4 := int(p.To.Class)
  1110  	if a4 == 0 {
  1111  		a4 = c.aclass(&p.To) + 1
  1112  		p.To.Class = int8(a4)
  1113  	}
  1114  	a4--
  1115  
  1116  	// 2nd source operand
  1117  	a2 := C_NONE
  1118  	if p.Reg != 0 {
  1119  		a2 = c.rclass(p.Reg)
  1120  	} else if restArgsLen > 0 {
  1121  		a2 = restArgsv[restArgsIndex] - 1
  1122  		restArgsIndex++
  1123  	}
  1124  
  1125  	// 2nd destination operand
  1126  	a5 := C_NONE
  1127  	if p.RegTo2 != 0 {
  1128  		a5 = C_REG
  1129  	}
  1130  
  1131  	// 3rd source operand
  1132  	a3 := C_NONE
  1133  	if restArgsLen > 0 && restArgsIndex < restArgsLen {
  1134  		a3 = restArgsv[restArgsIndex] - 1
  1135  		restArgsIndex++
  1136  	}
  1137  
  1138  	ops := oprange[p.As&obj.AMask]
  1139  	c1 := &xcmp[a1]
  1140  	c2 := &xcmp[a2]
  1141  	c3 := &xcmp[a3]
  1142  	c4 := &xcmp[a4]
  1143  	c5 := &xcmp[a5]
  1144  	for i := range ops {
  1145  		op := &ops[i]
  1146  		if c1[op.from1] && c2[op.reg] && c3[op.from3] && c4[op.to1] && c5[op.to2] {
  1147  			p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
  1148  			return op
  1149  		}
  1150  	}
  1151  
  1152  	c.ctxt.Diag("illegal combination %v %v %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4), DRconv(a5))
  1153  	prasm(p)
  1154  	// Turn illegal instruction into an UNDEF, avoid crashing in asmout.
  1155  	return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}
  1156  }
  1157  
  1158  func cmp(a int, b int) bool {
  1159  	if a == b {
  1160  		return true
  1161  	}
  1162  	switch a {
  1163  	case C_DCON:
  1164  		return cmp(C_32CON, b) || cmp(C_DCON12_20S, b) || cmp(C_DCON32_12S, b) || b == C_DCON12_0
  1165  	case C_32CON:
  1166  		return cmp(C_32CON20_0, b) || cmp(C_U15CON, b) || cmp(C_13CON, b) || cmp(C_12CON, b)
  1167  	case C_32CON20_0:
  1168  		return b == C_U15CON20_0 || b == C_U13CON20_0 || b == C_S13CON20_0 || b == C_ZCON
  1169  	case C_U15CON:
  1170  		return cmp(C_U12CON, b) || b == C_U15CON20_0 || b == C_U13CON20_0 || b == C_U13CON
  1171  	case C_13CON:
  1172  		return cmp(C_U13CON, b) || cmp(C_S13CON, b)
  1173  	case C_U13CON:
  1174  		return cmp(C_12CON, b) || b == C_U13CON20_0
  1175  	case C_S13CON:
  1176  		return cmp(C_12CON, b) || b == C_S13CON20_0
  1177  	case C_12CON:
  1178  		return cmp(C_U12CON, b) || cmp(C_S12CON, b)
  1179  	case C_UU12CON:
  1180  		return cmp(C_U12CON, b)
  1181  	case C_U12CON:
  1182  		return cmp(C_U8CON, b) || b == C_US12CON
  1183  	case C_U8CON:
  1184  		return cmp(C_U7CON, b)
  1185  	case C_U7CON:
  1186  		return cmp(C_U6CON, b)
  1187  	case C_U6CON:
  1188  		return cmp(C_U5CON, b)
  1189  	case C_U5CON:
  1190  		return cmp(C_U4CON, b)
  1191  	case C_U4CON:
  1192  		return cmp(C_U3CON, b)
  1193  	case C_U3CON:
  1194  		return cmp(C_U2CON, b)
  1195  	case C_U2CON:
  1196  		return cmp(C_U1CON, b)
  1197  	case C_U1CON:
  1198  		return cmp(C_ZCON, b)
  1199  	case C_US12CON:
  1200  		return cmp(C_S12CON, b)
  1201  	case C_S12CON:
  1202  		return cmp(C_S5CON, b) || cmp(C_U8CON, b) || b == C_US12CON
  1203  	case C_S5CON:
  1204  		return cmp(C_ZCON, b) || cmp(C_U4CON, b)
  1205  
  1206  	case C_DCON12_20S:
  1207  		if b == C_DCON20S_20 || b == C_DCON12_12S ||
  1208  			b == C_DCON20S_12S || b == C_DCON12_12U ||
  1209  			b == C_DCON20S_12U || b == C_DCON20S_0 {
  1210  			return true
  1211  		}
  1212  
  1213  	case C_DCON32_12S:
  1214  		if b == C_DCON32_20 || b == C_DCON12_32S ||
  1215  			b == C_DCON20S_32 || b == C_DCON32_12U ||
  1216  			b == C_DCON32_0 {
  1217  			return true
  1218  		}
  1219  
  1220  	case C_LACON:
  1221  		return b == C_SACON
  1222  
  1223  	case C_LAUTO:
  1224  		return b == C_SAUTO
  1225  
  1226  	case C_REG:
  1227  		return b == C_ZCON
  1228  
  1229  	case C_LOREG_64:
  1230  		if b == C_ZOREG || b == C_SOREG_8 ||
  1231  			b == C_SOREG_9 || b == C_SOREG_10 ||
  1232  			b == C_SOREG_11 || b == C_SOREG_12 ||
  1233  			b == C_SOREG_16 || b == C_LOREG_32 {
  1234  			return true
  1235  		}
  1236  
  1237  	case C_LOREG_32:
  1238  		return cmp(C_SOREG_16, b)
  1239  
  1240  	case C_SOREG_16:
  1241  		return cmp(C_SOREG_12, b)
  1242  
  1243  	case C_SOREG_12:
  1244  		return cmp(C_SOREG_11, b)
  1245  
  1246  	case C_SOREG_11:
  1247  		return cmp(C_SOREG_10, b)
  1248  
  1249  	case C_SOREG_10:
  1250  		return cmp(C_SOREG_9, b)
  1251  
  1252  	case C_SOREG_9:
  1253  		return cmp(C_SOREG_8, b)
  1254  
  1255  	case C_SOREG_8:
  1256  		return b == C_ZOREG
  1257  	}
  1258  
  1259  	return false
  1260  }
  1261  
  1262  func ocmp(p1, p2 Optab) int {
  1263  	if p1.as != p2.as {
  1264  		return int(p1.as) - int(p2.as)
  1265  	}
  1266  	if p1.from1 != p2.from1 {
  1267  		return int(p1.from1) - int(p2.from1)
  1268  	}
  1269  	if p1.reg != p2.reg {
  1270  		return int(p1.reg) - int(p2.reg)
  1271  	}
  1272  	if p1.to1 != p2.to1 {
  1273  		return int(p1.to1) - int(p2.to1)
  1274  	}
  1275  	return 0
  1276  }
  1277  
  1278  func opset(a, b0 obj.As) {
  1279  	oprange[a&obj.AMask] = oprange[b0]
  1280  }
  1281  
  1282  func buildop(ctxt *obj.Link) {
  1283  	if ctxt.DiagFunc == nil {
  1284  		ctxt.DiagFunc = func(format string, args ...any) {
  1285  			log.Printf(format, args...)
  1286  		}
  1287  	}
  1288  
  1289  	if oprange[AOR&obj.AMask] != nil {
  1290  		// Already initialized; stop now.
  1291  		// This happens in the cmd/asm tests,
  1292  		// each of which re-initializes the arch.
  1293  		return
  1294  	}
  1295  
  1296  	for i := range C_NCLASS {
  1297  		for j := range C_NCLASS {
  1298  			if cmp(j, i) {
  1299  				xcmp[i][j] = true
  1300  			}
  1301  		}
  1302  	}
  1303  
  1304  	slices.SortFunc(optab, ocmp)
  1305  	for i := 0; i < len(optab); i++ {
  1306  		as, start := optab[i].as, i
  1307  		for ; i < len(optab)-1; i++ {
  1308  			if optab[i+1].as != as {
  1309  				break
  1310  			}
  1311  		}
  1312  		r0 := as & obj.AMask
  1313  		oprange[r0] = optab[start : i+1]
  1314  		switch as {
  1315  		default:
  1316  			ctxt.Diag("unknown op in build: %v", as)
  1317  			ctxt.DiagFlush()
  1318  			log.Fatalf("bad code")
  1319  
  1320  		case AABSF:
  1321  			opset(AMOVFD, r0)
  1322  			opset(AMOVDF, r0)
  1323  			opset(AMOVWF, r0)
  1324  			opset(AMOVFW, r0)
  1325  			opset(AMOVWD, r0)
  1326  			opset(AMOVDW, r0)
  1327  			opset(AMOVVF, r0)
  1328  			opset(AMOVVD, r0)
  1329  			opset(AMOVFV, r0)
  1330  			opset(AMOVDV, r0)
  1331  			opset(AFFINTFW, r0)
  1332  			opset(AFFINTFV, r0)
  1333  			opset(AFFINTDW, r0)
  1334  			opset(AFFINTDV, r0)
  1335  			opset(AFTINTWF, r0)
  1336  			opset(AFTINTWD, r0)
  1337  			opset(AFTINTVF, r0)
  1338  			opset(AFTINTVD, r0)
  1339  			opset(AFRINTF, r0)
  1340  			opset(AFRINTD, r0)
  1341  			opset(ANEGF, r0)
  1342  			opset(ANEGD, r0)
  1343  			opset(AABSD, r0)
  1344  			opset(ATRUNCDW, r0)
  1345  			opset(ATRUNCFW, r0)
  1346  			opset(ASQRTF, r0)
  1347  			opset(ASQRTD, r0)
  1348  			opset(AFCLASSF, r0)
  1349  			opset(AFCLASSD, r0)
  1350  			opset(AFLOGBF, r0)
  1351  			opset(AFLOGBD, r0)
  1352  			opset(ATRUNCDV, r0)
  1353  			opset(ATRUNCFV, r0)
  1354  			opset(AFTINTRPWF, r0)
  1355  			opset(AFTINTRPWD, r0)
  1356  			opset(AFTINTRPVF, r0)
  1357  			opset(AFTINTRPVD, r0)
  1358  			opset(AFTINTRMWF, r0)
  1359  			opset(AFTINTRMWD, r0)
  1360  			opset(AFTINTRMVF, r0)
  1361  			opset(AFTINTRMVD, r0)
  1362  			opset(AFTINTRZWF, r0)
  1363  			opset(AFTINTRZWD, r0)
  1364  			opset(AFTINTRZVF, r0)
  1365  			opset(AFTINTRZVD, r0)
  1366  			opset(AFTINTRNEWF, r0)
  1367  			opset(AFTINTRNEWD, r0)
  1368  			opset(AFTINTRNEVF, r0)
  1369  			opset(AFTINTRNEVD, r0)
  1370  
  1371  		case AADD:
  1372  			opset(AADDW, r0)
  1373  			opset(ASGT, r0)
  1374  			opset(ASGTU, r0)
  1375  
  1376  		case AADDV:
  1377  			opset(AADDVU, r0)
  1378  
  1379  		case AADDF:
  1380  			opset(ADIVF, r0)
  1381  			opset(ADIVD, r0)
  1382  			opset(AMULF, r0)
  1383  			opset(AMULD, r0)
  1384  			opset(ASUBF, r0)
  1385  			opset(ASUBD, r0)
  1386  			opset(AADDD, r0)
  1387  			opset(AFMINF, r0)
  1388  			opset(AFMIND, r0)
  1389  			opset(AFMAXF, r0)
  1390  			opset(AFMAXD, r0)
  1391  			opset(AFCOPYSGF, r0)
  1392  			opset(AFCOPYSGD, r0)
  1393  			opset(AFSCALEBF, r0)
  1394  			opset(AFSCALEBD, r0)
  1395  			opset(AFMAXAF, r0)
  1396  			opset(AFMAXAD, r0)
  1397  			opset(AFMINAF, r0)
  1398  			opset(AFMINAD, r0)
  1399  
  1400  		case AFMADDF:
  1401  			opset(AFMADDD, r0)
  1402  			opset(AFMSUBF, r0)
  1403  			opset(AFMSUBD, r0)
  1404  			opset(AFNMADDF, r0)
  1405  			opset(AFNMADDD, r0)
  1406  			opset(AFNMSUBF, r0)
  1407  			opset(AFNMSUBD, r0)
  1408  
  1409  		case AAND:
  1410  			opset(AOR, r0)
  1411  			opset(AXOR, r0)
  1412  			opset(AORN, r0)
  1413  			opset(AANDN, r0)
  1414  
  1415  		case ABEQ:
  1416  			opset(ABNE, r0)
  1417  			opset(ABLT, r0)
  1418  			opset(ABGE, r0)
  1419  			opset(ABGEU, r0)
  1420  			opset(ABLTU, r0)
  1421  
  1422  		case ABLEZ:
  1423  			opset(ABGEZ, r0)
  1424  			opset(ABLTZ, r0)
  1425  			opset(ABGTZ, r0)
  1426  
  1427  		case AMOVB:
  1428  			opset(AMOVH, r0)
  1429  
  1430  		case AMOVBU:
  1431  			opset(AMOVHU, r0)
  1432  			opset(AMOVWU, r0)
  1433  
  1434  		case AMOVWP:
  1435  			opset(AMOVVP, r0)
  1436  			opset(ASC, r0)
  1437  			opset(ASCW, r0)
  1438  			opset(ASCV, r0)
  1439  			opset(ALL, r0)
  1440  			opset(ALLW, r0)
  1441  			opset(ALLV, r0)
  1442  
  1443  		case ASLL:
  1444  			opset(ASRL, r0)
  1445  			opset(ASRA, r0)
  1446  			opset(AROTR, r0)
  1447  
  1448  		case ASLLV:
  1449  			opset(ASRAV, r0)
  1450  			opset(ASRLV, r0)
  1451  			opset(AROTRV, r0)
  1452  
  1453  		case ABSTRPICKW:
  1454  			opset(ABSTRPICKV, r0)
  1455  			opset(ABSTRINSW, r0)
  1456  			opset(ABSTRINSV, r0)
  1457  
  1458  		case ASUB:
  1459  			opset(ASUBW, r0)
  1460  			opset(ANOR, r0)
  1461  			opset(ASUBV, r0)
  1462  			opset(ASUBVU, r0)
  1463  			opset(AMUL, r0)
  1464  			opset(AMULW, r0)
  1465  			opset(AMULH, r0)
  1466  			opset(AMULHU, r0)
  1467  			opset(AREM, r0)
  1468  			opset(AREMW, r0)
  1469  			opset(AREMU, r0)
  1470  			opset(AREMWU, r0)
  1471  			opset(ADIV, r0)
  1472  			opset(ADIVW, r0)
  1473  			opset(ADIVU, r0)
  1474  			opset(ADIVWU, r0)
  1475  			opset(AMULV, r0)
  1476  			opset(AMULVU, r0)
  1477  			opset(AMULHV, r0)
  1478  			opset(AMULHVU, r0)
  1479  			opset(AREMV, r0)
  1480  			opset(AREMVU, r0)
  1481  			opset(ADIVV, r0)
  1482  			opset(ADIVVU, r0)
  1483  			opset(AMULWVW, r0)
  1484  			opset(AMULWVWU, r0)
  1485  
  1486  		case ASYSCALL:
  1487  			opset(ADBAR, r0)
  1488  			opset(ABREAK, r0)
  1489  
  1490  		case ACMPEQF:
  1491  			opset(ACMPGTF, r0)
  1492  			opset(ACMPGTD, r0)
  1493  			opset(ACMPGEF, r0)
  1494  			opset(ACMPGED, r0)
  1495  			opset(ACMPEQD, r0)
  1496  
  1497  		case ABFPT:
  1498  			opset(ABFPF, r0)
  1499  
  1500  		case AALSLV:
  1501  			opset(AALSLW, r0)
  1502  			opset(AALSLWU, r0)
  1503  
  1504  		case ANEGW:
  1505  			opset(ANEGV, r0)
  1506  
  1507  		case AMOVF:
  1508  			opset(AMOVD, r0)
  1509  
  1510  		case AMOVW,
  1511  			AMOVV,
  1512  			ARFE,
  1513  			AJAL,
  1514  			AJMP,
  1515  			AVMOVQ,
  1516  			AXVMOVQ,
  1517  			AVSHUFB,
  1518  			AXVSHUFB,
  1519  			AWORD,
  1520  			APRELD,
  1521  			APRELDX,
  1522  			AFSEL,
  1523  			AADDV16,
  1524  			ASCQ,
  1525  			obj.ANOP,
  1526  			obj.ATEXT,
  1527  			obj.AFUNCDATA,
  1528  			obj.APCALIGN,
  1529  			obj.APCDATA:
  1530  			break
  1531  
  1532  		case ARDTIMELW:
  1533  			opset(ARDTIMEHW, r0)
  1534  			opset(ARDTIMED, r0)
  1535  
  1536  		case ACLOW:
  1537  			opset(ACLZW, r0)
  1538  			opset(ACTOW, r0)
  1539  			opset(ACTZW, r0)
  1540  			opset(ACLOV, r0)
  1541  			opset(ACLZV, r0)
  1542  			opset(ACTOV, r0)
  1543  			opset(ACTZV, r0)
  1544  			opset(AREVB2H, r0)
  1545  			opset(AREVB4H, r0)
  1546  			opset(AREVB2W, r0)
  1547  			opset(AREVBV, r0)
  1548  			opset(AREVH2W, r0)
  1549  			opset(AREVHV, r0)
  1550  			opset(ABITREV4B, r0)
  1551  			opset(ABITREV8B, r0)
  1552  			opset(ABITREVW, r0)
  1553  			opset(ABITREVV, r0)
  1554  			opset(AEXTWB, r0)
  1555  			opset(AEXTWH, r0)
  1556  			opset(ACPUCFG, r0)
  1557  
  1558  		case ATEQ:
  1559  			opset(ATNE, r0)
  1560  
  1561  		case AMASKEQZ:
  1562  			opset(AMASKNEZ, r0)
  1563  			opset(ACRCWBW, r0)
  1564  			opset(ACRCWHW, r0)
  1565  			opset(ACRCWWW, r0)
  1566  			opset(ACRCWVW, r0)
  1567  			opset(ACRCCWBW, r0)
  1568  			opset(ACRCCWHW, r0)
  1569  			opset(ACRCCWWW, r0)
  1570  			opset(ACRCCWVW, r0)
  1571  
  1572  		case ANOOP:
  1573  			opset(obj.AUNDEF, r0)
  1574  
  1575  		case AAMSWAPW:
  1576  			for i := range atomicInst {
  1577  				if i == AAMSWAPW {
  1578  					continue
  1579  				}
  1580  				opset(i, r0)
  1581  			}
  1582  
  1583  		case ALLACQW:
  1584  			opset(ALLACQV, r0)
  1585  
  1586  		case ASCRELW:
  1587  			opset(ASCRELV, r0)
  1588  
  1589  		// vseq.b vd, vj, vk
  1590  		// vseqi.b vd, vj, si5
  1591  		case AVSEQB:
  1592  			opset(AVSEQH, r0)
  1593  			opset(AVSEQW, r0)
  1594  			opset(AVSEQV, r0)
  1595  			opset(AVSLTB, r0)
  1596  			opset(AVSLTH, r0)
  1597  			opset(AVSLTW, r0)
  1598  			opset(AVSLTV, r0)
  1599  
  1600  		// xvseq.b xd, xj, xk
  1601  		// xvseqi.b xd, xj, si5
  1602  		case AXVSEQB:
  1603  			opset(AXVSEQH, r0)
  1604  			opset(AXVSEQW, r0)
  1605  			opset(AXVSEQV, r0)
  1606  			opset(AXVSLTB, r0)
  1607  			opset(AXVSLTH, r0)
  1608  			opset(AXVSLTW, r0)
  1609  			opset(AXVSLTV, r0)
  1610  
  1611  		// vslt.bu vd, vj, vk
  1612  		// vslti.bu vd, vj, ui5
  1613  		case AVSLTBU:
  1614  			opset(AVSLTHU, r0)
  1615  			opset(AVSLTWU, r0)
  1616  			opset(AVSLTVU, r0)
  1617  
  1618  		// xvslt.bu xd, xj, xk
  1619  		// xvslti.bu xd, xj, ui5
  1620  		case AXVSLTBU:
  1621  			opset(AXVSLTHU, r0)
  1622  			opset(AXVSLTWU, r0)
  1623  			opset(AXVSLTVU, r0)
  1624  
  1625  		// vandi.b vd, vj, ui8
  1626  		case AVANDB:
  1627  			opset(AVORB, r0)
  1628  			opset(AVXORB, r0)
  1629  			opset(AVNORB, r0)
  1630  			opset(AVSHUF4IB, r0)
  1631  			opset(AVSHUF4IH, r0)
  1632  			opset(AVSHUF4IW, r0)
  1633  			opset(AVSHUF4IV, r0)
  1634  			opset(AVPERMIW, r0)
  1635  			opset(AVEXTRINSB, r0)
  1636  			opset(AVEXTRINSH, r0)
  1637  			opset(AVEXTRINSW, r0)
  1638  			opset(AVEXTRINSV, r0)
  1639  
  1640  		// xvandi.b xd, xj, ui8
  1641  		case AXVANDB:
  1642  			opset(AXVORB, r0)
  1643  			opset(AXVXORB, r0)
  1644  			opset(AXVNORB, r0)
  1645  			opset(AXVSHUF4IB, r0)
  1646  			opset(AXVSHUF4IH, r0)
  1647  			opset(AXVSHUF4IW, r0)
  1648  			opset(AXVSHUF4IV, r0)
  1649  			opset(AXVPERMIW, r0)
  1650  			opset(AXVPERMIV, r0)
  1651  			opset(AXVPERMIQ, r0)
  1652  			opset(AXVEXTRINSB, r0)
  1653  			opset(AXVEXTRINSH, r0)
  1654  			opset(AXVEXTRINSW, r0)
  1655  			opset(AXVEXTRINSV, r0)
  1656  
  1657  		// vadd.b vd, vj, vk
  1658  		case AVADDB:
  1659  			opset(AVADDH, r0)
  1660  			opset(AVADDW, r0)
  1661  			opset(AVADDV, r0)
  1662  			opset(AVADDQ, r0)
  1663  			opset(AVSUBB, r0)
  1664  			opset(AVSUBH, r0)
  1665  			opset(AVSUBW, r0)
  1666  			opset(AVSUBV, r0)
  1667  			opset(AVSUBQ, r0)
  1668  			opset(AVSADDB, r0)
  1669  			opset(AVSADDH, r0)
  1670  			opset(AVSADDW, r0)
  1671  			opset(AVSADDV, r0)
  1672  			opset(AVSSUBB, r0)
  1673  			opset(AVSSUBH, r0)
  1674  			opset(AVSSUBW, r0)
  1675  			opset(AVSSUBV, r0)
  1676  			opset(AVSADDBU, r0)
  1677  			opset(AVSADDHU, r0)
  1678  			opset(AVSADDWU, r0)
  1679  			opset(AVSADDVU, r0)
  1680  			opset(AVSSUBBU, r0)
  1681  			opset(AVSSUBHU, r0)
  1682  			opset(AVSSUBWU, r0)
  1683  			opset(AVSSUBVU, r0)
  1684  			opset(AVANDV, r0)
  1685  			opset(AVORV, r0)
  1686  			opset(AVXORV, r0)
  1687  			opset(AVNORV, r0)
  1688  			opset(AVANDNV, r0)
  1689  			opset(AVORNV, r0)
  1690  			opset(AVILVLB, r0)
  1691  			opset(AVILVLH, r0)
  1692  			opset(AVILVLW, r0)
  1693  			opset(AVILVLV, r0)
  1694  			opset(AVILVHB, r0)
  1695  			opset(AVILVHH, r0)
  1696  			opset(AVILVHW, r0)
  1697  			opset(AVILVHV, r0)
  1698  			opset(AVMULB, r0)
  1699  			opset(AVMULH, r0)
  1700  			opset(AVMULW, r0)
  1701  			opset(AVMULV, r0)
  1702  			opset(AVMUHB, r0)
  1703  			opset(AVMUHH, r0)
  1704  			opset(AVMUHW, r0)
  1705  			opset(AVMUHV, r0)
  1706  			opset(AVMUHBU, r0)
  1707  			opset(AVMUHHU, r0)
  1708  			opset(AVMUHWU, r0)
  1709  			opset(AVMUHVU, r0)
  1710  			opset(AVDIVB, r0)
  1711  			opset(AVDIVH, r0)
  1712  			opset(AVDIVW, r0)
  1713  			opset(AVDIVV, r0)
  1714  			opset(AVMODB, r0)
  1715  			opset(AVMODH, r0)
  1716  			opset(AVMODW, r0)
  1717  			opset(AVMODV, r0)
  1718  			opset(AVDIVBU, r0)
  1719  			opset(AVDIVHU, r0)
  1720  			opset(AVDIVWU, r0)
  1721  			opset(AVDIVVU, r0)
  1722  			opset(AVMODBU, r0)
  1723  			opset(AVMODHU, r0)
  1724  			opset(AVMODWU, r0)
  1725  			opset(AVMODVU, r0)
  1726  			opset(AVMULWEVHB, r0)
  1727  			opset(AVMULWEVWH, r0)
  1728  			opset(AVMULWEVVW, r0)
  1729  			opset(AVMULWEVQV, r0)
  1730  			opset(AVMULWODHB, r0)
  1731  			opset(AVMULWODWH, r0)
  1732  			opset(AVMULWODVW, r0)
  1733  			opset(AVMULWODQV, r0)
  1734  			opset(AVMULWEVHBU, r0)
  1735  			opset(AVMULWEVWHU, r0)
  1736  			opset(AVMULWEVVWU, r0)
  1737  			opset(AVMULWEVQVU, r0)
  1738  			opset(AVMULWODHBU, r0)
  1739  			opset(AVMULWODWHU, r0)
  1740  			opset(AVMULWODVWU, r0)
  1741  			opset(AVMULWODQVU, r0)
  1742  			opset(AVMULWEVHBUB, r0)
  1743  			opset(AVMULWEVWHUH, r0)
  1744  			opset(AVMULWEVVWUW, r0)
  1745  			opset(AVMULWEVQVUV, r0)
  1746  			opset(AVMULWODHBUB, r0)
  1747  			opset(AVMULWODWHUH, r0)
  1748  			opset(AVMULWODVWUW, r0)
  1749  			opset(AVMULWODQVUV, r0)
  1750  			opset(AVADDF, r0)
  1751  			opset(AVADDD, r0)
  1752  			opset(AVSUBF, r0)
  1753  			opset(AVSUBD, r0)
  1754  			opset(AVMULF, r0)
  1755  			opset(AVMULD, r0)
  1756  			opset(AVDIVF, r0)
  1757  			opset(AVDIVD, r0)
  1758  			opset(AVSHUFH, r0)
  1759  			opset(AVSHUFW, r0)
  1760  			opset(AVSHUFV, r0)
  1761  			opset(AVADDWEVHB, r0)
  1762  			opset(AVADDWEVWH, r0)
  1763  			opset(AVADDWEVVW, r0)
  1764  			opset(AVADDWEVQV, r0)
  1765  			opset(AVSUBWEVHB, r0)
  1766  			opset(AVSUBWEVWH, r0)
  1767  			opset(AVSUBWEVVW, r0)
  1768  			opset(AVSUBWEVQV, r0)
  1769  			opset(AVADDWODHB, r0)
  1770  			opset(AVADDWODWH, r0)
  1771  			opset(AVADDWODVW, r0)
  1772  			opset(AVADDWODQV, r0)
  1773  			opset(AVSUBWODHB, r0)
  1774  			opset(AVSUBWODWH, r0)
  1775  			opset(AVSUBWODVW, r0)
  1776  			opset(AVSUBWODQV, r0)
  1777  			opset(AVADDWEVHBU, r0)
  1778  			opset(AVADDWEVWHU, r0)
  1779  			opset(AVADDWEVVWU, r0)
  1780  			opset(AVADDWEVQVU, r0)
  1781  			opset(AVSUBWEVHBU, r0)
  1782  			opset(AVSUBWEVWHU, r0)
  1783  			opset(AVSUBWEVVWU, r0)
  1784  			opset(AVSUBWEVQVU, r0)
  1785  			opset(AVADDWODHBU, r0)
  1786  			opset(AVADDWODWHU, r0)
  1787  			opset(AVADDWODVWU, r0)
  1788  			opset(AVADDWODQVU, r0)
  1789  			opset(AVSUBWODHBU, r0)
  1790  			opset(AVSUBWODWHU, r0)
  1791  			opset(AVSUBWODVWU, r0)
  1792  			opset(AVSUBWODQVU, r0)
  1793  			opset(AVMADDB, r0)
  1794  			opset(AVMADDH, r0)
  1795  			opset(AVMADDW, r0)
  1796  			opset(AVMADDV, r0)
  1797  			opset(AVMSUBB, r0)
  1798  			opset(AVMSUBH, r0)
  1799  			opset(AVMSUBW, r0)
  1800  			opset(AVMSUBV, r0)
  1801  			opset(AVMADDWEVHB, r0)
  1802  			opset(AVMADDWEVWH, r0)
  1803  			opset(AVMADDWEVVW, r0)
  1804  			opset(AVMADDWEVQV, r0)
  1805  			opset(AVMADDWODHB, r0)
  1806  			opset(AVMADDWODWH, r0)
  1807  			opset(AVMADDWODVW, r0)
  1808  			opset(AVMADDWODQV, r0)
  1809  			opset(AVMADDWEVHBU, r0)
  1810  			opset(AVMADDWEVWHU, r0)
  1811  			opset(AVMADDWEVVWU, r0)
  1812  			opset(AVMADDWEVQVU, r0)
  1813  			opset(AVMADDWODHBU, r0)
  1814  			opset(AVMADDWODWHU, r0)
  1815  			opset(AVMADDWODVWU, r0)
  1816  			opset(AVMADDWODQVU, r0)
  1817  			opset(AVMADDWEVHBUB, r0)
  1818  			opset(AVMADDWEVWHUH, r0)
  1819  			opset(AVMADDWEVVWUW, r0)
  1820  			opset(AVMADDWEVQVUV, r0)
  1821  			opset(AVMADDWODHBUB, r0)
  1822  			opset(AVMADDWODWHUH, r0)
  1823  			opset(AVMADDWODVWUW, r0)
  1824  			opset(AVMADDWODQVUV, r0)
  1825  
  1826  		// xvadd.b xd, xj, xk
  1827  		case AXVADDB:
  1828  			opset(AXVADDH, r0)
  1829  			opset(AXVADDW, r0)
  1830  			opset(AXVADDV, r0)
  1831  			opset(AXVADDQ, r0)
  1832  			opset(AXVSUBB, r0)
  1833  			opset(AXVSUBH, r0)
  1834  			opset(AXVSUBW, r0)
  1835  			opset(AXVSUBV, r0)
  1836  			opset(AXVSUBQ, r0)
  1837  			opset(AXVSADDB, r0)
  1838  			opset(AXVSADDH, r0)
  1839  			opset(AXVSADDW, r0)
  1840  			opset(AXVSADDV, r0)
  1841  			opset(AXVSSUBB, r0)
  1842  			opset(AXVSSUBH, r0)
  1843  			opset(AXVSSUBW, r0)
  1844  			opset(AXVSSUBV, r0)
  1845  			opset(AXVSADDBU, r0)
  1846  			opset(AXVSADDHU, r0)
  1847  			opset(AXVSADDWU, r0)
  1848  			opset(AXVSADDVU, r0)
  1849  			opset(AXVSSUBBU, r0)
  1850  			opset(AXVSSUBHU, r0)
  1851  			opset(AXVSSUBWU, r0)
  1852  			opset(AXVSSUBVU, r0)
  1853  			opset(AXVANDV, r0)
  1854  			opset(AXVORV, r0)
  1855  			opset(AXVXORV, r0)
  1856  			opset(AXVNORV, r0)
  1857  			opset(AXVANDNV, r0)
  1858  			opset(AXVORNV, r0)
  1859  			opset(AXVILVLB, r0)
  1860  			opset(AXVILVLH, r0)
  1861  			opset(AXVILVLW, r0)
  1862  			opset(AXVILVLV, r0)
  1863  			opset(AXVILVHB, r0)
  1864  			opset(AXVILVHH, r0)
  1865  			opset(AXVILVHW, r0)
  1866  			opset(AXVILVHV, r0)
  1867  			opset(AXVMULB, r0)
  1868  			opset(AXVMULH, r0)
  1869  			opset(AXVMULW, r0)
  1870  			opset(AXVMULV, r0)
  1871  			opset(AXVMUHB, r0)
  1872  			opset(AXVMUHH, r0)
  1873  			opset(AXVMUHW, r0)
  1874  			opset(AXVMUHV, r0)
  1875  			opset(AXVMUHBU, r0)
  1876  			opset(AXVMUHHU, r0)
  1877  			opset(AXVMUHWU, r0)
  1878  			opset(AXVMUHVU, r0)
  1879  			opset(AXVDIVB, r0)
  1880  			opset(AXVDIVH, r0)
  1881  			opset(AXVDIVW, r0)
  1882  			opset(AXVDIVV, r0)
  1883  			opset(AXVMODB, r0)
  1884  			opset(AXVMODH, r0)
  1885  			opset(AXVMODW, r0)
  1886  			opset(AXVMODV, r0)
  1887  			opset(AXVDIVBU, r0)
  1888  			opset(AXVDIVHU, r0)
  1889  			opset(AXVDIVWU, r0)
  1890  			opset(AXVDIVVU, r0)
  1891  			opset(AXVMODBU, r0)
  1892  			opset(AXVMODHU, r0)
  1893  			opset(AXVMODWU, r0)
  1894  			opset(AXVMODVU, r0)
  1895  			opset(AXVMULWEVHB, r0)
  1896  			opset(AXVMULWEVWH, r0)
  1897  			opset(AXVMULWEVVW, r0)
  1898  			opset(AXVMULWEVQV, r0)
  1899  			opset(AXVMULWODHB, r0)
  1900  			opset(AXVMULWODWH, r0)
  1901  			opset(AXVMULWODVW, r0)
  1902  			opset(AXVMULWODQV, r0)
  1903  			opset(AXVMULWEVHBU, r0)
  1904  			opset(AXVMULWEVWHU, r0)
  1905  			opset(AXVMULWEVVWU, r0)
  1906  			opset(AXVMULWEVQVU, r0)
  1907  			opset(AXVMULWODHBU, r0)
  1908  			opset(AXVMULWODWHU, r0)
  1909  			opset(AXVMULWODVWU, r0)
  1910  			opset(AXVMULWODQVU, r0)
  1911  			opset(AXVMULWEVHBUB, r0)
  1912  			opset(AXVMULWEVWHUH, r0)
  1913  			opset(AXVMULWEVVWUW, r0)
  1914  			opset(AXVMULWEVQVUV, r0)
  1915  			opset(AXVMULWODHBUB, r0)
  1916  			opset(AXVMULWODWHUH, r0)
  1917  			opset(AXVMULWODVWUW, r0)
  1918  			opset(AXVMULWODQVUV, r0)
  1919  			opset(AXVADDF, r0)
  1920  			opset(AXVADDD, r0)
  1921  			opset(AXVSUBF, r0)
  1922  			opset(AXVSUBD, r0)
  1923  			opset(AXVMULF, r0)
  1924  			opset(AXVMULD, r0)
  1925  			opset(AXVDIVF, r0)
  1926  			opset(AXVDIVD, r0)
  1927  			opset(AXVSHUFH, r0)
  1928  			opset(AXVSHUFW, r0)
  1929  			opset(AXVSHUFV, r0)
  1930  			opset(AXVADDWEVHB, r0)
  1931  			opset(AXVADDWEVWH, r0)
  1932  			opset(AXVADDWEVVW, r0)
  1933  			opset(AXVADDWEVQV, r0)
  1934  			opset(AXVSUBWEVHB, r0)
  1935  			opset(AXVSUBWEVWH, r0)
  1936  			opset(AXVSUBWEVVW, r0)
  1937  			opset(AXVSUBWEVQV, r0)
  1938  			opset(AXVADDWODHB, r0)
  1939  			opset(AXVADDWODWH, r0)
  1940  			opset(AXVADDWODVW, r0)
  1941  			opset(AXVADDWODQV, r0)
  1942  			opset(AXVSUBWODHB, r0)
  1943  			opset(AXVSUBWODWH, r0)
  1944  			opset(AXVSUBWODVW, r0)
  1945  			opset(AXVSUBWODQV, r0)
  1946  			opset(AXVADDWEVHBU, r0)
  1947  			opset(AXVADDWEVWHU, r0)
  1948  			opset(AXVADDWEVVWU, r0)
  1949  			opset(AXVADDWEVQVU, r0)
  1950  			opset(AXVSUBWEVHBU, r0)
  1951  			opset(AXVSUBWEVWHU, r0)
  1952  			opset(AXVSUBWEVVWU, r0)
  1953  			opset(AXVSUBWEVQVU, r0)
  1954  			opset(AXVADDWODHBU, r0)
  1955  			opset(AXVADDWODWHU, r0)
  1956  			opset(AXVADDWODVWU, r0)
  1957  			opset(AXVADDWODQVU, r0)
  1958  			opset(AXVSUBWODHBU, r0)
  1959  			opset(AXVSUBWODWHU, r0)
  1960  			opset(AXVSUBWODVWU, r0)
  1961  			opset(AXVSUBWODQVU, r0)
  1962  			opset(AXVMADDB, r0)
  1963  			opset(AXVMADDH, r0)
  1964  			opset(AXVMADDW, r0)
  1965  			opset(AXVMADDV, r0)
  1966  			opset(AXVMSUBB, r0)
  1967  			opset(AXVMSUBH, r0)
  1968  			opset(AXVMSUBW, r0)
  1969  			opset(AXVMSUBV, r0)
  1970  			opset(AXVMADDWEVHB, r0)
  1971  			opset(AXVMADDWEVWH, r0)
  1972  			opset(AXVMADDWEVVW, r0)
  1973  			opset(AXVMADDWEVQV, r0)
  1974  			opset(AXVMADDWODHB, r0)
  1975  			opset(AXVMADDWODWH, r0)
  1976  			opset(AXVMADDWODVW, r0)
  1977  			opset(AXVMADDWODQV, r0)
  1978  			opset(AXVMADDWEVHBU, r0)
  1979  			opset(AXVMADDWEVWHU, r0)
  1980  			opset(AXVMADDWEVVWU, r0)
  1981  			opset(AXVMADDWEVQVU, r0)
  1982  			opset(AXVMADDWODHBU, r0)
  1983  			opset(AXVMADDWODWHU, r0)
  1984  			opset(AXVMADDWODVWU, r0)
  1985  			opset(AXVMADDWODQVU, r0)
  1986  			opset(AXVMADDWEVHBUB, r0)
  1987  			opset(AXVMADDWEVWHUH, r0)
  1988  			opset(AXVMADDWEVVWUW, r0)
  1989  			opset(AXVMADDWEVQVUV, r0)
  1990  			opset(AXVMADDWODHBUB, r0)
  1991  			opset(AXVMADDWODWHUH, r0)
  1992  			opset(AXVMADDWODVWUW, r0)
  1993  			opset(AXVMADDWODQVUV, r0)
  1994  
  1995  		// vpcnt.b vd, vj
  1996  		case AVPCNTB:
  1997  			opset(AVPCNTH, r0)
  1998  			opset(AVPCNTW, r0)
  1999  			opset(AVPCNTV, r0)
  2000  			opset(AVFSQRTF, r0)
  2001  			opset(AVFSQRTD, r0)
  2002  			opset(AVFRECIPF, r0)
  2003  			opset(AVFRECIPD, r0)
  2004  			opset(AVFRSQRTF, r0)
  2005  			opset(AVFRSQRTD, r0)
  2006  			opset(AVNEGB, r0)
  2007  			opset(AVNEGH, r0)
  2008  			opset(AVNEGW, r0)
  2009  			opset(AVNEGV, r0)
  2010  			opset(AVFRINTRNEF, r0)
  2011  			opset(AVFRINTRNED, r0)
  2012  			opset(AVFRINTRZF, r0)
  2013  			opset(AVFRINTRZD, r0)
  2014  			opset(AVFRINTRPF, r0)
  2015  			opset(AVFRINTRPD, r0)
  2016  			opset(AVFRINTRMF, r0)
  2017  			opset(AVFRINTRMD, r0)
  2018  			opset(AVFRINTF, r0)
  2019  			opset(AVFRINTD, r0)
  2020  			opset(AVFCLASSF, r0)
  2021  			opset(AVFCLASSD, r0)
  2022  
  2023  		// xvpcnt.b xd, xj
  2024  		case AXVPCNTB:
  2025  			opset(AXVPCNTH, r0)
  2026  			opset(AXVPCNTW, r0)
  2027  			opset(AXVPCNTV, r0)
  2028  			opset(AXVFSQRTF, r0)
  2029  			opset(AXVFSQRTD, r0)
  2030  			opset(AXVFRECIPF, r0)
  2031  			opset(AXVFRECIPD, r0)
  2032  			opset(AXVFRSQRTF, r0)
  2033  			opset(AXVFRSQRTD, r0)
  2034  			opset(AXVNEGB, r0)
  2035  			opset(AXVNEGH, r0)
  2036  			opset(AXVNEGW, r0)
  2037  			opset(AXVNEGV, r0)
  2038  			opset(AXVFRINTRNEF, r0)
  2039  			opset(AXVFRINTRNED, r0)
  2040  			opset(AXVFRINTRZF, r0)
  2041  			opset(AXVFRINTRZD, r0)
  2042  			opset(AXVFRINTRPF, r0)
  2043  			opset(AXVFRINTRPD, r0)
  2044  			opset(AXVFRINTRMF, r0)
  2045  			opset(AXVFRINTRMD, r0)
  2046  			opset(AXVFRINTF, r0)
  2047  			opset(AXVFRINTD, r0)
  2048  			opset(AXVFCLASSF, r0)
  2049  			opset(AXVFCLASSD, r0)
  2050  
  2051  		// vsll.b vd, vj, vk
  2052  		// vslli.b vd, vj, ui3
  2053  		case AVSLLB:
  2054  			opset(AVSRLB, r0)
  2055  			opset(AVSRAB, r0)
  2056  			opset(AVROTRB, r0)
  2057  			opset(AVBITCLRB, r0)
  2058  			opset(AVBITSETB, r0)
  2059  			opset(AVBITREVB, r0)
  2060  
  2061  		// xvsll.b xd, xj, xk
  2062  		// xvslli.b xd, xj, ui3
  2063  		case AXVSLLB:
  2064  			opset(AXVSRLB, r0)
  2065  			opset(AXVSRAB, r0)
  2066  			opset(AXVROTRB, r0)
  2067  			opset(AXVBITCLRB, r0)
  2068  			opset(AXVBITSETB, r0)
  2069  			opset(AXVBITREVB, r0)
  2070  
  2071  		// vsll.h vd, vj, vk
  2072  		// vslli.h vd, vj, ui4
  2073  		case AVSLLH:
  2074  			opset(AVSRLH, r0)
  2075  			opset(AVSRAH, r0)
  2076  			opset(AVROTRH, r0)
  2077  			opset(AVBITCLRH, r0)
  2078  			opset(AVBITSETH, r0)
  2079  			opset(AVBITREVH, r0)
  2080  
  2081  		// xvsll.h xd, xj, xk
  2082  		// xvslli.h xd, xj, ui4
  2083  		case AXVSLLH:
  2084  			opset(AXVSRLH, r0)
  2085  			opset(AXVSRAH, r0)
  2086  			opset(AXVROTRH, r0)
  2087  			opset(AXVBITCLRH, r0)
  2088  			opset(AXVBITSETH, r0)
  2089  			opset(AXVBITREVH, r0)
  2090  
  2091  		// vsll.w vd, vj, vk
  2092  		// vslli.w vd, vj, ui5
  2093  		case AVSLLW:
  2094  			opset(AVSRLW, r0)
  2095  			opset(AVSRAW, r0)
  2096  			opset(AVROTRW, r0)
  2097  			opset(AVBITCLRW, r0)
  2098  			opset(AVBITSETW, r0)
  2099  			opset(AVBITREVW, r0)
  2100  
  2101  		// xvsll.w xd, xj, xk
  2102  		// xvslli.w xd, xj, ui5
  2103  		case AXVSLLW:
  2104  			opset(AXVSRLW, r0)
  2105  			opset(AXVSRAW, r0)
  2106  			opset(AXVROTRW, r0)
  2107  			opset(AXVBITCLRW, r0)
  2108  			opset(AXVBITSETW, r0)
  2109  			opset(AXVBITREVW, r0)
  2110  
  2111  		// vsll.d vd, vj, vk
  2112  		// vslli.d vd, vj, ui6
  2113  		case AVSLLV:
  2114  			opset(AVSRLV, r0)
  2115  			opset(AVSRAV, r0)
  2116  			opset(AVROTRV, r0)
  2117  			opset(AVBITCLRV, r0)
  2118  			opset(AVBITSETV, r0)
  2119  			opset(AVBITREVV, r0)
  2120  
  2121  		// xvsll.d xd, xj, xk
  2122  		// xvslli.d xd, xj, ui6
  2123  		case AXVSLLV:
  2124  			opset(AXVSRLV, r0)
  2125  			opset(AXVSRAV, r0)
  2126  			opset(AXVROTRV, r0)
  2127  			opset(AXVBITCLRV, r0)
  2128  			opset(AXVBITSETV, r0)
  2129  			opset(AXVBITREVV, r0)
  2130  
  2131  		// vaddi.bu vd, vj, ui5
  2132  		case AVADDBU:
  2133  			opset(AVADDHU, r0)
  2134  			opset(AVADDWU, r0)
  2135  			opset(AVADDVU, r0)
  2136  			opset(AVSUBBU, r0)
  2137  			opset(AVSUBHU, r0)
  2138  			opset(AVSUBWU, r0)
  2139  			opset(AVSUBVU, r0)
  2140  
  2141  		// xvaddi.bu xd, xj, ui5
  2142  		case AXVADDBU:
  2143  			opset(AXVADDHU, r0)
  2144  			opset(AXVADDWU, r0)
  2145  			opset(AXVADDVU, r0)
  2146  			opset(AXVSUBBU, r0)
  2147  			opset(AXVSUBHU, r0)
  2148  			opset(AXVSUBWU, r0)
  2149  			opset(AXVSUBVU, r0)
  2150  
  2151  		// vseteqz.v cd, vj
  2152  		case AVSETEQV:
  2153  			opset(AVSETNEV, r0)
  2154  			opset(AVSETANYEQB, r0)
  2155  			opset(AVSETANYEQH, r0)
  2156  			opset(AVSETANYEQW, r0)
  2157  			opset(AVSETANYEQV, r0)
  2158  			opset(AVSETALLNEB, r0)
  2159  			opset(AVSETALLNEH, r0)
  2160  			opset(AVSETALLNEW, r0)
  2161  			opset(AVSETALLNEV, r0)
  2162  
  2163  		// xvseteqz.v cd, xj
  2164  		case AXVSETEQV:
  2165  			opset(AXVSETNEV, r0)
  2166  			opset(AXVSETANYEQB, r0)
  2167  			opset(AXVSETANYEQH, r0)
  2168  			opset(AXVSETANYEQW, r0)
  2169  			opset(AXVSETANYEQV, r0)
  2170  			opset(AXVSETALLNEB, r0)
  2171  			opset(AXVSETALLNEH, r0)
  2172  			opset(AXVSETALLNEW, r0)
  2173  			opset(AXVSETALLNEV, r0)
  2174  
  2175  		}
  2176  	}
  2177  }
  2178  
  2179  func OP_RRRR(op uint32, r1 uint32, r2 uint32, r3 uint32, r4 uint32) uint32 {
  2180  	return op | (r1&0x1F)<<15 | (r2&0x1F)<<10 | (r3&0x1F)<<5 | (r4 & 0x1F)
  2181  }
  2182  
  2183  // r1 -> rk
  2184  // r2 -> rj
  2185  // r3 -> rd
  2186  func OP_RRR(op uint32, r1 uint32, r2 uint32, r3 uint32) uint32 {
  2187  	return op | (r1&0x1F)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2188  }
  2189  
  2190  // r2 -> rj
  2191  // r3 -> rd
  2192  func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 {
  2193  	return op | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2194  }
  2195  
  2196  func OP_2IRRR(op uint32, i uint32, r2 uint32, r3 uint32, r4 uint32) uint32 {
  2197  	return op | (i&0x3)<<15 | (r2&0x1F)<<10 | (r3&0x1F)<<5 | (r4&0x1F)<<0
  2198  }
  2199  
  2200  func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 {
  2201  	return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | ((i >> 16) & 0x1F)
  2202  }
  2203  
  2204  func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2205  	return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2206  }
  2207  
  2208  func OP_14IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2209  	return op | (i&0x3FFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2210  }
  2211  
  2212  func OP_12IR_5I(op uint32, i1 uint32, r2 uint32, i2 uint32) uint32 {
  2213  	return op | (i1&0xFFF)<<10 | (r2&0x1F)<<5 | (i2&0x1F)<<0
  2214  }
  2215  
  2216  func OP_12IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2217  	return op | (i&0xFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2218  }
  2219  
  2220  func OP_11IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2221  	return op | (i&0x7FF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2222  }
  2223  
  2224  func OP_10IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2225  	return op | (i&0x3FF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2226  }
  2227  
  2228  func OP_9IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2229  	return op | (i&0x1FF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2230  }
  2231  
  2232  func OP_8IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2233  	return op | (i&0xFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2234  }
  2235  
  2236  func OP_6IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2237  	return op | (i&0x3F)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2238  }
  2239  
  2240  func OP_5IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2241  	return op | (i&0x1F)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2242  }
  2243  
  2244  func OP_4IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2245  	return op | (i&0xF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2246  }
  2247  
  2248  func OP_3IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  2249  	return op | (i&0x7)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  2250  }
  2251  
  2252  func OP_IR(op uint32, i uint32, r2 uint32) uint32 {
  2253  	return op | (i&0xFFFFF)<<5 | (r2&0x1F)<<0 // ui20, rd5
  2254  }
  2255  
  2256  func OP_15I(op uint32, i uint32) uint32 {
  2257  	return op | (i&0x7FFF)<<0
  2258  }
  2259  
  2260  // i1 -> msb
  2261  // r2 -> rj
  2262  // i3 -> lsb
  2263  // r4 -> rd
  2264  func OP_IRIR(op uint32, i1 uint32, r2 uint32, i3 uint32, r4 uint32) uint32 {
  2265  	return op | (i1 << 16) | (r2&0x1F)<<5 | (i3 << 10) | (r4&0x1F)<<0
  2266  }
  2267  
  2268  // Encoding for the 'b' or 'bl' instruction.
  2269  func OP_B_BL(op uint32, i uint32) uint32 {
  2270  	return op | ((i & 0xFFFF) << 10) | ((i >> 16) & 0x3FF)
  2271  }
  2272  
  2273  func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
  2274  	o1 := uint32(0)
  2275  	o2 := uint32(0)
  2276  	o3 := uint32(0)
  2277  	o4 := uint32(0)
  2278  	o5 := uint32(0)
  2279  	o6 := uint32(0)
  2280  
  2281  	add := AADDVU
  2282  
  2283  	switch o.type_ {
  2284  	default:
  2285  		c.ctxt.Diag("unknown type %d", o.type_)
  2286  		prasm(p)
  2287  
  2288  	case 0: // pseudo ops
  2289  		break
  2290  
  2291  	case 1: // mov rj, rd
  2292  		switch p.As {
  2293  		case AMOVB:
  2294  			o1 = OP_RR(c.oprr(AEXTWB), uint32(p.From.Reg), uint32(p.To.Reg))
  2295  		case AMOVH:
  2296  			o1 = OP_RR(c.oprr(AEXTWH), uint32(p.From.Reg), uint32(p.To.Reg))
  2297  		case AMOVW:
  2298  			o1 = OP_RRR(c.oprrr(ASLL), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg))
  2299  		case AMOVV:
  2300  			o1 = OP_RRR(c.oprrr(AOR), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg))
  2301  		case AMOVBU:
  2302  			o1 = OP_12IRR(c.opirr(AAND), uint32(0xff), uint32(p.From.Reg), uint32(p.To.Reg))
  2303  		case AMOVHU:
  2304  			o1 = OP_IRIR(c.opirir(ABSTRPICKV), 15, uint32(p.From.Reg), 0, uint32(p.To.Reg))
  2305  		case AMOVWU:
  2306  			o1 = OP_IRIR(c.opirir(ABSTRPICKV), 31, uint32(p.From.Reg), 0, uint32(p.To.Reg))
  2307  		case AVMOVQ:
  2308  			o1 = OP_6IRR(c.opirr(AVSLLV), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg))
  2309  		case AXVMOVQ:
  2310  			o1 = OP_6IRR(c.opirr(AXVSLLV), uint32(0), uint32(p.From.Reg), uint32(p.To.Reg))
  2311  		default:
  2312  			c.ctxt.Diag("unexpected encoding\n%v", p)
  2313  		}
  2314  
  2315  	case 2: // add/sub r1,[r2],r3
  2316  		r := int(p.Reg)
  2317  		if p.As == ANEGW || p.As == ANEGV {
  2318  			r = REGZERO
  2319  		}
  2320  		if r == 0 {
  2321  			r = int(p.To.Reg)
  2322  		}
  2323  		o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg))
  2324  
  2325  	case 3: // mov $soreg, r ==> or/add $i,o,r
  2326  		v := c.regoff(&p.From)
  2327  
  2328  		r := int(p.From.Reg)
  2329  		if r == 0 {
  2330  			r = int(o.param)
  2331  		}
  2332  		a := add
  2333  		if o.from1 == C_12CON && v > 0 {
  2334  			a = AOR
  2335  		}
  2336  
  2337  		o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.To.Reg))
  2338  
  2339  	case 4: // add $scon,[r1],r2
  2340  		v := c.regoff(&p.From)
  2341  		r := int(p.Reg)
  2342  		if r == 0 {
  2343  			r = int(p.To.Reg)
  2344  		}
  2345  		if p.As == AADDV16 {
  2346  			if v&65535 != 0 {
  2347  				c.ctxt.Diag("%v: the constant must be a multiple of 65536.\n", p)
  2348  			}
  2349  			o1 = OP_16IRR(c.opirr(p.As), uint32(v>>16), uint32(r), uint32(p.To.Reg))
  2350  		} else {
  2351  			o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2352  		}
  2353  
  2354  	case 5: // syscall
  2355  		v := c.regoff(&p.From)
  2356  		o1 = OP_15I(c.opi(p.As), uint32(v))
  2357  
  2358  	case 6: // beq r1,[r2],sbra
  2359  		v := int32(0)
  2360  		if p.To.Target() != nil {
  2361  			v = int32(p.To.Target().Pc-p.Pc) >> 2
  2362  		}
  2363  		as, rd, rj, width := p.As, p.Reg, p.From.Reg, 16
  2364  		switch as {
  2365  		case ABGTZ, ABLEZ:
  2366  			rd, rj = rj, rd
  2367  		case ABFPT, ABFPF:
  2368  			width = 21
  2369  			// FCC0 is the implicit source operand, now that we
  2370  			// don't register-allocate from the FCC bank.
  2371  			if rj == 0 {
  2372  				rj = REG_FCC0
  2373  			}
  2374  		case ABEQ, ABNE:
  2375  			if rd == 0 || rd == REGZERO || rj == REGZERO {
  2376  				// BEQZ/BNEZ can be encoded with 21-bit offsets.
  2377  				width = 21
  2378  				as = -as
  2379  				if rj == 0 || rj == REGZERO {
  2380  					rj = rd
  2381  				}
  2382  			}
  2383  		}
  2384  		switch width {
  2385  		case 21:
  2386  			if (v<<11)>>11 != v {
  2387  				c.ctxt.Diag("21 bit-width, short branch too far\n%v", p)
  2388  			}
  2389  			o1 = OP_16IR_5I(c.opirr(as), uint32(v), uint32(rj))
  2390  		case 16:
  2391  			if (v<<16)>>16 != v {
  2392  				c.ctxt.Diag("16 bit-width, short branch too far\n%v", p)
  2393  			}
  2394  			o1 = OP_16IRR(c.opirr(as), uint32(v), uint32(rj), uint32(rd))
  2395  		default:
  2396  			c.ctxt.Diag("unexpected branch encoding\n%v", p)
  2397  		}
  2398  
  2399  	case 7: // mov r, soreg
  2400  		r := int(p.To.Reg)
  2401  		if r == 0 {
  2402  			r = int(o.param)
  2403  		}
  2404  		v := c.regoff(&p.To)
  2405  		o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg))
  2406  
  2407  	case 8: // mov soreg, r
  2408  		r := int(p.From.Reg)
  2409  		if r == 0 {
  2410  			r = int(o.param)
  2411  		}
  2412  		v := c.regoff(&p.From)
  2413  		o1 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2414  
  2415  	case 9: // sll r1,[r2],r3
  2416  		o1 = OP_RR(c.oprr(p.As), uint32(p.From.Reg), uint32(p.To.Reg))
  2417  
  2418  	case 10: // add $con,[r1],r2 ==> mov $con, t; add t,[r1],r2
  2419  		v := c.regoff(&p.From)
  2420  		a := AOR
  2421  		if v < 0 {
  2422  			a = AADD
  2423  		}
  2424  		o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP))
  2425  		r := int(p.Reg)
  2426  		if r == 0 {
  2427  			r = int(p.To.Reg)
  2428  		}
  2429  		o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  2430  
  2431  	case 11: // jmp lbra
  2432  		v := int32(0)
  2433  		if p.To.Target() != nil {
  2434  			v = int32(p.To.Target().Pc-p.Pc) >> 2
  2435  			if v < -1<<25 || v >= 1<<25 {
  2436  				c.ctxt.Diag("branch too far \n%v", p)
  2437  			}
  2438  		}
  2439  		o1 = OP_B_BL(c.opirr(p.As), uint32(v))
  2440  		if p.To.Sym != nil {
  2441  			c.cursym.AddRel(c.ctxt, obj.Reloc{
  2442  				Type: objabi.R_CALLLOONG64,
  2443  				Off:  int32(c.pc),
  2444  				Siz:  4,
  2445  				Sym:  p.To.Sym,
  2446  				Add:  p.To.Offset,
  2447  			})
  2448  		}
  2449  
  2450  	case 13: // vsll $ui3, [vr1], vr2
  2451  		v := c.regoff(&p.From)
  2452  		r := int(p.Reg)
  2453  		if r == 0 {
  2454  			r = int(p.To.Reg)
  2455  		}
  2456  		o1 = OP_3IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2457  
  2458  	case 14: // vsll $ui4, [vr1], vr2
  2459  		v := c.regoff(&p.From)
  2460  		r := int(p.Reg)
  2461  		if r == 0 {
  2462  			r = int(p.To.Reg)
  2463  		}
  2464  		o1 = OP_4IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2465  
  2466  	case 15: // teq $c r,r
  2467  		v := c.regoff(&p.From)
  2468  		r := int(p.Reg)
  2469  		if r == 0 {
  2470  			r = REGZERO
  2471  		}
  2472  		/*
  2473  			teq c, r1, r2
  2474  			fallthrough
  2475  			==>
  2476  			bne r1, r2, 2
  2477  			break c
  2478  			fallthrough
  2479  		*/
  2480  		if p.As == ATEQ {
  2481  			o1 = OP_16IRR(c.opirr(ABNE), uint32(2), uint32(r), uint32(p.To.Reg))
  2482  		} else { // ATNE
  2483  			o1 = OP_16IRR(c.opirr(ABEQ), uint32(2), uint32(r), uint32(p.To.Reg))
  2484  		}
  2485  		o2 = OP_15I(c.opi(ABREAK), uint32(v))
  2486  
  2487  	case 16: // sll $c,[r1],r2
  2488  		v := c.regoff(&p.From)
  2489  		r := int(p.Reg)
  2490  		if r == 0 {
  2491  			r = int(p.To.Reg)
  2492  		}
  2493  
  2494  		// instruction ending with V:6-digit immediate, others:5-digit immediate
  2495  		if v >= 32 && vshift(p.As) {
  2496  			o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x3f, uint32(r), uint32(p.To.Reg))
  2497  		} else {
  2498  			o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x1f, uint32(r), uint32(p.To.Reg))
  2499  		}
  2500  
  2501  	case 17: // bstrpickw $msbw, r1, $lsbw, r2
  2502  		rd, rj := p.To.Reg, p.Reg
  2503  		if rj == obj.REG_NONE {
  2504  			rj = rd
  2505  		}
  2506  		msb, lsb := p.From.Offset, p.GetFrom3().Offset
  2507  
  2508  		// check the range of msb and lsb
  2509  		var b uint32
  2510  		if p.As == ABSTRPICKW || p.As == ABSTRINSW {
  2511  			b = 32
  2512  		} else {
  2513  			b = 64
  2514  		}
  2515  		if lsb < 0 || uint32(lsb) >= b || msb < 0 || uint32(msb) >= b || uint32(lsb) > uint32(msb) {
  2516  			c.ctxt.Diag("illegal bit number\n%v", p)
  2517  		}
  2518  
  2519  		o1 = OP_IRIR(c.opirir(p.As), uint32(msb), uint32(rj), uint32(lsb), uint32(rd))
  2520  
  2521  	case 18: // jmp [r1],0(r2)
  2522  		r := int(p.Reg)
  2523  		if r == 0 {
  2524  			r = int(o.param)
  2525  		}
  2526  		o1 = OP_RRR(c.oprrr(p.As), uint32(0), uint32(p.To.Reg), uint32(r))
  2527  		if p.As == obj.ACALL {
  2528  			c.cursym.AddRel(c.ctxt, obj.Reloc{
  2529  				Type: objabi.R_CALLIND,
  2530  				Off:  int32(c.pc),
  2531  			})
  2532  		}
  2533  
  2534  	case 19: // mov $lcon,r
  2535  		// NOTE: this case does not use REGTMP. If it ever does,
  2536  		// remove the NOTUSETMP flag in optab.
  2537  		v := c.regoff(&p.From)
  2538  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  2539  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
  2540  
  2541  	case 20: // mov Rsrc, (Rbase)(Roff)
  2542  		o1 = OP_RRR(c.oprrr(p.As), uint32(p.To.Index), uint32(p.To.Reg), uint32(p.From.Reg))
  2543  
  2544  	case 21: // mov (Rbase)(Roff), Rdst
  2545  		o1 = OP_RRR(c.oprrr(-p.As), uint32(p.From.Index), uint32(p.From.Reg), uint32(p.To.Reg))
  2546  
  2547  	case 22: // add $si5,[r1],r2
  2548  		v := c.regoff(&p.From)
  2549  		r := int(p.Reg)
  2550  		if r == 0 {
  2551  			r = int(p.To.Reg)
  2552  		}
  2553  
  2554  		o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2555  
  2556  	case 23: // add $ui8,[r1],r2
  2557  		v := c.regoff(&p.From)
  2558  		r := int(p.Reg)
  2559  		if r == 0 {
  2560  			r = int(p.To.Reg)
  2561  		}
  2562  
  2563  		// the operand range available for instructions VSHUF4IV and XVSHUF4IV is [0, 15]
  2564  		if p.As == AVSHUF4IV || p.As == AXVSHUF4IV {
  2565  			operand := uint32(v)
  2566  			c.checkoperand(p, operand, 15)
  2567  		}
  2568  
  2569  		o1 = OP_8IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2570  
  2571  	case 24: // add $lcon,r1,r2
  2572  		v := c.regoff(&p.From)
  2573  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  2574  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  2575  		r := int(p.Reg)
  2576  		if r == 0 {
  2577  			r = int(p.To.Reg)
  2578  		}
  2579  		o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  2580  
  2581  	case 25: // mov $ucon,r
  2582  		v := c.regoff(&p.From)
  2583  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  2584  
  2585  	case 26: // add/and $ucon,[r1],r2
  2586  		v := c.regoff(&p.From)
  2587  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  2588  		r := int(p.Reg)
  2589  		if r == 0 {
  2590  			r = int(p.To.Reg)
  2591  		}
  2592  		o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  2593  
  2594  	case 27: // mov $lsext/auto/oreg,r
  2595  		v := c.regoff(&p.From)
  2596  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  2597  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  2598  		r := int(p.From.Reg)
  2599  		if r == 0 {
  2600  			r = int(o.param)
  2601  		}
  2602  		o3 = OP_RRR(c.oprrr(add), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  2603  
  2604  	case 28: // mov [sl]ext/auto/oreg,fr
  2605  		v := c.regoff(&p.From)
  2606  		r := int(p.From.Reg)
  2607  		if r == 0 {
  2608  			r = int(o.param)
  2609  		}
  2610  		switch o.size {
  2611  		case 12:
  2612  			o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  2613  			o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  2614  			o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
  2615  
  2616  		case 4:
  2617  			o1 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2618  		}
  2619  
  2620  	case 29: // mov fr,[sl]ext/auto/oreg
  2621  		v := c.regoff(&p.To)
  2622  		r := int(p.To.Reg)
  2623  		if r == 0 {
  2624  			r = int(o.param)
  2625  		}
  2626  		switch o.size {
  2627  		case 12:
  2628  			o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  2629  			o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  2630  			o3 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(REGTMP), uint32(p.From.Reg))
  2631  
  2632  		case 4:
  2633  			o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg))
  2634  		}
  2635  
  2636  	case 30: // mov gr/fr/fcc/fcsr, fr/fcc/fcsr/gr
  2637  		a := c.specialFpMovInst(p.As, oclass(&p.From), oclass(&p.To))
  2638  		o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg))
  2639  
  2640  	case 31: // vsll $ui5, [vr1], vr2
  2641  		v := c.regoff(&p.From)
  2642  		r := int(p.Reg)
  2643  		if r == 0 {
  2644  			r = int(p.To.Reg)
  2645  		}
  2646  		o1 = OP_5IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2647  
  2648  	case 32: // vsll $ui6, [vr1], vr2
  2649  		v := c.regoff(&p.From)
  2650  		r := int(p.Reg)
  2651  		if r == 0 {
  2652  			r = int(p.To.Reg)
  2653  		}
  2654  		o1 = OP_6IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  2655  
  2656  	case 33: // fsel ca, fk, [fj], fd
  2657  		ca := uint32(p.From.Reg)
  2658  		fk := uint32(p.Reg)
  2659  		fd := uint32(p.To.Reg)
  2660  		fj := fd
  2661  		if len(p.RestArgs) > 0 {
  2662  			fj = uint32(p.GetFrom3().Reg)
  2663  		}
  2664  		o1 = 0x340<<18 | (ca&0x7)<<15 | (fk&0x1F)<<10 | (fj&0x1F)<<5 | (fd & 0x1F)
  2665  
  2666  	case 34: // mov $con,fr
  2667  		v := c.regoff(&p.From)
  2668  		a := AADD
  2669  		if v > 0 {
  2670  			a = AOR
  2671  		}
  2672  		a2 := c.specialFpMovInst(p.As, C_REG, oclass(&p.To))
  2673  		o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP))
  2674  		o2 = OP_RR(a2, uint32(REGTMP), uint32(p.To.Reg))
  2675  
  2676  	case 35: // mov r,lext/auto/oreg
  2677  		v := c.regoff(&p.To)
  2678  		r := int(p.To.Reg)
  2679  		if r == 0 {
  2680  			r = int(o.param)
  2681  		}
  2682  		o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  2683  		o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  2684  		o3 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(REGTMP), uint32(p.From.Reg))
  2685  
  2686  	case 36: // mov lext/auto/oreg,r
  2687  		v := c.regoff(&p.From)
  2688  		r := int(p.From.Reg)
  2689  		if r == 0 {
  2690  			r = int(o.param)
  2691  		}
  2692  		o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  2693  		o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  2694  		o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
  2695  
  2696  	case 37: // fmadd r1, r2, [r3], r4
  2697  		r := int(p.To.Reg)
  2698  		if len(p.RestArgs) > 0 {
  2699  			r = int(p.GetFrom3().Reg)
  2700  		}
  2701  		o1 = OP_RRRR(c.oprrrr(p.As), uint32(p.From.Reg), uint32(p.Reg), uint32(r), uint32(p.To.Reg))
  2702  
  2703  	case 38: // word
  2704  		o1 = uint32(c.regoff(&p.From))
  2705  
  2706  	case 39: // vmov Rn, Vd.<T>[index]
  2707  		v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
  2708  		if v == 0 {
  2709  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  2710  		}
  2711  
  2712  		rj := uint32(p.From.Reg & EXT_REG_MASK)
  2713  		rd := uint32(p.To.Reg & EXT_REG_MASK)
  2714  		index := uint32(p.To.Index)
  2715  		c.checkindex(p, index, m)
  2716  		o1 = v | (index << 10) | (rj << 5) | rd
  2717  
  2718  	case 40: // vmov Vd.<T>[index], Rn
  2719  		v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
  2720  		if v == 0 {
  2721  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  2722  		}
  2723  
  2724  		rj := uint32(p.From.Reg & EXT_REG_MASK)
  2725  		rd := uint32(p.To.Reg & EXT_REG_MASK)
  2726  		index := uint32(p.From.Index)
  2727  		c.checkindex(p, index, m)
  2728  		o1 = v | (index << 10) | (rj << 5) | rd
  2729  
  2730  	case 41: // vmov Rn, Vd.<T>
  2731  		v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, false)
  2732  		if v == 0 {
  2733  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  2734  		}
  2735  
  2736  		rj := uint32(p.From.Reg & EXT_REG_MASK)
  2737  		rd := uint32(p.To.Reg & EXT_REG_MASK)
  2738  		o1 = v | (rj << 5) | rd
  2739  
  2740  	case 42: // vmov offset(vj), vd.<T>
  2741  		v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg, true)
  2742  		if v == 0 {
  2743  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  2744  		}
  2745  
  2746  		si := c.regoff(&p.From)
  2747  		Rj := uint32(p.From.Reg & EXT_REG_MASK)
  2748  		Vd := uint32(p.To.Reg & EXT_REG_MASK)
  2749  		switch v & 0xc00000 {
  2750  		case 0x800000: // [x]vldrepl.b
  2751  			o1 = OP_12IRR(v, uint32(si), Rj, Vd)
  2752  		case 0x400000: // [x]vldrepl.h
  2753  			if si&1 != 0 {
  2754  				c.ctxt.Diag("%v: offset must be a multiple of 2.\n", p)
  2755  			}
  2756  			o1 = OP_11IRR(v, uint32(si>>1), Rj, Vd)
  2757  		case 0x0:
  2758  			switch v & 0x300000 {
  2759  			case 0x200000: // [x]vldrepl.w
  2760  				if si&3 != 0 {
  2761  					c.ctxt.Diag("%v: offset must be a multiple of 4.\n", p)
  2762  				}
  2763  				o1 = OP_10IRR(v, uint32(si>>2), Rj, Vd)
  2764  			case 0x100000: // [x]vldrepl.d
  2765  				if si&7 != 0 {
  2766  					c.ctxt.Diag("%v: offset must be a multiple of 8.\n", p)
  2767  				}
  2768  				o1 = OP_9IRR(v, uint32(si>>3), Rj, Vd)
  2769  			}
  2770  		}
  2771  
  2772  	case 45:
  2773  		// sc.q rd, rk, (rj)
  2774  		o1 = OP_RRR(c.oprrr(p.As), uint32(p.Reg), uint32(p.To.Reg), uint32(p.From.Reg))
  2775  
  2776  	case 46:
  2777  		// ll.acq.{w/d}  (rj), rd
  2778  		rj := uint32(p.From.Reg)
  2779  		rd := uint32(p.To.Reg)
  2780  
  2781  		switch p.As {
  2782  		case ASCRELW, ASCRELV:
  2783  			rj = uint32(p.To.Reg)
  2784  			rd = uint32(p.From.Reg)
  2785  		}
  2786  
  2787  		o1 = OP_RR(c.oprr(p.As), rj, rd)
  2788  
  2789  	case 47: // preld  offset(Rbase), $hint
  2790  		offs := c.regoff(&p.From)
  2791  		hint := p.GetFrom3().Offset
  2792  		o1 = OP_12IR_5I(c.opiir(p.As), uint32(offs), uint32(p.From.Reg), uint32(hint))
  2793  
  2794  	case 48: // preldx offset(Rbase), $n, $hint
  2795  		offs := c.regoff(&p.From)
  2796  		hint := p.RestArgs[1].Offset
  2797  		n := uint64(p.GetFrom3().Offset)
  2798  
  2799  		addrSeq := (n >> 0) & 0x1
  2800  		blkSize := (n >> 1) & 0x7ff
  2801  		blkNums := (n >> 12) & 0x1ff
  2802  		stride := (n >> 21) & 0xffff
  2803  
  2804  		if blkSize > 1024 {
  2805  			c.ctxt.Diag("%v: block_size amount out of range[16, 1024]: %v\n", p, blkSize)
  2806  		}
  2807  
  2808  		if blkNums > 256 {
  2809  			c.ctxt.Diag("%v: block_nums amount out of range[1, 256]: %v\n", p, blkNums)
  2810  		}
  2811  
  2812  		v := (uint64(offs) & 0xffff)
  2813  		v += addrSeq << 16
  2814  		v += ((blkSize / 16) - 1) << 20
  2815  		v += (blkNums - 1) << 32
  2816  		v += stride << 44
  2817  
  2818  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  2819  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  2820  		o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  2821  		o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  2822  		o5 = OP_5IRR(c.opirr(p.As), uint32(REGTMP), uint32(p.From.Reg), uint32(hint))
  2823  
  2824  	case 49:
  2825  		if p.As == ANOOP {
  2826  			// andi r0, r0, 0
  2827  			o1 = OP_12IRR(c.opirr(AAND), 0, 0, 0)
  2828  		} else {
  2829  			// undef
  2830  			o1 = OP_15I(c.opi(ABREAK), 0)
  2831  		}
  2832  
  2833  	// relocation operations
  2834  	case 50: // mov r,addr ==> pcalau12i + sw
  2835  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  2836  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2837  			Type: objabi.R_LOONG64_ADDR_HI,
  2838  			Off:  int32(c.pc),
  2839  			Siz:  4,
  2840  			Sym:  p.To.Sym,
  2841  			Add:  p.To.Offset,
  2842  		})
  2843  		o2 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
  2844  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2845  			Type: objabi.R_LOONG64_ADDR_LO,
  2846  			Off:  int32(c.pc + 4),
  2847  			Siz:  4,
  2848  			Sym:  p.To.Sym,
  2849  			Add:  p.To.Offset,
  2850  		})
  2851  
  2852  	case 51: // mov addr,r ==> pcalau12i + lw
  2853  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  2854  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2855  			Type: objabi.R_LOONG64_ADDR_HI,
  2856  			Off:  int32(c.pc),
  2857  			Siz:  4,
  2858  			Sym:  p.From.Sym,
  2859  			Add:  p.From.Offset,
  2860  		})
  2861  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
  2862  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2863  			Type: objabi.R_LOONG64_ADDR_LO,
  2864  			Off:  int32(c.pc + 4),
  2865  			Siz:  4,
  2866  			Sym:  p.From.Sym,
  2867  			Add:  p.From.Offset,
  2868  		})
  2869  
  2870  	case 52: // mov $ext, r
  2871  		// NOTE: this case does not use REGTMP. If it ever does,
  2872  		// remove the NOTUSETMP flag in optab.
  2873  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg))
  2874  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2875  			Type: objabi.R_LOONG64_ADDR_HI,
  2876  			Off:  int32(c.pc),
  2877  			Siz:  4,
  2878  			Sym:  p.From.Sym,
  2879  			Add:  p.From.Offset,
  2880  		})
  2881  		o2 = OP_12IRR(c.opirr(add), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg))
  2882  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2883  			Type: objabi.R_LOONG64_ADDR_LO,
  2884  			Off:  int32(c.pc + 4),
  2885  			Siz:  4,
  2886  			Sym:  p.From.Sym,
  2887  			Add:  p.From.Offset,
  2888  		})
  2889  
  2890  	case 53: // mov r, tlsvar ==>  lu12i.w + ori + add r2, regtmp + sw o(regtmp)
  2891  		// NOTE: this case does not use REGTMP. If it ever does,
  2892  		// remove the NOTUSETMP flag in optab.
  2893  		o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP))
  2894  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2895  			Type: objabi.R_LOONG64_TLS_LE_HI,
  2896  			Off:  int32(c.pc),
  2897  			Siz:  4,
  2898  			Sym:  p.To.Sym,
  2899  			Add:  p.To.Offset,
  2900  		})
  2901  		o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP))
  2902  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2903  			Type: objabi.R_LOONG64_TLS_LE_LO,
  2904  			Off:  int32(c.pc + 4),
  2905  			Siz:  4,
  2906  			Sym:  p.To.Sym,
  2907  			Add:  p.To.Offset,
  2908  		})
  2909  		o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP))
  2910  		o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
  2911  
  2912  	case 54: // lu12i.w + ori + add r2, regtmp + lw o(regtmp)
  2913  		// NOTE: this case does not use REGTMP. If it ever does,
  2914  		// remove the NOTUSETMP flag in optab.
  2915  		o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP))
  2916  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2917  			Type: objabi.R_LOONG64_TLS_LE_HI,
  2918  			Off:  int32(c.pc),
  2919  			Siz:  4,
  2920  			Sym:  p.From.Sym,
  2921  			Add:  p.From.Offset,
  2922  		})
  2923  		o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP))
  2924  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2925  			Type: objabi.R_LOONG64_TLS_LE_LO,
  2926  			Off:  int32(c.pc + 4),
  2927  			Siz:  4,
  2928  			Sym:  p.From.Sym,
  2929  			Add:  p.From.Offset,
  2930  		})
  2931  		o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP))
  2932  		o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
  2933  
  2934  	case 56: // mov r, tlsvar IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + st.d
  2935  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  2936  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2937  			Type: objabi.R_LOONG64_TLS_IE_HI,
  2938  			Off:  int32(c.pc),
  2939  			Siz:  4,
  2940  			Sym:  p.To.Sym,
  2941  		})
  2942  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP))
  2943  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2944  			Type: objabi.R_LOONG64_TLS_IE_LO,
  2945  			Off:  int32(c.pc + 4),
  2946  			Siz:  4,
  2947  			Sym:  p.To.Sym,
  2948  		})
  2949  		o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP))
  2950  		o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
  2951  
  2952  	case 57: // mov tlsvar, r IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + ld.d
  2953  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  2954  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2955  			Type: objabi.R_LOONG64_TLS_IE_HI,
  2956  			Off:  int32(c.pc),
  2957  			Siz:  4,
  2958  			Sym:  p.From.Sym,
  2959  		})
  2960  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP))
  2961  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2962  			Type: objabi.R_LOONG64_TLS_IE_LO,
  2963  			Off:  int32(c.pc + 4),
  2964  			Siz:  4,
  2965  			Sym:  p.From.Sym,
  2966  		})
  2967  		o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP))
  2968  		o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
  2969  
  2970  	case 59: // mov $dcon,r
  2971  		// NOTE: this case does not use REGTMP. If it ever does,
  2972  		// remove the NOTUSETMP flag in optab.
  2973  		v := c.vregoff(&p.From)
  2974  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  2975  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
  2976  		o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  2977  		o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  2978  
  2979  	case 60: // add $dcon,r1,r2
  2980  		v := c.vregoff(&p.From)
  2981  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  2982  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  2983  		o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  2984  		o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  2985  		r := int(p.Reg)
  2986  		if r == 0 {
  2987  			r = int(p.To.Reg)
  2988  		}
  2989  		o5 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  2990  
  2991  	case 61: // word C_DCON
  2992  		o1 = uint32(c.vregoff(&p.From))
  2993  		o2 = uint32(c.vregoff(&p.From) >> 32)
  2994  
  2995  	case 62: // rdtimex rd, rj
  2996  		o1 = OP_RR(c.oprr(p.As), uint32(p.To.Reg), uint32(p.RegTo2))
  2997  
  2998  	case 64: // alsl rd, rj, rk, sa2
  2999  		sa := p.From.Offset - 1
  3000  		if sa < 0 || sa > 3 {
  3001  			c.ctxt.Diag("%v: shift amount out of range[1, 4].\n", p)
  3002  		}
  3003  		r := p.GetFrom3().Reg
  3004  		o1 = OP_2IRRR(c.opirrr(p.As), uint32(sa), uint32(r), uint32(p.Reg), uint32(p.To.Reg))
  3005  
  3006  	case 65: // mov sym@GOT, r ==> pcalau12i + ld.d
  3007  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg))
  3008  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  3009  			Type: objabi.R_LOONG64_GOT_HI,
  3010  			Off:  int32(c.pc),
  3011  			Siz:  4,
  3012  			Sym:  p.From.Sym,
  3013  		})
  3014  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg))
  3015  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  3016  			Type: objabi.R_LOONG64_GOT_LO,
  3017  			Off:  int32(c.pc + 4),
  3018  			Siz:  4,
  3019  			Sym:  p.From.Sym,
  3020  		})
  3021  
  3022  	case 66: // am* From, To, RegTo2 ==> am* RegTo2, From, To
  3023  		rk := p.From.Reg
  3024  		rj := p.To.Reg
  3025  		rd := p.RegTo2
  3026  
  3027  		// See section 2.2.7.1 of https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html
  3028  		// for the register usage constraints.
  3029  		if rd == rj || rd == rk {
  3030  			c.ctxt.Diag("illegal register combination: %v\n", p)
  3031  		}
  3032  		o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd))
  3033  
  3034  	case 67: // mov $dcon12_0, r
  3035  		v := c.vregoff(&p.From)
  3036  		o1 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(0), uint32(p.To.Reg))
  3037  
  3038  	case 68: // mov $dcon12_20S, r
  3039  		v := c.vregoff(&p.From)
  3040  		contype := c.aclass(&p.From)
  3041  		switch contype {
  3042  		default: // C_DCON12_20S
  3043  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  3044  			o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  3045  		case C_DCON20S_20:
  3046  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  3047  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  3048  		case C_DCON12_12S:
  3049  			o1 = OP_12IRR(c.opirr(AADDV), uint32(v), uint32(0), uint32(p.To.Reg))
  3050  			o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  3051  		case C_DCON20S_12S, C_DCON20S_0:
  3052  			o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(p.To.Reg))
  3053  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  3054  		case C_DCON12_12U:
  3055  			o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg))
  3056  			o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  3057  		case C_DCON20S_12U:
  3058  			o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg))
  3059  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  3060  		}
  3061  
  3062  	case 69: // mov $dcon32_12S, r
  3063  		v := c.vregoff(&p.From)
  3064  		contype := c.aclass(&p.From)
  3065  		switch contype {
  3066  		default: // C_DCON32_12S, C_DCON32_0
  3067  			o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(p.To.Reg))
  3068  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  3069  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  3070  		case C_DCON32_20:
  3071  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  3072  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  3073  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  3074  		case C_DCON12_32S:
  3075  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  3076  			o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
  3077  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  3078  		case C_DCON20S_32:
  3079  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  3080  			o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
  3081  			o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  3082  		case C_DCON32_12U:
  3083  			o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(p.To.Reg))
  3084  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  3085  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  3086  		}
  3087  
  3088  	case 70: // add $dcon12_0,[r1],r2
  3089  		v := c.vregoff(&p.From)
  3090  		r := int(p.Reg)
  3091  		if r == 0 {
  3092  			r = int(p.To.Reg)
  3093  		}
  3094  		o1 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(0), uint32(REGTMP))
  3095  		o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  3096  
  3097  	case 71: // add $dcon12_20S,[r1],r2
  3098  		v := c.vregoff(&p.From)
  3099  		r := int(p.Reg)
  3100  		if r == 0 {
  3101  			r = int(p.To.Reg)
  3102  		}
  3103  		contype := c.aclass(&p.From)
  3104  		switch contype {
  3105  		default: // C_DCON12_20S
  3106  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  3107  			o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3108  		case C_DCON20S_20:
  3109  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  3110  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3111  		case C_DCON12_12S:
  3112  			o1 = OP_12IRR(c.opirr(AADDV), uint32(v), uint32(0), uint32(REGTMP))
  3113  			o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3114  		case C_DCON20S_12S, C_DCON20S_0:
  3115  			o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(REGTMP))
  3116  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3117  		case C_DCON12_12U:
  3118  			o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP))
  3119  			o2 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3120  		case C_DCON20S_12U:
  3121  			o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP))
  3122  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3123  		}
  3124  		o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  3125  
  3126  	case 72: // add $dcon32_12S,[r1],r2
  3127  		v := c.vregoff(&p.From)
  3128  		r := int(p.Reg)
  3129  		if r == 0 {
  3130  			r = int(p.To.Reg)
  3131  		}
  3132  		contype := c.aclass(&p.From)
  3133  		switch contype {
  3134  		default: // C_DCON32_12S, C_DCON32_0
  3135  			o1 = OP_12IRR(c.opirr(AADD), uint32(v), uint32(0), uint32(REGTMP))
  3136  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3137  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3138  		case C_DCON32_20:
  3139  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  3140  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3141  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3142  		case C_DCON12_32S:
  3143  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  3144  			o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  3145  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3146  		case C_DCON20S_32:
  3147  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  3148  			o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  3149  			o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3150  		case C_DCON32_12U:
  3151  			o1 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(0), uint32(REGTMP))
  3152  			o2 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3153  			o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3154  		}
  3155  		o4 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  3156  
  3157  	case 73:
  3158  		v := c.vregoff(&p.To)
  3159  		r := p.To.Reg
  3160  		if v&3 != 0 {
  3161  			c.ctxt.Diag("%v: offset must be a multiple of 4.\n", p)
  3162  		}
  3163  
  3164  		switch o.size {
  3165  		case 4: // 16 bit
  3166  			o1 = OP_14IRR(c.opirr(p.As), uint32(v>>2), uint32(r), uint32(p.From.Reg))
  3167  		case 12: // 32 bit
  3168  			o1 = OP_16IRR(c.opirr(AADDV16), uint32(v>>16), uint32(REG_R0), uint32(REGTMP))
  3169  			o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  3170  			o3 = OP_14IRR(c.opirr(p.As), uint32(v>>2), uint32(REGTMP), uint32(p.From.Reg))
  3171  		case 24: // 64 bit
  3172  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  3173  			o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  3174  			o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3175  			o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3176  			o5 = OP_RRR(c.oprrr(add), uint32(REGTMP), uint32(r), uint32(r))
  3177  			o6 = OP_14IRR(c.opirr(p.As), uint32(0), uint32(r), uint32(p.From.Reg))
  3178  		}
  3179  
  3180  	case 74:
  3181  		v := c.vregoff(&p.From)
  3182  		r := p.From.Reg
  3183  		if v&3 != 0 {
  3184  			c.ctxt.Diag("%v: offset must be a multiple of 4.\n", p)
  3185  		}
  3186  
  3187  		switch o.size {
  3188  		case 4: // 16 bit
  3189  			o1 = OP_14IRR(c.opirr(-p.As), uint32(v>>2), uint32(r), uint32(p.To.Reg))
  3190  		case 12: // 32 bit
  3191  			o1 = OP_16IRR(c.opirr(AADDV16), uint32(v>>16), uint32(REG_R0), uint32(REGTMP))
  3192  			o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  3193  			o3 = OP_14IRR(c.opirr(-p.As), uint32(v>>2), uint32(REGTMP), uint32(p.To.Reg))
  3194  		case 24: // 64 bit
  3195  			o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  3196  			o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  3197  			o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  3198  			o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  3199  			o5 = OP_RRR(c.oprrr(add), uint32(REGTMP), uint32(r), uint32(r))
  3200  			o6 = OP_14IRR(c.opirr(p.As), uint32(0), uint32(r), uint32(p.To.Reg))
  3201  		}
  3202  
  3203  	}
  3204  
  3205  	out[0] = o1
  3206  	out[1] = o2
  3207  	out[2] = o3
  3208  	out[3] = o4
  3209  	out[4] = o5
  3210  	out[5] = o6
  3211  }
  3212  
  3213  // checkoperand checks if operand >= 0 && operand <= maxoperand
  3214  func (c *ctxt0) checkoperand(p *obj.Prog, operand uint32, mask uint32) {
  3215  	if (operand & ^mask) != 0 {
  3216  		c.ctxt.Diag("operand out of range 0 to %d: %v", mask, p)
  3217  	}
  3218  }
  3219  
  3220  // checkindex checks if index >= 0 && index <= maxindex
  3221  func (c *ctxt0) checkindex(p *obj.Prog, index uint32, mask uint32) {
  3222  	if (index & ^mask) != 0 {
  3223  		c.ctxt.Diag("register element index out of range 0 to %d: %v", mask, p)
  3224  	}
  3225  }
  3226  
  3227  func (c *ctxt0) vregoff(a *obj.Addr) int64 {
  3228  	c.instoffset = 0
  3229  	c.aclass(a)
  3230  	return c.instoffset
  3231  }
  3232  
  3233  func (c *ctxt0) regoff(a *obj.Addr) int32 {
  3234  	return int32(c.vregoff(a))
  3235  }
  3236  
  3237  func (c *ctxt0) oprrrr(a obj.As) uint32 {
  3238  	op, ok := oprrrr[a]
  3239  	if ok {
  3240  		return op
  3241  	}
  3242  	c.ctxt.Diag("bad rrrr opcode %v", a)
  3243  	return 0
  3244  }
  3245  
  3246  func (c *ctxt0) oprrr(a obj.As) uint32 {
  3247  	op, ok := oprrr[a]
  3248  	if ok {
  3249  		return op
  3250  	}
  3251  	c.ctxt.Diag("bad rrr opcode %v", a)
  3252  	return 0
  3253  }
  3254  
  3255  func (c *ctxt0) oprr(a obj.As) uint32 {
  3256  	op, ok := oprr[a]
  3257  	if ok {
  3258  		return op
  3259  	}
  3260  	c.ctxt.Diag("bad rr opcode %v", a)
  3261  	return 0
  3262  }
  3263  
  3264  func (c *ctxt0) opi(a obj.As) uint32 {
  3265  	op, ok := opi[a]
  3266  	if ok {
  3267  		return op
  3268  	}
  3269  	c.ctxt.Diag("bad i opcode %v", a)
  3270  	return 0
  3271  }
  3272  
  3273  func (c *ctxt0) opir(a obj.As) uint32 {
  3274  	op, ok := opir[a]
  3275  	if ok {
  3276  		return op
  3277  	}
  3278  	c.ctxt.Diag("bad ir opcode %v", a)
  3279  	return 0
  3280  }
  3281  
  3282  func (c *ctxt0) opirr(a obj.As) uint32 {
  3283  	op, ok := opirr[a]
  3284  	if ok {
  3285  		return op
  3286  	}
  3287  	c.ctxt.Diag("bad irr opcode %v", a)
  3288  	return 0
  3289  }
  3290  
  3291  func (c *ctxt0) opirrr(a obj.As) uint32 {
  3292  	op, ok := opirrr[a]
  3293  	if ok {
  3294  		return op
  3295  	}
  3296  	c.ctxt.Diag("bad irrr opcode %v", a)
  3297  	return 0
  3298  }
  3299  
  3300  func (c *ctxt0) opirir(a obj.As) uint32 {
  3301  	op, ok := opirir[a]
  3302  	if ok {
  3303  		return op
  3304  	}
  3305  	c.ctxt.Diag("bad irir opcode %v", a)
  3306  	return 0
  3307  }
  3308  
  3309  func (c *ctxt0) opiir(a obj.As) uint32 {
  3310  	op, ok := opiir[a]
  3311  	if ok {
  3312  		return op
  3313  	}
  3314  	c.ctxt.Diag("bad iir opcode %v", a)
  3315  	return 0
  3316  }
  3317  
  3318  func vshift(a obj.As) bool {
  3319  	switch a {
  3320  	case ASLLV,
  3321  		ASRLV,
  3322  		ASRAV,
  3323  		AROTRV:
  3324  		return true
  3325  	}
  3326  	return false
  3327  }
  3328  

View as plain text