Source file src/cmd/internal/obj/loong64/asm.go

     1  // Copyright 2022 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package loong64
     6  
     7  import (
     8  	"cmd/internal/obj"
     9  	"cmd/internal/objabi"
    10  	"fmt"
    11  	"log"
    12  	"slices"
    13  )
    14  
// ctxt0 holds state while assembling a single function.
// Each function gets a fresh ctxt0.
// This allows for multiple functions to be safely concurrently assembled.
//
// Fields:
//   - ctxt is the link context; it is used for diagnostics and for
//     architecture information such as the byte order and fixed frame size.
//   - newprog allocates the Progs that span0 synthesizes (padding and
//     long-branch trampolines).
//   - cursym is the symbol of the function being assembled.
//   - autosize is initialized by span0 to the TEXT instruction's To.Offset
//     plus the architecture's fixed frame size.
//   - instoffset is written by aclass with the offset of the operand it
//     classified.
//   - pc is set by span0 to the Pc of the Prog being encoded.
type ctxt0 struct {
	ctxt       *obj.Link
	newprog    obj.ProgAlloc
	cursym     *obj.LSym
	autosize   int32
	instoffset int64
	pc         int64
}
    26  
    27  // Instruction layout.
    28  
const (
	// FuncAlign is the alignment, in bytes, that span0 rounds the final
	// function size up to.
	FuncAlign = 4
	// loopAlign is the alignment, in bytes, that span0 enforces on loop
	// heads by inserting a PCALIGN before them.
	loopAlign = 16
)
    33  
// Optab is one row of the optab table: an opcode, the operand classes it is
// listed with, and the information needed to size and encode a match.
//
// Fields:
//   - as is the opcode the row applies to.
//   - from1, reg, from3, to1 and to2 hold operand classes (the C_* constants
//     used in optab). NOTE(review): presumably these correspond to the first
//     source, the middle register, the third source and the two destination
//     operands of a Prog — confirm against oplook.
//   - type_ selects how the row is handled; span0 treats type_ 6 as a
//     conditional branch. NOTE(review): presumably the case number in
//     asmout — confirm.
//   - size is the number of bytes the instruction occupies. A size of 0 is
//     used for entries that span0 sizes specially (PCALIGN) or skips.
//   - param holds a register such as REGSP, REGZERO or REGLINK in optab.
//     NOTE(review): presumably the implied base/link register — confirm.
//   - flag holds bit flags such as NOTUSETMP.
type Optab struct {
	as    obj.As
	from1 uint8
	reg   uint8
	from3 uint8
	to1   uint8
	to2   uint8
	type_ int8
	size  int8
	param int16
	flag  uint8
}
    46  
// Bit flags used by the assembler. Note that the two values are tested
// against different fields: NOTUSETMP is checked in Optab.flag, while
// branchLoopHead is set in and read from Prog.Mark by span0.
const (
	NOTUSETMP = 1 << iota // p expands to multiple instructions, but does NOT use REGTMP

	// branchLoopHead marks loop entry.
	// Used to insert padding for under-aligned loops.
	branchLoopHead
)
    54  
// optab lists the opcode/operand-class combinations known to the assembler.
//
// Each row is an Optab literal whose columns are, in order:
// as, from1, reg, from3, to1, to2, type_, size, param, flag.
// The size column is in bytes; rows with size > 4 describe sequences of
// several machine instructions, and NOTUSETMP in the flag column marks the
// multi-instruction rows that do not use REGTMP.
var optab = []Optab{
	{obj.ATEXT, C_ADDR, C_NONE, C_NONE, C_TEXTSIZE, C_NONE, 0, 0, 0, 0},

	{AMOVW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 1, 4, 0, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 12, 4, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 12, 4, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 12, 4, 0, 0},

	{ASUB, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASUBV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADD, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADDV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AAND, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASUB, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASUBV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADD, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADDV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AAND, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ANEGW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ANEGV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AMASKEQZ, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLL, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLL, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLLV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{ASLLV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AMUL, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AMUL, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AMULV, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AMULV, C_REG, C_REG, C_NONE, C_REG, C_NONE, 2, 4, 0, 0},
	{AADDF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 2, 4, 0, 0},
	{AADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 2, 4, 0, 0},
	{ACMPEQF, C_FREG, C_FREG, C_NONE, C_FCCREG, C_NONE, 2, 4, 0, 0},
	{AVSEQB, C_VREG, C_VREG, C_NONE, C_VREG, C_NONE, 2, 4, 0, 0},
	{AXVSEQB, C_XREG, C_XREG, C_NONE, C_XREG, C_NONE, 2, 4, 0, 0},

	{ACLOW, C_REG, C_NONE, C_NONE, C_REG, C_NONE, 9, 4, 0, 0},
	{AABSF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
	{AMOVVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_NONE, C_FREG, C_NONE, 9, 4, 0, 0},
	{AVPCNTB, C_VREG, C_NONE, C_NONE, C_VREG, C_NONE, 9, 4, 0, 0},
	{AXVPCNTB, C_XREG, C_NONE, C_NONE, C_XREG, C_NONE, 9, 4, 0, 0},

	{AFMADDF, C_FREG, C_FREG, C_NONE, C_FREG, C_NONE, 37, 4, 0, 0},
	{AFMADDF, C_FREG, C_FREG, C_FREG, C_FREG, C_NONE, 37, 4, 0, 0},

	{AMOVW, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGSP, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{AVMOVQ, C_VREG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{AVMOVQ, C_VREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGZERO, 0},
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 7, 4, REGZERO, 0},
	{ASC, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},
	{ASCV, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 7, 4, REGZERO, 0},

	{AMOVW, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVWU, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVV, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVB, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVBU, C_SAUTO, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGSP, 0},
	{AMOVW, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVWU, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVV, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVB, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AMOVBU, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{AVMOVQ, C_SOREG, C_NONE, C_NONE, C_VREG, C_NONE, 8, 4, REGZERO, 0},
	{AXVMOVQ, C_SOREG, C_NONE, C_NONE, C_XREG, C_NONE, 8, 4, REGZERO, 0},
	{AVMOVQ, C_SAUTO, C_NONE, C_NONE, C_VREG, C_NONE, 8, 4, REGZERO, 0},
	{AXVMOVQ, C_SAUTO, C_NONE, C_NONE, C_XREG, C_NONE, 8, 4, REGZERO, 0},
	{ALL, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},
	{ALLV, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 8, 4, REGZERO, 0},

	{AMOVW, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_LAUTO, C_NONE, 35, 12, REGSP, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0},
	{ASC, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 35, 12, REGZERO, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVB, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0},

	{AMOVW, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVWU, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVV, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVB, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVBU, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0},
	{AMOVW, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVWU, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVV, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVB, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVBU, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGZERO, 0},
	{AMOVW, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	{AMOVWU, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	{AMOVV, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	{AMOVB, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	{AMOVBU, C_ADDR, C_NONE, C_NONE, C_REG, C_NONE, 51, 8, 0, 0},
	{AMOVW, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVWU, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVV, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVB, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},
	{AMOVBU, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0},

	{AMOVW, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0},
	{AMOVV, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0},
	{AMOVW, C_EXTADDR, C_NONE, C_NONE, C_REG, C_NONE, 52, 8, 0, NOTUSETMP},
	{AMOVV, C_EXTADDR, C_NONE, C_NONE, C_REG, C_NONE, 52, 8, 0, NOTUSETMP},

	{AMOVW, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0},
	{AMOVV, C_LACON, C_NONE, C_NONE, C_REG, C_NONE, 26, 12, REGSP, 0},
	{AMOVW, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0},
	{AMOVV, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0},
	{AMOVW, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0},
	{AMOVV, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGZERO, 0},

	{AMOVW, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0},
	{AMOVV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 24, 4, 0, 0},
	{AMOVW, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP},
	{AMOVV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 19, 8, 0, NOTUSETMP},
	{AMOVV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 59, 16, 0, NOTUSETMP},

	{AADD, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADD, C_ADD0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADD, C_ANDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AADD, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},

	{AADDV, C_ADD0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADDV, C_ADD0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AADDV, C_ANDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AADDV, C_ANDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},

	{AAND, C_AND0CON, C_REG, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AAND, C_AND0CON, C_NONE, C_NONE, C_REG, C_NONE, 4, 4, 0, 0},
	{AAND, C_ADDCON, C_REG, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},
	{AAND, C_ADDCON, C_NONE, C_NONE, C_REG, C_NONE, 10, 8, 0, 0},

	{AADD, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0},
	{AADD, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0},
	{AADDV, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0},
	{AADDV, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0},
	{AAND, C_UCON, C_REG, C_NONE, C_REG, C_NONE, 25, 8, 0, 0},
	{AAND, C_UCON, C_NONE, C_NONE, C_REG, C_NONE, 25, 8, 0, 0},

	{AADD, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0},
	{AADDV, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0},
	{AAND, C_LCON, C_NONE, C_NONE, C_REG, C_NONE, 23, 12, 0, 0},
	{AADD, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0},
	{AADDV, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0},
	{AAND, C_LCON, C_REG, C_NONE, C_REG, C_NONE, 23, 12, 0, 0},

	{AADDV, C_DCON, C_NONE, C_NONE, C_REG, C_NONE, 60, 20, 0, 0},
	{AADDV, C_DCON, C_REG, C_NONE, C_REG, C_NONE, 60, 20, 0, 0},

	{ASLL, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},
	{ASLL, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},

	{ASLLV, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},
	{ASLLV, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 16, 4, 0, 0},

	{ABSTRPICKW, C_SCON, C_REG, C_SCON, C_REG, C_NONE, 17, 4, 0, 0},
	{ABSTRPICKW, C_SCON, C_REG, C_ZCON, C_REG, C_NONE, 17, 4, 0, 0},
	{ABSTRPICKW, C_ZCON, C_REG, C_ZCON, C_REG, C_NONE, 17, 4, 0, 0},

	{ASYSCALL, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0},
	{ASYSCALL, C_ANDCON, C_NONE, C_NONE, C_NONE, C_NONE, 5, 4, 0, 0},

	// Rows of type_ 6 are the conditional branches that span0 range-checks
	// and, if necessary, rewrites into a short branch around unconditional jumps.
	{ABEQ, C_REG, C_REG, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	{ABEQ, C_REG, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	{ABLEZ, C_REG, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, 0},
	{ABFPT, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 6, 4, 0, NOTUSETMP},

	{AJMP, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 11, 4, 0, 0}, // b
	{AJAL, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 11, 4, 0, 0}, // bl

	{AJMP, C_NONE, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGZERO, 0}, // jirl r0, rj, 0
	{AJAL, C_NONE, C_NONE, C_NONE, C_ZOREG, C_NONE, 18, 4, REGLINK, 0}, // jirl r1, rj, 0

	{AMOVF, C_SAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0},
	{AMOVD, C_SAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGSP, 0},
	{AMOVF, C_SOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0},
	{AMOVD, C_SOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 4, REGZERO, 0},

	{AMOVF, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0},
	{AMOVD, C_LAUTO, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGSP, 0},
	{AMOVF, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0},
	{AMOVD, C_LOREG, C_NONE, C_NONE, C_FREG, C_NONE, 27, 12, REGZERO, 0},
	{AMOVF, C_ADDR, C_NONE, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0},
	{AMOVD, C_ADDR, C_NONE, C_NONE, C_FREG, C_NONE, 51, 8, 0, 0},

	{AMOVF, C_FREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0},
	{AMOVD, C_FREG, C_NONE, C_NONE, C_SAUTO, C_NONE, 28, 4, REGSP, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0},
	{AMOVD, C_FREG, C_NONE, C_NONE, C_SOREG, C_NONE, 28, 4, REGZERO, 0},

	{AMOVF, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0},
	{AMOVD, C_FREG, C_NONE, C_NONE, C_LAUTO, C_NONE, 28, 12, REGSP, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0},
	{AMOVD, C_FREG, C_NONE, C_NONE, C_LOREG, C_NONE, 28, 12, REGZERO, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_NONE, C_ADDR, C_NONE, 50, 8, 0, 0},

	{AMOVW, C_REG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0},
	{AMOVW, C_FREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FCCREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FCSRREG, C_NONE, C_NONE, C_REG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_FCCREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_FCSRREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FREG, C_NONE, C_NONE, C_FCCREG, C_NONE, 30, 4, 0, 0},
	{AMOVV, C_FCCREG, C_NONE, C_NONE, C_FREG, C_NONE, 30, 4, 0, 0},

	{AMOVW, C_ADDCON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0},
	{AMOVW, C_ANDCON, C_NONE, C_NONE, C_FREG, C_NONE, 34, 8, 0, 0},

	{AMOVB, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVBU, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},
	{AMOVWU, C_REG, C_NONE, C_NONE, C_TLS_IE, C_NONE, 56, 16, 0, 0},

	{AMOVB, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	{AMOVW, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	{AMOVV, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	{AMOVBU, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},
	{AMOVWU, C_TLS_IE, C_NONE, C_NONE, C_REG, C_NONE, 57, 16, 0, 0},

	{AWORD, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 38, 4, 0, 0},
	{AWORD, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 61, 4, 0, 0},

	{AMOVV, C_GOTADDR, C_NONE, C_NONE, C_REG, C_NONE, 65, 8, 0, 0},

	{ATEQ, C_SCON, C_REG, C_NONE, C_REG, C_NONE, 15, 8, 0, 0},
	{ATEQ, C_SCON, C_NONE, C_NONE, C_REG, C_NONE, 15, 8, 0, 0},

	{ARDTIMELW, C_NONE, C_NONE, C_NONE, C_REG, C_REG, 62, 4, 0, 0},
	{AAMSWAPW, C_REG, C_NONE, C_NONE, C_ZOREG, C_REG, 66, 4, 0, 0},
	{ANOOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0},

	/* store with extended register offset */
	{AMOVB, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVW, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVV, C_REG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVF, C_FREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AMOVD, C_FREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AVMOVQ, C_VREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_ROFF, C_NONE, 20, 4, 0, 0},

	/* load with extended register offset */
	{AMOVB, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	{AMOVBU, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	{AMOVW, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	{AMOVWU, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	{AMOVV, C_ROFF, C_NONE, C_NONE, C_REG, C_NONE, 21, 4, 0, 0},
	{AMOVF, C_ROFF, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0},
	{AMOVD, C_ROFF, C_NONE, C_NONE, C_FREG, C_NONE, 21, 4, 0, 0},
	{AVMOVQ, C_ROFF, C_NONE, C_NONE, C_VREG, C_NONE, 21, 4, 0, 0},
	{AXVMOVQ, C_ROFF, C_NONE, C_NONE, C_XREG, C_NONE, 21, 4, 0, 0},

	{AVMOVQ, C_REG, C_NONE, C_NONE, C_ELEM, C_NONE, 39, 4, 0, 0},
	{AVMOVQ, C_ELEM, C_NONE, C_NONE, C_REG, C_NONE, 40, 4, 0, 0},
	{AXVMOVQ, C_REG, C_NONE, C_NONE, C_ELEM, C_NONE, 39, 4, 0, 0},
	{AXVMOVQ, C_ELEM, C_NONE, C_NONE, C_REG, C_NONE, 40, 4, 0, 0},

	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_ELEM, C_NONE, 43, 4, 0, 0},
	{AXVMOVQ, C_ELEM, C_NONE, C_NONE, C_XREG, C_NONE, 44, 4, 0, 0},

	{AVMOVQ, C_REG, C_NONE, C_NONE, C_ARNG, C_NONE, 41, 4, 0, 0},
	{AXVMOVQ, C_REG, C_NONE, C_NONE, C_ARNG, C_NONE, 41, 4, 0, 0},
	{AXVMOVQ, C_XREG, C_NONE, C_NONE, C_ARNG, C_NONE, 42, 4, 0, 0},

	{AVMOVQ, C_ELEM, C_NONE, C_NONE, C_ARNG, C_NONE, 45, 4, 0, 0},

	// Pseudo-instructions. The zero-size rows are handled in span0:
	// APCALIGN is sized from the current pc, while ANOP, AFUNCDATA and
	// APCDATA occupy no space.
	{obj.APCALIGN, C_SCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
	{obj.APCDATA, C_LCON, C_NONE, C_NONE, C_LCON, C_NONE, 0, 0, 0, 0},
	{obj.APCDATA, C_DCON, C_NONE, C_NONE, C_DCON, C_NONE, 0, 0, 0, 0},
	{obj.AFUNCDATA, C_SCON, C_NONE, C_NONE, C_ADDR, C_NONE, 0, 0, 0, 0},
	{obj.ANOP, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
	{obj.ANOP, C_LCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689
	{obj.ANOP, C_DCON, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0}, // nop variants, see #40689
	{obj.ANOP, C_REG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
	{obj.ANOP, C_FREG, C_NONE, C_NONE, C_NONE, C_NONE, 0, 0, 0, 0},
	{obj.ADUFFZERO, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 11, 4, 0, 0}, // same as AJMP
	{obj.ADUFFCOPY, C_NONE, C_NONE, C_NONE, C_BRAN, C_NONE, 11, 4, 0, 0}, // same as AJMP

	{obj.AXXX, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 0, 4, 0, 0},
}
   363  
// atomicInst maps each atomic memory-access opcode to a 32-bit value formed
// by shifting a constant left by 15 bits; the trailing comment on each entry
// names the corresponding machine mnemonic. Membership in this map is what
// IsAtomicInst reports.
// NOTE(review): the values are presumably the fixed opcode bits of the
// instruction encoding — confirm against the encoder.
var atomicInst = map[obj.As]uint32{
	AAMSWAPB:   0x070B8 << 15, // amswap.b
	AAMSWAPH:   0x070B9 << 15, // amswap.h
	AAMSWAPW:   0x070C0 << 15, // amswap.w
	AAMSWAPV:   0x070C1 << 15, // amswap.d
	AAMCASB:    0x070B0 << 15, // amcas.b
	AAMCASH:    0x070B1 << 15, // amcas.h
	AAMCASW:    0x070B2 << 15, // amcas.w
	AAMCASV:    0x070B3 << 15, // amcas.d
	AAMADDW:    0x070C2 << 15, // amadd.w
	AAMADDV:    0x070C3 << 15, // amadd.d
	AAMANDW:    0x070C4 << 15, // amand.w
	AAMANDV:    0x070C5 << 15, // amand.d
	AAMORW:     0x070C6 << 15, // amor.w
	AAMORV:     0x070C7 << 15, // amor.d
	AAMXORW:    0x070C8 << 15, // amxor.w
	AAMXORV:    0x070C9 << 15, // amxor.d
	AAMMAXW:    0x070CA << 15, // ammax.w
	AAMMAXV:    0x070CB << 15, // ammax.d
	AAMMINW:    0x070CC << 15, // ammin.w
	AAMMINV:    0x070CD << 15, // ammin.d
	AAMMAXWU:   0x070CE << 15, // ammax.wu
	AAMMAXVU:   0x070CF << 15, // ammax.du
	AAMMINWU:   0x070D0 << 15, // ammin.wu
	AAMMINVU:   0x070D1 << 15, // ammin.du
	AAMSWAPDBB: 0x070BC << 15, // amswap_db.b
	AAMSWAPDBH: 0x070BD << 15, // amswap_db.h
	AAMSWAPDBW: 0x070D2 << 15, // amswap_db.w
	AAMSWAPDBV: 0x070D3 << 15, // amswap_db.d
	AAMCASDBB:  0x070B4 << 15, // amcas_db.b
	AAMCASDBH:  0x070B5 << 15, // amcas_db.h
	AAMCASDBW:  0x070B6 << 15, // amcas_db.w
	AAMCASDBV:  0x070B7 << 15, // amcas_db.d
	AAMADDDBW:  0x070D4 << 15, // amadd_db.w
	AAMADDDBV:  0x070D5 << 15, // amadd_db.d
	AAMANDDBW:  0x070D6 << 15, // amand_db.w
	AAMANDDBV:  0x070D7 << 15, // amand_db.d
	AAMORDBW:   0x070D8 << 15, // amor_db.w
	AAMORDBV:   0x070D9 << 15, // amor_db.d
	AAMXORDBW:  0x070DA << 15, // amxor_db.w
	AAMXORDBV:  0x070DB << 15, // amxor_db.d
	AAMMAXDBW:  0x070DC << 15, // ammax_db.w
	AAMMAXDBV:  0x070DD << 15, // ammax_db.d
	AAMMINDBW:  0x070DE << 15, // ammin_db.w
	AAMMINDBV:  0x070DF << 15, // ammin_db.d
	AAMMAXDBWU: 0x070E0 << 15, // ammax_db.wu
	AAMMAXDBVU: 0x070E1 << 15, // ammax_db.du
	AAMMINDBWU: 0x070E2 << 15, // ammin_db.wu
	AAMMINDBVU: 0x070E3 << 15, // ammin_db.du
}
   414  
   415  func IsAtomicInst(as obj.As) bool {
   416  	_, ok := atomicInst[as]
   417  
   418  	return ok
   419  }
   420  
   421  // pcAlignPadLength returns the number of bytes required to align pc to alignedValue,
   422  // reporting an error if alignedValue is not a power of two or is out of range.
   423  func pcAlignPadLength(ctxt *obj.Link, pc int64, alignedValue int64) int {
   424  	if !((alignedValue&(alignedValue-1) == 0) && 8 <= alignedValue && alignedValue <= 2048) {
   425  		ctxt.Diag("alignment value of an instruction must be a power of two and in the range [8, 2048], got %d\n", alignedValue)
   426  	}
   427  	return int(-pc & (alignedValue - 1))
   428  }
   429  
// oprange holds, for each opcode (indexed by the opcode masked with
// obj.AMask), a slice of Optab entries. span0 reports an error if it has not
// been populated, which according to that diagnostic is done by buildop.
var oprange [ALAST & obj.AMask][]Optab

// xcmp is a boolean matrix indexed by two operand classes.
// NOTE(review): presumably xcmp[a][b] records whether class b is acceptable
// where class a is expected — confirm against buildop/oplook.
var xcmp [C_NCLASS][C_NCLASS]bool
   433  
// span0 assembles the function cursym: it assigns a pc to every Prog,
// pads loop heads, fixes up out-of-range conditional branches, and finally
// encodes the instructions into cursym.P.
//
// The work proceeds in these phases:
//  1. A first pass assigns preliminary pcs from the optab sizes and raises
//     the function's alignment to the largest PCALIGN value seen.
//  2. Targets of backward branches are marked as loop heads.
//  3. A pass is repeated until nothing changes; it inserts PCALIGN $loopAlign
//     before misaligned loop heads and rewrites conditional branches whose
//     target is out of range into a branch around unconditional jumps.
//  4. The size is rounded up to FuncAlign, the symbol is grown, and every
//     Prog is encoded (PCALIGN becomes padding instructions).
//  5. Sequences that are unsafe for asynchronous preemption are marked.
//
// newprog is used to allocate the Progs inserted in phase 3.
func span0(ctxt *obj.Link, cursym *obj.LSym, newprog obj.ProgAlloc) {
	if ctxt.Retpoline {
		ctxt.Diag("-spectre=ret not supported on loong64")
		ctxt.Retpoline = false // don't keep printing
	}

	p := cursym.Func().Text
	if p == nil || p.Link == nil { // handle external functions and ELF section symbols
		return
	}

	c := ctxt0{ctxt: ctxt, newprog: newprog, cursym: cursym, autosize: int32(p.To.Offset + ctxt.Arch.FixedFrameSize)}

	if oprange[AOR&obj.AMask] == nil {
		c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
	}

	pc := int64(0)
	p.Pc = pc

	// First pass: assign a preliminary pc to every Prog after TEXT.
	var m int
	var o *Optab
	for p = p.Link; p != nil; p = p.Link {
		p.Pc = pc
		o = c.oplook(p)
		m = int(o.size)
		if m == 0 {
			switch p.As {
			case obj.APCALIGN:
				alignedValue := p.From.Offset
				m = pcAlignPadLength(ctxt, pc, alignedValue)
				// Update the current text symbol alignment value.
				if int32(alignedValue) > cursym.Func().Align {
					cursym.Func().Align = int32(alignedValue)
				}
				break
			case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
				// These occupy no space; pc is left unchanged.
				continue
			default:
				c.ctxt.Diag("zero-width instruction\n%v", p)
			}
		}

		pc += int64(m)
	}

	c.cursym.Size = pc

	// mark loop entry instructions for padding
	// loop entrances are defined as targets of backward branches
	for p = c.cursym.Func().Text.Link; p != nil; p = p.Link {
		if q := p.To.Target(); q != nil && q.Pc < p.Pc {
			q.Mark |= branchLoopHead
		}
	}

	// Run these passes until convergence.
	for {
		rescan := false
		pc = 0
		prev := c.cursym.Func().Text
		for p = prev.Link; p != nil; prev, p = p, p.Link {
			p.Pc = pc
			o = c.oplook(p)

			// Prepend a PCALIGN $loopAlign to each of the loop heads
			// that need padding, if not already done so (because this
			// pass may execute more than once).
			//
			// This needs to come before any pass that looks at pc,
			// because pc will be adjusted if padding happens.
			if p.Mark&branchLoopHead != 0 && pc&(loopAlign-1) != 0 &&
				!(prev.As == obj.APCALIGN && prev.From.Offset >= loopAlign) {
				q := c.newprog()
				prev.Link = q
				q.Link = p
				q.Pc = pc
				q.As = obj.APCALIGN
				q.From.Type = obj.TYPE_CONST
				q.From.Offset = loopAlign
				// Don't associate the synthesized PCALIGN with
				// the original source position, for deterministic
				// mapping between source and corresponding asm.
				// q.Pos = p.Pos

				// Manually make the PCALIGN come into effect,
				// since this loop iteration is for p.
				pc += int64(pcAlignPadLength(ctxt, pc, loopAlign))
				p.Pc = pc
				rescan = true
			}

			// very large conditional branches
			//
			// if any procedure is large enough to generate a large SBRA branch, then
			// generate extra passes putting branches around jmps to fix. this is rare.
			if o.type_ == 6 && p.To.Target() != nil {
				otxt := p.To.Target().Pc - pc

				// On loong64, the immediate value field of the conditional branch instructions
				// BFPT and BFPF is 21 bits, and the others are 16 bits. The jump target address
				// is to logically shift the immediate value in the instruction code to the left
				// by 2 bits and then sign extend.
				//
				// Hence the reachable byte offsets are [-bound, bound) with
				// bound = 1<<17 for the 16-bit form and 1<<22 for the 21-bit form.
				bound := int64(1 << (18 - 1))

				switch p.As {
				case ABFPT, ABFPF:
					bound = int64(1 << (23 - 1))
				}

				if otxt < -bound || otxt >= bound {
					// Rewrite
					//	p:  Bcond target
					// into
					//	p:  Bcond far
					//	    JMP   next
					//	far: JMP  target
					//	next: ...
					// First insert "far", the jump to the original target,
					// directly after p and retarget p to it.
					q := c.newprog()
					q.Link = p.Link
					p.Link = q
					q.As = AJMP
					q.Pos = p.Pos
					q.To.Type = obj.TYPE_BRANCH
					q.To.SetTarget(p.To.Target())
					p.To.SetTarget(q)
					// Then insert, between p and "far", the jump that
					// skips over "far" when the condition is false.
					q = c.newprog()
					q.Link = p.Link
					p.Link = q
					q.As = AJMP
					q.Pos = p.Pos
					q.To.Type = obj.TYPE_BRANCH
					q.To.SetTarget(q.Link.Link)
					rescan = true
				}
			}

			m = int(o.size)
			if m == 0 {
				switch p.As {
				case obj.APCALIGN:
					alignedValue := p.From.Offset
					m = pcAlignPadLength(ctxt, pc, alignedValue)
					break
				case obj.ANOP, obj.AFUNCDATA, obj.APCDATA:
					continue
				default:
					c.ctxt.Diag("zero-width instruction\n%v", p)
				}
			}

			pc += int64(m)
		}

		c.cursym.Size = pc

		if !rescan {
			break
		}
	}

	// Round the function size up to a multiple of FuncAlign.
	pc += -pc & (FuncAlign - 1)
	c.cursym.Size = pc

	// lay out the code, emitting code and data relocations.

	c.cursym.Grow(c.cursym.Size)

	bp := c.cursym.P
	var i int32
	// out receives the encoded words of one Prog; it must be able to hold
	// the largest optab size (checked below).
	var out [5]uint32
	for p := c.cursym.Func().Text.Link; p != nil; p = p.Link {
		c.pc = p.Pc
		o = c.oplook(p)
		if int(o.size) > 4*len(out) {
			log.Fatalf("out array in span0 is too small, need at least %d for %v", o.size/4, p)
		}
		if p.As == obj.APCALIGN {
			alignedValue := p.From.Offset
			v := pcAlignPadLength(c.ctxt, p.Pc, alignedValue)
			for i = 0; i < int32(v/4); i++ {
				// emit ANOOP instruction by the padding size
				c.ctxt.Arch.ByteOrder.PutUint32(bp, OP_12IRR(c.opirr(AAND), 0, 0, 0))
				bp = bp[4:]
			}
			continue
		}
		c.asmout(p, o, out[:])
		for i = 0; i < int32(o.size/4); i++ {
			c.ctxt.Arch.ByteOrder.PutUint32(bp, out[i])
			bp = bp[4:]
		}
	}

	// Mark nonpreemptible instruction sequences.
	// We use REGTMP as a scratch register during call injection,
	// so instruction sequences that use REGTMP are unsafe to
	// preempt asynchronously.
	obj.MarkUnsafePoints(c.ctxt, c.cursym.Func().Text, c.newprog, c.isUnsafePoint, c.isRestartable)
}
   627  
   628  // isUnsafePoint returns whether p is an unsafe point.
   629  func (c *ctxt0) isUnsafePoint(p *obj.Prog) bool {
   630  	// If p explicitly uses REGTMP, it's unsafe to preempt, because the
   631  	// preemption sequence clobbers REGTMP.
   632  	return p.From.Reg == REGTMP || p.To.Reg == REGTMP || p.Reg == REGTMP
   633  }
   634  
   635  // isRestartable returns whether p is a multi-instruction sequence that,
   636  // if preempted, can be restarted.
   637  func (c *ctxt0) isRestartable(p *obj.Prog) bool {
   638  	if c.isUnsafePoint(p) {
   639  		return false
   640  	}
   641  	// If p is a multi-instruction sequence with uses REGTMP inserted by
   642  	// the assembler in order to materialize a large constant/offset, we
   643  	// can restart p (at the start of the instruction sequence), recompute
   644  	// the content of REGTMP, upon async preemption. Currently, all cases
   645  	// of assembler-inserted REGTMP fall into this category.
   646  	// If p doesn't use REGTMP, it can be simply preempted, so we don't
   647  	// mark it.
   648  	o := c.oplook(p)
   649  	return o.size > 4 && o.flag&NOTUSETMP == 0
   650  }
   651  
   652  func isint32(v int64) bool {
   653  	return int64(int32(v)) == v
   654  }
   655  
   656  func isuint32(v uint64) bool {
   657  	return uint64(uint32(v)) == v
   658  }
   659  
   660  func (c *ctxt0) aclass(a *obj.Addr) int {
   661  	switch a.Type {
   662  	case obj.TYPE_NONE:
   663  		return C_NONE
   664  
   665  	case obj.TYPE_REG:
   666  		return c.rclass(a.Reg)
   667  
   668  	case obj.TYPE_MEM:
   669  		switch a.Name {
   670  		case obj.NAME_EXTERN,
   671  			obj.NAME_STATIC:
   672  			if a.Sym == nil {
   673  				break
   674  			}
   675  			c.instoffset = a.Offset
   676  			if a.Sym.Type == objabi.STLSBSS {
   677  				if c.ctxt.Flag_shared {
   678  					return C_TLS_IE
   679  				} else {
   680  					return C_TLS_LE
   681  				}
   682  			}
   683  			return C_ADDR
   684  
   685  		case obj.NAME_AUTO:
   686  			if a.Reg == REGSP {
   687  				// unset base register for better printing, since
   688  				// a.Offset is still relative to pseudo-SP.
   689  				a.Reg = obj.REG_NONE
   690  			}
   691  			c.instoffset = int64(c.autosize) + a.Offset
   692  			if c.instoffset >= -BIG && c.instoffset < BIG {
   693  				return C_SAUTO
   694  			}
   695  			return C_LAUTO
   696  
   697  		case obj.NAME_PARAM:
   698  			if a.Reg == REGSP {
   699  				// unset base register for better printing, since
   700  				// a.Offset is still relative to pseudo-FP.
   701  				a.Reg = obj.REG_NONE
   702  			}
   703  			c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
   704  			if c.instoffset >= -BIG && c.instoffset < BIG {
   705  				return C_SAUTO
   706  			}
   707  			return C_LAUTO
   708  
   709  		case obj.NAME_NONE:
   710  			if a.Index != 0 {
   711  				if a.Offset != 0 {
   712  					return C_GOK
   713  				}
   714  				// register offset
   715  				return C_ROFF
   716  			}
   717  
   718  			c.instoffset = a.Offset
   719  			if c.instoffset == 0 {
   720  				return C_ZOREG
   721  			}
   722  			if c.instoffset >= -BIG && c.instoffset < BIG {
   723  				return C_SOREG
   724  			}
   725  			return C_LOREG
   726  
   727  		case obj.NAME_GOTREF:
   728  			return C_GOTADDR
   729  		}
   730  
   731  		return C_GOK
   732  
   733  	case obj.TYPE_TEXTSIZE:
   734  		return C_TEXTSIZE
   735  
   736  	case obj.TYPE_CONST,
   737  		obj.TYPE_ADDR:
   738  		switch a.Name {
   739  		case obj.NAME_NONE:
   740  			c.instoffset = a.Offset
   741  			if a.Reg != 0 {
   742  				if -BIG <= c.instoffset && c.instoffset <= BIG {
   743  					return C_SACON
   744  				}
   745  				if isint32(c.instoffset) {
   746  					return C_LACON
   747  				}
   748  				return C_DACON
   749  			}
   750  
   751  		case obj.NAME_EXTERN,
   752  			obj.NAME_STATIC:
   753  			s := a.Sym
   754  			if s == nil {
   755  				return C_GOK
   756  			}
   757  
   758  			c.instoffset = a.Offset
   759  			if s.Type == objabi.STLSBSS {
   760  				c.ctxt.Diag("taking address of TLS variable is not supported")
   761  			}
   762  			return C_EXTADDR
   763  
   764  		case obj.NAME_AUTO:
   765  			if a.Reg == REGSP {
   766  				// unset base register for better printing, since
   767  				// a.Offset is still relative to pseudo-SP.
   768  				a.Reg = obj.REG_NONE
   769  			}
   770  			c.instoffset = int64(c.autosize) + a.Offset
   771  			if c.instoffset >= -BIG && c.instoffset < BIG {
   772  				return C_SACON
   773  			}
   774  			return C_LACON
   775  
   776  		case obj.NAME_PARAM:
   777  			if a.Reg == REGSP {
   778  				// unset base register for better printing, since
   779  				// a.Offset is still relative to pseudo-FP.
   780  				a.Reg = obj.REG_NONE
   781  			}
   782  			c.instoffset = int64(c.autosize) + a.Offset + c.ctxt.Arch.FixedFrameSize
   783  			if c.instoffset >= -BIG && c.instoffset < BIG {
   784  				return C_SACON
   785  			}
   786  			return C_LACON
   787  
   788  		default:
   789  			return C_GOK
   790  		}
   791  
   792  		if c.instoffset != int64(int32(c.instoffset)) {
   793  			return C_DCON
   794  		}
   795  
   796  		if c.instoffset >= 0 {
   797  			if c.instoffset == 0 {
   798  				return C_ZCON
   799  			}
   800  			if c.instoffset <= 0x7ff {
   801  				return C_SCON
   802  			}
   803  			if c.instoffset <= 0xfff {
   804  				return C_ANDCON
   805  			}
   806  			if c.instoffset&0xfff == 0 && isuint32(uint64(c.instoffset)) { // && ((instoffset & (1<<31)) == 0)
   807  				return C_UCON
   808  			}
   809  			if isint32(c.instoffset) || isuint32(uint64(c.instoffset)) {
   810  				return C_LCON
   811  			}
   812  			return C_LCON
   813  		}
   814  
   815  		if c.instoffset >= -0x800 {
   816  			return C_ADDCON
   817  		}
   818  		if c.instoffset&0xfff == 0 && isint32(c.instoffset) {
   819  			return C_UCON
   820  		}
   821  		if isint32(c.instoffset) {
   822  			return C_LCON
   823  		}
   824  		return C_LCON
   825  
   826  	case obj.TYPE_BRANCH:
   827  		return C_BRAN
   828  	}
   829  
   830  	return C_GOK
   831  }
   832  
   833  // In Loong64,there are 8 CFRs, denoted as fcc0-fcc7.
   834  // There are 4 FCSRs, denoted as fcsr0-fcsr3.
   835  func (c *ctxt0) rclass(r int16) int {
   836  	switch {
   837  	case REG_R0 <= r && r <= REG_R31:
   838  		return C_REG
   839  	case REG_F0 <= r && r <= REG_F31:
   840  		return C_FREG
   841  	case REG_FCC0 <= r && r <= REG_FCC7:
   842  		return C_FCCREG
   843  	case REG_FCSR0 <= r && r <= REG_FCSR3:
   844  		return C_FCSRREG
   845  	case REG_V0 <= r && r <= REG_V31:
   846  		return C_VREG
   847  	case REG_X0 <= r && r <= REG_X31:
   848  		return C_XREG
   849  	case r >= REG_ARNG && r < REG_ELEM:
   850  		return C_ARNG
   851  	case r >= REG_ELEM && r < REG_ELEM_END:
   852  		return C_ELEM
   853  	}
   854  
   855  	return C_GOK
   856  }
   857  
   858  func oclass(a *obj.Addr) int {
   859  	return int(a.Class) - 1
   860  }
   861  
   862  func prasm(p *obj.Prog) {
   863  	fmt.Printf("%v\n", p)
   864  }
   865  
   866  func (c *ctxt0) oplook(p *obj.Prog) *Optab {
   867  	if oprange[AOR&obj.AMask] == nil {
   868  		c.ctxt.Diag("loong64 ops not initialized, call loong64.buildop first")
   869  	}
   870  
   871  	a1 := int(p.Optab)
   872  	if a1 != 0 {
   873  		return &optab[a1-1]
   874  	}
   875  
   876  	// first source operand
   877  	a1 = int(p.From.Class)
   878  	if a1 == 0 {
   879  		a1 = c.aclass(&p.From) + 1
   880  		p.From.Class = int8(a1)
   881  	}
   882  	a1--
   883  
   884  	// first destination operand
   885  	a4 := int(p.To.Class)
   886  	if a4 == 0 {
   887  		a4 = c.aclass(&p.To) + 1
   888  		p.To.Class = int8(a4)
   889  	}
   890  	a4--
   891  
   892  	// 2nd source operand
   893  	a2 := C_NONE
   894  	if p.Reg != 0 {
   895  		a2 = c.rclass(p.Reg)
   896  	}
   897  
   898  	// 2nd destination operand
   899  	a5 := C_NONE
   900  	if p.RegTo2 != 0 {
   901  		a5 = C_REG
   902  	}
   903  
   904  	// 3rd source operand
   905  	a3 := C_NONE
   906  	if len(p.RestArgs) > 0 {
   907  		a3 = int(p.RestArgs[0].Class)
   908  		if a3 == 0 {
   909  			a3 = c.aclass(&p.RestArgs[0].Addr) + 1
   910  			p.RestArgs[0].Class = int8(a3)
   911  		}
   912  		a3--
   913  	}
   914  
   915  	ops := oprange[p.As&obj.AMask]
   916  	c1 := &xcmp[a1]
   917  	c4 := &xcmp[a4]
   918  	for i := range ops {
   919  		op := &ops[i]
   920  		if (int(op.reg) == a2) && int(op.from3) == a3 && c1[op.from1] && c4[op.to1] && (int(op.to2) == a5) {
   921  			p.Optab = uint16(cap(optab) - cap(ops) + i + 1)
   922  			return op
   923  		}
   924  	}
   925  
   926  	c.ctxt.Diag("illegal combination %v %v %v %v %v %v", p.As, DRconv(a1), DRconv(a2), DRconv(a3), DRconv(a4), DRconv(a5))
   927  	prasm(p)
   928  	// Turn illegal instruction into an UNDEF, avoid crashing in asmout.
   929  	return &Optab{obj.AUNDEF, C_NONE, C_NONE, C_NONE, C_NONE, C_NONE, 49, 4, 0, 0}
   930  }
   931  
   932  func cmp(a int, b int) bool {
   933  	if a == b {
   934  		return true
   935  	}
   936  	switch a {
   937  	case C_DCON:
   938  		if b == C_LCON {
   939  			return true
   940  		}
   941  		fallthrough
   942  	case C_LCON:
   943  		if b == C_ZCON || b == C_SCON || b == C_UCON || b == C_ADDCON || b == C_ANDCON {
   944  			return true
   945  		}
   946  
   947  	case C_ADD0CON:
   948  		if b == C_ADDCON {
   949  			return true
   950  		}
   951  		fallthrough
   952  
   953  	case C_ADDCON:
   954  		if b == C_ZCON || b == C_SCON {
   955  			return true
   956  		}
   957  
   958  	case C_AND0CON:
   959  		if b == C_ANDCON {
   960  			return true
   961  		}
   962  		fallthrough
   963  
   964  	case C_ANDCON:
   965  		if b == C_ZCON || b == C_SCON {
   966  			return true
   967  		}
   968  
   969  	case C_UCON:
   970  		if b == C_ZCON {
   971  			return true
   972  		}
   973  
   974  	case C_SCON:
   975  		if b == C_ZCON {
   976  			return true
   977  		}
   978  
   979  	case C_LACON:
   980  		if b == C_SACON {
   981  			return true
   982  		}
   983  
   984  	case C_LAUTO:
   985  		if b == C_SAUTO {
   986  			return true
   987  		}
   988  
   989  	case C_REG:
   990  		if b == C_ZCON {
   991  			return true
   992  		}
   993  
   994  	case C_LOREG:
   995  		if b == C_ZOREG || b == C_SOREG {
   996  			return true
   997  		}
   998  
   999  	case C_SOREG:
  1000  		if b == C_ZOREG {
  1001  			return true
  1002  		}
  1003  	}
  1004  
  1005  	return false
  1006  }
  1007  
  1008  func ocmp(p1, p2 Optab) int {
  1009  	if p1.as != p2.as {
  1010  		return int(p1.as) - int(p2.as)
  1011  	}
  1012  	if p1.from1 != p2.from1 {
  1013  		return int(p1.from1) - int(p2.from1)
  1014  	}
  1015  	if p1.reg != p2.reg {
  1016  		return int(p1.reg) - int(p2.reg)
  1017  	}
  1018  	if p1.to1 != p2.to1 {
  1019  		return int(p1.to1) - int(p2.to1)
  1020  	}
  1021  	return 0
  1022  }
  1023  
  1024  func opset(a, b0 obj.As) {
  1025  	oprange[a&obj.AMask] = oprange[b0]
  1026  }
  1027  
  1028  func buildop(ctxt *obj.Link) {
  1029  	if ctxt.DiagFunc == nil {
  1030  		ctxt.DiagFunc = func(format string, args ...interface{}) {
  1031  			log.Printf(format, args...)
  1032  		}
  1033  	}
  1034  
  1035  	if oprange[AOR&obj.AMask] != nil {
  1036  		// Already initialized; stop now.
  1037  		// This happens in the cmd/asm tests,
  1038  		// each of which re-initializes the arch.
  1039  		return
  1040  	}
  1041  
  1042  	var n int
  1043  
  1044  	for i := 0; i < C_NCLASS; i++ {
  1045  		for n = 0; n < C_NCLASS; n++ {
  1046  			if cmp(n, i) {
  1047  				xcmp[i][n] = true
  1048  			}
  1049  		}
  1050  	}
  1051  	for n = 0; optab[n].as != obj.AXXX; n++ {
  1052  	}
  1053  	slices.SortFunc(optab[:n], ocmp)
  1054  	for i := 0; i < n; i++ {
  1055  		r := optab[i].as
  1056  		r0 := r & obj.AMask
  1057  		start := i
  1058  		for optab[i].as == r {
  1059  			i++
  1060  		}
  1061  		oprange[r0] = optab[start:i]
  1062  		i--
  1063  
  1064  		switch r {
  1065  		default:
  1066  			ctxt.Diag("unknown op in build: %v", r)
  1067  			ctxt.DiagFlush()
  1068  			log.Fatalf("bad code")
  1069  
  1070  		case AABSF:
  1071  			opset(AMOVFD, r0)
  1072  			opset(AMOVDF, r0)
  1073  			opset(AMOVWF, r0)
  1074  			opset(AMOVFW, r0)
  1075  			opset(AMOVWD, r0)
  1076  			opset(AMOVDW, r0)
  1077  			opset(ANEGF, r0)
  1078  			opset(ANEGD, r0)
  1079  			opset(AABSD, r0)
  1080  			opset(ATRUNCDW, r0)
  1081  			opset(ATRUNCFW, r0)
  1082  			opset(ASQRTF, r0)
  1083  			opset(ASQRTD, r0)
  1084  			opset(AFCLASSF, r0)
  1085  			opset(AFCLASSD, r0)
  1086  			opset(AFLOGBF, r0)
  1087  			opset(AFLOGBD, r0)
  1088  
  1089  		case AMOVVF:
  1090  			opset(AMOVVD, r0)
  1091  			opset(AMOVFV, r0)
  1092  			opset(AMOVDV, r0)
  1093  			opset(ATRUNCDV, r0)
  1094  			opset(ATRUNCFV, r0)
  1095  			opset(AFFINTFW, r0)
  1096  			opset(AFFINTFV, r0)
  1097  			opset(AFFINTDW, r0)
  1098  			opset(AFFINTDV, r0)
  1099  			opset(AFTINTWF, r0)
  1100  			opset(AFTINTWD, r0)
  1101  			opset(AFTINTVF, r0)
  1102  			opset(AFTINTVD, r0)
  1103  			opset(AFTINTRPWF, r0)
  1104  			opset(AFTINTRPWD, r0)
  1105  			opset(AFTINTRPVF, r0)
  1106  			opset(AFTINTRPVD, r0)
  1107  			opset(AFTINTRMWF, r0)
  1108  			opset(AFTINTRMWD, r0)
  1109  			opset(AFTINTRMVF, r0)
  1110  			opset(AFTINTRMVD, r0)
  1111  			opset(AFTINTRZWF, r0)
  1112  			opset(AFTINTRZWD, r0)
  1113  			opset(AFTINTRZVF, r0)
  1114  			opset(AFTINTRZVD, r0)
  1115  			opset(AFTINTRNEWF, r0)
  1116  			opset(AFTINTRNEWD, r0)
  1117  			opset(AFTINTRNEVF, r0)
  1118  			opset(AFTINTRNEVD, r0)
  1119  
  1120  		case AADD:
  1121  			opset(ASGT, r0)
  1122  			opset(ASGTU, r0)
  1123  			opset(AADDU, r0)
  1124  
  1125  		case AADDV:
  1126  			opset(AADDVU, r0)
  1127  
  1128  		case AADDF:
  1129  			opset(ADIVF, r0)
  1130  			opset(ADIVD, r0)
  1131  			opset(AMULF, r0)
  1132  			opset(AMULD, r0)
  1133  			opset(ASUBF, r0)
  1134  			opset(ASUBD, r0)
  1135  			opset(AADDD, r0)
  1136  			opset(AFMINF, r0)
  1137  			opset(AFMIND, r0)
  1138  			opset(AFMAXF, r0)
  1139  			opset(AFMAXD, r0)
  1140  			opset(AFCOPYSGF, r0)
  1141  			opset(AFCOPYSGD, r0)
  1142  			opset(AFSCALEBF, r0)
  1143  			opset(AFSCALEBD, r0)
  1144  
  1145  		case AFMADDF:
  1146  			opset(AFMADDD, r0)
  1147  			opset(AFMSUBF, r0)
  1148  			opset(AFMSUBD, r0)
  1149  			opset(AFNMADDF, r0)
  1150  			opset(AFNMADDD, r0)
  1151  			opset(AFNMSUBF, r0)
  1152  			opset(AFNMSUBD, r0)
  1153  
  1154  		case AAND:
  1155  			opset(AOR, r0)
  1156  			opset(AXOR, r0)
  1157  			opset(AORN, r0)
  1158  			opset(AANDN, r0)
  1159  
  1160  		case ABEQ:
  1161  			opset(ABNE, r0)
  1162  			opset(ABLT, r0)
  1163  			opset(ABGE, r0)
  1164  			opset(ABGEU, r0)
  1165  			opset(ABLTU, r0)
  1166  
  1167  		case ABLEZ:
  1168  			opset(ABGEZ, r0)
  1169  			opset(ABLTZ, r0)
  1170  			opset(ABGTZ, r0)
  1171  
  1172  		case AMOVB:
  1173  			opset(AMOVH, r0)
  1174  
  1175  		case AMOVBU:
  1176  			opset(AMOVHU, r0)
  1177  
  1178  		case AMUL:
  1179  			opset(AMULU, r0)
  1180  			opset(AMULH, r0)
  1181  			opset(AMULHU, r0)
  1182  			opset(AREM, r0)
  1183  			opset(AREMU, r0)
  1184  			opset(ADIV, r0)
  1185  			opset(ADIVU, r0)
  1186  
  1187  		case AMULV:
  1188  			opset(AMULVU, r0)
  1189  			opset(AMULHV, r0)
  1190  			opset(AMULHVU, r0)
  1191  			opset(AREMV, r0)
  1192  			opset(AREMVU, r0)
  1193  			opset(ADIVV, r0)
  1194  			opset(ADIVVU, r0)
  1195  
  1196  		case ASLL:
  1197  			opset(ASRL, r0)
  1198  			opset(ASRA, r0)
  1199  			opset(AROTR, r0)
  1200  
  1201  		case ASLLV:
  1202  			opset(ASRAV, r0)
  1203  			opset(ASRLV, r0)
  1204  			opset(AROTRV, r0)
  1205  
  1206  		case ABSTRPICKW:
  1207  			opset(ABSTRPICKV, r0)
  1208  			opset(ABSTRINSW, r0)
  1209  			opset(ABSTRINSV, r0)
  1210  
  1211  		case ASUB:
  1212  			opset(ASUBU, r0)
  1213  			opset(ANOR, r0)
  1214  
  1215  		case ASUBV:
  1216  			opset(ASUBVU, r0)
  1217  
  1218  		case ASYSCALL:
  1219  			opset(ADBAR, r0)
  1220  			opset(ABREAK, r0)
  1221  
  1222  		case ACMPEQF:
  1223  			opset(ACMPGTF, r0)
  1224  			opset(ACMPGTD, r0)
  1225  			opset(ACMPGEF, r0)
  1226  			opset(ACMPGED, r0)
  1227  			opset(ACMPEQD, r0)
  1228  
  1229  		case ABFPT:
  1230  			opset(ABFPF, r0)
  1231  
  1232  		case AMOVW,
  1233  			AMOVD,
  1234  			AMOVF,
  1235  			AMOVV,
  1236  			ARFE,
  1237  			AJAL,
  1238  			AJMP,
  1239  			AMOVWU,
  1240  			AVMOVQ,
  1241  			AXVMOVQ,
  1242  			ALL,
  1243  			ALLV,
  1244  			ASC,
  1245  			ASCV,
  1246  			ANEGW,
  1247  			ANEGV,
  1248  			AWORD,
  1249  			obj.ANOP,
  1250  			obj.ATEXT,
  1251  			obj.AFUNCDATA,
  1252  			obj.APCALIGN,
  1253  			obj.APCDATA,
  1254  			obj.ADUFFZERO,
  1255  			obj.ADUFFCOPY:
  1256  			break
  1257  
  1258  		case ARDTIMELW:
  1259  			opset(ARDTIMEHW, r0)
  1260  			opset(ARDTIMED, r0)
  1261  
  1262  		case ACLOW:
  1263  			opset(ACLZW, r0)
  1264  			opset(ACTOW, r0)
  1265  			opset(ACTZW, r0)
  1266  			opset(ACLOV, r0)
  1267  			opset(ACLZV, r0)
  1268  			opset(ACTOV, r0)
  1269  			opset(ACTZV, r0)
  1270  			opset(AREVB2H, r0)
  1271  			opset(AREVB4H, r0)
  1272  			opset(AREVB2W, r0)
  1273  			opset(AREVBV, r0)
  1274  			opset(AREVH2W, r0)
  1275  			opset(AREVHV, r0)
  1276  			opset(ABITREV4B, r0)
  1277  			opset(ABITREV8B, r0)
  1278  			opset(ABITREVW, r0)
  1279  			opset(ABITREVV, r0)
  1280  			opset(AEXTWB, r0)
  1281  			opset(AEXTWH, r0)
  1282  			opset(ACPUCFG, r0)
  1283  
  1284  		case ATEQ:
  1285  			opset(ATNE, r0)
  1286  
  1287  		case AMASKEQZ:
  1288  			opset(AMASKNEZ, r0)
  1289  			opset(ACRCWBW, r0)
  1290  			opset(ACRCWHW, r0)
  1291  			opset(ACRCWWW, r0)
  1292  			opset(ACRCWVW, r0)
  1293  			opset(ACRCCWBW, r0)
  1294  			opset(ACRCCWHW, r0)
  1295  			opset(ACRCCWWW, r0)
  1296  			opset(ACRCCWVW, r0)
  1297  
  1298  		case ANOOP:
  1299  			opset(obj.AUNDEF, r0)
  1300  
  1301  		case AAMSWAPW:
  1302  			for i := range atomicInst {
  1303  				if i == AAMSWAPW {
  1304  					continue
  1305  				}
  1306  				opset(i, r0)
  1307  			}
  1308  		case AVSEQB:
  1309  			opset(AVSEQH, r0)
  1310  			opset(AVSEQW, r0)
  1311  			opset(AVSEQV, r0)
  1312  
  1313  		case AXVSEQB:
  1314  			opset(AXVSEQH, r0)
  1315  			opset(AXVSEQW, r0)
  1316  			opset(AXVSEQV, r0)
  1317  
  1318  		case AVPCNTB:
  1319  			opset(AVPCNTH, r0)
  1320  			opset(AVPCNTW, r0)
  1321  			opset(AVPCNTV, r0)
  1322  
  1323  		case AXVPCNTB:
  1324  			opset(AXVPCNTH, r0)
  1325  			opset(AXVPCNTW, r0)
  1326  			opset(AXVPCNTV, r0)
  1327  		}
  1328  	}
  1329  }
  1330  
  1331  func OP_RRRR(op uint32, r1 uint32, r2 uint32, r3 uint32, r4 uint32) uint32 {
  1332  	return op | (r1&0x1F)<<15 | (r2&0x1F)<<10 | (r3&0x1F)<<5 | (r4 & 0x1F)
  1333  }
  1334  
  1335  // r1 -> rk
  1336  // r2 -> rj
  1337  // r3 -> rd
  1338  func OP_RRR(op uint32, r1 uint32, r2 uint32, r3 uint32) uint32 {
  1339  	return op | (r1&0x1F)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  1340  }
  1341  
  1342  // r2 -> rj
  1343  // r3 -> rd
  1344  func OP_RR(op uint32, r2 uint32, r3 uint32) uint32 {
  1345  	return op | (r2&0x1F)<<5 | (r3&0x1F)<<0
  1346  }
  1347  
  1348  func OP_16IR_5I(op uint32, i uint32, r2 uint32) uint32 {
  1349  	return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | ((i >> 16) & 0x1F)
  1350  }
  1351  
  1352  func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  1353  	return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  1354  }
  1355  
  1356  func OP_12IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 {
  1357  	return op | (i&0xFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0
  1358  }
  1359  
  1360  func OP_IR(op uint32, i uint32, r2 uint32) uint32 {
  1361  	return op | (i&0xFFFFF)<<5 | (r2&0x1F)<<0 // ui20, rd5
  1362  }
  1363  
  1364  func OP_15I(op uint32, i uint32) uint32 {
  1365  	return op | (i&0x7FFF)<<0
  1366  }
  1367  
  1368  // i1 -> msb
  1369  // r2 -> rj
  1370  // i3 -> lsb
  1371  // r4 -> rd
  1372  func OP_IRIR(op uint32, i1 uint32, r2 uint32, i3 uint32, r4 uint32) uint32 {
  1373  	return op | (i1 << 16) | (r2&0x1F)<<5 | (i3 << 10) | (r4&0x1F)<<0
  1374  }
  1375  
  1376  // Encoding for the 'b' or 'bl' instruction.
  1377  func OP_B_BL(op uint32, i uint32) uint32 {
  1378  	return op | ((i & 0xFFFF) << 10) | ((i >> 16) & 0x3FF)
  1379  }
  1380  
  1381  func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) {
  1382  	o1 := uint32(0)
  1383  	o2 := uint32(0)
  1384  	o3 := uint32(0)
  1385  	o4 := uint32(0)
  1386  	o5 := uint32(0)
  1387  
  1388  	add := AADDU
  1389  	add = AADDVU
  1390  
  1391  	switch o.type_ {
  1392  	default:
  1393  		c.ctxt.Diag("unknown type %d %v", o.type_)
  1394  		prasm(p)
  1395  
  1396  	case 0: // pseudo ops
  1397  		break
  1398  
  1399  	case 1: // mov r1,r2 ==> OR r1,r0,r2
  1400  		a := AOR
  1401  		if p.As == AMOVW {
  1402  			a = ASLL
  1403  		}
  1404  		o1 = OP_RRR(c.oprrr(a), uint32(REGZERO), uint32(p.From.Reg), uint32(p.To.Reg))
  1405  
  1406  	case 2: // add/sub r1,[r2],r3
  1407  		r := int(p.Reg)
  1408  		if p.As == ANEGW || p.As == ANEGV {
  1409  			r = REGZERO
  1410  		}
  1411  		if r == 0 {
  1412  			r = int(p.To.Reg)
  1413  		}
  1414  		o1 = OP_RRR(c.oprrr(p.As), uint32(p.From.Reg), uint32(r), uint32(p.To.Reg))
  1415  
  1416  	case 3: // mov $soreg, r ==> or/add $i,o,r
  1417  		v := c.regoff(&p.From)
  1418  
  1419  		r := int(p.From.Reg)
  1420  		if r == 0 {
  1421  			r = int(o.param)
  1422  		}
  1423  		a := add
  1424  		if o.from1 == C_ANDCON {
  1425  			a = AOR
  1426  		}
  1427  
  1428  		o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(r), uint32(p.To.Reg))
  1429  
  1430  	case 4: // add $scon,[r1],r2
  1431  		v := c.regoff(&p.From)
  1432  
  1433  		r := int(p.Reg)
  1434  		if r == 0 {
  1435  			r = int(p.To.Reg)
  1436  		}
  1437  
  1438  		o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  1439  
  1440  	case 5: // syscall
  1441  		v := c.regoff(&p.From)
  1442  		o1 = OP_15I(c.opi(p.As), uint32(v))
  1443  
  1444  	case 6: // beq r1,[r2],sbra
  1445  		v := int32(0)
  1446  		if p.To.Target() != nil {
  1447  			v = int32(p.To.Target().Pc-p.Pc) >> 2
  1448  		}
  1449  		as, rd, rj, width := p.As, p.Reg, p.From.Reg, 16
  1450  		switch as {
  1451  		case ABGTZ, ABLEZ:
  1452  			rd, rj = rj, rd
  1453  		case ABFPT, ABFPF:
  1454  			width = 21
  1455  			// FCC0 is the implicit source operand, now that we
  1456  			// don't register-allocate from the FCC bank.
  1457  			rj = REG_FCC0
  1458  		case ABEQ, ABNE:
  1459  			if rd == 0 || rd == REGZERO || rj == REGZERO {
  1460  				// BEQZ/BNEZ can be encoded with 21-bit offsets.
  1461  				width = 21
  1462  				as = -as
  1463  				if rj == 0 || rj == REGZERO {
  1464  					rj = rd
  1465  				}
  1466  			}
  1467  		}
  1468  		switch width {
  1469  		case 21:
  1470  			if (v<<11)>>11 != v {
  1471  				c.ctxt.Diag("21 bit-width, short branch too far\n%v", p)
  1472  			}
  1473  			o1 = OP_16IR_5I(c.opirr(as), uint32(v), uint32(rj))
  1474  		case 16:
  1475  			if (v<<16)>>16 != v {
  1476  				c.ctxt.Diag("16 bit-width, short branch too far\n%v", p)
  1477  			}
  1478  			o1 = OP_16IRR(c.opirr(as), uint32(v), uint32(rj), uint32(rd))
  1479  		default:
  1480  			c.ctxt.Diag("unexpected branch encoding\n%v", p)
  1481  		}
  1482  
  1483  	case 7: // mov r, soreg
  1484  		r := int(p.To.Reg)
  1485  		if r == 0 {
  1486  			r = int(o.param)
  1487  		}
  1488  		v := c.regoff(&p.To)
  1489  		o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg))
  1490  
  1491  	case 8: // mov soreg, r
  1492  		r := int(p.From.Reg)
  1493  		if r == 0 {
  1494  			r = int(o.param)
  1495  		}
  1496  		v := c.regoff(&p.From)
  1497  		o1 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  1498  
  1499  	case 9: // sll r1,[r2],r3
  1500  		o1 = OP_RR(c.oprr(p.As), uint32(p.From.Reg), uint32(p.To.Reg))
  1501  
  1502  	case 10: // add $con,[r1],r2 ==> mov $con, t; add t,[r1],r2
  1503  		v := c.regoff(&p.From)
  1504  		a := AOR
  1505  		if v < 0 {
  1506  			a = AADDU
  1507  		}
  1508  		o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP))
  1509  		r := int(p.Reg)
  1510  		if r == 0 {
  1511  			r = int(p.To.Reg)
  1512  		}
  1513  		o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  1514  
  1515  	case 11: // jmp lbra
  1516  		v := int32(0)
  1517  		if p.To.Target() != nil {
  1518  			v = int32(p.To.Target().Pc-p.Pc) >> 2
  1519  		}
  1520  		o1 = OP_B_BL(c.opirr(p.As), uint32(v))
  1521  		if p.To.Sym != nil {
  1522  			c.cursym.AddRel(c.ctxt, obj.Reloc{
  1523  				Type: objabi.R_CALLLOONG64,
  1524  				Off:  int32(c.pc),
  1525  				Siz:  4,
  1526  				Sym:  p.To.Sym,
  1527  				Add:  p.To.Offset,
  1528  			})
  1529  		}
  1530  
  1531  	case 12: // movbs r,r
  1532  		switch p.As {
  1533  		case AMOVB:
  1534  			o1 = OP_RR(c.oprr(AEXTWB), uint32(p.From.Reg), uint32(p.To.Reg))
  1535  		case AMOVH:
  1536  			o1 = OP_RR(c.oprr(AEXTWH), uint32(p.From.Reg), uint32(p.To.Reg))
  1537  		case AMOVBU:
  1538  			o1 = OP_12IRR(c.opirr(AAND), uint32(0xff), uint32(p.From.Reg), uint32(p.To.Reg))
  1539  		case AMOVHU:
  1540  			o1 = OP_IRIR(c.opirir(ABSTRPICKV), 15, uint32(p.From.Reg), 0, uint32(p.To.Reg))
  1541  		case AMOVWU:
  1542  			o1 = OP_IRIR(c.opirir(ABSTRPICKV), 31, uint32(p.From.Reg), 0, uint32(p.To.Reg))
  1543  		default:
  1544  			c.ctxt.Diag("unexpected encoding\n%v", p)
  1545  		}
  1546  
  1547  	case 15: // teq $c r,r
  1548  		v := c.regoff(&p.From)
  1549  		r := int(p.Reg)
  1550  		if r == 0 {
  1551  			r = REGZERO
  1552  		}
  1553  		/*
  1554  			teq c, r1, r2
  1555  			fallthrough
  1556  			==>
  1557  			bne r1, r2, 2
  1558  			break c
  1559  			fallthrough
  1560  		*/
  1561  		if p.As == ATEQ {
  1562  			o1 = OP_16IRR(c.opirr(ABNE), uint32(2), uint32(r), uint32(p.To.Reg))
  1563  		} else { // ATNE
  1564  			o1 = OP_16IRR(c.opirr(ABEQ), uint32(2), uint32(r), uint32(p.To.Reg))
  1565  		}
  1566  		o2 = OP_15I(c.opi(ABREAK), uint32(v))
  1567  
  1568  	case 16: // sll $c,[r1],r2
  1569  		v := c.regoff(&p.From)
  1570  		r := int(p.Reg)
  1571  		if r == 0 {
  1572  			r = int(p.To.Reg)
  1573  		}
  1574  
  1575  		// instruction ending with V:6-digit immediate, others:5-digit immediate
  1576  		if v >= 32 && vshift(p.As) {
  1577  			o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x3f, uint32(r), uint32(p.To.Reg))
  1578  		} else {
  1579  			o1 = OP_16IRR(c.opirr(p.As), uint32(v)&0x1f, uint32(r), uint32(p.To.Reg))
  1580  		}
  1581  
  1582  	case 17: // bstrpickw $msbw, r1, $lsbw, r2
  1583  		rd, rj := p.To.Reg, p.Reg
  1584  		if rj == obj.REG_NONE {
  1585  			rj = rd
  1586  		}
  1587  		msb, lsb := p.From.Offset, p.GetFrom3().Offset
  1588  
  1589  		// check the range of msb and lsb
  1590  		var b uint32
  1591  		if p.As == ABSTRPICKW || p.As == ABSTRINSW {
  1592  			b = 32
  1593  		} else {
  1594  			b = 64
  1595  		}
  1596  		if lsb < 0 || uint32(lsb) >= b || msb < 0 || uint32(msb) >= b || uint32(lsb) > uint32(msb) {
  1597  			c.ctxt.Diag("illegal bit number\n%v", p)
  1598  		}
  1599  
  1600  		o1 = OP_IRIR(c.opirir(p.As), uint32(msb), uint32(rj), uint32(lsb), uint32(rd))
  1601  
  1602  	case 18: // jmp [r1],0(r2)
  1603  		r := int(p.Reg)
  1604  		if r == 0 {
  1605  			r = int(o.param)
  1606  		}
  1607  		o1 = OP_RRR(c.oprrr(p.As), uint32(0), uint32(p.To.Reg), uint32(r))
  1608  		if p.As == obj.ACALL {
  1609  			c.cursym.AddRel(c.ctxt, obj.Reloc{
  1610  				Type: objabi.R_CALLIND,
  1611  				Off:  int32(c.pc),
  1612  			})
  1613  		}
  1614  
  1615  	case 19: // mov $lcon,r
  1616  		// NOTE: this case does not use REGTMP. If it ever does,
  1617  		// remove the NOTUSETMP flag in optab.
  1618  		v := c.regoff(&p.From)
  1619  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  1620  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
  1621  
  1622  	case 20: // mov Rsrc, (Rbase)(Roff)
  1623  		o1 = OP_RRR(c.oprrr(p.As), uint32(p.To.Index), uint32(p.To.Reg), uint32(p.From.Reg))
  1624  
  1625  	case 21: // mov (Rbase)(Roff), Rdst
  1626  		o1 = OP_RRR(c.oprrr(-p.As), uint32(p.From.Index), uint32(p.From.Reg), uint32(p.To.Reg))
  1627  
  1628  	case 23: // add $lcon,r1,r2
  1629  		v := c.regoff(&p.From)
  1630  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  1631  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  1632  		r := int(p.Reg)
  1633  		if r == 0 {
  1634  			r = int(p.To.Reg)
  1635  		}
  1636  		o3 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  1637  
  1638  	case 24: // mov $ucon,r
  1639  		v := c.regoff(&p.From)
  1640  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  1641  
  1642  	case 25: // add/and $ucon,[r1],r2
  1643  		v := c.regoff(&p.From)
  1644  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  1645  		r := int(p.Reg)
  1646  		if r == 0 {
  1647  			r = int(p.To.Reg)
  1648  		}
  1649  		o2 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  1650  
  1651  	case 26: // mov $lsext/auto/oreg,r
  1652  		v := c.regoff(&p.From)
  1653  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  1654  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  1655  		r := int(p.From.Reg)
  1656  		if r == 0 {
  1657  			r = int(o.param)
  1658  		}
  1659  		o3 = OP_RRR(c.oprrr(add), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  1660  
  1661  	case 27: // mov [sl]ext/auto/oreg,fr
  1662  		v := c.regoff(&p.From)
  1663  		r := int(p.From.Reg)
  1664  		if r == 0 {
  1665  			r = int(o.param)
  1666  		}
  1667  		switch o.size {
  1668  		case 12:
  1669  			o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  1670  			o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  1671  			o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
  1672  
  1673  		case 4:
  1674  			o1 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(r), uint32(p.To.Reg))
  1675  		}
  1676  
  1677  	case 28: // mov fr,[sl]ext/auto/oreg
  1678  		v := c.regoff(&p.To)
  1679  		r := int(p.To.Reg)
  1680  		if r == 0 {
  1681  			r = int(o.param)
  1682  		}
  1683  		switch o.size {
  1684  		case 12:
  1685  			o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  1686  			o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  1687  			o3 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(REGTMP), uint32(p.From.Reg))
  1688  
  1689  		case 4:
  1690  			o1 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(r), uint32(p.From.Reg))
  1691  		}
  1692  
  1693  	case 30: // mov gr/fr/fcc/fcsr, fr/fcc/fcsr/gr
  1694  		a := c.specialFpMovInst(p.As, oclass(&p.From), oclass(&p.To))
  1695  		o1 = OP_RR(a, uint32(p.From.Reg), uint32(p.To.Reg))
  1696  
  1697  	case 34: // mov $con,fr
  1698  		v := c.regoff(&p.From)
  1699  		a := AADDU
  1700  		if o.from1 == C_ANDCON {
  1701  			a = AOR
  1702  		}
  1703  		a2 := c.specialFpMovInst(p.As, C_REG, oclass(&p.To))
  1704  		o1 = OP_12IRR(c.opirr(a), uint32(v), uint32(0), uint32(REGTMP))
  1705  		o2 = OP_RR(a2, uint32(REGTMP), uint32(p.To.Reg))
  1706  
  1707  	case 35: // mov r,lext/auto/oreg
  1708  		v := c.regoff(&p.To)
  1709  		r := int(p.To.Reg)
  1710  		if r == 0 {
  1711  			r = int(o.param)
  1712  		}
  1713  		o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  1714  		o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  1715  		o3 = OP_12IRR(c.opirr(p.As), uint32(v), uint32(REGTMP), uint32(p.From.Reg))
  1716  
  1717  	case 36: // mov lext/auto/oreg,r
  1718  		v := c.regoff(&p.From)
  1719  		r := int(p.From.Reg)
  1720  		if r == 0 {
  1721  			r = int(o.param)
  1722  		}
  1723  		o1 = OP_IR(c.opir(ALU12IW), uint32((v+1<<11)>>12), uint32(REGTMP))
  1724  		o2 = OP_RRR(c.oprrr(add), uint32(r), uint32(REGTMP), uint32(REGTMP))
  1725  		o3 = OP_12IRR(c.opirr(-p.As), uint32(v), uint32(REGTMP), uint32(p.To.Reg))
  1726  
  1727  	case 37: // fmadd r1, r2, [r3], r4
  1728  		r := int(p.To.Reg)
  1729  		if len(p.RestArgs) > 0 {
  1730  			r = int(p.GetFrom3().Reg)
  1731  		}
  1732  		o1 = OP_RRRR(c.oprrrr(p.As), uint32(p.From.Reg), uint32(p.Reg), uint32(r), uint32(p.To.Reg))
  1733  
  1734  	case 38: // word
  1735  		o1 = uint32(c.regoff(&p.From))
  1736  
  1737  	case 39: // vmov Rn, Vd.<T>[index]
  1738  		v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
  1739  		if v == 0 {
  1740  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  1741  		}
  1742  
  1743  		Rj := uint32(p.From.Reg & EXT_REG_MASK)
  1744  		Vd := uint32(p.To.Reg & EXT_REG_MASK)
  1745  		index := uint32(p.To.Index)
  1746  		c.checkindex(p, index, m)
  1747  		o1 = v | (index << 10) | (Rj << 5) | Vd
  1748  
  1749  	case 40: // vmov Vd.<T>[index], Rn
  1750  		v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
  1751  		if v == 0 {
  1752  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  1753  		}
  1754  
  1755  		Vj := uint32(p.From.Reg & EXT_REG_MASK)
  1756  		Rd := uint32(p.To.Reg & EXT_REG_MASK)
  1757  		index := uint32(p.From.Index)
  1758  		c.checkindex(p, index, m)
  1759  		o1 = v | (index << 10) | (Vj << 5) | Rd
  1760  
  1761  	case 41: // vmov Rn, Vd.<T>
  1762  		v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
  1763  		if v == 0 {
  1764  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  1765  		}
  1766  
  1767  		Rj := uint32(p.From.Reg & EXT_REG_MASK)
  1768  		Vd := uint32(p.To.Reg & EXT_REG_MASK)
  1769  		o1 = v | (Rj << 5) | Vd
  1770  
  1771  	case 42: // vmov  xj, xd.<T>
  1772  		v, _ := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
  1773  		if v == 0 {
  1774  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  1775  		}
  1776  
  1777  		Xj := uint32(p.From.Reg & EXT_REG_MASK)
  1778  		Xd := uint32(p.To.Reg & EXT_REG_MASK)
  1779  		o1 = v | (Xj << 5) | Xd
  1780  
  1781  	case 43: // vmov  xj, xd.<T>[index]
  1782  		v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
  1783  		if v == 0 {
  1784  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  1785  		}
  1786  
  1787  		Xj := uint32(p.From.Reg & EXT_REG_MASK)
  1788  		Xd := uint32(p.To.Reg & EXT_REG_MASK)
  1789  		index := uint32(p.To.Index)
  1790  		c.checkindex(p, index, m)
  1791  		o1 = v | (index << 10) | (Xj << 5) | Xd
  1792  
  1793  	case 44: // vmov  xj.<T>[index], xd
  1794  		v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
  1795  		if v == 0 {
  1796  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  1797  		}
  1798  
  1799  		Xj := uint32(p.From.Reg & EXT_REG_MASK)
  1800  		Xd := uint32(p.To.Reg & EXT_REG_MASK)
  1801  		index := uint32(p.From.Index)
  1802  		c.checkindex(p, index, m)
  1803  		o1 = v | (index << 10) | (Xj << 5) | Xd
  1804  
  1805  	case 45: // vmov  vj.<T>[index], vd.<T>
  1806  		v, m := c.specialLsxMovInst(p.As, p.From.Reg, p.To.Reg)
  1807  		if v == 0 {
  1808  			c.ctxt.Diag("illegal arng type combination: %v\n", p)
  1809  		}
  1810  
  1811  		vj := uint32(p.From.Reg & EXT_REG_MASK)
  1812  		vd := uint32(p.To.Reg & EXT_REG_MASK)
  1813  		index := uint32(p.From.Index)
  1814  		c.checkindex(p, index, m)
  1815  		o1 = v | (index << 10) | (vj << 5) | vd
  1816  
  1817  	case 49:
  1818  		if p.As == ANOOP {
  1819  			// andi r0, r0, 0
  1820  			o1 = OP_12IRR(c.opirr(AAND), 0, 0, 0)
  1821  		} else {
  1822  			// undef
  1823  			o1 = OP_15I(c.opi(ABREAK), 0)
  1824  		}
  1825  
  1826  	// relocation operations
  1827  	case 50: // mov r,addr ==> pcalau12i + sw
  1828  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  1829  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1830  			Type: objabi.R_LOONG64_ADDR_HI,
  1831  			Off:  int32(c.pc),
  1832  			Siz:  4,
  1833  			Sym:  p.To.Sym,
  1834  			Add:  p.To.Offset,
  1835  		})
  1836  		o2 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
  1837  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1838  			Type: objabi.R_LOONG64_ADDR_LO,
  1839  			Off:  int32(c.pc + 4),
  1840  			Siz:  4,
  1841  			Sym:  p.To.Sym,
  1842  			Add:  p.To.Offset,
  1843  		})
  1844  
  1845  	case 51: // mov addr,r ==> pcalau12i + lw
  1846  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  1847  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1848  			Type: objabi.R_LOONG64_ADDR_HI,
  1849  			Off:  int32(c.pc),
  1850  			Siz:  4,
  1851  			Sym:  p.From.Sym,
  1852  			Add:  p.From.Offset,
  1853  		})
  1854  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
  1855  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1856  			Type: objabi.R_LOONG64_ADDR_LO,
  1857  			Off:  int32(c.pc + 4),
  1858  			Siz:  4,
  1859  			Sym:  p.From.Sym,
  1860  			Add:  p.From.Offset,
  1861  		})
  1862  
  1863  	case 52: // mov $ext, r
  1864  		// NOTE: this case does not use REGTMP. If it ever does,
  1865  		// remove the NOTUSETMP flag in optab.
  1866  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg))
  1867  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1868  			Type: objabi.R_LOONG64_ADDR_HI,
  1869  			Off:  int32(c.pc),
  1870  			Siz:  4,
  1871  			Sym:  p.From.Sym,
  1872  			Add:  p.From.Offset,
  1873  		})
  1874  		o2 = OP_12IRR(c.opirr(add), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg))
  1875  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1876  			Type: objabi.R_LOONG64_ADDR_LO,
  1877  			Off:  int32(c.pc + 4),
  1878  			Siz:  4,
  1879  			Sym:  p.From.Sym,
  1880  			Add:  p.From.Offset,
  1881  		})
  1882  
  1883  	case 53: // mov r, tlsvar ==>  lu12i.w + ori + add r2, regtmp + sw o(regtmp)
  1884  		// NOTE: this case does not use REGTMP. If it ever does,
  1885  		// remove the NOTUSETMP flag in optab.
  1886  		o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP))
  1887  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1888  			Type: objabi.R_LOONG64_TLS_LE_HI,
  1889  			Off:  int32(c.pc),
  1890  			Siz:  4,
  1891  			Sym:  p.To.Sym,
  1892  			Add:  p.To.Offset,
  1893  		})
  1894  		o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP))
  1895  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1896  			Type: objabi.R_LOONG64_TLS_LE_LO,
  1897  			Off:  int32(c.pc + 4),
  1898  			Siz:  4,
  1899  			Sym:  p.To.Sym,
  1900  			Add:  p.To.Offset,
  1901  		})
  1902  		o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP))
  1903  		o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
  1904  
  1905  	case 54: // lu12i.w + ori + add r2, regtmp + lw o(regtmp)
  1906  		// NOTE: this case does not use REGTMP. If it ever does,
  1907  		// remove the NOTUSETMP flag in optab.
  1908  		o1 = OP_IR(c.opir(ALU12IW), uint32(0), uint32(REGTMP))
  1909  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1910  			Type: objabi.R_LOONG64_TLS_LE_HI,
  1911  			Off:  int32(c.pc),
  1912  			Siz:  4,
  1913  			Sym:  p.From.Sym,
  1914  			Add:  p.From.Offset,
  1915  		})
  1916  		o2 = OP_12IRR(c.opirr(AOR), uint32(0), uint32(REGTMP), uint32(REGTMP))
  1917  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1918  			Type: objabi.R_LOONG64_TLS_LE_LO,
  1919  			Off:  int32(c.pc + 4),
  1920  			Siz:  4,
  1921  			Sym:  p.From.Sym,
  1922  			Add:  p.From.Offset,
  1923  		})
  1924  		o3 = OP_RRR(c.oprrr(AADDV), uint32(REG_R2), uint32(REGTMP), uint32(REGTMP))
  1925  		o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
  1926  
  1927  	case 56: // mov r, tlsvar IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + st.d
  1928  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  1929  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1930  			Type: objabi.R_LOONG64_TLS_IE_HI,
  1931  			Off:  int32(c.pc),
  1932  			Siz:  4,
  1933  			Sym:  p.To.Sym,
  1934  		})
  1935  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP))
  1936  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1937  			Type: objabi.R_LOONG64_TLS_IE_LO,
  1938  			Off:  int32(c.pc + 4),
  1939  			Siz:  4,
  1940  			Sym:  p.To.Sym,
  1941  		})
  1942  		o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP))
  1943  		o4 = OP_12IRR(c.opirr(p.As), uint32(0), uint32(REGTMP), uint32(p.From.Reg))
  1944  
  1945  	case 57: // mov tlsvar, r IE model ==> (pcalau12i + ld.d)tlsvar@got + add.d + ld.d
  1946  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(REGTMP))
  1947  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1948  			Type: objabi.R_LOONG64_TLS_IE_HI,
  1949  			Off:  int32(c.pc),
  1950  			Siz:  4,
  1951  			Sym:  p.From.Sym,
  1952  		})
  1953  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(REGTMP))
  1954  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1955  			Type: objabi.R_LOONG64_TLS_IE_LO,
  1956  			Off:  int32(c.pc + 4),
  1957  			Siz:  4,
  1958  			Sym:  p.From.Sym,
  1959  		})
  1960  		o3 = OP_RRR(c.oprrr(AADDVU), uint32(REGTMP), uint32(REG_R2), uint32(REGTMP))
  1961  		o4 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(REGTMP), uint32(p.To.Reg))
  1962  
  1963  	case 59: // mov $dcon,r
  1964  		// NOTE: this case does not use REGTMP. If it ever does,
  1965  		// remove the NOTUSETMP flag in optab.
  1966  		v := c.vregoff(&p.From)
  1967  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(p.To.Reg))
  1968  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(p.To.Reg), uint32(p.To.Reg))
  1969  		o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(p.To.Reg))
  1970  		o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(p.To.Reg), uint32(p.To.Reg))
  1971  
  1972  	case 60: // add $dcon,r1,r2
  1973  		v := c.vregoff(&p.From)
  1974  		o1 = OP_IR(c.opir(ALU12IW), uint32(v>>12), uint32(REGTMP))
  1975  		o2 = OP_12IRR(c.opirr(AOR), uint32(v), uint32(REGTMP), uint32(REGTMP))
  1976  		o3 = OP_IR(c.opir(ALU32ID), uint32(v>>32), uint32(REGTMP))
  1977  		o4 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP))
  1978  		r := int(p.Reg)
  1979  		if r == 0 {
  1980  			r = int(p.To.Reg)
  1981  		}
  1982  		o5 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg))
  1983  
  1984  	case 61: // word C_DCON
  1985  		o1 = uint32(c.vregoff(&p.From))
  1986  		o2 = uint32(c.vregoff(&p.From) >> 32)
  1987  
  1988  	case 62: // rdtimex rd, rj
  1989  		o1 = OP_RR(c.oprr(p.As), uint32(p.To.Reg), uint32(p.RegTo2))
  1990  
  1991  	case 65: // mov sym@GOT, r ==> pcalau12i + ld.d
  1992  		o1 = OP_IR(c.opir(APCALAU12I), uint32(0), uint32(p.To.Reg))
  1993  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  1994  			Type: objabi.R_LOONG64_GOT_HI,
  1995  			Off:  int32(c.pc),
  1996  			Siz:  4,
  1997  			Sym:  p.From.Sym,
  1998  		})
  1999  		o2 = OP_12IRR(c.opirr(-p.As), uint32(0), uint32(p.To.Reg), uint32(p.To.Reg))
  2000  		c.cursym.AddRel(c.ctxt, obj.Reloc{
  2001  			Type: objabi.R_LOONG64_GOT_LO,
  2002  			Off:  int32(c.pc + 4),
  2003  			Siz:  4,
  2004  			Sym:  p.From.Sym,
  2005  		})
  2006  
  2007  	case 66: // am* From, To, RegTo2 ==> am* RegTo2, From, To
  2008  		rk := p.From.Reg
  2009  		rj := p.To.Reg
  2010  		rd := p.RegTo2
  2011  
  2012  		// See section 2.2.7.1 of https://loongson.github.io/LoongArch-Documentation/LoongArch-Vol1-EN.html
  2013  		// for the register usage constraints.
  2014  		if rd == rj || rd == rk {
  2015  			c.ctxt.Diag("illegal register combination: %v\n", p)
  2016  		}
  2017  		o1 = OP_RRR(atomicInst[p.As], uint32(rk), uint32(rj), uint32(rd))
  2018  	}
  2019  
  2020  	out[0] = o1
  2021  	out[1] = o2
  2022  	out[2] = o3
  2023  	out[3] = o4
  2024  	out[4] = o5
  2025  }
  2026  
  2027  // checkindex checks if index >= 0 && index <= maxindex
  2028  func (c *ctxt0) checkindex(p *obj.Prog, index uint32, mask uint32) {
  2029  	if (index & ^mask) != 0 {
  2030  		c.ctxt.Diag("register element index out of range 0 to %d: %v", mask, p)
  2031  	}
  2032  }
  2033  
// vregoff classifies operand a and returns the value left in c.instoffset.
// c.instoffset is cleared first, so the result is 0 unless c.aclass sets it.
// NOTE(review): presumably aclass records the operand's offset/constant in
// c.instoffset as a side effect — confirm against aclass.
func (c *ctxt0) vregoff(a *obj.Addr) int64 {
	c.instoffset = 0
	c.aclass(a)
	return c.instoffset
}
  2039  
  2040  func (c *ctxt0) regoff(a *obj.Addr) int32 {
  2041  	return int32(c.vregoff(a))
  2042  }
  2043  
  2044  func (c *ctxt0) oprrrr(a obj.As) uint32 {
  2045  	switch a {
  2046  	case AFMADDF:
  2047  		return 0x81 << 20 // fmadd.s
  2048  	case AFMADDD:
  2049  		return 0x82 << 20 // fmadd.d
  2050  	case AFMSUBF:
  2051  		return 0x85 << 20 // fmsub.s
  2052  	case AFMSUBD:
  2053  		return 0x86 << 20 // fmsub.d
  2054  	case AFNMADDF:
  2055  		return 0x89 << 20 // fnmadd.f
  2056  	case AFNMADDD:
  2057  		return 0x8a << 20 // fnmadd.d
  2058  	case AFNMSUBF:
  2059  		return 0x8d << 20 // fnmsub.s
  2060  	case AFNMSUBD:
  2061  		return 0x8e << 20 // fnmsub.d
  2062  	}
  2063  
  2064  	c.ctxt.Diag("bad rrrr opcode %v", a)
  2065  	return 0
  2066  }
  2067  
  2068  func (c *ctxt0) oprrr(a obj.As) uint32 {
  2069  	switch a {
  2070  	case AADD:
  2071  		return 0x20 << 15
  2072  	case AADDU:
  2073  		return 0x20 << 15
  2074  	case ASGT:
  2075  		return 0x24 << 15 // SLT
  2076  	case ASGTU:
  2077  		return 0x25 << 15 // SLTU
  2078  	case AMASKEQZ:
  2079  		return 0x26 << 15
  2080  	case AMASKNEZ:
  2081  		return 0x27 << 15
  2082  	case AAND:
  2083  		return 0x29 << 15
  2084  	case AOR:
  2085  		return 0x2a << 15
  2086  	case AXOR:
  2087  		return 0x2b << 15
  2088  	case AORN:
  2089  		return 0x2c << 15 // orn
  2090  	case AANDN:
  2091  		return 0x2d << 15 // andn
  2092  	case ASUB:
  2093  		return 0x22 << 15
  2094  	case ASUBU, ANEGW:
  2095  		return 0x22 << 15
  2096  	case ANOR:
  2097  		return 0x28 << 15
  2098  	case ASLL:
  2099  		return 0x2e << 15
  2100  	case ASRL:
  2101  		return 0x2f << 15
  2102  	case ASRA:
  2103  		return 0x30 << 15
  2104  	case AROTR:
  2105  		return 0x36 << 15
  2106  	case ASLLV:
  2107  		return 0x31 << 15
  2108  	case ASRLV:
  2109  		return 0x32 << 15
  2110  	case ASRAV:
  2111  		return 0x33 << 15
  2112  	case AROTRV:
  2113  		return 0x37 << 15
  2114  	case AADDV:
  2115  		return 0x21 << 15
  2116  	case AADDVU:
  2117  		return 0x21 << 15
  2118  	case ASUBV:
  2119  		return 0x23 << 15
  2120  	case ASUBVU, ANEGV:
  2121  		return 0x23 << 15
  2122  
  2123  	case AMUL:
  2124  		return 0x38 << 15 // mul.w
  2125  	case AMULU:
  2126  		return 0x38 << 15 // mul.w
  2127  	case AMULH:
  2128  		return 0x39 << 15 // mulh.w
  2129  	case AMULHU:
  2130  		return 0x3a << 15 // mulhu.w
  2131  	case AMULV:
  2132  		return 0x3b << 15 // mul.d
  2133  	case AMULVU:
  2134  		return 0x3b << 15 // mul.d
  2135  	case AMULHV:
  2136  		return 0x3c << 15 // mulh.d
  2137  	case AMULHVU:
  2138  		return 0x3d << 15 // mulhu.d
  2139  	case ADIV:
  2140  		return 0x40 << 15 // div.w
  2141  	case ADIVU:
  2142  		return 0x42 << 15 // div.wu
  2143  	case ADIVV:
  2144  		return 0x44 << 15 // div.d
  2145  	case ADIVVU:
  2146  		return 0x46 << 15 // div.du
  2147  	case AREM:
  2148  		return 0x41 << 15 // mod.w
  2149  	case AREMU:
  2150  		return 0x43 << 15 // mod.wu
  2151  	case AREMV:
  2152  		return 0x45 << 15 // mod.d
  2153  	case AREMVU:
  2154  		return 0x47 << 15 // mod.du
  2155  	case ACRCWBW:
  2156  		return 0x48 << 15 // crc.w.b.w
  2157  	case ACRCWHW:
  2158  		return 0x49 << 15 // crc.w.h.w
  2159  	case ACRCWWW:
  2160  		return 0x4a << 15 // crc.w.w.w
  2161  	case ACRCWVW:
  2162  		return 0x4b << 15 // crc.w.d.w
  2163  	case ACRCCWBW:
  2164  		return 0x4c << 15 // crcc.w.b.w
  2165  	case ACRCCWHW:
  2166  		return 0x4d << 15 // crcc.w.h.w
  2167  	case ACRCCWWW:
  2168  		return 0x4e << 15 // crcc.w.w.w
  2169  	case ACRCCWVW:
  2170  		return 0x4f << 15 // crcc.w.d.w
  2171  	case AJMP:
  2172  		return 0x13 << 26 // jirl r0, rj, 0
  2173  	case AJAL:
  2174  		return (0x13 << 26) | 1 // jirl r1, rj, 0
  2175  
  2176  	case ADIVF:
  2177  		return 0x20d << 15
  2178  	case ADIVD:
  2179  		return 0x20e << 15
  2180  	case AMULF:
  2181  		return 0x209 << 15
  2182  	case AMULD:
  2183  		return 0x20a << 15
  2184  	case ASUBF:
  2185  		return 0x205 << 15
  2186  	case ASUBD:
  2187  		return 0x206 << 15
  2188  	case AADDF:
  2189  		return 0x201 << 15
  2190  	case AADDD:
  2191  		return 0x202 << 15
  2192  	case ACMPEQF:
  2193  		return 0x0c1<<20 | 0x4<<15 // FCMP.CEQ.S
  2194  	case ACMPEQD:
  2195  		return 0x0c2<<20 | 0x4<<15 // FCMP.CEQ.D
  2196  	case ACMPGED:
  2197  		return 0x0c2<<20 | 0x7<<15 // FCMP.SLE.D
  2198  	case ACMPGEF:
  2199  		return 0x0c1<<20 | 0x7<<15 // FCMP.SLE.S
  2200  	case ACMPGTD:
  2201  		return 0x0c2<<20 | 0x3<<15 // FCMP.SLT.D
  2202  	case ACMPGTF:
  2203  		return 0x0c1<<20 | 0x3<<15 // FCMP.SLT.S
  2204  	case AFMINF:
  2205  		return 0x215 << 15 // fmin.s
  2206  	case AFMIND:
  2207  		return 0x216 << 15 // fmin.d
  2208  	case AFMAXF:
  2209  		return 0x211 << 15 // fmax.s
  2210  	case AFMAXD:
  2211  		return 0x212 << 15 // fmax.d
  2212  	case AFSCALEBF:
  2213  		return 0x221 << 15 // fscaleb.s
  2214  	case AFSCALEBD:
  2215  		return 0x222 << 15 // fscaleb.d
  2216  	case AFCOPYSGF:
  2217  		return 0x225 << 15 // fcopysign.s
  2218  	case AFCOPYSGD:
  2219  		return 0x226 << 15 // fcopysign.d
  2220  	case -AMOVB:
  2221  		return 0x07000 << 15 // ldx.b
  2222  	case -AMOVH:
  2223  		return 0x07008 << 15 // ldx.h
  2224  	case -AMOVW:
  2225  		return 0x07010 << 15 // ldx.w
  2226  	case -AMOVV:
  2227  		return 0x07018 << 15 // ldx.d
  2228  	case -AMOVBU:
  2229  		return 0x07040 << 15 // ldx.bu
  2230  	case -AMOVHU:
  2231  		return 0x07048 << 15 // ldx.hu
  2232  	case -AMOVWU:
  2233  		return 0x07050 << 15 // ldx.wu
  2234  	case AMOVB:
  2235  		return 0x07020 << 15 // stx.b
  2236  	case AMOVH:
  2237  		return 0x07028 << 15 // stx.h
  2238  	case AMOVW:
  2239  		return 0x07030 << 15 // stx.w
  2240  	case AMOVV:
  2241  		return 0x07038 << 15 // stx.d
  2242  	case -AMOVF:
  2243  		return 0x07060 << 15 // fldx.s
  2244  	case -AMOVD:
  2245  		return 0x07068 << 15 // fldx.d
  2246  	case AMOVF:
  2247  		return 0x07070 << 15 // fstx.s
  2248  	case AMOVD:
  2249  		return 0x07078 << 15 // fstx.d
  2250  	case -AVMOVQ:
  2251  		return 0x07080 << 15 // vldx
  2252  	case -AXVMOVQ:
  2253  		return 0x07090 << 15 // xvldx
  2254  	case AVMOVQ:
  2255  		return 0x07088 << 15 // vstx
  2256  	case AXVMOVQ:
  2257  		return 0x07098 << 15 // xvstx
  2258  	case AVSEQB:
  2259  		return 0x0e000 << 15 // vseq.b
  2260  	case AXVSEQB:
  2261  		return 0x0e800 << 15 // xvseq.b
  2262  	case AVSEQH:
  2263  		return 0x0e001 << 15 // vseq.h
  2264  	case AXVSEQH:
  2265  		return 0x0e801 << 15 // xvseq.h
  2266  	case AVSEQW:
  2267  		return 0x0e002 << 15 // vseq.w
  2268  	case AXVSEQW:
  2269  		return 0x0e802 << 15 // xvseq.w
  2270  	case AVSEQV:
  2271  		return 0x0e003 << 15 // vseq.d
  2272  	case AXVSEQV:
  2273  		return 0x0e803 << 15 // xvseq.d
  2274  	}
  2275  
  2276  	if a < 0 {
  2277  		c.ctxt.Diag("bad rrr opcode -%v", -a)
  2278  	} else {
  2279  		c.ctxt.Diag("bad rrr opcode %v", a)
  2280  	}
  2281  	return 0
  2282  }
  2283  
  2284  func (c *ctxt0) oprr(a obj.As) uint32 {
  2285  	switch a {
  2286  	case ACLOW:
  2287  		return 0x4 << 10 // clo.w
  2288  	case ACLZW:
  2289  		return 0x5 << 10 // clz.w
  2290  	case ACTOW:
  2291  		return 0x6 << 10 // cto.w
  2292  	case ACTZW:
  2293  		return 0x7 << 10 // ctz.w
  2294  	case ACLOV:
  2295  		return 0x8 << 10 // clo.d
  2296  	case ACLZV:
  2297  		return 0x9 << 10 // clz.d
  2298  	case ACTOV:
  2299  		return 0xa << 10 // cto.d
  2300  	case ACTZV:
  2301  		return 0xb << 10 // ctz.d
  2302  	case AREVB2H:
  2303  		return 0xc << 10 // revb.2h
  2304  	case AREVB4H:
  2305  		return 0xd << 10 // revb.4h
  2306  	case AREVB2W:
  2307  		return 0xe << 10 // revb.2w
  2308  	case AREVBV:
  2309  		return 0xf << 10 // revb.d
  2310  	case AREVH2W:
  2311  		return 0x10 << 10 // revh.2w
  2312  	case AREVHV:
  2313  		return 0x11 << 10 // revh.d
  2314  	case ABITREV4B:
  2315  		return 0x12 << 10 // bitrev.4b
  2316  	case ABITREV8B:
  2317  		return 0x13 << 10 // bitrev.8b
  2318  	case ABITREVW:
  2319  		return 0x14 << 10 // bitrev.w
  2320  	case ABITREVV:
  2321  		return 0x15 << 10 // bitrev.d
  2322  	case AEXTWH:
  2323  		return 0x16 << 10 // ext.w.h
  2324  	case AEXTWB:
  2325  		return 0x17 << 10 // ext.w.h
  2326  	case ACPUCFG:
  2327  		return 0x1b << 10
  2328  	case ARDTIMELW:
  2329  		return 0x18 << 10
  2330  	case ARDTIMEHW:
  2331  		return 0x19 << 10
  2332  	case ARDTIMED:
  2333  		return 0x1a << 10
  2334  	case ATRUNCFV:
  2335  		return 0x46a9 << 10
  2336  	case ATRUNCDV:
  2337  		return 0x46aa << 10
  2338  	case ATRUNCFW:
  2339  		return 0x46a1 << 10
  2340  	case ATRUNCDW:
  2341  		return 0x46a2 << 10
  2342  	case AMOVFV:
  2343  		return 0x46c9 << 10
  2344  	case AMOVDV:
  2345  		return 0x46ca << 10
  2346  	case AMOVVF:
  2347  		return 0x4746 << 10
  2348  	case AMOVVD:
  2349  		return 0x474a << 10
  2350  	case AMOVFW:
  2351  		return 0x46c1 << 10
  2352  	case AMOVDW:
  2353  		return 0x46c2 << 10
  2354  	case AMOVWF:
  2355  		return 0x4744 << 10
  2356  	case AMOVDF:
  2357  		return 0x4646 << 10
  2358  	case AMOVWD:
  2359  		return 0x4748 << 10
  2360  	case AMOVFD:
  2361  		return 0x4649 << 10
  2362  	case AABSF:
  2363  		return 0x4501 << 10
  2364  	case AABSD:
  2365  		return 0x4502 << 10
  2366  	case AMOVF:
  2367  		return 0x4525 << 10
  2368  	case AMOVD:
  2369  		return 0x4526 << 10
  2370  	case ANEGF:
  2371  		return 0x4505 << 10
  2372  	case ANEGD:
  2373  		return 0x4506 << 10
  2374  	case ASQRTF:
  2375  		return 0x4511 << 10
  2376  	case ASQRTD:
  2377  		return 0x4512 << 10
  2378  	case AFLOGBF:
  2379  		return 0x4509 << 10 // flogb.s
  2380  	case AFLOGBD:
  2381  		return 0x450a << 10 // flogb.d
  2382  	case AFCLASSF:
  2383  		return 0x450d << 10 // fclass.s
  2384  	case AFCLASSD:
  2385  		return 0x450e << 10 // fclass.d
  2386  	case AFFINTFW:
  2387  		return 0x4744 << 10 // ffint.s.w
  2388  	case AFFINTFV:
  2389  		return 0x4746 << 10 // ffint.s.l
  2390  	case AFFINTDW:
  2391  		return 0x4748 << 10 // ffint.d.w
  2392  	case AFFINTDV:
  2393  		return 0x474a << 10 // ffint.d.l
  2394  	case AFTINTWF:
  2395  		return 0x46c1 << 10 // ftint.w.s
  2396  	case AFTINTWD:
  2397  		return 0x46c2 << 10 // ftint.w.d
  2398  	case AFTINTVF:
  2399  		return 0x46c9 << 10 // ftint.l.s
  2400  	case AFTINTVD:
  2401  		return 0x46ca << 10 // ftint.l.d
  2402  	case AFTINTRMWF:
  2403  		return 0x4681 << 10 // ftintrm.w.s
  2404  	case AFTINTRMWD:
  2405  		return 0x4682 << 10 // ftintrm.w.d
  2406  	case AFTINTRMVF:
  2407  		return 0x4689 << 10 // ftintrm.l.s
  2408  	case AFTINTRMVD:
  2409  		return 0x468a << 10 // ftintrm.l.d
  2410  	case AFTINTRPWF:
  2411  		return 0x4691 << 10 // ftintrp.w.s
  2412  	case AFTINTRPWD:
  2413  		return 0x4692 << 10 // ftintrp.w.d
  2414  	case AFTINTRPVF:
  2415  		return 0x4699 << 10 // ftintrp.l.s
  2416  	case AFTINTRPVD:
  2417  		return 0x469a << 10 // ftintrp.l.d
  2418  	case AFTINTRZWF:
  2419  		return 0x46a1 << 10 // ftintrz.w.s
  2420  	case AFTINTRZWD:
  2421  		return 0x46a2 << 10 // ftintrz.w.d
  2422  	case AFTINTRZVF:
  2423  		return 0x46a9 << 10 // ftintrz.l.s
  2424  	case AFTINTRZVD:
  2425  		return 0x46aa << 10 // ftintrz.l.d
  2426  	case AFTINTRNEWF:
  2427  		return 0x46b1 << 10 // ftintrne.w.s
  2428  	case AFTINTRNEWD:
  2429  		return 0x46b2 << 10 // ftintrne.w.d
  2430  	case AFTINTRNEVF:
  2431  		return 0x46b9 << 10 // ftintrne.l.s
  2432  	case AFTINTRNEVD:
  2433  		return 0x46ba << 10 // ftintrne.l.d
  2434  	case AVPCNTB:
  2435  		return 0x1ca708 << 10 // vpcnt.b
  2436  	case AVPCNTH:
  2437  		return 0x1ca709 << 10 // vpcnt.h
  2438  	case AVPCNTW:
  2439  		return 0x1ca70a << 10 // vpcnt.w
  2440  	case AVPCNTV:
  2441  		return 0x1ca70b << 10 // vpcnt.v
  2442  	case AXVPCNTB:
  2443  		return 0x1da708 << 10 // xvpcnt.b
  2444  	case AXVPCNTH:
  2445  		return 0x1da709 << 10 // xvpcnt.h
  2446  	case AXVPCNTW:
  2447  		return 0x1da70a << 10 // xvpcnt.w
  2448  	case AXVPCNTV:
  2449  		return 0x1da70b << 10 // xvpcnt.v
  2450  	}
  2451  
  2452  	c.ctxt.Diag("bad rr opcode %v", a)
  2453  	return 0
  2454  }
  2455  
  2456  func (c *ctxt0) opi(a obj.As) uint32 {
  2457  	switch a {
  2458  	case ASYSCALL:
  2459  		return 0x56 << 15
  2460  	case ABREAK:
  2461  		return 0x54 << 15
  2462  	case ADBAR:
  2463  		return 0x70e4 << 15
  2464  	}
  2465  
  2466  	c.ctxt.Diag("bad ic opcode %v", a)
  2467  
  2468  	return 0
  2469  }
  2470  
  2471  func (c *ctxt0) opir(a obj.As) uint32 {
  2472  	switch a {
  2473  	case ALU12IW:
  2474  		return 0x0a << 25
  2475  	case ALU32ID:
  2476  		return 0x0b << 25
  2477  	case APCALAU12I:
  2478  		return 0x0d << 25
  2479  	case APCADDU12I:
  2480  		return 0x0e << 25
  2481  	}
  2482  	return 0
  2483  }
  2484  
  2485  func (c *ctxt0) opirr(a obj.As) uint32 {
  2486  	switch a {
  2487  	case AADD, AADDU:
  2488  		return 0x00a << 22
  2489  	case ASGT:
  2490  		return 0x008 << 22
  2491  	case ASGTU:
  2492  		return 0x009 << 22
  2493  	case AAND:
  2494  		return 0x00d << 22
  2495  	case AOR:
  2496  		return 0x00e << 22
  2497  	case ALU52ID:
  2498  		return 0x00c << 22
  2499  	case AXOR:
  2500  		return 0x00f << 22
  2501  	case ASLL:
  2502  		return 0x00081 << 15
  2503  	case ASRL:
  2504  		return 0x00089 << 15
  2505  	case ASRA:
  2506  		return 0x00091 << 15
  2507  	case AROTR:
  2508  		return 0x00099 << 15
  2509  	case AADDV:
  2510  		return 0x00b << 22
  2511  	case AADDVU:
  2512  		return 0x00b << 22
  2513  
  2514  	case AJMP:
  2515  		return 0x14 << 26
  2516  	case AJAL,
  2517  		obj.ADUFFZERO,
  2518  		obj.ADUFFCOPY:
  2519  		return 0x15 << 26
  2520  
  2521  	case AJIRL:
  2522  		return 0x13 << 26
  2523  	case ABLTU:
  2524  		return 0x1a << 26
  2525  	case ABLT, ABLTZ, ABGTZ:
  2526  		return 0x18 << 26
  2527  	case ABGEU:
  2528  		return 0x1b << 26
  2529  	case ABGE, ABGEZ, ABLEZ:
  2530  		return 0x19 << 26
  2531  	case -ABEQ: // beqz
  2532  		return 0x10 << 26
  2533  	case -ABNE: // bnez
  2534  		return 0x11 << 26
  2535  	case ABEQ:
  2536  		return 0x16 << 26
  2537  	case ABNE:
  2538  		return 0x17 << 26
  2539  	case ABFPT:
  2540  		return 0x12<<26 | 0x1<<8
  2541  	case ABFPF:
  2542  		return 0x12<<26 | 0x0<<8
  2543  
  2544  	case AMOVB,
  2545  		AMOVBU:
  2546  		return 0x0a4 << 22
  2547  	case AMOVH,
  2548  		AMOVHU:
  2549  		return 0x0a5 << 22
  2550  	case AMOVW,
  2551  		AMOVWU:
  2552  		return 0x0a6 << 22
  2553  	case AMOVV:
  2554  		return 0x0a7 << 22
  2555  	case AMOVF:
  2556  		return 0x0ad << 22
  2557  	case AMOVD:
  2558  		return 0x0af << 22
  2559  	case -AMOVB:
  2560  		return 0x0a0 << 22
  2561  	case -AMOVBU:
  2562  		return 0x0a8 << 22
  2563  	case -AMOVH:
  2564  		return 0x0a1 << 22
  2565  	case -AMOVHU:
  2566  		return 0x0a9 << 22
  2567  	case -AMOVW:
  2568  		return 0x0a2 << 22
  2569  	case -AMOVWU:
  2570  		return 0x0aa << 22
  2571  	case -AMOVV:
  2572  		return 0x0a3 << 22
  2573  	case -AMOVF:
  2574  		return 0x0ac << 22
  2575  	case -AMOVD:
  2576  		return 0x0ae << 22
  2577  	case -AVMOVQ:
  2578  		return 0x0b0 << 22 // vld
  2579  	case -AXVMOVQ:
  2580  		return 0x0b2 << 22 // xvld
  2581  	case AVMOVQ:
  2582  		return 0x0b1 << 22 // vst
  2583  	case AXVMOVQ:
  2584  		return 0x0b3 << 22 // xvst
  2585  	case ASLLV:
  2586  		return 0x0041 << 16
  2587  	case ASRLV:
  2588  		return 0x0045 << 16
  2589  	case ASRAV:
  2590  		return 0x0049 << 16
  2591  	case AROTRV:
  2592  		return 0x004d << 16
  2593  	case -ALL:
  2594  		return 0x020 << 24
  2595  	case -ALLV:
  2596  		return 0x022 << 24
  2597  	case ASC:
  2598  		return 0x021 << 24
  2599  	case ASCV:
  2600  		return 0x023 << 24
  2601  	}
  2602  
  2603  	if a < 0 {
  2604  		c.ctxt.Diag("bad irr opcode -%v", -a)
  2605  	} else {
  2606  		c.ctxt.Diag("bad irr opcode %v", a)
  2607  	}
  2608  	return 0
  2609  }
  2610  
  2611  func (c *ctxt0) opirir(a obj.As) uint32 {
  2612  	switch a {
  2613  	case ABSTRINSW:
  2614  		return 0x3<<21 | 0x0<<15 // bstrins.w
  2615  	case ABSTRINSV:
  2616  		return 0x2 << 22 // bstrins.d
  2617  	case ABSTRPICKW:
  2618  		return 0x3<<21 | 0x1<<15 // bstrpick.w
  2619  	case ABSTRPICKV:
  2620  		return 0x3 << 22 // bstrpick.d
  2621  	}
  2622  
  2623  	return 0
  2624  }
  2625  
  2626  func (c *ctxt0) specialFpMovInst(a obj.As, fclass int, tclass int) uint32 {
  2627  	switch a {
  2628  	case AMOVV:
  2629  		switch fclass {
  2630  		case C_REG:
  2631  			switch tclass {
  2632  			case C_FREG:
  2633  				return 0x452a << 10 // movgr2fr.d
  2634  			case C_FCCREG:
  2635  				return 0x4536 << 10 // movgr2cf
  2636  			case C_FCSRREG:
  2637  				return 0x4530 << 10 // movgr2fcsr
  2638  			}
  2639  		case C_FREG:
  2640  			switch tclass {
  2641  			case C_REG:
  2642  				return 0x452e << 10 // movfr2gr.d
  2643  			case C_FCCREG:
  2644  				return 0x4534 << 10 // movfr2cf
  2645  			}
  2646  		case C_FCCREG:
  2647  			switch tclass {
  2648  			case C_REG:
  2649  				return 0x4537 << 10 // movcf2gr
  2650  			case C_FREG:
  2651  				return 0x4535 << 10 // movcf2fr
  2652  			}
  2653  		case C_FCSRREG:
  2654  			switch tclass {
  2655  			case C_REG:
  2656  				return 0x4532 << 10 // movfcsr2gr
  2657  			}
  2658  		}
  2659  
  2660  	case AMOVW:
  2661  		switch fclass {
  2662  		case C_REG:
  2663  			switch tclass {
  2664  			case C_FREG:
  2665  				return 0x4529 << 10 // movgr2fr.w
  2666  			}
  2667  		case C_FREG:
  2668  			switch tclass {
  2669  			case C_REG:
  2670  				return 0x452d << 10 // movfr2gr.s
  2671  			}
  2672  		}
  2673  	}
  2674  
  2675  	c.ctxt.Diag("bad class combination: %s %s,%s\n", a, fclass, tclass)
  2676  
  2677  	return 0
  2678  }
  2679  
  2680  func (c *ctxt0) specialLsxMovInst(a obj.As, fReg, tReg int16) (op_code, index_mask uint32) {
  2681  	farng := (fReg >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK
  2682  	tarng := (tReg >> EXT_TYPE_SHIFT) & EXT_TYPE_MASK
  2683  	fclass := c.rclass(fReg)
  2684  	tclass := c.rclass(tReg)
  2685  
  2686  	switch fclass | (tclass << 16) {
  2687  	case C_REG | (C_ELEM << 16):
  2688  		// vmov Rn, Vd.<T>[index]
  2689  		switch a {
  2690  		case AVMOVQ:
  2691  			switch tarng {
  2692  			case ARNG_B:
  2693  				return (0x01CBAE << 14), 0xf // vinsgr2vr.b
  2694  			case ARNG_H:
  2695  				return (0x03975E << 13), 0x7 // vinsgr2vr.h
  2696  			case ARNG_W:
  2697  				return (0x072EBE << 12), 0x3 // vinsgr2vr.w
  2698  			case ARNG_V:
  2699  				return (0x0E5D7E << 11), 0x1 // vinsgr2vr.d
  2700  			}
  2701  		case AXVMOVQ:
  2702  			switch tarng {
  2703  			case ARNG_W:
  2704  				return (0x03B75E << 13), 0x7 // xvinsgr2vr.w
  2705  			case ARNG_V:
  2706  				return (0x076EBE << 12), 0x3 // xvinsgr2vr.d
  2707  			}
  2708  		}
  2709  
  2710  	case C_ELEM | (C_REG << 16):
  2711  		// vmov Vd.<T>[index], Rn
  2712  		switch a {
  2713  		case AVMOVQ:
  2714  			switch farng {
  2715  			case ARNG_B:
  2716  				return (0x01CBBE << 14), 0xf // vpickve2gr.b
  2717  			case ARNG_H:
  2718  				return (0x03977E << 13), 0x7 // vpickve2gr.h
  2719  			case ARNG_W:
  2720  				return (0x072EFE << 12), 0x3 // vpickve2gr.w
  2721  			case ARNG_V:
  2722  				return (0x0E5DFE << 11), 0x1 // vpickve2gr.d
  2723  			case ARNG_BU:
  2724  				return (0x01CBCE << 14), 0xf // vpickve2gr.bu
  2725  			case ARNG_HU:
  2726  				return (0x03979E << 13), 0x7 // vpickve2gr.hu
  2727  			case ARNG_WU:
  2728  				return (0x072F3E << 12), 0x3 // vpickve2gr.wu
  2729  			case ARNG_VU:
  2730  				return (0x0E5E7E << 11), 0x1 // vpickve2gr.du
  2731  			}
  2732  		case AXVMOVQ:
  2733  			switch farng {
  2734  			case ARNG_W:
  2735  				return (0x03B77E << 13), 0x7 // xvpickve2gr.w
  2736  			case ARNG_V:
  2737  				return (0x076EFE << 12), 0x3 // xvpickve2gr.d
  2738  			case ARNG_WU:
  2739  				return (0x03B79E << 13), 0x7 // xvpickve2gr.wu
  2740  			case ARNG_VU:
  2741  				return (0x076F3E << 12), 0x3 // xvpickve2gr.du
  2742  			}
  2743  		}
  2744  
  2745  	case C_REG | (C_ARNG << 16):
  2746  		// vmov Rn, Vd.<T>
  2747  		switch a {
  2748  		case AVMOVQ:
  2749  			switch tarng {
  2750  			case ARNG_16B:
  2751  				return (0x1CA7C0 << 10), 0x0 // vreplgr2vr.b
  2752  			case ARNG_8H:
  2753  				return (0x1CA7C1 << 10), 0x0 // vreplgr2vr.h
  2754  			case ARNG_4W:
  2755  				return (0x1CA7C2 << 10), 0x0 // vreplgr2vr.w
  2756  			case ARNG_2V:
  2757  				return (0x1CA7C3 << 10), 0x0 // vreplgr2vr.d
  2758  			}
  2759  		case AXVMOVQ:
  2760  			switch tarng {
  2761  			case ARNG_32B:
  2762  				return (0x1DA7C0 << 10), 0x0 // xvreplgr2vr.b
  2763  			case ARNG_16H:
  2764  				return (0x1DA7C1 << 10), 0x0 // xvreplgr2vr.h
  2765  			case ARNG_8W:
  2766  				return (0x1DA7C2 << 10), 0x0 // xvreplgr2vr.w
  2767  			case ARNG_4V:
  2768  				return (0x1DA7C3 << 10), 0x0 // xvreplgr2vr.d
  2769  			}
  2770  		}
  2771  
  2772  	case C_XREG | (C_ARNG << 16):
  2773  		// vmov  xj, xd.<T>
  2774  		switch a {
  2775  		case AVMOVQ:
  2776  			return 0, 0 // unsupported op
  2777  		case AXVMOVQ:
  2778  			switch tarng {
  2779  			case ARNG_32B:
  2780  				return (0x1DC1C0 << 10), 0x0 // xvreplve0.b
  2781  			case ARNG_16H:
  2782  				return (0x1DC1E0 << 10), 0x0 // xvreplve0.h
  2783  			case ARNG_8W:
  2784  				return (0x1DC1F0 << 10), 0x0 // xvreplve0.w
  2785  			case ARNG_4V:
  2786  				return (0x1DC1F8 << 10), 0x0 // xvreplve0.d
  2787  			case ARNG_2Q:
  2788  				return (0x1DC1FC << 10), 0x0 // xvreplve0.q
  2789  			}
  2790  		}
  2791  
  2792  	case C_XREG | (C_ELEM << 16):
  2793  		// vmov  xj, xd.<T>[index]
  2794  		switch a {
  2795  		case AVMOVQ:
  2796  			return 0, 0 // unsupported op
  2797  		case AXVMOVQ:
  2798  			switch tarng {
  2799  			case ARNG_W:
  2800  				return (0x03B7FE << 13), 0x7 // xvinsve0.w
  2801  			case ARNG_V:
  2802  				return (0x076FFE << 12), 0x3 // xvinsve0.d
  2803  			}
  2804  		}
  2805  
  2806  	case C_ELEM | (C_XREG << 16):
  2807  		// vmov  xj.<T>[index], xd
  2808  		switch a {
  2809  		case AVMOVQ:
  2810  			return 0, 0 // unsupported op
  2811  		case AXVMOVQ:
  2812  			switch farng {
  2813  			case ARNG_W:
  2814  				return (0x03B81E << 13), 0x7 // xvpickve.w
  2815  			case ARNG_V:
  2816  				return (0x07703E << 12), 0x3 // xvpickve.d
  2817  			}
  2818  		}
  2819  
  2820  	case C_ELEM | (C_ARNG << 16):
  2821  		// vmov  vj.<T>[index], vd.<T>
  2822  		switch a {
  2823  		case AVMOVQ:
  2824  			switch int32(farng) | (int32(tarng) << 16) {
  2825  			case int32(ARNG_B) | (int32(ARNG_16B) << 16):
  2826  				return (0x01CBDE << 14), 0xf // vreplvei.b
  2827  			case int32(ARNG_H) | (int32(ARNG_8H) << 16):
  2828  				return (0x0397BE << 13), 0x7 // vreplvei.h
  2829  			case int32(ARNG_W) | (int32(ARNG_4W) << 16):
  2830  				return (0x072F7E << 12), 0x3 // vreplvei.w
  2831  			case int32(ARNG_V) | (int32(ARNG_2V) << 16):
  2832  				return (0x0E5EFE << 11), 0x1 // vreplvei.d
  2833  			}
  2834  		case AXVMOVQ:
  2835  			return 0, 0 // unsupported op
  2836  		}
  2837  	}
  2838  
  2839  	return 0, 0
  2840  }
  2841  
  2842  func vshift(a obj.As) bool {
  2843  	switch a {
  2844  	case ASLLV,
  2845  		ASRLV,
  2846  		ASRAV,
  2847  		AROTRV:
  2848  		return true
  2849  	}
  2850  	return false
  2851  }
  2852  

View as plain text