Source file src/cmd/internal/obj/loong64/doc.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  /*
     6  Package loong64 implements a LoongArch64 assembler. Go assembly syntax is different from
     7  GNU LoongArch64 syntax, but we can still follow the general rules to map between them.
     8  
     9  # Register Convention
    10  
    11  	Name                |  Alias  | Meaning
    12  	-----------------------------------------------------------------------------------------------------------------------
    13  	R0                  | REGZERO | Constant zero
    14  	R1                  | REGLINK | Return address
    15  	R3                  | REGSP   | Stack pointer
    16  	R12,R13,R14,R15,R20 |         | For plt and trampoline, use with caution in assembly code, save before calling function
    17  	R22                 | REGG    | Goroutine pointer
    18  	R29                 | REGCTXT | Context for closures
    19  	R30                 | REGTMP  | Tmp register used by assembler
    20  
    21  # Instruction mnemonics mapping rules
    22  
    23  1. Bit widths represented by various instruction suffixes and prefixes
    24  V (vlong)     = 64 bit
    25  WU (word)     = 32 bit unsigned
    26  W (word)      = 32 bit
    27  H (half word) = 16 bit
    28  HU            = 16 bit unsigned
    29  B (byte)      = 8 bit
    30  BU            = 8 bit unsigned
    31  F (float)     = 32 bit float
    32  D (double)    = 64 bit float
    33  
    34  V  (LSX)      = 128 bit
    35  XV (LASX)     = 256 bit
    36  
    37  Examples:
    38  
    39  	MOVB  (R2), R3  // Load 8 bit memory data into R3 register
    40  	MOVH  (R2), R3  // Load 16 bit memory data into R3 register
    41  	MOVW  (R2), R3  // Load 32 bit memory data into R3 register
    42  	MOVV  (R2), R3  // Load 64 bit memory data into R3 register
    43  	VMOVQ  (R2), V1 // Load 128 bit memory data into V1 register
    44  	XVMOVQ (R2), X1 // Load 256 bit memory data into X1 register
    45  
    46  2. Align directive
    47  Go asm supports the PCALIGN directive, which indicates that the next instruction should
    48  be aligned to a specified boundary by padding with NOOP instructions. The alignment value
    49  supported on loong64 must be a power of 2 and in the range of [8, 2048].
    50  
    51  Examples:
    52  
    53  	PCALIGN	$16
    54  	MOVV	$2, R4	// This instruction is aligned with 16 bytes.
    55  	PCALIGN	$1024
    56  	MOVV	$3, R5	// This instruction is aligned with 1024 bytes.
    57  
    58  # On loong64, auto-align loop heads to 16-byte boundaries
    59  
    60  Examples:
    61  
    62  	TEXT ·Add(SB),NOSPLIT|NOFRAME,$0
    63  
    64  start:
    65  
    66  	MOVV	$1, R4	// This instruction is aligned with 16 bytes.
    67  	MOVV	$-1, R5
    68  	BNE	R5, start
    69  	RET
    70  
    71  # Register mapping rules
    72  
    73  1. All general-purpose register names are written as Rn.
    74  
    75  2. All floating-point register names are written as Fn.
    76  
    77  3. All LSX register names are written as Vn.
    78  
    79  4. All LASX register names are written as Xn.
    80  
    81  # Argument mapping rules
    82  
    83  1. The operands appear in left-to-right assignment order.
    84  
    85  Go reverses the arguments of most instructions.
    86  
    87  Examples:
    88  
    89  	ADDV	R11, R12, R13 <=> add.d R13, R12, R11
    90  	LLV	(R4), R7      <=> ll.d R7, R4
    91  	OR	R5, R6        <=> or R6, R6, R5
    92  
    93  Special Cases.
    94  (1) Argument order is the same as in the GNU LoongArch64 syntax for jump instructions.
    95  
    96  Examples:
    97  
    98  	BEQ	R0, R4, label1  <=>  beq R0, R4, label1
    99  	JMP	label1          <=>  b label1
   100  
   101  (2) BSTRINSW, BSTRINSV, BSTRPICKW, BSTRPICKV $<msb>, <Rj>, $<lsb>, <Rd>
   102  
   103  Examples:
   104  
   105  	BSTRPICKW $15, R4, $6, R5  <=>  bstrpick.w r5, r4, 15, 6
   106  
   107  2. Expressions for special arguments.
   108  
   109  Memory references: a base register and an offset register are written as (Rbase)(Roff).
   110  
   111  Examples:
   112  
   113  	MOVB (R4)(R5), R6  <=>  ldx.b R6, R4, R5
   114  	MOVV (R4)(R5), R6  <=>  ldx.d R6, R4, R5
   115  	MOVD (R4)(R5), F6  <=>  fldx.d F6, R4, R5
   116  	MOVB R6, (R4)(R5)  <=>  stx.b R6, R4, R5
   117  	MOVV R6, (R4)(R5)  <=>  stx.d R6, R4, R5
   118  	MOVD F6, (R4)(R5)  <=>  fstx.d F6, R4, R5
   119  
   120  3. Alphabetical list of SIMD instructions
   121  
   122  Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate),
   123  "ui3", "ui2", and "ui1" represent the related "index".
   124  
   125  3.1 Move general-purpose register to a vector element:
   126  
   127  	Instruction format:
   128  	        VMOVQ  Rj, <Vd>.<T>[index]
   129  
   130  	Mapping between Go and platform assembly:
   131  	       Go assembly       |      platform assembly     |          semantics
   132  	-------------------------------------------------------------------------------------
   133  	 VMOVQ  Rj, Vd.B[index]  |  vinsgr2vr.b  Vd, Rj, ui4  |  VR[vd].b[ui4] = GR[rj][7:0]
   134  	 VMOVQ  Rj, Vd.H[index]  |  vinsgr2vr.h  Vd, Rj, ui3  |  VR[vd].h[ui3] = GR[rj][15:0]
   135  	 VMOVQ  Rj, Vd.W[index]  |  vinsgr2vr.w  Vd, Rj, ui2  |  VR[vd].w[ui2] = GR[rj][31:0]
   136  	 VMOVQ  Rj, Vd.V[index]  |  vinsgr2vr.d  Vd, Rj, ui1  |  VR[vd].d[ui1] = GR[rj][63:0]
   137  	XVMOVQ  Rj, Xd.W[index]  | xvinsgr2vr.w  Xd, Rj, ui3  |  XR[xd].w[ui3] = GR[rj][31:0]
   138  	XVMOVQ  Rj, Xd.V[index]  | xvinsgr2vr.d  Xd, Rj, ui2  |  XR[xd].d[ui2] = GR[rj][63:0]
   139  
   140  3.2 Move vector element to general-purpose register
   141  
   142  	Instruction format:
   143  	        VMOVQ     <Vj>.<T>[index], Rd
   144  
   145  	Mapping between Go and platform assembly:
   146  	        Go assembly       |       platform assembly      |            semantics
   147  	---------------------------------------------------------------------------------------------
   148  	 VMOVQ  Vj.B[index],  Rd  |   vpickve2gr.b   rd, vj, ui4 | GR[rd] = SignExtend(VR[vj].b[ui4])
   149  	 VMOVQ  Vj.H[index],  Rd  |   vpickve2gr.h   rd, vj, ui3 | GR[rd] = SignExtend(VR[vj].h[ui3])
   150  	 VMOVQ  Vj.W[index],  Rd  |   vpickve2gr.w   rd, vj, ui2 | GR[rd] = SignExtend(VR[vj].w[ui2])
   151  	 VMOVQ  Vj.V[index],  Rd  |   vpickve2gr.d   rd, vj, ui1 | GR[rd] = SignExtend(VR[vj].d[ui1])
   152  	 VMOVQ  Vj.BU[index], Rd  |   vpickve2gr.bu  rd, vj, ui4 | GR[rd] = ZeroExtend(VR[vj].bu[ui4])
   153  	 VMOVQ  Vj.HU[index], Rd  |   vpickve2gr.hu  rd, vj, ui3 | GR[rd] = ZeroExtend(VR[vj].hu[ui3])
   154  	 VMOVQ  Vj.WU[index], Rd  |   vpickve2gr.wu  rd, vj, ui2 | GR[rd] = ZeroExtend(VR[vj].wu[ui2])
   155  	 VMOVQ  Vj.VU[index], Rd  |   vpickve2gr.du  rd, vj, ui1 | GR[rd] = ZeroExtend(VR[vj].du[ui1])
   156  	XVMOVQ  Xj.W[index],  Rd  |  xvpickve2gr.w   rd, xj, ui3 | GR[rd] = SignExtend(XR[xj].w[ui3])
   157  	XVMOVQ  Xj.V[index],  Rd  |  xvpickve2gr.d   rd, xj, ui2 | GR[rd] = SignExtend(XR[xj].d[ui2])
   158  	XVMOVQ  Xj.WU[index], Rd  |  xvpickve2gr.wu  rd, xj, ui3 | GR[rd] = ZeroExtend(XR[xj].wu[ui3])
   159  	XVMOVQ  Xj.VU[index], Rd  |  xvpickve2gr.du  rd, xj, ui2 | GR[rd] = ZeroExtend(XR[xj].du[ui2])
   160  
   161  3.3 Duplicate general-purpose register to vector.
   162  
   163  	Instruction format:
   164  	        VMOVQ    Rj, <Vd>.<T>
   165  
   166  	Mapping between Go and platform assembly:
   167  	   Go assembly      |    platform assembly    |                    semantics
   168  	------------------------------------------------------------------------------------------------
   169  	 VMOVQ  Rj, Vd.B16  |   vreplgr2vr.b  Vd, Rj  |  for i in range(16): VR[vd].b[i] = GR[rj][7:0]
   170  	 VMOVQ  Rj, Vd.H8   |   vreplgr2vr.h  Vd, Rj  |  for i in range(8) : VR[vd].h[i] = GR[rj][15:0]
   171  	 VMOVQ  Rj, Vd.W4   |   vreplgr2vr.w  Vd, Rj  |  for i in range(4) : VR[vd].w[i] = GR[rj][31:0]
   172  	 VMOVQ  Rj, Vd.V2   |   vreplgr2vr.d  Vd, Rj  |  for i in range(2) : VR[vd].d[i] = GR[rj][63:0]
   173  	XVMOVQ  Rj, Xd.B32  |  xvreplgr2vr.b  Xd, Rj  |  for i in range(32): XR[xd].b[i] = GR[rj][7:0]
   174  	XVMOVQ  Rj, Xd.H16  |  xvreplgr2vr.h  Xd, Rj  |  for i in range(16): XR[xd].h[i] = GR[rj][15:0]
   175  	XVMOVQ  Rj, Xd.W8   |  xvreplgr2vr.w  Xd, Rj  |  for i in range(8) : XR[xd].w[i] = GR[rj][31:0]
   176  	XVMOVQ  Rj, Xd.V4   |  xvreplgr2vr.d  Xd, Rj  |  for i in range(4) : XR[xd].d[i] = GR[rj][63:0]
   177  
   178  3.4 Replace vector elements
   179  
   180  	Instruction format:
   181  	        XVMOVQ    Xj, <Xd>.<T>
   182  
   183  	Mapping between Go and platform assembly:
   184  	   Go assembly      |   platform assembly   |                semantics
   185  	------------------------------------------------------------------------------------------------
   186  	XVMOVQ  Xj, Xd.B32  |  xvreplve0.b  Xd, Xj  | for i in range(32): XR[xd].b[i] = XR[xj].b[0]
   187  	XVMOVQ  Xj, Xd.H16  |  xvreplve0.h  Xd, Xj  | for i in range(16): XR[xd].h[i] = XR[xj].h[0]
   188  	XVMOVQ  Xj, Xd.W8   |  xvreplve0.w  Xd, Xj  | for i in range(8) : XR[xd].w[i] = XR[xj].w[0]
   189  	XVMOVQ  Xj, Xd.V4   |  xvreplve0.d  Xd, Xj  | for i in range(4) : XR[xd].d[i] = XR[xj].d[0]
   190  	XVMOVQ  Xj, Xd.Q2   |  xvreplve0.q  Xd, Xj  | for i in range(2) : XR[xd].q[i] = XR[xj].q[0]
   191  
   192  3.5 Move vector element to scalar
   193  
   194  	Instruction format:
   195  	        XVMOVQ  Xj, <Xd>.<T>[index]
   196  	        XVMOVQ  Xj.<T>[index], Xd
   197  
   198  	Mapping between Go and platform assembly:
   199  	       Go assembly        |     platform assembly     |               semantics
   200  	------------------------------------------------------------------------------------------------
   201  	 XVMOVQ  Xj, Xd.W[index]  |  xvinsve0.w   xd, xj, ui3 | XR[xd].w[ui3] = XR[xj].w[0]
   202  	 XVMOVQ  Xj, Xd.V[index]  |  xvinsve0.d   xd, xj, ui2 | XR[xd].d[ui2] = XR[xj].d[0]
   203  	 XVMOVQ  Xj.W[index], Xd  |  xvpickve.w   xd, xj, ui3 | XR[xd].w[0] = XR[xj].w[ui3], XR[xd][255:32] = 0
   204  	 XVMOVQ  Xj.V[index], Xd  |  xvpickve.d   xd, xj, ui2 | XR[xd].d[0] = XR[xj].d[ui2], XR[xd][255:64] = 0
   205  
   206  3.6 Move vector element to vector register.
   207  
   208  	Instruction format:
   209  	VMOVQ     <Vn>.<T>[index], Vn.<T>
   210  
   211  	Mapping between Go and platform assembly:
   212  	         Go assembly      |    platform assembly   |               semantics
   213  	VMOVQ Vj.B[index], Vd.B16 | vreplvei.b vd, vj, ui4 | for i in range(16): VR[vd].b[i] = VR[vj].b[ui4]
   214  	VMOVQ Vj.H[index], Vd.H8  | vreplvei.h vd, vj, ui3 | for i in range(8) : VR[vd].h[i] = VR[vj].h[ui3]
   215  	VMOVQ Vj.W[index], Vd.W4  | vreplvei.w vd, vj, ui2 | for i in range(4) : VR[vd].w[i] = VR[vj].w[ui2]
   216  	VMOVQ Vj.V[index], Vd.V2  | vreplvei.d vd, vj, ui1 | for i in range(2) : VR[vd].d[i] = VR[vj].d[ui1]
   217  
   218  3.7 Move vector register to vector register.
   219          Instruction format:
   220          VMOVQ     Vj, Vd
   221  
   222          Mapping between Go and platform assembly:
   223            Go assembly   |   platform assembly   |                         semantics
   224          VMOVQ   Vj, Vd  |  vslli.d vd, vj, 0x0  | for i in range(2) : VR[vd].D[i] = SLL(VR[vj].D[i], 0)
   225          XVMOVQ  Xj, Xd  | xvslli.d xd, xj, 0x0  | for i in range(4) : XR[xd].D[i] = SLL(XR[xj].D[i], 0)
   226  
   227  3.7 Load data from memory and broadcast to each element of a vector register.
   228  
   229  	Instruction format:
   230  	        VMOVQ    offset(Rj), <Vd>.<T>
   231  
   232  	Mapping between Go and platform assembly:
   233  	   Go assembly              |     platform assembly      |                                semantics
   234  	-------------------------------------------------------------------------------------------------------------------------------------------------------
   235  	 VMOVQ  offset(Rj), Vd.B16  |   vldrepl.b  Vd, Rj, si12  |  for i in range(16): VR[vd].b[i] = load 8 bit memory data from (GR[rj]+SignExtend(si12))
   236  	 VMOVQ  offset(Rj), Vd.H8   |   vldrepl.h  Vd, Rj, si11  |  for i in range(8) : VR[vd].h[i] = load 16 bit memory data from (GR[rj]+SignExtend(si11<<1))
   237  	 VMOVQ  offset(Rj), Vd.W4   |   vldrepl.w  Vd, Rj, si10  |  for i in range(4) : VR[vd].w[i] = load 32 bit memory data from (GR[rj]+SignExtend(si10<<2))
   238  	 VMOVQ  offset(Rj), Vd.V2   |   vldrepl.d  Vd, Rj, si9   |  for i in range(2) : VR[vd].d[i] = load 64 bit memory data from (GR[rj]+SignExtend(si9<<3))
   239  	XVMOVQ  offset(Rj), Xd.B32  |  xvldrepl.b  Xd, Rj, si12  |  for i in range(32): XR[xd].b[i] = load 8 bit memory data from (GR[rj]+SignExtend(si12))
   240  	XVMOVQ  offset(Rj), Xd.H16  |  xvldrepl.h  Xd, Rj, si11  |  for i in range(16): XR[xd].h[i] = load 16 bit memory data from (GR[rj]+SignExtend(si11<<1))
   241  	XVMOVQ  offset(Rj), Xd.W8   |  xvldrepl.w  Xd, Rj, si10  |  for i in range(8) : XR[xd].w[i] = load 32 bit memory data from (GR[rj]+SignExtend(si10<<2))
   242  	XVMOVQ  offset(Rj), Xd.V4   |  xvldrepl.d  Xd, Rj, si9   |  for i in range(4) : XR[xd].d[i] = load 64 bit memory data from (GR[rj]+SignExtend(si9<<3))
   243  
   244  	note: In Go assembly, for ease of understanding, offset represents the actual address offset.
   245  	      However, during platform encoding, the offset is shifted to increase the encodable offset range, as follows:
   246  
   247  	   Go assembly           |      platform assembly
   248           VMOVQ  1(R4), V5.B16    |      vldrepl.b  v5, r4, $1
   249           VMOVQ  2(R4), V5.H8     |      vldrepl.h  v5, r4, $1
   250           VMOVQ  8(R4), V5.W4     |      vldrepl.w  v5, r4, $2
   251           VMOVQ  8(R4), V5.V2     |      vldrepl.d  v5, r4, $1
   252  
   253  3.8 Vector permutation instruction
   254  	Instruction format:
   255  	VPERMIW    ui8, Vj, Vd
   256  
   257  	Mapping between Go and platform assembly:
   258  	     Go assembly     |   platform assembly   |                                 semantics
   259  	VPERMIW  ui8, Vj, Vd |  vpermi.w vd, vj, ui8 | VR[vd].W[0] = VR[vj].W[ui8[1:0]], VR[vd].W[1] = VR[vj].W[ui8[3:2]],
   260  	                     |                       | VR[vd].W[2] = VR[vd].W[ui8[5:4]], VR[vd].W[3] = VR[vd].W[ui8[7:6]]
   261  	XVPERMIW ui8, Xj, Xd | xvpermi.w xd, xj, ui8 | XR[xd].W[0] = XR[xj].W[ui8[1:0]],   XR[xd].W[1] = XR[xj].W[ui8[3:2]],
   262  	                     |                       | XR[xd].W[3] = XR[xd].W[ui8[7:6]],   XR[xd].W[2] = XR[xd].W[ui8[5:4]],
   263  	                     |                       | XR[xd].W[4] = XR[xj].W[ui8[1:0]+4], XR[xd].W[5] = XR[xj].W[ui8[3:2]+4],
   264  	                     |                       | XR[xd].W[6] = XR[xd].W[ui8[5:4]+4], XR[xd].W[7] = XR[xd].W[ui8[7:6]+4]
   265  	XVPERMIV ui8, Xj, Xd | xvpermi.d xd, xj, ui8 | XR[xd].D[0] = XR[xj].D[ui8[1:0]], XR[xd].D[1] = XR[xj].D[ui8[3:2]],
   266  	                     |                       | XR[xd].D[2] = XR[xj].D[ui8[5:4]], XR[xd].D[3] = XR[xj].D[ui8[7:6]]
   267  	XVPERMIQ ui8, Xj, Xd | xvpermi.q xd, xj, ui8 | vec = {XR[xd], XR[xj]}, XR[xd].Q[0] = vec.Q[ui8[1:0]], XR[xd].Q[1] = vec.Q[ui8[5:4]]
   268  
   269  3.9 Vector misc instruction
   270  
   271  3.9.1 {,X}VEXTRINS.{B,H,W,V}
   272  
   273  	Instruction format:
   274  	VEXTRINSB   ui8, Vj, Vd
   275  
   276  	Mapping between Go and platform assembly:
   277  	      Go assembly      |    platform assembly    |             semantics
   278  	 VEXTRINSB ui8, Vj, Vd |  vextrins.b vd, vj, ui8 | VR[vd].B[ui8[7:4]] = VR[vj].B[ui8[3:0]]
   279  	 VEXTRINSH ui8, Vj, Vd |  vextrins.h vd, vj, ui8 | VR[vd].H[ui8[6:4]] = VR[vj].H[ui8[2:0]]
   280  	 VEXTRINSW ui8, Vj, Vd |  vextrins.w vd, vj, ui8 | VR[vd].W[ui8[5:4]] = VR[vj].W[ui8[1:0]]
   281  	 VEXTRINSV ui8, Vj, Vd |  vextrins.d vd, vj, ui8 | VR[vd].D[ui8[4]] = VR[vj].D[ui8[0]]
   282  	XVEXTRINSB ui8, Xj, Xd | xvextrins.b xd, xj, ui8 | XR[xd].B[ui8[7:4]] = XR[xj].B[ui8[3:0]], XR[xd].B[ui8[7:4]+16] = XR[xj].B[ui8[3:0]+16]
   283  	XVEXTRINSH ui8, Xj, Xd | xvextrins.h xd, xj, ui8 | XR[xd].H[ui8[6:4]] = XR[xj].H[ui8[2:0]], XR[xd].H[ui8[6:4]+8] = XR[xj].H[ui8[2:0]+8]
   284  	XVEXTRINSW ui8, Xj, Xd | xvextrins.w xd, xj, ui8 | XR[xd].W[ui8[5:4]] = XR[xj].W[ui8[1:0]], XR[xd].W[ui8[5:4]+4] = XR[xj].W[ui8[1:0]+4]
   285  	XVEXTRINSV ui8, Xj, Xd | xvextrins.d xd, xj, ui8 | XR[xd].D[ui8[4]] = XR[xj].D[ui8[0]], XR[xd].D[ui8[4]+2] = XR[xj].D[ui8[0]+2]
   286  
   287  # Special instruction encoding definition and description on LoongArch
   288  
   289   1. DBAR hint encoding for LA664(Loongson 3A6000) and later micro-architectures, paraphrased
   290      from the Linux kernel implementation: https://git.kernel.org/torvalds/c/e031a5f3f1ed
   291  
   292      - Bit4: ordering or completion (0: completion, 1: ordering)
   293      - Bit3: barrier for previous read (0: true, 1: false)
   294      - Bit2: barrier for previous write (0: true, 1: false)
   295      - Bit1: barrier for succeeding read (0: true, 1: false)
   296      - Bit0: barrier for succeeding write (0: true, 1: false)
   297      - Hint 0x700: barrier for "read after read" from the same address
   298  
   299      Traditionally, on micro-architectures that do not support dbar grading such as LA464
   300      (Loongson 3A5000, 3C5000) all variants are treated as “dbar 0” (full barrier).
   301  
   302  2. Notes on using atomic operation instructions
   303  
   304    - AM*_DB.W[U]/V[U] instructions such as AMSWAPDBW not only complete the corresponding
   305      atomic operation sequence, but also implement the complete full data barrier function.
   306  
   307    - When using the AM*_.W[U]/D[U] instruction, registers rd and rj cannot be the same,
   308      otherwise an exception is triggered, and rd and rk cannot be the same, otherwise
   309      the execution result is uncertain.
   310  
   311  3. Prefetch instructions
   312      Instruction format:
   313        PRELD	offset(Rbase), $hint
   314        PRELDX	offset(Rbase), $n, $hint
   315  
   316      Mapping between Go and platform assembly:
   317                 Go assembly            |    platform assembly
   318        PRELD  offset(Rbase), $hint     | preld hint, Rbase, offset
   319        PRELDX offset(Rbase), $n, $hint | move rk, $x; preldx hint, Rbase, rk
   320  
   321        note: $x is the value after $n and offset are reassembled
   322  
   323      Definition of hint value:
   324        0: load to L1
   325        2: load to L3
   326        8: store to L1
   327  
   328        The meaning of the remaining values is not defined yet, and the processor executes them as NOP
   329  
   330      Definition of $n in the PRELDX instruction:
   331        bit[0]: address sequence, 0 indicating ascending and 1 indicating descending
   332        bits[11:1]:  block size, the value range is [16, 1024], and it must be an integer multiple of 16
   333        bits[20:12]: block num, the value range is [1, 256]
   334        bits[36:21]: stride, the value range is [0, 0xffff]
   335  
   336  4. ShiftAdd instructions
   337      Mapping between Go and platform assembly:
   338                  Go assembly            |    platform assembly
   339       ALSL.W/WU/V $Imm, Rj, Rk, Rd      |    alsl.w/wu/d rd, rj, rk, $imm
   340  
   341      Instruction encoding format is as follows:
   342  
   343  	| 31 ~ 17 | 16 ~ 15 | 14 ~ 10 | 9 ~ 5 | 4 ~ 0 |
   344  	|  opcode |   sa2   |   rk    |   rj  |   rd  |
   345  
   346      The alsl.w/wu/d series of instructions shift the data in rj left by sa2+1, add the value
   347      in rk, and write the result to rd.
   348  
   349      To allow programmers to directly write the desired shift amount in assembly code, we actually write
   350      the value of sa2+1 in the assembly code and then include the value of sa2 in the instruction encoding.
   351  
   352      For example:
   353  
   354              Go assembly      | instruction Encoding
   355          ALSLV $4, R4, R5, R6 |      002d9486
   356  
   357  5. Note of special memory access instructions
   358      Instruction format:
   359        MOVWP	offset(Rj), Rd
   360        MOVVP	offset(Rj), Rd
   361        MOVWP	Rd, offset(Rj)
   362        MOVVP	Rd, offset(Rj)
   363  
   364      Mapping between Go and platform assembly:
   365                 Go assembly      |      platform assembly
   366        MOVWP  offset(Rj), Rd     |    ldptr.w  rd, rj, si14
   367        MOVVP  offset(Rj), Rd     |    ldptr.d  rd, rj, si14
   368        MOVWP  Rd, offset(Rj)     |    stptr.w  rd, rj, si14
   369        MOVVP  Rd, offset(Rj)     |    stptr.d  rd, rj, si14
   370  
   371        note: In Go assembly, for ease of understanding, offset is a 16-bit immediate number representing
   372              the actual address offset, but in platform assembly, it needs a 14-bit immediate number.
   373  	    si14 = offset>>2
   374  
   375      The addressing calculation for the above instruction involves logically left-shifting the 14-bit
   376      immediate number si14 by 2 bits, then sign-extending it, and finally adding it to the value in the
   377      general-purpose register rj to obtain the sum.
   378  
   379      For example:
   380  
   381              Go assembly      |      platform assembly
   382           MOVWP  8(R4), R5    |      ldptr.w r5, r4, $2
   383  
   384  6. Note of special add instruction
   385      Mapping between Go and platform assembly:
   386                Go assembly        |      platform assembly
   387        ADDV16  si16<<16, Rj, Rd   |    addu16i.d  rd, rj, si16
   388  
   389        note: si16 is a 16-bit immediate number, and si16<<16 is the actual operand.
   390  
   391      The addu16i.d instruction logically left-shifts the 16-bit immediate number si16 by 16 bits, then
   392      sign-extends it. The resulting data is added to the [63:0] bits of data in the general-purpose register
   393      rj, and the sum is written into the general-purpose register rd.
   394      The addu16i.d instruction is used in conjunction with the ldptr.w/d and stptr.w/d instructions to
   395      accelerate access based on the GOT table in position-independent code.
   396  */
   397  
   398  package loong64
   399  

View as plain text