Text file src/crypto/internal/bigmod/nat_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // derived from crypto/internal/bigmod/nat_riscv64.s
     6  
     7  //go:build !purego
     8  
     9  #include "textflag.h"
    10  
    11  // func addMulVVW1024(z, x *uint, y uint) (c uint)
        //
        // Entry point for 1024-bit operands. It loads the word count 16 into R8
        // (16 words of 64 bits = 1024 bits) and jumps to addMulVVWx, which reads
        // z, x and y and writes c at FP offsets 0, 8, 16 and 24 -- the 32 bytes
        // of arguments and results declared by the $0-32 frame spec.
    12  TEXT ·addMulVVW1024(SB),$0-32
    13  	MOVV	$16, R8	// R8 = number of 64-bit words for addMulVVWx to process
    14  	JMP	addMulVVWx(SB)	// jump, not call: the RET is executed in addMulVVWx
    15  
    16  // func addMulVVW1536(z, x *uint, y uint) (c uint)
        //
        // Entry point for 1536-bit operands. It loads the word count 24 into R8
        // (24 words of 64 bits = 1536 bits) and jumps to addMulVVWx, which reads
        // z, x and y and writes c at FP offsets 0, 8, 16 and 24 -- the 32 bytes
        // of arguments and results declared by the $0-32 frame spec.
    17  TEXT ·addMulVVW1536(SB),$0-32
    18  	MOVV	$24, R8	// R8 = number of 64-bit words for addMulVVWx to process
    19  	JMP	addMulVVWx(SB)	// jump, not call: the RET is executed in addMulVVWx
    20  
    21  // func addMulVVW2048(z, x *uint, y uint) (c uint)
        //
        // Entry point for 2048-bit operands. It loads the word count 32 into R8
        // (32 words of 64 bits = 2048 bits) and jumps to addMulVVWx, which reads
        // z, x and y and writes c at FP offsets 0, 8, 16 and 24 -- the 32 bytes
        // of arguments and results declared by the $0-32 frame spec.
    22  TEXT ·addMulVVW2048(SB),$0-32
    23  	MOVV	$32, R8	// R8 = number of 64-bit words for addMulVVWx to process
    24  	JMP	addMulVVWx(SB)	// jump, not call: the RET is executed in addMulVVWx
    25  
    26  TEXT addMulVVWx(SB),NOFRAME|NOSPLIT,$0
        	// Shared loop behind addMulVVW1024/1536/2048, which reach it by JMP
        	// with the word count in R8. For each 64-bit word i it computes
        	//	z[i]  = low 64 bits of  (x[i]*y + z[i] + carry)
        	//	carry = high 64 bits of (x[i]*y + z[i] + carry)
        	// handling four words per loop iteration, and finally stores the
        	// carry to the result slot c+24(FP).
        	//
        	// Register use:
        	//	R4       pointer into z, advanced by 32 bytes per iteration
        	//	R6       pointer into x, advanced by 32 bytes per iteration
        	//	R5       y
        	//	R7       running carry word, starts at 0
        	//	R8       words remaining; it is decremented by 4 and only tested
        	//	         for equality with zero, so it must be a multiple of 4
        	//	R9-R12   z[0..3] (loaded, updated in place, stored back)
        	//	R13-R16  x[0..3], then overwritten with the low product words
        	//	R17, R24, R25, R26  high product words for words 0..3
        	//	R18, R19 scratch: partial sum and carry bit
        	//
        	// The high word never overflows when the carry bits are added:
        	// (2^64-1)^2 + 2*(2^64-1) = 2^128-1 still fits in 128 bits.
        	// The only branches test R8; none depends on z, x or y.
    27  	MOVV	z+0(FP), R4	// R4 = z
    28  	MOVV	x+8(FP), R6	// R6 = x
    29  	MOVV	y+16(FP), R5	// R5 = y
    30  	MOVV	$0, R7	// carry = 0
    31  
    32  	BEQ	R8, R0, done	// zero words requested: return carry 0 (R0 is the zero register)
    33  loop:
    34  	MOVV	0*8(R4), R9	// z[0]
    35  	MOVV	1*8(R4), R10	// z[1]
    36  	MOVV	2*8(R4), R11	// z[2]
    37  	MOVV	3*8(R4), R12	// z[3]
    38  
    39  	MOVV	0*8(R6), R13	// x[0]
    40  	MOVV	1*8(R6), R14	// x[1]
    41  	MOVV	2*8(R6), R15	// x[2]
    42  	MOVV	3*8(R6), R16	// x[3]
    43  
    44  	MULHVU	R13, R5, R17	// R17 = high 64 bits of x[0] * y
    45  	MULV	R13, R5, R13	// R13 = low 64 bits of x[0] * y
    46  	ADDV	R13, R9, R18	// R18 = low product + z[0]
    47  	SGTU	R13, R18, R19	// R19 = carry out of that add (sum wrapped below R13)
    48  	ADDV	R17, R19, R17	// fold the carry into the high word
    49  	ADDV	R18, R7, R9	// z[0] = R18 + incoming carry
    50  	SGTU	R18, R9, R19	// R19 = carry out of adding the incoming carry
    51  	ADDV	R17, R19, R7	// carry for word 1 = high word + carry bit
    52  
    53  	MULHVU	R14, R5, R24	// R24 = high 64 bits of x[1] * y
    54  	MULV	R14, R5, R14	// R14 = low 64 bits of x[1] * y
    55  	ADDV	R14, R10, R18	// R18 = low product + z[1]
    56  	SGTU	R14, R18, R19	// R19 = carry out of that add (sum wrapped below R14)
    57  	ADDV	R24, R19, R24	// fold the carry into the high word
    58  	ADDV	R18, R7, R10	// z[1] = R18 + incoming carry
    59  	SGTU	R18, R10, R19	// R19 = carry out of adding the incoming carry
    60  	ADDV	R24, R19, R7	// carry for word 2 = high word + carry bit
    61  
    62  	MULHVU	R15, R5, R25	// R25 = high 64 bits of x[2] * y
    63  	MULV	R15, R5, R15	// R15 = low 64 bits of x[2] * y
    64  	ADDV	R15, R11, R18	// R18 = low product + z[2]
    65  	SGTU	R15, R18, R19	// R19 = carry out of that add (sum wrapped below R15)
    66  	ADDV	R25, R19, R25	// fold the carry into the high word
    67  	ADDV	R18, R7, R11	// z[2] = R18 + incoming carry
    68  	SGTU	R18, R11, R19	// R19 = carry out of adding the incoming carry
    69  	ADDV	R25, R19, R7	// carry for word 3 = high word + carry bit
    70  
    71  	MULHVU	R16, R5, R26	// R26 = high 64 bits of x[3] * y
    72  	MULV	R16, R5, R16	// R16 = low 64 bits of x[3] * y
    73  	ADDV	R16, R12, R18	// R18 = low product + z[3]
    74  	SGTU	R16, R18, R19	// R19 = carry out of that add (sum wrapped below R16)
    75  	ADDV	R26, R19, R26	// fold the carry into the high word
    76  	ADDV	R18, R7, R12	// z[3] = R18 + incoming carry
    77  	SGTU	R18, R12, R19	// R19 = carry out of adding the incoming carry
    78  	ADDV	R26, R19, R7	// carry into the next iteration (or the final result)
    79  
    80  	MOVV	R9, 0*8(R4)	// store updated z[0]
    81  	MOVV	R10, 1*8(R4)	// store updated z[1]
    82  	MOVV	R11, 2*8(R4)	// store updated z[2]
    83  	MOVV	R12, 3*8(R4)	// store updated z[3]
    84  
    85  	ADDV	$32, R4	// advance z by 4 words
    86  	ADDV	$32, R6	// advance x by 4 words
    87  
    88  	SUBV	$4, R8	// 4 fewer words remaining
    89  	BNE	R8, R0, loop	// repeat until the count reaches exactly zero
    90  
    91  done:
    92  	MOVV	R7, c+24(FP)	// return the final carry word as c
    93  	RET
    94  

View as plain text