1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 // derived from crypto/internal/bigmod/nat_riscv64.s
6
7 //go:build !purego
8
9 #include "textflag.h"
10
11 // func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// addMulVVW1024 loads a limb count of 16 into R8 and tail-jumps to addMulVVWx.
// That loop moves 8-byte limbs, so 16 limbs cover 1024 bits.
// Frame $0-32: no local frame, 32 bytes of arguments plus result.
12 TEXT ·addMulVVW1024(SB),$0-32
13 MOVV $16, R8 // limb count consumed by addMulVVWx (4 limbs per loop pass)
14 JMP addMulVVWx(SB) // JMP, not CALL: addMulVVWx addresses z, x, y and c via FP itself
15
16 // func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// addMulVVW1536 loads a limb count of 24 into R8 and tail-jumps to addMulVVWx.
// That loop moves 8-byte limbs, so 24 limbs cover 1536 bits.
// Frame $0-32: no local frame, 32 bytes of arguments plus result.
17 TEXT ·addMulVVW1536(SB),$0-32
18 MOVV $24, R8 // limb count consumed by addMulVVWx (4 limbs per loop pass)
19 JMP addMulVVWx(SB) // JMP, not CALL: addMulVVWx addresses z, x, y and c via FP itself
20
21 // func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// addMulVVW2048 loads a limb count of 32 into R8 and tail-jumps to addMulVVWx.
// That loop moves 8-byte limbs, so 32 limbs cover 2048 bits.
// Frame $0-32: no local frame, 32 bytes of arguments plus result.
22 TEXT ·addMulVVW2048(SB),$0-32
23 MOVV $32, R8 // limb count consumed by addMulVVWx (4 limbs per loop pass)
24 JMP addMulVVWx(SB) // JMP, not CALL: addMulVVWx addresses z, x, y and c via FP itself
25
// addMulVVWx is the shared loop behind the addMulVVW1024/1536/2048 entry
// points. It is reached by JMP with the limb count already in R8, and it
// reads z, x, y and writes c through FP offsets 0, 8, 16 and 24.
//
// For every 64-bit limb i it forms x[i]*y + z[i] + carry, stores the low
// 64 bits back into z[i] and carries the high 64 bits into the next limb.
// The carry left after the last limb is stored to c.
//
// Register use:
//   R4            cursor into z, advanced 32 bytes per pass
//   R6            cursor into x, advanced 32 bytes per pass
//   R5            multiplier y
//   R7            running carry between limbs
//   R8            limbs remaining; reduced by 4 per pass and compared with
//                 zero, so a non-zero count must be a multiple of 4
//                 (the entry points pass 16, 24 and 32)
//   R9-R12        z[0..3] as loaded, then the updated limbs to store
//   R13-R16       x[0..3] as loaded, then the low halves of the products
//   R17, R24-R26  high halves of the products
//   R18, R19      scratch: partial sum and the carry bit of the last add
26 TEXT addMulVVWx(SB),NOFRAME|NOSPLIT,$0
27 MOVV z+0(FP), R4 // R4 = z
28 MOVV x+8(FP), R6 // R6 = x
29 MOVV y+16(FP), R5 // R5 = y
30 MOVV $0, R7 // carry starts at 0
31
32 BEQ R8, R0, done // zero limbs requested: store the zero carry and return
33 loop:
34 MOVV 0*8(R4), R9 // z[0]
35 MOVV 1*8(R4), R10 // z[1]
36 MOVV 2*8(R4), R11 // z[2]
37 MOVV 3*8(R4), R12 // z[3]
38
39 MOVV 0*8(R6), R13 // x[0]
40 MOVV 1*8(R6), R14 // x[1]
41 MOVV 2*8(R6), R15 // x[2]
42 MOVV 3*8(R6), R16 // x[3]
43
44 MULHVU R13, R5, R17 // R17 = high 64 bits of x[0] * y (unsigned)
45 MULV R13, R5, R13 // R13 = low 64 bits of x[0] * y
46 ADDV R13, R9, R18 // R18 = low + z[0]
47 SGTU R13, R18, R19 // R19 = 1 if that add wrapped (low > sum, unsigned)
48 ADDV R17, R19, R17 // fold the wrap into the high half
49 ADDV R18, R7, R9 // R9 = low + z[0] + carry-in: new z[0]
50 SGTU R18, R9, R19 // R19 = 1 if adding the carry-in wrapped
51 ADDV R17, R19, R7 // carry-out; cannot overflow since x*y + z + c < 2^128
52
53 MULHVU R14, R5, R24 // R24 = high 64 bits of x[1] * y (unsigned)
54 MULV R14, R5, R14 // R14 = low 64 bits of x[1] * y
55 ADDV R14, R10, R18 // R18 = low + z[1]
56 SGTU R14, R18, R19 // R19 = 1 if that add wrapped
57 ADDV R24, R19, R24 // fold the wrap into the high half
58 ADDV R18, R7, R10 // R10 = low + z[1] + carry-in: new z[1]
59 SGTU R18, R10, R19 // R19 = 1 if adding the carry-in wrapped
60 ADDV R24, R19, R7 // carry-out for the next limb
61
62 MULHVU R15, R5, R25 // R25 = high 64 bits of x[2] * y (unsigned)
63 MULV R15, R5, R15 // R15 = low 64 bits of x[2] * y
64 ADDV R15, R11, R18 // R18 = low + z[2]
65 SGTU R15, R18, R19 // R19 = 1 if that add wrapped
66 ADDV R25, R19, R25 // fold the wrap into the high half
67 ADDV R18, R7, R11 // R11 = low + z[2] + carry-in: new z[2]
68 SGTU R18, R11, R19 // R19 = 1 if adding the carry-in wrapped
69 ADDV R25, R19, R7 // carry-out for the next limb
70
71 MULHVU R16, R5, R26 // R26 = high 64 bits of x[3] * y (unsigned)
72 MULV R16, R5, R16 // R16 = low 64 bits of x[3] * y
73 ADDV R16, R12, R18 // R18 = low + z[3]
74 SGTU R16, R18, R19 // R19 = 1 if that add wrapped
75 ADDV R26, R19, R26 // fold the wrap into the high half
76 ADDV R18, R7, R12 // R12 = low + z[3] + carry-in: new z[3]
77 SGTU R18, R12, R19 // R19 = 1 if adding the carry-in wrapped
78 ADDV R26, R19, R7 // carry-out, carried into the next pass
79
80 MOVV R9, 0*8(R4) // z[0]
81 MOVV R10, 1*8(R4) // z[1]
82 MOVV R11, 2*8(R4) // z[2]
83 MOVV R12, 3*8(R4) // z[3]
84
85 ADDV $32, R4 // step z past the 4 limbs just written
86 ADDV $32, R6 // step x past the 4 limbs just read
87
88 SUBV $4, R8 // 4 limbs done
89 BNE R8, R0, loop // repeat until the count reaches exactly zero
90
91 done:
92 MOVV R7, c+24(FP) // return the final carry in c
93 RET
94
View as plain text