1 // Copyright 2016 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
// func addMulVVW1024(z, x *uint, y uint) (c uint)
//
// Entry point for 16-word operands (16 x 64 bits = 1024). It only loads the
// word count into R5 and tail-jumps to addMulVVWx, which reads z, x, y from
// and writes c to this function's argument frame.
TEXT ·addMulVVW1024(SB), $0-32
	MOVD	$16, R5	// n = 16 words
	JMP	addMulVVWx(SB)	// tail jump; addMulVVWx returns to our caller
13
// func addMulVVW1536(z, x *uint, y uint) (c uint)
//
// Entry point for 24-word operands (24 x 64 bits = 1536). It only loads the
// word count into R5 and tail-jumps to addMulVVWx, which reads z, x, y from
// and writes c to this function's argument frame.
TEXT ·addMulVVW1536(SB), $0-32
	MOVD	$24, R5	// n = 24 words
	JMP	addMulVVWx(SB)	// tail jump; addMulVVWx returns to our caller
18
// func addMulVVW2048(z, x *uint, y uint) (c uint)
//
// Entry point for 32-word operands (32 x 64 bits = 2048). It only loads the
// word count into R5 and tail-jumps to addMulVVWx, which reads z, x, y from
// and writes c to this function's argument frame.
TEXT ·addMulVVW2048(SB), $0-32
	MOVD	$32, R5	// n = 32 words
	JMP	addMulVVWx(SB)	// tail jump; addMulVVWx returns to our caller
23
// addMulVVWx is the shared body behind the ·addMulVVW1024/1536/2048 entry
// points. It is reached by a jump rather than a call, with the word count n
// already in R5. z, x and y are read from, and c is written to, the argument
// frame of the function that jumped here, which is why the FP references
// below use the (z, x, y) -> c layout even though this TEXT declares no
// arguments of its own.
//
// For each word i in [0, n):
//	(hi, lo) = x[i] * y
//	lo += z[i]; hi += carry
//	lo += c;    hi += carry
//	z[i] = lo;  c = hi
// and the final c is stored as the result.
//
// Register use:
//	R0   constant zero, so ADDE folds in only the carry
//	R1   byte offset into z and x (i*8)
//	R2   z
//	R4   c, the running carry word
//	R5   n (set by the entry point)
//	R6   x[i], then the high word of x[i]*y
//	R7   i
//	R8   x
//	R9   y
//	R10  z[i]
//	R11  low word of x[i]*y (see NOTE)
//	R12  n rounded down to an even count
//
// NOTE(review): R11 is never written explicitly. The code appears to rely on
// MULHDU leaving the low 64 bits of the product in R11 and using R10 as
// scratch, which would explain why z[i] is loaded into R10 only after the
// multiply. Confirm against the assembler's expansion of MULHDU before
// reordering anything in the step sequences.
TEXT addMulVVWx(SB), NOFRAME|NOSPLIT, $0
	MOVD	z+0(FP), R2
	MOVD	x+8(FP), R8
	MOVD	y+16(FP), R9

	MOVD	$0, R0	// zero operand for the ADDE carry folds
	MOVD	$0, R1	// byte offset = 0
	MOVD	$0, R4	// c = 0
	MOVD	$0, R7	// i = 0

	MOVD	R5, R12
	AND	$-2, R12	// R12 = n with the low bit cleared
	CMPBLT	R5, $2, remaining	// fewer than two words: skip the paired loop

pairLoop:
	// Word i.
	MOVD	(R8)(R1*1), R6	// x[i]
	MULHDU	R9, R6	// R6 = high word of x[i]*y
	MOVD	(R2)(R1*1), R10	// z[i]
	ADDC	R10, R11	// lo += z[i]
	ADDE	R0, R6	// hi += carry
	ADDC	R4, R11	// lo += c
	ADDE	R0, R6	// hi += carry
	MOVD	R6, R4	// c = hi
	MOVD	R11, (R2)(R1*1)	// z[i] = lo

	// Word i+1.
	MOVD	8(R8)(R1*1), R6	// x[i+1]
	MULHDU	R9, R6	// R6 = high word of x[i+1]*y
	MOVD	8(R2)(R1*1), R10	// z[i+1]
	ADDC	R10, R11	// lo += z[i+1]
	ADDE	R0, R6	// hi += carry
	ADDC	R4, R11	// lo += c
	ADDE	R0, R6	// hi += carry
	MOVD	R6, R4	// c = hi
	MOVD	R11, 8(R2)(R1*1)	// z[i+1] = lo

	ADD	$16, R1	// advance the byte offset by two words
	ADD	$2, R7	// i += 2

	CMPBLT	R7, R12, pairLoop
	JMP	remaining

singleLoop:
	// TODO: drop unused single-step loop.
	// With the counts loaded by the three entry points in this file
	// (16, 24, 32, all even) the paired loop consumes every word and this
	// body is not reached.
	MOVD	(R8)(R1*1), R6	// x[i]
	MULHDU	R9, R6	// R6 = high word of x[i]*y
	MOVD	(R2)(R1*1), R10	// z[i]
	ADDC	R10, R11	// lo += z[i]
	ADDE	R0, R6	// hi += carry
	ADDC	R4, R11	// lo += c
	ADDE	R0, R6	// hi += carry
	MOVD	R6, R4	// c = hi
	MOVD	R11, (R2)(R1*1)	// z[i] = lo

	ADD	$8, R1	// advance the byte offset by one word
	ADD	$1, R7	// i++

remaining:
	CMPBLT	R7, R5, singleLoop	// i < n

	MOVD	R4, c+24(FP)
	RET
86
View as plain text