// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build gc && !purego && (ppc64 || ppc64le)
6
#include "textflag.h"

// This was ported from the amd64 implementation.

// LE_MOVD, LE_MOVWZ and LE_MOVHZ load 8, 4 and 2 bytes respectively.
// When assembling for ppc64le they are the ordinary loads; otherwise
// they are the byte-reversing loads, so that the rest of this file can
// read message bytes in little-endian order on either target.
#ifdef GOARCH_ppc64le
#define LE_MOVD MOVD
#define LE_MOVWZ MOVWZ
#define LE_MOVHZ MOVHZ
#else
#define LE_MOVD MOVDBR
#define LE_MOVWZ MOVWBR
#define LE_MOVHZ MOVHBR
#endif
20
// POLY1305_ADD adds one full 16-byte message block to the accumulator
// and advances the message pointer.
//
//   t0 = 8 bytes at msg+R0, t1 = 8 bytes at msg+R24 (little-endian loads)
//   h2:h1:h0 += 1:t1:t0     (carry propagated from h0 up through h2)
//   msg += 16
//
// The constant 1 added into h2 is the padding bit that sits directly
// above the 128-bit block.
//
// The macro does not set up its index registers: the caller must have
// loaded R24 with 8, and R0 is expected to read as zero so that the two
// loads are consecutive.
// NOTE(review): R0 == 0 is the usual Go ppc64 convention -- confirm.
//
// Clobbers t0, t1, t2 and the carry bit.
#define POLY1305_ADD(msg, h0, h1, h2, t0, t1, t2) \
	LE_MOVD (msg)( R0), t0; \
	LE_MOVD (msg)(R24), t1; \
	MOVD $1, t2;     \
	ADDC t0, h0, h0; \
	ADDE t1, h1, h1; \
	ADDE t2, h2;     \
	ADD  $16, msg
29
// POLY1305_MUL multiplies the accumulator h2:h1:h0 by r1:r0 and folds
// the result back into h2:h1:h0.
//
// Step 1 -- product, held as four 64-bit words t3:t2:t1:t0:
//
//   t0 = lo(r0*h0)
//   t1 = hi(r0*h0) + lo(r0*h1) + lo(r1*h0)
//   t2 = hi(r0*h1) + lo(r0*h2) + hi(r1*h0) + lo(r1*h1) + carries
//   t3 = hi(r1*h1) + lo(r1*h2) + carries
//
// Only the low halves of the products involving h2 are used. While the
// product is being formed h0 is reused as a temporary (first lo(r1*h0),
// then hi(r1*h0) plus carry).
//
// Step 2 -- fold. With c = everything in the product at bit 130 and
// above, the macro keeps the low 130 bits and adds 5*c, computed as
// 4*c + c (2^130 is congruent to 5 modulo 2^130 - 5):
//
//   h2        = t2 & 3
//   h1:h0     = t1:t0 + t3:(t2 & ~3)                        // + 4*c
//   h2:h1:h0 += (t3 >> 2):((t2 >> 2) | (t3 << 62))          // + c
//
// Carries out of h1 are added into h2 after each of the two additions.
// No final comparison against the modulus is made here, so the result
// is only partially reduced.
//
// r0 and r1 are read but never written. Clobbers t0-t5 and the carry
// bit. The instruction order interleaves multiplies with the carry
// chain; every ADDE/ADDZE consumes the carry of the ADDC/ADDE directly
// before it in program order, so instructions must not be reordered
// across those pairs.
#define POLY1305_MUL(h0, h1, h2, r0, r1, t0, t1, t2, t3, t4, t5) \
	MULLD  r0, h0, t0;  \
	MULHDU r0, h0, t1;  \
	MULLD  r0, h1, t4;  \
	MULHDU r0, h1, t5;  \
	ADDC   t4, t1, t1;  \
	MULLD  r0, h2, t2;  \
	MULHDU r1, h0, t4;  \
	MULLD  r1, h0, h0;  \
	ADDE   t5, t2, t2;  \
	ADDC   h0, t1, t1;  \
	MULLD  h2, r1, t3;  \
	ADDZE  t4, h0;      \
	MULHDU r1, h1, t5;  \
	MULLD  r1, h1, t4;  \
	ADDC   t4, t2, t2;  \
	ADDE   t5, t3, t3;  \
	ADDC   h0, t2, t2;  \
	MOVD   $-4, t4;     \
	ADDZE  t3;          \
	RLDICL $0, t2, $62, h2; \
	AND    t2, t4, h0;  \
	ADDC   t0, h0, h0;  \
	ADDE   t3, t1, h1;  \
	SLD    $62, t3, t4; \
	SRD    $2, t2;      \
	ADDZE  h2;          \
	OR     t4, t2, t2;  \
	SRD    $2, t3;      \
	ADDC   t2, h0, h0;  \
	ADDE   t3, h1, h1;  \
	ADDZE  h2
62
// func update(state *[7]uint64, msg []byte)
//
// update absorbs msg into the accumulator stored in state.
//
// Only the first five words of state are accessed here:
//   state[0..2]  h0, h1, h2   loaded on entry, written back at done
//   state[3..4]  r0, r1       loaded on entry, never written
//
// Register use:
//   R3           state pointer
//   R4           current position in msg
//   R5           bytes remaining
//   R8, R9, R10  h0, h1, h2
//   R11, R12     r0, r1
//   R24          constant 8 (index used by POLY1305_ADD for the high word)
//   R16, R17     low / high word of the padded final block (tail path);
//                also temporaries of POLY1305_MUL
//   R14, R18, R20-R23  scratch
//
// Full 16-byte blocks are handled by loop. A trailing block of 1..15
// bytes is assembled in R17:R16 with a single 1 bit placed directly
// after the last message byte, added to h, and then sent through
// multiply one more time.
TEXT ·update(SB), $0-32
	MOVD	state+0(FP), R3
	MOVD	msg_base+8(FP), R4
	MOVD	msg_len+16(FP), R5

	MOVD	0(R3), R8   // h0
	MOVD	8(R3), R9   // h1
	MOVD	16(R3), R10 // h2
	MOVD	24(R3), R11 // r0
	MOVD	32(R3), R12 // r1

	// POLY1305_ADD expects R24 to hold the offset of the high word.
	MOVD	$8, R24

	// Fewer than 16 bytes: skip straight to the partial-block path.
	CMP	R5, $16
	BLT	bytes_between_0_and_15

loop:
	// h += next 16 bytes (plus the 2^128 padding bit); R4 += 16.
	POLY1305_ADD(R4, R8, R9, R10, R20, R21, R22)

	PCALIGN	$16
multiply:
	// h = h * r, partially reduced. Also the re-entry point for the
	// padded final block, which has already been added to h at carry.
	POLY1305_MUL(R8, R9, R10, R11, R12, R16, R17, R18, R14, R20, R21)
	ADD	$-16, R5
	CMP	R5, $16
	BGE	loop

bytes_between_0_and_15:
	CMP	R5, $0
	BEQ	done
	// Start with an empty padded block in R17:R16.
	MOVD	$0, R16 // h0
	MOVD	$0, R17 // h1

flush_buffer:
	CMP	R5, $8
	BLE	just1

	// R21 = R5 - 8 = number of message bytes that belong to the high word.
	MOVD	$8, R21
	SUB	R21, R5, R21

	// Greater than 8 -- load the rightmost remaining bytes in msg
	// and put into R17 (h1)
	// The load covers the 8 bytes that end at the last message byte, so
	// it never reads past the end of msg.
	LE_MOVD	(R4)(R21), R17
	MOVD	$16, R22

	// Find the offset to those bytes
	// R22 = (16 - R5) * 8 = number of low-order bits of the loaded word
	// that belong to the low word instead.
	SUB	R5, R22, R22
	SLD	$3, R22

	// Shift to get only the bytes in msg
	SRD	R22, R17, R17

	// Put 1 at high end
	// i.e. at bit 8*R21, directly above the last message byte.
	MOVD	$1, R23
	SLD	$3, R21
	SLD	R21, R23, R23
	OR	R23, R17, R17

	// Remainder is 8
	MOVD	$8, R5

just1:
	CMP	R5, $8
	BLT	less8

	// Exactly 8
	LE_MOVD	(R4), R16

	CMP	R17, $0

	// Check if we've already set R17; if not
	// set 1 to indicate end of msg.
	BNE	carry
	MOVD	$1, R17
	BR	carry

less8:
	// 1..7 bytes remain. They are gathered into R16 in pieces of 4, 2
	// and 1 bytes; R22 tracks the bit position of the next piece. R4
	// and R5 are updated as the 4- and 2-byte pieces are consumed.
	MOVD	$0, R16   // h0
	MOVD	$0, R22   // shift count
	CMP	R5, $4
	BLT	less4
	LE_MOVWZ	(R4), R16
	ADD	$4, R4
	ADD	$-4, R5
	MOVD	$32, R22

less4:
	CMP	R5, $2
	BLT	less2
	LE_MOVHZ	(R4), R21
	SLD	R22, R21, R21
	OR	R16, R21, R16
	ADD	$16, R22
	ADD	$-2, R5
	ADD	$2, R4

less2:
	CMP	R5, $0
	BEQ	insert1
	MOVBZ	(R4), R21
	SLD	R22, R21, R21
	OR	R16, R21, R16
	ADD	$8, R22

insert1:
	// Insert 1 at end of msg
	MOVD	$1, R21
	SLD	R22, R21, R21
	OR	R16, R21, R16

carry:
	// Add new values to h0, h1, h2
	// The padding bit is already inside R17:R16, so h2 only receives
	// the carry out of h1.
	ADDC	R16, R8
	ADDE	R17, R9
	ADDZE	R10, R10
	// Set R5 to 16 so that the ADD $-16 after the multiply leaves 0
	// and the function exits through done.
	MOVD	$16, R5
	ADD	R5, R4
	BR	multiply

done:
	// Save h0, h1, h2 in state
	MOVD	R8, 0(R3)
	MOVD	R9, 8(R3)
	MOVD	R10, 16(R3)
	RET