Text file
src/hash/crc32/crc32_loong64.s
1 // Copyright 2024 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 // castagnoliUpdate updates the non-inverted crc with the bytes of p, using the CRCC* instructions, and returns the new value.
8 // Register use: R4 = running crc, R5 = read pointer into p, R6 = bytes remaining, R12/R13 = scratch.
9 // func castagnoliUpdate(crc uint32, p []byte) uint32
10 TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36 // no local frame; 36 bytes of arguments and result
11 MOVWU crc+0(FP), R4 // R4 (a0) = incoming crc, loaded as an unsigned 32-bit word
12 MOVV p+8(FP), R5 // R5 (a1) = pointer to the first byte of p
13 MOVV p_len+16(FP), R6 // R6 (a2) = len(p), the number of bytes left to process
14 // Inputs shorter than 8 bytes skip both the alignment steps and the 8-byte loop.
15 SGT $8, R6, R12 // R12 = 1 when R6 < 8
16 BNE R12, less_than_8
17 AND $7, R5, R12 // R12 = low three bits of the pointer
18 BEQ R12, aligned // zero means R5 is already 8-byte aligned
19
20 // Process the first few bytes to 8-byte align the input. At most 7 bytes are consumed, and R6 >= 8 here.
21 // R12 (t0) = 8 - R12: for R12 in 1..7, (R12 - 1) XOR 7 equals 8 - R12. We need to process this many bytes to align.
22 SUB $1, R12
23 XOR $7, R12
24 // Bits 0, 1 and 2 of R12 select a 1-, 2- and 4-byte step; R13 holds the tested bit, then the loaded data.
25 AND $1, R12, R13
26 BEQ R13, align_2
27 MOVB (R5), R13 // load 1 byte
28 CRCCWBW R4, R13, R4 // fold it into the crc in R4
29 ADDV $1, R5 // advance the read pointer
30 ADDV $-1, R6 // and shrink the remaining count
31
32 align_2:
33 AND $2, R12, R13 // is a 2-byte step needed?
34 BEQ R13, align_4
35 MOVH (R5), R13 // load 2 bytes
36 CRCCWHW R4, R13, R4 // fold them into the crc in R4
37 ADDV $2, R5
38 ADDV $-2, R6
39
40 align_4:
41 AND $4, R12, R13 // is a 4-byte step needed?
42 BEQ R13, aligned
43 MOVW (R5), R13 // load 4 bytes
44 CRCCWWW R4, R13, R4 // fold them into the crc in R4
45 ADDV $4, R5
46 ADDV $-4, R6
47
48 aligned:
49 // The input is now 8-byte aligned and we can process 8-byte chunks. Loop until fewer than 8 bytes remain.
50 SGT $8, R6, R12 // R12 = 1 when R6 < 8
51 BNE R12, less_than_8
52 MOVV (R5), R13 // load 8 bytes
53 CRCCWVW R4, R13, R4 // fold them into the crc in R4
54 ADDV $8, R5
55 ADDV $-8, R6
56 JMP aligned // re-check the remaining count
57 // NOTE(review): when less_than_8 is reached directly from the initial length check, R5 has not been aligned, so the loads below may be unaligned - presumably acceptable on the target; confirm.
58 less_than_8:
59 // We may have some bytes left over; process 4 bytes, then 2, then 1. R6 < 8 here, so bits 2 and 1 of R6 select the 4- and 2-byte steps.
60 AND $4, R6, R12
61 BEQ R12, less_than_4
62 MOVW (R5), R13 // load 4 bytes
63 CRCCWWW R4, R13, R4
64 ADDV $4, R5
65 ADDV $-4, R6
66
67 less_than_4:
68 AND $2, R6, R12
69 BEQ R12, less_than_2
70 MOVH (R5), R13 // load 2 bytes
71 CRCCWHW R4, R13, R4
72 ADDV $2, R5
73 ADDV $-2, R6
74
75 less_than_2:
76 BEQ R6, done // after the 4- and 2-byte steps R6 is 0 or 1; 0 means nothing is left
77 MOVB (R5), R13 // load the final byte
78 CRCCWBW R4, R13, R4
79
80 done:
81 MOVW R4, ret+32(FP) // store the updated crc as the 32-bit result
82 RET
83
84 // ieeeUpdate updates the non-inverted crc with the bytes of p, using the CRCW* instructions, and returns the new value.
85 // Register use: R4 = running crc, R5 = read pointer into p, R6 = bytes remaining, R12/R13 = scratch.
86 // func ieeeUpdate(crc uint32, p []byte) uint32
87 TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36 // no local frame; 36 bytes of arguments and result
88 MOVWU crc+0(FP), R4 // R4 (a0) = incoming crc, loaded as an unsigned 32-bit word
89 MOVV p+8(FP), R5 // R5 (a1) = pointer to the first byte of p
90 MOVV p_len+16(FP), R6 // R6 (a2) = len(p), the number of bytes left to process
91 // Inputs shorter than 8 bytes skip both the alignment steps and the 8-byte loop.
92 SGT $8, R6, R12 // R12 = 1 when R6 < 8
93 BNE R12, less_than_8
94 AND $7, R5, R12 // R12 = low three bits of the pointer
95 BEQ R12, aligned // zero means R5 is already 8-byte aligned
96
97 // Process the first few bytes to 8-byte align the input. At most 7 bytes are consumed, and R6 >= 8 here.
98 // R12 (t0) = 8 - R12: for R12 in 1..7, (R12 - 1) XOR 7 equals 8 - R12. We need to process this many bytes to align.
99 SUB $1, R12
100 XOR $7, R12
101 // Bits 0, 1 and 2 of R12 select a 1-, 2- and 4-byte step; R13 holds the tested bit, then the loaded data.
102 AND $1, R12, R13
103 BEQ R13, align_2
104 MOVB (R5), R13 // load 1 byte
105 CRCWBW R4, R13, R4 // fold it into the crc in R4
106 ADDV $1, R5 // advance the read pointer
107 ADDV $-1, R6 // and shrink the remaining count
108
109 align_2:
110 AND $2, R12, R13 // is a 2-byte step needed?
111 BEQ R13, align_4
112 MOVH (R5), R13 // load 2 bytes
113 CRCWHW R4, R13, R4 // fold them into the crc in R4
114 ADDV $2, R5
115 ADDV $-2, R6
116
117 align_4:
118 AND $4, R12, R13 // is a 4-byte step needed?
119 BEQ R13, aligned
120 MOVW (R5), R13 // load 4 bytes
121 CRCWWW R4, R13, R4 // fold them into the crc in R4
122 ADDV $4, R5
123 ADDV $-4, R6
124
125 aligned:
126 // The input is now 8-byte aligned and we can process 8-byte chunks. Loop until fewer than 8 bytes remain.
127 SGT $8, R6, R12 // R12 = 1 when R6 < 8
128 BNE R12, less_than_8
129 MOVV (R5), R13 // load 8 bytes
130 CRCWVW R4, R13, R4 // fold them into the crc in R4
131 ADDV $8, R5
132 ADDV $-8, R6
133 JMP aligned // re-check the remaining count
134 // NOTE(review): when less_than_8 is reached directly from the initial length check, R5 has not been aligned, so the loads below may be unaligned - presumably acceptable on the target; confirm.
135 less_than_8:
136 // We may have some bytes left over; process 4 bytes, then 2, then 1. R6 < 8 here, so bits 2 and 1 of R6 select the 4- and 2-byte steps.
137 AND $4, R6, R12
138 BEQ R12, less_than_4
139 MOVW (R5), R13 // load 4 bytes
140 CRCWWW R4, R13, R4
141 ADDV $4, R5
142 ADDV $-4, R6
143
144 less_than_4:
145 AND $2, R6, R12
146 BEQ R12, less_than_2
147 MOVH (R5), R13 // load 2 bytes
148 CRCWHW R4, R13, R4
149 ADDV $2, R5
150 ADDV $-2, R6
151
152 less_than_2:
153 BEQ R6, done // after the 4- and 2-byte steps R6 is 0 or 1; 0 means nothing is left
154 MOVB (R5), R13 // load the final byte
155 CRCWBW R4, R13, R4
156
157 done:
158 MOVW R4, ret+32(FP) // store the updated crc as the 32-bit result
159 RET
160
161
View as plain text