1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
// func xorBytes(dst, a, b *byte, n int)
//
// xorBytes writes a[i] ^ b[i] to dst[i] for every i in [0, n).
//
// Register roles:
//	X10	dst cursor
//	X11	a cursor
//	X12	b cursor
//	X13	number of bytes still to process
//	X15	block size being tested for the current stage
//	X5-X7	low three address bits of dst, a and b
//	X8	leading bytes needed to reach an 8-byte boundary
//	X16-X23	data temporaries
TEXT ·xorBytes(SB), NOSPLIT|NOFRAME, $0
	MOV	dst+0(FP), X10
	MOV	a+8(FP), X11
	MOV	b+16(FP), X12
	MOV	n+24(FP), X13

	// Fewer than 32 bytes: skip the alignment work and use the byte loops.
	MOV	$32, X15
	BLT	X13, X15, bytes4_check

	// The 8-byte-wide stages are only used when dst, a and b all have the
	// same offset within an 8-byte word; otherwise fall back to byte loops.
	AND	$7, X10, X5
	AND	$7, X11, X6
	AND	$7, X12, X7
	BNE	X5, X6, bytes4_check
	BNE	X5, X7, bytes4_check
	BEQZ	X5, words64_check

	// X8 = 8 - (dst & 7) leading bytes bring all three cursors to an
	// 8-byte boundary. Remove them from the remaining count up front,
	// then XOR them one byte at a time.
	MOV	$8, X8
	SUB	X5, X8, X8
	SUB	X8, X13, X13
head:
	MOVBU	0(X11), X16
	MOVBU	0(X12), X17
	XOR	X16, X17, X17
	MOVB	X17, 0(X10)
	ADD	$1, X11, X11
	ADD	$1, X12, X12
	ADD	$1, X10, X10
	SUB	$1, X8, X8
	BNEZ	X8, head

words64_check:
	MOV	$64, X15
	BLT	X13, X15, words32_check
	PCALIGN	$16
words64:
	// 64 bytes per iteration, as eight 8-byte XORs issued in pairs.
	MOV	0(X11), X16
	MOV	0(X12), X17
	MOV	8(X11), X18
	MOV	8(X12), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 0(X10)
	MOV	X19, 8(X10)
	MOV	16(X11), X20
	MOV	16(X12), X21
	MOV	24(X11), X22
	MOV	24(X12), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOV	X21, 16(X10)
	MOV	X23, 24(X10)
	MOV	32(X11), X16
	MOV	32(X12), X17
	MOV	40(X11), X18
	MOV	40(X12), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 32(X10)
	MOV	X19, 40(X10)
	MOV	48(X11), X20
	MOV	48(X12), X21
	MOV	56(X11), X22
	MOV	56(X12), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOV	X21, 48(X10)
	MOV	X23, 56(X10)
	ADD	$64, X11, X11
	ADD	$64, X12, X12
	ADD	$64, X10, X10
	SUB	$64, X13, X13
	BGE	X13, X15, words64
	BEQZ	X13, done

words32_check:
	// At most one 32-byte block can remain at this point.
	MOV	$32, X15
	BLT	X13, X15, words16_check
	MOV	0(X11), X16
	MOV	0(X12), X17
	MOV	8(X11), X18
	MOV	8(X12), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 0(X10)
	MOV	X19, 8(X10)
	MOV	16(X11), X20
	MOV	16(X12), X21
	MOV	24(X11), X22
	MOV	24(X12), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOV	X21, 16(X10)
	MOV	X23, 24(X10)
	ADD	$32, X11, X11
	ADD	$32, X12, X12
	ADD	$32, X10, X10
	SUB	$32, X13, X13
	BEQZ	X13, done

words16_check:
	// At most one 16-byte block can remain at this point.
	MOV	$16, X15
	BLT	X13, X15, bytes4_check
	MOV	0(X11), X16
	MOV	0(X12), X17
	MOV	8(X11), X18
	MOV	8(X12), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOV	X17, 0(X10)
	MOV	X19, 8(X10)
	ADD	$16, X11, X11
	ADD	$16, X12, X12
	ADD	$16, X10, X10
	SUB	$16, X13, X13
	BEQZ	X13, done

bytes4_check:
	// Byte-wise path: reached for short inputs, for pointers whose
	// alignments differ, and for the remainder left by the wide stages.
	MOV	$4, X15
	BLT	X13, X15, bytes1_check
	PCALIGN	$16
bytes4:
	// Four bytes per iteration using single-byte loads and stores.
	MOVBU	0(X11), X16
	MOVBU	0(X12), X17
	MOVBU	1(X11), X18
	MOVBU	1(X12), X19
	XOR	X16, X17, X17
	XOR	X18, X19, X19
	MOVB	X17, 0(X10)
	MOVB	X19, 1(X10)
	MOVBU	2(X11), X20
	MOVBU	2(X12), X21
	MOVBU	3(X11), X22
	MOVBU	3(X12), X23
	XOR	X20, X21, X21
	XOR	X22, X23, X23
	MOVB	X21, 2(X10)
	MOVB	X23, 3(X10)
	ADD	$4, X11, X11
	ADD	$4, X12, X12
	ADD	$4, X10, X10
	SUB	$4, X13, X13
	BGE	X13, X15, bytes4

bytes1_check:
	// Nothing left: return without entering the one-byte loop.
	BEQZ	X13, done
	PCALIGN	$16
bytes1:
	MOVBU	0(X11), X16
	MOVBU	0(X12), X17
	XOR	X16, X17, X17
	MOVB	X17, 0(X10)
	ADD	$1, X11, X11
	ADD	$1, X12, X12
	ADD	$1, X10, X10
	SUB	$1, X13, X13
	BNEZ	X13, bytes1

done:
	RET
170
View as plain text