1 // Copyright 2018 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build (ppc64 || ppc64le) && !purego
6
7 #include "textflag.h"
8
// xorBytes stores a[i] XOR b[i] into dst[i] for the n bytes starting at each
// pointer. The four arguments are read from the Go argument frame at FP
// offsets 0, 8, 16 and 24.
//
// Register roles:
//   R3 = dst, R4 = a, R5 = b (never modified, all accesses are base+offset)
//   R6 = number of bytes still to process
//   R8 = byte offset of the next unprocessed byte, shared by a, b and dst
//   R7 = iteration count for the 64-byte loop (copied into CTR)
//   R9 = n mod 64 while the loop runs, later reused as offset R8+16
//   R10, R14, R15 = R8+16, R8+32, R8+48 inside the 64-byte loop
//   R14-R16 = scalar scratch in the generic tail
//   R17-R20 = length and addresses in the power10 tail
//   VS32-VS39, V0, V1 = vector scratch
//
// Strategy: 64 bytes per iteration while at least 64 bytes remain, then at
// most one 32-byte step and one 16-byte step, then a tail of fewer than 16
// bytes handled either by one length-controlled vector load/store pair
// (power10 build) or by 8/4/2/1-byte scalar steps.
//
// NOTE(review): the entry checks use unsigned compares, so n is presumably
// never negative - confirm against the Go caller.
9 // func xorBytes(dst, a, b *byte, n int)
10 TEXT ·xorBytes(SB), NOSPLIT, $0
11 MOVD dst+0(FP), R3 // R3 = dst
12 MOVD a+8(FP), R4 // R4 = a
13 MOVD b+16(FP), R5 // R5 = b
14 MOVD n+24(FP), R6 // R6 = n
15
16 CMPU R6, $64, CR7 // CR7 = n compared with 64 (unsigned)
17 MOVD R0, R8 // R8 = offset, copied from R0 (relies on R0 holding zero)
18 CMPU R6, $8, CR6 // CR6 = n compared with 8 (unsigned)
19 BLE CR6, small // n <= 8: no vector work, go straight to the tail
20 BLT CR7, xor32 // 8 < n < 64: skip the 64-byte loop
21
22 // n >= 64 bytes. This label is not a branch target in this function.
23 preloop64:
24 SRD $6, R6, R7 // R7 = n / 64 = number of loop iterations
25 MOVD R7, CTR // BDNZ below counts CTR down to zero
26 MOVD $16, R10 // R10 = offset of the 2nd vector in each 64-byte chunk
27 MOVD $32, R14 // R14 = offset of the 3rd vector
28 MOVD $48, R15 // R15 = offset of the 4th vector
29 ANDCC $63, R6, R9 // R9 = n mod 64 (trailing bytes), CR0 records whether it is zero
30 PCALIGN $16 // align the loop entry to a 16-byte boundary
31 // Main loop, 64 bytes per iteration:
32 // load four 16-byte vectors of a into VS32, VS34, VS36, VS38,
33 // load the matching four vectors of b into VS33, VS35, VS37, VS39,
34 // XOR each pair, store the four results to dst,
35 // then advance all four offsets by 64.
36 loop64:
37 LXVD2X (R4)(R8), VS32
38 LXVD2X (R4)(R10), VS34
39 LXVD2X (R4)(R14), VS36
40 LXVD2X (R4)(R15), VS38
41 LXVD2X (R5)(R8), VS33
42 LXVD2X (R5)(R10), VS35
43 LXVD2X (R5)(R14), VS37
44 LXVD2X (R5)(R15), VS39
45 XXLXOR VS32, VS33, VS32
46 XXLXOR VS34, VS35, VS34
47 XXLXOR VS36, VS37, VS36
48 XXLXOR VS38, VS39, VS38
49 STXVD2X VS32, (R3)(R8)
50 STXVD2X VS34, (R3)(R10)
51 STXVD2X VS36, (R3)(R14)
52 STXVD2X VS38, (R3)(R15)
53 ADD $64, R8
54 ADD $64, R10
55 ADD $64, R14
56 ADD $64, R15
57 BDNZ loop64 // decrement CTR, loop while it is nonzero
58 BC 12,2,LR // BEQLR: return if n mod 64 was zero (CR0 still holds the ANDCC result)
59 MOVD R9, R6 // R6 = trailing byte count, 1..63
60 CMP R6, $8
61 BLE small // 8 or fewer bytes left: no more vector steps
62 // Here 8 < R6 < 64.
63 // Process one 32-byte chunk if at least 32 bytes remain.
64 xor32:
65 CMP R6, $32
66 BLT xor16
67 ADD $16, R8, R9 // R9 = offset of the second 16-byte vector
68 LXVD2X (R4)(R8), VS32
69 LXVD2X (R4)(R9), VS33
70 LXVD2X (R5)(R8), VS34
71 LXVD2X (R5)(R9), VS35
72 XXLXOR VS32, VS34, VS32
73 XXLXOR VS33, VS35, VS33
74 STXVD2X VS32, (R3)(R8)
75 STXVD2X VS33, (R3)(R9)
76 ADD $32, R8
77 ADD $-32, R6
78 CMP R6, $8
79 BLE small // 8 or fewer bytes left (possibly zero)
80 // Here 8 < R6 < 32.
81 // Process one 16-byte vector if at least 16 bytes remain.
82 xor16:
83 CMP R6, $16
84 BLT xor8 // 9..15 bytes left, known nonzero so the check at small is skipped
85 LXVD2X (R4)(R8), VS32
86 LXVD2X (R5)(R8), VS33
87 XXLXOR VS32, VS33, VS32
88 STXVD2X VS32, (R3)(R8)
89 ADD $16, R8
90 ADD $-16, R6
// Fewer than 16 bytes remain from here on.
91 small:
92 CMP R6, $0
93 BC 12,2,LR // BEQLR: nothing left, return
// Tail: R6 is 1..15 here.
94 xor8:
95 #ifdef GOPPC64_power10
// Variant assembled when GOPPC64_power10 is defined (presumably by the
// toolchain for GOPPC64=power10 builds - confirm). The whole tail is done
// with one length-controlled vector load per input and one such store.
96 SLD $56,R6,R17 // R17 = remaining count shifted into the top byte, used as the length operand below
97 ADD R4,R8,R18 // R18 = address of a[offset]
98 ADD R5,R8,R19 // R19 = address of b[offset]
99 ADD R3,R8,R20 // R20 = address of dst[offset]
100 LXVL R18,R17,V0 // V0 = remaining bytes of a
101 LXVL R19,R17,V1 // V1 = remaining bytes of b
102 VXOR V0,V1,V1
103 STXVL V1,R20,R17 // store the same number of bytes to dst
104 RET
105 #else
// Generic variant: 8-byte, 4-byte, 2-byte and 1-byte scalar steps, each
// taken at most once.
106 CMP R6, $8
107 BLT xor4
108 // 8..15 bytes left: handle 8 of them with one doubleword
109 MOVD (R4)(R8), R14 // R14 = a[i,...,i+7]
110 MOVD (R5)(R8), R15 // R15 = b[i,...,i+7]
111 XOR R14, R15, R16 // R16 = a[] ^ b[]
112 SUB $8, R6 // n = n - 8
113 MOVD R16, (R3)(R8) // Store to dst
114 ADD $8, R8
// R6 < 8 here: handle 4 bytes with one word if available.
115 xor4:
116 CMP R6, $4
117 BLT xor2
118 MOVWZ (R4)(R8), R14
119 MOVWZ (R5)(R8), R15
120 XOR R14, R15, R16
121 MOVW R16, (R3)(R8)
122 ADD $4,R8
123 ADD $-4,R6
// R6 < 4 here: handle 2 bytes with one halfword if available.
124 xor2:
125 CMP R6, $2
126 BLT xor1
127 MOVHZ (R4)(R8), R14
128 MOVHZ (R5)(R8), R15
129 XOR R14, R15, R16
130 MOVH R16, (R3)(R8)
131 ADD $2,R8
132 ADD $-2,R6
// R6 is 0 or 1 here.
133 xor1:
134 CMP R6, $0
135 BC 12,2,LR // BEQLR: no byte left, return
136 MOVBZ (R4)(R8), R14 // R14 = a[i]
137 MOVBZ (R5)(R8), R15 // R15 = b[i]
138 XOR R14, R15, R16 // R16 = a[i] ^ b[i]
139 MOVB R16, (R3)(R8) // Store to dst
140 #endif
// Common exit. Reached by fall-through after the final byte store in the
// generic variant. No branch in this function targets this label.
141 done:
142 RET
143
View as plain text