1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 //go:build !purego
6
7 #include "textflag.h"
8
// HASHUPDATE runs one SHA256H/SHA256H2 pair, i.e. one four-round step of
// the compression function (the call sites label the steps H4, H8, ... H64).
//
// On entry:
//   V9     - four message-schedule words with their round constants added
//   V2, V3 - the two halves of the working state
//   V8     - copy of V2 taken before this step
// On exit V2 and V3 hold the updated halves and V8 is refreshed from the
// new V2 so the next step can be issued immediately.
//
// The three instructions are order-dependent: SHA256H consumes V3 before
// SHA256H2 overwrites it, and SHA256H2 needs the pre-update copy held in V8.
#define HASHUPDATE \
	SHA256H	V9.S4, V3, V2; \
	SHA256H2	V9.S4, V8, V3; \
	VMOV	V2.B16, V8.B16
13
// func blockSHA2(dig *Digest, p []byte)
//
// blockSHA2 mixes every complete 64-byte block of p into the eight 32-bit
// hash words stored in the first 32 bytes at dig, using the ARMv8 SHA-256
// instructions. Bytes of p beyond the last complete block are ignored, and
// a p shorter than 64 bytes leaves dig untouched.
//
// Register use:
//   R0       - address of the hash words (read at entry, written at exit)
//   R1       - message cursor, advanced by 64 per block
//   R2       - cursor over the ·_K round-constant table
//   R3       - message bytes still to be processed
//   V0, V1   - running hash value (a,b,c,d) / (e,f,g,h)
//   V2, V3   - working copy of V0, V1 for the current block
//   V4-V7    - message schedule, four words per register
//   V8       - copy of V2 needed by SHA256H2 (see HASHUPDATE)
//   V9       - schedule words + round constants for the current step
//   V16-V31  - the 64 round constants K0..K63
TEXT ·blockSHA2(SB),NOSPLIT,$0
	MOVD	dig+0(FP), R0                              // Hash value first address
	MOVD	p_base+8(FP), R1                           // message first address
	MOVD	p_len+16(FP), R3                           // message length
	CMP	$64, R3                                    // not even one complete block?
	BLO	sha256ret                                  // then there is nothing to hash
	MOVD	$·_K+0(SB), R2                             // k constants first address
	VLD1	(R0), [V0.S4, V1.S4]                       // load h(a,b,c,d,e,f,g,h)
	VLD1.P	64(R2), [V16.S4, V17.S4, V18.S4, V19.S4]   // K0-K15
	VLD1.P	64(R2), [V20.S4, V21.S4, V22.S4, V23.S4]   // K16-K31
	VLD1.P	64(R2), [V24.S4, V25.S4, V26.S4, V27.S4]   // K32-K47
	VLD1	(R2), [V28.S4, V29.S4, V30.S4, V31.S4]     // K48-K63: all 64*4 bytes of K are now resident

blockloop:

	VLD1.P	16(R1), [V4.B16]                           // load 16 bytes of message (W0-W3)
	VLD1.P	16(R1), [V5.B16]                           // load 16 bytes of message (W4-W7)
	VLD1.P	16(R1), [V6.B16]                           // load 16 bytes of message (W8-W11)
	VLD1.P	16(R1), [V7.B16]                           // load 16 bytes of message (W12-W15)
	VMOV	V0.B16, V2.B16                             // working copy of V0 h(dcba)
	VMOV	V1.B16, V3.B16                             // working copy of V1 h(hgfe)
	VMOV	V2.B16, V8.B16                             // second copy of V2 for SHA256H2
	VREV32	V4.B16, V4.B16                             // byte-swap each 32-bit word of the message
	VREV32	V5.B16, V5.B16
	VREV32	V6.B16, V6.B16
	VREV32	V7.B16, V7.B16

	VADD	V16.S4, V4.S4, V9.S4                       // V9(W0+K0...W3+K3)
	SHA256SU0	V5.S4, V4.S4                       // V4: (su0(W1)+W0,...,su0(W4)+W3)
	HASHUPDATE                                         // H4

	VADD	V17.S4, V5.S4, V9.S4                       // V9(W4+K4...W7+K7)
	SHA256SU0	V6.S4, V5.S4                       // V5: (su0(W5)+W4,...,su0(W8)+W7)
	SHA256SU1	V7.S4, V6.S4, V4.S4                // V4: W16-W19
	HASHUPDATE                                         // H8

	VADD	V18.S4, V6.S4, V9.S4                       // V9(W8+K8...W11+K11)
	SHA256SU0	V7.S4, V6.S4                       // V6: (su0(W9)+W8,...,su0(W12)+W11)
	SHA256SU1	V4.S4, V7.S4, V5.S4                // V5: W20-W23
	HASHUPDATE                                         // H12

	VADD	V19.S4, V7.S4, V9.S4                       // V9(W12+K12...W15+K15)
	SHA256SU0	V4.S4, V7.S4                       // V7: (su0(W13)+W12,...,su0(W16)+W15)
	SHA256SU1	V5.S4, V4.S4, V6.S4                // V6: W24-W27
	HASHUPDATE                                         // H16

	VADD	V20.S4, V4.S4, V9.S4                       // V9(W16+K16...W19+K19)
	SHA256SU0	V5.S4, V4.S4                       // V4: (su0(W17)+W16,...,su0(W20)+W19)
	SHA256SU1	V6.S4, V5.S4, V7.S4                // V7: W28-W31
	HASHUPDATE                                         // H20

	VADD	V21.S4, V5.S4, V9.S4                       // V9(W20+K20...W23+K23)
	SHA256SU0	V6.S4, V5.S4                       // V5: (su0(W21)+W20,...,su0(W24)+W23)
	SHA256SU1	V7.S4, V6.S4, V4.S4                // V4: W32-W35
	HASHUPDATE                                         // H24

	VADD	V22.S4, V6.S4, V9.S4                       // V9(W24+K24...W27+K27)
	SHA256SU0	V7.S4, V6.S4                       // V6: (su0(W25)+W24,...,su0(W28)+W27)
	SHA256SU1	V4.S4, V7.S4, V5.S4                // V5: W36-W39
	HASHUPDATE                                         // H28

	VADD	V23.S4, V7.S4, V9.S4                       // V9(W28+K28...W31+K31)
	SHA256SU0	V4.S4, V7.S4                       // V7: (su0(W29)+W28,...,su0(W32)+W31)
	SHA256SU1	V5.S4, V4.S4, V6.S4                // V6: W40-W43
	HASHUPDATE                                         // H32

	VADD	V24.S4, V4.S4, V9.S4                       // V9(W32+K32...W35+K35)
	SHA256SU0	V5.S4, V4.S4                       // V4: (su0(W33)+W32,...,su0(W36)+W35)
	SHA256SU1	V6.S4, V5.S4, V7.S4                // V7: W44-W47
	HASHUPDATE                                         // H36

	VADD	V25.S4, V5.S4, V9.S4                       // V9(W36+K36...W39+K39)
	SHA256SU0	V6.S4, V5.S4                       // V5: (su0(W37)+W36,...,su0(W40)+W39)
	SHA256SU1	V7.S4, V6.S4, V4.S4                // V4: W48-W51
	HASHUPDATE                                         // H40

	VADD	V26.S4, V6.S4, V9.S4                       // V9(W40+K40...W43+K43)
	SHA256SU0	V7.S4, V6.S4                       // V6: (su0(W41)+W40,...,su0(W44)+W43)
	SHA256SU1	V4.S4, V7.S4, V5.S4                // V5: W52-W55
	HASHUPDATE                                         // H44

	VADD	V27.S4, V7.S4, V9.S4                       // V9(W44+K44...W47+K47)
	SHA256SU0	V4.S4, V7.S4                       // V7: (su0(W45)+W44,...,su0(W48)+W47)
	SHA256SU1	V5.S4, V4.S4, V6.S4                // V6: W56-W59
	HASHUPDATE                                         // H48

	VADD	V28.S4, V4.S4, V9.S4                       // V9(W48+K48,...,W51+K51)
	HASHUPDATE                                         // H52
	SHA256SU1	V6.S4, V5.S4, V7.S4                // V7: W60-W63

	VADD	V29.S4, V5.S4, V9.S4                       // V9(W52+K52,...,W55+K55)
	HASHUPDATE                                         // H56

	VADD	V30.S4, V6.S4, V9.S4                       // V9(W56+K56,...,W59+K59)
	HASHUPDATE                                         // H60

	VADD	V31.S4, V7.S4, V9.S4                       // V9(W60+K60,...,W63+K63)
	HASHUPDATE                                         // H64

	SUB	$64, R3, R3                                // message length - 64 bytes
	VADD	V2.S4, V0.S4, V0.S4                        // fold the working state back into the hash
	VADD	V3.S4, V1.S4, V1.S4
	CMP	$64, R3                                    // another complete block left?
	BHS	blockloop                                  // unsigned >=, so a trailing partial block is never read

	VST1	[V0.S4, V1.S4], (R0)                       // store hash value H

sha256ret:

	RET
121
122
View as plain text