// Copyright 2025 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

//go:build !purego

#include "textflag.h"

// Register usage in this file:
//
//	X5        current message schedule word w (set by LOAD or SHUFFLE)
//	X6, X8    scratch
//	X7        round function result f (set by FUNC1..FUNC4)
//	X10..X14  working variables a, b, c, d, e
//	X15..X18  round constants loaded from ·_K
//	X19       base of the 16-word message schedule buffer on the stack
//	X20       dig
//	X21       address of ·_K
//	X22..X26  copies of a..e taken at the start of each 64-byte block
//	X28       end-of-input pointer (p_base + p_len rounded down to 64)
//	X29       pointer to the 64-byte block currently being processed
//	X30       p_len rounded down to a multiple of 64

// LOAD reads the four bytes at offset index*4 from the current block (X29),
// combines them into a big-endian 32-bit word in X5, and stores that word
// into schedule slot index (X19). Clobbers X5, X6, X7, X8.
#define LOAD(index) \
	MOVBU	((index*4)+0)(X29), X5; \
	MOVBU	((index*4)+1)(X29), X6; \
	MOVBU	((index*4)+2)(X29), X7; \
	MOVBU	((index*4)+3)(X29), X8; \
	SLL	$24, X5; \
	SLL	$16, X6; \
	OR	X5, X6, X5; \
	SLL	$8, X7; \
	OR	X5, X7, X5; \
	OR	X5, X8, X5; \
	MOVW	X5, (index*4)(X19)

// SHUFFLE computes the next schedule word in place, treating the buffer at
// X19 as a 16-entry ring indexed modulo 16:
//
//	w[i] = rotl1(w[i] ^ w[i-3] ^ w[i-8] ^ w[i-14])
//
// (RORW $31 is a 32-bit rotate right by 31, i.e. rotate left by 1.)
// The result is left in X5 and written back to slot i&0xf.
// Clobbers X5, X6, X7, X8.
#define SHUFFLE(index) \
	MOVWU	(((index)&0xf)*4)(X19), X5; \
	MOVWU	(((index-3)&0xf)*4)(X19), X6; \
	MOVWU	(((index-8)&0xf)*4)(X19), X7; \
	MOVWU	(((index-14)&0xf)*4)(X19), X8; \
	XOR	X6, X5; \
	XOR	X7, X5; \
	XOR	X8, X5; \
	RORW	$31, X5; \
	MOVW	X5, (((index)&0xf)*4)(X19)

// The FUNCn macros leave the round function result f in X7.

// f = d ^ (b & (c ^ d))
#define FUNC1(a, b, c, d, e) \
	XOR	c, d, X7; \
	AND	b, X7; \
	XOR	d, X7

// f = b ^ c ^ d
#define FUNC2(a, b, c, d, e) \
	XOR	b, c, X7; \
	XOR	d, X7

// f = (b & c) | ((b | c) & d)
// Clobbers X6 and X8 in addition to X7.
#define FUNC3(a, b, c, d, e) \
	OR	b, c, X8; \
	AND	b, c, X6; \
	AND	d, X8; \
	OR	X6, X8, X7

#define FUNC4 FUNC2

// MIX finishes one round using f in X7 and w in X5:
//
//	b = rotl30(b)                      (RORW $2)
//	e = e + f + w + key + rotl5(a)     (RORW $27)
//
// The working variables are not moved between registers; instead each
// successive ROUNDn invocation passes them in rotated order.
// Clobbers X8.
#define MIX(a, b, c, d, e, key) \
	RORW	$2, b; \
	ADD	X7, e; \
	RORW	$27, a, X8; \
	ADD	X5, e; \
	ADD	key, e; \
	ADD	X8, e

// ROUND1 is used for rounds 0-15, where w comes straight from the input.
#define ROUND1(a, b, c, d, e, index) \
	LOAD(index); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, X15)

// ROUND1x is used for rounds 16-19: same f and key as ROUND1, but w is
// derived from the schedule.
#define ROUND1x(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC1(a, b, c, d, e); \
	MIX(a, b, c, d, e, X15)

// ROUND2 is used for rounds 20-39.
#define ROUND2(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC2(a, b, c, d, e); \
	MIX(a, b, c, d, e, X16)

// ROUND3 is used for rounds 40-59.
#define ROUND3(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC3(a, b, c, d, e); \
	MIX(a, b, c, d, e, X17)

// ROUND4 is used for rounds 60-79.
#define ROUND4(a, b, c, d, e, index) \
	SHUFFLE(index); \
	FUNC4(a, b, c, d, e); \
	MIX(a, b, c, d, e, X18)

// func block(dig *Digest, p []byte)
//
// block processes every complete 64-byte block of p, updating the five
// 32-bit words at the start of dig. Any trailing bytes beyond the last
// multiple of 64 are ignored. If p contains no complete block, dig is left
// untouched.
//
// The 64-byte frame holds the 16-word message schedule.
TEXT ·block(SB),NOSPLIT,$64-32
	MOV	p_base+8(FP), X29
	MOV	p_len+16(FP), X30
	SRL	$6, X30
	SLL	$6, X30			// round length down to a multiple of 64

	ADD	X29, X30, X28		// X28 = end of the last complete block
	// Nothing to hash: leave before dig and a..e are loaded. The end
	// label must therefore not be followed by any store through X20.
	BEQ	X28, X29, end

	ADD	$8, X2, X19		// message schedule buffer on stack

	MOV	dig+0(FP), X20
	MOVWU	(0*4)(X20), X10		// a = H0
	MOVWU	(1*4)(X20), X11		// b = H1
	MOVWU	(2*4)(X20), X12		// c = H2
	MOVWU	(3*4)(X20), X13		// d = H3
	MOVWU	(4*4)(X20), X14		// e = H4

	MOV	$·_K(SB), X21
	MOVW	(0*4)(X21), X15
	MOVW	(1*4)(X21), X16
	MOVW	(2*4)(X21), X17
	MOVW	(3*4)(X21), X18

loop:
	// Remember the incoming state so it can be added back after 80 rounds.
	MOVW	X10, X22
	MOVW	X11, X23
	MOVW	X12, X24
	MOVW	X13, X25
	MOVW	X14, X26

	ROUND1(X10, X11, X12, X13, X14, 0)
	ROUND1(X14, X10, X11, X12, X13, 1)
	ROUND1(X13, X14, X10, X11, X12, 2)
	ROUND1(X12, X13, X14, X10, X11, 3)
	ROUND1(X11, X12, X13, X14, X10, 4)
	ROUND1(X10, X11, X12, X13, X14, 5)
	ROUND1(X14, X10, X11, X12, X13, 6)
	ROUND1(X13, X14, X10, X11, X12, 7)
	ROUND1(X12, X13, X14, X10, X11, 8)
	ROUND1(X11, X12, X13, X14, X10, 9)
	ROUND1(X10, X11, X12, X13, X14, 10)
	ROUND1(X14, X10, X11, X12, X13, 11)
	ROUND1(X13, X14, X10, X11, X12, 12)
	ROUND1(X12, X13, X14, X10, X11, 13)
	ROUND1(X11, X12, X13, X14, X10, 14)
	ROUND1(X10, X11, X12, X13, X14, 15)

	ROUND1x(X14, X10, X11, X12, X13, 16)
	ROUND1x(X13, X14, X10, X11, X12, 17)
	ROUND1x(X12, X13, X14, X10, X11, 18)
	ROUND1x(X11, X12, X13, X14, X10, 19)

	ROUND2(X10, X11, X12, X13, X14, 20)
	ROUND2(X14, X10, X11, X12, X13, 21)
	ROUND2(X13, X14, X10, X11, X12, 22)
	ROUND2(X12, X13, X14, X10, X11, 23)
	ROUND2(X11, X12, X13, X14, X10, 24)
	ROUND2(X10, X11, X12, X13, X14, 25)
	ROUND2(X14, X10, X11, X12, X13, 26)
	ROUND2(X13, X14, X10, X11, X12, 27)
	ROUND2(X12, X13, X14, X10, X11, 28)
	ROUND2(X11, X12, X13, X14, X10, 29)
	ROUND2(X10, X11, X12, X13, X14, 30)
	ROUND2(X14, X10, X11, X12, X13, 31)
	ROUND2(X13, X14, X10, X11, X12, 32)
	ROUND2(X12, X13, X14, X10, X11, 33)
	ROUND2(X11, X12, X13, X14, X10, 34)
	ROUND2(X10, X11, X12, X13, X14, 35)
	ROUND2(X14, X10, X11, X12, X13, 36)
	ROUND2(X13, X14, X10, X11, X12, 37)
	ROUND2(X12, X13, X14, X10, X11, 38)
	ROUND2(X11, X12, X13, X14, X10, 39)

	ROUND3(X10, X11, X12, X13, X14, 40)
	ROUND3(X14, X10, X11, X12, X13, 41)
	ROUND3(X13, X14, X10, X11, X12, 42)
	ROUND3(X12, X13, X14, X10, X11, 43)
	ROUND3(X11, X12, X13, X14, X10, 44)
	ROUND3(X10, X11, X12, X13, X14, 45)
	ROUND3(X14, X10, X11, X12, X13, 46)
	ROUND3(X13, X14, X10, X11, X12, 47)
	ROUND3(X12, X13, X14, X10, X11, 48)
	ROUND3(X11, X12, X13, X14, X10, 49)
	ROUND3(X10, X11, X12, X13, X14, 50)
	ROUND3(X14, X10, X11, X12, X13, 51)
	ROUND3(X13, X14, X10, X11, X12, 52)
	ROUND3(X12, X13, X14, X10, X11, 53)
	ROUND3(X11, X12, X13, X14, X10, 54)
	ROUND3(X10, X11, X12, X13, X14, 55)
	ROUND3(X14, X10, X11, X12, X13, 56)
	ROUND3(X13, X14, X10, X11, X12, 57)
	ROUND3(X12, X13, X14, X10, X11, 58)
	ROUND3(X11, X12, X13, X14, X10, 59)

	ROUND4(X10, X11, X12, X13, X14, 60)
	ROUND4(X14, X10, X11, X12, X13, 61)
	ROUND4(X13, X14, X10, X11, X12, 62)
	ROUND4(X12, X13, X14, X10, X11, 63)
	ROUND4(X11, X12, X13, X14, X10, 64)
	ROUND4(X10, X11, X12, X13, X14, 65)
	ROUND4(X14, X10, X11, X12, X13, 66)
	ROUND4(X13, X14, X10, X11, X12, 67)
	ROUND4(X12, X13, X14, X10, X11, 68)
	ROUND4(X11, X12, X13, X14, X10, 69)
	ROUND4(X10, X11, X12, X13, X14, 70)
	ROUND4(X14, X10, X11, X12, X13, 71)
	ROUND4(X13, X14, X10, X11, X12, 72)
	ROUND4(X12, X13, X14, X10, X11, 73)
	ROUND4(X11, X12, X13, X14, X10, 74)
	ROUND4(X10, X11, X12, X13, X14, 75)
	ROUND4(X14, X10, X11, X12, X13, 76)
	ROUND4(X13, X14, X10, X11, X12, 77)
	ROUND4(X12, X13, X14, X10, X11, 78)
	ROUND4(X11, X12, X13, X14, X10, 79)

	// Fold this block's result into the running state.
	ADD	X22, X10
	ADD	X23, X11
	ADD	X24, X12
	ADD	X25, X13
	ADD	X26, X14

	ADD	$64, X29
	BNE	X28, X29, loop

	// All blocks consumed: write the updated state back to dig.
	MOVW	X10, (0*4)(X20)
	MOVW	X11, (1*4)(X20)
	MOVW	X12, (2*4)(X20)
	MOVW	X13, (3*4)(X20)
	MOVW	X14, (4*4)(X20)

end:
	RET

// Round constants, one per group of 20 rounds.
GLOBL	·_K(SB),RODATA,$16
DATA	·_K+0(SB)/4, $0x5A827999
DATA	·_K+4(SB)/4, $0x6ED9EBA1
DATA	·_K+8(SB)/4, $0x8F1BBCDC
DATA	·_K+12(SB)/4, $0xCA62C1D6