// Copyright 2024 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style // license that can be found in the LICENSE file. // Original source: // http://www.zorinaq.com/papers/md5-amd64.html // http://www.zorinaq.com/papers/md5-amd64.tar.bz2 // // Translated from Perl generating GNU assembly into // #defines generating 6a assembly by the Go Authors. package main import ( . "github.com/mmcloughlin/avo/build" . "github.com/mmcloughlin/avo/operand" . "github.com/mmcloughlin/avo/reg" ) //go:generate go run . -out ../md5block_amd64.s -pkg md5 func main() { Package("crypto/md5") ConstraintExpr("!purego") block() Generate() } // MD5 optimized for AMD64. // // Author: Marc Bevand // Licence: I hereby disclaim the copyright on this code and place it // in the public domain. func block() { Implement("block") Attributes(NOSPLIT) AllocLocal(8) Load(Param("dig"), RBP) Load(Param("p").Base(), RSI) Load(Param("p").Len(), RDX) SHRQ(Imm(6), RDX) SHLQ(Imm(6), RDX) LEAQ(Mem{Base: SI, Index: DX, Scale: 1}, RDI) MOVL(Mem{Base: BP}.Offset(0*4), EAX) MOVL(Mem{Base: BP}.Offset(1*4), EBX) MOVL(Mem{Base: BP}.Offset(2*4), ECX) MOVL(Mem{Base: BP}.Offset(3*4), EDX) MOVL(Imm(0xffffffff), R11L) CMPQ(RSI, RDI) JEQ(LabelRef("end")) loop() end() } func loop() { Label("loop") MOVL(EAX, R12L) MOVL(EBX, R13L) MOVL(ECX, R14L) MOVL(EDX, R15L) MOVL(Mem{Base: SI}.Offset(0*4), R8L) MOVL(EDX, R9L) ROUND1(EAX, EBX, ECX, EDX, 1, 0xd76aa478, 7) ROUND1(EDX, EAX, EBX, ECX, 2, 0xe8c7b756, 12) ROUND1(ECX, EDX, EAX, EBX, 3, 0x242070db, 17) ROUND1(EBX, ECX, EDX, EAX, 4, 0xc1bdceee, 22) ROUND1(EAX, EBX, ECX, EDX, 5, 0xf57c0faf, 7) ROUND1(EDX, EAX, EBX, ECX, 6, 0x4787c62a, 12) ROUND1(ECX, EDX, EAX, EBX, 7, 0xa8304613, 17) ROUND1(EBX, ECX, EDX, EAX, 8, 0xfd469501, 22) ROUND1(EAX, EBX, ECX, EDX, 9, 0x698098d8, 7) ROUND1(EDX, EAX, EBX, ECX, 10, 0x8b44f7af, 12) ROUND1(ECX, EDX, EAX, EBX, 11, 0xffff5bb1, 17) ROUND1(EBX, ECX, EDX, EAX, 12, 0x895cd7be, 22) ROUND1(EAX, EBX, ECX, EDX, 13, 0x6b901122, 7) ROUND1(EDX, EAX, EBX, ECX, 14, 0xfd987193, 12) ROUND1(ECX, EDX, EAX, EBX, 15, 0xa679438e, 17) ROUND1(EBX, ECX, EDX, EAX, 1, 0x49b40821, 22) MOVL(EDX, R9L) MOVL(EDX, R10L) ROUND2(EAX, EBX, ECX, EDX, 6, 0xf61e2562, 5) ROUND2(EDX, EAX, EBX, ECX, 11, 0xc040b340, 9) ROUND2(ECX, EDX, EAX, EBX, 0, 0x265e5a51, 14) ROUND2(EBX, ECX, EDX, EAX, 5, 0xe9b6c7aa, 20) ROUND2(EAX, EBX, ECX, EDX, 10, 0xd62f105d, 5) ROUND2(EDX, EAX, EBX, ECX, 15, 0x2441453, 9) ROUND2(ECX, EDX, EAX, EBX, 4, 0xd8a1e681, 14) ROUND2(EBX, ECX, EDX, EAX, 9, 0xe7d3fbc8, 20) ROUND2(EAX, EBX, ECX, EDX, 14, 0x21e1cde6, 5) ROUND2(EDX, EAX, EBX, ECX, 3, 0xc33707d6, 9) ROUND2(ECX, EDX, EAX, EBX, 8, 0xf4d50d87, 14) ROUND2(EBX, ECX, EDX, EAX, 13, 0x455a14ed, 20) ROUND2(EAX, EBX, ECX, EDX, 2, 0xa9e3e905, 5) ROUND2(EDX, EAX, EBX, ECX, 7, 0xfcefa3f8, 9) ROUND2(ECX, EDX, EAX, EBX, 12, 0x676f02d9, 14) ROUND2(EBX, ECX, EDX, EAX, 5, 0x8d2a4c8a, 20) MOVL(ECX, R9L) ROUND3FIRST(EAX, EBX, ECX, EDX, 8, 0xfffa3942, 4) ROUND3(EDX, EAX, EBX, ECX, 11, 0x8771f681, 11) ROUND3(ECX, EDX, EAX, EBX, 14, 0x6d9d6122, 16) ROUND3(EBX, ECX, EDX, EAX, 1, 0xfde5380c, 23) ROUND3(EAX, EBX, ECX, EDX, 4, 0xa4beea44, 4) ROUND3(EDX, EAX, EBX, ECX, 7, 0x4bdecfa9, 11) ROUND3(ECX, EDX, EAX, EBX, 10, 0xf6bb4b60, 16) ROUND3(EBX, ECX, EDX, EAX, 13, 0xbebfbc70, 23) ROUND3(EAX, EBX, ECX, EDX, 0, 0x289b7ec6, 4) ROUND3(EDX, EAX, EBX, ECX, 3, 0xeaa127fa, 11) ROUND3(ECX, EDX, EAX, EBX, 6, 0xd4ef3085, 16) ROUND3(EBX, ECX, EDX, EAX, 9, 0x4881d05, 23) ROUND3(EAX, EBX, ECX, EDX, 12, 0xd9d4d039, 4) ROUND3(EDX, EAX, EBX, ECX, 15, 0xe6db99e5, 11) ROUND3(ECX, EDX, EAX, EBX, 2, 0x1fa27cf8, 16) ROUND3(EBX, ECX, EDX, EAX, 0, 0xc4ac5665, 23) MOVL(R11L, R9L) XORL(EDX, R9L) ROUND4(EAX, EBX, ECX, EDX, 7, 0xf4292244, 6) ROUND4(EDX, EAX, EBX, ECX, 14, 0x432aff97, 10) ROUND4(ECX, EDX, EAX, EBX, 5, 0xab9423a7, 15) ROUND4(EBX, ECX, EDX, EAX, 12, 0xfc93a039, 21) ROUND4(EAX, EBX, ECX, EDX, 3, 0x655b59c3, 6) ROUND4(EDX, EAX, EBX, ECX, 10, 0x8f0ccc92, 10) ROUND4(ECX, EDX, EAX, EBX, 1, 0xffeff47d, 15) ROUND4(EBX, ECX, EDX, EAX, 8, 0x85845dd1, 21) ROUND4(EAX, EBX, ECX, EDX, 15, 0x6fa87e4f, 6) ROUND4(EDX, EAX, EBX, ECX, 6, 0xfe2ce6e0, 10) ROUND4(ECX, EDX, EAX, EBX, 13, 0xa3014314, 15) ROUND4(EBX, ECX, EDX, EAX, 4, 0x4e0811a1, 21) ROUND4(EAX, EBX, ECX, EDX, 11, 0xf7537e82, 6) ROUND4(EDX, EAX, EBX, ECX, 2, 0xbd3af235, 10) ROUND4(ECX, EDX, EAX, EBX, 9, 0x2ad7d2bb, 15) ROUND4(EBX, ECX, EDX, EAX, 0, 0xeb86d391, 21) ADDL(R12L, EAX) ADDL(R13L, EBX) ADDL(R14L, ECX) ADDL(R15L, EDX) ADDQ(Imm(64), RSI) CMPQ(RSI, RDI) JB(LabelRef("loop")) } func end() { Label("end") MOVL(EAX, Mem{Base: BP}.Offset(0*4)) MOVL(EBX, Mem{Base: BP}.Offset(1*4)) MOVL(ECX, Mem{Base: BP}.Offset(2*4)) MOVL(EDX, Mem{Base: BP}.Offset(3*4)) RET() } func ROUND1(a, b, c, d GPPhysical, index int, konst, shift uint64) { XORL(c, R9L) ADDL(Imm(konst), a) ADDL(R8L, a) ANDL(b, R9L) XORL(d, R9L) MOVL(Mem{Base: SI}.Offset(index*4), R8L) ADDL(R9L, a) ROLL(Imm(shift), a) MOVL(c, R9L) ADDL(b, a) } // Uses https://github.com/animetosho/md5-optimisation#dependency-shortcut-in-g-function func ROUND2(a, b, c, d GPPhysical, index int, konst, shift uint64) { XORL(R11L, R9L) ADDL(Imm(konst), a) ADDL(R8L, a) ANDL(b, R10L) ANDL(c, R9L) MOVL(Mem{Base: SI}.Offset(index*4), R8L) ADDL(R9L, a) ADDL(R10L, a) MOVL(c, R9L) MOVL(c, R10L) ROLL(Imm(shift), a) ADDL(b, a) } // Uses https://github.com/animetosho/md5-optimisation#h-function-re-use func ROUND3FIRST(a, b, c, d GPPhysical, index int, konst, shift uint64) { MOVL(d, R9L) XORL(c, R9L) XORL(b, R9L) ADDL(Imm(konst), a) ADDL(R8L, a) MOVL(Mem{Base: SI}.Offset(index*4), R8L) ADDL(R9L, a) ROLL(Imm(shift), a) ADDL(b, a) } func ROUND3(a, b, c, d GPPhysical, index int, konst, shift uint64) { XORL(a, R9L) XORL(b, R9L) ADDL(Imm(konst), a) ADDL(R8L, a) MOVL(Mem{Base: SI}.Offset(index*4), R8L) ADDL(R9L, a) ROLL(Imm(shift), a) ADDL(b, a) } func ROUND4(a, b, c, d GPPhysical, index int, konst, shift uint64) { ADDL(Imm(konst), a) ADDL(R8L, a) ORL(b, R9L) XORL(c, R9L) ADDL(R9L, a) MOVL(Mem{Base: SI}.Offset(index*4), R8L) MOVL(Imm(0xffffffff), R9L) ROLL(Imm(shift), a) XORL(c, R9L) ADDL(b, a) }