Source file src/crypto/md5/_asm/md5block_amd64_asm.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Original source:
     6  //	http://www.zorinaq.com/papers/md5-amd64.html
     7  //	http://www.zorinaq.com/papers/md5-amd64.tar.bz2
     8  //
     9  // Translated from Perl generating GNU assembly into
    10  // #defines generating 6a assembly by the Go Authors.
    11  
    12  package main
    13  
    14  import (
    15  	. "github.com/mmcloughlin/avo/build"
    16  	. "github.com/mmcloughlin/avo/operand"
    17  	. "github.com/mmcloughlin/avo/reg"
    18  )
    19  
    20  //go:generate go run . -out ../md5block_amd64.s -pkg md5
    21  
// main drives the avo generator: it declares the target package, attaches
// the "!purego" build constraint to the generated file, emits the block
// routine and finally calls Generate to produce the output requested on
// the command line (see the go:generate directive above).
func main() {
	Package("crypto/md5")
	ConstraintExpr("!purego")
	block()
	Generate()
}
    28  
    29  // MD5 optimized for AMD64.
    30  //
    31  // Author: Marc Bevand <bevand_m (at) epita.fr>
    32  // Licence: I hereby disclaim the copyright on this code and place it
    33  // in the public domain.
    34  func block() {
    35  	Implement("block")
    36  	Attributes(NOSPLIT)
    37  	AllocLocal(8)
    38  
    39  	Load(Param("dig"), RBP)
    40  	Load(Param("p").Base(), RSI)
    41  	Load(Param("p").Len(), RDX)
    42  	SHRQ(Imm(6), RDX)
    43  	SHLQ(Imm(6), RDX)
    44  
    45  	LEAQ(Mem{Base: SI, Index: DX, Scale: 1}, RDI)
    46  	MOVL(Mem{Base: BP}.Offset(0*4), EAX)
    47  	MOVL(Mem{Base: BP}.Offset(1*4), EBX)
    48  	MOVL(Mem{Base: BP}.Offset(2*4), ECX)
    49  	MOVL(Mem{Base: BP}.Offset(3*4), EDX)
    50  	MOVL(Imm(0xffffffff), R11L)
    51  
    52  	CMPQ(RSI, RDI)
    53  	JEQ(LabelRef("end"))
    54  
    55  	loop()
    56  	end()
    57  }
    58  
    59  func loop() {
    60  	Label("loop")
    61  	MOVL(EAX, R12L)
    62  	MOVL(EBX, R13L)
    63  	MOVL(ECX, R14L)
    64  	MOVL(EDX, R15L)
    65  
    66  	MOVL(Mem{Base: SI}.Offset(0*4), R8L)
    67  	MOVL(EDX, R9L)
    68  
    69  	ROUND1(EAX, EBX, ECX, EDX, 1, 0xd76aa478, 7)
    70  	ROUND1(EDX, EAX, EBX, ECX, 2, 0xe8c7b756, 12)
    71  	ROUND1(ECX, EDX, EAX, EBX, 3, 0x242070db, 17)
    72  	ROUND1(EBX, ECX, EDX, EAX, 4, 0xc1bdceee, 22)
    73  	ROUND1(EAX, EBX, ECX, EDX, 5, 0xf57c0faf, 7)
    74  	ROUND1(EDX, EAX, EBX, ECX, 6, 0x4787c62a, 12)
    75  	ROUND1(ECX, EDX, EAX, EBX, 7, 0xa8304613, 17)
    76  	ROUND1(EBX, ECX, EDX, EAX, 8, 0xfd469501, 22)
    77  	ROUND1(EAX, EBX, ECX, EDX, 9, 0x698098d8, 7)
    78  	ROUND1(EDX, EAX, EBX, ECX, 10, 0x8b44f7af, 12)
    79  	ROUND1(ECX, EDX, EAX, EBX, 11, 0xffff5bb1, 17)
    80  	ROUND1(EBX, ECX, EDX, EAX, 12, 0x895cd7be, 22)
    81  	ROUND1(EAX, EBX, ECX, EDX, 13, 0x6b901122, 7)
    82  	ROUND1(EDX, EAX, EBX, ECX, 14, 0xfd987193, 12)
    83  	ROUND1(ECX, EDX, EAX, EBX, 15, 0xa679438e, 17)
    84  	ROUND1(EBX, ECX, EDX, EAX, 1, 0x49b40821, 22)
    85  
    86  	MOVL(EDX, R9L)
    87  	MOVL(EDX, R10L)
    88  
    89  	ROUND2(EAX, EBX, ECX, EDX, 6, 0xf61e2562, 5)
    90  	ROUND2(EDX, EAX, EBX, ECX, 11, 0xc040b340, 9)
    91  	ROUND2(ECX, EDX, EAX, EBX, 0, 0x265e5a51, 14)
    92  	ROUND2(EBX, ECX, EDX, EAX, 5, 0xe9b6c7aa, 20)
    93  	ROUND2(EAX, EBX, ECX, EDX, 10, 0xd62f105d, 5)
    94  	ROUND2(EDX, EAX, EBX, ECX, 15, 0x2441453, 9)
    95  	ROUND2(ECX, EDX, EAX, EBX, 4, 0xd8a1e681, 14)
    96  	ROUND2(EBX, ECX, EDX, EAX, 9, 0xe7d3fbc8, 20)
    97  	ROUND2(EAX, EBX, ECX, EDX, 14, 0x21e1cde6, 5)
    98  	ROUND2(EDX, EAX, EBX, ECX, 3, 0xc33707d6, 9)
    99  	ROUND2(ECX, EDX, EAX, EBX, 8, 0xf4d50d87, 14)
   100  	ROUND2(EBX, ECX, EDX, EAX, 13, 0x455a14ed, 20)
   101  	ROUND2(EAX, EBX, ECX, EDX, 2, 0xa9e3e905, 5)
   102  	ROUND2(EDX, EAX, EBX, ECX, 7, 0xfcefa3f8, 9)
   103  	ROUND2(ECX, EDX, EAX, EBX, 12, 0x676f02d9, 14)
   104  	ROUND2(EBX, ECX, EDX, EAX, 5, 0x8d2a4c8a, 20)
   105  
   106  	MOVL(ECX, R9L)
   107  
   108  	ROUND3FIRST(EAX, EBX, ECX, EDX, 8, 0xfffa3942, 4)
   109  	ROUND3(EDX, EAX, EBX, ECX, 11, 0x8771f681, 11)
   110  	ROUND3(ECX, EDX, EAX, EBX, 14, 0x6d9d6122, 16)
   111  	ROUND3(EBX, ECX, EDX, EAX, 1, 0xfde5380c, 23)
   112  	ROUND3(EAX, EBX, ECX, EDX, 4, 0xa4beea44, 4)
   113  	ROUND3(EDX, EAX, EBX, ECX, 7, 0x4bdecfa9, 11)
   114  	ROUND3(ECX, EDX, EAX, EBX, 10, 0xf6bb4b60, 16)
   115  	ROUND3(EBX, ECX, EDX, EAX, 13, 0xbebfbc70, 23)
   116  	ROUND3(EAX, EBX, ECX, EDX, 0, 0x289b7ec6, 4)
   117  	ROUND3(EDX, EAX, EBX, ECX, 3, 0xeaa127fa, 11)
   118  	ROUND3(ECX, EDX, EAX, EBX, 6, 0xd4ef3085, 16)
   119  	ROUND3(EBX, ECX, EDX, EAX, 9, 0x4881d05, 23)
   120  	ROUND3(EAX, EBX, ECX, EDX, 12, 0xd9d4d039, 4)
   121  	ROUND3(EDX, EAX, EBX, ECX, 15, 0xe6db99e5, 11)
   122  	ROUND3(ECX, EDX, EAX, EBX, 2, 0x1fa27cf8, 16)
   123  	ROUND3(EBX, ECX, EDX, EAX, 0, 0xc4ac5665, 23)
   124  
   125  	MOVL(R11L, R9L)
   126  	XORL(EDX, R9L)
   127  
   128  	ROUND4(EAX, EBX, ECX, EDX, 7, 0xf4292244, 6)
   129  	ROUND4(EDX, EAX, EBX, ECX, 14, 0x432aff97, 10)
   130  	ROUND4(ECX, EDX, EAX, EBX, 5, 0xab9423a7, 15)
   131  	ROUND4(EBX, ECX, EDX, EAX, 12, 0xfc93a039, 21)
   132  	ROUND4(EAX, EBX, ECX, EDX, 3, 0x655b59c3, 6)
   133  	ROUND4(EDX, EAX, EBX, ECX, 10, 0x8f0ccc92, 10)
   134  	ROUND4(ECX, EDX, EAX, EBX, 1, 0xffeff47d, 15)
   135  	ROUND4(EBX, ECX, EDX, EAX, 8, 0x85845dd1, 21)
   136  	ROUND4(EAX, EBX, ECX, EDX, 15, 0x6fa87e4f, 6)
   137  	ROUND4(EDX, EAX, EBX, ECX, 6, 0xfe2ce6e0, 10)
   138  	ROUND4(ECX, EDX, EAX, EBX, 13, 0xa3014314, 15)
   139  	ROUND4(EBX, ECX, EDX, EAX, 4, 0x4e0811a1, 21)
   140  	ROUND4(EAX, EBX, ECX, EDX, 11, 0xf7537e82, 6)
   141  	ROUND4(EDX, EAX, EBX, ECX, 2, 0xbd3af235, 10)
   142  	ROUND4(ECX, EDX, EAX, EBX, 9, 0x2ad7d2bb, 15)
   143  	ROUND4(EBX, ECX, EDX, EAX, 0, 0xeb86d391, 21)
   144  
   145  	ADDL(R12L, EAX)
   146  	ADDL(R13L, EBX)
   147  	ADDL(R14L, ECX)
   148  	ADDL(R15L, EDX)
   149  
   150  	ADDQ(Imm(64), RSI)
   151  	CMPQ(RSI, RDI)
   152  	JB(LabelRef("loop"))
   153  }
   154  
   155  func end() {
   156  	Label("end")
   157  	MOVL(EAX, Mem{Base: BP}.Offset(0*4))
   158  	MOVL(EBX, Mem{Base: BP}.Offset(1*4))
   159  	MOVL(ECX, Mem{Base: BP}.Offset(2*4))
   160  	MOVL(EDX, Mem{Base: BP}.Offset(3*4))
   161  	RET()
   162  }
   163  
   164  func ROUND1(a, b, c, d GPPhysical, index int, konst, shift uint64) {
   165  	XORL(c, R9L)
   166  	ADDL(Imm(konst), a)
   167  	ADDL(R8L, a)
   168  	ANDL(b, R9L)
   169  	XORL(d, R9L)
   170  	MOVL(Mem{Base: SI}.Offset(index*4), R8L)
   171  	ADDL(R9L, a)
   172  	ROLL(Imm(shift), a)
   173  	MOVL(c, R9L)
   174  	ADDL(b, a)
   175  }
   176  
   177  // Uses https://github.com/animetosho/md5-optimisation#dependency-shortcut-in-g-function
   178  func ROUND2(a, b, c, d GPPhysical, index int, konst, shift uint64) {
   179  	XORL(R11L, R9L)
   180  	ADDL(Imm(konst), a)
   181  	ADDL(R8L, a)
   182  	ANDL(b, R10L)
   183  	ANDL(c, R9L)
   184  	MOVL(Mem{Base: SI}.Offset(index*4), R8L)
   185  	ADDL(R9L, a)
   186  	ADDL(R10L, a)
   187  	MOVL(c, R9L)
   188  	MOVL(c, R10L)
   189  	ROLL(Imm(shift), a)
   190  	ADDL(b, a)
   191  }
   192  
   193  // Uses https://github.com/animetosho/md5-optimisation#h-function-re-use
   194  func ROUND3FIRST(a, b, c, d GPPhysical, index int, konst, shift uint64) {
   195  	MOVL(d, R9L)
   196  	XORL(c, R9L)
   197  	XORL(b, R9L)
   198  	ADDL(Imm(konst), a)
   199  	ADDL(R8L, a)
   200  	MOVL(Mem{Base: SI}.Offset(index*4), R8L)
   201  	ADDL(R9L, a)
   202  	ROLL(Imm(shift), a)
   203  	ADDL(b, a)
   204  }
   205  
   206  func ROUND3(a, b, c, d GPPhysical, index int, konst, shift uint64) {
   207  	XORL(a, R9L)
   208  	XORL(b, R9L)
   209  	ADDL(Imm(konst), a)
   210  	ADDL(R8L, a)
   211  	MOVL(Mem{Base: SI}.Offset(index*4), R8L)
   212  	ADDL(R9L, a)
   213  	ROLL(Imm(shift), a)
   214  	ADDL(b, a)
   215  }
   216  
   217  func ROUND4(a, b, c, d GPPhysical, index int, konst, shift uint64) {
   218  	ADDL(Imm(konst), a)
   219  	ADDL(R8L, a)
   220  	ORL(b, R9L)
   221  	XORL(c, R9L)
   222  	ADDL(R9L, a)
   223  	MOVL(Mem{Base: SI}.Offset(index*4), R8L)
   224  	MOVL(Imm(0xffffffff), R9L)
   225  	ROLL(Imm(shift), a)
   226  	XORL(c, R9L)
   227  	ADDL(b, a)
   228  }
   229  

View as plain text