Source file src/crypto/internal/fips/sha256/_asm/sha256block_amd64_shani.go

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package main
     6  
     7  import (
     8  	. "github.com/mmcloughlin/avo/build"
     9  	. "github.com/mmcloughlin/avo/operand"
    10  	. "github.com/mmcloughlin/avo/reg"
    11  )
    12  
    13  // The sha-ni implementation uses the Intel(R) SHA extensions SHA256RNDS2, SHA256MSG1 and SHA256MSG2.
    14  // It also reuses portions of the flip_mask (half) and K256 table (stride 32) from the avx2 version.
    15  //
    16  // Reference
    17  // S. Gulley, et al, "New Instructions Supporting the Secure Hash
    18  // Algorithm on Intel® Architecture Processors", July 2013
    19  // https://www.intel.com/content/www/us/en/developer/articles/technical/intel-sha-extensions.html
    20  
    21  func blockSHANI() {
    22  	Implement("blockSHANI")
    23  	Load(Param("dig"), digestPtr)    //                   init digest hash vector H0, H1,..., H7 pointer
    24  	Load(Param("p").Base(), dataPtr) //                   init input data base pointer
    25  	Load(Param("p").Len(), numBytes) //                   get number of input bytes to hash
    26  	SHRQ(Imm(6), numBytes)           //                   force modulo 64 input buffer length
    27  	SHLQ(Imm(6), numBytes)
    28  	CMPQ(numBytes, Imm(0)) //                             exit early for zero-length input buffer
    29  	JEQ(LabelRef("done"))
    30  	ADDQ(dataPtr, numBytes)                            // point numBytes to end of input buffer
    31  	VMOVDQU(Mem{Base: digestPtr}.Offset(0*16), state0) // load initial hash values and reorder
    32  	VMOVDQU(Mem{Base: digestPtr}.Offset(1*16), state1) // DCBA, HGFE -> ABEF, CDGH
    33  	PSHUFD(Imm(0xb1), state0, state0)                  // CDAB
    34  	PSHUFD(Imm(0x1b), state1, state1)                  // EFGH
    35  	VMOVDQA(state0, m4)
    36  	PALIGNR(Imm(8), state1, state0) //                    ABEF
    37  	PBLENDW(Imm(0xf0), m4, state1)  //                    CDGH
    38  	flip_mask := flip_mask_DATA()
    39  	VMOVDQA(flip_mask, shufMask)
    40  	LEAQ(K256_DATA(), sha256Constants)
    41  
    42  	roundLoop()
    43  	done()
    44  }
    45  
    46  func roundLoop() {
    47  	Label("roundLoop")
    48  	Comment("save hash values for addition after rounds")
    49  	VMOVDQA(state0, abefSave)
    50  	VMOVDQA(state1, cdghSave)
    51  
    52  	Comment("do rounds 0-59")
    53  	rounds0to11(m0, nil, 0, nop)       //                 0-3
    54  	rounds0to11(m1, m0, 1, sha256msg1) //                 4-7
    55  	rounds0to11(m2, m1, 2, sha256msg1) //                8-11
    56  	VMOVDQU(Mem{Base: dataPtr}.Offset(3*16), msg)
    57  	PSHUFB(shufMask, msg)
    58  	rounds12to59(m3, 3, m2, m0, sha256msg1, vmovrev) // 12-15
    59  	rounds12to59(m0, 4, m3, m1, sha256msg1, vmov)    // 16-19
    60  	rounds12to59(m1, 5, m0, m2, sha256msg1, vmov)    // 20-23
    61  	rounds12to59(m2, 6, m1, m3, sha256msg1, vmov)    // 24-27
    62  	rounds12to59(m3, 7, m2, m0, sha256msg1, vmov)    // 28-31
    63  	rounds12to59(m0, 8, m3, m1, sha256msg1, vmov)    // 32-35
    64  	rounds12to59(m1, 9, m0, m2, sha256msg1, vmov)    // 36-39
    65  	rounds12to59(m2, 10, m1, m3, sha256msg1, vmov)   // 40-43
    66  	rounds12to59(m3, 11, m2, m0, sha256msg1, vmov)   // 44-47
    67  	rounds12to59(m0, 12, m3, m1, sha256msg1, vmov)   // 48-51
    68  	rounds12to59(m1, 13, m0, m2, nop, vmov)          // 52-55
    69  	rounds12to59(m2, 14, m1, m3, nop, vmov)          // 56-59
    70  
    71  	Comment("do rounds 60-63")
    72  	VMOVDQA(m3, msg)
    73  	PADDD(Mem{Base: sha256Constants}.Offset(15*32), msg)
    74  	SHA256RNDS2(msg, state0, state1)
    75  	PSHUFD(Imm(0x0e), msg, msg)
    76  	SHA256RNDS2(msg, state1, state0)
    77  
    78  	Comment("add current hash values with previously saved")
    79  	PADDD(abefSave, state0)
    80  	PADDD(cdghSave, state1)
    81  
    82  	Comment("advance data pointer; loop until buffer empty")
    83  	ADDQ(Imm(64), dataPtr)
    84  	CMPQ(numBytes, dataPtr)
    85  	JNE(LabelRef("roundLoop"))
    86  
    87  	Comment("write hash values back in the correct order")
    88  	PSHUFD(Imm(0x1b), state0, state0)
    89  	PSHUFD(Imm(0xb1), state1, state1)
    90  	VMOVDQA(state0, m4)
    91  	PBLENDW(Imm(0xf0), state1, state0)
    92  	PALIGNR(Imm(8), m4, state1)
    93  	VMOVDQU(state0, Mem{Base: digestPtr}.Offset(0*16))
    94  	VMOVDQU(state1, Mem{Base: digestPtr}.Offset(1*16))
    95  }
    96  
// done emits the "done" label followed by RET.
//
// blockSHANI jumps to this label when the rounded-down input length is zero.
// Because blockSHANI calls done right after roundLoop, the code emitted by
// roundLoop also falls through into it.
func done() {
	Label("done")
	RET()
}
   101  
// Fixed register assignments shared by all emitter functions in this file.
var (
	digestPtr       GPPhysical  = RDI // input/output, base pointer to digest hash vector H0, H1, ..., H7
	dataPtr                     = RSI // input, base pointer to the current input data block; advanced by 64 per loop iteration
	numBytes                    = RDX // input, number of input bytes to be processed; later reused as the end-of-input address
	sha256Constants             = RAX // round constants from K256 table, indexed by round number x 32
	msg             VecPhysical = X0  // input data
	state0                      = X1  // round intermediates and outputs
	state1                      = X2  // round intermediates and outputs
	m0                          = X3  // m0, m1,... m4 -- round message temps
	m1                          = X4  // round message temp
	m2                          = X5  // round message temp
	m3                          = X6  // round message temp
	m4                          = X7  // round message temp; also scratch for the digest reordering
	shufMask                    = X8  // input data endian conversion control mask
	abefSave                    = X9  // digest hash vector inter-block buffer abef
	cdghSave                    = X10 // digest hash vector inter-block buffer cdgh
)
   119  
// nop is the VecFunc that emits nothing. It is passed instead of sha256msg1
// for the first and last few round groups, which need no final SHA256MSG1.
// Both arguments are ignored.
func nop(m, a VecPhysical) {
}
   123  
// sha256msg1 emits the final SHA256MSG1 instruction, with operands (m, a),
// for the middle round groups that require it.
func sha256msg1(m, a VecPhysical) {
	SHA256MSG1(m, a)
}
   128  
// vmov emits a VMOVDQA that copies a into b. It is the msg copy used by every
// rounds12to59 group except rounds 12-15.
func vmov(a, b VecPhysical) {
	VMOVDQA(a, b)
}
   133  
// vmovrev emits a VMOVDQA that copies b into a, i.e. vmov with the operands
// swapped. It is the reverse copy used for rounds 12-15 only.
func vmovrev(a, b VecPhysical) {
	VMOVDQA(b, a)
}
   138  
// VecFunc is the signature of the small emitter helpers (nop, sha256msg1,
// vmov, vmovrev) that rounds0to11 and rounds12to59 accept as arguments to
// vary the instructions they emit.
type VecFunc func(a, b VecPhysical)
   140  
   141  // sha rounds 0 to 11
   142  //
   143  // identical with the exception of the final msg op
   144  // which is replaced with a nop for rounds where it is not needed
   145  // refer to Gulley, et al for more information
   146  func rounds0to11(m, a VecPhysical, c int, sha256msg1 VecFunc) {
   147  	VMOVDQU(Mem{Base: dataPtr}.Offset(c*16), msg)
   148  	PSHUFB(shufMask, msg)
   149  	VMOVDQA(msg, m)
   150  	PADDD(Mem{Base: sha256Constants}.Offset(c*32), msg)
   151  	SHA256RNDS2(msg, state0, state1)
   152  	PSHUFD(U8(0x0e), msg, msg)
   153  	SHA256RNDS2(msg, state1, state0)
   154  	sha256msg1(m, a)
   155  }
   156  
   157  // sha rounds 12 to 59
   158  //
   159  // identical with the exception of the final msg op
   160  // and the reverse copy(m,msg) in round 12 which is required
   161  // after the last data load
   162  // refer to Gulley, et al for more information
   163  func rounds12to59(m VecPhysical, c int, a, t VecPhysical, sha256msg1, movop VecFunc) {
   164  	movop(m, msg)
   165  	PADDD(Mem{Base: sha256Constants}.Offset(c*32), msg)
   166  	SHA256RNDS2(msg, state0, state1)
   167  	VMOVDQA(m, m4)
   168  	PALIGNR(Imm(4), a, m4)
   169  	PADDD(m4, t)
   170  	SHA256MSG2(m, t)
   171  	PSHUFD(Imm(0x0e), msg, msg)
   172  	SHA256RNDS2(msg, state1, state0)
   173  	sha256msg1(m, a)
   174  }
   175  

View as plain text