Text file src/hash/crc32/crc32_loong64.s

     1  // Copyright 2024 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  #include "textflag.h"
     6  
     7  // castagnoliUpdate updates the non-inverted crc with the given data.
     8  
     9  // func castagnoliUpdate(crc uint32, p []byte) uint32
    10  TEXT ·castagnoliUpdate(SB),NOSPLIT,$0-36
    11  	MOVWU	crc+0(FP), R4		// a0 = CRC value
    12  	MOVV	p+8(FP), R5		// a1 = data pointer
    13  	MOVV	p_len+16(FP), R6	// a2 = len(p)
    14  
    15  	SGT	$8, R6, R12
    16  	BNE	R12, less_than_8
    17  	AND	$7, R5, R12
    18  	BEQ	R12, aligned
    19  
    20  	// Process the first few bytes to 8-byte align the input.
    21  	// t0 = 8 - t0. We need to process this many bytes to align.
    22  	SUB	$1, R12
    23  	XOR	$7, R12
    24  
    25  	AND	$1, R12, R13
    26  	BEQ	R13, align_2
    27  	MOVB	(R5), R13
    28  	CRCCWBW	R4, R13, R4
    29  	ADDV	$1, R5
    30  	ADDV	$-1, R6
    31  
    32  align_2:
    33  	AND	$2, R12, R13
    34  	BEQ	R13, align_4
    35  	MOVH	(R5), R13
    36  	CRCCWHW	R4, R13, R4
    37  	ADDV	$2, R5
    38  	ADDV	$-2, R6
    39  
    40  align_4:
    41  	AND	$4, R12, R13
    42  	BEQ	R13, aligned
    43  	MOVW	(R5), R13
    44  	CRCCWWW	R4, R13, R4
    45  	ADDV	$4, R5
    46  	ADDV	$-4, R6
    47  
    48  aligned:
    49  	// The input is now 8-byte aligned and we can process 8-byte chunks.
    50  	SGT	$8, R6, R12
    51  	BNE	R12, less_than_8
    52  	MOVV	(R5), R13
    53  	CRCCWVW	R4, R13, R4
    54  	ADDV	$8, R5
    55  	ADDV	$-8, R6
    56  	JMP	aligned
    57  
    58  less_than_8:
    59  	// We may have some bytes left over; process 4 bytes, then 2, then 1.
    60  	AND	$4, R6, R12
    61  	BEQ	R12, less_than_4
    62  	MOVW	(R5), R13
    63  	CRCCWWW	R4, R13, R4
    64  	ADDV	$4, R5
    65  	ADDV	$-4, R6
    66  
    67  less_than_4:
    68  	AND	$2, R6, R12
    69  	BEQ	R12, less_than_2
    70  	MOVH	(R5), R13
    71  	CRCCWHW	R4, R13, R4
    72  	ADDV	$2, R5
    73  	ADDV	$-2, R6
    74  
    75  less_than_2:
    76  	BEQ	R6, done
    77  	MOVB	(R5), R13
    78  	CRCCWBW	R4, R13, R4
    79  
    80  done:
    81  	MOVW	R4, ret+32(FP)
    82  	RET
    83  
    84  // ieeeUpdate updates the non-inverted crc with the given data.
    85  
    86  // func ieeeUpdate(crc uint32, p []byte) uint32
    87  TEXT ·ieeeUpdate(SB),NOSPLIT,$0-36
    88  	MOVWU	crc+0(FP), R4		// a0 = CRC value
    89  	MOVV	p+8(FP), R5		// a1 = data pointer
    90  	MOVV	p_len+16(FP), R6	// a2 = len(p)
    91  
    92  	SGT	$8, R6, R12
    93  	BNE	R12, less_than_8
    94  	AND	$7, R5, R12
    95  	BEQ	R12, aligned
    96  
    97  	// Process the first few bytes to 8-byte align the input.
    98  	// t0 = 8 - t0. We need to process this many bytes to align.
    99  	SUB	$1, R12
   100  	XOR	$7, R12
   101  
   102  	AND	$1, R12, R13
   103  	BEQ	R13, align_2
   104  	MOVB	(R5), R13
   105  	CRCWBW	R4, R13, R4
   106  	ADDV	$1, R5
   107  	ADDV	$-1, R6
   108  
   109  align_2:
   110  	AND	$2, R12, R13
   111  	BEQ	R13, align_4
   112  	MOVH	(R5), R13
   113  	CRCWHW	R4, R13, R4
   114  	ADDV	$2, R5
   115  	ADDV	$-2, R6
   116  
   117  align_4:
   118  	AND	$4, R12, R13
   119  	BEQ	R13, aligned
   120  	MOVW	(R5), R13
   121  	CRCWWW	R4, R13, R4
   122  	ADDV	$4, R5
   123  	ADDV	$-4, R6
   124  
   125  aligned:
   126  	// The input is now 8-byte aligned and we can process 8-byte chunks.
   127  	SGT	$8, R6, R12
   128  	BNE	R12, less_than_8
   129  	MOVV	(R5), R13
   130  	CRCWVW	R4, R13, R4
   131  	ADDV	$8, R5
   132  	ADDV	$-8, R6
   133  	JMP	aligned
   134  
   135  less_than_8:
   136  	// We may have some bytes left over; process 4 bytes, then 2, then 1.
   137  	AND	$4, R6, R12
   138  	BEQ	R12, less_than_4
   139  	MOVW	(R5), R13
   140  	CRCWWW	R4, R13, R4
   141  	ADDV	$4, R5
   142  	ADDV	$-4, R6
   143  
   144  less_than_4:
   145  	AND	$2, R6, R12
   146  	BEQ	R12, less_than_2
   147  	MOVH	(R5), R13
   148  	CRCWHW	R4, R13, R4
   149  	ADDV	$2, R5
   150  	ADDV	$-2, R6
   151  
   152  less_than_2:
   153  	BEQ	R6, done
   154  	MOVB	(R5), R13
   155  	CRCWBW	R4, R13, R4
   156  
   157  done:
   158  	MOVW	R4, ret+32(FP)
   159  	RET
   160  
   161  

View as plain text