Text file
src/math/exp_loong64.s
1 // Copyright 2025 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #include "textflag.h"
6
7 #define NearZero 0x3e30000000000000 // float64 bits of 2**-28; not referenced by the code visible in this file (archExp reads the same value from exprodata<>+88)
8 #define PosInf 0x7ff0000000000000 // float64 bits of +Inf, stored as the overflow result
9 #define FracMask 0x000fffffffffffff // low 52 bits of a float64: the fraction field
10 #define C1 0x3cb0000000000000 // float64 bits of 2**-52, the scale factor used for denormal results
11
12 DATA exprodata<>+0(SB)/8, $0.0 // offset 0: 0.0, used for the sign test on x
13 DATA exprodata<>+8(SB)/8, $0.5 // offset 8: 0.5, the rounding bias applied before truncating to k
14 DATA exprodata<>+16(SB)/8, $1.0 // offset 16: 1.0
15 DATA exprodata<>+24(SB)/8, $2.0 // offset 24: 2.0
16 DATA exprodata<>+32(SB)/8, $6.93147180369123816490e-01 // offset 32: Ln2Hi
17 DATA exprodata<>+40(SB)/8, $1.90821492927058770002e-10 // offset 40: Ln2Lo
18 DATA exprodata<>+48(SB)/8, $1.44269504088896338700e+00 // offset 48: Log2e
19 DATA exprodata<>+56(SB)/8, $7.09782712893383973096e+02 // offset 56: Overflow, archExp returns +Inf above this
20 DATA exprodata<>+64(SB)/8, $-7.45133219101941108420e+02 // offset 64: Underflow, archExp returns 0 below this
21 DATA exprodata<>+72(SB)/8, $1.0239999999999999e+03 // offset 72: Overflow2, archExp2 returns +Inf above this
22 DATA exprodata<>+80(SB)/8, $-1.0740e+03 // offset 80: Underflow2, archExp2 returns 0 below this
23 DATA exprodata<>+88(SB)/8, $3.7252902984619141e-09 // offset 88: NearZero (2**-28), compared against fabs(x) in archExp
24 GLOBL exprodata<>+0(SB), NOPTR|RODATA, $96 // 12 float64 entries, read-only, no pointers
25
26 DATA expmultirodata<>+0(SB)/8, $1.66666666666666657415e-01 // offset 0: P1
27 DATA expmultirodata<>+8(SB)/8, $-2.77777777770155933842e-03 // offset 8: P2
28 DATA expmultirodata<>+16(SB)/8, $6.61375632143793436117e-05 // offset 16: P3
29 DATA expmultirodata<>+24(SB)/8, $-1.65339022054652515390e-06 // offset 24: P4
30 DATA expmultirodata<>+32(SB)/8, $4.13813679705723846039e-08 // offset 32: P5
31 GLOBL expmultirodata<>+0(SB), NOPTR|RODATA, $40 // 5 polynomial coefficients, read-only, no pointers
32
33 // Exp returns e**x, the base-e exponential of x.
34 // This is an assembly implementation of the method used for function Exp in file exp.go.
35 // Early exits: NaN is returned unchanged, x > Overflow gives +Inf, x < Underflow gives 0, fabs(x) < NearZero gives 1 + x.
36 // func Exp(x float64) float64 (entry symbol is ·archExp; x is read from x+0(FP), the result is written to ret+8(FP))
37 TEXT ·archExp(SB),$0-16
38 MOVD x+0(FP), F0 // F0 = x
39
40 MOVV $exprodata<>+0(SB), R10 // R10 = base address of the exprodata constant table
41 MOVD 56(R10), F1 // Overflow
42 MOVD 64(R10), F2 // Underflow
43 MOVD 88(R10), F3 // NearZero
44 MOVD 16(R10), F17 // 1.0; used by the nearzero path and as the default final scale factor
45
46 CMPEQD F0, F0, FCC0 // x == x fails only when x is NaN
47 BFPF isNaN // x = NaN, return NaN
48
49 CMPGTD F0, F1, FCC0 // x > Overflow ?
50 BFPT overflow // x > Overflow, return PosInf
51
52 CMPGTD F2, F0, FCC0 // Underflow > x ?
53 BFPT underflow // x < Underflow, return 0
54
55 ABSD F0, F5 // F5 = fabs(x)
56 CMPGTD F3, F5, FCC0 // NearZero > fabs(x) ?
57 BFPT nearzero // fabs(x) < NearZero, return 1 + x
58
59 // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2
60 // computed as r = hi - lo for extra precision.
61 MOVD 0(R10), F5 // F5 = 0.0
62 MOVD 8(R10), F3 // F3 = 0.5
63 MOVD 48(R10), F2 // F2 = Log2e
64 CMPGTD F0, F5, FCC0 // x > 0 ?
65 BFPT add // x > 0
66 sub: // fall-through path for x <= 0; nothing in this function branches to this label
67 FMSUBD F3, F2, F0, F3 // Log2e*x - 0.5
68 JMP 2(PC) // skip the single instruction of the add path
69 add:
70 FMADDD F3, F2, F0, F3 // Log2e*x + 0.5
71
72 FTINTRZVD F3, F4 // float64 -> int64
73 MOVV F4, R5 // R5 = int(k)
74 FFINTDV F4, F3 // int64 -> float64
75
76 MOVD 32(R10), F4 // F4 = Ln2Hi
77 MOVD 40(R10), F5 // F5 = Ln2Lo
78 FNMSUBD F0, F3, F4, F4 // F4 = hi = x - k*Ln2Hi
79 MULD F3, F5, F5 // F5 = lo = k*Ln2Lo
80 SUBD F5, F4, F6 // F6 = r = hi - lo
81 MULD F6, F6, F7 // F7 = t = r*r
82
83 // compute c = r - t*(P1 + t*(P2 + t*(P3 + t*(P4 + t*P5)))), evaluated innermost term first
84 MOVV $expmultirodata<>+0(SB), R11 // R11 = base address of the P1..P5 table
85 MOVD 32(R11), F8 // F8 = P5
86 MOVD 24(R11), F9 // F9 = P4
87 FMADDD F9, F8, F7, F13 // F13 = P4 + t*P5
88 MOVD 16(R11), F10 // F10 = P3
89 FMADDD F10, F13, F7, F13 // F13 = P3 + t*F13
90 MOVD 8(R11), F11 // F11 = P2
91 FMADDD F11, F13, F7, F13 // F13 = P2 + t*F13
92 MOVD 0(R11), F12 // F12 = P1
93 FMADDD F12, F13, F7, F13 // F13 = P1 + t*F13
94 FNMSUBD F6, F13, F7, F13 // F13 = c = r - t*F13
95
96 // compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
97 MOVD 24(R10), F14 // F14 = 2.0
98 SUBD F13, F14, F14 // F14 = 2 - c
99 MULD F6, F13, F15 // F15 = r*c
100 DIVD F14, F15, F15 // F15 = (r*c)/(2-c)
101 SUBD F15, F5, F15 // F15 = lo - (r*c)/(2-c)
102 SUBD F4, F15, F15 // F15 = (lo - (r*c)/(2-c)) - hi
103 SUBD F15, F17, F16 // F16 = y = 1 - F15
104
105 // inline Ldexp(y, k), benefit:
106 // 1, no parameter pass overhead.
107 // 2, skip unnecessary checks for Inf/NaN/Zero
108 MOVV F16, R4 // R4 = raw bit pattern of y
109 MOVV $FracMask, R9 // R9 = mask for the 52 fraction bits
110 AND R9, R4, R6 // fraction
111 SRLV $52, R4, R7 // exponent
112 ADDV R5, R7 // exponent field += k
113 MOVV $1, R12
114 BGE R7, R12, normal // exponent field >= 1: result is a normal number, keep F17 = 1.0
115 ADDV $52, R7 // denormal
116 MOVV $C1, R8 // the extra 52 added to the exponent is undone by multiplying with 2**-52
117 MOVV R8, F17 // F17 = 2**-52 replaces the 1.0 scale factor
118 normal:
119 SLLV $52, R7 // shift the new exponent back into field position
120 OR R7, R6, R4 // R4 = exponent | fraction
121 MOVV R4, F0 // reinterpret the assembled bits as a float64
122 MULD F17, F0 // return m * x
123 MOVD F0, ret+8(FP)
124 RET
125 nearzero:
126 ADDD F17, F0, F0 // F0 = 1 + x, then fall through to the shared store below
127 isNaN: // F0 holds the result here: x itself for NaN, or 1 + x when reached from nearzero
128 MOVD F0, ret+8(FP)
129 RET
130 underflow:
131 MOVV R0, ret+8(FP) // store all-zero bits, i.e. +0.0
132 RET
133 overflow:
134 MOVV $PosInf, R4 // R4 = bit pattern of +Inf
135 MOVV R4, ret+8(FP)
136 RET
137
138
139 // Exp2 returns 2**x, the base-2 exponential of x.
140 // This is an assembly implementation of the method used for function Exp2 in file exp.go.
141 // Early exits: NaN is returned unchanged, x > Overflow2 gives +Inf, x < Underflow2 gives 0.
142 // func Exp2(x float64) float64 (entry symbol is ·archExp2; x is read from x+0(FP), the result is written to ret+8(FP))
143 TEXT ·archExp2(SB),$0-16
144 MOVD x+0(FP), F0 // F0 = x
145
146 MOVV $exprodata<>+0(SB), R10 // R10 = base address of the exprodata constant table
147 MOVD 72(R10), F1 // Overflow2
148 MOVD 80(R10), F2 // Underflow2
149 // NearZero is not loaded: this routine has no near-zero shortcut, and F3 is first written by the rounding step below.
150
151 CMPEQD F0, F0, FCC0 // x == x fails only when x is NaN
152 BFPF isNaN // x = NaN, return NaN
153
154 CMPGTD F0, F1, FCC0 // x > Overflow2 ?
155 BFPT overflow // x > Overflow2, return PosInf
156
157 CMPGTD F2, F0, FCC0 // Underflow2 > x ?
158 BFPT underflow // x < Underflow2, return 0
159
160 // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
161 // computed as r = hi - lo for extra precision.
162 MOVD 0(R10), F10 // F10 = 0.0 (sign test now, zero addend for lo later)
163 MOVD 8(R10), F2 // F2 = 0.5
164 CMPGTD F0, F10, FCC0 // x > 0 ?
165 BFPT add // x > 0
166 sub: // fall-through path for x <= 0; nothing in this function branches to this label
167 SUBD F2, F0, F3 // x - 0.5
168 JMP 2(PC) // skip the single instruction of the add path
169 add:
170 ADDD F2, F0, F3 // x + 0.5
171
172 FTINTRZVD F3, F4 // float64 -> int64
173 MOVV F4, R5 // R5 = int(k)
174 FFINTDV F4, F3 // int64 -> float64, F3 = k
175
176 MOVD 32(R10), F4 // F4 = Ln2Hi
177 MOVD 40(R10), F5 // F5 = Ln2Lo
178 SUBD F3, F0, F3 // F3 = t = x - k
179 MULD F3, F4 // F4 = hi = t*Ln2Hi
180 FNMSUBD F10, F3, F5, F5 // F5 = lo = 0 - t*Ln2Lo
181 SUBD F5, F4, F6 // F6 = r = hi - lo
182 MULD F6, F6, F7 // F7 = r*r
183
184 // compute c = r - rr*(P1 + rr*(P2 + rr*(P3 + rr*(P4 + rr*P5)))) with rr = r*r, innermost term first
185 MOVV $expmultirodata<>+0(SB), R11 // R11 = base address of the P1..P5 table
186 MOVD 32(R11), F8 // F8 = P5
187 MOVD 24(R11), F9 // F9 = P4
188 FMADDD F9, F8, F7, F13 // F13 = P4 + rr*P5
189 MOVD 16(R11), F10 // F10 = P3 (the 0.0 held in F10 is no longer needed)
190 FMADDD F10, F13, F7, F13 // F13 = P3 + rr*F13
191 MOVD 8(R11), F11 // F11 = P2
192 FMADDD F11, F13, F7, F13 // F13 = P2 + rr*F13
193 MOVD 0(R11), F12 // F12 = P1
194 FMADDD F12, F13, F7, F13 // F13 = P1 + rr*F13
195 FNMSUBD F6, F13, F7, F13 // F13 = c = r - rr*F13
196
197 // compute y = 1 - ((lo - (r*c)/(2-c)) - hi)
198 MOVD 24(R10), F14 // F14 = 2.0
199 SUBD F13, F14, F14 // F14 = 2 - c
200 MULD F6, F13, F15 // F15 = r*c
201 DIVD F14, F15 // F15 = (r*c)/(2-c)
202
203 MOVD 16(R10), F17 // F17 = 1.0; also the default final scale factor
204 SUBD F15, F5, F15 // F15 = lo - (r*c)/(2-c)
205 SUBD F4, F15, F15 // F15 = (lo - (r*c)/(2-c)) - hi
206 SUBD F15, F17, F16 // F16 = y = 1 - F15
207
208 // inline Ldexp(y, k), benefit:
209 // 1, no parameter pass overhead.
210 // 2, skip unnecessary checks for Inf/NaN/Zero
211 MOVV F16, R4 // R4 = raw bit pattern of y
212 MOVV $FracMask, R9 // R9 = mask for the 52 fraction bits
213 SRLV $52, R4, R7 // exponent
214 AND R9, R4, R6 // fraction
215 ADDV R5, R7 // exponent field += k
216 MOVV $1, R12
217 BGE R7, R12, normal // exponent field >= 1: result is a normal number, keep F17 = 1.0
218
219 ADDV $52, R7 // denormal
220 MOVV $C1, R8 // the extra 52 added to the exponent is undone by multiplying with 2**-52
221 MOVV R8, F17 // F17 = 2**-52 replaces the 1.0 scale factor
222 normal:
223 SLLV $52, R7 // shift the new exponent back into field position
224 OR R7, R6, R4 // R4 = exponent | fraction
225 MOVV R4, F0 // reinterpret the assembled bits as a float64
226 MULD F17, F0 // F0 = F0 * F17, then fall through to the shared store below
227 isNaN: // F0 holds the result here: x itself for NaN, or the scaled value from above
228 MOVD F0, ret+8(FP)
229 RET
230 underflow:
231 MOVV R0, ret+8(FP) // store all-zero bits, i.e. +0.0
232 RET
233 overflow:
234 MOVV $PosInf, R4 // R4 = bit pattern of +Inf
235 MOVV R4, ret+8(FP)
236 RET
237
View as plain text