Text file
src/math/exp_arm64.s
1 // Copyright 2017 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 #define Ln2Hi 6.93147180369123816490e-01
6 #define Ln2Lo 1.90821492927058770002e-10
7 #define Log2e 1.44269504088896338700e+00
8 #define Overflow 7.09782712893383973096e+02
9 #define Underflow -7.45133219101941108420e+02
10 #define Overflow2 1.0239999999999999e+03
11 #define Underflow2 -1.0740e+03
12 #define NearZero 0x3e30000000000000 // 2**-28
13 #define PosInf 0x7ff0000000000000
14 #define FracMask 0x000fffffffffffff
15 #define C1 0x3cb0000000000000 // 2**-52
16 #define P1 1.66666666666666657415e-01 // 0x3FC55555; 0x55555555
17 #define P2 -2.77777777770155933842e-03 // 0xBF66C16C; 0x16BEBD93
18 #define P3 6.61375632143793436117e-05 // 0x3F11566A; 0xAF25DE2C
19 #define P4 -1.65339022054652515390e-06 // 0xBEBBBD41; 0xC5D26BF1
20 #define P5 4.13813679705723846039e-08 // 0x3E663769; 0x72BEA4D0
21
22 // Exp returns e**x, the base-e exponential of x.
23 // This is an assembly implementation of the method used for function Exp in file exp.go.
24 //
25 // func Exp(x float64) float64
26 TEXT ·archExp(SB),$0-16
27 FMOVD x+0(FP), F0 // F0 = x
28 FCMPD F0, F0
29 BNE isNaN // x = NaN, return NaN
30 FMOVD $Overflow, F1
31 FCMPD F1, F0
32 BGT overflow // x > Overflow, return PosInf
33 FMOVD $Underflow, F1
34 FCMPD F1, F0
35 BLT underflow // x < Underflow, return 0
36 MOVD $NearZero, R0
37 FMOVD R0, F2
38 FABSD F0, F3
39 FMOVD $1.0, F1 // F1 = 1.0
40 FCMPD F2, F3
41 BLT nearzero // fabs(x) < NearZero, return 1 + x
42 // argument reduction, x = k*ln2 + r, |r| <= 0.5*ln2
43 // computed as r = hi - lo for extra precision.
44 FMOVD $Log2e, F2
45 FMOVD $0.5, F3
46 FNMSUBD F0, F3, F2, F4 // Log2e*x - 0.5
47 FMADDD F0, F3, F2, F3 // Log2e*x + 0.5
48 FCMPD $0.0, F0
49 FCSELD LT, F4, F3, F3 // F3 = k
50 FCVTZSD F3, R1 // R1 = int(k)
51 SCVTFD R1, F3 // F3 = float64(int(k))
52 FMOVD $Ln2Hi, F4 // F4 = Ln2Hi
53 FMOVD $Ln2Lo, F5 // F5 = Ln2Lo
54 FMSUBD F3, F0, F4, F4 // F4 = hi = x - float64(int(k))*Ln2Hi
55 FMULD F3, F5 // F5 = lo = float64(int(k)) * Ln2Lo
56 FSUBD F5, F4, F6 // F6 = r = hi - lo
57 FMULD F6, F6, F7 // F7 = t = r * r
58 // compute y
59 FMOVD $P5, F8 // F8 = P5
60 FMOVD $P4, F9 // F9 = P4
61 FMADDD F7, F9, F8, F13 // P4+t*P5
62 FMOVD $P3, F10 // F10 = P3
63 FMADDD F7, F10, F13, F13 // P3+t*(P4+t*P5)
64 FMOVD $P2, F11 // F11 = P2
65 FMADDD F7, F11, F13, F13 // P2+t*(P3+t*(P4+t*P5))
66 FMOVD $P1, F12 // F12 = P1
67 FMADDD F7, F12, F13, F13 // P1+t*(P2+t*(P3+t*(P4+t*P5)))
68 FMSUBD F7, F6, F13, F13 // F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
69 FMOVD $2.0, F14
70 FSUBD F13, F14
71 FMULD F6, F13, F15
72 FDIVD F14, F15 // F15 = (r*c)/(2-c)
73 FSUBD F15, F5, F15 // lo-(r*c)/(2-c)
74 FSUBD F4, F15, F15 // (lo-(r*c)/(2-c))-hi
75 FSUBD F15, F1, F16 // F16 = y = 1-((lo-(r*c)/(2-c))-hi)
76 // inline Ldexp(y, k), benefit:
77 // 1, no parameter pass overhead.
78 // 2, skip unnecessary checks for Inf/NaN/Zero
79 FMOVD F16, R0
80 AND $FracMask, R0, R2 // fraction
81 LSR $52, R0, R5 // exponent
82 ADD R1, R5 // R1 = int(k)
83 CMP $1, R5
84 BGE normal
85 ADD $52, R5 // denormal
86 MOVD $C1, R8
87 FMOVD R8, F1 // m = 2**-52
88 normal:
89 ORR R5<<52, R2, R0
90 FMOVD R0, F0
91 FMULD F1, F0 // return m * x
92 FMOVD F0, ret+8(FP)
93 RET
94 nearzero:
95 FADDD F1, F0
96 isNaN:
97 FMOVD F0, ret+8(FP)
98 RET
99 underflow:
100 MOVD ZR, ret+8(FP)
101 RET
102 overflow:
103 MOVD $PosInf, R0
104 MOVD R0, ret+8(FP)
105 RET
106
107
108 // Exp2 returns 2**x, the base-2 exponential of x.
109 // This is an assembly implementation of the method used for function Exp2 in file exp.go.
110 //
111 // func Exp2(x float64) float64
112 TEXT ·archExp2(SB),$0-16
113 FMOVD x+0(FP), F0 // F0 = x
114 FCMPD F0, F0
115 BNE isNaN // x = NaN, return NaN
116 FMOVD $Overflow2, F1
117 FCMPD F1, F0
118 BGT overflow // x > Overflow, return PosInf
119 FMOVD $Underflow2, F1
120 FCMPD F1, F0
121 BLT underflow // x < Underflow, return 0
122 // argument reduction; x = r*lg(e) + k with |r| <= ln(2)/2
123 // computed as r = hi - lo for extra precision.
124 FMOVD $0.5, F2
125 FSUBD F2, F0, F3 // x + 0.5
126 FADDD F2, F0, F4 // x - 0.5
127 FCMPD $0.0, F0
128 FCSELD LT, F3, F4, F3 // F3 = k
129 FCVTZSD F3, R1 // R1 = int(k)
130 SCVTFD R1, F3 // F3 = float64(int(k))
131 FSUBD F3, F0, F3 // t = x - float64(int(k))
132 FMOVD $Ln2Hi, F4 // F4 = Ln2Hi
133 FMOVD $Ln2Lo, F5 // F5 = Ln2Lo
134 FMULD F3, F4 // F4 = hi = t * Ln2Hi
135 FNMULD F3, F5 // F5 = lo = -t * Ln2Lo
136 FSUBD F5, F4, F6 // F6 = r = hi - lo
137 FMULD F6, F6, F7 // F7 = t = r * r
138 // compute y
139 FMOVD $P5, F8 // F8 = P5
140 FMOVD $P4, F9 // F9 = P4
141 FMADDD F7, F9, F8, F13 // P4+t*P5
142 FMOVD $P3, F10 // F10 = P3
143 FMADDD F7, F10, F13, F13 // P3+t*(P4+t*P5)
144 FMOVD $P2, F11 // F11 = P2
145 FMADDD F7, F11, F13, F13 // P2+t*(P3+t*(P4+t*P5))
146 FMOVD $P1, F12 // F12 = P1
147 FMADDD F7, F12, F13, F13 // P1+t*(P2+t*(P3+t*(P4+t*P5)))
148 FMSUBD F7, F6, F13, F13 // F13 = c = r - t*(P1+t*(P2+t*(P3+t*(P4+t*P5))))
149 FMOVD $2.0, F14
150 FSUBD F13, F14
151 FMULD F6, F13, F15
152 FDIVD F14, F15 // F15 = (r*c)/(2-c)
153 FMOVD $1.0, F1 // F1 = 1.0
154 FSUBD F15, F5, F15 // lo-(r*c)/(2-c)
155 FSUBD F4, F15, F15 // (lo-(r*c)/(2-c))-hi
156 FSUBD F15, F1, F16 // F16 = y = 1-((lo-(r*c)/(2-c))-hi)
157 // inline Ldexp(y, k), benefit:
158 // 1, no parameter pass overhead.
159 // 2, skip unnecessary checks for Inf/NaN/Zero
160 FMOVD F16, R0
161 AND $FracMask, R0, R2 // fraction
162 LSR $52, R0, R5 // exponent
163 ADD R1, R5 // R1 = int(k)
164 CMP $1, R5
165 BGE normal
166 ADD $52, R5 // denormal
167 MOVD $C1, R8
168 FMOVD R8, F1 // m = 2**-52
169 normal:
170 ORR R5<<52, R2, R0
171 FMOVD R0, F0
172 FMULD F1, F0 // return m * x
173 isNaN:
174 FMOVD F0, ret+8(FP)
175 RET
176 underflow:
177 MOVD ZR, ret+8(FP)
178 RET
179 overflow:
180 MOVD $PosInf, R0
181 MOVD R0, ret+8(FP)
182 RET
183
View as plain text