sparrow 2.3.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
float16_t.hpp
Go to the documentation of this file.
1// half - IEEE 754-based half-precision floating-point library.
2//
3// Copyright (c) 2012-2019 Christian Rau <rauy@users.sourceforge.net>
4// Copyright (c) 2020 0xBYTESHIFT
5//
6// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
7// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
8// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
9// Software is furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
15// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
16// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17
20
21#pragma once
22
23// Disable some warnings so that it builds with sparrow build options
// Compiler-specific warning suppression for the remainder of this header.
// Each branch pushes the diagnostic state first so it can be restored later.
#if defined(__clang__)
#    pragma clang diagnostic push
#    pragma clang diagnostic ignored "-Wconversion"
#    pragma clang diagnostic ignored "-Wsign-conversion"
#    pragma clang diagnostic ignored "-Wold-style-cast"
#    pragma clang diagnostic ignored "-Wdeprecated-declarations"
#    pragma clang diagnostic ignored "-Wshadow"
#elif defined(__GNUC__)
#    pragma GCC diagnostic push
#    pragma GCC diagnostic ignored "-Wconversion"
#    pragma GCC diagnostic ignored "-Wsign-conversion"
#    pragma GCC diagnostic ignored "-Wold-style-cast"
// Kept in sync with the Clang branch above: the same code is compiled by both.
#    pragma GCC diagnostic ignored "-Wdeprecated-declarations"
#    pragma GCC diagnostic ignored "-Wshadow"
#elif defined(_MSC_VER)
#    pragma warning(push)
#    pragma warning(disable : 4127)  // conditional expression is constant
#    pragma warning(disable : 4146)  // unary minus operator applied to unsigned type, result still unsigned
#    pragma warning(disable : 4365)  // 'action' : conversion from 'type_1' to 'type_2', signed/unsigned
                                     // mismatch
#    pragma warning(disable : 4514)  // 'function' : unreferenced inline function has been removed
#    pragma warning(disable : 4668)  // 'symbol' is not defined as a preprocessor macro, replacing with
                                     // '0' for 'directives'
#    pragma warning(disable : 4996)  // std::float_denorm_style

#endif
48
49
// Selects the shift-based implementations of detail::sign_mask / detail::arithmetic_shift.
#define HALF_TWOS_COMPLEMENT_INT 1

// HALF_ERRHANDLING_THROWS is set as soon as any of the exception-throwing error modes is requested.
#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) \
    || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) \
    || defined(HALF_ERRHANDLING_THROW_INEXACT)
#    define HALF_ERRHANDLING_THROWS 1
#endif

// Non-zero when at least one error reporting mechanism (flags, errno, fenv or exceptions) is enabled.
#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS || HALF_ERRHANDLING_ERRNO || HALF_ERRHANDLING_FENV || HALF_ERRHANDLING_THROWS)

// HALF_UNUSED_NOERR(name): keeps a parameter name only when error handling actually uses it.
// constexpr_NOERR: expands to `constexpr` only when no error handling is compiled in.
#if HALF_ERRHANDLING
#    define HALF_UNUSED_NOERR(name) name
#    define constexpr_NOERR
#else
#    define HALF_UNUSED_NOERR(name)
#    define constexpr_NOERR constexpr
#endif
72
73#include <utility>
74#include <algorithm>
75#include <istream>
76#include <ostream>
77#include <limits>
78#include <stdexcept>
79#include <climits>
80#include <cmath>
81#include <cstring>
82#include <cstdlib>
83#include <type_traits>
84#include <cstdint>
85#if HALF_ERRHANDLING_ERRNO
86 #include <cerrno>
87#endif
88#include <cfenv>
89#include <functional>
90
91#if defined(__cpp_lib_format)
92# include <format>
93# include <sstream>
94#endif
95
96#include <concepts>
97
// Use the F16C conversion intrinsics whenever the compiler advertises them, unless overridden by the user.
#if !defined(HALF_ENABLE_F16C_INTRINSICS)
#    define HALF_ENABLE_F16C_INTRINSICS __F16C__
#endif

#if HALF_ENABLE_F16C_INTRINSICS
#    include <immintrin.h>
#endif

// When enabled, detail::raise() additionally reports FE_INEXACT on overflow.
#if !defined(HALF_ERRHANDLING_OVERFLOW_TO_INEXACT)
#    define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1
#endif

// When enabled, detail::raise() additionally reports FE_INEXACT on underflow.
#if !defined(HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT)
#    define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1
#endif

// Default rounding mode, expressed as a std::float_round_style value.
#if !defined(HALF_ROUND_STYLE)
#    define HALF_ROUND_STYLE 1  // = std::round_to_nearest
#endif

// Half-precision counterpart of HUGE_VAL: positive infinity.
#define HUGE_VALH std::numeric_limits<half_float::half>::infinity()

// Half-precision counterpart of FP_FAST_FMA.
#define FP_FAST_FMAH 1

// Half-precision counterpart of FLT_ROUNDS; mirrors the configured rounding style.
#define HLF_ROUNDS HALF_ROUND_STYLE
171
// Fallback definitions for the <cmath> classification macros.
// Each one is only provided when the standard library did not define it already.
#if !defined(FP_ILOGB0)
#    define FP_ILOGB0 INT_MIN
#endif
#if !defined(FP_ILOGBNAN)
#    define FP_ILOGBNAN INT_MAX
#endif
#if !defined(FP_SUBNORMAL)
#    define FP_SUBNORMAL 0
#endif
#if !defined(FP_ZERO)
#    define FP_ZERO 1
#endif
#if !defined(FP_NAN)
#    define FP_NAN 2
#endif
#if !defined(FP_INFINITE)
#    define FP_INFINITE 3
#endif
#if !defined(FP_NORMAL)
#    define FP_NORMAL 4
#endif
193
// Fallback floating-point exception flags for platforms whose <cfenv> provides none at all.
#ifndef FE_ALL_EXCEPT
#    define FE_INVALID 0x10
#    define FE_DIVBYZERO 0x08
#    define FE_OVERFLOW 0x04
#    define FE_UNDERFLOW 0x02
#    define FE_INEXACT 0x01
#    define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)
#endif

#if defined(__EMSCRIPTEN__)
// Emscripten defines FE_ALL_EXCEPT as 0, which causes the fallback above to be skipped,
// but does not define the individual FE_* macros. So we patch them manually here.

#    if !defined(FE_INEXACT)
#        define FE_INEXACT 0x02
#    endif
#    if !defined(FE_INVALID)
#        define FE_INVALID 0x04
#    endif
#    if !defined(FE_OVERFLOW)
#        define FE_OVERFLOW 0x08
#    endif
#    if !defined(FE_UNDERFLOW)
#        define FE_UNDERFLOW 0x10
#    endif
#    if !defined(FE_DIVBYZERO)
#        define FE_DIVBYZERO 0x01
#    endif
#    if !defined(FE_ALL_EXCEPT)
#        define FE_ALL_EXCEPT (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT)
#    endif
#endif  // __EMSCRIPTEN__
226
227
// Forward declaration of the half-precision type so that it can be named
// (e.g. by the literal operator declared below) before its definition.
namespace half_float
{
    class half;
}
234
235namespace half_float {
242 namespace literal {
243 half operator ""_h(long double);
244 }
245
248 namespace detail {
/// Thin wrapper around std::conditional.
template<bool B, class T, class F>
struct conditional : std::conditional<B, T, F>
{
};

/// Compile-time boolean constant used for tag dispatching.
template<bool B>
struct bool_type : std::bool_constant<B>
{
};

using std::false_type;
using std::true_type;

/// Trait telling whether T is a builtin floating-point type.
template<class T>
struct is_float : std::is_floating_point<T>
{
};
259
/// Maps a floating-point type to an unsigned integer type able to hold its bit pattern.
/// The primary template is a placeholder (`unsigned char`) for types without a dedicated mapping.
template<class T>
struct bits
{
    using type = unsigned char;
};

// cv-qualified types share the mapping of the unqualified type.
template<class T>
struct bits<const T> : bits<T>
{
};

template<class T>
struct bits<volatile T> : bits<T>
{
};

template<class T>
struct bits<const volatile T> : bits<T>
{
};

/// Unsigned integer of (at least) 16 bits width.
using uint16 = std::uint_least16_t;

/// Fastest unsigned integer of (at least) 32 bits width.
using uint32 = std::uint_fast32_t;

/// Fastest signed integer of (at least) 32 bits width.
using int32 = std::int_fast32_t;

/// Bit storage for single-precision values.
template<>
struct bits<float>
{
    using type = std::uint_least32_t;
};

/// Bit storage for double-precision values.
template<>
struct bits<double>
{
    using type = std::uint_least64_t;
};

/// Shorthand for `bits<T>::type`.
template<class T>
using bits_t = typename bits<T>::type;
281
#ifdef HALF_ARITHMETIC_TYPE
/// User-selected type for internal arithmetic (only available when HALF_ARITHMETIC_TYPE is defined).
using internal_t = HALF_ARITHMETIC_TYPE;
#endif

/// Empty tag type.
struct binary_t {};

/// Tag object of type binary_t.
/// Declared `inline` so that every translation unit including this header refers to the
/// same object instead of getting its own internal-linkage copy.
inline constexpr binary_t binary{};
292
295
/// Check for infinity by forwarding to std::isinf.
/// \param arg value to query
/// \return whether std::isinf reports \a arg as infinite
template<class T>
bool builtin_isinf(T arg)
{
    const bool infinite = std::isinf(arg);
    return infinite;
}
302
/// Check for NaN by forwarding to std::isnan.
/// \param arg value to query
/// \return whether std::isnan reports \a arg as not a number
template<class T>
bool builtin_isnan(T arg)
{
    const bool not_a_number = std::isnan(arg);
    return not_a_number;
}
309
/// Check the sign by forwarding to std::signbit.
/// \param arg value to query
/// \return whether std::signbit reports the sign bit of \a arg as set
template<class T>
bool builtin_signbit(T arg)
{
    const bool negative = std::signbit(arg);
    return negative;
}
316
321 inline uint32 sign_mask(uint32 arg) {
322 static const int N = std::numeric_limits<uint32>::digits - 1;
323 #if HALF_TWOS_COMPLEMENT_INT
324 return static_cast<int32>(arg) >> N;
325 #else
326 return -((arg>>N)&1);
327 #endif
328 }
329
334 inline uint32 arithmetic_shift(uint32 arg, int i) {
335 #if HALF_TWOS_COMPLEMENT_INT
336 return static_cast<int32>(arg) >> i;
337 #else
338 return static_cast<int32>(arg)/(static_cast<int32>(1)<<i) - ((arg>>(std::numeric_limits<uint32>::digits-1))&1);
339 #endif
340 }
341
345
/// Access the per-thread exception flag word.
/// \return reference to a thread-local integer, initially 0
inline int& errflags()
{
    thread_local int raised = 0;
    int& state = raised;
    return state;
}
349
353 inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) {
354 #if HALF_ERRHANDLING
355 if(!cond)
356 return;
357 #if HALF_ERRHANDLING_FLAGS
358 errflags() |= flags;
359 #endif
360 #if HALF_ERRHANDLING_ERRNO
361 if(flags & FE_INVALID)
362 errno = EDOM;
363 else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW))
364 errno = ERANGE;
365 #endif
366 #if HALF_ERRHANDLING_FENV
367 std::feraiseexcept(flags);
368 #endif
369 #ifdef HALF_ERRHANDLING_THROW_INVALID
370 if(flags & FE_INVALID)
371 throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID);
372 #endif
373 #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO
374 if(flags & FE_DIVBYZERO)
375 throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO);
376 #endif
377 #ifdef HALF_ERRHANDLING_THROW_OVERFLOW
378 if(flags & FE_OVERFLOW)
379 throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW);
380 #endif
381 #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW
382 if(flags & FE_UNDERFLOW)
383 throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW);
384 #endif
385 #ifdef HALF_ERRHANDLING_THROW_INEXACT
386 if(flags & FE_INEXACT)
387 throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT);
388 #endif
389 #if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
390 if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT))
392 #endif
393 #if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
394 if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT))
396 #endif
397 #endif
398 }
399
406 inline constexpr_NOERR bool compsignal(unsigned int x, unsigned int y) {
407 #if HALF_ERRHANDLING
408 raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00);
409 #endif
410 return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00;
411 }
412
417 inline constexpr_NOERR unsigned int signal(unsigned int nan) {
418 #if HALF_ERRHANDLING
419 raise(FE_INVALID, !(nan&0x200));
420 #endif
421 return nan | 0x200;
422 }
423
429 inline constexpr_NOERR unsigned int signal(unsigned int x, unsigned int y) {
430 #if HALF_ERRHANDLING
431 raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)));
432 #endif
433 return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200);
434 }
435
442 inline constexpr_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) {
443 #if HALF_ERRHANDLING
444 raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200)));
445 #endif
446 return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200);
447 }
448
454 inline constexpr_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) {
455 #if HALF_ERRHANDLING
456 return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x;
457 #else
458 return x;
459 #endif
460 }
461
465 inline constexpr_NOERR unsigned int invalid() {
466 #if HALF_ERRHANDLING
468 #endif
469 return 0x7FFF;
470 }
471
476 inline constexpr_NOERR unsigned int pole(unsigned int sign = 0) {
477 #if HALF_ERRHANDLING
479 #endif
480 return sign | 0x7C00;
481 }
482
487 inline constexpr_NOERR unsigned int check_underflow(unsigned int arg) {
488 #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
489 raise(FE_UNDERFLOW, !(arg&0x7C00));
490 #endif
491 return arg;
492 }
493
497
503 template<std::float_round_style R> constexpr_NOERR unsigned int overflow(unsigned int sign = 0) {
504 #if HALF_ERRHANDLING
506 #endif
507 return (R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) :
508 (R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) :
509 (R==std::round_toward_zero) ? (sign|0x7BFF) :
510 (sign|0x7C00);
511 }
512
518 template<std::float_round_style R> constexpr_NOERR unsigned int underflow(unsigned int sign = 0) {
519 #if HALF_ERRHANDLING
521 #endif
522 return (R==std::round_toward_infinity) ? (sign+1-(sign>>15)) :
523 (R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) :
524 sign;
525 }
526
537 template<std::float_round_style R,bool I> constexpr_NOERR unsigned int rounded(unsigned int value, int g, int s) {
538 #if HALF_ERRHANDLING
539 value += (R==std::round_to_nearest) ? (g&(s|value)) :
540 (R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) :
541 (R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0;
542 if((value&0x7C00) == 0x7C00)
544 else if(value & 0x7C00)
545 raise(FE_INEXACT, I || (g|s)!=0);
546 else
548 return value;
549 #else
550 return (R==std::round_to_nearest) ? (value+(g&(s|value))) :
551 (R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) :
552 (R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) :
553 value;
554 #endif
555 }
556
565 template<std::float_round_style R,bool E,bool I> unsigned int integral(unsigned int value) {
566 unsigned int abs = value & 0x7FFF;
567 if(abs < 0x3C00) {
568 raise(FE_INEXACT, I);
569 return ((R==std::round_to_nearest) ? (0x3C00&-static_cast<unsigned>(abs>=(0x3800+E))) :
570 (R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) :
571 (R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast<unsigned>(value>0x8000)) :
572 0) | (value&0x8000);
573 }
574 if(abs >= 0x6400)
575 return (abs>0x7C00) ? signal(value) : value;
576 unsigned int exp = 25 - (abs>>10), mask = (1<<exp) - 1;
577 raise(FE_INEXACT, I && (value&mask));
578 return (( (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(value>>exp)&E)) :
579 (R==std::round_toward_infinity) ? (mask&((value>>15)-1)) :
580 (R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) :
581 0) + value) & ~mask;
582 }
583
598 template<std::float_round_style R,unsigned int F,bool S,bool N,bool I> unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) {
599 if(S) {
600 uint32 msign = sign_mask(m);
601 m = (m^msign) - msign;
602 sign = msign & 0x8000;
603 }
604 if(N)
605 for(; m<(static_cast<uint32>(1)<<F) && exp; m<<=1,--exp) ;
606 else if(exp < 0)
607 return rounded<R,I>(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast<uint32>(1)<<(F-11-exp))-1))!=0));
608 return rounded<R,I>(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast<uint32>(1)<<(F-11))-1))!=0));
609 }
610
619 template<std::float_round_style R> unsigned int float2half_impl(float value, true_type) {
620 #if HALF_ENABLE_F16C_INTRINSICS
621 return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value),
622 (R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT :
623 (R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO :
624 (R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF :
625 (R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF :
626 _MM_FROUND_CUR_DIRECTION));
627 #else
628 bits_t<float> fbits;
629 std::memcpy(&fbits, &value, sizeof(float));
630 #if 1
631 unsigned int sign = (fbits>>16) & 0x8000;
632 fbits &= 0x7FFFFFFF;
633 if(fbits >= 0x7F800000)
634 return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0);
635 if(fbits >= 0x47800000)
636 return overflow<R>(sign);
637 if(fbits >= 0x38800000)
638 return rounded<R,false>(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0);
639 if(fbits >= 0x33000000)
640 {
641 int i = 125 - (fbits>>23);
642 fbits = (fbits&0x7FFFFF) | 0x800000;
643 return rounded<R,false>(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast<uint32>(1)<<i)-1))!=0);
644 }
645 if(fbits != 0)
646 return underflow<R>(sign);
647 return sign;
648 #else
649 static const uint16 base_table[512] = {
650 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
651 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
652 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
653 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
654 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
655 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
656 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
657 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
658 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF,
659 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
660 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
661 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
662 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
663 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
664 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
665 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00,
666 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
667 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
668 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
669 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
670 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
671 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
672 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
673 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
674 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF,
675 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
676 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
677 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
678 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
679 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
680 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
681 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 };
682 static const unsigned char shift_table[256] = {
683 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
684 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
685 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
686 25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
687 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
688 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
689 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
690 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 };
691 int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp];
692 fbits &= 0x7FFFFF;
693 uint32 m = (fbits|((exp!=0)<<23)) & -static_cast<uint32>(exp!=0xFF);
694 return rounded<R,false>(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast<uint32>(1)<<(i-1))-1)&m)!=0);
695 #endif
696 #endif
697 }
698
706 template<std::float_round_style R> unsigned int float2half_impl(double value, true_type) {
707 #if HALF_ENABLE_F16C_INTRINSICS
708 if(R == std::round_indeterminate)
709 return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION));
710 #endif
711 bits_t<double> dbits;
712 std::memcpy(&dbits, &value, sizeof(double));
713 uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF;
714 unsigned int sign = (hi>>16) & 0x8000;
715 hi &= 0x7FFFFFFF;
716 if(hi >= 0x7FF00000)
717 return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0);
718 if(hi >= 0x40F00000)
719 return overflow<R>(sign);
720 if(hi >= 0x3F100000)
721 return rounded<R,false>(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0);
722 if(hi >= 0x3E600000) {
723 int i = 1018 - (hi>>20);
724 hi = (hi&0xFFFFF) | 0x100000;
725 return rounded<R,false>(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast<uint32>(1)<<i)-1))|lo)!=0);
726 }
727 if((hi|lo) != 0)
728 return underflow<R>(sign);
729 return sign;
730 }
731
740 template<std::float_round_style R,class T> unsigned int float2half_impl(T value, ...) {
741 unsigned int hbits = static_cast<unsigned>(builtin_signbit(value)) << 15;
742 if(value == T())
743 return hbits;
744 if(builtin_isnan(value))
745 return hbits | 0x7FFF;
746 if(builtin_isinf(value))
747 return hbits | 0x7C00;
748 int exp;
749 std::frexp(value, &exp);
750 if(exp > 16)
751 return overflow<R>(hbits);
752 if(exp < -13)
753 value = std::ldexp(value, 25);
754 else {
755 value = std::ldexp(value, 12-exp);
756 hbits |= ((exp+13)<<10);
757 }
758 T ival, frac = std::modf(value, &ival);
759 int m = std::abs(static_cast<int>(ival));
760 return rounded<R,false>(hbits+(m>>1), m&1, frac!=T());
761 }
762
771 template<std::float_round_style R,class T> unsigned int float2half(T value) {
772 return float2half_impl<R>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
773 }
774 template<class T> unsigned int float2half(T value) {
775 return float2half_impl<(std::float_round_style)(HALF_ROUND_STYLE)>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
776 }
777
785 template<std::float_round_style R,class T> unsigned int int2half(T value) {
786 unsigned int bits = static_cast<unsigned>(value<0) << 15;
787 if(!value)
788 return bits;
789 if(bits)
790 value = -value;
791 if(value > 0xFFFF)
792 return overflow<R>(bits);
793 unsigned int m = static_cast<unsigned int>(value), exp = 24;
794 for(; m<0x400; m<<=1,--exp) ;
795 for(; m>0x7FF; m>>=1,++exp) ;
796 bits |= (exp<<10) + m;
797 return (exp>24) ? rounded<R,false>(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits;
798 }
799
804 inline float half2float_impl(unsigned int value, float, true_type) {
805 #if HALF_ENABLE_F16C_INTRINSICS
806 return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value)));
807 #else
808 #if 0
809 bits_t<float> fbits = static_cast<bits_t<float>>(value&0x8000) << 16;
810 int abs = value & 0x7FFF;
811 if(abs)
812 {
813 fbits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
814 for(; abs<0x400; abs<<=1,fbits-=0x800000) ;
815 fbits += static_cast<bits_t<float>>(abs) << 13;
816 }
817 #else
818 static const bits_t<float> mantissa_table[2048] = {
819 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
820 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,
821 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
822 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000,
823 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000,
824 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
825 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000,
826 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000,
827 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
828 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000,
829 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000,
830 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
831 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000,
832 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000,
833 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
834 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000,
835 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000,
836 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,
837 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000,
838 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000,
839 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,
840 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000,
841 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000,
842 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
843 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000,
844 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000,
845 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000,
846 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000,
847 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000,
848 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,
849 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000,
850 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000,
851 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
852 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000,
853 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000,
854 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,
855 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000,
856 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000,
857 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,
858 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000,
859 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000,
860 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
861 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000,
862 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000,
863 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,
864 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000,
865 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000,
866 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,
867 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000,
868 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000,
869 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
870 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000,
871 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000,
872 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,
873 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000,
874 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000,
875 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,
876 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000,
877 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000,
878 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
879 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000,
880 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000,
881 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,
882 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000,
883 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000,
884 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000,
885 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000,
886 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000,
887 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,
888 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000,
889 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000,
890 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000,
891 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000,
892 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000,
893 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000,
894 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000,
895 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000,
896 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,
897 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000,
898 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000,
899 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000,
900 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000,
901 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000,
902 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000,
903 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000,
904 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000,
905 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,
906 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000,
907 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000,
908 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000,
909 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000,
910 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000,
911 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000,
912 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000,
913 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000,
914 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,
915 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000,
916 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000,
917 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000,
918 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000,
919 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000,
920 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000,
921 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000,
922 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000,
923 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,
924 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000,
925 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000,
926 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000,
927 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000,
928 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000,
929 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000,
930 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000,
931 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000,
932 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,
933 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000,
934 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000,
935 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000,
936 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000,
937 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000,
938 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000,
939 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000,
940 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000,
941 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,
942 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000,
943 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000,
944 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
945 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000,
946 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 };
947 static const bits_t<float> exponent_table[64] = {
948 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000,
949 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000,
950 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
951 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 };
952 static const unsigned short offset_table[64] = {
953 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
954 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 };
955 bits_t<float> fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10];
956 #endif
957 float out;
958 std::memcpy(&out, &fbits, sizeof(float));
959 return out;
960 #endif
961 }
962
966 inline double half2float_impl(unsigned int value, double, true_type) {
967 #if HALF_ENABLE_F16C_INTRINSICS
968 return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value))));
969 #else
970 uint32 hi = static_cast<uint32>(value&0x8000) << 16;
971 unsigned int abs = value & 0x7FFF;
972 if(abs) {
973 hi |= 0x3F000000 << static_cast<unsigned>(abs>=0x7C00);
974 for(; abs<0x400; abs<<=1,hi-=0x100000) ;
975 hi += static_cast<uint32>(abs) << 10;
976 }
977 bits_t<double> dbits = static_cast<bits_t<double>>(hi) << 32;
978 double out;
979 std::memcpy(&out, &dbits, sizeof(double));
980 return out;
981 #endif
982 }
983
/// Convert a half-precision bit pattern to an arbitrary floating-point type.
/// Generic fallback selected through the trailing ellipsis; it only uses
/// std::numeric_limits and std::ldexp.
/// \tparam T destination floating-point type
/// \param value half-precision bit pattern
/// \return the value represented by \a value converted to \a T
template<class T> T half2float_impl(unsigned int value, T, ...) {
    const unsigned int abs = value & 0x7FFF;
    T out = T();
    if(abs > 0x7C00) {
        // NaN: a clear top mantissa bit (0x200) selects a signaling NaN when T has one.
        out = (std::numeric_limits<T>::has_signaling_NaN && !(abs&0x200)) ? std::numeric_limits<T>::signaling_NaN() :
            std::numeric_limits<T>::has_quiet_NaN ? std::numeric_limits<T>::quiet_NaN() : T();
    } else if(abs == 0x7C00) {
        // Infinity, or the largest finite value when T has no infinity.
        out = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max();
    } else if(abs > 0x3FF) {
        // Normal number: 11-bit significand (implicit bit restored) scaled by 2^(e-25).
        // The exponent is computed in signed arithmetic so it does not depend on
        // unsigned wraparound followed by a narrowing conversion to int.
        const int exponent = static_cast<int>(abs >> 10) - 25;
        out = std::ldexp(static_cast<T>((abs&0x3FF)|0x400), exponent);
    } else {
        // Subnormal (or zero): plain mantissa scaled by 2^-24.
        out = std::ldexp(static_cast<T>(abs), -24);
    }
    return (value&0x8000) ? -out : out;
}
1002
1007 template<class T> T half2float(unsigned int value) {
1008 return half2float_impl(value, T(), bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
1009 }
1010
1020 template<std::float_round_style R,bool E,bool I,class T> T half2int(unsigned int value) {
1021 unsigned int abs = value & 0x7FFF;
1022 if(abs >= 0x7C00) {
1024 return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
1025 }
1026 if(abs < 0x3800) {
1027 raise(FE_INEXACT, I);
1028 return (R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) :
1029 (R==std::round_toward_neg_infinity) ? -T(value>0x8000) :
1030 T();
1031 }
1032 int exp = 25 - (abs>>10);
1033 unsigned int m = (value&0x3FF) | 0x400;
1034 int32 i = static_cast<int32>((exp<=0) ? (m<<-exp) : ((m+(
1035 (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) :
1036 (R==std::round_toward_infinity) ? (((1<<exp)-1)&((value>>15)-1)) :
1037 (R==std::round_toward_neg_infinity) ? (((1<<exp)-1)&-(value>>15)) : 0))>>exp));
1038 if((!std::numeric_limits<T>::is_signed && (value&0x8000)) || (std::numeric_limits<T>::digits<16 &&
1039 ((value&0x8000) ? (-i<std::numeric_limits<T>::min()) : (i>std::numeric_limits<T>::max()))))
1041 else if(I && exp > 0 && (m&((1<<exp)-1)))
1043 return static_cast<T>((value&0x8000) ? -i : i);
1044 }
1045
1049
1055 template<std::float_round_style R> uint32 mulhi(uint32 x, uint32 y) {
1056 uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16);
1057 return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) +
1058 ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0);
1059 }
1060
1066 return static_cast<uint32>((static_cast<unsigned long long>(x)*static_cast<unsigned long long>(y)+0x80000000)>>32);
1067 }
1068
1074 inline uint32 divide64(uint32 x, uint32 y, int &s) {
1075 unsigned long long xx = static_cast<unsigned long long>(x) << 32;
1076 return s = (xx%y!=0), static_cast<uint32>(xx/y);
1077 }
1078
// Modulus/remainder kernel working on raw half-precision bit patterns.
// Q: when true, the integral quotient is accumulated in q and stored through quo.
// R: when true, the result is post-adjusted against y and may come back with the sign bit (0x8000) set.
// x, y: presumably sign-cleared bit patterns of finite values - confirm at the call sites.
// quo: dereferenced whenever Q is true, so it must not be NULL in that case.
// Returns the bit pattern of the result (0 when the division is exact).
1086 template<bool Q,bool R> unsigned int mod(unsigned int x, unsigned int y, int *quo = NULL) {
1087 unsigned int q = 0;
1088 if(x > y) {
1089 int absx = x, absy = y, expx = 0, expy = 0;
// Normalize subnormal operands so that bit 10 (0x400) is set, tracking the shift in the exponents.
1090 for(; absx<0x400; absx<<=1,--expx) ;
1091 for(; absy<0x400; absy<<=1,--expy) ;
1092 expx += absx >> 10;
1093 expy += absy >> 10;
// 11-bit significands with the leading bit made explicit.
1094 int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
// Shift-and-subtract division, one step per unit of exponent difference.
1095 for(int d=expx-expy; d; --d) {
// Without a quotient to report, an exact match means the remainder is zero.
1096 if(!Q && mx == my)
1097 return 0;
1098 if(mx >= my) {
1099 mx -= my;
1100 q += Q;
1101 }
1102 mx <<= 1;
1103 q <<= static_cast<int>(Q);
1104 }
1105 if(!Q && mx == my)
1106 return 0;
// Final subtraction step at equal exponents.
1107 if(mx >= my) {
1108 mx -= my;
1109 ++q;
1110 }
1111 if(Q) {
// Keep only the low digits-1 bits of the quotient.
1112 q &= (1<<(std::numeric_limits<int>::digits-1)) - 1;
1113 if(!mx)
1114 return *quo = q, 0;
1115 }
// Renormalize the remainder and repack it as a half bit pattern (subnormal when expy <= 0).
1116 for(; mx<0x400; mx<<=1,--expy) ;
1117 x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy));
1118 }
1119 if(R) {
// Compare twice the remainder against y without overflow: either double x (a) or halve y (b).
1120 unsigned int a, b;
1121 if(y < 0x800) {
1122 a = (x<0x400) ? (x<<1) : (x+0x400);
1123 b = y;
1124 } else {
1125 a = x;
1126 b = y - 0x400;
1127 }
// Above the half-way point, or exactly on it with an odd quotient: replace x by y-x with the sign bit set.
1128 if(a > b || (a == b && (q&1))) {
1129 int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF);
1130 int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d));
1131 for(; m<0x800 && exp>1; m<<=1,--exp) ;
1132 x = 0x8000 + ((exp-1)<<10) + (m>>1);
1133 q += Q;
1134 }
1135 }
1136 if(Q)
1137 *quo = q;
1138 return x;
1139 }
1140
1146 template<unsigned int F> uint32 sqrt(uint32 &r, int &exp) {
1147 int i = exp & 1;
1148 r <<= i;
1149 exp = (exp-i) / 2;
1150 uint32 m = 0;
1151 for(uint32 bit=static_cast<uint32>(1)<<F; bit; bit>>=2) {
1152 if(r < m+bit)
1153 m >>= 1;
1154 else {
1155 r -= m + bit;
1156 m = (m>>1) + bit;
1157 }
1158 }
1159 return m;
1160 }
1161
1167 inline uint32 exp2(uint32 m, unsigned int n = 32) {
1168 static const uint32 logs[] = {
1169 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
1170 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
1171 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
1172 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
1173 if(!m)
1174 return 0x80000000;
1175 uint32 mx = 0x80000000, my = 0;
1176 for(unsigned int i=1; i<n; ++i) {
1177 uint32 mz = my + logs[i];
1178 if(mz <= m) {
1179 my = mz;
1180 mx += mx >> i;
1181 }
1182 }
1183 return mx;
1184 }
1185
1191 inline uint32 log2(uint32 m, unsigned int n = 32) {
1192 static const uint32 logs[] = {
1193 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
1194 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
1195 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
1196 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
1197 if(m == 0x40000000)
1198 return 0;
1199 uint32 mx = 0x40000000, my = 0;
1200 for(unsigned int i=1; i<n; ++i) {
1201 uint32 mz = mx + (mx>>i);
1202 if(mz <= m) {
1203 mx = mz;
1204 my += logs[i];
1205 }
1206 }
1207 return my;
1208 }
1209
1215 inline std::pair<uint32,uint32> sincos(uint32 mz, unsigned int n = 31) {
1216 static const uint32 angles[] = {
1217 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
1218 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
1219 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
1220 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
1221 uint32 mx = 0x26DD3B6A, my = 0;
1222 for(unsigned int i=0; i<n; ++i) {
1223 uint32 sign = sign_mask(mz);
1224 uint32 tx = mx - (arithmetic_shift(my, i)^sign) + sign;
1225 uint32 ty = my + (arithmetic_shift(mx, i)^sign) - sign;
1226 mx = tx; my = ty; mz -= (angles[i]^sign) - sign;
1227 }
1228 return std::make_pair(my, mx);
1229 }
1230
1237 inline uint32 atan2(uint32 my, uint32 mx, unsigned int n = 31) {
1238 static const uint32 angles[] = {
1239 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
1240 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
1241 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
1242 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
1243 uint32 mz = 0;
1244 for(unsigned int i=0; i<n; ++i) {
1245 uint32 sign = sign_mask(my);
1246 uint32 tx = mx + (arithmetic_shift(my, i)^sign) - sign;
1247 uint32 ty = my - (arithmetic_shift(mx, i)^sign) + sign;
1248 mx = tx; my = ty; mz += (angles[i]^sign) - sign;
1249 }
1250 return mz;
1251 }
1252
// Reduce a half-precision argument to a fixed-point angle plus an integer multiple.
// abs: half bit pattern with the sign bit cleared (presumably finite - confirm with callers).
// k: receives the integer multiple that was removed; set to 0 when the argument is used unreduced.
// Returns the reduced angle as a 32-bit fixed-point value that may be negative (two's complement).
1257 inline uint32 angle_arg(unsigned int abs, int &k) {
// Significand with the implicit bit made explicit for normal numbers, and the unbiased exponent.
1258 uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10);
1259 int exp = (abs>>10) + (abs<=0x3FF) - 15;
// Small arguments (bit pattern below 0x3A48, roughly pi/4) need no reduction.
1260 if(abs < 0x3A48)
1261 return k = 0, m << (exp+20);
// 0xA2F9836E4E442 appears to be 2/pi in fixed point: y is the argument in units of pi/2,
// yi is y rounded to the nearest integer multiple, and f is the signed fractional remainder.
1262 unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi;
// All-ones when f is negative, zero otherwise.
1263 uint32 sign = -static_cast<uint32>(f>>63);
1264 k = static_cast<int>(yi>>(62-exp));
// Scale |f| by 0xC90FDAA2 (appears to be pi/4 in fixed point) and reapply the sign.
1265 return (multiply64(static_cast<uint32>((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign;
1266 }
1267
// Build the fixed-point argument pair for atan2() from a single half-precision magnitude.
// abs: half bit pattern with the sign bit cleared (presumably with magnitude at most 1 - confirm with callers).
// Returns a pair (my, mx): my is derived from the input significand and mx from the square root of
// (1 - input^2), both shifted to a common scale that depends on their exponent difference.
1271 inline std::pair<uint32,uint32> atan2_args(unsigned int abs) {
1272 int exp = -15;
// Normalize subnormals, then add the biased exponent to get the unbiased one.
1273 for(; abs<0x400; abs<<=1,--exp) ;
1274 exp += abs >> 10;
// my: 11-bit significand shifted up by 5; r: its square.
1275 uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my;
1276 int rexp = 2 * exp;
// r = 1 - input^2 (with 1.0 as 0x40000000); bits shifted out are folded into a sticky bit.
1277 r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast<uint32>(1)<<-rexp)-1))!=0)) : 1);
// Renormalize r and take its square root; r then holds the remainder.
1278 for(rexp=0; r<0x40000000; r<<=1,--rexp) ;
1279 uint32 mx = sqrt<30>(r, rexp);
// Align both values according to their exponent difference; (r<<k)/mx refines the root with the remainder.
1280 int d = exp - rexp;
1281 if(d < 0)
1282 return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx);
1283 if(d > 0)
1284 return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? mx : ((mx<<(14-d))+(r<<(13-d))/mx)));
1285 return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx);
1286 }
1287
// Compute the exponential terms used by the hyperbolic functions.
// abs: half bit pattern with the sign bit cleared.
// exp: receives the integral part of the scaled exponent.
// n: iteration count forwarded to exp2().
// Returns a pair (mx, my): mx is the exp2() result for the fractional part, and my is derived from
// its reciprocal (or mx itself), shifted right by d with lost bits folded into a sticky bit.
1293 inline std::pair<uint32,uint32> hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) {
// Multiply the significand by 0xB8AA3B29 (appears to be log2(e) in fixed point) to turn e^x into 2^y.
1294 uint32 mx = detail::multiply64(static_cast<uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my;
1295 int e = (abs>>10) + (abs<=0x3FF);
// Split the product into integral part (exp) and fractional part (mx), depending on the input exponent.
1296 if(e < 14) {
1297 exp = 0;
1298 mx >>= 14 - e;
1299 } else {
1300 exp = mx >> (45-e);
1301 mx = (mx<<(e-14)) & 0x7FFFFFFF;
1302 }
1303 mx = exp2(mx, n);
1304 int d = exp << 1, s;
// When mx exceeds 1.0 (0x80000000), take its reciprocal and keep the remainder flag as a sticky bit.
1305 if(mx > 0x80000000) {
1306 my = divide64(0x80000000, mx, s);
1307 my |= s;
1308 ++d;
1309 } else
1310 my = mx;
1311 return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast<uint32>(1)<<d)-1))!=0)) : 1);
1312 }
1313
1325 template<std::float_round_style R,bool I> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0) {
1326 int s = 0;
1327 if(esign) {
1328 if(m > 0x80000000) {
1329 m = divide64(0x80000000, m, s);
1330 ++exp;
1331 }
1332 if(exp > 25)
1333 return underflow<R>(sign);
1334 else if(exp == 25)
1335 return rounded<R,I>(sign, 1, (m&0x7FFFFFFF)!=0);
1336 exp = -exp;
1337 } else if(exp > 15)
1338 return overflow<R>(sign);
1339 return fixed2half<R,31,false,false,I>(m, exp+14, sign, s);
1340 }
1341
// Post-process a logarithm result: combine integral and fractional parts, scale, and convert to half precision.
// R: rounding mode; L: divisor applied to the combined logarithm (callers choose the target base through it).
// m: fractional part of the logarithm; ilog: signed integral part; exp: exponent of the combined value.
// sign: sign bit of the result; it is flipped when ilog is negative.
// Returns a half bit pattern, 0 for an exactly zero logarithm, or the underflow result.
1353 template<std::float_round_style R,uint32 L> unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) {
// msign is the sign mask of ilog; XOR followed by subtraction takes the absolute value of the combination.
1354 uint32 msign = sign_mask(ilog);
1355 m = (((static_cast<uint32>(ilog)<<27)+(m>>4))^msign) - msign;
1356 if(!m)
1357 return 0;
// Normalize so that the top bit is set, adjusting the exponent accordingly.
1358 for(; m<0x80000000; m<<=1,--exp) ;
// Pre-shift so that the quotient below fits in 32 bits.
1359 int i = m >= L, s;
1360 exp += i;
1361 m >>= 1 + i;
1362 sign ^= msign & 0x8000;
1363 if(exp < -11)
1364 return underflow<R>(sign);
// The remainder flag s is computed but not used; the sticky argument passed on is the constant 1.
1365 m = divide64(m, L, s);
1366 return fixed2half<R,30,false,false,true>(m, exp, sign, 1);
1367 }
1368
1377 template<std::float_round_style R> unsigned int hypot_post(uint32 r, int exp) {
1378 int i = r >> 31;
1379 if((exp+=i) > 46)
1380 return overflow<R>();
1381 if(exp < -34)
1382 return underflow<R>();
1383 r = (r>>i) | (r&i);
1384 uint32 m = sqrt<30>(r, exp+=15);
1385 return fixed2half<R,15,false,false,false>(m, exp-1, 0, r!=0);
1386 }
1387
1398 template<std::float_round_style R> unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) {
1399 int i = my >= mx, s;
1400 exp += i;
1401 if(exp > 29)
1402 return overflow<R>(sign);
1403 if(exp < -11)
1404 return underflow<R>(sign);
1405 uint32 m = divide64(my>>(i+1), mx, s);
1406 return fixed2half<R,30,false,false,true>(m, exp, sign, s);
1407 }
1408
// Inverse hyperbolic ("area") function kernel: evaluates a logarithm of arg + sqrt(arg^2 +/- 1).
// R: rounding mode. S: true adds 1 under the root and keeps the sign of arg (asinh-like);
// false subtracts 1 and drops the sign (acosh-like).
// arg: half bit pattern of the argument (presumably finite and in the function's domain - confirm with callers).
// Returns the half bit pattern produced by log2_post().
1418 template<std::float_round_style R,bool S> unsigned int area(unsigned int arg) {
// expx/mx: unbiased exponent and significand of |arg|; expy tracks the exponent of the squared value.
1419 int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i;
1420 uint32 mx = static_cast<uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r;
1421 for(; abs<0x400; abs<<=1,--expy) ;
1422 expy += abs >> 10;
// r = significand squared, renormalized so that bit 31 is clear.
1423 r = ((abs&0x3FF)|0x400) << 5;
1424 r *= r;
1425 i = r >> 31;
1426 expy = 2*expy + i;
1427 r >>= i;
1428 if(S) {
// arg^2 + 1: align the smaller term, folding shifted-out bits into a sticky bit.
1429 if(expy < 0) {
1430 r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast<uint32>(1)<<-expy)-1))!=0)) : 1);
1431 expy = 0;
1432 } else {
1433 r += 0x40000000 >> expy;
1434 i = r >> 31;
1435 r = (r>>i) | (r&i);
1436 expy += i;
1437 }
1438 } else {
// arg^2 - 1, followed by renormalization.
1439 r -= 0x40000000 >> expy;
1440 for(; r<0x40000000; r<<=1,--expy) ;
1441 }
// Square root of the sum/difference, refined with the remainder left in r.
1442 my = sqrt<30>(r, expy);
1443 my = (my<<15) + (r<<14)/my;
// Align arg and the root to a common exponent before adding them.
1444 if(S) {
1445 mx >>= expy - expx;
1446 ilog = expy;
1447 } else {
1448 my >>= expx - expy;
1449 ilog = expx;
1450 }
1451 my += mx;
1452 i = my >> 31;
// G adds an extra log2() iteration and a small offset when S is set and rounding to nearest.
1453 static const int G = S && (R==std::round_to_nearest);
// 0xB8AA3B2A is the divisor L handed to log2_post() (appears to be log2(e), which would give a natural logarithm).
1454 return log2_post<R,0xB8AA3B2A>(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast<unsigned>(S)<<15));
1455 }
1456
1458 struct f31 {
1462 constexpr f31(uint32 mant, int e) : m(mant), exp(e) {}
1463
1466 f31(unsigned int abs) : exp(-15) {
1467 for(; abs<0x400; abs<<=1,--exp) ;
1468 m = static_cast<uint32>((abs&0x3FF)|0x400) << 21;
1469 exp += (abs>>10);
1470 }
1471
1476 friend f31 operator+(f31 a, f31 b) {
1477 if(b.exp > a.exp)
1478 std::swap(a, b);
1479 int d = a.exp - b.exp;
1480 uint32 m = a.m + ((d<32) ? (b.m>>d) : 0);
1481 int i = (m&0xFFFFFFFF) < a.m;
1482 return f31(((m+i)>>i)|0x80000000, a.exp+i);
1483 }
1484
1489 friend f31 operator-(f31 a, f31 b) {
1490 int d = a.exp - b.exp, exp = a.exp;
1491 uint32 m = a.m - ((d<32) ? (b.m>>d) : 0);
1492 if(!m)
1493 return f31(0, -32);
1494 for(; m<0x80000000; m<<=1,--exp) ;
1495 return f31(m, exp);
1496 }
1497
1502 friend f31 operator*(f31 a, f31 b) {
1503 uint32 m = multiply64(a.m, b.m);
1504 int i = m >> 31;
1505 return f31(m<<(1-i), a.exp + b.exp + i);
1506 }
1507
1512 friend f31 operator/(f31 a, f31 b) {
1513 int i = a.m >= b.m, s;
1514 uint32 m = divide64((a.m+i)>>i, b.m, s);
1515 return f31(m, a.exp - b.exp + i - 1);
1516 }
1517
1519 int exp;
1520 };
1521
/// Shared implementation of the error function and its complement, evaluated with `f31` arithmetic.
/// NOTE(review): the constants look like a rational approximation in t = 1/(1 + p*|x|) multiplied by exp(-x^2)
/// (x2 is x*x scaled by 0xB8AA3B29, which as 1.31 fixed point is about 1.4427, i.e. log2(e)) — confirm against the upstream documentation.
/// `f31(abs)` does not terminate for a zero magnitude, so callers must handle zero before calling.
/// \tparam R rounding mode to use
/// \tparam C false computes the plain variant, true the complementary variant
/// \param arg half-precision bits of the argument
/// \return half-precision bits of the result
1532 template<std::float_round_style R,bool C> unsigned int erf(unsigned int arg) {
1533 unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
1534 f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t;
// e: polynomial in t divided by 2^x2; the divisor is built from the integer and fractional parts of x2
1535 f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t /
1536 ((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<<x2.exp)&0x7FFFFFFF, 22), x2.m>>(31-x2.exp)));
// plain variant, or complementary variant of a negative argument: result is a constant minus e;
// complementary variant of a non-negative argument: result is e itself, with an explicit underflow check
1537 return (!C || sign) ? fixed2half<R,31,false,true,true>(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) :
1538 (e.exp<-25) ? underflow<R>() : fixed2half<R,30,false,false,true>(e.m>>1, e.exp+14, 0, e.m&1);
1539 }
1540
/// Shared implementation of the gamma functions, evaluated with `f31` arithmetic.
/// The commented-out double-precision reference below shows the underlying formula:
/// log(s) + (arg-0.5)*log(t) - t with t = arg + 4.65 and s a sum of partial fractions.
/// NOTE(review): L=true -> logarithm of gamma, L=false -> gamma itself is inferred from the final exp2 step in the
/// `!L` branch; the callers are outside this extract.
/// `f31(abs)` does not terminate for a zero magnitude, so callers must handle zero before calling.
/// \tparam R rounding mode to use
/// \tparam L true keeps the result in the logarithmic domain, false exponentiates it
/// \param arg half-precision bits of the argument
/// \return half-precision bits of the result, or the result of `underflow<R>` / `overflow<R>` when out of range
1550 template<std::float_round_style R,bool L> unsigned int gamma(unsigned int arg) {
1551/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
1552 double t = arg + 4.65, s = p[0];
1553 for(unsigned int i=0; i<5; ++i)
1554 s += p[i+1] / (arg+i);
1555 return std::log(s) + (arg-0.5)*std::log(t) - t;
1556*/ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
// pi is about 3.14159 (0xC90FDAA2 * 2^-31 * 2^1); lbe is about 1.4427, i.e. log2(e)
1557 unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
1558 bool bsign = sign != 0;
// for negative arguments the series is evaluated at |arg| + 1 and corrected in the `bsign` branches further down
1559 f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s =
1560 f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1))
1561 + f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1));
// s = log2(s) / log2(e): natural logarithm of the partial-fraction sum, rebuilt as an f31
1562 int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16);
1563 s = f31((static_cast<uint32>(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe;
// skip the (x - 0.5) * log(t) term when x is exactly 0.5 (mantissa 0x80000000, exponent -1)
1564 if(x.exp != -1 || x.m != 0x80000000) {
1565 i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8);
1566 f31 l = f31((static_cast<uint32>(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe;
1567 s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l);
1568 }
1569 s = x.exp ? (s-t) : (t-s);
1570 if(bsign) {
// negative argument: reduce z to its fractional part and evaluate a sine of pi*z
// NOTE(review): this matches the shape of a reflection formula, but that reading is inferred — confirm
1571 if(z.exp >= 0) {
1572 sign &= (L|((z.m>>(31-z.exp))&1)) - 1;
1573 for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ;
1574 }
1575 if(z.exp == -1)
1576 z = f31(0x80000000, 0) - z;
1577 if(z.exp < -1) {
1578 z = z * pi;
1579 z.m = sincos(z.m>>(1-z.exp), 30).first;
1580 for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ;
1581 }
1582 else
1583 z = f31(0x80000000, 0);
1584 } if(L) {
1585 if(bsign) {
1586 f31 l(0x92868247, 0);
1587 if(z.exp < 0) {
1588 uint32 m = log2((z.m+1)>>1, 27);
1589 z = f31(-((static_cast<uint32>(z.exp)<<26)+(m>>5)), 5);
1590 for(; z.m<0x80000000; z.m<<=1,--z.exp) ;
1591 l = l + z / lbe;
1592 }
// the result is negative when l is smaller than s (compared by exponent, then mantissa)
1593 sign = static_cast<unsigned>(x.exp&&(l.exp<s.exp||(l.exp==s.exp&&l.m<s.m))) << 15;
1594 s = sign ? (s-l) : x.exp ? (l-s) : (l+s);
1595 } else {
1596 sign = static_cast<unsigned>(x.exp==0) << 15;
1597 if(s.exp < -24)
1598 return underflow<R>(sign);
1599 if(s.exp > 15)
1600 return overflow<R>(sign);
1601 }
1602 } else {
// non-logarithmic variant: convert back to base 2 and exponentiate
1603 s = s * lbe;
1604 uint32 m;
1605 if(s.exp < 0) {
1606 m = s.m >> -s.exp;
1607 s.exp = 0;
1608 } else {
// split into fractional bits (m) and integer part (new exponent)
1609 m = (s.m<<s.exp) & 0x7FFFFFFF;
1610 s.exp = (s.m>>(31-s.exp));
1611 }
1612 s.m = exp2(m, 27);
1613 if(!x.exp)
1614 s = f31(0x80000000, 0) / s;
1615 if(bsign) {
1616 if(z.exp < 0)
1617 s = s * z;
1618 s = pi / s;
1619 if(s.exp < -24)
1620 return underflow<R>(sign);
// positive argument with no fractional bits: assemble the half bits directly, bypassing fixed2half
1621 } else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1)))
1622 return ((s.exp+14)<<10) + (s.m>>21);
1623 if(s.exp > 15)
1624 return overflow<R>(sign);
1625 }
1626 return fixed2half<R,31,false,false,true>(s.m, s.exp+14, sign);
1627 }
1628
1629
1630 template<class,class,std::float_round_style> struct half_caster;
1631
/// Concept satisfied by the built-in integral and floating-point types.
/// Used in this file to constrain the converting constructor, the converting assignment and the mixed comparison operators of `half`.
1632 template <class T>
1633 concept arithmetic = std::integral<T> || std::floating_point<T>;
1634 }
1635
/// Half-precision floating-point type stored as a single 16-bit pattern (`data_`).
/// Arithmetic, comparison, stream and math functions are free functions declared as friends below,
/// which gives them access to the raw bits and to the private bit-pattern constructor.
/// NOTE(review): the listing's embedded numbering skips 1807-1808 in the friend list, so one or two friend
/// declarations appear to be missing from this extract (`fmax`/`fmin` bodies appear later in the file) — verify upstream.
1653 class half {
1654 public:
1657
/// Default constructor: value-initializes the stored bits (all-zero pattern).
1661 constexpr half() noexcept : data_() {}
1662
1666 //explicit half(float rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(rhs))) {}
1667
/// Implicit converting constructor from any built-in arithmetic type.
/// The value is first cast to `float` and then rounded to half with `round_style`.
1671 template<detail::arithmetic T>
1672 half(T rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(static_cast<float>(rhs)))) {}
1673
/// Implicit conversion to single-precision.
1676 operator float() const { return detail::half2float<float>(data_); }
1677
/// Assignment from single-precision, rounded with `round_style`.
1682 half& operator=(const float &rhs) { data_ = static_cast<detail::uint16>(detail::float2half<round_style>(rhs)); return *this; }
1683
/// Assignment from any built-in arithmetic type; goes through `float` like the converting constructor.
1684 template<detail::arithmetic T>
1685 half& operator=(const T &rhs) { return *this = static_cast<float>(rhs); }
1686
1690
/// Compound assignment operators: implemented through the corresponding binary operator on two halfs.
1696 half& operator+=(half rhs) { return *this = *this + rhs; }
1697
1703 half& operator-=(half rhs) { return *this = *this - rhs; }
1704
1710 half& operator*=(half rhs) { return *this = *this * rhs; }
1711
1717 half& operator/=(half rhs) { return *this = *this / rhs; }
1718
// The float overloads of the compound assignments are disabled.
1719 /*
1724 half& operator+=(float rhs) { return *this = *this + rhs; }
1725
1730 half& operator-=(float rhs) { return *this = *this - rhs; }
1731
1736 half& operator*=(float rhs) { return *this = *this * rhs; }
1737
1742 half& operator/=(float rhs) { return *this = *this / rhs; }
1743 */
1744
1748
/// Prefix increment: adds the bit pattern 0x3C00 (1.0 in IEEE binary16).
1752 half& operator++() { return *this = *this + half(detail::binary, 0x3C00); }
1753
/// Prefix decrement: adds the bit pattern 0xBC00 (0x3C00 with the sign bit set, i.e. -1.0).
1757 half& operator--() { return *this = *this + half(detail::binary, 0xBC00); }
1758
/// Postfix increment: returns the value held before incrementing.
1762 half operator++(int) { half out(*this); ++*this; return out; }
1763
/// Postfix decrement: returns the value held before decrementing.
1767 half operator--(int) { half out(*this); --*this; return out; }
/// Raw 16-bit pattern of the value.
1769 detail::uint16 get_data() const{ return data_; }
1770
1771 private:
/// Rounding mode used by all conversions and operations, taken from the HALF_ROUND_STYLE macro.
1773 static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
1774
/// Construct directly from a bit pattern; the `detail::binary_t` tag separates this from the value-converting constructor.
1777 constexpr half(detail::binary_t, unsigned int bits) noexcept : data_(static_cast<detail::uint16>(bits)) {}
1778
/// Stored bit pattern.
1780 detail::uint16 data_;
1781
// Friends: every function that reads `data_` or builds a half from raw bits.
1782 friend constexpr_NOERR bool operator==(half, half);
1783 template<detail::arithmetic T> friend constexpr_NOERR bool operator==(half, T);
1784 friend constexpr_NOERR std::partial_ordering operator<=>(half, half);
1785 template <detail::arithmetic T> friend constexpr_NOERR std::partial_ordering operator<=>(half, T);
1786 friend constexpr half operator+(half);
1787 friend constexpr half operator-(half);
1788 friend half operator+(half, half);
1789 template<class T> friend half operator+(half, T);
1790 template<class T> friend half operator+(T, half);
1791 friend half operator-(half, half);
1792 template<class T> friend half operator-(half, T);
1793 template<class T> friend half operator-(T, half);
1794 friend half operator*(half, half);
1795 template<class T> friend half operator*(half, T);
1796 template<class T> friend half operator*(T, half);
1797 friend half operator/(half, half);
1798 template<class T> friend half operator/(half, T);
1799 template<class T> friend half operator/(T, half);
1800 template<class charT,class traits> friend std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits>&, half);
1801 template<class charT,class traits> friend std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits>&, half&);
1802 friend constexpr half fabs(half);
1803 friend half fmod(half, half);
1804 friend half remainder(half, half);
1805 friend half remquo(half, half, int*);
1806 friend half fma(half, half, half);
1809 friend half fdim(half, half);
1810 friend half nanh(const char*);
1811 friend half exp(half);
1812 friend half exp2(half);
1813 friend half expm1(half);
1814 friend half log(half);
1815 friend half log10(half);
1816 friend half log2(half);
1817 friend half log1p(half);
1818 friend half sqrt(half);
1819 friend half cbrt(half);
1820 friend half hypot(half, half);
1821 friend half hypot(half, half, half);
1822 friend half pow(half, half);
1823 friend void sincos(half, half*, half*);
1824 friend half sin(half);
1825 friend half cos(half);
1826 friend half tan(half);
1827 friend half asin(half);
1828 friend half acos(half);
1829 friend half atan(half);
1830 friend half atan2(half, half);
1831 friend half sinh(half);
1832 friend half cosh(half);
1833 friend half tanh(half);
1834 friend half asinh(half);
1835 friend half acosh(half);
1836 friend half atanh(half);
1837 friend half erf(half);
1838 friend half erfc(half);
1839 friend half lgamma(half);
1840 friend half tgamma(half);
1841 friend half ceil(half);
1842 friend half floor(half);
1843 friend half trunc(half);
1844 friend half round(half);
1845 friend long lround(half);
1846 friend half rint(half);
1847 friend long lrint(half);
1848 friend half nearbyint(half);
1849 friend long long llround(half);
1850 friend long long llrint(half);
1851 friend half frexp(half, int*);
1852 friend half scalbln(half, long);
1853 friend half modf(half, half*);
1854 friend int ilogb(half);
1855 friend half logb(half);
1856 friend half nextafter(half, half);
1857 friend half nexttoward(half, long double);
1858 friend constexpr half copysign(half, half);
1859 friend constexpr int fpclassify(half);
1860 friend constexpr bool isfinite(half);
1861 friend constexpr bool isinf(half);
1862 friend constexpr bool isnan(half);
1863 friend constexpr bool isnormal(half);
1864 friend constexpr bool signbit(half);
1865 friend constexpr bool isgreater(half, half);
1866 friend constexpr bool isgreaterequal(half, half);
1867 friend constexpr bool isless(half, half);
1868 friend constexpr bool islessequal(half, half);
1869 friend constexpr bool islessgreater(half, half);
1870 template<class,class,std::float_round_style> friend struct detail::half_caster;
1871 friend class std::numeric_limits<half>;
1872 friend struct std::hash<half>;
1873 friend half literal::operator ""_h(long double);
1874 };
1875
1876 namespace literal {
1884 inline half operator ""_h(long double value) { return half(detail::binary, detail::float2half<half::round_style>(value)); }
1885 }
1886
1887 namespace detail {
/// Helper behind explicit casts to and from `half`.
/// The primary template is empty; the partial specializations below supply a static `cast` function.
/// \tparam T destination type
/// \tparam U source type
/// \tparam R rounding mode to use (defaults to HALF_ROUND_STYLE)
1894 template<class T,class U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster {};
/// Cast from an arithmetic type to `half`: dispatches on `is_float<U>` to `float2half` or `int2half`.
1895 template<class U,std::float_round_style R> struct half_caster<half,U,R> {
1896 static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
1897 static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
1898 private:
1899 static half cast_impl(U arg, true_type) { return half(binary, float2half<R>(arg)); }
1900 static half cast_impl(U arg, false_type) { return half(binary, int2half<R>(arg)); }
1901 };
/// Cast from `half` to an arithmetic type: dispatches on `is_float<T>` to `half2float` or `half2int`.
1902 template<class T,std::float_round_style R> struct half_caster<T,half,R> {
1903 static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
1904 static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
1905 private:
1906 static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
1907 static T cast_impl(half arg, false_type) { return half2int<R,true,true,T>(arg.data_); }
1908 };
/// Cast from `half` to `half`: returns the argument unchanged.
1909 template<std::float_round_style R> struct half_caster<half,half,R> {
1910 static half cast(half arg) { return arg; }
1911 };
1912 }
1913}
1914
1916namespace std {
/// Numeric limits for half-precision floats.
/// The value-returning members build their result directly from a 16-bit pattern.
1919 template<> class numeric_limits<half_float::half> {
1920 public:
/// This specialization is provided.
1922 static constexpr bool is_specialized = true;
1923
/// Supports signed values.
1925 static constexpr bool is_signed = true;
1926
/// Is not an integer type.
1928 static constexpr bool is_integer = false;
1929
/// Is not exact.
1931 static constexpr bool is_exact = false;
1932
/// Does not use modulo arithmetic.
1934 static constexpr bool is_modulo = false;
1935
/// The set of representable values is finite.
1937 static constexpr bool is_bounded = true;
1938
/// Declared as IEEE 754 (IEC 559) conformant.
1940 static constexpr bool is_iec559 = true;
1941
/// Has a representation for positive infinity.
1943 static constexpr bool has_infinity = true;
1944
/// Has a quiet NaN.
1946 static constexpr bool has_quiet_NaN = true;
1947
/// Has a signaling NaN.
1949 static constexpr bool has_signaling_NaN = true;
1950
1951// if C++ version < 23
1952#if __cplusplus < 202300L
/// Subnormal values are supported; only declared for language versions before C++23.
1954 static constexpr float_denorm_style has_denorm = denorm_present;
1955#endif
1956
/// Loss of accuracy is not reported as denormalization loss.
1958 static constexpr bool has_denorm_loss = false;
1959
// `traps` follows the HALF_ERRHANDLING_THROWS configuration macro.
1960 #if HALF_ERRHANDLING_THROWS
1961 static constexpr bool traps = true;
1962 #else
1964 static constexpr bool traps = false;
1965 #endif
1966
/// Tinyness is not detected before rounding.
1968 static constexpr bool tinyness_before = false;
1969
/// Rounding mode, forwarded from `half::round_style`.
1971 static constexpr float_round_style round_style = half_float::half::round_style;
1972
/// Significant binary digits (10 stored mantissa bits plus the implicit leading bit).
1974 static constexpr int digits = 11;
1975
/// Decimal digits that survive a round trip through half.
1977 static constexpr int digits10 = 3;
1978
/// Decimal digits needed to represent every half value uniquely.
1980 static constexpr int max_digits10 = 5;
1981
/// Number base.
1983 static constexpr int radix = 2;
1984
/// One more than the smallest binary exponent of a normal value.
1986 static constexpr int min_exponent = -13;
1987
/// Smallest decimal exponent of a normal value.
1989 static constexpr int min_exponent10 = -4;
1990
/// One more than the largest binary exponent of a finite value.
1992 static constexpr int max_exponent = 16;
1993
/// Largest decimal exponent of a finite value.
1995 static constexpr int max_exponent10 = 4;
1996
/// Smallest positive normal value (bit pattern 0x0400).
1998 static constexpr half_float::half min() noexcept { return half_float::half(half_float::detail::binary, 0x0400); }
1999
/// Most negative finite value (bit pattern 0xFBFF).
2001 static constexpr half_float::half lowest() noexcept { return half_float::half(half_float::detail::binary, 0xFBFF); }
2002
/// Largest finite value (bit pattern 0x7BFF).
2004 static constexpr half_float::half max() noexcept { return half_float::half(half_float::detail::binary, 0x7BFF); }
2005
/// Difference between 1 and the next representable value (bit pattern 0x1400).
2007 static constexpr half_float::half epsilon() noexcept { return half_float::half(half_float::detail::binary, 0x1400); }
2008
/// Maximum rounding error: pattern 0x3800 under round-to-nearest, 0x3C00 otherwise.
2010 static constexpr half_float::half round_error() noexcept
2011 { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
2012
/// Positive infinity (bit pattern 0x7C00).
2014 static constexpr half_float::half infinity() noexcept { return half_float::half(half_float::detail::binary, 0x7C00); }
2015
/// Quiet NaN (bit pattern 0x7FFF).
2017 static constexpr half_float::half quiet_NaN() noexcept { return half_float::half(half_float::detail::binary, 0x7FFF); }
2018
/// Signaling NaN (bit pattern 0x7DFF).
2020 static constexpr half_float::half signaling_NaN() noexcept { return half_float::half(half_float::detail::binary, 0x7DFF); }
2021
/// Smallest positive subnormal value (bit pattern 0x0001).
2023 static constexpr half_float::half denorm_min() noexcept { return half_float::half(half_float::detail::binary, 0x0001); }
2024 };
2025
2028 template<> struct hash<half_float::half> {
2031
2033 typedef size_t result_type;
2034
2038 result_type operator()(argument_type arg) const { return hash<half_float::detail::uint16>()(arg.data_&-static_cast<unsigned>(arg.data_!=0x8000)); }
2039 };
2040
2041#if defined(__cpp_lib_format)
/// `std::format` support for half-precision values.
/// The text produced is exactly what the stream insertion operator prints; no format
/// specification is interpreted.
template <>
struct formatter<half_float::half>
{
	/// Accept the format specification without consuming any of it.
	/// \param ctx parse context
	/// \return iterator to the start of the (unparsed) specification
	constexpr auto parse(std::format_parse_context& ctx)
	{
		auto spec_begin = ctx.begin();
		return spec_begin;  // Simple implementation
	}

	/// Print `value` into a temporary string stream and copy the text to the output.
	/// \param value half-precision value to format
	/// \param ctx format context
	/// \return output iterator past the written text
	auto format(const half_float::half& value, std::format_context& ctx) const
	{
		std::ostringstream buffer;
		buffer << value;
		const std::string text = buffer.str();
		return std::format_to(ctx.out(), "{}", text);
	}
};
2057#endif
2058}
2059
2060namespace half_float {
2064
2072 return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF));
2073 }
2074 template<detail::arithmetic T>
2075 inline constexpr_NOERR bool operator==(half x, T y) { return x == static_cast<half>(y); }
2076
2077 inline constexpr_NOERR std::partial_ordering operator<=>(half x, half y)
2078 {
2079 auto x_data = (x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15);
2080 auto y_data = (y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15);
2081 if (x_data < y_data) return std::partial_ordering::less;
2082 if (x_data > y_data) return std::partial_ordering::greater;
2083 if (x_data == y_data) return std::partial_ordering::equivalent;
2084 return std::partial_ordering::unordered;
2085 }
2086
2087 template <detail::arithmetic T>
2088 inline constexpr_NOERR std::partial_ordering operator<=>(half x, T y)
2089 {
2090 return x <=> half(y);
2091 }
2092
2097
2101 inline constexpr half operator+(half arg) { return arg; }
2102
2106 inline constexpr half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); }
2107
/// Addition of two half-precision values, done on the bit patterns.
/// \param x left operand
/// \param y right operand
/// \return sum rounded with `half::round_style`
2115 inline half operator+(half x, half y) {
2116 #ifdef HALF_ARITHMETIC_TYPE
// NOTE(review): the listing's embedded numbering skips 2117, so the body of this branch is missing from this extract — verify upstream.
2118 #else
2119 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF;
// sub: the operands have different signs, so magnitudes are subtracted
2120 bool sub = ((x.data_^y.data_)&0x8000) != 0;
// infinity / NaN operands (magnitude >= 0x7C00): NaNs go through detail::signal, inf - inf is invalid
2121 if(absx >= 0x7C00 || absy >= 0x7C00)
2122 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ :
2123 (sub && absx==0x7C00) ? detail::invalid() : y.data_);
// zero operands: the sum of two zeros is negative only if both are negative, or either is when rounding toward -infinity
2124 if(!absx)
2125 return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_));
2126 if(!absy)
2127 return x;
// the result takes the sign of the operand with the larger magnitude
2128 unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000;
2129 if(absy > absx)
2130 std::swap(absx, absy);
// exp: exponent of the larger operand; d: exponent difference; mantissas carry three extra low bits for rounding
2131 int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my;
2132 if(d < 13) {
2133 my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3;
// align the smaller mantissa, keeping a sticky bit for everything shifted out
2134 my = (my>>d) | ((my&((1<<d)-1))!=0);
2135 } else
2136 my = 1;
2137 if(sub) {
// exact cancellation yields zero, negative only when rounding toward -infinity
2138 if(!(mx-=my))
2139 return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
2140 for(; mx<0x2000 && exp>1; mx<<=1,--exp) ;
2141 } else {
2142 mx += my;
2143 int i = mx >> 14;
2144 if((exp+=i) > 30)
// NOTE(review): the embedded numbering skips 2145 here — the statement controlled by the `if` above appears to be
// missing from this extract; as listed, the renormalization below would wrongly become the `if` body. Verify upstream.
2146 mx = (mx>>i) | (mx&i);
2147 }
2148 return half(detail::binary, detail::rounded<half::round_style,false>(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0));
2149 #endif
2150 }
2151 template<class T>
2152 inline half operator+(half x, T y) { return x + static_cast<half>(y); }
2153 template<class T>
2154 inline half operator+(T x, half y) { return static_cast<half>(x) + y; }
2155
/// Subtraction of two half-precision values.
/// \param x left operand
/// \param y right operand
/// \return difference rounded with `half::round_style`
2163 inline half operator-(half x, half y) {
2164 #ifdef HALF_ARITHMETIC_TYPE
// NOTE(review): the listing's embedded numbering skips 2165, so the body of this branch is missing from this extract — verify upstream.
2166 #else
// implemented as addition of the negated right operand
2167 return x + (-y);
2168 #endif
2169 }
2170 template<class T>
2171 inline half operator-(half x, T y) { return x - static_cast<half>(y); }
2172 template<class T>
2173 inline half operator-(T x, half y) { return static_cast<half>(x) - y; }
2174
/// Multiplication of two half-precision values, done on the bit patterns.
/// NOTE(review): the listing's embedded numbering skips 2184, 2199 and 2201-2202, so the HALF_ARITHMETIC_TYPE branch,
/// the overflow/underflow returns and the final rounding return are missing from this extract — verify upstream.
/// \param x left operand
/// \param y right operand
/// \return product rounded with `half::round_style`
2182 inline half operator*(half x, half y) {
2183 #ifdef HALF_ARITHMETIC_TYPE
2185 #else
2186 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16;
// the sign of the product is the XOR of the operand signs
2187 unsigned int sign = (x.data_^y.data_) & 0x8000;
// infinity / NaN operands: NaNs go through detail::signal, infinity times zero is invalid, otherwise signed infinity
2188 if(absx >= 0x7C00 || absy >= 0x7C00)
2189 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2190 ((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00));
// a zero operand gives a signed zero
2191 if(!absx || !absy)
2192 return half(detail::binary, sign);
// normalize subnormal operands so that bit 10 is set
2193 for(; absx<0x400; absx<<=1,--exp) ;
2194 for(; absy<0x400; absy<<=1,--exp) ;
// 11-bit by 11-bit mantissa product; i is 1 when the product reached bit 21, s keeps the bit lost by that extra shift
2195 detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
2196 int i = m >> 21, s = m & i;
2197 exp += (absx>>10) + (absy>>10) + i;
2198 if(exp > 29)
2200 else if(exp < -11)
2203 #endif
2204 }
2205 template<class T>
2206 inline half operator*(half x, T y) { return x * static_cast<half>(y); }
2207 template<class T>
2208 inline half operator*(T x, half y) { return static_cast<half>(x) * y; }
2209
/// Division of two half-precision values, done on the bit patterns.
/// NOTE(review): the listing's embedded numbering skips 2220, 2237, 2239 and 2242, so the HALF_ARITHMETIC_TYPE branch,
/// the overflow/underflow returns and the final quotient/rounding return are missing from this extract — verify upstream.
/// \param x dividend
/// \param y divisor
/// \return quotient rounded with `half::round_style`
2218 inline half operator/(half x, half y) {
2219 #ifdef HALF_ARITHMETIC_TYPE
2221 #else
2222 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14;
// the sign of the quotient is the XOR of the operand signs
2223 unsigned int sign = (x.data_^y.data_) & 0x8000;
// infinity / NaN operands: NaNs go through detail::signal, inf/inf is invalid, inf/finite is infinity, finite/inf is zero
2224 if(absx >= 0x7C00 || absy >= 0x7C00)
2225 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2226 (absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0)));
// zero dividend: signed zero, unless the divisor is zero too (invalid)
2227 if(!absx)
2228 return half(detail::binary, absy ? sign : detail::invalid());
// non-zero dividend over zero: result comes from detail::pole
2229 if(!absy)
2230 return half(detail::binary, detail::pole(sign));
// normalize subnormal operands; the divisor's shifts raise the result exponent
2231 for(; absx<0x400; absx<<=1,--exp) ;
2232 for(; absy<0x400; absy<<=1,++exp) ;
2233 detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
// i is 1 when the dividend mantissa is smaller, which lowers the exponent and adds one bit to the dividend shift
2234 int i = mx < my;
2235 exp += (absx>>10) - (absy>>10) - i;
2236 if(exp > 29)
2238 else if(exp < -11)
2240 mx <<= 12 + i;
2241 my <<= 1;
2243 #endif
2244 }
2245 template<class T>
2246 inline half operator/(half x, T y) { return x / static_cast<half>(y); }
2247 template<class T>
2248 inline half operator/(T x, half y) { return static_cast<half>(x) / y; }
2249
2254
2260 template<class charT,class traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg) {
2261 #ifdef HALF_ARITHMETIC_TYPE
2262 return out << detail::half2float<detail::internal_t>(arg.data_);
2263 #else
2264 return out << detail::half2float<float>(arg.data_);
2265 #endif
2266 }
2267
/// Input operator: reads a built-in floating-point value from the stream.
/// The temporary is `detail::internal_t` when HALF_ARITHMETIC_TYPE is defined and `double` otherwise.
/// \param in input stream to read from
/// \param arg half to read into
/// \return reference to the input stream
2277 template<class charT,class traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg) {
2278 #ifdef HALF_ARITHMETIC_TYPE
2279 detail::internal_t f;
2280 #else
2281 double f;
2282 #endif
2283 if(in >> f)
// NOTE(review): the embedded numbering skips 2284 here — the statement that stores the converted value into `arg`
// appears to be missing from this extract; as listed, `arg` is never written and the `if` guards the return. Verify upstream.
2285 return in;
2286 }
2287
2292
2297 inline constexpr half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
2298
2303 inline constexpr half abs(half arg) { return fabs(arg); }
2304
/// Floating-point remainder with the sign of the dividend, computed on the bit patterns.
/// \param x dividend
/// \param y divisor
/// \return remainder carrying the sign bit of `x`
2311 inline half fmod(half x, half y) {
2312 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
// infinity / NaN operands: NaNs go through detail::signal, an infinite dividend is invalid, an infinite divisor returns x
2313 if(absx >= 0x7C00 || absy >= 0x7C00)
2314 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2315 (absx==0x7C00) ? detail::invalid() : x.data_);
2316 if(!absy)
// NOTE(review): the embedded numbering skips 2317 here — the statement for a zero divisor appears to be missing
// from this extract; as listed, the next `if` would wrongly become its body. Verify upstream.
2318 if(!absx)
2319 return x;
// equal magnitudes give a zero with the dividend's sign
2320 if(absx == absy)
2321 return half(detail::binary, sign);
2322 return half(detail::binary, sign|detail::mod<false,false>(absx, absy));
2323 }
2324
/// Floating-point remainder, computed on the bit patterns via `detail::mod<false,true>`.
/// The result of `detail::mod` is XOR-ed with the dividend's sign, so it may flip the sign bit.
/// \param x dividend
/// \param y divisor
/// \return remainder
2331 inline half remainder(half x, half y) {
2332 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
// infinity / NaN operands: NaNs go through detail::signal, an infinite dividend is invalid, an infinite divisor returns x
2333 if(absx >= 0x7C00 || absy >= 0x7C00)
2334 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2335 (absx==0x7C00) ? detail::invalid() : x.data_);
2336 if(!absy)
// NOTE(review): the embedded numbering skips 2337 here — the statement for a zero divisor appears to be missing
// from this extract; as listed, the next `if` would wrongly become its body. Verify upstream.
2338 if(absx == absy)
2339 return half(detail::binary, sign);
2340 return half(detail::binary, sign^detail::mod<false,true>(absx, absy));
2341 }
2342
/// Floating-point remainder that also reports a quotient value through `quo`.
/// \param x dividend
/// \param y divisor
/// \param quo receives the quotient produced by `detail::mod<true,true>`, negated when the operand signs differ;
///            it is set to 0 when `y` is infinite and `x` is finite, and to +/-1 when the magnitudes are equal
/// \return remainder
2350 inline half remquo(half x, half y, int *quo) {
2351 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000;
2352 if(absx >= 0x7C00 || absy >= 0x7C00)
2353 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2354 (absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_));
2355 if(!absy)
// NOTE(review): the embedded numbering skips 2356 here — the statement for a zero divisor appears to be missing
// from this extract; as listed, the declaration below would become the `if` body. Verify upstream.
2357 bool qsign = ((value^y.data_)&0x8000) != 0;
2358 int q = 1;
2359 if(absx != absy)
2360 value ^= detail::mod<true, true>(absx, absy, &q);
2361 return *quo = qsign ? -q : q, half(detail::binary, value);
2362 }
2363
/// Fused multiply-add `x*y + z`, done on the bit patterns with a single rounding at the end.
/// NOTE(review): the listing's embedded numbering skips 2376, 2380, 2424 and 2426-2427, so parts of the
/// HALF_ARITHMETIC_TYPE branch (including the definitions of fx, fy, fz), the overflow/underflow returns and the
/// final rounding return are missing from this extract — verify upstream.
/// \param x first factor
/// \param y second factor
/// \param z addend
/// \return `x*y + z` rounded with `half::round_style`
2374 inline half fma(half x, half y, half z) {
2375 #ifdef HALF_ARITHMETIC_TYPE
2377 #if FP_FAST_FMA
2378 return half(detail::binary, detail::float2half<half::round_style>(std::fma(fx, fy, fz)));
2379 #else
2381 #endif
2382 #else
2383 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15;
// sign: sign of the product; sub: product and addend have different signs
2384 unsigned int sign = (x.data_^y.data_) & 0x8000;
2385 bool sub = ((sign^z.data_)&0x8000) != 0;
// infinity / NaN operands: NaNs go through detail::signal; inf*0 and inf-inf are invalid
2386 if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
2387 return (absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) :
2388 (absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) :
2389 (absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z;
// zero product: the result is z, or a zero whose sign depends on the rounding mode when z is zero too
2390 if(!absx || !absy)
2391 return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign));
// normalize subnormal factors so that bit 10 is set
2392 for(; absx<0x400; absx<<=1,--exp) ;
2393 for(; absy<0x400; absy<<=1,--exp) ;
2394 detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
// i is 1 when the product reached bit 21; the shift below normalizes the product to the same position either way
2395 int i = m >> 21;
2396 exp += (absx>>10) + (absy>>10) + i;
2397 m <<= 3 - i;
2398 if(absz) {
2399 int expz = 0;
2400 for(; absz<0x400; absz<<=1,--expz) ;
2401 expz += absz >> 10;
2402 detail::uint32 mz = static_cast<detail::uint32>((absz&0x3FF)|0x400) << 13;
// keep the larger magnitude in (m, exp); when subtracting, the result then takes the sign of z
2403 if(expz > exp || (expz == exp && mz > m)) {
2404 std::swap(m, mz);
2405 std::swap(exp, expz);
2406 if(sub)
2407 sign = z.data_ & 0x8000;
2408 }
2409 int d = exp - expz;
// align the smaller mantissa, keeping a sticky bit for everything shifted out
2410 mz = (d<23) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2411 if(sub) {
2412 m = m - mz;
// exact cancellation yields zero, negative only when rounding toward -infinity
2413 if(!m)
2414 return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
2415 for(; m<0x800000; m<<=1,--exp) ;
2416 } else {
2417 m += mz;
2418 i = m >> 24;
// on carry, shift right once and fold the shifted-out bit back into bit 0
2419 m = (m>>i) | (m&i);
2420 exp += i;
2421 }
2422 }
2423 if(exp > 30)
2425 else if(exp < -10)
2428 #endif
2429 }
2430
// NOTE(review): the function header for this body is missing from this extract (the embedded numbering jumps from
// 2430 to 2438). Going by the `<` comparison — y is selected when x is NaN or orders below y — this is presumably
// the body of `fmax(half x, half y)`; restore the signature from the upstream header.
// y is never chosen when it is NaN; both outcomes are routed through detail::select.
2438 return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <
2439 (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
2440 }
2441
// NOTE(review): the function header for this body is missing from this extract (the embedded numbering jumps from
// 2441 to 2449). Going by the `>` comparison — y is selected when x is NaN or orders above y — this is presumably
// the body of `fmin(half x, half y)`; restore the signature from the upstream header.
// y is never chosen when it is NaN; both outcomes are routed through detail::select.
2449 return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >
2450 (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
2451 }
2452
2461 inline half fdim(half x, half y) {
2462 if(isnan(x) || isnan(y))
2463 return half(detail::binary, detail::signal(x.data_, y.data_));
2464 return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y);
2465 }
2466
2471 inline half nanh(const char *arg) {
2472 unsigned int value = 0x7FFF;
2473 while(*arg)
2474 value ^= static_cast<unsigned>(*arg++) & 0xFF;
2475 return half(detail::binary, value);
2476 }
2477
2482
/// Exponential function e^arg, evaluated as a base-2 exponential of arg*log2(e) in fixed point.
/// NOTE(review): the listing's embedded numbering skips 2493 and 2501, so the HALF_ARITHMETIC_TYPE branch and the
/// statement controlled by `if(abs >= 0x4C80)` are missing from this extract — verify upstream.
/// \param arg exponent
/// \return e raised to `arg`, rounded with `half::round_style`
2491 inline half exp(half arg) {
2492 #ifdef HALF_ARITHMETIC_TYPE
2494 #else
2495 int abs = arg.data_ & 0x7FFF;
// a zero argument returns the pattern 0x3C00 (1.0)
2496 if(!abs)
2497 return half(detail::binary, 0x3C00);
// infinity: +inf stays 0x7C00, -inf gives 0 (the mask is all ones only for a clear sign bit); NaNs go through detail::signal
2498 if(abs >= 0x7C00)
2499 return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
2500 if(abs >= 0x4C80)
// multiply the argument mantissa by 0xB8AA3B29, which as 1.31 fixed point is about 1.4427, i.e. log2(e)
2502 detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
2503 int e = (abs>>10) + (abs<=0x3FF), exp;
// split the product into an integer part (exp) and fractional bits (m)
2504 if(e < 14) {
2505 exp = 0;
2506 m >>= 14 - e;
2507 } else {
2508 exp = m >> (45-e);
2509 m = (m<<(e-14)) & 0x7FFFFFFF;
2510 }
2511 return half(detail::binary, detail::exp2_post<half::round_style,true>(detail::exp2(m, 26), exp, (arg.data_&0x8000)!=0));
2512 #endif
2513 }
2514
/// Binary exponential 2^arg, evaluated in fixed point.
/// NOTE(review): the listing's embedded numbering skips 2525, 2533 and 2541-2542, so the HALF_ARITHMETIC_TYPE branch,
/// the statement controlled by `if(abs >= 0x4E40)` and the remainder of the exact-power branch are missing from
/// this extract — verify upstream.
/// \param arg exponent
/// \return 2 raised to `arg`, rounded with `half::round_style`
2523 inline half exp2(half arg) {
2524 #if defined(HALF_ARITHMETIC_TYPE)
2526 #else
2527 int abs = arg.data_ & 0x7FFF;
// a zero argument returns the pattern 0x3C00 (1.0)
2528 if(!abs)
2529 return half(detail::binary, 0x3C00);
// infinity: +inf stays 0x7C00, -inf gives 0; NaNs go through detail::signal
2530 if(abs >= 0x7C00)
2531 return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
2532 if(abs >= 0x4E40)
// e: exponent field (subnormals counted as 1); exp: mantissa with the implicit bit, later reduced to the integer part
2534 int e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
2535 detail::uint32 m = detail::exp2((static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, 28);
2536 exp >>= 25 - e;
// NOTE(review): m == 0x80000000 looks like the case of an integral argument (exact power of two) — confirm against detail::exp2
2537 if(m == 0x80000000) {
2538 if(arg.data_&0x8000)
2539 exp = -exp;
2540 else if(exp > 15)
2543 }
2544 return half(detail::binary, detail::exp2_post<half::round_style,true>(m, exp, (arg.data_&0x8000)!=0));
2545 #endif
2546 }
2547
/// Exponential minus one, e^arg - 1, evaluated in fixed point.
/// NOTE(review): the listing's embedded numbering skips 2559, 2567 and 2590, so the HALF_ARITHMETIC_TYPE branch and
/// the statements controlled by `if(abs >= 0x4A00)` and `if(exp > 29)` are missing from this extract — verify upstream.
/// \param arg exponent
/// \return e raised to `arg`, minus 1, rounded with `half::round_style`
2557 inline half expm1(half arg) {
2558 #if defined(HALF_ARITHMETIC_TYPE)
2560 #else
2561 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
// a zero argument is returned unchanged, preserving its sign
2562 if(!abs)
2563 return arg;
// infinity: +inf gives 0x7C00, -inf gives 0x7C00 + 0x4000 = 0xBC00 (-1.0); NaNs go through detail::signal
2564 if(abs >= 0x7C00)
2565 return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_));
2566 if(abs >= 0x4A00)
// multiply the argument mantissa by 0xB8AA3B29, which as 1.31 fixed point is about 1.4427, i.e. log2(e)
2568 detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
2569 int e = (abs>>10) + (abs<=0x3FF), exp;
// split the product into an integer part (exp) and fractional bits (m)
2570 if(e < 14) {
2571 exp = 0;
2572 m >>= 14 - e;
2573 } else {
2574 exp = m >> (45-e);
2575 m = (m<<(e-14)) & 0x7FFFFFFF;
2576 }
2577 m = detail::exp2(m);
2578 if(sign) {
// negative argument: take the reciprocal of the exponential, then subtract it from 1 (0x80000000), keeping sticky bits
2579 int s = 0;
2580 if(m > 0x80000000) {
2581 ++exp;
2582 m = detail::divide64(0x80000000, m, s);
2583 }
2584 m = 0x80000000 - ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)|s);
2585 exp = 0;
2586 } else
// positive argument: subtract 1 scaled down to the result's exponent
2587 m -= (exp<31) ? (0x80000000>>exp) : 1;
// renormalize to the left while applying the bias of 14
2588 for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ;
2589 if(exp > 29)
2591 return half(detail::binary, detail::rounded<half::round_style,true>(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0));
2592 #endif
2593 }
2594
/// Natural logarithm, evaluated through a fixed-point base-2 logarithm.
/// NOTE(review): the listing's embedded numbering skips 2606 and 2617, so the HALF_ARITHMETIC_TYPE branch and the
/// first line of the final return statement are missing from this extract — verify upstream.
/// \param arg function argument
/// \return logarithm of `arg` to base e, rounded with `half::round_style`
2604 inline half log(half arg) {
2605 #ifdef HALF_ARITHMETIC_TYPE
2607 #else
2608 int abs = arg.data_ & 0x7FFF, exp = -15;
// zero argument: result comes from detail::pole with the sign bit set
2609 if(!abs)
2610 return half(detail::binary, detail::pole(0x8000));
// negative argument: invalid for finite values and -inf, NaNs go through detail::signal
2611 if(arg.data_ & 0x8000)
2612 return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
// +inf is returned unchanged, NaNs go through detail::signal
2613 if(abs >= 0x7C00)
2614 return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
// normalize subnormal arguments and extract the unbiased exponent
2615 for(; abs<0x400; abs<<=1,--exp) ;
2616 exp += abs >> 10;
2618 detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17));
2619 #endif
2620 }
2621
/// Common (base-10) logarithm, evaluated through a fixed-point base-2 logarithm.
/// NOTE(review): the listing's embedded numbering skips 2633 and 2650, so the HALF_ARITHMETIC_TYPE branch and the
/// first line of the final return statement are missing from this extract — verify upstream.
/// \param arg function argument
/// \return logarithm of `arg` to base 10, rounded with `half::round_style`
2631 inline half log10(half arg) {
2632 #ifdef HALF_ARITHMETIC_TYPE
2634 #else
2635 int abs = arg.data_ & 0x7FFF, exp = -15;
// zero argument: result comes from detail::pole with the sign bit set
2636 if(!abs)
2637 return half(detail::binary, detail::pole(0x8000));
// negative argument: invalid for finite values and -inf, NaNs go through detail::signal
2638 if(arg.data_ & 0x8000)
2639 return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
// +inf is returned unchanged, NaNs go through detail::signal
2640 if(abs >= 0x7C00)
2641 return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
// hard-wired results for four argument patterns
// NOTE(review): these appear to be 10, 100, 1000 and 10000 mapping to 1, 2, 3 and 4 — confirm the encodings
2642 switch(abs) {
2643 case 0x4900: return half(detail::binary, 0x3C00);
2644 case 0x5640: return half(detail::binary, 0x4000);
2645 case 0x63D0: return half(detail::binary, 0x4200);
2646 case 0x70E2: return half(detail::binary, 0x4400);
2647 }
// normalize subnormal arguments and extract the unbiased exponent
2648 for(; abs<0x400; abs<<=1,--exp) ;
2649 exp += abs >> 10;
2651 detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16));
2652 #endif
2653 }
2654
/// Binary (base-2) logarithm, evaluated in fixed point.
/// NOTE(review): the listing's embedded numbering skips 2666 and 2691, so the HALF_ARITHMETIC_TYPE branch and the
/// final rounding return are missing from this extract — verify upstream.
/// \param arg function argument
/// \return logarithm of `arg` to base 2, rounded with `half::round_style`
2664 inline half log2(half arg) {
2665 #if defined(HALF_ARITHMETIC_TYPE)
2667 #else
2668 int abs = arg.data_ & 0x7FFF, exp = -15, s = 0;
// zero argument: result comes from detail::pole with the sign bit set
2669 if(!abs)
2670 return half(detail::binary, detail::pole(0x8000));
// negative argument: invalid for finite values and -inf, NaNs go through detail::signal
2671 if(arg.data_ & 0x8000)
2672 return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
// +inf is returned unchanged, NaNs go through detail::signal
2673 if(abs >= 0x7C00)
2674 return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
// an argument of exactly 1.0 (0x3C00) gives +0
2675 if(abs == 0x3C00)
2676 return half(detail::binary, 0);
2677 for(; abs<0x400; abs<<=1,--exp) ;
2678 exp += (abs>>10);
// exact power of two (no fraction bits): the result is the integer exponent, encoded directly as a half
2679 if(!(abs&0x3FF)) {
2680 unsigned int value = static_cast<unsigned>(exp<0) << 15, m = std::abs(exp) << 6;
2681 for(exp=18; m<0x400; m<<=1,--exp) ;
2682 return half(detail::binary, value+(exp<<10)+m);
2683 }
// combine integer exponent and fractional logarithm, then take the absolute value via the sign mask
2684 detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m =
2685 (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign;
2686 if(!m)
2687 return half(detail::binary, 0);
// normalize m into the range [0x8000000, 0xFFFFFFF], collecting shifted-out bits in s
2688 for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ;
2689 for(; m>0xFFFFFFF; m>>=1,++exp)
2690 s |= m & 1;
2692 #endif
2693 }
2694
/// Natural logarithm of one plus the argument, evaluated in fixed point.
/// NOTE(review): the listing's embedded numbering skips 2707 and 2731, so the HALF_ARITHMETIC_TYPE branch and the
/// final return statement are missing from this extract — verify upstream.
/// \param arg function argument
/// \return logarithm of `1 + arg` to base e, rounded with `half::round_style`
2705 inline half log1p(half arg) {
2706 #if defined(HALF_ARITHMETIC_TYPE)
2708 #else
// arguments at or below -1.0 (bits >= 0xBC00): exactly -1 is a pole, other values up to -inf are invalid, NaNs are signalled
2709 if(arg.data_ >= 0xBC00)
2710 return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
2711 int abs = arg.data_ & 0x7FFF, exp = -15;
// zero and +inf are returned unchanged, NaNs go through detail::signal
2712 if(!abs || abs >= 0x7C00)
2713 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
2714 for(; abs<0x400; abs<<=1,--exp) ;
2715 exp += abs >> 10;
2716 detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 20;
2717 if(arg.data_ & 0x8000) {
// negative argument: subtract the aligned magnitude from 1 (0x40000000) and renormalize to the left
2718 m = 0x40000000 - (m>>-exp);
2719 for(exp=0; m<0x40000000; m<<=1,--exp) ;
2720 } else {
2721 if(exp < 0) {
// magnitude below one: align it under the constant 1
2722 m = 0x40000000 + (m>>-exp);
2723 exp = 0;
2724 } else {
// magnitude of at least one: add the constant 1 scaled down to the argument's exponent, renormalize on carry
2725 m += 0x40000000 >> exp;
2726 int i = m >> 31;
2727 m >>= i;
2728 exp += i;
2729 }
2730 }
2732 #endif
2733 }
2734
2739
/// Square root.
/// Zero magnitude and data_ >= 0x7C00 are handled up front: magnitudes above 0x7C00 go
/// through detail::signal(), other bit patterns above 0x8000 yield detail::invalid(), and
/// the remaining inputs are returned with their bits unchanged.
/// All other inputs are normalised, passed to detail::sqrt<20> and rounded with
/// detail::rounded using half::round_style.
/// NOTE(review): listing line 2750 (the HALF_ARITHMETIC_TYPE branch) is not visible in this view.
2748 inline half sqrt(half arg) {
2749 #ifdef HALF_ARITHMETIC_TYPE
2751 #else
2752 int abs = arg.data_ & 0x7FFF, exp = 15;
2753 if(!abs || arg.data_ >= 0x7C00)
2754 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_);
 // Shift magnitudes below 0x400 left until bit 10 is set, lowering exp once per shift.
2755 for(; abs<0x400; abs<<=1,--exp) ;
 // r and exp are passed to detail::sqrt<20> and read again afterwards for rounding.
2756 detail::uint32 r = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10);
2757 return half(detail::binary, detail::rounded<half::round_style,false>((exp<<10)+(m&0x3FF), r>m, r!=0));
2758 #endif
2759 }
2760
/// Cubic root.
/// Zero magnitude, magnitude 0x3C00 and magnitude 0x7C00 return `arg` unchanged; magnitudes
/// above 0x7C00 go through detail::signal(). Everything else is computed from detail::log2 of
/// the mantissa, a detail::multiply64 by 0xAAAAAAAB and detail::exp2 of the fractional part.
/// NOTE(review): listing lines 2771 and 2800 are not visible in this view; line 2800 sits
/// inside the final return expression, so that expression is incomplete as shown.
2769 inline half cbrt(half arg) {
2770 #if defined(HALF_ARITHMETIC_TYPE)
2772 #else
2773 int abs = arg.data_ & 0x7FFF, exp = -15;
2774 if(!abs || abs == 0x3C00 || abs >= 0x7C00)
2775 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
 // Shift magnitudes below 0x400 left until bit 10 is set, lowering exp once per shift.
2776 for(; abs<0x400; abs<<=1, --exp);
 // m becomes the absolute value of (exponent << 27) + scaled detail::log2 of the mantissa;
 // sign remembers whether that value was negative.
2777 detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m =
2778 (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign;
2779 for(exp=2; m<0x80000000; m<<=1,--exp) ;
 // 0xAAAAAAAB is presumably a fixed-point factor for the division by three - confirm.
2780 m = detail::multiply64(m, 0xAAAAAAAB);
2781 int i = m >> 31, s;
2782 exp += i;
2783 m <<= 1 - i;
 // Split m into f (passed to detail::exp2) and an integer part kept in exp.
2784 if(exp < 0) {
2785 f = m >> -exp;
2786 exp = 0;
2787 } else {
2788 f = (m<<exp) & 0x7FFFFFFF;
2789 exp = m >> (31-exp);
2790 }
2791 m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26);
 // When the logarithm was negative the exponent is negated, after replacing m by
 // detail::divide64(0x80000000, m, s) if m exceeds 0x80000000.
2792 if(sign) {
2793 if(m > 0x80000000) {
2794 m = detail::divide64(0x80000000, m, s);
2795 ++exp;
2796 }
2797 exp = -exp;
2798 }
2799 return half(detail::binary, (half::round_style==std::round_to_nearest) ?
2801 detail::fixed2half<half::round_style,23,false,false,false>((m+0x80)>>8, exp+14, arg.data_&0x8000));
2802 #endif
2803 }
2804
/// Hypotenuse function (two arguments).
/// With HALF_ARITHMETIC_TYPE defined, both inputs are converted with detail::half2float,
/// passed to std::hypot and converted back with detail::float2half.
/// Otherwise the raw bits are used: if either magnitude is >= 0x7C00 the result is chosen by
/// detail::select / detail::signal; a zero-magnitude x returns the magnitude of y (through
/// detail::check_underflow) or the all-zero pattern when y is zero as well.
/// NOTE(review): listing lines 2826 and 2841 are not visible in this view. Line 2826 is the
/// statement controlled by `if(!absy)`, line 2841 precedes `#endif` and presumably holds the
/// final return; as shown, `if(!absy)` appears to control the following `if`, which is an
/// artefact of the missing line.
2814 inline half hypot(half x, half y) {
2815 #ifdef HALF_ARITHMETIC_TYPE
2816 detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_);
2817 return half(detail::binary, detail::float2half<half::round_style>(std::hypot(fx, fy)));
2818 #else
2819 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0;
2820 if(absx >= 0x7C00 || absy >= 0x7C00)
2821 return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, y.data_) :
2822 (absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_));
2823 if(!absx)
2824 return half(detail::binary, absy ? detail::check_underflow(absy) : 0);
2825 if(!absy)
 // Order the operands so that absx holds the larger magnitude.
2827 if(absy > absx)
2828 std::swap(absx, absy);
 // Shift magnitudes below 0x400 left until bit 10 is set, tracking the shifts in expx/expy.
2829 for(; absx<0x400; absx<<=1,--expx) ;
2830 for(; absy<0x400; absy<<=1,--expy) ;
 // Square both mantissas (implicit bit included); ix/iy are 1 when the square reached bit 21.
2831 detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
2832 mx *= mx;
2833 my *= my;
2834 int ix = mx >> 21, iy = my >> 21;
2835 expx = 2*(expx+(absx>>10)) - 15 + ix;
2836 expy = 2*(expy+(absy>>10)) - 15 + iy;
2837 mx <<= 10 - ix;
2838 my <<= 10 - iy;
 // Shift my right by the exponent difference d; any bits shifted out are folded into bit 0.
2839 int d = expx - expy;
2840 my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2842 #endif
2843 }
2844
/// Hypotenuse function (three arguments).
/// With HALF_ARITHMETIC_TYPE defined the visible statement returns
/// std::sqrt(fx*fx+fy*fy+fz*fz) converted with detail::float2half.
/// Otherwise: if any argument has zero magnitude the call is forwarded to the two-argument
/// hypot of the other two; if any magnitude is >= 0x7C00 the result is chosen by
/// detail::select / detail::signal; the remaining case sorts the magnitudes, squares the
/// mantissas and adds them in order of increasing exponent.
/// NOTE(review): listing lines 2857 (declaration of fx, fy, fz) and 2904 (presumably the
/// final return) are not visible in this view.
2855 inline half hypot(half x, half y, half z) {
2856 #ifdef HALF_ARITHMETIC_TYPE
2858 return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy+fz*fz)));
2859 #else
2860 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0;
2861 if(!absx)
2862 return hypot(y, z);
2863 if(!absy)
2864 return hypot(x, z);
2865 if(!absz)
2866 return hypot(x, y);
2867 if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
2868 return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) :
2869 (absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) :
2870 (absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) :
2871 detail::signal(x.data_, y.data_, z.data_));
 // Three compare-and-swap steps leave absx >= absy >= absz.
2872 if(absz > absy)
2873 std::swap(absy, absz);
2874 if(absy > absx)
2875 std::swap(absx, absy);
2876 if(absz > absy)
2877 std::swap(absy, absz);
 // Shift magnitudes below 0x400 left until bit 10 is set, tracking the shifts per operand.
2878 for(; absx<0x400; absx<<=1,--expx) ;
2879 for(; absy<0x400; absy<<=1,--expy) ;
2880 for(; absz<0x400; absz<<=1,--expz) ;
 // Square the mantissas (implicit bit included); ix/iy/iz are 1 when a square reached bit 21.
2881 detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400;
2882 mx *= mx;
2883 my *= my;
2884 mz *= mz;
2885 int ix = mx >> 21, iy = my >> 21, iz = mz >> 21;
2886 expx = 2*(expx+(absx>>10)) - 15 + ix;
2887 expy = 2*(expy+(absy>>10)) - 15 + iy;
2888 expz = 2*(expz+(absz>>10)) - 15 + iz;
2889 mx <<= 10 - ix;
2890 my <<= 10 - iy;
2891 mz <<= 10 - iz;
 // Add the smallest square to the middle one after aligning it; shifted-out bits go to bit 0.
2892 int d = expy - expz;
2893 mz = (d<30) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2894 my += mz;
 // If the sum reached bit 31, halve it; if its exponent now exceeds expx, swap roles with x.
2895 if(my & 0x80000000) {
2896 my = (my>>1) | (my&1);
2897 if(++expy > expx) {
2898 std::swap(mx, my);
2899 std::swap(expx, expy);
2900 }
2901 }
 // Align the partial sum with the largest square for the final addition.
2902 d = expx - expy;
2903 my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2905 #endif
2906 }
2907
/// Power function.
/// Visible special cases, in order:
///  - y with zero magnitude, or x == 0x3C00: result from detail::select(0x3C00, other operand);
///  - either magnitude >= 0x7C00: detail::signal() when one is above 0x7C00, otherwise a
///    pattern built from 0x7C00, 0x3C00, detail::pole() and the computed sign;
///  - x with zero magnitude: detail::pole(sign) when y has its sign bit set, else `sign`;
///  - x == 0xBC00: sign|0x3C00; y == 0x3800: sqrt(x); y == 0x4000: x * x.
/// `is_int` is true when y has no fractional bits; `sign` carries the sign bit of x only when
/// y is an integer below 0x6800 whose lowest integer bit is set.
/// NOTE(review): listing lines 2920, 2934 and 2940 are not visible in this view; 2934 and 2940
/// are the statements controlled by `if((x.data_&0x8000) && !is_int)` and
/// `if(y.data_ == 0x3C00)`, so as shown those `if`s appear to control the following lines.
2918 inline half pow(half x, half y) {
2919 #ifdef HALF_ARITHMETIC_TYPE
2921 #else
2922 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15;
2923 if(!absy || x.data_ == 0x3C00)
2924 return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_));
2925 bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1)));
2926 unsigned int sign = x.data_ & (static_cast<unsigned>((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15);
2927 if(absx >= 0x7C00 || absy >= 0x7C00)
2928 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2929 (absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() :
2930 (0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U))));
2931 if(!absx)
2932 return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign);
2933 if((x.data_&0x8000) && !is_int)
2935 if(x.data_ == 0xBC00)
2936 return half(detail::binary, sign|0x3C00);
2937 if(y.data_ == 0x3800)
2938 return sqrt(x);
2939 if(y.data_ == 0x3C00)
2941 if(y.data_ == 0x4000)
2942 return x * x;
 // Shift absx up until bit 10 is set, lowering exp once per shift.
2943 for(; absx<0x400; absx<<=1,--exp) ;
 // m becomes the absolute value of a fixed-point log2 of |x|; msign records its sign.
2944 detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m =
2945 (((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;
2946 for(exp=-11; m<0x80000000; m<<=1,--exp) ;
2947 for(; absy<0x400; absy<<=1,--exp) ;
 // Multiply the logarithm by the mantissa of y.
2948 m = detail::multiply64(m, static_cast<detail::uint32>((absy&0x3FF)|0x400)<<21);
2949 int i = m >> 31;
2950 exp += (absy>>10) + i;
2951 m <<= 1 - i;
 // Split the product into f (passed to detail::exp2) and an integer part kept in exp.
2952 if(exp < 0) {
2953 f = m >> -exp;
2954 exp = 0;
2955 } else {
2956 f = (m<<exp) & 0x7FFFFFFF;
2957 exp = m >> (31-exp);
2958 }
2959 return half(detail::binary, detail::exp2_post<half::round_style,false>(detail::exp2(f), exp, ((msign&1)^(y.data_>>15))!=0, sign));
2960 #endif
2961 }
2967
/// Compute sine and cosine simultaneously, writing the results through `sin` and `cos`.
/// Visible behaviour of the non-HALF_ARITHMETIC_TYPE path:
///  - magnitude >= 0x7C00: both outputs receive detail::invalid() (exactly 0x7C00) or
///    detail::signal();
///  - zero magnitude: *sin = arg and *cos = 0x3C00;
///  - when half::round_style is not round_to_nearest, four specific magnitudes are answered
///    from hard-coded bit patterns via detail::rounded;
///  - otherwise detail::sincos is evaluated on detail::angle_arg(abs, k) and the pair is
///    permuted/negated according to k & 3.
/// NOTE(review): listing lines 2980-2981, 2990-2991, 2997, 3001, 3005, 3009 and 3019-3020
/// are not visible in this view. That covers the rest of the HALF_ARITHMETIC_TYPE branch, the
/// body of the `abs < 0x2500` branch, the `*cos` assignments of the special cases and the
/// final stores to *sin / *cos.
2977 inline void sincos(half arg, half *sin, half *cos) {
2978 #ifdef HALF_ARITHMETIC_TYPE
2979 detail::internal_t f = detail::half2float<detail::internal_t>(arg.data_);
2982 #else
2983 int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k;
2984 if(abs >= 0x7C00)
2985 *sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
2986 else if(!abs) {
2987 *sin = arg;
2988 *cos = half(detail::binary, 0x3C00);
2989 } else if(abs < 0x2500) {
2992 } else {
 // Hard-coded results, compiled in only for rounding modes other than round_to_nearest.
2993 if constexpr (half::round_style != std::round_to_nearest) {
2994 switch(abs) {
2995 case 0x48B7:
2996 *sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1));
2998 return;
2999 case 0x598C:
3000 *sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
3002 return;
3003 case 0x6A64:
3004 *sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1));
3006 return;
3007 case 0x6D8C:
3008 *sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1));
3010 return;
3011 }
3012 }
 // k is an output of detail::angle_arg; its low two bits select how the pair is rotated.
3013 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
3014 switch(k & 3) {
3015 case 1: sc = std::make_pair(sc.second, -sc.first); break;
3016 case 2: sc = std::make_pair(-sc.first, -sc.second); break;
3017 case 3: sc = std::make_pair(-sc.second, sc.first); break;
3018 }
3021 }
3022 #endif
3023 }
/// Sine function.
/// Zero magnitude returns `arg` unchanged; magnitude 0x7C00 yields detail::invalid() and
/// larger magnitudes detail::signal(). For rounding modes other than round_to_nearest three
/// specific magnitudes are answered from hard-coded bit patterns. The general case evaluates
/// detail::sincos on detail::angle_arg(abs, k), picks one element of the pair by k & 1 and
/// applies a sign derived from bit 1 of k and the sign bit of `arg`.
/// NOTE(review): listing lines 3035 and 3043 are not visible in this view; 3043 is the
/// statement controlled by `if(abs < 0x2900)`.
3033 inline half sin(half arg) {
3034 #ifdef HALF_ARITHMETIC_TYPE
3036 #else
3037 int abs = arg.data_ & 0x7FFF, k;
3038 if(!abs)
3039 return arg;
3040 if(abs >= 0x7C00)
3041 return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3042 if(abs < 0x2900)
3044 if constexpr (half::round_style != std::round_to_nearest)
3045 switch(abs) {
3046 case 0x48B7: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1));
3047 case 0x6A64: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1));
3048 case 0x6D8C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1));
3049 }
3050 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
 // sign is all ones when the result must be negated, so (v^sign)-sign negates v.
3051 detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)&1)^(arg.data_>>15));
3052 return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.second : sc.first)^sign) - sign));
3053 #endif
3054 }
/// Cosine function.
/// Zero magnitude yields the bit pattern 0x3C00; magnitude 0x7C00 yields detail::invalid()
/// and larger magnitudes detail::signal(). The general case evaluates detail::sincos on
/// detail::angle_arg(abs, k), picks one element of the pair by k & 1 and applies a sign
/// derived from the two low bits of k.
/// NOTE(review): listing lines 3066, 3074 and 3077 are not visible in this view; 3074 and
/// 3077 are the statements controlled by `if(abs < 0x2500)` and `if(abs == 0x598C)`.
3064 inline half cos(half arg) {
3065 #ifdef HALF_ARITHMETIC_TYPE
3067 #else
3068 int abs = arg.data_ & 0x7FFF, k;
3069 if(!abs)
3070 return half(detail::binary, 0x3C00);
3071 if(abs >= 0x7C00)
3072 return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3073 if(abs < 0x2500)
3075 if constexpr (half::round_style != std::round_to_nearest)
3076 if(abs == 0x598C)
3078 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
 // sign is all ones when bits 0 and 1 of k differ, so (v^sign)-sign negates v.
3079 detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)^k)&1);
3080 return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.first : sc.second)^sign) - sign));
3081 #endif
3082 }
/// Tangent function.
/// Zero magnitude returns `arg` unchanged; magnitude 0x7C00 yields detail::invalid() and
/// larger magnitudes detail::signal(). For rounding modes other than round_to_nearest two
/// specific magnitudes are answered from hard-coded bit patterns. The general case evaluates
/// detail::sincos on detail::angle_arg(abs, k), normalises both components and hands them to
/// detail::tangent_post together with the combined sign.
/// NOTE(review): listing lines 3094 and 3102 are not visible in this view; 3102 is the
/// statement controlled by `if(abs < 0x2700)`.
/// NOTE(review): the rounding-mode test below is a plain `if`, while sin() and cos() use
/// `if constexpr` for the same kind of test - consider aligning them.
3092 inline half tan(half arg) {
3093 #ifdef HALF_ARITHMETIC_TYPE
3095 #else
3096 int abs = arg.data_ & 0x7FFF, exp = 13, k;
3097 if(!abs)
3098 return arg;
3099 if(abs >= 0x7C00)
3100 return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3101 if(abs < 0x2700)
3103 if(half::round_style != std::round_to_nearest)
3104 switch(abs) {
3105 case 0x658C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x07E6, 1, 1));
3106 case 0x7330: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x4B62, 1, 1));
3107 }
3108 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30);
 // For odd k the pair is replaced by (-second, first).
3109 if(k & 1)
3110 sc = std::make_pair(-sc.second, sc.first);
 // Take absolute values of both components, remembering their signs.
3111 detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second);
3112 detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx;
 // Normalise both so that bit 31 is set; exp tracks the net shift.
3113 for(; my<0x80000000; my<<=1,--exp) ;
3114 for(; mx<0x80000000; mx<<=1,++exp) ;
3115 return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp, (signy^signx^arg.data_)&0x8000));
3116 #endif
3117 }
/// Arc sine.
/// Zero magnitude returns `arg` unchanged. For magnitude >= 0x3C00: above 0x7C00 goes through
/// detail::signal(), above 0x3C00 yields detail::invalid(), and exactly 0x3C00 yields the
/// rounded pattern sign|0x3E48 (presumably pi/2 with the sign of `arg` - confirm).
/// The general case feeds detail::atan2_args(abs) into detail::atan2.
/// NOTE(review): listing lines 3129, 3138, 3140 and 3143 are not visible in this view; they
/// hold the HALF_ARITHMETIC_TYPE branch, the statements controlled by the two `if`s near the
/// end, and presumably the final return.
3127 inline half asin(half arg) {
3128 #ifdef HALF_ARITHMETIC_TYPE
3130 #else
3131 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
3132 if(!abs)
3133 return arg;
3134 if(abs >= 0x3C00)
3135 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
3136 detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1));
3137 if(abs < 0x2900)
3139 if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3))
3141 std::pair<detail::uint32,detail::uint32> sc = detail::atan2_args(abs);
3142 detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26);
3144 #endif
3145 }
/// Arc cosine function.
/// For magnitude >= 0x3C00: above 0x7C00 goes through detail::signal(), above 0x3C00 yields
/// detail::invalid(); exactly 0x3C00 yields the rounded pattern 0x4248 when the sign bit is
/// set (presumably pi - confirm) and the all-zero pattern otherwise.
/// The general case feeds detail::atan2_args(abs) (with the pair swapped) into detail::atan2;
/// for negative inputs the result is subtracted from 0xC90FDAA2 before conversion.
/// NOTE(review): listing lines 3157 and 3161 are not visible in this view; 3161 is the
/// statement controlled by `if(!abs)`.
3155 inline half acos(half arg) {
3156 #ifdef HALF_ARITHMETIC_TYPE
3158 #else
3159 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15;
3160 if(!abs)
3162 if(abs >= 0x3C00)
3163 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
3164 sign ? detail::rounded<half::round_style,true>(0x4248, 0, 1) : 0);
3165 std::pair<detail::uint32,detail::uint32> cs = detail::atan2_args(abs);
3166 detail::uint32 m = detail::atan2(cs.second, cs.first, 28);
3167 return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(sign ? (0xC90FDAA2-m) : m, 15, 0, sign));
3168 #endif
3169 }
/// Arc tangent function.
/// Zero magnitude returns `arg` unchanged. Magnitude 0x7C00 yields the rounded pattern
/// sign|0x3E48 (presumably pi/2 with the sign of `arg` - confirm); larger magnitudes go
/// through detail::signal(). The general case calls detail::atan2 with the mantissa of `arg`
/// and a constant second operand, scaled differently depending on whether exp exceeds 15.
/// NOTE(review): listing lines 3181, 3189 and 3194 are not visible in this view; 3189 is the
/// statement controlled by `if(abs <= 0x2700)` and 3194 presumably holds the final return.
3179 inline half atan(half arg) {
3180 #ifdef HALF_ARITHMETIC_TYPE
3182 #else
3183 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
3184 if(!abs)
3185 return arg;
3186 if(abs >= 0x7C00)
3187 return half(detail::binary, (abs==0x7C00) ? detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1) : detail::signal(arg.data_));
3188 if(abs <= 0x2700)
 // exp is the exponent field, raised by one for magnitudes without exponent bits;
 // my is the mantissa with bit 10 set only when exponent bits are present.
3190 int exp = (abs>>10) + (abs<=0x3FF);
3191 detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10);
3192 detail::uint32 m = (exp>15) ? detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) :
3193 detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28);
3195 #endif
3196 }
/// Arc tangent function of y and x.
/// Visible special cases of the non-HALF_ARITHMETIC_TYPE path:
///  - any magnitude above 0x7C00: detail::signal(x, y);
///  - |y| == 0x7C00: rounded signy|0x3E48 when |x| < 0x7C00, else signy|0x40B6 for x with its
///    sign bit set and signy|0x3A48 otherwise;
///  - |x| == 0x7C00 with smaller |y|: bits `signy` for x == 0x7C00, else rounded signy|0x4248;
///  - zero-magnitude y: rounded signy|0x4248 when x has its sign bit set, else y itself;
///  - zero-magnitude x: rounded signy|0x3E48;
///  - large exponent differences d short-circuit to the same constants.
/// The constants are presumably multiples/fractions of pi - confirm against the upstream docs.
/// The general case calls detail::atan2 on the aligned mantissas; for x with its sign bit set
/// the result is subtracted from 0xC90FDAA2 before conversion.
/// NOTE(review): listing lines 3210, 3237 and 3239 are not visible in this view; 3237 is the
/// statement controlled by `if(d < -25)` and 3239 presumably the return of that block.
3208 inline half atan2(half y, half x) {
3209 #ifdef HALF_ARITHMETIC_TYPE
3211 #else
3212 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000;
3213 if(absx >= 0x7C00 || absy >= 0x7C00) {
3214 if(absx > 0x7C00 || absy > 0x7C00)
3215 return half(detail::binary, detail::signal(x.data_, y.data_));
3216 if(absy == 0x7C00)
3217 return half(detail::binary, (absx<0x7C00) ? detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1) :
3218 signx ? detail::rounded<half::round_style,true>(signy|0x40B6, 0, 1) :
3219 detail::rounded<half::round_style,true>(signy|0x3A48, 0, 1));
3220 return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1));
3221 }
3222 if(!absy)
3223 return signx ? half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1)) : y;
3224 if(!absx)
3225 return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1));
 // d is the difference of the two exponent fields (each raised by one when it is zero).
3226 int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF);
3227 if(d > (signx ? 18 : 12))
3228 return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1));
3229 if(signx && d < -11)
3230 return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1));
3231 if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) {
3232 for(; absy<0x400; absy<<=1,--d) ;
3233 detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800;
3234 int i = my < mx;
3235 d -= i;
3236 if(d < -25)
3238 my <<= 11 + i;
3240 }
3241 detail::uint32 m = detail::atan2( ((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)),
3242 ((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1)));
3243 return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(signx ? (0xC90FDAA2-m) : m, 15, signy, signx));
3244 #endif
3245 }
3251
/// Hyperbolic sine.
/// Zero magnitude and magnitude 0x7C00 return `arg` unchanged; larger magnitudes go through
/// detail::signal(). The general case takes the pair returned by detail::hyperbolic_args
/// (which also sets exp), subtracts its elements and normalises the difference.
/// NOTE(review): listing lines 3262, 3268 and 3274-3275 are not visible in this view; they
/// hold the HALF_ARITHMETIC_TYPE branch, the statement controlled by `if(abs <= 0x2900)`,
/// the statement controlled by `if(exp > 29)` and presumably the final return.
3260 inline half sinh(half arg) {
3261 #ifdef HALF_ARITHMETIC_TYPE
3263 #else
3264 int abs = arg.data_ & 0x7FFF, exp;
3265 if(!abs || abs >= 0x7C00)
3266 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3267 if(abs <= 0x2900)
3269 std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27);
3270 detail::uint32 m = mm.first - mm.second;
 // Normalise m until bit 31 is set or exp reaches zero.
3271 for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ;
3272 unsigned int sign = arg.data_ & 0x8000;
3273 if(exp > 29)
3276 #endif
3277 }
/// Hyperbolic cosine.
/// Zero magnitude yields the bit pattern 0x3C00; magnitude 0x7C00 yields 0x7C00 and larger
/// magnitudes go through detail::signal(). The general case adds the two elements of the pair
/// returned by detail::hyperbolic_args (which also sets exp) and renormalises the sum.
/// NOTE(review): listing lines 3289 and 3300-3301 are not visible in this view; they hold the
/// HALF_ARITHMETIC_TYPE branch, the statement controlled by the final `if` and presumably the
/// final return.
3287 inline half cosh(half arg) {
3288 #ifdef HALF_ARITHMETIC_TYPE
3290 #else
3291 int abs = arg.data_ & 0x7FFF, exp;
3292 if(!abs)
3293 return half(detail::binary, 0x3C00);
3294 if(abs >= 0x7C00)
3295 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00);
3296 std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26);
 // i is 1 when bit 31 of the sum is clear; the sum is then shifted right by i, keeping
 // the shifted-out bit in bit 0, and bit 31 is forced on.
3297 detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31;
3298 m = (m>>i) | (m&i) | 0x80000000;
3299 if((exp+=13+i) > 29)
3302 #endif
3303 }
/// Hyperbolic tangent.
/// Zero magnitude returns `arg` unchanged. Magnitude 0x7C00 yields data_-0x4000 (i.e. the
/// pattern 0x3C00 with the sign of `arg`), larger magnitudes go through detail::signal().
/// Magnitudes >= 0x4500 yield the rounded pattern sign|0x3BFF. The general case takes the pair
/// from detail::hyperbolic_args and passes its difference and sum to detail::tangent_post.
/// NOTE(review): listing lines 3315, 3325 and 3327 are not visible in this view; 3325 and
/// 3327 are the statements controlled by `if(abs < 0x2700)` and the rounding-mode `if`.
3313 inline half tanh(half arg) {
3314 #ifdef HALF_ARITHMETIC_TYPE
3316 #else
3317 int abs = arg.data_ & 0x7FFF, exp;
3318 if(!abs)
3319 return arg;
3320 if(abs >= 0x7C00)
3321 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_-0x4000));
3322 if(abs >= 0x4500)
3323 return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
3324 if(abs < 0x2700)
3326 if(half::round_style != std::round_to_nearest && abs == 0x2D3F)
3328 std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, 27);
 // my is the difference (reduced by one for rounding modes other than round_to_nearest),
 // mx the sum; i is 1 when bit 31 of the sum is clear.
3329 detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31;
3330 for(exp=13; my<0x80000000; my<<=1,--exp) ;
3331 mx = (mx>>i) | 0x80000000;
3332 return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp-i, arg.data_&0x8000));
3333 #endif
3334 }
/// Hyperbolic area sine.
/// Zero magnitude and magnitude 0x7C00 return `arg` unchanged; larger magnitudes go through
/// detail::signal(). For rounding modes other than round_to_nearest two specific magnitudes
/// are answered by rounding `arg.data_` minus a fixed offset.
/// NOTE(review): listing lines 3346, 3352 and 3359 are not visible in this view; they hold
/// the HALF_ARITHMETIC_TYPE branch, the statement controlled by `if(abs <= 0x2900)` and
/// presumably the general-case return.
3344 inline half asinh(half arg) {
3345 #if defined(HALF_ARITHMETIC_TYPE)
3347 #else
3348 int abs = arg.data_ & 0x7FFF;
3349 if(!abs || abs >= 0x7C00)
3350 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3351 if(abs <= 0x2900)
3353 if(half::round_style != std::round_to_nearest)
3354 switch(abs)
3355 {
3356 case 0x32D4: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-13, 1, 1));
3357 case 0x3B5B: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-197, 1, 1));
3358 }
3360 #endif
3361 }
/// Hyperbolic area cosine.
/// Inputs with the sign bit set or a magnitude below 0x3C00 yield detail::invalid(), unless
/// the magnitude exceeds 0x7C00, in which case detail::signal() is used. Magnitude 0x3C00
/// yields the all-zero pattern; 0x7C00 is returned unchanged and larger values go through
/// detail::signal().
/// NOTE(review): listing lines 3373 and 3382 are not visible in this view; they hold the
/// HALF_ARITHMETIC_TYPE branch and presumably the general-case return.
3371 inline half acosh(half arg) {
3372 #if defined(HALF_ARITHMETIC_TYPE)
3374 #else
3375 int abs = arg.data_ & 0x7FFF;
3376 if((arg.data_&0x8000) || abs < 0x3C00)
3377 return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3378 if(abs == 0x3C00)
3379 return half(detail::binary, 0);
3380 if(arg.data_ >= 0x7C00)
3381 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3383 #endif
3384 }
/// Hyperbolic area tangent.
/// Zero magnitude returns `arg` unchanged. For magnitude >= 0x3C00: exactly 0x3C00 yields
/// detail::pole() with the sign of `arg`, values up to 0x7C00 yield detail::invalid() and
/// larger ones detail::signal(). The general case forms my = 0x80000000 + m and
/// mx = 0x80000000 - m from the scaled mantissa m and divides them with detail::divide64.
/// NOTE(review): listing lines 3397, 3405 and 3409 are not visible in this view; 3405 is the
/// statement controlled by `if(abs < 0x2700)` and 3409 is the first line of the final return
/// expression, so that expression is incomplete as shown.
3395 inline half atanh(half arg) {
3396 #if defined(HALF_ARITHMETIC_TYPE)
3398 #else
3399 int abs = arg.data_ & 0x7FFF, exp = 0;
3400 if(!abs)
3401 return arg;
3402 if(abs >= 0x3C00)
3403 return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3404 if(abs < 0x2700)
3406 detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m;
 // Normalise mx until bit 31 is set, counting the shifts in exp.
3407 for(; mx<0x80000000; mx<<=1,++exp) ;
3408 int i = my >= mx, s;
3410 (detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000));
3411 #endif
3412 }
3418
/// Error function.
/// Zero magnitude returns `arg` unchanged. Magnitude 0x7C00 yields data_-0x4000 (i.e. the
/// pattern 0x3C00 with the sign of `arg`); larger magnitudes go through detail::signal().
/// Magnitudes >= 0x4200 yield the rounded pattern sign|0x3BFF.
/// NOTE(review): listing lines 3429 and 3436 are not visible in this view; they hold the
/// HALF_ARITHMETIC_TYPE branch and presumably the general-case return.
3427 inline half erf(half arg) {
3428 #if defined(HALF_ARITHMETIC_TYPE)
3430 #else
3431 unsigned int abs = arg.data_ & 0x7FFF;
3432 if(!abs || abs >= 0x7C00)
3433 return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg;
3434 if(abs >= 0x4200)
3435 return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
3437 #endif
3438 }
/// Complementary error function.
/// Magnitude 0x7C00 yields the pattern sign>>1 (0x4000 when the sign bit is set, zero
/// otherwise); larger magnitudes go through detail::signal(). Zero magnitude yields 0x3C00.
/// Magnitudes >= 0x4400 yield a rounded constant derived from the sign bit alone.
/// NOTE(review): listing lines 3450 and 3459 are not visible in this view; they hold the
/// HALF_ARITHMETIC_TYPE branch and presumably the general-case return.
3448 inline half erfc(half arg) {
3449 #if defined(HALF_ARITHMETIC_TYPE)
3451 #else
3452 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
 // NOTE(review): inside this branch `abs>=0x7C00` is always true, so the `: arg`
 // alternative of the outer conditional can never be selected.
3453 if(abs >= 0x7C00)
3454 return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)) : arg;
3455 if(!abs)
3456 return half(detail::binary, 0x3C00);
3457 if(abs >= 0x4400)
3458 return half(detail::binary, detail::rounded<half::round_style,true>((sign>>1)-(sign>>15), sign>>15, 1));
3460 #endif
3461 }
/// Natural logarithm of gamma function.
/// Magnitude 0x7C00 yields 0x7C00 and larger magnitudes go through detail::signal().
/// detail::pole() is returned for zero magnitude, for data_ >= 0xE400, and for data_ >= 0xBC00
/// when no bits below the integer position are set (i.e. no fractional bits).
/// Inputs 0x3C00 and 0x4000 yield the all-zero pattern.
/// NOTE(review): listing lines 3474 and 3483 are not visible in this view; they hold the
/// HALF_ARITHMETIC_TYPE branch and presumably the general-case return.
3472 inline half lgamma(half arg) {
3473 #if defined(HALF_ARITHMETIC_TYPE)
3475 #else
3476 int abs = arg.data_ & 0x7FFF;
3477 if(abs >= 0x7C00)
3478 return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
3479 if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
3480 return half(detail::binary, detail::pole());
3481 if(arg.data_ == 0x3C00 || arg.data_ == 0x4000)
3482 return half(detail::binary, 0);
3484 #endif
3485 }
/// Gamma function.
/// Zero magnitude yields detail::pole(arg.data_). For magnitude >= 0x7C00 the input is
/// returned unchanged only when data_ is exactly 0x7C00; everything else goes through
/// detail::signal(). data_ >= 0xCA80 yields detail::underflow() with a sign bit derived from
/// the lowest integer bit of the magnitude. Input 0x3C00 is returned unchanged.
/// NOTE(review): listing lines 3498, 3506, 3510 and 3513 are not visible in this view; 3506
/// and 3510 are the statements controlled by the `if`s directly above them and 3513 presumably
/// holds the general-case return.
3496 inline half tgamma(half arg) {
3497 #if defined(HALF_ARITHMETIC_TYPE)
3499 #else
3500 unsigned int abs = arg.data_ & 0x7FFF;
3501 if(!abs)
3502 return half(detail::binary, detail::pole(arg.data_));
3503 if(abs >= 0x7C00)
3504 return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
3505 if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
3507 if(arg.data_ >= 0xCA80)
3508 return half(detail::binary, detail::underflow<half::round_style>((1-((abs>>(25-(abs>>10)))&1))<<15));
3509 if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000))
3511 if(arg.data_ == 0x3C00)
3512 return arg;
3514 #endif
3515 }
3521
3529
3537
3545
3553
3560
3568
3575 inline long lrint(half arg) { return detail::half2int<half::round_style,true,true,long>(arg.data_); }
3576
3589
3596 inline long long llrint(half arg) { return detail::half2int<half::round_style,true,true,long long>(arg.data_); }
3597
3602
3609 inline half frexp(half arg, int *exp) {
3610 *exp = 0;
3611 unsigned int abs = arg.data_ & 0x7FFF;
3612 if(abs >= 0x7C00 || !abs)
3613 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3614 for(; abs<0x400; abs<<=1,--*exp) ;
3615 *exp += (abs>>10) - 14;
3616 return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF));
3617 }
3618
/// Multiply by power of two.
/// Zero magnitude and magnitude 0x7C00 return `arg` unchanged; larger magnitudes go through
/// detail::signal(). Otherwise the exponent field of `arg` is added to `exp`:
///  - for 0 < exp <= 30 the result is assembled directly from sign, exp and mantissa;
///  - for -10 <= exp <= 0 the mantissa (with implicit bit) is shifted right and rounded with
///    detail::rounded, passing the last shifted-out bit and an "any lower bit set" flag.
/// NOTE(review): listing lines 3635 and 3637 are not visible in this view; they are the
/// statements controlled by `if(exp > 30)` and `else if(exp < -10)`.
3628 inline half scalbln(half arg, long exp) {
3629 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
3630 if(abs >= 0x7C00 || !abs)
3631 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
 // Shift magnitudes below 0x400 left until bit 10 is set, lowering exp once per shift.
3632 for(; abs<0x400; abs<<=1,--exp) ;
3633 exp += abs >> 10;
3634 if(exp > 30)
3636 else if(exp < -10)
3638 else if(exp > 0)
3639 return half(detail::binary, sign|(exp<<10)|(abs&0x3FF));
3640 unsigned int m = (abs&0x3FF) | 0x400;
3641 return half(detail::binary, detail::rounded<half::round_style,false>(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0));
3642 }
3653 inline half scalbn(half arg, int exp) { return scalbln(arg, exp); }
3654
3664 inline half ldexp(half arg, int exp) { return scalbln(arg, exp); }
3665
3672 inline half modf(half arg, half *iptr) {
3673 unsigned int abs = arg.data_ & 0x7FFF;
3674 if(abs > 0x7C00) {
3675 arg = half(detail::binary, detail::signal(arg.data_));
3676 return *iptr = arg, arg;
3677 }
3678 if(abs >= 0x6400)
3679 return *iptr = arg, half(detail::binary, arg.data_&0x8000);
3680 if(abs < 0x3C00)
3681 return iptr->data_ = arg.data_ & 0x8000, arg;
3682 unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask;
3683 iptr->data_ = arg.data_ & ~mask;
3684 if(!m)
3685 return half(detail::binary, arg.data_&0x8000);
3686 for(; m<0x400; m<<=1,--exp) ;
3687 return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF));
3688 }
3689
/// Extract exponent.
/// Returns FP_ILOGB0 for zero magnitude, INT_MAX for magnitude 0x7C00 and FP_ILOGBNAN for
/// larger magnitudes. Otherwise the result is the exponent field minus 15, lowered by one for
/// every left shift needed to bring the magnitude up to at least 0x200.
/// NOTE(review): listing line 3701 (first statement inside the special-case block) is not
/// visible in this view.
3698 inline int ilogb(half arg) {
3699 int abs = arg.data_ & 0x7FFF, exp;
3700 if(!abs || abs >= 0x7C00) {
 // GCC only: silence -Wduplicated-branches for the conditional below
 // (presumably because some of the three constants can be equal - confirm).
3702 #if defined(__GNUC__) && !defined(__clang__)
3703 #pragma GCC diagnostic push
3704 #pragma GCC diagnostic ignored "-Wduplicated-branches"
3705 #endif
3706 return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN;
3707 #if defined(__GNUC__) && !defined(__clang__)
3708 #pragma GCC diagnostic pop
3709 #endif
3710 }
3711 for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
3712 return exp;
3713 }
3721 inline half logb(half arg) {
3722 int abs = arg.data_ & 0x7FFF, exp;
3723 if(!abs)
3724 return half(detail::binary, detail::pole(0x8000));
3725 if(abs >= 0x7C00)
3726 return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
3727 for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
3728 unsigned int value = static_cast<unsigned>(exp<0) << 15;
3729 if(exp) {
3730 unsigned int m = std::abs(exp) << 6;
3731 for(exp=18; m<0x400; m<<=1,--exp) ;
3732 value |= (exp<<10) + m;
3733 }
3734 return half(detail::binary, value);
3735 }
3736
/// Next representable value.
/// If either magnitude exceeds 0x7C00 the result is detail::signal(from, to). If both bit
/// patterns are equal, or both magnitudes are zero, `to` is returned. For a zero-magnitude
/// `from` the result has the sign of `to` and the lowest magnitude bit set. Otherwise the bit
/// pattern of `from` is moved by one step up or down, and FE_OVERFLOW is raised through
/// detail::raise when a magnitude below 0x7C00 ends up with all exponent bits set.
/// NOTE(review): listing lines 3752 and 3758 are not visible in this view; they sit directly
/// before the two `return` statements that follow them.
3745 inline half nextafter(half from, half to) {
3746 int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
3747 if(fabs > 0x7C00 || tabs > 0x7C00)
3748 return half(detail::binary, detail::signal(from.data_, to.data_));
3749 if(from.data_ == to.data_ || !(fabs|tabs))
3750 return to;
3751 if(!fabs) {
3753 return half(detail::binary, (to.data_&0x8000)+1);
3754 }
 // The comparison maps both bit patterns to keys that order like the represented values;
 // together with the sign bit of `from` it decides between adding +1 and -1.
3755 unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(
3756 (from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1;
3757 detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00);
3759 return half(detail::binary, out);
3760 }
/// Next representable value, with the target given as `long double`.
/// If the magnitude of `from` exceeds 0x7C00 the result is detail::signal(from). If `to` is
/// NaN or compares equal to `from`, `to` converted through float is returned. For a
/// zero-magnitude `from` the result has the sign of `to` and the lowest magnitude bit set.
/// Otherwise the bit pattern of `from` is moved by one step up or down, and FE_OVERFLOW is
/// raised through detail::raise when the resulting magnitude is exactly 0x7C00.
/// NOTE(review): listing lines 3778 and 3783 are not visible in this view; they sit directly
/// before the two `return` statements that follow them.
3770 inline half nexttoward(half from, long double to) {
3771 int fabs = from.data_ & 0x7FFF;
3772 if(fabs > 0x7C00)
3773 return half(detail::binary, detail::signal(from.data_));
3774 long double lfrom = static_cast<long double>(from);
3775 if(detail::builtin_isnan(to) || lfrom == to)
3776 return half(static_cast<float>(to));
3777 if(!fabs) {
3779 return half(detail::binary, (static_cast<unsigned>(detail::builtin_signbit(to))<<15)+1);
3780 }
 // The sign bit of `from` and the outcome of `lfrom<to` decide between adding +1 and -1.
3781 unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(lfrom<to))<<1) - 1;
3782 detail::raise(FE_OVERFLOW, (out&0x7FFF)==0x7C00);
3784 return half(detail::binary, out);
3785 }
3792 inline constexpr half copysign(half x, half y) { return half(detail::binary, x.data_^((x.data_^y.data_)&0x8000)); }
3793
3798
3807 inline constexpr int fpclassify(half arg) {
3808 return !(arg.data_&0x7FFF) ? FP_ZERO :
3809 ((arg.data_&0x7FFF)<0x400) ? FP_SUBNORMAL :
3810 ((arg.data_&0x7FFF)<0x7C00) ? FP_NORMAL :
3811 ((arg.data_&0x7FFF)==0x7C00) ? FP_INFINITE :
3812 FP_NAN;
3813 }
3814
3820 inline constexpr bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; }
3821
3827 inline constexpr bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; }
3828
3834 inline constexpr bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; }
3835
3841 inline constexpr bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); }
3842
3848 inline constexpr bool signbit(half arg) { return (arg.data_&0x8000) != 0; }
3849
3854
3861 inline constexpr bool isgreater(half x, half y) {
3862 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3863 }
3864
3871 inline constexpr bool isgreaterequal(half x, half y) {
3872 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3873 }
3874
3881 inline constexpr bool isless(half x, half y) {
3882 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3883 }
3884
3891 inline constexpr bool islessequal(half x, half y) {
3892 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3893 }
3894
3901 inline constexpr bool islessgreater(half x, half y) {
3902 return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y);
3903 }
3904
3911 inline constexpr bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
3912
3917
3931 template<class T,class U> T half_cast(U arg) { return detail::half_caster<T,U>::cast(arg); }
3932
3947 template<class T,std::float_round_style R,class U> T half_cast(U arg) { return detail::half_caster<T,U,R>::cast(arg); }
3949
3954
3962 inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; }
3963
3971 inline int fetestexcept(int excepts) { return detail::errflags() & excepts; }
3972
3982 inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; }
3983
3992 inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; }
3993
4003 inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; }
4004
4016 inline void fethrowexcept([[maybe_unused]] int excepts, const char *msg = "") {
4017 excepts &= detail::errflags();
4018#if HALF_ERRHANDLING_THROWS
4019 #ifdef HALF_ERRHANDLING_THROW_INVALID
4020 if(excepts & FE_INVALID)
4021 throw std::domain_error(msg);
4022 #endif
4023 #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO
4024 if(excepts & FE_DIVBYZERO)
4025 throw std::domain_error(msg);
4026 #endif
4027 #ifdef HALF_ERRHANDLING_THROW_OVERFLOW
4028 if(excepts & FE_OVERFLOW)
4029 throw std::overflow_error(msg);
4030 #endif
4031 #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW
4032 if(excepts & FE_UNDERFLOW)
4033 throw std::underflow_error(msg);
4034 #endif
4035 #ifdef HALF_ERRHANDLING_THROW_INEXACT
4036 if(excepts & FE_INEXACT)
4037 throw std::range_error(msg);
4038 #endif
4039#else
4040 std::fprintf(stderr, "%s\n", msg);
4041 std::terminate();
4042#endif
4043 }
4044
4045}
4046
// Remove helper macros so they do not leak out of this header.
// NOTE(review): the matching definitions are outside this view - presumably earlier in the file.
4047#undef HALF_UNUSED_NOERR
4048#undef constexpr_NOERR
4049#undef HALF_TWOS_COMPLEMENT_INT
// Pop the warning state only if HALF_POP_WARNINGS was defined, then retire that marker too.
4050#ifdef HALF_POP_WARNINGS
4051 #pragma warning(pop)
4052 #undef HALF_POP_WARNINGS
4053#endif
4054
// Restore the diagnostic state saved at the top of this header.
// Clang is tested first because it also defines __GNUC__; testing __GNUC__ first would make
// the clang branch unreachable. This order mirrors the push sequence at the top of the file.
#if defined(__clang__)
#    pragma clang diagnostic pop
#elif defined(__GNUC__)
#    pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#    pragma warning(pop)
#endif
4062
Half-precision floating-point type.
half(T rhs)
Conversion constructor.
friend constexpr bool isgreater(half, half)
Quiet comparison for greater than.
friend constexpr bool isgreaterequal(half, half)
Quiet comparison for greater equal.
friend half cosh(half)
Hyperbolic cosine.
friend long long llrint(half)
Nearest integer using half's internal rounding mode.
friend constexpr half fabs(half)
Absolute value.
friend half erf(half)
Error function.
friend half tgamma(half)
Gamma function.
friend half expm1(half)
Exponential minus one.
friend constexpr half operator+(half)
Identity.
half operator++(int)
Postfix increment.
friend constexpr int fpclassify(half)
Classify floating-point value.
friend constexpr bool islessequal(half, half)
Quiet comparison for less equal.
friend half fdim(half, half)
Positive difference.
friend half fma(half, half, half)
Fused multiply add.
friend half log10(half)
Common logarithm.
half & operator*=(half rhs)
Arithmetic assignment.
friend half log2(half)
Binary logarithm.
half & operator=(const T &rhs)
friend half modf(half, half *)
Extract integer and fractional parts.
friend constexpr half copysign(half, half)
Take sign.
friend half sinh(half)
Hyperbolic sine.
friend half atanh(half)
Hyperbolic area tangent.
friend constexpr bool signbit(half)
Check sign.
friend constexpr_NOERR half fmin(half, half)
Minimum of half expressions.
friend void sincos(half, half *, half *)
Compute sine and cosine simultaneously.
friend half acosh(half)
Hyperbolic area cosine.
friend half atan2(half, half)
Arc tangent function.
friend constexpr bool isfinite(half)
Check if finite number.
half & operator=(const float &rhs)
Assignment operator.
friend half log1p(half)
Natural logarithm plus one.
friend constexpr bool isinf(half)
Check for infinity.
friend half nextafter(half, half)
Next representable value.
friend half round(half)
Nearest integer.
friend half fmod(half, half)
Remainder of division.
friend long lround(half)
Nearest integer.
friend half sin(half)
Sine function.
friend constexpr bool isnan(half)
Check for NaN.
friend half floor(half)
Nearest integer not greater than half value.
half & operator--()
Prefix decrement.
half & operator+=(half rhs)
Arithmetic assignment.
friend half erfc(half)
Complementary error function.
friend half log(half)
Natural logarithm.
friend long long llround(half)
Nearest integer.
friend half acos(half)
Arc cosine function.
friend int ilogb(half)
Extract exponent.
friend constexpr bool isnormal(half)
Check if normal number.
friend half hypot(half, half)
Hypotenuse function.
friend half cos(half)
Cosine function.
friend half exp2(half)
Binary exponential.
friend half asinh(half)
Hyperbolic area sine.
detail::uint16 get_data() const
friend constexpr_NOERR bool operator==(half, half)
Comparison for equality.
friend half logb(half)
Extract exponent.
friend std::basic_ostream< charT, traits > & operator<<(std::basic_ostream< charT, traits > &, half)
Output operator.
friend half remquo(half, half, int *)
Remainder of division.
friend constexpr_NOERR half fmax(half, half)
Maximum of half expressions.
friend half tanh(half)
Hyperbolic tangent.
friend std::basic_istream< charT, traits > & operator>>(std::basic_istream< charT, traits > &, half &)
Input operator.
friend half scalbln(half, long)
Multiply by power of two.
friend half tan(half)
Tangent function.
friend half operator*(half, half)
Multiplication.
friend half operator/(half, half)
Division.
constexpr half() noexcept
Default constructor.
friend half frexp(half, int *)
Decompose floating-point number into significand and exponent.
friend half rint(half)
Nearest integer using half's internal rounding mode.
friend half exp(half)
Exponential function.
friend half nearbyint(half)
Nearest integer using half's internal rounding mode.
half & operator-=(half rhs)
Arithmetic assignment.
friend half nexttoward(half, long double)
Next representable value.
friend half trunc(half)
Nearest integer not greater in magnitude than half value.
half & operator/=(half rhs)
Arithmetic assignment.
friend constexpr half operator-(half)
Negation.
friend constexpr bool islessgreater(half, half)
Quiet comparison for less or greater.
half & operator++()
Prefix increment.
friend half atan(half)
Arc tangent function.
friend half pow(half, half)
Power function.
friend half sqrt(half)
Square root.
friend half nanh(const char *)
Get NaN value.
half operator--(int)
Postfix decrement.
friend half ceil(half)
Nearest integer not less than half value.
friend half cbrt(half)
Cubic root.
friend long lrint(half)
Nearest integer using half's internal rounding mode.
friend half asin(half)
Arc sine.
friend half lgamma(half)
Natural logarithm of gamma function.
friend half remainder(half, half)
Remainder of division.
friend constexpr_NOERR std::partial_ordering operator<=>(half, half)
friend constexpr bool isless(half, half)
Quiet comparison for less than.
static constexpr bool is_modulo
Doesn't provide modulo arithmetic.
static constexpr float_round_style round_style
Rounding mode.
static constexpr int max_digits10
Required decimal digits to represent all possible values.
static constexpr bool tinyness_before
Does not support pre-rounding underflow detection.
static constexpr bool is_integer
Is not an integer type.
static constexpr int radix
Number base.
static constexpr half_float::half lowest() noexcept
Smallest finite value.
static constexpr half_float::half epsilon() noexcept
Difference between 1 and next representable value.
static constexpr int digits
Significant digits.
static constexpr bool traps
Traps only if HALF_ERRHANDLING_THROW_... is activated.
static constexpr int digits10
Significant decimal digits.
static constexpr int min_exponent
One more than smallest exponent.
static constexpr bool has_infinity
Supports infinity.
static constexpr bool is_iec559
IEEE conformant.
static constexpr int min_exponent10
Smallest normalized representable power of 10.
static constexpr half_float::half round_error() noexcept
Maximum rounding error in ULP (units in the last place).
static constexpr half_float::half infinity() noexcept
Positive infinity.
static constexpr float_denorm_style has_denorm
Supports subnormal values.
static constexpr bool has_denorm_loss
Supports no denormalization detection.
static constexpr half_float::half max() noexcept
Largest finite value.
static constexpr half_float::half denorm_min() noexcept
Smallest positive subnormal value.
static constexpr bool is_signed
Supports signed values.
static constexpr half_float::half min() noexcept
Smallest positive normal value.
static constexpr half_float::half quiet_NaN() noexcept
Quiet NaN.
static constexpr int max_exponent10
Largest finitely representable power of 10.
static constexpr half_float::half signaling_NaN() noexcept
Signaling NaN.
static constexpr bool has_signaling_NaN
Supports signaling NaNs.
static constexpr bool has_quiet_NaN
Supports quiet NaNs.
static constexpr bool is_bounded
Has a finite set of values.
static constexpr int max_exponent
One more than largest exponent.
static constexpr bool is_exact
Is not exact.
static constexpr bool is_specialized
Is template specialization.
#define FP_ILOGBNAN
#define FP_NAN
#define FP_ZERO
#define constexpr_NOERR
Definition float16_t.hpp:70
#define FP_INFINITE
#define FE_UNDERFLOW
#define FE_INVALID
#define FE_INEXACT
#define FP_ILOGB0
#define HALF_UNUSED_NOERR(name)
Definition float16_t.hpp:63
#define FE_DIVBYZERO
#define FP_NORMAL
#define FE_OVERFLOW
#define FP_SUBNORMAL
#define HALF_ROUND_STYLE
Default rounding mode.
#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
Raise INEXACT exception on underflow.
unsigned int integral(unsigned int value)
Round half-precision number to nearest integer value.
uint32 exp2(uint32 m, unsigned int n=32)
Fixed point binary exponential.
constexpr_NOERR unsigned int underflow(unsigned int sign=0)
Half-precision underflow.
bool builtin_signbit(T arg)
Check sign.
unsigned int hypot_post(uint32 r, int exp)
Hypotenuse square root and postprocessing.
constexpr_NOERR bool compsignal(unsigned int x, unsigned int y)
Check and signal for any NaN.
bool builtin_isnan(T arg)
Check for NaN.
constexpr binary_t binary
Tag for binary construction.
std::uint_fast32_t uint32
Fastest unsigned integer of (at least) 32 bits width.
unsigned int fixed2half(uint32 m, int exp=14, unsigned int sign=0, int s=0)
Convert fixed point to half-precision floating-point.
unsigned int mod(unsigned int x, unsigned int y, int *quo=NULL)
Half precision positive modulus.
uint32 sqrt(uint32 &r, int &exp)
Fixed point square root.
float half2float_impl(unsigned int value, float, true_type)
Convert half-precision to IEEE single-precision.
constexpr_NOERR unsigned int check_underflow(unsigned int arg)
Check value for underflow.
std::pair< uint32, uint32 > sincos(uint32 mz, unsigned int n=31)
Fixed point sine and cosine.
uint32 multiply64(uint32 x, uint32 y)
64-bit multiplication.
int & errflags()
Internal exception flags.
constexpr_NOERR unsigned int invalid()
Raise domain error and return NaN.
T half2int(unsigned int value)
Convert half-precision floating-point to integer.
uint32 mulhi(uint32 x, uint32 y)
Upper part of 64-bit multiplication.
uint32 atan2(uint32 my, uint32 mx, unsigned int n=31)
Fixed point arc tangent.
constexpr_NOERR unsigned int signal(unsigned int nan)
Signal and silence signaling NaN.
unsigned int float2half(T value)
Convert floating-point to half-precision.
std::pair< uint32, uint32 > hyperbolic_args(unsigned int abs, int &exp, unsigned int n=32)
Get exponentials for hyperbolic computation.
unsigned int gamma(unsigned int arg)
Gamma function and postprocessing.
constexpr_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
Select value or signaling NaN.
constexpr_NOERR unsigned int overflow(unsigned int sign=0)
Half-precision overflow.
constexpr_NOERR unsigned int pole(unsigned int sign=0)
Raise pole error and return infinity.
uint32 arithmetic_shift(uint32 arg, int i)
Platform-independent arithmetic right shift.
std::pair< uint32, uint32 > atan2_args(unsigned int abs)
Get arguments for atan2 function.
uint32 log2(uint32 m, unsigned int n=32)
Fixed point binary logarithm.
T half2float(unsigned int value)
Convert half-precision to floating-point.
uint32 divide64(uint32 x, uint32 y, int &s)
64-bit division.
void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true)
Raise floating-point exception.
uint32 angle_arg(unsigned int abs, int &k)
Reduce argument for trigonometric functions.
unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign=0)
Postprocessing for binary logarithm.
unsigned int area(unsigned int arg)
Area function and postprocessing.
unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign=0)
Postprocessing for binary exponential.
std::int_fast32_t int32
Fastest signed integer of (at least) 32 bits width.
unsigned int float2half_impl(float value, true_type)
Convert IEEE single-precision to half-precision.
typename bits< T >::type bits_t
uint32 sign_mask(uint32 arg)
Platform-independent sign mask.
std::uint_least16_t uint16
Unsigned integer of (at least) 16 bits width.
bool builtin_isinf(T arg)
Check for infinity.
unsigned int erf(unsigned int arg)
Error function and postprocessing.
constexpr_NOERR unsigned int rounded(unsigned int value, int g, int s)
Round half-precision number.
unsigned int int2half(T value)
Convert integer to half-precision floating-point.
unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign=0)
Division and postprocessing for tangents.
Library-defined half-precision literals.
Main namespace for half-precision functionality.
half asinh(half arg)
Hyperbolic area sine.
half sinh(half arg)
Hyperbolic sine.
constexpr_NOERR half fmax(half x, half y)
Maximum of half expressions.
int feclearexcept(int excepts)
Clear exception flags.
half nextafter(half from, half to)
Next representable value.
half atan(half arg)
Arc tangent function.
half hypot(half x, half y)
Hypotenuse function.
half fdim(half x, half y)
Positive difference.
half remquo(half x, half y, int *quo)
Remainder of division.
int fegetexceptflag(int *flagp, int excepts)
Save exception flags.
constexpr bool isfinite(half arg)
Check if finite number.
int ilogb(half arg)
Extract exponent.
half lgamma(half arg)
Natural logarithm of gamma function.
int fesetexceptflag(const int *flagp, int excepts)
Restore exception flags.
half fma(half x, half y, half z)
Fused multiply add.
half nearbyint(half arg)
Nearest integer using half's internal rounding mode.
constexpr half abs(half arg)
Absolute value.
half expm1(half arg)
Exponential minus one.
half ldexp(half arg, int exp)
Multiply by power of two.
half sin(half arg)
Sine function.
half tanh(half arg)
Hyperbolic tangent.
half rint(half arg)
Nearest integer using half's internal rounding mode.
T half_cast(U arg)
Cast to or from half-precision floating-point number.
half fmod(half x, half y)
Remainder of division.
constexpr bool islessgreater(half x, half y)
Quiet comparison for less or greater.
half log(half arg)
Natural logarithm.
half cos(half arg)
Cosine function.
half scalbn(half arg, int exp)
Multiply by power of two.
half exp2(half arg)
Binary exponential.
constexpr bool isless(half x, half y)
Quiet comparison for less than.
half atanh(half arg)
Hyperbolic area tangent.
std::basic_istream< charT, traits > & operator>>(std::basic_istream< charT, traits > &in, half &arg)
Input operator.
long long llround(half arg)
Nearest integer.
half nexttoward(half from, long double to)
Next representable value.
half round(half arg)
Nearest integer.
half log2(half arg)
Binary logarithm.
half asin(half arg)
Arc sine.
half sqrt(half arg)
Square root.
half trunc(half arg)
Nearest integer not greater in magnitude than half value.
half erfc(half arg)
Complementary error function.
half tan(half arg)
Tangent function.
std::basic_ostream< charT, traits > & operator<<(std::basic_ostream< charT, traits > &out, half arg)
Output operator.
half log10(half arg)
Common logarithm.
half floor(half arg)
Nearest integer not greater than half value.
half acosh(half arg)
Hyperbolic area cosine.
constexpr bool isnan(half arg)
Check for NaN.
constexpr half operator-(half arg)
Negation.
half operator*(half x, half y)
Multiplication.
long long llrint(half arg)
Nearest integer using half's internal rounding mode.
half atan2(half y, half x)
Arc tangent function.
int feraiseexcept(int excepts)
Raise exception flags.
half scalbln(half arg, long exp)
Multiply by power of two.
half tgamma(half arg)
Gamma function.
constexpr bool signbit(half arg)
Check sign.
long lrint(half arg)
Nearest integer using half's internal rounding mode.
constexpr bool isinf(half arg)
Check for infinity.
constexpr half copysign(half x, half y)
Take sign.
half cosh(half arg)
Hyperbolic cosine.
half logb(half arg)
Extract exponent.
constexpr bool islessequal(half x, half y)
Quiet comparison for less equal.
half erf(half arg)
Error function.
void sincos(half arg, half *sin, half *cos)
Compute sine and cosine simultaneously.
half ceil(half arg)
Nearest integer not less than half value.
half frexp(half arg, int *exp)
Decompose floating-point number into significand and exponent.
constexpr bool isunordered(half x, half y)
Quiet check if unordered.
half log1p(half arg)
Natural logarithm plus one.
constexpr_NOERR half fmin(half x, half y)
Minimum of half expressions.
constexpr int fpclassify(half arg)
Classify floating-point value.
long lround(half arg)
Nearest integer.
half acos(half arg)
Arc cosine function.
constexpr bool isgreater(half x, half y)
Quiet comparison for greater than.
half pow(half x, half y)
Power function.
half nanh(const char *arg)
Get NaN value.
half modf(half arg, half *iptr)
Extract integer and fractional parts.
half cbrt(half arg)
Cubic root.
constexpr bool isgreaterequal(half x, half y)
Quiet comparison for greater equal.
void fethrowexcept(int excepts, const char *msg="")
Throw C++ exceptions based on set exception flags.
int fetestexcept(int excepts)
Test exception flags.
half exp(half arg)
Exponential function.
constexpr_NOERR std::partial_ordering operator<=>(half x, half y)
constexpr bool isnormal(half arg)
Check if normal number.
half remainder(half x, half y)
Remainder of division.
half operator/(half x, half y)
Division.
constexpr half fabs(half arg)
Absolute value.
constexpr half operator+(half arg)
Identity.
constexpr_NOERR bool operator==(half x, half y)
Comparison for equality.
Extensions to the C++ standard library.
Tag type for binary construction.
Type traits for floating-point bits.
Helper for tag dispatching.
Class for 1.31 unsigned floating-point computation.
friend f31 operator-(f31 a, f31 b)
Subtraction operator.
friend f31 operator*(f31 a, f31 b)
Multiplication operator.
friend f31 operator/(f31 a, f31 b)
Division operator.
constexpr f31(uint32 mant, int e)
Constructor.
friend f31 operator+(f31 a, f31 b)
Addition operator.
f31(unsigned int abs)
Constructor.
uint32 m
mantissa as 1.31.
Helper class for half casts.
Type traits for floating-point types.
result_type operator()(argument_type arg) const
Compute hash function.
size_t result_type
Function return type.
half_float::half argument_type
Type of function argument.