sparrow 1.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
float16_t.hpp
Go to the documentation of this file.
1// half - IEEE 754-based half-precision floating-point library.
2//
3// Copyright (c) 2012-2019 Christian Rau <rauy@users.sourceforge.net>
4// Copyright (c) 2020 0xBYTESHIFT
5//
6// Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation
7// files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy,
8// modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the
9// Software is furnished to do so, subject to the following conditions:
10//
11// The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
12//
13// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE
14// WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
15// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
16// ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
17
20
21#pragma once
22
23// Disable some warnings so that it builds with sparrow build options
24#if defined(__clang__)
25# pragma clang diagnostic push
26# pragma clang diagnostic ignored "-Wconversion"
27# pragma clang diagnostic ignored "-Wsign-conversion"
28# pragma clang diagnostic ignored "-Wold-style-cast"
29# pragma clang diagnostic ignored "-Wdeprecated-declarations"
30# pragma clang diagnostic ignored "-Wshadow"
31#elif defined(__GNUC__)
32# pragma GCC diagnostic push
33# pragma GCC diagnostic ignored "-Wconversion"
34# pragma GCC diagnostic ignored "-Wsign-conversion"
35# pragma GCC diagnostic ignored "-Wold-style-cast"
36#elif defined(_MSC_VER)
37# pragma warning(push)
38# pragma warning(disable : 4127) // conditional expression is constant
39# pragma warning(disable : 4146) // unary minus operator applied to unsigned type, result still unsigned
40# pragma warning(disable : 4365) // 'action' : conversion from 'type_1' to 'type_2', signed/unsigned
41 // mismatch
42# pragma warning(disable : 4514) // 'function' : unreferenced inline function has been removed
43# pragma warning(disable : 4668) // 'symbol' is not defined as a preprocessor macro, replacing with
44 // '0' for 'directives'
45# pragma warning(disable : 4996) // std::float_denorm_style
46
47#endif
48
49
// Selects the shift-based code paths of detail::sign_mask() and
// detail::arithmetic_shift() (both test this macro with #if).
50#define HALF_TWOS_COMPLEMENT_INT 1
51
52// any error throwing C++ exceptions?
// HALF_ERRHANDLING_THROWS becomes 1 as soon as at least one of the
// HALF_ERRHANDLING_THROW_* macros is defined by the user.
53#if defined(HALF_ERRHANDLING_THROW_INVALID) || defined(HALF_ERRHANDLING_THROW_DIVBYZERO) || defined(HALF_ERRHANDLING_THROW_OVERFLOW) || defined(HALF_ERRHANDLING_THROW_UNDERFLOW) || defined(HALF_ERRHANDLING_THROW_INEXACT)
54#define HALF_ERRHANDLING_THROWS 1
55#endif
56
57// any error handling enabled?
// Expands to an ||-expression over the individual switches. It is only used
// inside #if, where identifiers that are not defined as macros evaluate to 0,
// so none of the four switches has to be defined for this to be well-formed.
58#define HALF_ERRHANDLING (HALF_ERRHANDLING_FLAGS||HALF_ERRHANDLING_ERRNO||HALF_ERRHANDLING_FENV||HALF_ERRHANDLING_THROWS)
59
// HALF_UNUSED_NOERR(name) keeps the parameter name when error handling is
// enabled and drops it otherwise, leaving the parameter unnamed in builds
// where the function body never reads it.
60#if HALF_ERRHANDLING
61 #define HALF_UNUSED_NOERR(name) name
62#else
63 #define HALF_UNUSED_NOERR(name)
64#endif
65
66// support constexpr
// constexpr_NOERR is `constexpr` only when error handling is disabled; with
// error handling enabled it expands to nothing.
67#if HALF_ERRHANDLING
68 #define constexpr_NOERR
69#else
70 #define constexpr_NOERR constexpr
71#endif
72
73#include <utility>
74#include <algorithm>
75#include <istream>
76#include <ostream>
77#include <limits>
78#include <stdexcept>
79#include <climits>
80#include <cmath>
81#include <cstring>
82#include <cstdlib>
83#include <type_traits>
84#include <cstdint>
85#if HALF_ERRHANDLING_ERRNO
86 #include <cerrno>
87#endif
88#include <cfenv>
89#include <functional>
90
91#if defined(__cpp_lib_format)
92# include <format>
93# include <sstream>
94#endif
95
96#include <concepts>
97
// Use F16C conversion intrinsics unless the user already decided. The default
// forwards to __F16C__; when that identifier is not a macro it evaluates to 0
// in the #if below and the <immintrin.h> include is skipped.
// NOTE(review): presumably __F16C__ is predefined by the compiler when F16C
// code generation is enabled - confirm for every supported toolchain.
98#ifndef HALF_ENABLE_F16C_INTRINSICS
105 #define HALF_ENABLE_F16C_INTRINSICS __F16C__
106#endif
107
108#if HALF_ENABLE_F16C_INTRINSICS
109 #include <immintrin.h>
110#endif
111
// When non-zero, detail::raise() additionally raises FE_INEXACT for an
// FE_OVERFLOW that does not already carry FE_INEXACT. Defaults to 1.
112#ifndef HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
116#define HALF_ERRHANDLING_OVERFLOW_TO_INEXACT 1
117#endif
118
// Same escalation for FE_UNDERFLOW; also disables the explicit underflow
// check in detail::check_underflow(). Defaults to 1.
119#ifndef HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
126#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT 1
127#endif
128
// Default rounding mode, converted to std::float_round_style by the
// single-template-argument detail::float2half() overload.
147#ifndef HALF_ROUND_STYLE
148 #define HALF_ROUND_STYLE 1 // = std::round_to_nearest
149#endif
150
// Half-precision counterpart of HUGE_VAL: expands to the infinity reported by
// std::numeric_limits<half_float::half>.
156#define HUGE_VALH std::numeric_limits<half_float::half>::infinity()
157
// NOTE(review): presumably mirrors FP_FAST_FMA for the half type - confirm.
163#define FP_FAST_FMAH 1
164
// Alias that exposes the configured default rounding style.
170#define HLF_ROUNDS HALF_ROUND_STYLE
171
// Fallback definitions for classification/ilogb macros. Each one is only
// defined when the included standard headers did not already provide it.
172#ifndef FP_ILOGB0
173 #define FP_ILOGB0 INT_MIN
174#endif
175#ifndef FP_ILOGBNAN
176 #define FP_ILOGBNAN INT_MAX
177#endif
178#ifndef FP_SUBNORMAL
179 #define FP_SUBNORMAL 0
180#endif
181#ifndef FP_ZERO
182 #define FP_ZERO 1
183#endif
184#ifndef FP_NAN
185 #define FP_NAN 2
186#endif
187#ifndef FP_INFINITE
188 #define FP_INFINITE 3
189#endif
190#ifndef FP_NORMAL
191 #define FP_NORMAL 4
192#endif
193
// Fallback for platforms whose <cfenv> does not define FE_ALL_EXCEPT at all:
// provide library-private bit values for the five exception flags.
// NOTE(review): this block defines the individual FE_* macros without an
// #ifndef guard; it assumes that a missing FE_ALL_EXCEPT implies all of them
// are missing too - confirm.
194#if !defined(FE_ALL_EXCEPT)
195 #define FE_INVALID 0x10
196 #define FE_DIVBYZERO 0x08
197 #define FE_OVERFLOW 0x04
198 #define FE_UNDERFLOW 0x02
199 #define FE_INEXACT 0x01
200 #define FE_ALL_EXCEPT (FE_INVALID|FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW|FE_INEXACT)
201#endif
202
203#ifdef __EMSCRIPTEN__
204// Emscripten defines FE_ALL_EXCEPT as 0, which causes the fallback above to be skipped,
205// but does not define the individual FE_* macros. So we patch them manually here.
206
// The bit values below differ from the generic fallback above. Both sets are
// distinct single bits, and every definition here is guarded by #ifndef, so
// the two blocks never redefine the same macro.
207#ifndef FE_INEXACT
208#define FE_INEXACT 0x02
209#endif
210#ifndef FE_INVALID
211#define FE_INVALID 0x04
212#endif
213#ifndef FE_OVERFLOW
214#define FE_OVERFLOW 0x08
215#endif
216#ifndef FE_UNDERFLOW
217#define FE_UNDERFLOW 0x10
218#endif
219#ifndef FE_DIVBYZERO
220#define FE_DIVBYZERO 0x01
221#endif
222#ifndef FE_ALL_EXCEPT
223#define FE_ALL_EXCEPT (FE_INVALID | FE_DIVBYZERO | FE_OVERFLOW | FE_UNDERFLOW | FE_INEXACT)
224#endif
225#endif // __EMSCRIPTEN__
226
227
230namespace half_float {
231 class half;
232
233}
234
// Makes half_float::half report as a signed, scalar floating-point type to
// code that queries the standard type traits.
//
// NOTE(review): the C++ standard states that adding specializations of the
// type traits in <type_traits> (and of standard variable templates such as the
// *_v helpers) is undefined behaviour unless explicitly permitted. This works
// on the toolchains the project builds with, but it is not guaranteed;
// a project-owned trait would be the conforming alternative.
235namespace std
236{
237 template <>
238 struct is_floating_point<half_float::half> : std::true_type
239 {};
240
241 // Looks like some compiler have a definition of
242 // is_floating_point_v independent from is_floating_point ...
243 template <>
244 inline constexpr bool is_floating_point_v<half_float::half> = true;
245
246 template <>
247 struct is_scalar<half_float::half> : std::true_type
248 {};
249
250 template <>
251 inline constexpr bool is_scalar_v<half_float::half> = true;
252
253 template <>
254 struct is_signed<half_float::half> : std::true_type
255 {};
256
257 template <>
258 inline constexpr bool is_signed_v<half_float::half> = true;
259}
260
261namespace half_float {
268 namespace literal {
269 half operator ""_h(long double);
270 }
271
274 namespace detail {
/// Conditional type selection; thin wrapper deriving from `std::conditional`.
template<bool B, class T, class F>
struct conditional : std::conditional<B, T, F> {};

/// Compile-time boolean constant, used as a tag type for overload dispatch
/// (see the `float2half_impl` overloads taking `true_type`).
template<bool B>
struct bool_type : std::integral_constant<bool, B> {};
using std::true_type;
using std::false_type;

/// Floating-point detection trait; forwards to `std::is_floating_point`.
template<class T>
struct is_float : std::is_floating_point<T> {};

/// Maps a type to the unsigned integer type used to hold its bit pattern.
/// The primary template falls back to `unsigned char`; cv-qualified types
/// defer to the mapping of the unqualified type.
template<class T> struct bits { using type = unsigned char; };
template<class T> struct bits<const T> : bits<T> {};
template<class T> struct bits<volatile T> : bits<T> {};
template<class T> struct bits<const volatile T> : bits<T> {};

/// Unsigned integer with at least 16 bits.
using uint16 = std::uint_least16_t;

/// Fastest unsigned integer with at least 32 bits.
using uint32 = std::uint_fast32_t;

/// Fastest signed integer with at least 32 bits.
using int32 = std::int_fast32_t;

/// Bit-pattern carrier for `float`.
template<> struct bits<float> { using type = std::uint_least32_t; };

/// Bit-pattern carrier for `double`.
template<> struct bits<double> { using type = std::uint_least64_t; };

/// Shorthand for `bits<T>::type`.
template<class T> using bits_t = typename bits<T>::type;

#ifdef HALF_ARITHMETIC_TYPE
/// User-selected type for internal arithmetic (only when HALF_ARITHMETIC_TYPE is defined).
using internal_t = HALF_ARITHMETIC_TYPE;
#endif

/// Empty tag type.
/// NOTE(review): presumably selects construction from a raw bit pattern - the
/// consumers are outside this part of the file, confirm there.
struct binary_t {};

/// The single tag object of type `binary_t`. Declared `inline` so that every
/// translation unit including this header refers to the same object.
inline constexpr binary_t binary{};
318
321
/// Checks a built-in floating-point value for infinity.
/// \tparam T argument type accepted by `std::isinf`
/// \param arg value to classify
/// \return the result of `std::isinf(arg)`
template<class T>
bool builtin_isinf(T arg)
{
    const bool infinite = std::isinf(arg);
    return infinite;
}
328
/// Checks a built-in floating-point value for NaN.
/// \tparam T argument type accepted by `std::isnan`
/// \param arg value to classify
/// \return the result of `std::isnan(arg)`
template<class T>
bool builtin_isnan(T arg)
{
    const bool not_a_number = std::isnan(arg);
    return not_a_number;
}
335
/// Reads the sign bit of a built-in floating-point value.
/// \tparam T argument type accepted by `std::signbit`
/// \param arg value to inspect
/// \return the result of `std::signbit(arg)`
template<class T>
bool builtin_signbit(T arg)
{
    const bool negative = std::signbit(arg);
    return negative;
}
342
347 inline uint32 sign_mask(uint32 arg) {
348 static const int N = std::numeric_limits<uint32>::digits - 1;
349 #if HALF_TWOS_COMPLEMENT_INT
350 return static_cast<int32>(arg) >> N;
351 #else
352 return -((arg>>N)&1);
353 #endif
354 }
355
360 inline uint32 arithmetic_shift(uint32 arg, int i) {
361 #if HALF_TWOS_COMPLEMENT_INT
362 return static_cast<int32>(arg) >> i;
363 #else
364 return static_cast<int32>(arg)/(static_cast<int32>(1)<<i) - ((arg>>(std::numeric_limits<uint32>::digits-1))&1);
365 #endif
366 }
367
371
/// Access to the calling thread's accumulated exception flags.
/// `raise()` ORs into this word when HALF_ERRHANDLING_FLAGS is enabled.
/// \return reference to a `thread_local` integer that starts out as 0
inline int& errflags()
{
    thread_local int pending_flags = 0;
    return pending_flags;
}
375
379 inline void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond) = true) {
380 #if HALF_ERRHANDLING
381 if(!cond)
382 return;
383 #if HALF_ERRHANDLING_FLAGS
384 errflags() |= flags;
385 #endif
386 #if HALF_ERRHANDLING_ERRNO
387 if(flags & FE_INVALID)
388 errno = EDOM;
389 else if(flags & (FE_DIVBYZERO|FE_OVERFLOW|FE_UNDERFLOW))
390 errno = ERANGE;
391 #endif
392 #if HALF_ERRHANDLING_FENV
393 std::feraiseexcept(flags);
394 #endif
395 #ifdef HALF_ERRHANDLING_THROW_INVALID
396 if(flags & FE_INVALID)
397 throw std::domain_error(HALF_ERRHANDLING_THROW_INVALID);
398 #endif
399 #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO
400 if(flags & FE_DIVBYZERO)
401 throw std::domain_error(HALF_ERRHANDLING_THROW_DIVBYZERO);
402 #endif
403 #ifdef HALF_ERRHANDLING_THROW_OVERFLOW
404 if(flags & FE_OVERFLOW)
405 throw std::overflow_error(HALF_ERRHANDLING_THROW_OVERFLOW);
406 #endif
407 #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW
408 if(flags & FE_UNDERFLOW)
409 throw std::underflow_error(HALF_ERRHANDLING_THROW_UNDERFLOW);
410 #endif
411 #ifdef HALF_ERRHANDLING_THROW_INEXACT
412 if(flags & FE_INEXACT)
413 throw std::range_error(HALF_ERRHANDLING_THROW_INEXACT);
414 #endif
415 #if HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
416 if((flags & FE_UNDERFLOW) && !(flags & FE_INEXACT))
418 #endif
419 #if HALF_ERRHANDLING_OVERFLOW_TO_INEXACT
420 if((flags & FE_OVERFLOW) && !(flags & FE_INEXACT))
422 #endif
423 #endif
424 }
425
432 inline constexpr_NOERR bool compsignal(unsigned int x, unsigned int y) {
433 #if HALF_ERRHANDLING
434 raise(FE_INVALID, (x&0x7FFF)>0x7C00 || (y&0x7FFF)>0x7C00);
435 #endif
436 return (x&0x7FFF) > 0x7C00 || (y&0x7FFF) > 0x7C00;
437 }
438
443 inline constexpr_NOERR unsigned int signal(unsigned int nan) {
444 #if HALF_ERRHANDLING
445 raise(FE_INVALID, !(nan&0x200));
446 #endif
447 return nan | 0x200;
448 }
449
455 inline constexpr_NOERR unsigned int signal(unsigned int x, unsigned int y) {
456 #if HALF_ERRHANDLING
457 raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)));
458 #endif
459 return ((x&0x7FFF)>0x7C00) ? (x|0x200) : (y|0x200);
460 }
461
468 inline constexpr_NOERR unsigned int signal(unsigned int x, unsigned int y, unsigned int z) {
469 #if HALF_ERRHANDLING
470 raise(FE_INVALID, ((x&0x7FFF)>0x7C00 && !(x&0x200)) || ((y&0x7FFF)>0x7C00 && !(y&0x200)) || ((z&0x7FFF)>0x7C00 && !(z&0x200)));
471 #endif
472 return ((x&0x7FFF)>0x7C00) ? (x|0x200) : ((y&0x7FFF)>0x7C00) ? (y|0x200) : (z|0x200);
473 }
474
480 inline constexpr_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y)) {
481 #if HALF_ERRHANDLING
482 return (((y&0x7FFF)>0x7C00) && !(y&0x200)) ? signal(y) : x;
483 #else
484 return x;
485 #endif
486 }
487
491 inline constexpr_NOERR unsigned int invalid() {
492 #if HALF_ERRHANDLING
494 #endif
495 return 0x7FFF;
496 }
497
502 inline constexpr_NOERR unsigned int pole(unsigned int sign = 0) {
503 #if HALF_ERRHANDLING
505 #endif
506 return sign | 0x7C00;
507 }
508
513 inline constexpr_NOERR unsigned int check_underflow(unsigned int arg) {
514 #if HALF_ERRHANDLING && !HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
515 raise(FE_UNDERFLOW, !(arg&0x7C00));
516 #endif
517 return arg;
518 }
519
523
529 template<std::float_round_style R> constexpr_NOERR unsigned int overflow(unsigned int sign = 0) {
530 #if HALF_ERRHANDLING
532 #endif
533 return (R==std::round_toward_infinity) ? (sign+0x7C00-(sign>>15)) :
534 (R==std::round_toward_neg_infinity) ? (sign+0x7BFF+(sign>>15)) :
535 (R==std::round_toward_zero) ? (sign|0x7BFF) :
536 (sign|0x7C00);
537 }
538
544 template<std::float_round_style R> constexpr_NOERR unsigned int underflow(unsigned int sign = 0) {
545 #if HALF_ERRHANDLING
547 #endif
548 return (R==std::round_toward_infinity) ? (sign+1-(sign>>15)) :
549 (R==std::round_toward_neg_infinity) ? (sign+(sign>>15)) :
550 sign;
551 }
552
563 template<std::float_round_style R,bool I> constexpr_NOERR unsigned int rounded(unsigned int value, int g, int s) {
564 #if HALF_ERRHANDLING
565 value += (R==std::round_to_nearest) ? (g&(s|value)) :
566 (R==std::round_toward_infinity) ? (~(value>>15)&(g|s)) :
567 (R==std::round_toward_neg_infinity) ? ((value>>15)&(g|s)) : 0;
568 if((value&0x7C00) == 0x7C00)
570 else if(value & 0x7C00)
571 raise(FE_INEXACT, I || (g|s)!=0);
572 else
574 return value;
575 #else
576 return (R==std::round_to_nearest) ? (value+(g&(s|value))) :
577 (R==std::round_toward_infinity) ? (value+(~(value>>15)&(g|s))) :
578 (R==std::round_toward_neg_infinity) ? (value+((value>>15)&(g|s))) :
579 value;
580 #endif
581 }
582
591 template<std::float_round_style R,bool E,bool I> unsigned int integral(unsigned int value) {
592 unsigned int abs = value & 0x7FFF;
593 if(abs < 0x3C00) {
594 raise(FE_INEXACT, I);
595 return ((R==std::round_to_nearest) ? (0x3C00&-static_cast<unsigned>(abs>=(0x3800+E))) :
596 (R==std::round_toward_infinity) ? (0x3C00&-(~(value>>15)&(abs!=0))) :
597 (R==std::round_toward_neg_infinity) ? (0x3C00&-static_cast<unsigned>(value>0x8000)) :
598 0) | (value&0x8000);
599 }
600 if(abs >= 0x6400)
601 return (abs>0x7C00) ? signal(value) : value;
602 unsigned int exp = 25 - (abs>>10), mask = (1<<exp) - 1;
603 raise(FE_INEXACT, I && (value&mask));
604 return (( (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(value>>exp)&E)) :
605 (R==std::round_toward_infinity) ? (mask&((value>>15)-1)) :
606 (R==std::round_toward_neg_infinity) ? (mask&-(value>>15)) :
607 0) + value) & ~mask;
608 }
609
624 template<std::float_round_style R,unsigned int F,bool S,bool N,bool I> unsigned int fixed2half(uint32 m, int exp = 14, unsigned int sign = 0, int s = 0) {
625 if(S) {
626 uint32 msign = sign_mask(m);
627 m = (m^msign) - msign;
628 sign = msign & 0x8000;
629 }
630 if(N)
631 for(; m<(static_cast<uint32>(1)<<F) && exp; m<<=1,--exp) ;
632 else if(exp < 0)
633 return rounded<R,I>(sign+(m>>(F-10-exp)), (m>>(F-11-exp))&1, s|((m&((static_cast<uint32>(1)<<(F-11-exp))-1))!=0));
634 return rounded<R,I>(sign+(exp<<10)+(m>>(F-10)), (m>>(F-11))&1, s|((m&((static_cast<uint32>(1)<<(F-11))-1))!=0));
635 }
636
645 template<std::float_round_style R> unsigned int float2half_impl(float value, true_type) {
646 #if HALF_ENABLE_F16C_INTRINSICS
647 return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_set_ss(value),
648 (R==std::round_to_nearest) ? _MM_FROUND_TO_NEAREST_INT :
649 (R==std::round_toward_zero) ? _MM_FROUND_TO_ZERO :
650 (R==std::round_toward_infinity) ? _MM_FROUND_TO_POS_INF :
651 (R==std::round_toward_neg_infinity) ? _MM_FROUND_TO_NEG_INF :
652 _MM_FROUND_CUR_DIRECTION));
653 #else
654 bits_t<float> fbits;
655 std::memcpy(&fbits, &value, sizeof(float));
656 #if 1
657 unsigned int sign = (fbits>>16) & 0x8000;
658 fbits &= 0x7FFFFFFF;
659 if(fbits >= 0x7F800000)
660 return sign | 0x7C00 | ((fbits>0x7F800000) ? (0x200|((fbits>>13)&0x3FF)) : 0);
661 if(fbits >= 0x47800000)
662 return overflow<R>(sign);
663 if(fbits >= 0x38800000)
664 return rounded<R,false>(sign|(((fbits>>23)-112)<<10)|((fbits>>13)&0x3FF), (fbits>>12)&1, (fbits&0xFFF)!=0);
665 if(fbits >= 0x33000000)
666 {
667 int i = 125 - (fbits>>23);
668 fbits = (fbits&0x7FFFFF) | 0x800000;
669 return rounded<R,false>(sign|(fbits>>(i+1)), (fbits>>i)&1, (fbits&((static_cast<uint32>(1)<<i)-1))!=0);
670 }
671 if(fbits != 0)
672 return underflow<R>(sign);
673 return sign;
674 #else
675 static const uint16 base_table[512] = {
676 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
677 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
678 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
679 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
680 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
681 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
682 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0001, 0x0002, 0x0004, 0x0008, 0x0010, 0x0020, 0x0040, 0x0080, 0x0100,
683 0x0200, 0x0400, 0x0800, 0x0C00, 0x1000, 0x1400, 0x1800, 0x1C00, 0x2000, 0x2400, 0x2800, 0x2C00, 0x3000, 0x3400, 0x3800, 0x3C00,
684 0x4000, 0x4400, 0x4800, 0x4C00, 0x5000, 0x5400, 0x5800, 0x5C00, 0x6000, 0x6400, 0x6800, 0x6C00, 0x7000, 0x7400, 0x7800, 0x7BFF,
685 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
686 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
687 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
688 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
689 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
690 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF,
691 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7BFF, 0x7C00,
692 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
693 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
694 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
695 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
696 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
697 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000,
698 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8000, 0x8001, 0x8002, 0x8004, 0x8008, 0x8010, 0x8020, 0x8040, 0x8080, 0x8100,
699 0x8200, 0x8400, 0x8800, 0x8C00, 0x9000, 0x9400, 0x9800, 0x9C00, 0xA000, 0xA400, 0xA800, 0xAC00, 0xB000, 0xB400, 0xB800, 0xBC00,
700 0xC000, 0xC400, 0xC800, 0xCC00, 0xD000, 0xD400, 0xD800, 0xDC00, 0xE000, 0xE400, 0xE800, 0xEC00, 0xF000, 0xF400, 0xF800, 0xFBFF,
701 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
702 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
703 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
704 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
705 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
706 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF,
707 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFBFF, 0xFC00 };
708 static const unsigned char shift_table[256] = {
709 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
710 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
711 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,
712 25, 25, 25, 25, 25, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13,
713 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
714 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
715 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24,
716 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 13 };
717 int sexp = fbits >> 23, exp = sexp & 0xFF, i = shift_table[exp];
718 fbits &= 0x7FFFFF;
719 uint32 m = (fbits|((exp!=0)<<23)) & -static_cast<uint32>(exp!=0xFF);
720 return rounded<R,false>(base_table[sexp]+(fbits>>i), (m>>(i-1))&1, (((static_cast<uint32>(1)<<(i-1))-1)&m)!=0);
721 #endif
722 #endif
723 }
724
732 template<std::float_round_style R> unsigned int float2half_impl(double value, true_type) {
733 #if HALF_ENABLE_F16C_INTRINSICS
734 if(R == std::round_indeterminate)
735 return _mm_cvtsi128_si32(_mm_cvtps_ph(_mm_cvtpd_ps(_mm_set_sd(value)), _MM_FROUND_CUR_DIRECTION));
736 #endif
737 bits_t<double> dbits;
738 std::memcpy(&dbits, &value, sizeof(double));
739 uint32 hi = dbits >> 32, lo = dbits & 0xFFFFFFFF;
740 unsigned int sign = (hi>>16) & 0x8000;
741 hi &= 0x7FFFFFFF;
742 if(hi >= 0x7FF00000)
743 return sign | 0x7C00 | ((dbits&0xFFFFFFFFFFFFF) ? (0x200|((hi>>10)&0x3FF)) : 0);
744 if(hi >= 0x40F00000)
745 return overflow<R>(sign);
746 if(hi >= 0x3F100000)
747 return rounded<R,false>(sign|(((hi>>20)-1008)<<10)|((hi>>10)&0x3FF), (hi>>9)&1, ((hi&0x1FF)|lo)!=0);
748 if(hi >= 0x3E600000) {
749 int i = 1018 - (hi>>20);
750 hi = (hi&0xFFFFF) | 0x100000;
751 return rounded<R,false>(sign|(hi>>(i+1)), (hi>>i)&1, ((hi&((static_cast<uint32>(1)<<i)-1))|lo)!=0);
752 }
753 if((hi|lo) != 0)
754 return underflow<R>(sign);
755 return sign;
756 }
757
766 template<std::float_round_style R,class T> unsigned int float2half_impl(T value, ...) {
767 unsigned int hbits = static_cast<unsigned>(builtin_signbit(value)) << 15;
768 if(value == T())
769 return hbits;
770 if(builtin_isnan(value))
771 return hbits | 0x7FFF;
772 if(builtin_isinf(value))
773 return hbits | 0x7C00;
774 int exp;
775 std::frexp(value, &exp);
776 if(exp > 16)
777 return overflow<R>(hbits);
778 if(exp < -13)
779 value = std::ldexp(value, 25);
780 else {
781 value = std::ldexp(value, 12-exp);
782 hbits |= ((exp+13)<<10);
783 }
784 T ival, frac = std::modf(value, &ival);
785 int m = std::abs(static_cast<int>(ival));
786 return rounded<R,false>(hbits+(m>>1), m&1, frac!=T());
787 }
788
797 template<std::float_round_style R,class T> unsigned int float2half(T value) {
798 return float2half_impl<R>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
799 }
800 template<class T> unsigned int float2half(T value) {
801 return float2half_impl<(std::float_round_style)(HALF_ROUND_STYLE)>(value, bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
802 }
803
811 template<std::float_round_style R,class T> unsigned int int2half(T value) {
812 unsigned int bits = static_cast<unsigned>(value<0) << 15;
813 if(!value)
814 return bits;
815 if(bits)
816 value = -value;
817 if(value > 0xFFFF)
818 return overflow<R>(bits);
819 unsigned int m = static_cast<unsigned int>(value), exp = 24;
820 for(; m<0x400; m<<=1,--exp) ;
821 for(; m>0x7FF; m>>=1,++exp) ;
822 bits |= (exp<<10) + m;
823 return (exp>24) ? rounded<R,false>(bits, (value>>(exp-25))&1, (((1<<(exp-25))-1)&value)!=0) : bits;
824 }
825
830 inline float half2float_impl(unsigned int value, float, true_type) {
831 #if HALF_ENABLE_F16C_INTRINSICS
832 return _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(value)));
833 #else
834 #if 0
835 bits_t<float> fbits = static_cast<bits_t<float>>(value&0x8000) << 16;
836 int abs = value & 0x7FFF;
837 if(abs)
838 {
839 fbits |= 0x38000000 << static_cast<unsigned>(abs>=0x7C00);
840 for(; abs<0x400; abs<<=1,fbits-=0x800000) ;
841 fbits += static_cast<bits_t<float>>(abs) << 13;
842 }
843 #else
844 static const bits_t<float> mantissa_table[2048] = {
845 0x00000000, 0x33800000, 0x34000000, 0x34400000, 0x34800000, 0x34A00000, 0x34C00000, 0x34E00000, 0x35000000, 0x35100000, 0x35200000, 0x35300000, 0x35400000, 0x35500000, 0x35600000, 0x35700000,
846 0x35800000, 0x35880000, 0x35900000, 0x35980000, 0x35A00000, 0x35A80000, 0x35B00000, 0x35B80000, 0x35C00000, 0x35C80000, 0x35D00000, 0x35D80000, 0x35E00000, 0x35E80000, 0x35F00000, 0x35F80000,
847 0x36000000, 0x36040000, 0x36080000, 0x360C0000, 0x36100000, 0x36140000, 0x36180000, 0x361C0000, 0x36200000, 0x36240000, 0x36280000, 0x362C0000, 0x36300000, 0x36340000, 0x36380000, 0x363C0000,
848 0x36400000, 0x36440000, 0x36480000, 0x364C0000, 0x36500000, 0x36540000, 0x36580000, 0x365C0000, 0x36600000, 0x36640000, 0x36680000, 0x366C0000, 0x36700000, 0x36740000, 0x36780000, 0x367C0000,
849 0x36800000, 0x36820000, 0x36840000, 0x36860000, 0x36880000, 0x368A0000, 0x368C0000, 0x368E0000, 0x36900000, 0x36920000, 0x36940000, 0x36960000, 0x36980000, 0x369A0000, 0x369C0000, 0x369E0000,
850 0x36A00000, 0x36A20000, 0x36A40000, 0x36A60000, 0x36A80000, 0x36AA0000, 0x36AC0000, 0x36AE0000, 0x36B00000, 0x36B20000, 0x36B40000, 0x36B60000, 0x36B80000, 0x36BA0000, 0x36BC0000, 0x36BE0000,
851 0x36C00000, 0x36C20000, 0x36C40000, 0x36C60000, 0x36C80000, 0x36CA0000, 0x36CC0000, 0x36CE0000, 0x36D00000, 0x36D20000, 0x36D40000, 0x36D60000, 0x36D80000, 0x36DA0000, 0x36DC0000, 0x36DE0000,
852 0x36E00000, 0x36E20000, 0x36E40000, 0x36E60000, 0x36E80000, 0x36EA0000, 0x36EC0000, 0x36EE0000, 0x36F00000, 0x36F20000, 0x36F40000, 0x36F60000, 0x36F80000, 0x36FA0000, 0x36FC0000, 0x36FE0000,
853 0x37000000, 0x37010000, 0x37020000, 0x37030000, 0x37040000, 0x37050000, 0x37060000, 0x37070000, 0x37080000, 0x37090000, 0x370A0000, 0x370B0000, 0x370C0000, 0x370D0000, 0x370E0000, 0x370F0000,
854 0x37100000, 0x37110000, 0x37120000, 0x37130000, 0x37140000, 0x37150000, 0x37160000, 0x37170000, 0x37180000, 0x37190000, 0x371A0000, 0x371B0000, 0x371C0000, 0x371D0000, 0x371E0000, 0x371F0000,
855 0x37200000, 0x37210000, 0x37220000, 0x37230000, 0x37240000, 0x37250000, 0x37260000, 0x37270000, 0x37280000, 0x37290000, 0x372A0000, 0x372B0000, 0x372C0000, 0x372D0000, 0x372E0000, 0x372F0000,
856 0x37300000, 0x37310000, 0x37320000, 0x37330000, 0x37340000, 0x37350000, 0x37360000, 0x37370000, 0x37380000, 0x37390000, 0x373A0000, 0x373B0000, 0x373C0000, 0x373D0000, 0x373E0000, 0x373F0000,
857 0x37400000, 0x37410000, 0x37420000, 0x37430000, 0x37440000, 0x37450000, 0x37460000, 0x37470000, 0x37480000, 0x37490000, 0x374A0000, 0x374B0000, 0x374C0000, 0x374D0000, 0x374E0000, 0x374F0000,
858 0x37500000, 0x37510000, 0x37520000, 0x37530000, 0x37540000, 0x37550000, 0x37560000, 0x37570000, 0x37580000, 0x37590000, 0x375A0000, 0x375B0000, 0x375C0000, 0x375D0000, 0x375E0000, 0x375F0000,
859 0x37600000, 0x37610000, 0x37620000, 0x37630000, 0x37640000, 0x37650000, 0x37660000, 0x37670000, 0x37680000, 0x37690000, 0x376A0000, 0x376B0000, 0x376C0000, 0x376D0000, 0x376E0000, 0x376F0000,
860 0x37700000, 0x37710000, 0x37720000, 0x37730000, 0x37740000, 0x37750000, 0x37760000, 0x37770000, 0x37780000, 0x37790000, 0x377A0000, 0x377B0000, 0x377C0000, 0x377D0000, 0x377E0000, 0x377F0000,
861 0x37800000, 0x37808000, 0x37810000, 0x37818000, 0x37820000, 0x37828000, 0x37830000, 0x37838000, 0x37840000, 0x37848000, 0x37850000, 0x37858000, 0x37860000, 0x37868000, 0x37870000, 0x37878000,
862 0x37880000, 0x37888000, 0x37890000, 0x37898000, 0x378A0000, 0x378A8000, 0x378B0000, 0x378B8000, 0x378C0000, 0x378C8000, 0x378D0000, 0x378D8000, 0x378E0000, 0x378E8000, 0x378F0000, 0x378F8000,
863 0x37900000, 0x37908000, 0x37910000, 0x37918000, 0x37920000, 0x37928000, 0x37930000, 0x37938000, 0x37940000, 0x37948000, 0x37950000, 0x37958000, 0x37960000, 0x37968000, 0x37970000, 0x37978000,
864 0x37980000, 0x37988000, 0x37990000, 0x37998000, 0x379A0000, 0x379A8000, 0x379B0000, 0x379B8000, 0x379C0000, 0x379C8000, 0x379D0000, 0x379D8000, 0x379E0000, 0x379E8000, 0x379F0000, 0x379F8000,
865 0x37A00000, 0x37A08000, 0x37A10000, 0x37A18000, 0x37A20000, 0x37A28000, 0x37A30000, 0x37A38000, 0x37A40000, 0x37A48000, 0x37A50000, 0x37A58000, 0x37A60000, 0x37A68000, 0x37A70000, 0x37A78000,
866 0x37A80000, 0x37A88000, 0x37A90000, 0x37A98000, 0x37AA0000, 0x37AA8000, 0x37AB0000, 0x37AB8000, 0x37AC0000, 0x37AC8000, 0x37AD0000, 0x37AD8000, 0x37AE0000, 0x37AE8000, 0x37AF0000, 0x37AF8000,
867 0x37B00000, 0x37B08000, 0x37B10000, 0x37B18000, 0x37B20000, 0x37B28000, 0x37B30000, 0x37B38000, 0x37B40000, 0x37B48000, 0x37B50000, 0x37B58000, 0x37B60000, 0x37B68000, 0x37B70000, 0x37B78000,
868 0x37B80000, 0x37B88000, 0x37B90000, 0x37B98000, 0x37BA0000, 0x37BA8000, 0x37BB0000, 0x37BB8000, 0x37BC0000, 0x37BC8000, 0x37BD0000, 0x37BD8000, 0x37BE0000, 0x37BE8000, 0x37BF0000, 0x37BF8000,
869 0x37C00000, 0x37C08000, 0x37C10000, 0x37C18000, 0x37C20000, 0x37C28000, 0x37C30000, 0x37C38000, 0x37C40000, 0x37C48000, 0x37C50000, 0x37C58000, 0x37C60000, 0x37C68000, 0x37C70000, 0x37C78000,
870 0x37C80000, 0x37C88000, 0x37C90000, 0x37C98000, 0x37CA0000, 0x37CA8000, 0x37CB0000, 0x37CB8000, 0x37CC0000, 0x37CC8000, 0x37CD0000, 0x37CD8000, 0x37CE0000, 0x37CE8000, 0x37CF0000, 0x37CF8000,
871 0x37D00000, 0x37D08000, 0x37D10000, 0x37D18000, 0x37D20000, 0x37D28000, 0x37D30000, 0x37D38000, 0x37D40000, 0x37D48000, 0x37D50000, 0x37D58000, 0x37D60000, 0x37D68000, 0x37D70000, 0x37D78000,
872 0x37D80000, 0x37D88000, 0x37D90000, 0x37D98000, 0x37DA0000, 0x37DA8000, 0x37DB0000, 0x37DB8000, 0x37DC0000, 0x37DC8000, 0x37DD0000, 0x37DD8000, 0x37DE0000, 0x37DE8000, 0x37DF0000, 0x37DF8000,
873 0x37E00000, 0x37E08000, 0x37E10000, 0x37E18000, 0x37E20000, 0x37E28000, 0x37E30000, 0x37E38000, 0x37E40000, 0x37E48000, 0x37E50000, 0x37E58000, 0x37E60000, 0x37E68000, 0x37E70000, 0x37E78000,
874 0x37E80000, 0x37E88000, 0x37E90000, 0x37E98000, 0x37EA0000, 0x37EA8000, 0x37EB0000, 0x37EB8000, 0x37EC0000, 0x37EC8000, 0x37ED0000, 0x37ED8000, 0x37EE0000, 0x37EE8000, 0x37EF0000, 0x37EF8000,
875 0x37F00000, 0x37F08000, 0x37F10000, 0x37F18000, 0x37F20000, 0x37F28000, 0x37F30000, 0x37F38000, 0x37F40000, 0x37F48000, 0x37F50000, 0x37F58000, 0x37F60000, 0x37F68000, 0x37F70000, 0x37F78000,
876 0x37F80000, 0x37F88000, 0x37F90000, 0x37F98000, 0x37FA0000, 0x37FA8000, 0x37FB0000, 0x37FB8000, 0x37FC0000, 0x37FC8000, 0x37FD0000, 0x37FD8000, 0x37FE0000, 0x37FE8000, 0x37FF0000, 0x37FF8000,
877 0x38000000, 0x38004000, 0x38008000, 0x3800C000, 0x38010000, 0x38014000, 0x38018000, 0x3801C000, 0x38020000, 0x38024000, 0x38028000, 0x3802C000, 0x38030000, 0x38034000, 0x38038000, 0x3803C000,
878 0x38040000, 0x38044000, 0x38048000, 0x3804C000, 0x38050000, 0x38054000, 0x38058000, 0x3805C000, 0x38060000, 0x38064000, 0x38068000, 0x3806C000, 0x38070000, 0x38074000, 0x38078000, 0x3807C000,
879 0x38080000, 0x38084000, 0x38088000, 0x3808C000, 0x38090000, 0x38094000, 0x38098000, 0x3809C000, 0x380A0000, 0x380A4000, 0x380A8000, 0x380AC000, 0x380B0000, 0x380B4000, 0x380B8000, 0x380BC000,
880 0x380C0000, 0x380C4000, 0x380C8000, 0x380CC000, 0x380D0000, 0x380D4000, 0x380D8000, 0x380DC000, 0x380E0000, 0x380E4000, 0x380E8000, 0x380EC000, 0x380F0000, 0x380F4000, 0x380F8000, 0x380FC000,
881 0x38100000, 0x38104000, 0x38108000, 0x3810C000, 0x38110000, 0x38114000, 0x38118000, 0x3811C000, 0x38120000, 0x38124000, 0x38128000, 0x3812C000, 0x38130000, 0x38134000, 0x38138000, 0x3813C000,
882 0x38140000, 0x38144000, 0x38148000, 0x3814C000, 0x38150000, 0x38154000, 0x38158000, 0x3815C000, 0x38160000, 0x38164000, 0x38168000, 0x3816C000, 0x38170000, 0x38174000, 0x38178000, 0x3817C000,
883 0x38180000, 0x38184000, 0x38188000, 0x3818C000, 0x38190000, 0x38194000, 0x38198000, 0x3819C000, 0x381A0000, 0x381A4000, 0x381A8000, 0x381AC000, 0x381B0000, 0x381B4000, 0x381B8000, 0x381BC000,
884 0x381C0000, 0x381C4000, 0x381C8000, 0x381CC000, 0x381D0000, 0x381D4000, 0x381D8000, 0x381DC000, 0x381E0000, 0x381E4000, 0x381E8000, 0x381EC000, 0x381F0000, 0x381F4000, 0x381F8000, 0x381FC000,
885 0x38200000, 0x38204000, 0x38208000, 0x3820C000, 0x38210000, 0x38214000, 0x38218000, 0x3821C000, 0x38220000, 0x38224000, 0x38228000, 0x3822C000, 0x38230000, 0x38234000, 0x38238000, 0x3823C000,
886 0x38240000, 0x38244000, 0x38248000, 0x3824C000, 0x38250000, 0x38254000, 0x38258000, 0x3825C000, 0x38260000, 0x38264000, 0x38268000, 0x3826C000, 0x38270000, 0x38274000, 0x38278000, 0x3827C000,
887 0x38280000, 0x38284000, 0x38288000, 0x3828C000, 0x38290000, 0x38294000, 0x38298000, 0x3829C000, 0x382A0000, 0x382A4000, 0x382A8000, 0x382AC000, 0x382B0000, 0x382B4000, 0x382B8000, 0x382BC000,
888 0x382C0000, 0x382C4000, 0x382C8000, 0x382CC000, 0x382D0000, 0x382D4000, 0x382D8000, 0x382DC000, 0x382E0000, 0x382E4000, 0x382E8000, 0x382EC000, 0x382F0000, 0x382F4000, 0x382F8000, 0x382FC000,
889 0x38300000, 0x38304000, 0x38308000, 0x3830C000, 0x38310000, 0x38314000, 0x38318000, 0x3831C000, 0x38320000, 0x38324000, 0x38328000, 0x3832C000, 0x38330000, 0x38334000, 0x38338000, 0x3833C000,
890 0x38340000, 0x38344000, 0x38348000, 0x3834C000, 0x38350000, 0x38354000, 0x38358000, 0x3835C000, 0x38360000, 0x38364000, 0x38368000, 0x3836C000, 0x38370000, 0x38374000, 0x38378000, 0x3837C000,
891 0x38380000, 0x38384000, 0x38388000, 0x3838C000, 0x38390000, 0x38394000, 0x38398000, 0x3839C000, 0x383A0000, 0x383A4000, 0x383A8000, 0x383AC000, 0x383B0000, 0x383B4000, 0x383B8000, 0x383BC000,
892 0x383C0000, 0x383C4000, 0x383C8000, 0x383CC000, 0x383D0000, 0x383D4000, 0x383D8000, 0x383DC000, 0x383E0000, 0x383E4000, 0x383E8000, 0x383EC000, 0x383F0000, 0x383F4000, 0x383F8000, 0x383FC000,
893 0x38400000, 0x38404000, 0x38408000, 0x3840C000, 0x38410000, 0x38414000, 0x38418000, 0x3841C000, 0x38420000, 0x38424000, 0x38428000, 0x3842C000, 0x38430000, 0x38434000, 0x38438000, 0x3843C000,
894 0x38440000, 0x38444000, 0x38448000, 0x3844C000, 0x38450000, 0x38454000, 0x38458000, 0x3845C000, 0x38460000, 0x38464000, 0x38468000, 0x3846C000, 0x38470000, 0x38474000, 0x38478000, 0x3847C000,
895 0x38480000, 0x38484000, 0x38488000, 0x3848C000, 0x38490000, 0x38494000, 0x38498000, 0x3849C000, 0x384A0000, 0x384A4000, 0x384A8000, 0x384AC000, 0x384B0000, 0x384B4000, 0x384B8000, 0x384BC000,
896 0x384C0000, 0x384C4000, 0x384C8000, 0x384CC000, 0x384D0000, 0x384D4000, 0x384D8000, 0x384DC000, 0x384E0000, 0x384E4000, 0x384E8000, 0x384EC000, 0x384F0000, 0x384F4000, 0x384F8000, 0x384FC000,
897 0x38500000, 0x38504000, 0x38508000, 0x3850C000, 0x38510000, 0x38514000, 0x38518000, 0x3851C000, 0x38520000, 0x38524000, 0x38528000, 0x3852C000, 0x38530000, 0x38534000, 0x38538000, 0x3853C000,
898 0x38540000, 0x38544000, 0x38548000, 0x3854C000, 0x38550000, 0x38554000, 0x38558000, 0x3855C000, 0x38560000, 0x38564000, 0x38568000, 0x3856C000, 0x38570000, 0x38574000, 0x38578000, 0x3857C000,
899 0x38580000, 0x38584000, 0x38588000, 0x3858C000, 0x38590000, 0x38594000, 0x38598000, 0x3859C000, 0x385A0000, 0x385A4000, 0x385A8000, 0x385AC000, 0x385B0000, 0x385B4000, 0x385B8000, 0x385BC000,
900 0x385C0000, 0x385C4000, 0x385C8000, 0x385CC000, 0x385D0000, 0x385D4000, 0x385D8000, 0x385DC000, 0x385E0000, 0x385E4000, 0x385E8000, 0x385EC000, 0x385F0000, 0x385F4000, 0x385F8000, 0x385FC000,
901 0x38600000, 0x38604000, 0x38608000, 0x3860C000, 0x38610000, 0x38614000, 0x38618000, 0x3861C000, 0x38620000, 0x38624000, 0x38628000, 0x3862C000, 0x38630000, 0x38634000, 0x38638000, 0x3863C000,
902 0x38640000, 0x38644000, 0x38648000, 0x3864C000, 0x38650000, 0x38654000, 0x38658000, 0x3865C000, 0x38660000, 0x38664000, 0x38668000, 0x3866C000, 0x38670000, 0x38674000, 0x38678000, 0x3867C000,
903 0x38680000, 0x38684000, 0x38688000, 0x3868C000, 0x38690000, 0x38694000, 0x38698000, 0x3869C000, 0x386A0000, 0x386A4000, 0x386A8000, 0x386AC000, 0x386B0000, 0x386B4000, 0x386B8000, 0x386BC000,
904 0x386C0000, 0x386C4000, 0x386C8000, 0x386CC000, 0x386D0000, 0x386D4000, 0x386D8000, 0x386DC000, 0x386E0000, 0x386E4000, 0x386E8000, 0x386EC000, 0x386F0000, 0x386F4000, 0x386F8000, 0x386FC000,
905 0x38700000, 0x38704000, 0x38708000, 0x3870C000, 0x38710000, 0x38714000, 0x38718000, 0x3871C000, 0x38720000, 0x38724000, 0x38728000, 0x3872C000, 0x38730000, 0x38734000, 0x38738000, 0x3873C000,
906 0x38740000, 0x38744000, 0x38748000, 0x3874C000, 0x38750000, 0x38754000, 0x38758000, 0x3875C000, 0x38760000, 0x38764000, 0x38768000, 0x3876C000, 0x38770000, 0x38774000, 0x38778000, 0x3877C000,
907 0x38780000, 0x38784000, 0x38788000, 0x3878C000, 0x38790000, 0x38794000, 0x38798000, 0x3879C000, 0x387A0000, 0x387A4000, 0x387A8000, 0x387AC000, 0x387B0000, 0x387B4000, 0x387B8000, 0x387BC000,
908 0x387C0000, 0x387C4000, 0x387C8000, 0x387CC000, 0x387D0000, 0x387D4000, 0x387D8000, 0x387DC000, 0x387E0000, 0x387E4000, 0x387E8000, 0x387EC000, 0x387F0000, 0x387F4000, 0x387F8000, 0x387FC000,
909 0x38000000, 0x38002000, 0x38004000, 0x38006000, 0x38008000, 0x3800A000, 0x3800C000, 0x3800E000, 0x38010000, 0x38012000, 0x38014000, 0x38016000, 0x38018000, 0x3801A000, 0x3801C000, 0x3801E000,
910 0x38020000, 0x38022000, 0x38024000, 0x38026000, 0x38028000, 0x3802A000, 0x3802C000, 0x3802E000, 0x38030000, 0x38032000, 0x38034000, 0x38036000, 0x38038000, 0x3803A000, 0x3803C000, 0x3803E000,
911 0x38040000, 0x38042000, 0x38044000, 0x38046000, 0x38048000, 0x3804A000, 0x3804C000, 0x3804E000, 0x38050000, 0x38052000, 0x38054000, 0x38056000, 0x38058000, 0x3805A000, 0x3805C000, 0x3805E000,
912 0x38060000, 0x38062000, 0x38064000, 0x38066000, 0x38068000, 0x3806A000, 0x3806C000, 0x3806E000, 0x38070000, 0x38072000, 0x38074000, 0x38076000, 0x38078000, 0x3807A000, 0x3807C000, 0x3807E000,
913 0x38080000, 0x38082000, 0x38084000, 0x38086000, 0x38088000, 0x3808A000, 0x3808C000, 0x3808E000, 0x38090000, 0x38092000, 0x38094000, 0x38096000, 0x38098000, 0x3809A000, 0x3809C000, 0x3809E000,
914 0x380A0000, 0x380A2000, 0x380A4000, 0x380A6000, 0x380A8000, 0x380AA000, 0x380AC000, 0x380AE000, 0x380B0000, 0x380B2000, 0x380B4000, 0x380B6000, 0x380B8000, 0x380BA000, 0x380BC000, 0x380BE000,
915 0x380C0000, 0x380C2000, 0x380C4000, 0x380C6000, 0x380C8000, 0x380CA000, 0x380CC000, 0x380CE000, 0x380D0000, 0x380D2000, 0x380D4000, 0x380D6000, 0x380D8000, 0x380DA000, 0x380DC000, 0x380DE000,
916 0x380E0000, 0x380E2000, 0x380E4000, 0x380E6000, 0x380E8000, 0x380EA000, 0x380EC000, 0x380EE000, 0x380F0000, 0x380F2000, 0x380F4000, 0x380F6000, 0x380F8000, 0x380FA000, 0x380FC000, 0x380FE000,
917 0x38100000, 0x38102000, 0x38104000, 0x38106000, 0x38108000, 0x3810A000, 0x3810C000, 0x3810E000, 0x38110000, 0x38112000, 0x38114000, 0x38116000, 0x38118000, 0x3811A000, 0x3811C000, 0x3811E000,
918 0x38120000, 0x38122000, 0x38124000, 0x38126000, 0x38128000, 0x3812A000, 0x3812C000, 0x3812E000, 0x38130000, 0x38132000, 0x38134000, 0x38136000, 0x38138000, 0x3813A000, 0x3813C000, 0x3813E000,
919 0x38140000, 0x38142000, 0x38144000, 0x38146000, 0x38148000, 0x3814A000, 0x3814C000, 0x3814E000, 0x38150000, 0x38152000, 0x38154000, 0x38156000, 0x38158000, 0x3815A000, 0x3815C000, 0x3815E000,
920 0x38160000, 0x38162000, 0x38164000, 0x38166000, 0x38168000, 0x3816A000, 0x3816C000, 0x3816E000, 0x38170000, 0x38172000, 0x38174000, 0x38176000, 0x38178000, 0x3817A000, 0x3817C000, 0x3817E000,
921 0x38180000, 0x38182000, 0x38184000, 0x38186000, 0x38188000, 0x3818A000, 0x3818C000, 0x3818E000, 0x38190000, 0x38192000, 0x38194000, 0x38196000, 0x38198000, 0x3819A000, 0x3819C000, 0x3819E000,
922 0x381A0000, 0x381A2000, 0x381A4000, 0x381A6000, 0x381A8000, 0x381AA000, 0x381AC000, 0x381AE000, 0x381B0000, 0x381B2000, 0x381B4000, 0x381B6000, 0x381B8000, 0x381BA000, 0x381BC000, 0x381BE000,
923 0x381C0000, 0x381C2000, 0x381C4000, 0x381C6000, 0x381C8000, 0x381CA000, 0x381CC000, 0x381CE000, 0x381D0000, 0x381D2000, 0x381D4000, 0x381D6000, 0x381D8000, 0x381DA000, 0x381DC000, 0x381DE000,
924 0x381E0000, 0x381E2000, 0x381E4000, 0x381E6000, 0x381E8000, 0x381EA000, 0x381EC000, 0x381EE000, 0x381F0000, 0x381F2000, 0x381F4000, 0x381F6000, 0x381F8000, 0x381FA000, 0x381FC000, 0x381FE000,
925 0x38200000, 0x38202000, 0x38204000, 0x38206000, 0x38208000, 0x3820A000, 0x3820C000, 0x3820E000, 0x38210000, 0x38212000, 0x38214000, 0x38216000, 0x38218000, 0x3821A000, 0x3821C000, 0x3821E000,
926 0x38220000, 0x38222000, 0x38224000, 0x38226000, 0x38228000, 0x3822A000, 0x3822C000, 0x3822E000, 0x38230000, 0x38232000, 0x38234000, 0x38236000, 0x38238000, 0x3823A000, 0x3823C000, 0x3823E000,
927 0x38240000, 0x38242000, 0x38244000, 0x38246000, 0x38248000, 0x3824A000, 0x3824C000, 0x3824E000, 0x38250000, 0x38252000, 0x38254000, 0x38256000, 0x38258000, 0x3825A000, 0x3825C000, 0x3825E000,
928 0x38260000, 0x38262000, 0x38264000, 0x38266000, 0x38268000, 0x3826A000, 0x3826C000, 0x3826E000, 0x38270000, 0x38272000, 0x38274000, 0x38276000, 0x38278000, 0x3827A000, 0x3827C000, 0x3827E000,
929 0x38280000, 0x38282000, 0x38284000, 0x38286000, 0x38288000, 0x3828A000, 0x3828C000, 0x3828E000, 0x38290000, 0x38292000, 0x38294000, 0x38296000, 0x38298000, 0x3829A000, 0x3829C000, 0x3829E000,
930 0x382A0000, 0x382A2000, 0x382A4000, 0x382A6000, 0x382A8000, 0x382AA000, 0x382AC000, 0x382AE000, 0x382B0000, 0x382B2000, 0x382B4000, 0x382B6000, 0x382B8000, 0x382BA000, 0x382BC000, 0x382BE000,
931 0x382C0000, 0x382C2000, 0x382C4000, 0x382C6000, 0x382C8000, 0x382CA000, 0x382CC000, 0x382CE000, 0x382D0000, 0x382D2000, 0x382D4000, 0x382D6000, 0x382D8000, 0x382DA000, 0x382DC000, 0x382DE000,
932 0x382E0000, 0x382E2000, 0x382E4000, 0x382E6000, 0x382E8000, 0x382EA000, 0x382EC000, 0x382EE000, 0x382F0000, 0x382F2000, 0x382F4000, 0x382F6000, 0x382F8000, 0x382FA000, 0x382FC000, 0x382FE000,
933 0x38300000, 0x38302000, 0x38304000, 0x38306000, 0x38308000, 0x3830A000, 0x3830C000, 0x3830E000, 0x38310000, 0x38312000, 0x38314000, 0x38316000, 0x38318000, 0x3831A000, 0x3831C000, 0x3831E000,
934 0x38320000, 0x38322000, 0x38324000, 0x38326000, 0x38328000, 0x3832A000, 0x3832C000, 0x3832E000, 0x38330000, 0x38332000, 0x38334000, 0x38336000, 0x38338000, 0x3833A000, 0x3833C000, 0x3833E000,
935 0x38340000, 0x38342000, 0x38344000, 0x38346000, 0x38348000, 0x3834A000, 0x3834C000, 0x3834E000, 0x38350000, 0x38352000, 0x38354000, 0x38356000, 0x38358000, 0x3835A000, 0x3835C000, 0x3835E000,
936 0x38360000, 0x38362000, 0x38364000, 0x38366000, 0x38368000, 0x3836A000, 0x3836C000, 0x3836E000, 0x38370000, 0x38372000, 0x38374000, 0x38376000, 0x38378000, 0x3837A000, 0x3837C000, 0x3837E000,
937 0x38380000, 0x38382000, 0x38384000, 0x38386000, 0x38388000, 0x3838A000, 0x3838C000, 0x3838E000, 0x38390000, 0x38392000, 0x38394000, 0x38396000, 0x38398000, 0x3839A000, 0x3839C000, 0x3839E000,
938 0x383A0000, 0x383A2000, 0x383A4000, 0x383A6000, 0x383A8000, 0x383AA000, 0x383AC000, 0x383AE000, 0x383B0000, 0x383B2000, 0x383B4000, 0x383B6000, 0x383B8000, 0x383BA000, 0x383BC000, 0x383BE000,
939 0x383C0000, 0x383C2000, 0x383C4000, 0x383C6000, 0x383C8000, 0x383CA000, 0x383CC000, 0x383CE000, 0x383D0000, 0x383D2000, 0x383D4000, 0x383D6000, 0x383D8000, 0x383DA000, 0x383DC000, 0x383DE000,
940 0x383E0000, 0x383E2000, 0x383E4000, 0x383E6000, 0x383E8000, 0x383EA000, 0x383EC000, 0x383EE000, 0x383F0000, 0x383F2000, 0x383F4000, 0x383F6000, 0x383F8000, 0x383FA000, 0x383FC000, 0x383FE000,
941 0x38400000, 0x38402000, 0x38404000, 0x38406000, 0x38408000, 0x3840A000, 0x3840C000, 0x3840E000, 0x38410000, 0x38412000, 0x38414000, 0x38416000, 0x38418000, 0x3841A000, 0x3841C000, 0x3841E000,
942 0x38420000, 0x38422000, 0x38424000, 0x38426000, 0x38428000, 0x3842A000, 0x3842C000, 0x3842E000, 0x38430000, 0x38432000, 0x38434000, 0x38436000, 0x38438000, 0x3843A000, 0x3843C000, 0x3843E000,
943 0x38440000, 0x38442000, 0x38444000, 0x38446000, 0x38448000, 0x3844A000, 0x3844C000, 0x3844E000, 0x38450000, 0x38452000, 0x38454000, 0x38456000, 0x38458000, 0x3845A000, 0x3845C000, 0x3845E000,
944 0x38460000, 0x38462000, 0x38464000, 0x38466000, 0x38468000, 0x3846A000, 0x3846C000, 0x3846E000, 0x38470000, 0x38472000, 0x38474000, 0x38476000, 0x38478000, 0x3847A000, 0x3847C000, 0x3847E000,
945 0x38480000, 0x38482000, 0x38484000, 0x38486000, 0x38488000, 0x3848A000, 0x3848C000, 0x3848E000, 0x38490000, 0x38492000, 0x38494000, 0x38496000, 0x38498000, 0x3849A000, 0x3849C000, 0x3849E000,
946 0x384A0000, 0x384A2000, 0x384A4000, 0x384A6000, 0x384A8000, 0x384AA000, 0x384AC000, 0x384AE000, 0x384B0000, 0x384B2000, 0x384B4000, 0x384B6000, 0x384B8000, 0x384BA000, 0x384BC000, 0x384BE000,
947 0x384C0000, 0x384C2000, 0x384C4000, 0x384C6000, 0x384C8000, 0x384CA000, 0x384CC000, 0x384CE000, 0x384D0000, 0x384D2000, 0x384D4000, 0x384D6000, 0x384D8000, 0x384DA000, 0x384DC000, 0x384DE000,
948 0x384E0000, 0x384E2000, 0x384E4000, 0x384E6000, 0x384E8000, 0x384EA000, 0x384EC000, 0x384EE000, 0x384F0000, 0x384F2000, 0x384F4000, 0x384F6000, 0x384F8000, 0x384FA000, 0x384FC000, 0x384FE000,
949 0x38500000, 0x38502000, 0x38504000, 0x38506000, 0x38508000, 0x3850A000, 0x3850C000, 0x3850E000, 0x38510000, 0x38512000, 0x38514000, 0x38516000, 0x38518000, 0x3851A000, 0x3851C000, 0x3851E000,
950 0x38520000, 0x38522000, 0x38524000, 0x38526000, 0x38528000, 0x3852A000, 0x3852C000, 0x3852E000, 0x38530000, 0x38532000, 0x38534000, 0x38536000, 0x38538000, 0x3853A000, 0x3853C000, 0x3853E000,
951 0x38540000, 0x38542000, 0x38544000, 0x38546000, 0x38548000, 0x3854A000, 0x3854C000, 0x3854E000, 0x38550000, 0x38552000, 0x38554000, 0x38556000, 0x38558000, 0x3855A000, 0x3855C000, 0x3855E000,
952 0x38560000, 0x38562000, 0x38564000, 0x38566000, 0x38568000, 0x3856A000, 0x3856C000, 0x3856E000, 0x38570000, 0x38572000, 0x38574000, 0x38576000, 0x38578000, 0x3857A000, 0x3857C000, 0x3857E000,
953 0x38580000, 0x38582000, 0x38584000, 0x38586000, 0x38588000, 0x3858A000, 0x3858C000, 0x3858E000, 0x38590000, 0x38592000, 0x38594000, 0x38596000, 0x38598000, 0x3859A000, 0x3859C000, 0x3859E000,
954 0x385A0000, 0x385A2000, 0x385A4000, 0x385A6000, 0x385A8000, 0x385AA000, 0x385AC000, 0x385AE000, 0x385B0000, 0x385B2000, 0x385B4000, 0x385B6000, 0x385B8000, 0x385BA000, 0x385BC000, 0x385BE000,
955 0x385C0000, 0x385C2000, 0x385C4000, 0x385C6000, 0x385C8000, 0x385CA000, 0x385CC000, 0x385CE000, 0x385D0000, 0x385D2000, 0x385D4000, 0x385D6000, 0x385D8000, 0x385DA000, 0x385DC000, 0x385DE000,
956 0x385E0000, 0x385E2000, 0x385E4000, 0x385E6000, 0x385E8000, 0x385EA000, 0x385EC000, 0x385EE000, 0x385F0000, 0x385F2000, 0x385F4000, 0x385F6000, 0x385F8000, 0x385FA000, 0x385FC000, 0x385FE000,
957 0x38600000, 0x38602000, 0x38604000, 0x38606000, 0x38608000, 0x3860A000, 0x3860C000, 0x3860E000, 0x38610000, 0x38612000, 0x38614000, 0x38616000, 0x38618000, 0x3861A000, 0x3861C000, 0x3861E000,
958 0x38620000, 0x38622000, 0x38624000, 0x38626000, 0x38628000, 0x3862A000, 0x3862C000, 0x3862E000, 0x38630000, 0x38632000, 0x38634000, 0x38636000, 0x38638000, 0x3863A000, 0x3863C000, 0x3863E000,
959 0x38640000, 0x38642000, 0x38644000, 0x38646000, 0x38648000, 0x3864A000, 0x3864C000, 0x3864E000, 0x38650000, 0x38652000, 0x38654000, 0x38656000, 0x38658000, 0x3865A000, 0x3865C000, 0x3865E000,
960 0x38660000, 0x38662000, 0x38664000, 0x38666000, 0x38668000, 0x3866A000, 0x3866C000, 0x3866E000, 0x38670000, 0x38672000, 0x38674000, 0x38676000, 0x38678000, 0x3867A000, 0x3867C000, 0x3867E000,
961 0x38680000, 0x38682000, 0x38684000, 0x38686000, 0x38688000, 0x3868A000, 0x3868C000, 0x3868E000, 0x38690000, 0x38692000, 0x38694000, 0x38696000, 0x38698000, 0x3869A000, 0x3869C000, 0x3869E000,
962 0x386A0000, 0x386A2000, 0x386A4000, 0x386A6000, 0x386A8000, 0x386AA000, 0x386AC000, 0x386AE000, 0x386B0000, 0x386B2000, 0x386B4000, 0x386B6000, 0x386B8000, 0x386BA000, 0x386BC000, 0x386BE000,
963 0x386C0000, 0x386C2000, 0x386C4000, 0x386C6000, 0x386C8000, 0x386CA000, 0x386CC000, 0x386CE000, 0x386D0000, 0x386D2000, 0x386D4000, 0x386D6000, 0x386D8000, 0x386DA000, 0x386DC000, 0x386DE000,
964 0x386E0000, 0x386E2000, 0x386E4000, 0x386E6000, 0x386E8000, 0x386EA000, 0x386EC000, 0x386EE000, 0x386F0000, 0x386F2000, 0x386F4000, 0x386F6000, 0x386F8000, 0x386FA000, 0x386FC000, 0x386FE000,
965 0x38700000, 0x38702000, 0x38704000, 0x38706000, 0x38708000, 0x3870A000, 0x3870C000, 0x3870E000, 0x38710000, 0x38712000, 0x38714000, 0x38716000, 0x38718000, 0x3871A000, 0x3871C000, 0x3871E000,
966 0x38720000, 0x38722000, 0x38724000, 0x38726000, 0x38728000, 0x3872A000, 0x3872C000, 0x3872E000, 0x38730000, 0x38732000, 0x38734000, 0x38736000, 0x38738000, 0x3873A000, 0x3873C000, 0x3873E000,
967 0x38740000, 0x38742000, 0x38744000, 0x38746000, 0x38748000, 0x3874A000, 0x3874C000, 0x3874E000, 0x38750000, 0x38752000, 0x38754000, 0x38756000, 0x38758000, 0x3875A000, 0x3875C000, 0x3875E000,
968 0x38760000, 0x38762000, 0x38764000, 0x38766000, 0x38768000, 0x3876A000, 0x3876C000, 0x3876E000, 0x38770000, 0x38772000, 0x38774000, 0x38776000, 0x38778000, 0x3877A000, 0x3877C000, 0x3877E000,
969 0x38780000, 0x38782000, 0x38784000, 0x38786000, 0x38788000, 0x3878A000, 0x3878C000, 0x3878E000, 0x38790000, 0x38792000, 0x38794000, 0x38796000, 0x38798000, 0x3879A000, 0x3879C000, 0x3879E000,
970 0x387A0000, 0x387A2000, 0x387A4000, 0x387A6000, 0x387A8000, 0x387AA000, 0x387AC000, 0x387AE000, 0x387B0000, 0x387B2000, 0x387B4000, 0x387B6000, 0x387B8000, 0x387BA000, 0x387BC000, 0x387BE000,
971 0x387C0000, 0x387C2000, 0x387C4000, 0x387C6000, 0x387C8000, 0x387CA000, 0x387CC000, 0x387CE000, 0x387D0000, 0x387D2000, 0x387D4000, 0x387D6000, 0x387D8000, 0x387DA000, 0x387DC000, 0x387DE000,
972 0x387E0000, 0x387E2000, 0x387E4000, 0x387E6000, 0x387E8000, 0x387EA000, 0x387EC000, 0x387EE000, 0x387F0000, 0x387F2000, 0x387F4000, 0x387F6000, 0x387F8000, 0x387FA000, 0x387FC000, 0x387FE000 };
973 static const bits_t<float> exponent_table[64] = {
974 0x00000000, 0x00800000, 0x01000000, 0x01800000, 0x02000000, 0x02800000, 0x03000000, 0x03800000, 0x04000000, 0x04800000, 0x05000000, 0x05800000, 0x06000000, 0x06800000, 0x07000000, 0x07800000,
975 0x08000000, 0x08800000, 0x09000000, 0x09800000, 0x0A000000, 0x0A800000, 0x0B000000, 0x0B800000, 0x0C000000, 0x0C800000, 0x0D000000, 0x0D800000, 0x0E000000, 0x0E800000, 0x0F000000, 0x47800000,
976 0x80000000, 0x80800000, 0x81000000, 0x81800000, 0x82000000, 0x82800000, 0x83000000, 0x83800000, 0x84000000, 0x84800000, 0x85000000, 0x85800000, 0x86000000, 0x86800000, 0x87000000, 0x87800000,
977 0x88000000, 0x88800000, 0x89000000, 0x89800000, 0x8A000000, 0x8A800000, 0x8B000000, 0x8B800000, 0x8C000000, 0x8C800000, 0x8D000000, 0x8D800000, 0x8E000000, 0x8E800000, 0x8F000000, 0xC7800000 };
978 static const unsigned short offset_table[64] = {
979 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024,
980 0, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 };
981 bits_t<float> fbits = mantissa_table[offset_table[value>>10]+(value&0x3FF)] + exponent_table[value>>10];
982 #endif
983 float out;
984 std::memcpy(&out, &fbits, sizeof(float));
985 return out;
986 #endif
987 }
988
992 inline double half2float_impl(unsigned int value, double, true_type) {
993 #if HALF_ENABLE_F16C_INTRINSICS
994 return _mm_cvtsd_f64(_mm_cvtps_pd(_mm_cvtph_ps(_mm_cvtsi32_si128(value))));
995 #else
996 uint32 hi = static_cast<uint32>(value&0x8000) << 16;
997 unsigned int abs = value & 0x7FFF;
998 if(abs) {
999 hi |= 0x3F000000 << static_cast<unsigned>(abs>=0x7C00);
1000 for(; abs<0x400; abs<<=1,hi-=0x100000) ;
1001 hi += static_cast<uint32>(abs) << 10;
1002 }
1003 bits_t<double> dbits = static_cast<bits_t<double>>(hi) << 32;
1004 double out;
1005 std::memcpy(&out, &dbits, sizeof(double));
1006 return out;
1007 #endif
1008 }
1009
/// Convert a half-precision bit pattern to an arbitrary floating-point type.
/// This overload is the variadic fallback and works purely through
/// `std::numeric_limits` and `std::ldexp`, without touching the bit layout of T.
/// \tparam T destination floating-point type
/// \param value half-precision bit pattern (sign in bit 15)
/// \return the converted value; NaN/infinity fall back to `T()` / `max()`
///         when T does not provide them
template<class T> T half2float_impl(unsigned int value, T, ...) {
    const unsigned int abs = value & 0x7FFF;
    T out;
    if(abs > 0x7C00) {
        // NaN: a clear top mantissa bit (0x200) selects a signaling NaN if T has one.
        if(std::numeric_limits<T>::has_signaling_NaN && !(abs&0x200))
            out = std::numeric_limits<T>::signaling_NaN();
        else if(std::numeric_limits<T>::has_quiet_NaN)
            out = std::numeric_limits<T>::quiet_NaN();
        else
            out = T();
    } else if(abs == 0x7C00) {
        out = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity() : std::numeric_limits<T>::max();
    } else if(abs > 0x3FF) {
        // Normal number: 11-bit significand (implicit bit restored) scaled by 2^(e-25).
        // The exponent is computed as a signed int so it can go negative.
        const int exp = static_cast<int>(abs>>10) - 25;
        out = std::ldexp(static_cast<T>((abs&0x3FF)|0x400), exp);
    } else {
        // Zero or subnormal: plain mantissa scaled by 2^-24.
        out = std::ldexp(static_cast<T>(abs), -24);
    }
    return (value&0x8000) ? -out : out;
}
1028
1033 template<class T> T half2float(unsigned int value) {
1034 return half2float_impl(value, T(), bool_type<std::numeric_limits<T>::is_iec559&&sizeof(bits_t<T>)==sizeof(T)>());
1035 }
1036
1046 template<std::float_round_style R,bool E,bool I,class T> T half2int(unsigned int value) {
1047 unsigned int abs = value & 0x7FFF;
1048 if(abs >= 0x7C00) {
1050 return (value&0x8000) ? std::numeric_limits<T>::min() : std::numeric_limits<T>::max();
1051 }
1052 if(abs < 0x3800) {
1053 raise(FE_INEXACT, I);
1054 return (R==std::round_toward_infinity) ? T(~(value>>15)&(abs!=0)) :
1055 (R==std::round_toward_neg_infinity) ? -T(value>0x8000) :
1056 T();
1057 }
1058 int exp = 25 - (abs>>10);
1059 unsigned int m = (value&0x3FF) | 0x400;
1060 int32 i = static_cast<int32>((exp<=0) ? (m<<-exp) : ((m+(
1061 (R==std::round_to_nearest) ? ((1<<(exp-1))-(~(m>>exp)&E)) :
1062 (R==std::round_toward_infinity) ? (((1<<exp)-1)&((value>>15)-1)) :
1063 (R==std::round_toward_neg_infinity) ? (((1<<exp)-1)&-(value>>15)) : 0))>>exp));
1064 if((!std::numeric_limits<T>::is_signed && (value&0x8000)) || (std::numeric_limits<T>::digits<16 &&
1065 ((value&0x8000) ? (-i<std::numeric_limits<T>::min()) : (i>std::numeric_limits<T>::max()))))
1067 else if(I && exp > 0 && (m&((1<<exp)-1)))
1069 return static_cast<T>((value&0x8000) ? -i : i);
1070 }
1071
1075
1081 template<std::float_round_style R> uint32 mulhi(uint32 x, uint32 y) {
1082 uint32 xy = (x>>16) * (y&0xFFFF), yx = (x&0xFFFF) * (y>>16), c = (xy&0xFFFF) + (yx&0xFFFF) + (((x&0xFFFF)*(y&0xFFFF))>>16);
1083 return (x>>16)*(y>>16) + (xy>>16) + (yx>>16) + (c>>16) +
1084 ((R==std::round_to_nearest) ? ((c>>15)&1) : (R==std::round_toward_infinity) ? ((c&0xFFFF)!=0) : 0);
1085 }
1086
1092 return static_cast<uint32>((static_cast<unsigned long long>(x)*static_cast<unsigned long long>(y)+0x80000000)>>32);
1093 }
1094
1100 inline uint32 divide64(uint32 x, uint32 y, int &s) {
1101 unsigned long long xx = static_cast<unsigned long long>(x) << 32;
1102 return s = (xx%y!=0), static_cast<uint32>(xx/y);
1103 }
1104
/// Modulus of two non-negative finite half-precision bit patterns.
/// \tparam Q `true` to also compute the quotient and store it through \a quo
/// \tparam R `true` to return the remainder nearest to zero (which may carry
///           the sign bit 0x8000), `false` for the plain non-negative modulus
/// \param x first operand as a sign-less half-precision bit pattern
/// \param y second operand as a sign-less, non-zero half-precision bit pattern
/// \param quo where to store the quotient; must be non-null when Q is `true`
/// \return remainder as a half-precision bit pattern
template<bool Q,bool R> unsigned int mod(unsigned int x, unsigned int y, int *quo = nullptr) {
    unsigned int q = 0;
    if(x > y) {
        // Decompose both operands into an exponent and an 11-bit significand,
        // normalising subnormals on the way.
        int absx = x, absy = y, expx = 0, expy = 0;
        while(absx < 0x400) {
            absx <<= 1;
            --expx;
        }
        while(absy < 0x400) {
            absy <<= 1;
            --expy;
        }
        expx += absx >> 10;
        expy += absy >> 10;
        int mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
        // Bit-by-bit long division, one step per exponent difference.
        for(int d=expx-expy; d; --d) {
            if(!Q && mx == my)
                return 0;
            if(mx >= my) {
                mx -= my;
                q += Q;
            }
            mx <<= 1;
            q <<= static_cast<int>(Q);
        }
        if(!Q && mx == my)
            return 0;
        if(mx >= my) {
            mx -= my;
            ++q;
        }
        if(Q) {
            // Keep only the low bits of the quotient so it fits a non-negative int.
            q &= (1<<(std::numeric_limits<int>::digits-1)) - 1;
            if(!mx) {
                *quo = q;
                return 0;
            }
        }
        // Renormalise the remainder and pack it back into half format.
        while(mx < 0x400) {
            mx <<= 1;
            --expy;
        }
        x = (expy>0) ? ((expy<<10)|(mx&0x3FF)) : (mx>>(1-expy));
    }
    if(R) {
        // Compare the remainder against y/2 without losing precision.
        unsigned int a, b;
        if(y < 0x800) {
            a = (x<0x400) ? (x<<1) : (x+0x400);
            b = y;
        } else {
            a = x;
            b = y - 0x400;
        }
        // Above the halfway point (or exactly on it with an odd quotient):
        // return x - y instead, which is negative.
        if(a > b || (a == b && (q&1))) {
            int exp = (y>>10) + (y<=0x3FF), d = exp - (x>>10) - (x<=0x3FF);
            int m = (((y&0x3FF)|((y>0x3FF)<<10))<<1) - (((x&0x3FF)|((x>0x3FF)<<10))<<(1-d));
            while(m < 0x800 && exp > 1) {
                m <<= 1;
                --exp;
            }
            x = 0x8000 + ((exp-1)<<10) + (m>>1);
            q += Q;
        }
    }
    if(Q)
        *quo = q;
    return x;
}
1166
1172 template<unsigned int F> uint32 sqrt(uint32 &r, int &exp) {
1173 int i = exp & 1;
1174 r <<= i;
1175 exp = (exp-i) / 2;
1176 uint32 m = 0;
1177 for(uint32 bit=static_cast<uint32>(1)<<F; bit; bit>>=2) {
1178 if(r < m+bit)
1179 m >>= 1;
1180 else {
1181 r -= m + bit;
1182 m = (m>>1) + bit;
1183 }
1184 }
1185 return m;
1186 }
1187
1193 inline uint32 exp2(uint32 m, unsigned int n = 32) {
1194 static const uint32 logs[] = {
1195 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
1196 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
1197 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
1198 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
1199 if(!m)
1200 return 0x80000000;
1201 uint32 mx = 0x80000000, my = 0;
1202 for(unsigned int i=1; i<n; ++i) {
1203 uint32 mz = my + logs[i];
1204 if(mz <= m) {
1205 my = mz;
1206 mx += mx >> i;
1207 }
1208 }
1209 return mx;
1210 }
1211
1217 inline uint32 log2(uint32 m, unsigned int n = 32) {
1218 static const uint32 logs[] = {
1219 0x80000000, 0x4AE00D1D, 0x2934F098, 0x15C01A3A, 0x0B31FB7D, 0x05AEB4DD, 0x02DCF2D1, 0x016FE50B,
1220 0x00B84E23, 0x005C3E10, 0x002E24CA, 0x001713D6, 0x000B8A47, 0x0005C53B, 0x0002E2A3, 0x00017153,
1221 0x0000B8AA, 0x00005C55, 0x00002E2B, 0x00001715, 0x00000B8B, 0x000005C5, 0x000002E3, 0x00000171,
1222 0x000000B9, 0x0000005C, 0x0000002E, 0x00000017, 0x0000000C, 0x00000006, 0x00000003, 0x00000001 };
1223 if(m == 0x40000000)
1224 return 0;
1225 uint32 mx = 0x40000000, my = 0;
1226 for(unsigned int i=1; i<n; ++i) {
1227 uint32 mz = mx + (mx>>i);
1228 if(mz <= m) {
1229 mx = mz;
1230 my += logs[i];
1231 }
1232 }
1233 return my;
1234 }
1235
1241 inline std::pair<uint32,uint32> sincos(uint32 mz, unsigned int n = 31) {
1242 static const uint32 angles[] = {
1243 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
1244 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
1245 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
1246 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
1247 uint32 mx = 0x26DD3B6A, my = 0;
1248 for(unsigned int i=0; i<n; ++i) {
1249 uint32 sign = sign_mask(mz);
1250 uint32 tx = mx - (arithmetic_shift(my, i)^sign) + sign;
1251 uint32 ty = my + (arithmetic_shift(mx, i)^sign) - sign;
1252 mx = tx; my = ty; mz -= (angles[i]^sign) - sign;
1253 }
1254 return std::make_pair(my, mx);
1255 }
1256
1263 inline uint32 atan2(uint32 my, uint32 mx, unsigned int n = 31) {
1264 static const uint32 angles[] = {
1265 0x3243F6A9, 0x1DAC6705, 0x0FADBAFD, 0x07F56EA7, 0x03FEAB77, 0x01FFD55C, 0x00FFFAAB, 0x007FFF55,
1266 0x003FFFEB, 0x001FFFFD, 0x00100000, 0x00080000, 0x00040000, 0x00020000, 0x00010000, 0x00008000,
1267 0x00004000, 0x00002000, 0x00001000, 0x00000800, 0x00000400, 0x00000200, 0x00000100, 0x00000080,
1268 0x00000040, 0x00000020, 0x00000010, 0x00000008, 0x00000004, 0x00000002, 0x00000001 };
1269 uint32 mz = 0;
1270 for(unsigned int i=0; i<n; ++i) {
1271 uint32 sign = sign_mask(my);
1272 uint32 tx = mx + (arithmetic_shift(my, i)^sign) - sign;
1273 uint32 ty = my - (arithmetic_shift(mx, i)^sign) + sign;
1274 mx = tx; my = ty; mz += (angles[i]^sign) - sign;
1275 }
1276 return mz;
1277 }
1278
1283 inline uint32 angle_arg(unsigned int abs, int &k) {
1284 uint32 m = (abs&0x3FF) | ((abs>0x3FF)<<10);
1285 int exp = (abs>>10) + (abs<=0x3FF) - 15;
1286 if(abs < 0x3A48)
1287 return k = 0, m << (exp+20);
1288 unsigned long long y = m * 0xA2F9836E4E442, mask = (1ULL<<(62-exp)) - 1, yi = (y+(mask>>1)) & ~mask, f = y - yi;
1289 uint32 sign = -static_cast<uint32>(f>>63);
1290 k = static_cast<int>(yi>>(62-exp));
1291 return (multiply64(static_cast<uint32>((sign ? -f : f)>>(31-exp)), 0xC90FDAA2)^sign) - sign;
1292 }
1293
1297 inline std::pair<uint32,uint32> atan2_args(unsigned int abs) {
1298 int exp = -15;
1299 for(; abs<0x400; abs<<=1,--exp) ;
1300 exp += abs >> 10;
1301 uint32 my = ((abs&0x3FF)|0x400) << 5, r = my * my;
1302 int rexp = 2 * exp;
1303 r = 0x40000000 - ((rexp>-31) ? ((r>>-rexp)|((r&((static_cast<uint32>(1)<<-rexp)-1))!=0)) : 1);
1304 for(rexp=0; r<0x40000000; r<<=1,--rexp) ;
1305 uint32 mx = sqrt<30>(r, rexp);
1306 int d = exp - rexp;
1307 if(d < 0)
1308 return std::make_pair((d<-14) ? ((my>>(-d-14))+((my>>(-d-15))&1)) : (my<<(14+d)), (mx<<14)+(r<<13)/mx);
1309 if(d > 0)
1310 return std::make_pair(my<<14, (d>14) ? ((mx>>(d-14))+((mx>>(d-15))&1)) : ((d==14) ? mx : ((mx<<(14-d))+(r<<(13-d))/mx)));
1311 return std::make_pair(my<<13, (mx<<13)+(r<<12)/mx);
1312 }
1313
1319 inline std::pair<uint32,uint32> hyperbolic_args(unsigned int abs, int &exp, unsigned int n = 32) {
1320 uint32 mx = detail::multiply64(static_cast<uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29), my;
1321 int e = (abs>>10) + (abs<=0x3FF);
1322 if(e < 14) {
1323 exp = 0;
1324 mx >>= 14 - e;
1325 } else {
1326 exp = mx >> (45-e);
1327 mx = (mx<<(e-14)) & 0x7FFFFFFF;
1328 }
1329 mx = exp2(mx, n);
1330 int d = exp << 1, s;
1331 if(mx > 0x80000000) {
1332 my = divide64(0x80000000, mx, s);
1333 my |= s;
1334 ++d;
1335 } else
1336 my = mx;
1337 return std::make_pair(mx, (d<31) ? ((my>>d)|((my&((static_cast<uint32>(1)<<d)-1))!=0)) : 1);
1338 }
1339
1351 template<std::float_round_style R,bool I> unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign = 0) {
1352 int s = 0;
1353 if(esign) {
1354 if(m > 0x80000000) {
1355 m = divide64(0x80000000, m, s);
1356 ++exp;
1357 }
1358 if(exp > 25)
1359 return underflow<R>(sign);
1360 else if(exp == 25)
1361 return rounded<R,I>(sign, 1, (m&0x7FFFFFFF)!=0);
1362 exp = -exp;
1363 } else if(exp > 15)
1364 return overflow<R>(sign);
1365 return fixed2half<R,31,false,false,I>(m, exp+14, sign, s);
1366 }
1367
1379 template<std::float_round_style R,uint32 L> unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign = 0) {
1380 uint32 msign = sign_mask(ilog);
1381 m = (((static_cast<uint32>(ilog)<<27)+(m>>4))^msign) - msign;
1382 if(!m)
1383 return 0;
1384 for(; m<0x80000000; m<<=1,--exp) ;
1385 int i = m >= L, s;
1386 exp += i;
1387 m >>= 1 + i;
1388 sign ^= msign & 0x8000;
1389 if(exp < -11)
1390 return underflow<R>(sign);
1391 m = divide64(m, L, s);
1392 return fixed2half<R,30,false,false,true>(m, exp, sign, 1);
1393 }
1394
1403 template<std::float_round_style R> unsigned int hypot_post(uint32 r, int exp) {
1404 int i = r >> 31;
1405 if((exp+=i) > 46)
1406 return overflow<R>();
1407 if(exp < -34)
1408 return underflow<R>();
1409 r = (r>>i) | (r&i);
1410 uint32 m = sqrt<30>(r, exp+=15);
1411 return fixed2half<R,15,false,false,false>(m, exp-1, 0, r!=0);
1412 }
1413
1424 template<std::float_round_style R> unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign = 0) {
1425 int i = my >= mx, s;
1426 exp += i;
1427 if(exp > 29)
1428 return overflow<R>(sign);
1429 if(exp < -11)
1430 return underflow<R>(sign);
1431 uint32 m = divide64(my>>(i+1), mx, s);
1432 return fixed2half<R,30,false,false,true>(m, exp, sign, s);
1433 }
1434
/// Shared kernel for the inverse hyperbolic ("area") functions.
/// Squares the argument's significand, adds (S true) or subtracts (S false) one, takes the square root,
/// adds the argument's own mantissa and feeds the sum through log2()/log2_post().
/// \tparam R rounding mode
/// \tparam S true for the "square plus one" variant, false for "square minus one" (presumably asinh vs. acosh - confirm at call sites)
/// \param arg half-precision bit pattern; its sign bit is only propagated when S is true
/// \return half-precision bit pattern produced by log2_post()
1444 template<std::float_round_style R,bool S> unsigned int area(unsigned int arg) {
1445 int abs = arg & 0x7FFF, expx = (abs>>10) + (abs<=0x3FF) - 15, expy = -15, ilog, i;
1446 uint32 mx = static_cast<uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << 20, my, r;
// Normalize subnormal input so bit 10 is set, tracking the shift in expy.
1447 for(; abs<0x400; abs<<=1,--expy) ;
1448 expy += abs >> 10;
// r = significand squared; i records whether the product reached bit 31.
1449 r = ((abs&0x3FF)|0x400) << 5;
1450 r *= r;
1451 i = r >> 31;
1452 expy = 2*expy + i;
1453 r >>= i;
1454 if(S) {
1455 if(expy < 0) {
// Square is below one: align it under the constant 0x40000000, keeping shifted-out bits as a sticky bit.
1456 r = 0x40000000 + ((expy>-30) ? ((r>>-expy)|((r&((static_cast<uint32>(1)<<-expy)-1))!=0)) : 1);
1457 expy = 0;
1458 } else {
1459 r += 0x40000000 >> expy;
1460 i = r >> 31;
1461 r = (r>>i) | (r&i);
1462 expy += i;
1463 }
1464 } else {
// Subtract the aligned constant and renormalize.
1465 r -= 0x40000000 >> expy;
1466 for(; r<0x40000000; r<<=1,--expy) ;
1467 }
1468 my = sqrt<30>(r, expy);
1469 my = (my<<15) + (r<<14)/my;
// Align the smaller term to the larger exponent before adding them.
1470 if(S) {
1471 mx >>= expy - expx;
1472 ilog = expy;
1473 } else {
1474 my >>= expx - expy;
1475 ilog = expx;
1476 }
1477 my += mx;
1478 i = my >> 31;
// G is 1 only for S with round-to-nearest; it raises the precision argument of log2() by one and adds 8 to its result.
1479 static const int G = S && (R==std::round_to_nearest);
1480 return log2_post<R,0xB8AA3B2A>(log2(my>>i, 26+S+G)+(G<<3), ilog+i, 17, arg&(static_cast<unsigned>(S)<<15));
1481 }
1482
1484 struct f31 {
1488 constexpr f31(uint32 mant, int e) : m(mant), exp(e) {}
1489
1492 f31(unsigned int abs) : exp(-15) {
1493 for(; abs<0x400; abs<<=1,--exp) ;
1494 m = static_cast<uint32>((abs&0x3FF)|0x400) << 21;
1495 exp += (abs>>10);
1496 }
1497
1502 friend f31 operator+(f31 a, f31 b) {
1503 if(b.exp > a.exp)
1504 std::swap(a, b);
1505 int d = a.exp - b.exp;
1506 uint32 m = a.m + ((d<32) ? (b.m>>d) : 0);
1507 int i = (m&0xFFFFFFFF) < a.m;
1508 return f31(((m+i)>>i)|0x80000000, a.exp+i);
1509 }
1510
1515 friend f31 operator-(f31 a, f31 b) {
1516 int d = a.exp - b.exp, exp = a.exp;
1517 uint32 m = a.m - ((d<32) ? (b.m>>d) : 0);
1518 if(!m)
1519 return f31(0, -32);
1520 for(; m<0x80000000; m<<=1,--exp) ;
1521 return f31(m, exp);
1522 }
1523
1528 friend f31 operator*(f31 a, f31 b) {
1529 uint32 m = multiply64(a.m, b.m);
1530 int i = m >> 31;
1531 return f31(m<<(1-i), a.exp + b.exp + i);
1532 }
1533
1538 friend f31 operator/(f31 a, f31 b) {
1539 int i = a.m >= b.m, s;
1540 uint32 m = divide64((a.m+i)>>i, b.m, s);
1541 return f31(m, a.exp - b.exp + i - 1);
1542 }
1543
1545 int exp;
1546 };
1547
/// Error-function kernel evaluated in f31 arithmetic.
/// Builds t = 1/(1 + c*|x|), evaluates a polynomial in t, divides by an exp2()-based exponential of the
/// scaled square of x and rounds with fixed2half().
/// \tparam R rounding mode
/// \tparam C selects the second result form below (presumably the complementary error function - confirm at call sites)
/// \param arg half-precision bit pattern; NOTE(review): f31(abs) does not terminate for abs == 0, so callers presumably filter zero - confirm
/// \return half-precision bit pattern
1558 template<std::float_round_style R,bool C> unsigned int erf(unsigned int arg) {
1559 unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
1560 f31 x(abs), x2 = x * x * f31(0xB8AA3B29, 0), t = f31(0x80000000, 0) / (f31(0x80000000, 0)+f31(0xA7BA054A, -2)*x), t2 = t * t;
1561 f31 e = ((f31(0x87DC2213, 0)*t2+f31(0xB5F0E2AE, 0))*t2+f31(0x82790637, -2)-(f31(0xBA00E2B8, 0)*t2+f31(0x91A98E62, -2))*t) * t /
1562 ((x2.exp<0) ? f31(exp2((x2.exp>-32) ? (x2.m>>-x2.exp) : 0, 30), 0) : f31(exp2((x2.m<<x2.exp)&0x7FFFFFFF, 22), x2.m>>(31-x2.exp)));
// For !C or a negative argument the result is formed as 0x80000000 minus the shifted e; otherwise e is rounded
// directly, with an early underflow when its exponent is below -25.
1563 return (!C || sign) ? fixed2half<R,31,false,true,true>(0x80000000-(e.m>>(C-e.exp)), 14+C, sign&(C-1U)) :
1564 (e.exp<-25) ? underflow<R>() : fixed2half<R,30,false,false,true>(e.m>>1, e.exp+14, 0, e.m&1);
1565 }
1566
/// Gamma-function kernel evaluated in f31 arithmetic.
/// The commented-out double-precision snippet below shows the reference formula (a rational series `s`,
/// then log(s) + (arg-0.5)*log(t) - t); the live code evaluates the same shape with fixed-point constants.
/// Negative arguments are additionally handled through a sincos()-based term combined with the constant `pi`.
/// \tparam R rounding mode
/// \tparam L when true the logarithmic branch is taken and no final exp2() is applied (presumably lgamma vs. tgamma - confirm at call sites)
/// \param arg half-precision bit pattern
/// \return half-precision bit pattern, or the underflow<R>()/overflow<R>() result when the exponent leaves the range checked below
1576 template<std::float_round_style R,bool L> unsigned int gamma(unsigned int arg) {
1577/* static const double p[] ={ 2.50662827563479526904, 225.525584619175212544, -268.295973841304927459, 80.9030806934622512966, -5.00757863970517583837, 0.0114684895434781459556 };
1578 double t = arg + 4.65, s = p[0];
1579 for(unsigned int i=0; i<5; ++i)
1580 s += p[i+1] / (arg+i);
1581 return std::log(s) + (arg-0.5)*std::log(t) - t;
1582*/ static const f31 pi(0xC90FDAA2, 1), lbe(0xB8AA3B29, 0);
1583 unsigned int abs = arg & 0x7FFF, sign = arg & 0x8000;
1584 bool bsign = sign != 0;
// z = |arg|; x = z + 1 for negative arguments, z otherwise; t = x + constant; s = rational series in x.
1585 f31 z(abs), x = sign ? (z+f31(0x80000000, 0)) : z, t = x + f31(0x94CCCCCD, 2), s =
1586 f31(0xA06C9901, 1) + f31(0xBBE654E2, -7)/(x+f31(0x80000000, 2)) + f31(0xA1CE6098, 6)/(x+f31(0x80000000, 1))
1587 + f31(0xE1868CB7, 7)/x - f31(0x8625E279, 8)/(x+f31(0x80000000, 0)) - f31(0xA03E158F, 2)/(x+f31(0xC0000000, 1));
// Replace s by a logarithm of itself: integer part from s.exp, fraction from log2(), divided by lbe.
1588 int i = (s.exp>=2) + (s.exp>=4) + (s.exp>=8) + (s.exp>=16);
1589 s = f31((static_cast<uint32>(s.exp)<<(31-i))+(log2(s.m>>1, 28)>>i), i) / lbe;
// The (x - 0.5)*log(t) term is skipped when x is exactly f31(0x80000000, -1).
1590 if(x.exp != -1 || x.m != 0x80000000) {
1591 i = (t.exp>=2) + (t.exp>=4) + (t.exp>=8);
1592 f31 l = f31((static_cast<uint32>(t.exp)<<(31-i))+(log2(t.m>>1, 30)>>i), i) / lbe;
1593 s = (x.exp<-1) ? (s-(f31(0x80000000, -1)-x)*l) : (s+(x-f31(0x80000000, -1))*l);
1594 }
1595 s = x.exp ? (s-t) : (t-s);
1596 if(bsign) {
// Negative argument: reduce z to its fractional part and turn it into a sine term via sincos().
1597 if(z.exp >= 0) {
1598 sign &= (L|((z.m>>(31-z.exp))&1)) - 1;
1599 for(z=f31((z.m<<(1+z.exp))&0xFFFFFFFF, -1); z.m<0x80000000; z.m<<=1,--z.exp) ;
1600 }
1601 if(z.exp == -1)
1602 z = f31(0x80000000, 0) - z;
1603 if(z.exp < -1) {
1604 z = z * pi;
1605 z.m = sincos(z.m>>(1-z.exp), 30).first;
1606 for(z.exp=1; z.m<0x80000000; z.m<<=1,--z.exp) ;
1607 }
1608 else
1609 z = f31(0x80000000, 0);
1610 } if(L) {
// Logarithmic branch: combine s with the logarithm of the sine term and pick the result sign.
1611 if(bsign) {
1612 f31 l(0x92868247, 0);
1613 if(z.exp < 0) {
1614 uint32 m = log2((z.m+1)>>1, 27);
1615 z = f31(-((static_cast<uint32>(z.exp)<<26)+(m>>5)), 5);
1616 for(; z.m<0x80000000; z.m<<=1,--z.exp) ;
1617 l = l + z / lbe;
1618 }
1619 sign = static_cast<unsigned>(x.exp&&(l.exp<s.exp||(l.exp==s.exp&&l.m<s.m))) << 15;
1620 s = sign ? (s-l) : x.exp ? (l-s) : (l+s);
1621 } else {
1622 sign = static_cast<unsigned>(x.exp==0) << 15;
1623 if(s.exp < -24)
1624 return underflow<R>(sign);
1625 if(s.exp > 15)
1626 return overflow<R>(sign);
1627 }
1628 } else {
// Non-logarithmic branch: exponentiate s through exp2() after rescaling by lbe.
1629 s = s * lbe;
1630 uint32 m;
1631 if(s.exp < 0) {
1632 m = s.m >> -s.exp;
1633 s.exp = 0;
1634 } else {
1635 m = (s.m<<s.exp) & 0x7FFFFFFF;
1636 s.exp = (s.m>>(31-s.exp));
1637 }
1638 s.m = exp2(m, 27);
1639 if(!x.exp)
1640 s = f31(0x80000000, 0) / s;
1641 if(bsign) {
1642 if(z.exp < 0)
1643 s = s * z;
1644 s = pi / s;
1645 if(s.exp < -24)
1646 return underflow<R>(sign);
// Positive argument with no fractional mantissa bits below the integer part: the bits are assembled directly, bypassing fixed2half().
1647 } else if(z.exp > 0 && !(z.m&((1<<(31-z.exp))-1)))
1648 return ((s.exp+14)<<10) + (s.m>>21);
1649 if(s.exp > 15)
1650 return overflow<R>(sign);
1651 }
1652 return fixed2half<R,31,false,false,true>(s.m, s.exp+14, sign);
1653 }
1654
1655
1656 template<class,class,std::float_round_style> struct half_caster;
1657
/// Concept satisfied by every built-in integral or floating-point type.
/// It constrains the converting constructor, converting assignment and mixed comparisons of `half`.
1658 template <class T>
1659 concept arithmetic = std::integral<T> || std::floating_point<T>;
1660 }
1661
/// Half-precision floating-point type.
/// The value is stored as 16 raw bits in `data_`; arithmetic is provided by the free operators and
/// functions that are befriended at the end of the class.
1679 class half {
1680 public:
1683
/// Default constructor: value-initializes the stored bits.
1687 constexpr half() noexcept : data_() {}
1688
1692 //explicit half(float rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(rhs))) {}
1693
/// Converting constructor from any arithmetic type (not explicit, so the conversion is implicit).
/// The value is first cast to float and then converted with float2half<round_style>.
1697 template<detail::arithmetic T>
1698 half(T rhs) : data_(static_cast<detail::uint16>(detail::float2half<round_style>(static_cast<float>(rhs)))) {}
1699
/// Implicit conversion to single precision via half2float<float>.
1702 operator float() const { return detail::half2float<float>(data_); }
1703
/// Assignment from single precision, converted with float2half<round_style>.
1708 half& operator=(const float &rhs) { data_ = static_cast<detail::uint16>(detail::float2half<round_style>(rhs)); return *this; }
1709
/// Assignment from any arithmetic type: the value is cast to float and forwarded to the float assignment.
1710 template<detail::arithmetic T>
1711 half& operator=(const T &rhs) { return *this = static_cast<float>(rhs); }
1712
1716
/// Compound assignments: each evaluates the matching binary operator and assigns the result back.
1722 half& operator+=(half rhs) { return *this = *this + rhs; }
1723
1729 half& operator-=(half rhs) { return *this = *this - rhs; }
1730
1736 half& operator*=(half rhs) { return *this = *this * rhs; }
1737
1743 half& operator/=(half rhs) { return *this = *this / rhs; }
1744
1745 /*
1750 half& operator+=(float rhs) { return *this = *this + rhs; }
1751
1756 half& operator-=(float rhs) { return *this = *this - rhs; }
1757
1762 half& operator*=(float rhs) { return *this = *this * rhs; }
1763
1768 half& operator/=(float rhs) { return *this = *this / rhs; }
1769 */
1770
1774
/// Prefix increment: adds the half with bit pattern 0x3C00.
1778 half& operator++() { return *this = *this + half(detail::binary, 0x3C00); }
1779
/// Prefix decrement: adds the half with bit pattern 0xBC00 (0x3C00 with the sign bit set).
1783 half& operator--() { return *this = *this + half(detail::binary, 0xBC00); }
1784
/// Postfix increment: returns the value held before incrementing.
1788 half operator++(int) { half out(*this); ++*this; return out; }
1789
/// Postfix decrement: returns the value held before decrementing.
1793 half operator--(int) { half out(*this); --*this; return out; }
/// Raw 16-bit representation.
1795 detail::uint16 get_data() const{ return data_; }
1796
1797 private:
/// Rounding mode used by the conversions of this class, taken from the HALF_ROUND_STYLE macro.
1799 static const std::float_round_style round_style = (std::float_round_style)(HALF_ROUND_STYLE);
1800
/// Bitwise constructor: stores `bits` unchanged (truncated to 16 bits); the tag parameter selects this overload.
1803 constexpr half(detail::binary_t, unsigned int bits) noexcept : data_(static_cast<detail::uint16>(bits)) {}
1804
/// Stored bit pattern.
1806 detail::uint16 data_;
1807
// Free operators and functions that read or build the bit pattern directly.
// NOTE(review): the listing numbering skips 1833-1834 inside this friend list - confirm whether declarations were dropped there.
1808 friend constexpr_NOERR bool operator==(half, half);
1809 template<detail::arithmetic T> friend constexpr_NOERR bool operator==(half, T);
1810 friend constexpr_NOERR std::partial_ordering operator<=>(half, half);
1811 template <detail::arithmetic T> friend constexpr_NOERR std::partial_ordering operator<=>(half, T);
1812 friend constexpr half operator+(half);
1813 friend constexpr half operator-(half);
1814 friend half operator+(half, half);
1815 template<class T> friend half operator+(half, T);
1816 template<class T> friend half operator+(T, half);
1817 friend half operator-(half, half);
1818 template<class T> friend half operator-(half, T);
1819 template<class T> friend half operator-(T, half);
1820 friend half operator*(half, half);
1821 template<class T> friend half operator*(half, T);
1822 template<class T> friend half operator*(T, half);
1823 friend half operator/(half, half);
1824 template<class T> friend half operator/(half, T);
1825 template<class T> friend half operator/(T, half);
1826 template<class charT,class traits> friend std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits>&, half);
1827 template<class charT,class traits> friend std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits>&, half&);
1828 friend constexpr half fabs(half);
1829 friend half fmod(half, half);
1830 friend half remainder(half, half);
1831 friend half remquo(half, half, int*);
1832 friend half fma(half, half, half);
1835 friend half fdim(half, half);
1836 friend half nanh(const char*);
1837 friend half exp(half);
1838 friend half exp2(half);
1839 friend half expm1(half);
1840 friend half log(half);
1841 friend half log10(half);
1842 friend half log2(half);
1843 friend half log1p(half);
1844 friend half sqrt(half);
1845 friend half cbrt(half);
1846 friend half hypot(half, half);
1847 friend half hypot(half, half, half);
1848 friend half pow(half, half);
1849 friend void sincos(half, half*, half*);
1850 friend half sin(half);
1851 friend half cos(half);
1852 friend half tan(half);
1853 friend half asin(half);
1854 friend half acos(half);
1855 friend half atan(half);
1856 friend half atan2(half, half);
1857 friend half sinh(half);
1858 friend half cosh(half);
1859 friend half tanh(half);
1860 friend half asinh(half);
1861 friend half acosh(half);
1862 friend half atanh(half);
1863 friend half erf(half);
1864 friend half erfc(half);
1865 friend half lgamma(half);
1866 friend half tgamma(half);
1867 friend half ceil(half);
1868 friend half floor(half);
1869 friend half trunc(half);
1870 friend half round(half);
1871 friend long lround(half);
1872 friend half rint(half);
1873 friend long lrint(half);
1874 friend half nearbyint(half);
1875 friend long long llround(half);
1876 friend long long llrint(half);
1877 friend half frexp(half, int*);
1878 friend half scalbln(half, long);
1879 friend half modf(half, half*);
1880 friend int ilogb(half);
1881 friend half logb(half);
1882 friend half nextafter(half, half);
1883 friend half nexttoward(half, long double);
1884 friend constexpr half copysign(half, half);
1885 friend constexpr int fpclassify(half);
1886 friend constexpr bool isfinite(half);
1887 friend constexpr bool isinf(half);
1888 friend constexpr bool isnan(half);
1889 friend constexpr bool isnormal(half);
1890 friend constexpr bool signbit(half);
1891 friend constexpr bool isgreater(half, half);
1892 friend constexpr bool isgreaterequal(half, half);
1893 friend constexpr bool isless(half, half);
1894 friend constexpr bool islessequal(half, half);
1895 friend constexpr bool islessgreater(half, half);
1896 template<class,class,std::float_round_style> friend struct detail::half_caster;
1897 friend class std::numeric_limits<half>;
1898 friend struct std::hash<half>;
1899 friend half literal::operator ""_h(long double);
1900 };
1901
1902 namespace literal {
1910 inline half operator ""_h(long double value) { return half(detail::binary, detail::float2half<half::round_style>(value)); }
1911 }
1912
1913 namespace detail {
1920 template<class T,class U,std::float_round_style R=(std::float_round_style)(HALF_ROUND_STYLE)> struct half_caster {};
1921 template<class U,std::float_round_style R> struct half_caster<half,U,R> {
1922 static_assert(std::is_arithmetic<U>::value, "half_cast from non-arithmetic type unsupported");
1923 static half cast(U arg) { return cast_impl(arg, is_float<U>()); };
1924 private:
1925 static half cast_impl(U arg, true_type) { return half(binary, float2half<R>(arg)); }
1926 static half cast_impl(U arg, false_type) { return half(binary, int2half<R>(arg)); }
1927 };
1928 template<class T,std::float_round_style R> struct half_caster<T,half,R> {
1929 static_assert(std::is_arithmetic<T>::value, "half_cast to non-arithmetic type unsupported");
1930 static T cast(half arg) { return cast_impl(arg, is_float<T>()); }
1931 private:
1932 static T cast_impl(half arg, true_type) { return half2float<T>(arg.data_); }
1933 static T cast_impl(half arg, false_type) { return half2int<R,true,true,T>(arg.data_); }
1934 };
1935 template<std::float_round_style R> struct half_caster<half,half,R> {
1936 static half cast(half arg) { return arg; }
1937 };
1938 }
1939}
1940
1942namespace std {
/// Numeric limits for the half-precision type.
/// The value-returning members build their result from a fixed 16-bit pattern through the private
/// bitwise constructor of `half` (this class is befriended by `half`).
1945 template<> class numeric_limits<half_float::half> {
1946 public:
1948 static constexpr bool is_specialized = true;
1949
1951 static constexpr bool is_signed = true;
1952
1954 static constexpr bool is_integer = false;
1955
1957 static constexpr bool is_exact = false;
1958
1960 static constexpr bool is_modulo = false;
1961
1963 static constexpr bool is_bounded = true;
1964
1966 static constexpr bool is_iec559 = true;
1967
1969 static constexpr bool has_infinity = true;
1970
1972 static constexpr bool has_quiet_NaN = true;
1973
1975 static constexpr bool has_signaling_NaN = true;
1976
1977// has_denorm is only provided when compiling as a standard older than C++23.
1978#if __cplusplus < 202300L
1980 static constexpr float_denorm_style has_denorm = denorm_present;
1981#endif
1982
1984 static constexpr bool has_denorm_loss = false;
1985
// `traps` mirrors whether the library is configured to throw on floating-point errors.
1986 #if HALF_ERRHANDLING_THROWS
1987 static constexpr bool traps = true;
1988 #else
1990 static constexpr bool traps = false;
1991 #endif
1992
1994 static constexpr bool tinyness_before = false;
1995
/// Rounding mode: the one `half` itself was configured with.
1997 static constexpr float_round_style round_style = half_float::half::round_style;
1998
/// Significant binary digits, counting the implicit leading bit.
2000 static constexpr int digits = 11;
2001
2003 static constexpr int digits10 = 3;
2004
2006 static constexpr int max_digits10 = 5;
2007
2009 static constexpr int radix = 2;
2010
2012 static constexpr int min_exponent = -13;
2013
2015 static constexpr int min_exponent10 = -4;
2016
2018 static constexpr int max_exponent = 16;
2019
2021 static constexpr int max_exponent10 = 4;
2022
/// Smallest positive normal value (bits 0x0400).
2024 static constexpr half_float::half min() noexcept { return half_float::half(half_float::detail::binary, 0x0400); }
2025
/// Most negative finite value (bits 0xFBFF).
2027 static constexpr half_float::half lowest() noexcept { return half_float::half(half_float::detail::binary, 0xFBFF); }
2028
/// Largest finite value (bits 0x7BFF).
2030 static constexpr half_float::half max() noexcept { return half_float::half(half_float::detail::binary, 0x7BFF); }
2031
/// Machine epsilon (bits 0x1400).
2033 static constexpr half_float::half epsilon() noexcept { return half_float::half(half_float::detail::binary, 0x1400); }
2034
/// Maximum rounding error: bits 0x3800 for round-to-nearest, 0x3C00 for every other rounding mode.
2036 static constexpr half_float::half round_error() noexcept
2037 { return half_float::half(half_float::detail::binary, (round_style==std::round_to_nearest) ? 0x3800 : 0x3C00); }
2038
/// Positive infinity (bits 0x7C00).
2040 static constexpr half_float::half infinity() noexcept { return half_float::half(half_float::detail::binary, 0x7C00); }
2041
/// Quiet NaN (bits 0x7FFF).
2043 static constexpr half_float::half quiet_NaN() noexcept { return half_float::half(half_float::detail::binary, 0x7FFF); }
2044
/// Signaling NaN (bits 0x7DFF).
2046 static constexpr half_float::half signaling_NaN() noexcept { return half_float::half(half_float::detail::binary, 0x7DFF); }
2047
/// Smallest positive subnormal value (bits 0x0001).
2049 static constexpr half_float::half denorm_min() noexcept { return half_float::half(half_float::detail::binary, 0x0001); }
2050 };
2051
2054 template<> struct hash<half_float::half> {
2057
2059 typedef size_t result_type;
2060
2064 result_type operator()(argument_type arg) const { return hash<half_float::detail::uint16>()(arg.data_&-static_cast<unsigned>(arg.data_!=0x8000)); }
2065 };
2066
#if defined(__cpp_lib_format)
    /// std::format support for half-precision values.
    /// The value is rendered with the stream insertion operator, so the text equals what `os << value` prints.
    template <>
    struct formatter<half_float::half>
    {
        /// No format options are parsed: the parse position is returned where it started.
        constexpr auto parse(std::format_parse_context& ctx)
        {
            return ctx.begin();
        }

        /// Streams the value into a string buffer and copies that text to the output.
        auto format(const half_float::half& value, std::format_context& ctx) const
        {
            std::ostringstream buffer;
            buffer << value;
            const auto text = buffer.str();
            return std::format_to(ctx.out(), "{}", text);
        }
    };
#endif
2084}
2085
2086namespace half_float {
2090
// NOTE(review): the signature line of this function is not present in this listing (numbering jumps to 2098) - confirm against the original header.
// Equal only when detail::compsignal does not flag the operands and either the bit patterns match
// or both operands have all magnitude bits clear (so +0 and -0 compare equal).
2098 return !detail::compsignal(x.data_, y.data_) && (x.data_==y.data_ || !((x.data_|y.data_)&0x7FFF));
2099 }
2100 template<detail::arithmetic T>
2101 inline constexpr_NOERR bool operator==(half x, T y) { return x == static_cast<half>(y); }
2102
2103 inline constexpr_NOERR std::partial_ordering operator<=>(half x, half y)
2104 {
2105 auto x_data = (x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15);
2106 auto y_data = (y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15);
2107 if (x_data < y_data) return std::partial_ordering::less;
2108 if (x_data > y_data) return std::partial_ordering::greater;
2109 if (x_data == y_data) return std::partial_ordering::equivalent;
2110 return std::partial_ordering::unordered;
2111 }
2112
2113 template <detail::arithmetic T>
2114 inline constexpr_NOERR std::partial_ordering operator<=>(half x, T y)
2115 {
2116 return x <=> half(y);
2117 }
2118
2123
2127 inline constexpr half operator+(half arg) { return arg; }
2128
2132 inline constexpr half operator-(half arg) { return half(detail::binary, arg.data_^0x8000); }
2133
/// Addition of two half-precision values, carried out on the bit patterns.
/// \param x left operand
/// \param y right operand
/// \return rounded sum; the NaN/infinity and zero cases are resolved in the early returns below
2141 inline half operator+(half x, half y) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2142 -> 2144) - confirm against the original header.
2142 #ifdef HALF_ARITHMETIC_TYPE
2144 #else
2145 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF;
// `sub` is set when the operands have different signs, i.e. the magnitudes must be subtracted.
2146 bool sub = ((x.data_^y.data_)&0x8000) != 0;
// NaN or infinity involved: NaN goes through detail::signal, infinities of opposite sign give detail::invalid().
2147 if(absx >= 0x7C00 || absy >= 0x7C00)
2148 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) : (absy!=0x7C00) ? x.data_ :
2149 (sub && absx==0x7C00) ? detail::invalid() : y.data_);
// x is zero: the result is y, or for two zeros a zero whose sign depends on the rounding mode.
2150 if(!absx)
2151 return absy ? y : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (x.data_|y.data_) : (x.data_&y.data_));
2152 if(!absy)
2153 return x;
// The result takes the sign of the operand with the larger magnitude; afterwards absx holds that larger magnitude.
2154 unsigned int sign = ((sub && absy>absx) ? y.data_ : x.data_) & 0x8000;
2155 if(absy > absx)
2156 std::swap(absx, absy);
// Mantissas carry three extra low bits; d is the exponent difference used to align my.
2157 int exp = (absx>>10) + (absx<=0x3FF), d = exp - (absy>>10) - (absy<=0x3FF), mx = ((absx&0x3FF)|((absx>0x3FF)<<10)) << 3, my;
2158 if(d < 13) {
2159 my = ((absy&0x3FF)|((absy>0x3FF)<<10)) << 3;
// Shifted-out bits are kept as a sticky bit in bit 0.
2160 my = (my>>d) | ((my&((1<<d)-1))!=0);
2161 } else
2162 my = 1;
2163 if(sub) {
// Exact cancellation gives zero, negative only when rounding toward negative infinity.
2164 if(!(mx-=my))
2165 return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
2166 for(; mx<0x2000 && exp>1; mx<<=1,--exp) ;
2167 } else {
2168 mx += my;
2169 int i = mx >> 14;
// NOTE(review): the statement guarded by this `if` appears to be missing from this listing (numbering jumps 2170 -> 2172);
// as printed, the next line would become the conditional body - confirm against the original header.
2170 if((exp+=i) > 30)
2172 mx = (mx>>i) | (mx&i);
2173 }
2174 return half(detail::binary, detail::rounded<half::round_style,false>(sign+((exp-1)<<10)+(mx>>3), (mx>>2)&1, (mx&0x3)!=0));
2175 #endif
2176 }
2177 template<class T>
2178 inline half operator+(half x, T y) { return x + static_cast<half>(y); }
2179 template<class T>
2180 inline half operator+(T x, half y) { return static_cast<half>(x) + y; }
2181
/// Subtraction of two half-precision values.
/// \param x left operand
/// \param y right operand
/// \return x + (-y) in the branch shown here
2189 inline half operator-(half x, half y) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2190 -> 2192) - confirm against the original header.
2190 #ifdef HALF_ARITHMETIC_TYPE
2192 #else
2193 return x + (-y);
2194 #endif
2195 }
2196 template<class T>
2197 inline half operator-(half x, T y) { return x - static_cast<half>(y); }
2198 template<class T>
2199 inline half operator-(T x, half y) { return static_cast<half>(x) - y; }
2200
/// Multiplication of two half-precision values, carried out on the bit patterns.
/// \param x left operand
/// \param y right operand
/// \return product; the NaN/infinity and zero cases are resolved in the early returns below
2208 inline half operator*(half x, half y) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2209 -> 2211) - confirm against the original header.
2209 #ifdef HALF_ARITHMETIC_TYPE
2211 #else
2212 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -16;
// The sign of a product is the XOR of the operand signs.
2213 unsigned int sign = (x.data_^y.data_) & 0x8000;
// NaN goes through detail::signal; infinity times zero is detail::invalid(); otherwise a signed infinity.
2214 if(absx >= 0x7C00 || absy >= 0x7C00)
2215 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2216 ((absx==0x7C00 && !absy)||(absy==0x7C00 && !absx)) ? detail::invalid() : (sign|0x7C00));
2217 if(!absx || !absy)
2218 return half(detail::binary, sign);
// Normalize subnormal operands, tracking the shifts in exp.
2219 for(; absx<0x400; absx<<=1,--exp) ;
2220 for(; absy<0x400; absy<<=1,--exp) ;
2221 detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
// i is set when the product reached bit 21; s keeps the bit that such a normalizing shift would drop.
2222 int i = m >> 21, s = m & i;
2223 exp += (absx>>10) + (absy>>10) + i;
// NOTE(review): the bodies of both range checks and the final return are missing from this listing
// (numbering jumps 2224 -> 2226 -> 2229) - confirm against the original header.
2224 if(exp > 29)
2226 else if(exp < -11)
2229 #endif
2230 }
2231 template<class T>
2232 inline half operator*(half x, T y) { return x * static_cast<half>(y); }
2233 template<class T>
2234 inline half operator*(T x, half y) { return static_cast<half>(x) * y; }
2235
/// Division of two half-precision values, carried out on the bit patterns.
/// \param x dividend
/// \param y divisor
/// \return quotient; the NaN/infinity and zero cases are resolved in the early returns below
2244 inline half operator/(half x, half y) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2245 -> 2247) - confirm against the original header.
2245 #ifdef HALF_ARITHMETIC_TYPE
2247 #else
2248 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = 14;
2249 unsigned int sign = (x.data_^y.data_) & 0x8000;
// NaN goes through detail::signal; inf/inf is detail::invalid(); inf/finite is a signed infinity; finite/inf is a signed zero.
2250 if(absx >= 0x7C00 || absy >= 0x7C00)
2251 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2252 (absx==absy) ? detail::invalid() : (sign|((absx==0x7C00) ? 0x7C00 : 0)));
// 0/0 is detail::invalid(); 0/nonzero is a signed zero.
2253 if(!absx)
2254 return half(detail::binary, absy ? sign : detail::invalid());
// Nonzero divided by zero is reported through detail::pole.
2255 if(!absy)
2256 return half(detail::binary, detail::pole(sign));
// Normalize subnormal operands; the divisor's shifts raise the exponent, the dividend's lower it.
2257 for(; absx<0x400; absx<<=1,--exp) ;
2258 for(; absy<0x400; absy<<=1,++exp) ;
2259 detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
// i is set when the dividend mantissa is smaller than the divisor mantissa.
2260 int i = mx < my;
2261 exp += (absx>>10) - (absy>>10) - i;
// NOTE(review): the bodies of both range checks and the final return are missing from this listing
// (numbering jumps 2262 -> 2264 -> 2266 and 2267 -> 2269) - confirm against the original header.
2262 if(exp > 29)
2264 else if(exp < -11)
2266 mx <<= 12 + i;
2267 my <<= 1;
2269 #endif
2270 }
2271 template<class T>
2272 inline half operator/(half x, T y) { return x / static_cast<half>(y); }
2273 template<class T>
2274 inline half operator/(T x, half y) { return static_cast<half>(x) / y; }
2275
2280
2286 template<class charT,class traits> std::basic_ostream<charT,traits>& operator<<(std::basic_ostream<charT,traits> &out, half arg) {
2287 #ifdef HALF_ARITHMETIC_TYPE
2288 return out << detail::half2float<detail::internal_t>(arg.data_);
2289 #else
2290 return out << detail::half2float<float>(arg.data_);
2291 #endif
2292 }
2293
/// Input operator: reads a value of a wider floating-point type from the stream.
/// \param in input stream
/// \param arg half to read into
/// \return reference to the input stream
2303 template<class charT,class traits> std::basic_istream<charT,traits>& operator>>(std::basic_istream<charT,traits> &in, half &arg) {
// The temporary is the configured arithmetic type when there is one, double otherwise.
2304 #ifdef HALF_ARITHMETIC_TYPE
2305 detail::internal_t f;
2306 #else
2307 double f;
2308 #endif
// NOTE(review): the statement guarded by this `if` (presumably the store into `arg`) is missing from this listing
// (numbering jumps 2309 -> 2311); as printed, `return in;` would become the conditional body - confirm against the original header.
2309 if(in >> f)
2311 return in;
2312 }
2313
2318
2323 inline constexpr half fabs(half arg) { return half(detail::binary, arg.data_&0x7FFF); }
2324
2329 inline constexpr half abs(half arg) { return fabs(arg); }
2330
/// Remainder of division; the result carries the sign of \a x.
/// \param x dividend
/// \param y divisor
/// \return remainder computed by detail::mod<false,false> on the magnitudes
2337 inline half fmod(half x, half y) {
2338 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
// NaN goes through detail::signal; an infinite x is detail::invalid(); a finite x with infinite y is returned as is.
2339 if(absx >= 0x7C00 || absy >= 0x7C00)
2340 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2341 (absx==0x7C00) ? detail::invalid() : x.data_);
// NOTE(review): the statement guarded by `if(!absy)` is missing from this listing (numbering jumps 2342 -> 2344);
// as printed, the following `if(!absx)` would become its body - confirm against the original header.
2342 if(!absy)
2344 if(!absx)
2345 return x;
// Equal magnitudes leave a zero that keeps the sign of x.
2346 if(absx == absy)
2347 return half(detail::binary, sign);
2348 return half(detail::binary, sign|detail::mod<false,false>(absx, absy));
2349 }
2350
/// Remainder of division computed by detail::mod<false,true>; its result is XORed onto the sign of \a x.
/// \param x dividend
/// \param y divisor
/// \return remainder
2357 inline half remainder(half x, half y) {
2358 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, sign = x.data_ & 0x8000;
// NaN goes through detail::signal; an infinite x is detail::invalid(); a finite x with infinite y is returned as is.
2359 if(absx >= 0x7C00 || absy >= 0x7C00)
2360 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2361 (absx==0x7C00) ? detail::invalid() : x.data_);
// NOTE(review): the statement guarded by `if(!absy)` is missing from this listing (numbering jumps 2362 -> 2364);
// as printed, the following `if(absx == absy)` would become its body - confirm against the original header.
2362 if(!absy)
2364 if(absx == absy)
2365 return half(detail::binary, sign);
2366 return half(detail::binary, sign^detail::mod<false,true>(absx, absy));
2367 }
2368
/// Remainder of division that also reports a quotient.
/// \param x dividend
/// \param y divisor
/// \param quo receives the quotient from detail::mod<true,true>, negated when the operand signs differ;
///            it is set to 0 when x is finite and y is infinite
/// \return remainder
2376 inline half remquo(half x, half y, int *quo) {
2377 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, value = x.data_ & 0x8000;
2378 if(absx >= 0x7C00 || absy >= 0x7C00)
2379 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2380 (absx==0x7C00) ? detail::invalid() : (*quo = 0, x.data_));
// NOTE(review): the statement guarded by `if(!absy)` is missing from this listing (numbering jumps 2381 -> 2383);
// as printed, the declaration of `qsign` would become its body - confirm against the original header.
2381 if(!absy)
2383 bool qsign = ((value^y.data_)&0x8000) != 0;
// q stays 1 when the magnitudes are equal; in that case `value` remains the signed zero.
2384 int q = 1;
2385 if(absx != absy)
2386 value ^= detail::mod<true, true>(absx, absy, &q);
2387 return *quo = qsign ? -q : q, half(detail::binary, value);
2388 }
2389
/// Fused multiply-add: x*y + z, with the product kept in a wide mantissa before z is added.
/// \param x first factor
/// \param y second factor
/// \param z addend
/// \return result; the NaN/infinity and zero-product cases are resolved in the early returns below
2400 inline half fma(half x, half y, half z) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch shown here uses fx, fy and fz without a visible declaration, and its
// #else part is empty (numbering jumps 2401 -> 2403 and 2405 -> 2407) - lines appear to be missing; confirm against the original header.
2401 #ifdef HALF_ARITHMETIC_TYPE
2403 #if FP_FAST_FMA
2404 return half(detail::binary, detail::float2half<half::round_style>(std::fma(fx, fy, fz)));
2405 #else
2407 #endif
2408 #else
2409 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, exp = -15;
// `sign` is the sign of the product; `sub` is set when z has the opposite sign.
2410 unsigned int sign = (x.data_^y.data_) & 0x8000;
2411 bool sub = ((sign^z.data_)&0x8000) != 0;
// NaN goes through detail::signal; inf*0 and inf + (-inf) are detail::invalid(); an infinite z with finite product returns z.
2412 if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
2413 return (absx>0x7C00 || absy>0x7C00 || absz>0x7C00) ? half(detail::binary, detail::signal(x.data_, y.data_, z.data_)) :
2414 (absx==0x7C00) ? half(detail::binary, (!absy || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) :
2415 (absy==0x7C00) ? half(detail::binary, (!absx || (sub && absz==0x7C00)) ? detail::invalid() : (sign|0x7C00)) : z;
// Zero product: the result is z, or for a zero z a zero whose sign depends on the rounding mode.
2416 if(!absx || !absy)
2417 return absz ? z : half(detail::binary, (half::round_style==std::round_toward_neg_infinity) ? (z.data_|sign) : (z.data_&sign));
2418 for(; absx<0x400; absx<<=1,--exp) ;
2419 for(; absy<0x400; absy<<=1,--exp) ;
2420 detail::uint32 m = static_cast<detail::uint32>((absx&0x3FF)|0x400) * static_cast<detail::uint32>((absy&0x3FF)|0x400);
2421 int i = m >> 21;
2422 exp += (absx>>10) + (absy>>10) + i;
2423 m <<= 3 - i;
2424 if(absz) {
2425 int expz = 0;
2426 for(; absz<0x400; absz<<=1,--expz) ;
2427 expz += absz >> 10;
2428 detail::uint32 mz = static_cast<detail::uint32>((absz&0x3FF)|0x400) << 13;
// Keep the larger magnitude in (m, exp); when subtracting, the result then takes the sign of z.
2429 if(expz > exp || (expz == exp && mz > m)) {
2430 std::swap(m, mz);
2431 std::swap(exp, expz);
2432 if(sub)
2433 sign = z.data_ & 0x8000;
2434 }
// Align the smaller term, keeping shifted-out bits as a sticky bit.
2435 int d = exp - expz;
2436 mz = (d<23) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2437 if(sub) {
2438 m = m - mz;
// Exact cancellation gives zero, negative only when rounding toward negative infinity.
2439 if(!m)
2440 return half(detail::binary, static_cast<unsigned>(half::round_style==std::round_toward_neg_infinity)<<15);
2441 for(; m<0x800000; m<<=1,--exp) ;
2442 } else {
2443 m += mz;
2444 i = m >> 24;
2445 m = (m>>i) | (m&i);
2446 exp += i;
2447 }
2448 }
// NOTE(review): the bodies of both range checks and the final return are missing from this listing
// (numbering jumps 2449 -> 2451 -> 2454) - confirm against the original header.
2449 if(exp > 30)
2451 else if(exp < -10)
2454 #endif
2455 }
2456
// NOTE(review): the function header is missing from this listing (numbering jumps to 2464); the `<` comparison
// below suggests this is the maximum selector - confirm against the original header.
// y is chosen when it is not NaN and either x is NaN or x's sign-adjusted bit pattern is below y's;
// otherwise x is chosen. The chosen and the other operand are passed to detail::select in that order.
2464 return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <
2465 (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
2466 }
2467
// NOTE(review): the function header is missing from this listing (numbering jumps to 2475); the `>` comparison
// below suggests this is the minimum selector - confirm against the original header.
// y is chosen when it is not NaN and either x is NaN or x's sign-adjusted bit pattern is above y's;
// otherwise x is chosen. The chosen and the other operand are passed to detail::select in that order.
2475 return half(detail::binary, (!isnan(y) && (isnan(x) || (x.data_^(0x8000|(0x8000-(x.data_>>15)))) >
2476 (y.data_^(0x8000|(0x8000-(y.data_>>15)))))) ? detail::select(y.data_, x.data_) : detail::select(x.data_, y.data_));
2477 }
2478
2487 inline half fdim(half x, half y) {
2488 if(isnan(x) || isnan(y))
2489 return half(detail::binary, detail::signal(x.data_, y.data_));
2490 return (x.data_^(0x8000|(0x8000-(x.data_>>15)))) <= (y.data_^(0x8000|(0x8000-(y.data_>>15)))) ? half(detail::binary, 0) : (x-y);
2491 }
2492
2497 inline half nanh(const char *arg) {
2498 unsigned int value = 0x7FFF;
2499 while(*arg)
2500 value ^= static_cast<unsigned>(*arg++) & 0xFF;
2501 return half(detail::binary, value);
2502 }
2503
2508
/// Exponential function, evaluated through detail::exp2 after rescaling the argument.
/// \param arg exponent
/// \return result rounded by detail::exp2_post
2517 inline half exp(half arg) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2518 -> 2520) - confirm against the original header.
2518 #ifdef HALF_ARITHMETIC_TYPE
2520 #else
2521 int abs = arg.data_ & 0x7FFF;
// A zero argument yields the bit pattern 0x3C00.
2522 if(!abs)
2523 return half(detail::binary, 0x3C00);
// Infinity: +inf stays +inf, -inf becomes 0 (the mask is all ones only for a clear sign bit); NaN goes through detail::signal.
2524 if(abs >= 0x7C00)
2525 return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
// NOTE(review): the statement guarded by this large-magnitude check is missing from this listing (numbering jumps 2526 -> 2528);
// as printed, the declaration of `m` would become its body - confirm against the original header.
2526 if(abs >= 0x4C80)
// Scale the mantissa by the constant 0xB8AA3B29 (presumably log2(e) in fixed point - confirm).
2528 detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
// Split the scaled value into an integer part (exp) and a fractional part (m).
2529 int e = (abs>>10) + (abs<=0x3FF), exp;
2530 if(e < 14) {
2531 exp = 0;
2532 m >>= 14 - e;
2533 } else {
2534 exp = m >> (45-e);
2535 m = (m<<(e-14)) & 0x7FFFFFFF;
2536 }
2537 return half(detail::binary, detail::exp2_post<half::round_style,true>(detail::exp2(m, 26), exp, (arg.data_&0x8000)!=0));
2538 #endif
2539 }
2540
/// Binary exponential function.
/// \param arg exponent
/// \return result rounded by detail::exp2_post
2549 inline half exp2(half arg) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2550 -> 2552) - confirm against the original header.
2550 #if defined(HALF_ARITHMETIC_TYPE)
2552 #else
2553 int abs = arg.data_ & 0x7FFF;
// A zero argument yields the bit pattern 0x3C00.
2554 if(!abs)
2555 return half(detail::binary, 0x3C00);
// Infinity: +inf stays +inf, -inf becomes 0; NaN goes through detail::signal.
2556 if(abs >= 0x7C00)
2557 return half(detail::binary, (abs==0x7C00) ? (0x7C00&((arg.data_>>15)-1U)) : detail::signal(arg.data_));
// NOTE(review): the statement guarded by this large-magnitude check is missing from this listing (numbering jumps 2558 -> 2560);
// as printed, the declaration of `e` would become its body - confirm against the original header.
2558 if(abs >= 0x4E40)
// `exp` first holds the full mantissa; its fractional bits feed detail::exp2 and the shift below leaves the integer part.
2560 int e = (abs>>10) + (abs<=0x3FF), exp = (abs&0x3FF) + ((abs>0x3FF)<<10);
2561 detail::uint32 m = detail::exp2((static_cast<detail::uint32>(exp)<<(6+e))&0x7FFFFFFF, 28);
2562 exp >>= 25 - e;
// m == 0x80000000 is the case handled specially here.
// NOTE(review): the body of the `exp > 15` branch is missing from this listing (numbering jumps 2566 -> 2569) - confirm against the original header.
2563 if(m == 0x80000000) {
2564 if(arg.data_&0x8000)
2565 exp = -exp;
2566 else if(exp > 15)
2569 }
2570 return half(detail::binary, detail::exp2_post<half::round_style,true>(m, exp, (arg.data_&0x8000)!=0));
2571 #endif
2572 }
2573
/// Exponential minus one.
/// \param arg exponent
/// \return result rounded by detail::rounded
2583 inline half expm1(half arg) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2584 -> 2586) - confirm against the original header.
2584 #if defined(HALF_ARITHMETIC_TYPE)
2586 #else
2587 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
// Zero is returned unchanged, sign included.
2588 if(!abs)
2589 return arg;
// Infinity: 0x7C00 for +inf, 0x7C00+0x4000 = 0xBC00 for -inf; NaN goes through detail::signal.
2590 if(abs >= 0x7C00)
2591 return half(detail::binary, (abs==0x7C00) ? (0x7C00+(sign>>1)) : detail::signal(arg.data_));
// NOTE(review): the statement guarded by this large-magnitude check is missing from this listing (numbering jumps 2592 -> 2594);
// as printed, the declaration of `m` would become its body - confirm against the original header.
2592 if(abs >= 0x4A00)
// Same argument scaling and integer/fraction split as in exp().
2594 detail::uint32 m = detail::multiply64(static_cast<detail::uint32>((abs&0x3FF)+((abs>0x3FF)<<10))<<21, 0xB8AA3B29);
2595 int e = (abs>>10) + (abs<=0x3FF), exp;
2596 if(e < 14) {
2597 exp = 0;
2598 m >>= 14 - e;
2599 } else {
2600 exp = m >> (45-e);
2601 m = (m<<(e-14)) & 0x7FFFFFFF;
2602 }
2603 m = detail::exp2(m);
2604 if(sign) {
// Negative argument: form the reciprocal where needed, then subtract the shifted value from 0x80000000,
// keeping shifted-out bits and the division remainder flag as a sticky bit.
2605 int s = 0;
2606 if(m > 0x80000000) {
2607 ++exp;
2608 m = detail::divide64(0x80000000, m, s);
2609 }
2610 m = 0x80000000 - ((m>>exp)|((m&((static_cast<detail::uint32>(1)<<exp)-1))!=0)|s);
2611 exp = 0;
2612 } else
// Positive argument: subtract 0x80000000 aligned to the exponent.
2613 m -= (exp<31) ? (0x80000000>>exp) : 1;
// Renormalize, stopping when the exponent reaches zero.
2614 for(exp+=14; m<0x80000000 && exp; m<<=1,--exp) ;
// NOTE(review): the statement guarded by this range check is missing from this listing (numbering jumps 2615 -> 2617);
// as printed, the final return would become its body - confirm against the original header.
2615 if(exp > 29)
2617 return half(detail::binary, detail::rounded<half::round_style,true>(sign+(exp<<10)+(m>>21), (m>>20)&1, (m&0xFFFFF)!=0));
2618 #endif
2619 }
2620
/// Natural logarithm.
/// \param arg function argument
/// \return logarithm of \a arg; zero, negative, infinite and NaN arguments are resolved in the early returns below
2630 inline half log(half arg) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2631 -> 2633) - confirm against the original header.
2631 #ifdef HALF_ARITHMETIC_TYPE
2633 #else
2634 int abs = arg.data_ & 0x7FFF, exp = -15;
// Zero is reported through detail::pole with the sign bit set.
2635 if(!abs)
2636 return half(detail::binary, detail::pole(0x8000));
// Negative argument: detail::invalid() for numbers and -inf, detail::signal for negative NaN patterns.
2637 if(arg.data_ & 0x8000)
2638 return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
// +inf is returned unchanged; NaN goes through detail::signal.
2639 if(abs >= 0x7C00)
2640 return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
2641 for(; abs<0x400; abs<<=1,--exp) ;
2642 exp += abs >> 10;
// NOTE(review): the first line of the return statement is missing from this listing (numbering jumps 2642 -> 2644);
// only its argument continuation is shown - confirm against the original header.
2644 detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 17));
2645 #endif
2646 }
2647
/// Common logarithm.
/// \param arg function argument
/// \return logarithm of \a arg; zero, negative, infinite and NaN arguments are resolved in the early returns below
2657 inline half log10(half arg) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2658 -> 2660) - confirm against the original header.
2658 #ifdef HALF_ARITHMETIC_TYPE
2660 #else
2661 int abs = arg.data_ & 0x7FFF, exp = -15;
// Zero is reported through detail::pole with the sign bit set.
2662 if(!abs)
2663 return half(detail::binary, detail::pole(0x8000));
2664 if(arg.data_ & 0x8000)
2665 return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
2666 if(abs >= 0x7C00)
2667 return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
// Four argument bit patterns are mapped to fixed results without going through the generic path
// (presumably the exact powers of ten 10, 100, 1000 and 10000 - confirm).
2668 switch(abs) {
2669 case 0x4900: return half(detail::binary, 0x3C00);
2670 case 0x5640: return half(detail::binary, 0x4000);
2671 case 0x63D0: return half(detail::binary, 0x4200);
2672 case 0x70E2: return half(detail::binary, 0x4400);
2673 }
2674 for(; abs<0x400; abs<<=1,--exp) ;
2675 exp += abs >> 10;
// NOTE(review): the first line of the return statement is missing from this listing (numbering jumps 2675 -> 2677);
// only its argument continuation is shown - confirm against the original header.
2677 detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 27)+8, exp, 16));
2678 #endif
2679 }
2680
/// Binary logarithm.
/// \param arg function argument
/// \return logarithm of \a arg; zero, negative, infinite and NaN arguments are resolved in the early returns below
2690 inline half log2(half arg) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2691 -> 2693) - confirm against the original header.
2691 #if defined(HALF_ARITHMETIC_TYPE)
2693 #else
2694 int abs = arg.data_ & 0x7FFF, exp = -15, s = 0;
2695 if(!abs)
2696 return half(detail::binary, detail::pole(0x8000));
2697 if(arg.data_ & 0x8000)
2698 return half(detail::binary, (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
2699 if(abs >= 0x7C00)
2700 return (abs==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
// The bit pattern 0x3C00 maps directly to zero.
2701 if(abs == 0x3C00)
2702 return half(detail::binary, 0);
2703 for(; abs<0x400; abs<<=1,--exp) ;
2704 exp += (abs>>10);
// No fraction bits set: the result is the integer exponent itself, encoded directly as a half.
2705 if(!(abs&0x3FF)) {
2706 unsigned int value = static_cast<unsigned>(exp<0) << 15, m = std::abs(exp) << 6;
2707 for(exp=18; m<0x400; m<<=1,--exp) ;
2708 return half(detail::binary, value+(exp<<10)+m);
2709 }
// General case: combine integer part and detail::log2 fraction into one signed value and take its magnitude.
2710 detail::uint32 ilog = exp, sign = detail::sign_mask(ilog), m =
2711 (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 28)>>4))^sign) - sign;
2712 if(!m)
2713 return half(detail::binary, 0);
// Normalize m into the 28-bit range; bits shifted out to the right are collected in the sticky flag s.
2714 for(exp=14; m<0x8000000 && exp; m<<=1,--exp) ;
2715 for(; m>0xFFFFFFF; m>>=1,++exp)
2716 s |= m & 1;
// NOTE(review): the final return statement is missing from this listing (numbering jumps 2716 -> 2718) - confirm against the original header.
2718 #endif
2719 }
2720
/// Natural logarithm of one plus the argument.
/// \param arg function argument
/// \return result; arguments at or below the bit pattern 0xBC00, zero, infinity and NaN are resolved in the early returns below
2731 inline half log1p(half arg) {
// NOTE(review): the HALF_ARITHMETIC_TYPE branch is empty in this listing (numbering jumps 2732 -> 2734) - confirm against the original header.
2732 #if defined(HALF_ARITHMETIC_TYPE)
2734 #else
// Bit patterns >= 0xBC00: exactly 0xBC00 is reported through detail::pole, other numbers and -inf give
// detail::invalid(), negative NaN patterns go through detail::signal.
2735 if(arg.data_ >= 0xBC00)
2736 return half(detail::binary, (arg.data_==0xBC00) ? detail::pole(0x8000) : (arg.data_<=0xFC00) ? detail::invalid() : detail::signal(arg.data_));
2737 int abs = arg.data_ & 0x7FFF, exp = -15;
// Zero and +inf are returned unchanged; NaN goes through detail::signal.
2738 if(!abs || abs >= 0x7C00)
2739 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
2740 for(; abs<0x400; abs<<=1,--exp) ;
2741 exp += abs >> 10;
2742 detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 20;
// Form 1 + arg in fixed point, with the constant 0x40000000 standing for one.
2743 if(arg.data_ & 0x8000) {
2744 m = 0x40000000 - (m>>-exp);
2745 for(exp=0; m<0x40000000; m<<=1,--exp) ;
2746 } else {
2747 if(exp < 0) {
2748 m = 0x40000000 + (m>>-exp);
2749 exp = 0;
2750 } else {
2751 m += 0x40000000 >> exp;
2752 int i = m >> 31;
2753 m >>= i;
2754 exp += i;
2755 }
2756 }
// NOTE(review): the final return statement is missing from this listing (numbering jumps 2756 -> 2758) - confirm against the original header.
2758 #endif
2759 }
2760
2765
/// Square root, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero (either sign) and +infinity are returned unchanged, NaNs go through detail::signal(), and every other
/// negative input (including -infinity) yields detail::invalid().
/// NOTE(review): the embedded numbering jumps over 2776 (the HALF_ARITHMETIC_TYPE body), so this listing
/// is incomplete - confirm against the real header.
2774 inline half sqrt(half arg) {
2775 #ifdef HALF_ARITHMETIC_TYPE
2777 #else
2778 int abs = arg.data_ & 0x7FFF, exp = 15;
// arg.data_ >= 0x7C00 covers +infinity, NaNs and all negative bit patterns in one comparison.
2779 if(!abs || arg.data_ >= 0x7C00)
2780 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_>0x8000) ? detail::invalid() : arg.data_);
// Normalize subnormals so that the implicit bit 0x400 is set.
2781 for(; abs<0x400; abs<<=1,--exp) ;
// r is passed to detail::sqrt<20> together with exp (both are read again afterwards for rounding).
2782 detail::uint32 r = static_cast<detail::uint32>((abs&0x3FF)|0x400) << 10, m = detail::sqrt<20>(r, exp+=abs>>10);
2783 return half(detail::binary, detail::rounded<half::round_style,false>((exp<<10)+(m&0x3FF), r>m, r!=0));
2784 #endif
2785 }
2786
/// Cube root, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero, one and infinity (of either sign) are returned unchanged; NaNs go through detail::signal().
/// The visible steps take a fixed-point log2 of the magnitude, multiply it by 0xAAAAAAAB
/// (presumably one third in fixed point - confirm) and feed the result to detail::exp2.
/// NOTE(review): the embedded numbering jumps over 2797 and 2826 (the HALF_ARITHMETIC_TYPE body and one
/// arm of the final conditional), so this listing is incomplete - confirm against the real header.
2795 inline half cbrt(half arg) {
2796 #if defined(HALF_ARITHMETIC_TYPE)
2798 #else
2799 int abs = arg.data_ & 0x7FFF, exp = -15;
2800 if(!abs || abs == 0x3C00 || abs >= 0x7C00)
2801 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
// Normalize subnormals so that the implicit bit 0x400 is set.
2802 for(; abs<0x400; abs<<=1, --exp);
// m = |log2(|arg|)| in fixed point; sign remembers (as a mask) whether the logarithm was negative.
2803 detail::uint32 ilog = exp + (abs>>10), sign = detail::sign_mask(ilog), f, m =
2804 (((ilog<<27)+(detail::log2(static_cast<detail::uint32>((abs&0x3FF)|0x400)<<20, 24)>>4))^sign) - sign;
2805 for(exp=2; m<0x80000000; m<<=1,--exp) ;
2806 m = detail::multiply64(m, 0xAAAAAAAB);
2807 int i = m >> 31, s;
2808 exp += i;
2809 m <<= 1 - i;
// Split the scaled logarithm into an integer part (kept in exp) and a fractional part f.
2810 if(exp < 0) {
2811 f = m >> -exp;
2812 exp = 0;
2813 } else {
2814 f = (m<<exp) & 0x7FFFFFFF;
2815 exp = m >> (31-exp);
2816 }
2817 m = detail::exp2(f, (half::round_style==std::round_to_nearest) ? 29 : 26);
// A negative logarithm means the result is a reciprocal: invert m and negate the exponent.
2818 if(sign) {
2819 if(m > 0x80000000) {
2820 m = detail::divide64(0x80000000, m, s);
2821 ++exp;
2822 }
2823 exp = -exp;
2824 }
2825 return half(detail::binary, (half::round_style==std::round_to_nearest) ?
2827 detail::fixed2half<half::round_style,23,false,false,false>((m+0x80)>>8, exp+14, arg.data_&0x8000));
2828 #endif
2829 }
2830
/// Two-argument hypotenuse function.
/// With HALF_ARITHMETIC_TYPE both operands are converted to detail::internal_t and std::hypot is used.
/// Otherwise the raw bit patterns are processed: an infinite operand yields detail::select(0x7C00, other operand),
/// a NaN without any infinity yields detail::signal(x, y), and a zero x returns |y| through
/// detail::check_underflow (or 0 when y is zero too).
/// NOTE(review): the embedded numbering jumps over 2852 and 2867 (the return for a zero y and the final
/// return statement), so this listing is incomplete - confirm against the real header.
2840 inline half hypot(half x, half y) {
2841 #ifdef HALF_ARITHMETIC_TYPE
2842 detail::internal_t fx = detail::half2float<detail::internal_t>(x.data_), fy = detail::half2float<detail::internal_t>(y.data_);
2843 return half(detail::binary, detail::float2half<half::round_style>(std::hypot(fx, fy)));
2844 #else
2845 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, expx = 0, expy = 0;
2846 if(absx >= 0x7C00 || absy >= 0x7C00)
2847 return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, y.data_) :
2848 (absy==0x7C00) ? detail::select(0x7C00, x.data_) : detail::signal(x.data_, y.data_));
2849 if(!absx)
2850 return half(detail::binary, absy ? detail::check_underflow(absy) : 0);
2851 if(!absy)
// Order the magnitudes so that absx >= absy.
2853 if(absy > absx)
2854 std::swap(absx, absy);
// Normalize subnormals, recording the shift in expx / expy.
2855 for(; absx<0x400; absx<<=1,--expx) ;
2856 for(; absy<0x400; absy<<=1,--expy) ;
// Square both significands in integer arithmetic; ix / iy flag a carry into bit 21.
2857 detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400;
2858 mx *= mx;
2859 my *= my;
2860 int ix = mx >> 21, iy = my >> 21;
2861 expx = 2*(expx+(absx>>10)) - 15 + ix;
2862 expy = 2*(expy+(absy>>10)) - 15 + iy;
2863 mx <<= 10 - ix;
2864 my <<= 10 - iy;
// Align the smaller square to the larger exponent, folding shifted-out bits into a sticky low bit.
2865 int d = expx - expy;
2866 my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2868 #endif
2869 }
2870
/// Three-argument hypotenuse function.
/// With HALF_ARITHMETIC_TYPE the visible line returns std::sqrt(fx*fx+fy*fy+fz*fz) converted back to half.
/// Otherwise a zero operand delegates to the two-argument hypot of the other two; an infinite operand yields
/// detail::select(0x7C00, ...) over the remaining operands and a NaN without infinity yields detail::signal().
/// NOTE(review): the embedded numbering jumps over 2883 and 2930 (the declaration of fx/fy/fz and the final
/// return statement), so this listing is incomplete - confirm against the real header.
2881 inline half hypot(half x, half y, half z) {
2882 #ifdef HALF_ARITHMETIC_TYPE
2884 return half(detail::binary, detail::float2half<half::round_style>(std::sqrt(fx*fx+fy*fy+fz*fz)));
2885 #else
2886 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, absz = z.data_ & 0x7FFF, expx = 0, expy = 0, expz = 0;
// The zero checks come first, so a zero operand is dropped before infinities/NaNs are examined here.
2887 if(!absx)
2888 return hypot(y, z);
2889 if(!absy)
2890 return hypot(x, z);
2891 if(!absz)
2892 return hypot(x, y);
2893 if(absx >= 0x7C00 || absy >= 0x7C00 || absz >= 0x7C00)
2894 return half(detail::binary, (absx==0x7C00) ? detail::select(0x7C00, detail::select(y.data_, z.data_)) :
2895 (absy==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, z.data_)) :
2896 (absz==0x7C00) ? detail::select(0x7C00, detail::select(x.data_, y.data_)) :
2897 detail::signal(x.data_, y.data_, z.data_));
// Three compare-and-swap steps sort the magnitudes so that absx >= absy >= absz.
2898 if(absz > absy)
2899 std::swap(absy, absz);
2900 if(absy > absx)
2901 std::swap(absx, absy);
2902 if(absz > absy)
2903 std::swap(absy, absz);
// Normalize subnormals, recording the shift in the exponent variables.
2904 for(; absx<0x400; absx<<=1,--expx) ;
2905 for(; absy<0x400; absy<<=1,--expy) ;
2906 for(; absz<0x400; absz<<=1,--expz) ;
// Square the three significands in integer arithmetic.
2907 detail::uint32 mx = (absx&0x3FF) | 0x400, my = (absy&0x3FF) | 0x400, mz = (absz&0x3FF) | 0x400;
2908 mx *= mx;
2909 my *= my;
2910 mz *= mz;
2911 int ix = mx >> 21, iy = my >> 21, iz = mz >> 21;
2912 expx = 2*(expx+(absx>>10)) - 15 + ix;
2913 expy = 2*(expy+(absy>>10)) - 15 + iy;
2914 expz = 2*(expz+(absz>>10)) - 15 + iz;
2915 mx <<= 10 - ix;
2916 my <<= 10 - iy;
2917 mz <<= 10 - iz;
// Add the two smaller squares first (mz aligned to my with a sticky low bit).
2918 int d = expy - expz;
2919 mz = (d<30) ? ((mz>>d)|((mz&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2920 my += mz;
// On carry into bit 31 renormalize; if that sum now has the larger exponent, swap roles with x.
2921 if(my & 0x80000000) {
2922 my = (my>>1) | (my&1);
2923 if(++expy > expx) {
2924 std::swap(mx, my);
2925 std::swap(expx, expy);
2926 }
2927 }
2928 d = expx - expy;
2929 my = (d<30) ? ((my>>d)|((my&((static_cast<detail::uint32>(1)<<d)-1))!=0)) : 1;
2931 #endif
2932 }
2933
/// Power function x^y, evaluated on the raw half bit patterns when HALF_ARITHMETIC_TYPE is not defined.
/// Visible shortcuts: a zero y or x == +1 goes through detail::select(0x3C00, ...); infinities and NaNs are
/// resolved by the bit expressions below; a zero x yields a signed zero or detail::pole(sign) for negative y;
/// x == -1 yields +/-1; y == 0.5 delegates to sqrt(x); y == 2 returns x * x.
/// The general path computes log2(|x|) in fixed point, multiplies it by |y| and passes the split
/// integer/fractional parts to detail::exp2 / detail::exp2_post.
/// NOTE(review): the embedded numbering jumps over 2946, 2960 and 2966 (the HALF_ARITHMETIC_TYPE body and the
/// statements following two of the if-conditions), so this listing is incomplete - confirm against the real header.
2944 inline half pow(half x, half y) {
2945 #ifdef HALF_ARITHMETIC_TYPE
2947 #else
2948 int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, exp = -15;
2949 if(!absy || x.data_ == 0x3C00)
2950 return half(detail::binary, detail::select(0x3C00, (x.data_==0x3C00) ? y.data_ : x.data_));
// is_int: y has no fractional bits (every value with absy >= 0x6400 is an integer).
2951 bool is_int = absy >= 0x6400 || (absy>=0x3C00 && !(absy&((1<<(25-(absy>>10)))-1)));
// sign: the sign bit of x, kept only when y is an odd integer.
2952 unsigned int sign = x.data_ & (static_cast<unsigned>((absy<0x6800)&&is_int&&((absy>>(25-(absy>>10)))&1))<<15);
2953 if(absx >= 0x7C00 || absy >= 0x7C00)
2954 return half(detail::binary, (absx>0x7C00 || absy>0x7C00) ? detail::signal(x.data_, y.data_) :
2955 (absy==0x7C00) ? ((absx==0x3C00) ? 0x3C00 : (!absx && y.data_==0xFC00) ? detail::pole() :
2956 (0x7C00&-((y.data_>>15)^(absx>0x3C00)))) : (sign|(0x7C00&((y.data_>>15)-1U))));
2957 if(!absx)
2958 return half(detail::binary, (y.data_&0x8000) ? detail::pole(sign) : sign);
2959 if((x.data_&0x8000) && !is_int)
2961 if(x.data_ == 0xBC00)
2962 return half(detail::binary, sign|0x3C00);
2963 if(y.data_ == 0x3800)
2964 return sqrt(x);
2965 if(y.data_ == 0x3C00)
2967 if(y.data_ == 0x4000)
2968 return x * x;
// Normalize a subnormal x, then form |log2(|x|)| in fixed point (msign masks a negative logarithm).
2969 for(; absx<0x400; absx<<=1,--exp) ;
2970 detail::uint32 ilog = exp + (absx>>10), msign = detail::sign_mask(ilog), f, m =
2971 (((ilog<<27)+((detail::log2(static_cast<detail::uint32>((absx&0x3FF)|0x400)<<20)+8)>>4))^msign) - msign;
2972 for(exp=-11; m<0x80000000; m<<=1,--exp) ;
// Normalize a subnormal y and multiply the logarithm by its significand.
2973 for(; absy<0x400; absy<<=1,--exp) ;
2974 m = detail::multiply64(m, static_cast<detail::uint32>((absy&0x3FF)|0x400)<<21);
2975 int i = m >> 31;
2976 exp += (absy>>10) + i;
2977 m <<= 1 - i;
// Split the product into an integer part (kept in exp) and a fractional part f.
2978 if(exp < 0) {
2979 f = m >> -exp;
2980 exp = 0;
2981 } else {
2982 f = (m<<exp) & 0x7FFFFFFF;
2983 exp = m >> (31-exp);
2984 }
2985 return half(detail::binary, detail::exp2_post<half::round_style,false>(detail::exp2(f), exp, ((msign&1)^(y.data_>>15))!=0, sign));
2986 #endif
2987 }
2988
2993
/// Compute sine and cosine of arg simultaneously and store them through the sin and cos pointers.
/// Visible special cases: infinity stores detail::invalid() and NaN stores detail::signal() into both outputs;
/// zero stores arg into *sin and 1.0 (0x3C00) into *cos. For rounding modes other than round-to-nearest,
/// four specific inputs are answered from precomputed bit patterns.
/// NOTE(review): the embedded numbering jumps over 3006-3007, 3016-3017, 3023, 3027, 3031, 3035 and 3045-3046
/// (the rest of the HALF_ARITHMETIC_TYPE body, the small-argument branch, presumably the matching *cos
/// assignments and the final stores), so this listing is incomplete - confirm against the real header.
3003 inline void sincos(half arg, half *sin, half *cos) {
3004 #ifdef HALF_ARITHMETIC_TYPE
3005 detail::internal_t f = detail::half2float<detail::internal_t>(arg.data_);
3008 #else
3009 int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15, k;
3010 if(abs >= 0x7C00)
3011 *sin = *cos = half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3012 else if(!abs) {
3013 *sin = arg;
3014 *cos = half(detail::binary, 0x3C00);
3015 } else if(abs < 0x2500) {
3018 } else {
3019 if constexpr (half::round_style != std::round_to_nearest) {
3020 switch(abs) {
3021 case 0x48B7:
3022 *sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1));
3024 return;
3025 case 0x598C:
3026 *sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
3028 return;
3029 case 0x6A64:
3030 *sin = half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1));
3032 return;
3033 case 0x6D8C:
3034 *sin = half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1));
3036 return;
3037 }
3038 }
// detail::angle_arg reduces the argument and reports the quadrant index in k.
3039 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
// Rotate the (sin, cos) pair according to the quadrant.
3040 switch(k & 3) {
3041 case 1: sc = std::make_pair(sc.second, -sc.first); break;
3042 case 2: sc = std::make_pair(-sc.first, -sc.second); break;
3043 case 3: sc = std::make_pair(-sc.second, sc.first); break;
3044 }
3047 }
3048 #endif
3049 }
3050
/// Sine function, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero is returned unchanged (sign preserved); infinity yields detail::invalid() and NaN goes through
/// detail::signal(). For rounding modes other than round-to-nearest, three specific inputs are answered
/// from precomputed bit patterns.
/// NOTE(review): the embedded numbering jumps over 3061 and 3069 (the HALF_ARITHMETIC_TYPE body and the
/// statement of the small-argument branch `abs < 0x2900`), so this listing is incomplete - confirm against the real header.
3059 inline half sin(half arg) {
3060 #ifdef HALF_ARITHMETIC_TYPE
3062 #else
3063 int abs = arg.data_ & 0x7FFF, k;
3064 if(!abs)
3065 return arg;
3066 if(abs >= 0x7C00)
3067 return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3068 if(abs < 0x2900)
3070 if constexpr (half::round_style != std::round_to_nearest)
3071 switch(abs) {
3072 case 0x48B7: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x1D07, 1, 1));
3073 case 0x6A64: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x3BFE, 1, 1));
3074 case 0x6D8C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x0FE6, 1, 1));
3075 }
// k is the quadrant index written by detail::angle_arg.
3076 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
// sign is an all-ones mask when quadrant bit 1 and the argument's sign bit differ; (v^sign)-sign then negates v.
3077 detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)&1)^(arg.data_>>15));
// Odd quadrants use the cosine component, even quadrants the sine component.
3078 return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.second : sc.first)^sign) - sign));
3079 #endif
3080 }
3081
/// Cosine function, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero yields 1.0 (0x3C00); infinity yields detail::invalid() and NaN goes through detail::signal().
/// NOTE(review): the embedded numbering jumps over 3092, 3100 and 3103 (the HALF_ARITHMETIC_TYPE body and the
/// statements belonging to `abs < 0x2500` and `abs == 0x598C`), so this listing is incomplete - confirm
/// against the real header.
3090 inline half cos(half arg) {
3091 #ifdef HALF_ARITHMETIC_TYPE
3093 #else
3094 int abs = arg.data_ & 0x7FFF, k;
3095 if(!abs)
3096 return half(detail::binary, 0x3C00);
3097 if(abs >= 0x7C00)
3098 return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3099 if(abs < 0x2500)
3101 if constexpr (half::round_style != std::round_to_nearest)
3102 if(abs == 0x598C)
// k is the quadrant index written by detail::angle_arg.
3104 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 28);
// sign is an all-ones mask when the two low quadrant bits differ; (v^sign)-sign then negates v.
3105 detail::uint32 sign = -static_cast<detail::uint32>(((k>>1)^k)&1);
// Odd quadrants use the sine component, even quadrants the cosine component.
3106 return half(detail::binary, detail::fixed2half<half::round_style,30,true,true,true>((((k&1) ? sc.first : sc.second)^sign) - sign));
3107 #endif
3108 }
3109
/// Tangent function, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero is returned unchanged; infinity yields detail::invalid() and NaN goes through detail::signal().
/// For rounding modes other than round-to-nearest, two specific inputs are answered from precomputed bit patterns.
/// NOTE(review): the embedded numbering jumps over 3120 and 3128 (the HALF_ARITHMETIC_TYPE body and the
/// statement of the small-argument branch `abs < 0x2700`), so this listing is incomplete - confirm against the real header.
3118 inline half tan(half arg) {
3119 #ifdef HALF_ARITHMETIC_TYPE
3121 #else
3122 int abs = arg.data_ & 0x7FFF, exp = 13, k;
3123 if(!abs)
3124 return arg;
3125 if(abs >= 0x7C00)
3126 return half(detail::binary, (abs==0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3127 if(abs < 0x2700)
3129 if(half::round_style != std::round_to_nearest)
3130 switch(abs) {
3131 case 0x658C: return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x07E6, 1, 1));
3132 case 0x7330: return half(detail::binary, detail::rounded<half::round_style,true>((~arg.data_&0x8000)|0x4B62, 1, 1));
3133 }
3134 std::pair<detail::uint32,detail::uint32> sc = detail::sincos(detail::angle_arg(abs, k), 30);
// In odd quadrants the pair is replaced by (-cos, sin).
3135 if(k & 1)
3136 sc = std::make_pair(-sc.second, sc.first);
// Strip the signs (kept as masks in signy / signx) and work with the magnitudes my / mx.
3137 detail::uint32 signy = detail::sign_mask(sc.first), signx = detail::sign_mask(sc.second);
3138 detail::uint32 my = (sc.first^signy) - signy, mx = (sc.second^signx) - signx;
// Normalize numerator and denominator so that bit 31 is set, tracking the scale in exp.
3139 for(; my<0x80000000; my<<=1,--exp) ;
3140 for(; mx<0x80000000; mx<<=1,++exp) ;
3141 return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp, (signy^signx^arg.data_)&0x8000));
3142 #endif
3143 }
3144
/// Arc sine, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero is returned unchanged. For |arg| >= 1: NaN goes through detail::signal(), |arg| > 1 yields
/// detail::invalid(), and |arg| == 1 yields the rounded pattern sign|0x3E48 (approximately pi/2 with the sign of arg).
/// NOTE(review): the embedded numbering jumps over 3155, 3164, 3166 and 3169 (the HALF_ARITHMETIC_TYPE body,
/// the statements of two if-conditions and the final return), so this listing is incomplete - confirm
/// against the real header.
3153 inline half asin(half arg) {
3154 #ifdef HALF_ARITHMETIC_TYPE
3156 #else
3157 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
3158 if(!abs)
3159 return arg;
3160 if(abs >= 0x3C00)
3161 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
3162 detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1));
3163 if(abs < 0x2900)
3165 if(half::round_style != std::round_to_nearest && (abs == 0x2B44 || abs == 0x2DC3))
// The general path feeds the pair from detail::atan2_args into detail::atan2.
3167 std::pair<detail::uint32,detail::uint32> sc = detail::atan2_args(abs);
3168 detail::uint32 m = detail::atan2(sc.first, sc.second, (half::round_style==std::round_to_nearest) ? 27 : 26);
3170 #endif
3171 }
3172
/// Arc cosine, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// For |arg| >= 1: NaN goes through detail::signal(), |arg| > 1 yields detail::invalid(), -1 yields the
/// rounded pattern 0x4248 (approximately pi) and +1 yields 0.
/// NOTE(review): the embedded numbering jumps over 3183 and 3187 (the HALF_ARITHMETIC_TYPE body and the
/// statement of the zero-argument branch), so this listing is incomplete - confirm against the real header.
3181 inline half acos(half arg) {
3182 #ifdef HALF_ARITHMETIC_TYPE
3184 #else
3185 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ >> 15;
3186 if(!abs)
3188 if(abs >= 0x3C00)
3189 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (abs>0x3C00) ? detail::invalid() :
3190 sign ? detail::rounded<half::round_style,true>(0x4248, 0, 1) : 0);
// Same helper pair as asin, but passed to detail::atan2 in swapped order.
3191 std::pair<detail::uint32,detail::uint32> cs = detail::atan2_args(abs);
3192 detail::uint32 m = detail::atan2(cs.second, cs.first, 28);
// For negative arguments the angle m is subtracted from 0xC90FDAA2 (pi scaled by 2^30).
3193 return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(sign ? (0xC90FDAA2-m) : m, 15, 0, sign));
3194 #endif
3195 }
3196
/// Arc tangent, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero is returned unchanged; infinity yields the rounded pattern sign|0x3E48 (approximately pi/2 with the
/// sign of arg) and NaN goes through detail::signal().
/// NOTE(review): the embedded numbering jumps over 3207, 3215 and 3220 (the HALF_ARITHMETIC_TYPE body, the
/// statement of the small-argument branch `abs <= 0x2700` and the final return), so this listing is
/// incomplete - confirm against the real header.
3205 inline half atan(half arg) {
3206 #ifdef HALF_ARITHMETIC_TYPE
3208 #else
3209 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
3210 if(!abs)
3211 return arg;
3212 if(abs >= 0x7C00)
3213 return half(detail::binary, (abs==0x7C00) ? detail::rounded<half::round_style,true>(sign|0x3E48, 0, 1) : detail::signal(arg.data_));
3214 if(abs <= 0x2700)
// exp: biased exponent, with subnormals (abs <= 0x3FF) treated as exponent 1.
3216 int exp = (abs>>10) + (abs<=0x3FF);
// my: significand, with the implicit bit added only for normal numbers.
3217 detail::uint32 my = (abs&0x3FF) | ((abs>0x3FF)<<10);
// Arguments above the exponent bias scale the second atan2 operand down; others scale the first one up.
3218 detail::uint32 m = (exp>15) ? detail::atan2(my<<19, 0x20000000>>(exp-15), (half::round_style==std::round_to_nearest) ? 26 : 24) :
3219 detail::atan2(my<<(exp+4), 0x20000000, (half::round_style==std::round_to_nearest) ? 30 : 28);
3221 #endif
3222 }
3223
/// Two-argument arc tangent of y/x, evaluated on the raw half bit patterns when HALF_ARITHMETIC_TYPE is not defined.
/// The special-case returns use rounded constants carrying the sign of y: 0x3E48 (about pi/2), 0x4248 (about pi),
/// 0x40B6 (about 3*pi/4) and 0x3A48 (about pi/4). NaN operands go through detail::signal(x, y).
/// NOTE(review): the embedded numbering jumps over 3236, 3263 and 3265 (the HALF_ARITHMETIC_TYPE body and two
/// statements inside the tiny-ratio branch), so this listing is incomplete - confirm against the real header.
3234 inline half atan2(half y, half x) {
3235 #ifdef HALF_ARITHMETIC_TYPE
3237 #else
3238 unsigned int absx = x.data_ & 0x7FFF, absy = y.data_ & 0x7FFF, signx = x.data_ >> 15, signy = y.data_ & 0x8000;
3239 if(absx >= 0x7C00 || absy >= 0x7C00) {
3240 if(absx > 0x7C00 || absy > 0x7C00)
3241 return half(detail::binary, detail::signal(x.data_, y.data_));
// y infinite: pi/2 for finite x, 3*pi/4 for x == -infinity, pi/4 for x == +infinity.
3242 if(absy == 0x7C00)
3243 return half(detail::binary, (absx<0x7C00) ? detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1) :
3244 signx ? detail::rounded<half::round_style,true>(signy|0x40B6, 0, 1) :
3245 detail::rounded<half::round_style,true>(signy|0x3A48, 0, 1));
// Only x is infinite: signed zero for +infinity, signed pi for -infinity.
3246 return (x.data_==0x7C00) ? half(detail::binary, signy) : half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1));
3247 }
3248 if(!absy)
3249 return signx ? half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1)) : y;
3250 if(!absx)
3251 return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1));
// d: difference of the biased exponents (subnormals counted as exponent 1).
3252 int d = (absy>>10) + (absy<=0x3FF) - (absx>>10) - (absx<=0x3FF);
// |y| far larger than |x|: the result is the rounded pi/2 constant.
3253 if(d > (signx ? 18 : 12))
3254 return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x3E48, 0, 1));
// x negative and |y| far smaller than |x|: the result is the rounded pi constant.
3255 if(signx && d < -11)
3256 return half(detail::binary, detail::rounded<half::round_style,true>(signy|0x4248, 0, 1));
3257 if(!signx && d < ((half::round_style==std::round_toward_zero) ? -15 : -9)) {
3258 for(; absy<0x400; absy<<=1,--d) ;
3259 detail::uint32 mx = ((absx<<1)&0x7FF) | 0x800, my = ((absy<<1)&0x7FF) | 0x800;
3260 int i = my < mx;
3261 d -= i;
3262 if(d < -25)
3264 my <<= 11 + i;
3266 }
3267 detail::uint32 m = detail::atan2( ((absy&0x3FF)|((absy>0x3FF)<<10))<<(19+((d<0) ? d : (d>0) ? 0 : -1)),
3268 ((absx&0x3FF)|((absx>0x3FF)<<10))<<(19-((d>0) ? d : (d<0) ? 0 : 1)));
// For negative x the angle m is subtracted from 0xC90FDAA2 (pi scaled by 2^30).
3269 return half(detail::binary, detail::fixed2half<half::round_style,31,false,true,true>(signx ? (0xC90FDAA2-m) : m, 15, signy, signx));
3270 #endif
3271 }
3272
3277
/// Hyperbolic sine, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero and infinity are returned unchanged; NaN goes through detail::signal().
/// The general path takes the pair returned by detail::hyperbolic_args and uses the difference of its members.
/// NOTE(review): the embedded numbering jumps over 3288, 3294 and 3300-3301 (the HALF_ARITHMETIC_TYPE body, the
/// statement of the small-argument branch `abs <= 0x2900` and the final returns), so this listing is
/// incomplete - confirm against the real header.
3286 inline half sinh(half arg) {
3287 #ifdef HALF_ARITHMETIC_TYPE
3289 #else
3290 int abs = arg.data_ & 0x7FFF, exp;
3291 if(!abs || abs >= 0x7C00)
3292 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3293 if(abs <= 0x2900)
3295 std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 29 : 27);
3296 detail::uint32 m = mm.first - mm.second;
// Normalize m so that bit 31 is set (stopping early if exp reaches zero).
3297 for(exp+=13; m<0x80000000 && exp; m<<=1,--exp) ;
3298 unsigned int sign = arg.data_ & 0x8000;
3299 if(exp > 29)
3302 #endif
3303 }
3304
/// Hyperbolic cosine, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero yields 1.0 (0x3C00); infinity of either sign yields +infinity (0x7C00); NaN goes through detail::signal().
/// The general path takes the pair returned by detail::hyperbolic_args and uses the sum of its members.
/// NOTE(review): the embedded numbering jumps over 3315 and 3326-3327 (the HALF_ARITHMETIC_TYPE body and the
/// final returns), so this listing is incomplete - confirm against the real header.
3313 inline half cosh(half arg) {
3314 #ifdef HALF_ARITHMETIC_TYPE
3316 #else
3317 int abs = arg.data_ & 0x7FFF, exp;
3318 if(!abs)
3319 return half(detail::binary, 0x3C00);
3320 if(abs >= 0x7C00)
3321 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : 0x7C00);
3322 std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, (half::round_style==std::round_to_nearest) ? 23 : 26);
// i is 1 when bit 31 of the sum is clear; the next line shifts by i, keeps the lost bit as sticky and sets bit 31.
3323 detail::uint32 m = mm.first + mm.second, i = (~m&0xFFFFFFFF) >> 31;
3324 m = (m>>i) | (m&i) | 0x80000000;
3325 if((exp+=13+i) > 29)
3328 #endif
3329 }
3330
/// Hyperbolic tangent, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero is returned unchanged; infinity yields +/-1 (arg.data_-0x4000 turns 0x7C00 into 0x3C00 and keeps the
/// sign bit); NaN goes through detail::signal(). Magnitudes from 0x4500 (5.0) upwards return the rounded
/// pattern sign|0x3BFF, the largest half below one.
/// NOTE(review): the embedded numbering jumps over 3341, 3351 and 3353 (the HALF_ARITHMETIC_TYPE body and the
/// statements of two if-conditions), so this listing is incomplete - confirm against the real header.
3339 inline half tanh(half arg) {
3340 #ifdef HALF_ARITHMETIC_TYPE
3342 #else
3343 int abs = arg.data_ & 0x7FFF, exp;
3344 if(!abs)
3345 return arg;
3346 if(abs >= 0x7C00)
3347 return half(detail::binary, (abs>0x7C00) ? detail::signal(arg.data_) : (arg.data_-0x4000));
3348 if(abs >= 0x4500)
3349 return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
3350 if(abs < 0x2700)
3352 if(half::round_style != std::round_to_nearest && abs == 0x2D3F)
3354 std::pair<detail::uint32,detail::uint32> mm = detail::hyperbolic_args(abs, exp, 27);
// my: difference of the pair (numerator); mx: sum of the pair (denominator).
3355 detail::uint32 my = mm.first - mm.second - (half::round_style!=std::round_to_nearest), mx = mm.first + mm.second, i = (~mx&0xFFFFFFFF) >> 31;
3356 for(exp=13; my<0x80000000; my<<=1,--exp) ;
3357 mx = (mx>>i) | 0x80000000;
3358 return half(detail::binary, detail::tangent_post<half::round_style>(my, mx, exp-i, arg.data_&0x8000));
3359 #endif
3360 }
3361
/// Hyperbolic area sine (inverse hyperbolic sine), evaluated on the raw half bit pattern when
/// HALF_ARITHMETIC_TYPE is not defined.
/// Zero and infinity are returned unchanged; NaN goes through detail::signal(). For rounding modes other than
/// round-to-nearest, two specific inputs are answered by subtracting a fixed offset from the bit pattern.
/// NOTE(review): the embedded numbering jumps over 3372, 3378 and 3385 (the HALF_ARITHMETIC_TYPE body, the
/// statement of the small-argument branch `abs <= 0x2900` and the final return), so this listing is
/// incomplete - confirm against the real header.
3370 inline half asinh(half arg) {
3371 #if defined(HALF_ARITHMETIC_TYPE)
3373 #else
3374 int abs = arg.data_ & 0x7FFF;
3375 if(!abs || abs >= 0x7C00)
3376 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3377 if(abs <= 0x2900)
3379 if(half::round_style != std::round_to_nearest)
3380 switch(abs)
3381 {
3382 case 0x32D4: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-13, 1, 1));
3383 case 0x3B5B: return half(detail::binary, detail::rounded<half::round_style,true>(arg.data_-197, 1, 1));
3384 }
3386 #endif
3387 }
3388
/// Hyperbolic area cosine (inverse hyperbolic cosine), evaluated on the raw half bit pattern when
/// HALF_ARITHMETIC_TYPE is not defined.
/// Negative inputs and magnitudes below 1.0 yield detail::invalid() (NaNs go through detail::signal());
/// 1.0 yields +0; +infinity is returned unchanged.
/// NOTE(review): the embedded numbering jumps over 3399 and 3408 (the HALF_ARITHMETIC_TYPE body and the
/// final return), so this listing is incomplete - confirm against the real header.
3397 inline half acosh(half arg) {
3398 #if defined(HALF_ARITHMETIC_TYPE)
3400 #else
3401 int abs = arg.data_ & 0x7FFF;
3402 if((arg.data_&0x8000) || abs < 0x3C00)
3403 return half(detail::binary, (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3404 if(abs == 0x3C00)
3405 return half(detail::binary, 0);
// Only non-negative patterns reach this point, so this test covers +infinity and positive NaNs.
3406 if(arg.data_ >= 0x7C00)
3407 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3409 #endif
3410 }
3411
/// Hyperbolic area tangent (inverse hyperbolic tangent), evaluated on the raw half bit pattern when
/// HALF_ARITHMETIC_TYPE is not defined.
/// Zero is returned unchanged. For |arg| >= 1: exactly 1 yields detail::pole() with the sign of arg, larger
/// magnitudes up to infinity yield detail::invalid(), and NaN goes through detail::signal().
/// NOTE(review): the embedded numbering jumps over 3423, 3431 and 3435 (the HALF_ARITHMETIC_TYPE body, the
/// statement of the small-argument branch `abs < 0x2700` and the start of the final return), so this listing
/// is incomplete - confirm against the real header.
3421 inline half atanh(half arg) {
3422 #if defined(HALF_ARITHMETIC_TYPE)
3424 #else
3425 int abs = arg.data_ & 0x7FFF, exp = 0;
3426 if(!abs)
3427 return arg;
3428 if(abs >= 0x3C00)
3429 return half(detail::binary, (abs==0x3C00) ? detail::pole(arg.data_&0x8000) : (abs<=0x7C00) ? detail::invalid() : detail::signal(arg.data_));
3430 if(abs < 0x2700)
// m: |arg| in fixed point; my = 0x80000000 + m and mx = 0x80000000 - m are divided below.
3432 detail::uint32 m = static_cast<detail::uint32>((abs&0x3FF)|((abs>0x3FF)<<10)) << ((abs>>10)+(abs<=0x3FF)+6), my = 0x80000000 + m, mx = 0x80000000 - m;
// Normalize the denominator so that bit 31 is set, counting the shifts in exp.
3433 for(; mx<0x80000000; mx<<=1,++exp) ;
3434 int i = my >= mx, s;
3436 (detail::divide64(my>>i, mx, s)+1)>>1, 27)+0x10, exp+i-1, 16, arg.data_&0x8000));
3437 #endif
3438 }
3439
3444
/// Error function, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero is returned unchanged; infinity yields +/-1 (arg.data_-0x4000 turns 0x7C00 into 0x3C00 and keeps the
/// sign bit); NaN goes through detail::signal(). Magnitudes from 0x4200 (3.0) upwards return the rounded
/// pattern sign|0x3BFF, the largest half below one.
/// NOTE(review): the embedded numbering jumps over 3455 and 3462 (the HALF_ARITHMETIC_TYPE body and the
/// final return), so this listing is incomplete - confirm against the real header.
3453 inline half erf(half arg) {
3454 #if defined(HALF_ARITHMETIC_TYPE)
3456 #else
3457 unsigned int abs = arg.data_ & 0x7FFF;
3458 if(!abs || abs >= 0x7C00)
3459 return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (arg.data_-0x4000) : detail::signal(arg.data_)) : arg;
3460 if(abs >= 0x4200)
3461 return half(detail::binary, detail::rounded<half::round_style,true>((arg.data_&0x8000)|0x3BFF, 1, 1));
3463 #endif
3464 }
3465
/// Complementary error function, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Infinity yields sign>>1, i.e. 0 for +infinity and 2.0 (0x4000) for -infinity; NaN goes through
/// detail::signal(); zero yields 1.0 (0x3C00). Magnitudes from 0x4400 (4.0) upwards return a rounded
/// pattern just above 0 (positive arg) or just below 2 (negative arg).
/// NOTE(review): the embedded numbering jumps over 3476 and 3485 (the HALF_ARITHMETIC_TYPE body and the
/// final return), so this listing is incomplete - confirm against the real header.
3474 inline half erfc(half arg) {
3475 #if defined(HALF_ARITHMETIC_TYPE)
3477 #else
3478 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
// NOTE(review): the inner `abs>=0x7C00` test repeats the enclosing condition, so its `: arg` arm is never taken.
3479 if(abs >= 0x7C00)
3480 return (abs>=0x7C00) ? half(detail::binary, (abs==0x7C00) ? (sign>>1) : detail::signal(arg.data_)) : arg;
3481 if(!abs)
3482 return half(detail::binary, 0x3C00);
3483 if(abs >= 0x4400)
3484 return half(detail::binary, detail::rounded<half::round_style,true>((sign>>1)-(sign>>15), sign>>15, 1));
3486 #endif
3487 }
3488
/// Natural logarithm of the gamma function, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE
/// is not defined.
/// Infinity of either sign yields +infinity (0x7C00); NaN goes through detail::signal(); zero and negative
/// integers yield detail::pole(); the arguments 1.0 and 2.0 yield +0.
/// NOTE(review): the embedded numbering jumps over 3500 and 3509 (the HALF_ARITHMETIC_TYPE body and the
/// final return), so this listing is incomplete - confirm against the real header.
3498 inline half lgamma(half arg) {
3499 #if defined(HALF_ARITHMETIC_TYPE)
3501 #else
3502 int abs = arg.data_ & 0x7FFF;
3503 if(abs >= 0x7C00)
3504 return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
// Negative integers: bit patterns from 0xE400 have no fractional bits at all; from 0xBC00 (-1) the
// fractional bits selected by the exponent-dependent mask must be zero.
3505 if(!abs || arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
3506 return half(detail::binary, detail::pole());
3507 if(arg.data_ == 0x3C00 || arg.data_ == 0x4000)
3508 return half(detail::binary, 0);
3510 #endif
3511 }
3512
/// Gamma function, evaluated on the raw half bit pattern when HALF_ARITHMETIC_TYPE is not defined.
/// Zero yields detail::pole(arg.data_); +infinity is returned unchanged and every other pattern with all
/// exponent bits set goes through detail::signal(); 1.0 is returned unchanged.
/// NOTE(review): the embedded numbering jumps over 3524, 3532, 3536 and 3539 (the HALF_ARITHMETIC_TYPE body,
/// the statements of two if-conditions and the final return), so this listing is incomplete - confirm
/// against the real header.
3522 inline half tgamma(half arg) {
3523 #if defined(HALF_ARITHMETIC_TYPE)
3525 #else
3526 unsigned int abs = arg.data_ & 0x7FFF;
3527 if(!abs)
3528 return half(detail::binary, detail::pole(arg.data_));
3529 if(abs >= 0x7C00)
3530 return (arg.data_==0x7C00) ? arg : half(detail::binary, detail::signal(arg.data_));
// Same negative-integer test as in lgamma (the statement it guards is not part of this listing).
3531 if(arg.data_ >= 0xE400 || (arg.data_ >= 0xBC00 && !(abs&((1<<(25-(abs>>10)))-1))))
// Remaining negative arguments with bit pattern >= 0xCA80 underflow; the sign bit passed on is
// derived from the lowest integer bit of |arg|.
3533 if(arg.data_ >= 0xCA80)
3534 return half(detail::binary, detail::underflow<half::round_style>((1-((abs>>(25-(abs>>10)))&1))<<15));
3535 if(arg.data_ <= 0x100 || (arg.data_ >= 0x4900 && arg.data_ < 0x8000))
3537 if(arg.data_ == 0x3C00)
3538 return arg;
3540 #endif
3541 }
3542
3547
3555
3563
3571
3579
3586
3594
3601 inline long lrint(half arg) { return detail::half2int<half::round_style,true,true,long>(arg.data_); }
3602
3615
3622 inline long long llrint(half arg) { return detail::half2int<half::round_style,true,true,long long>(arg.data_); }
3623
3628
3635 inline half frexp(half arg, int *exp) {
3636 *exp = 0;
3637 unsigned int abs = arg.data_ & 0x7FFF;
3638 if(abs >= 0x7C00 || !abs)
3639 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
3640 for(; abs<0x400; abs<<=1,--*exp) ;
3641 *exp += (abs>>10) - 14;
3642 return half(detail::binary, (arg.data_&0x8000)|0x3800|(abs&0x3FF));
3643 }
3644
/// Multiply arg by two raised to the power exp, working directly on the half bit pattern.
/// Zero and infinity are returned unchanged; NaN goes through detail::signal().
/// NOTE(review): the embedded numbering jumps over 3661 and 3663 (the statements for `exp > 30` and
/// `exp < -10`), so this listing is incomplete - confirm against the real header.
3654 inline half scalbln(half arg, long exp) {
3655 unsigned int abs = arg.data_ & 0x7FFF, sign = arg.data_ & 0x8000;
3656 if(abs >= 0x7C00 || !abs)
3657 return (abs>0x7C00) ? half(detail::binary, detail::signal(arg.data_)) : arg;
// Normalize subnormals, then fold the biased exponent of arg into exp.
3658 for(; abs<0x400; abs<<=1,--exp) ;
3659 exp += abs >> 10;
3660 if(exp > 30)
3662 else if(exp < -10)
// A positive resulting exponent gives a normal number that can be assembled without rounding.
3664 else if(exp > 0)
3665 return half(detail::binary, sign|(exp<<10)|(abs&0x3FF));
// Otherwise the result is subnormal: shift the full significand right and round on the dropped bits.
3666 unsigned int m = (abs&0x3FF) | 0x400;
3667 return half(detail::binary, detail::rounded<half::round_style,false>(sign|(m>>(1-exp)), (m>>-exp)&1, (m&((1<<-exp)-1))!=0));
3668 }
3669
3679 inline half scalbn(half arg, int exp) { return scalbln(arg, exp); }
3680
3690 inline half ldexp(half arg, int exp) { return scalbln(arg, exp); }
3691
3698 inline half modf(half arg, half *iptr) {
3699 unsigned int abs = arg.data_ & 0x7FFF;
3700 if(abs > 0x7C00) {
3701 arg = half(detail::binary, detail::signal(arg.data_));
3702 return *iptr = arg, arg;
3703 }
3704 if(abs >= 0x6400)
3705 return *iptr = arg, half(detail::binary, arg.data_&0x8000);
3706 if(abs < 0x3C00)
3707 return iptr->data_ = arg.data_ & 0x8000, arg;
3708 unsigned int exp = abs >> 10, mask = (1<<(25-exp)) - 1, m = arg.data_ & mask;
3709 iptr->data_ = arg.data_ & ~mask;
3710 if(!m)
3711 return half(detail::binary, arg.data_&0x8000);
3712 for(; m<0x400; m<<=1,--exp) ;
3713 return half(detail::binary, (arg.data_&0x8000)|(exp<<10)|(m&0x3FF));
3714 }
3715
/// Extract the unbiased exponent of arg as an int.
/// Zero returns FP_ILOGB0, infinity returns INT_MAX and NaN returns FP_ILOGBNAN.
/// NOTE(review): the embedded numbering jumps over 3727 (the first line inside the special-case block),
/// so this listing is incomplete - confirm against the real header.
3724 inline int ilogb(half arg) {
3725 int abs = arg.data_ & 0x7FFF, exp;
3726 if(!abs || abs >= 0x7C00) {
// GCC (not Clang) gets -Wduplicated-branches silenced around the return below.
3728 #if defined(__GNUC__) && !defined(__clang__)
3729 #pragma GCC diagnostic push
3730 #pragma GCC diagnostic ignored "-Wduplicated-branches"
3731 #endif
3732 return !abs ? FP_ILOGB0 : (abs==0x7C00) ? INT_MAX : FP_ILOGBNAN;
3733 #if defined(__GNUC__) && !defined(__clang__)
3734 #pragma GCC diagnostic pop
3735 #endif
3736 }
// Start from the biased exponent minus the bias; for subnormals keep shifting until bit 0x200 is set.
3737 for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
3738 return exp;
3739 }
3740
3747 inline half logb(half arg) {
3748 int abs = arg.data_ & 0x7FFF, exp;
3749 if(!abs)
3750 return half(detail::binary, detail::pole(0x8000));
3751 if(abs >= 0x7C00)
3752 return half(detail::binary, (abs==0x7C00) ? 0x7C00 : detail::signal(arg.data_));
3753 for(exp=(abs>>10)-15; abs<0x200; abs<<=1,--exp) ;
3754 unsigned int value = static_cast<unsigned>(exp<0) << 15;
3755 if(exp) {
3756 unsigned int m = std::abs(exp) << 6;
3757 for(exp=18; m<0x400; m<<=1,--exp) ;
3758 value |= (exp<<10) + m;
3759 }
3760 return half(detail::binary, value);
3761 }
3762
/// Next representable value after `from` in the direction of `to`.
/// NaN operands go through detail::signal(from, to); if both operands have identical bits or are both zero,
/// `to` is returned; stepping away from zero yields the smallest subnormal with the sign of `to`.
/// NOTE(review): the embedded numbering jumps over 3778 and 3784 (one statement in the zero branch and one
/// before the final return), so this listing is incomplete - confirm against the real header.
3771 inline half nextafter(half from, half to) {
3772 int fabs = from.data_ & 0x7FFF, tabs = to.data_ & 0x7FFF;
3773 if(fabs > 0x7C00 || tabs > 0x7C00)
3774 return half(detail::binary, detail::signal(from.data_, to.data_));
3775 if(from.data_ == to.data_ || !(fabs|tabs))
3776 return to;
3777 if(!fabs) {
3779 return half(detail::binary, (to.data_&0x8000)+1);
3780 }
// Step the bit pattern by +1 or -1: the direction is the sign bit of `from` XOR (from < to), where the
// comparison is done on bit patterns remapped so that they order like the values they encode.
3781 unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(
3782 (from.data_^(0x8000|(0x8000-(from.data_>>15))))<(to.data_^(0x8000|(0x8000-(to.data_>>15))))))<<1) - 1;
// Stepping from a finite value onto the infinity pattern reports FE_OVERFLOW through detail::raise.
3783 detail::raise(FE_OVERFLOW, fabs<0x7C00 && (out&0x7C00)==0x7C00);
3785 return half(detail::binary, out);
3786 }
3787
/// Next representable value after `from` in the direction of the long double `to`.
/// A NaN `from` goes through detail::signal(); if `to` is NaN or compares equal to `from`, `to` converted to
/// half is returned; stepping away from zero yields the smallest subnormal with the sign of `to`.
/// NOTE(review): the embedded numbering jumps over 3804 and 3809 (one statement in the zero branch and one
/// before the final return), so this listing is incomplete - confirm against the real header.
3796 inline half nexttoward(half from, long double to) {
3797 int fabs = from.data_ & 0x7FFF;
3798 if(fabs > 0x7C00)
3799 return half(detail::binary, detail::signal(from.data_));
3800 long double lfrom = static_cast<long double>(from);
3801 if(detail::builtin_isnan(to) || lfrom == to)
3802 return half(static_cast<float>(to));
3803 if(!fabs) {
3805 return half(detail::binary, (static_cast<unsigned>(detail::builtin_signbit(to))<<15)+1);
3806 }
// Step the bit pattern by +1 or -1: the direction is the sign bit of `from` XOR (lfrom < to).
3807 unsigned int out = from.data_ + (((from.data_>>15)^static_cast<unsigned>(lfrom<to))<<1) - 1;
// Landing on the infinity pattern reports FE_OVERFLOW through detail::raise.
3808 detail::raise(FE_OVERFLOW, (out&0x7FFF)==0x7C00);
3810 return half(detail::binary, out);
3811 }
3812
3818 inline constexpr half copysign(half x, half y) { return half(detail::binary, x.data_^((x.data_^y.data_)&0x8000)); }
3819
3824
3833 inline constexpr int fpclassify(half arg) {
3834 return !(arg.data_&0x7FFF) ? FP_ZERO :
3835 ((arg.data_&0x7FFF)<0x400) ? FP_SUBNORMAL :
3836 ((arg.data_&0x7FFF)<0x7C00) ? FP_NORMAL :
3837 ((arg.data_&0x7FFF)==0x7C00) ? FP_INFINITE :
3838 FP_NAN;
3839 }
3840
3846 inline constexpr bool isfinite(half arg) { return (arg.data_&0x7C00) != 0x7C00; }
3847
3853 inline constexpr bool isinf(half arg) { return (arg.data_&0x7FFF) == 0x7C00; }
3854
3860 inline constexpr bool isnan(half arg) { return (arg.data_&0x7FFF) > 0x7C00; }
3861
3867 inline constexpr bool isnormal(half arg) { return ((arg.data_&0x7C00)!=0) & ((arg.data_&0x7C00)!=0x7C00); }
3868
3874 inline constexpr bool signbit(half arg) { return (arg.data_&0x8000) != 0; }
3875
3880
3887 inline constexpr bool isgreater(half x, half y) {
3888 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) > ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3889 }
3890
3897 inline constexpr bool isgreaterequal(half x, half y) {
3898 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) >= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3899 }
3900
3907 inline constexpr bool isless(half x, half y) {
3908 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) < ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3909 }
3910
3917 inline constexpr bool islessequal(half x, half y) {
3918 return ((x.data_^(0x8000|(0x8000-(x.data_>>15))))+(x.data_>>15)) <= ((y.data_^(0x8000|(0x8000-(y.data_>>15))))+(y.data_>>15)) && !isnan(x) && !isnan(y);
3919 }
3920
3927 inline constexpr bool islessgreater(half x, half y) {
3928 return x.data_!=y.data_ && ((x.data_|y.data_)&0x7FFF) && !isnan(x) && !isnan(y);
3929 }
3930
3937 inline constexpr bool isunordered(half x, half y) { return isnan(x) || isnan(y); }
3938
3943
3957 template<class T,class U> T half_cast(U arg) { return detail::half_caster<T,U>::cast(arg); }
3958
3973 template<class T,std::float_round_style R,class U> T half_cast(U arg) { return detail::half_caster<T,U,R>::cast(arg); }
3975
3980
3988 inline int feclearexcept(int excepts) { detail::errflags() &= ~excepts; return 0; }
3989
3997 inline int fetestexcept(int excepts) { return detail::errflags() & excepts; }
3998
4008 inline int feraiseexcept(int excepts) { detail::errflags() |= excepts; detail::raise(excepts); return 0; }
4009
4018 inline int fegetexceptflag(int *flagp, int excepts) { *flagp = detail::errflags() & excepts; return 0; }
4019
4029 inline int fesetexceptflag(const int *flagp, int excepts) { detail::errflags() = (detail::errflags()|(*flagp&excepts)) & (*flagp|~excepts); return 0; }
4030
4042 inline void fethrowexcept([[maybe_unused]] int excepts, const char *msg = "") {
4043 excepts &= detail::errflags();
4044#if HALF_ERRHANDLING_THROWS
4045 #ifdef HALF_ERRHANDLING_THROW_INVALID
4046 if(excepts & FE_INVALID)
4047 throw std::domain_error(msg);
4048 #endif
4049 #ifdef HALF_ERRHANDLING_THROW_DIVBYZERO
4050 if(excepts & FE_DIVBYZERO)
4051 throw std::domain_error(msg);
4052 #endif
4053 #ifdef HALF_ERRHANDLING_THROW_OVERFLOW
4054 if(excepts & FE_OVERFLOW)
4055 throw std::overflow_error(msg);
4056 #endif
4057 #ifdef HALF_ERRHANDLING_THROW_UNDERFLOW
4058 if(excepts & FE_UNDERFLOW)
4059 throw std::underflow_error(msg);
4060 #endif
4061 #ifdef HALF_ERRHANDLING_THROW_INEXACT
4062 if(excepts & FE_INEXACT)
4063 throw std::range_error(msg);
4064 #endif
4065#else
4066 std::fprintf(stderr, "%s\n", msg);
4067 std::terminate();
4068#endif
4069 }
4070
4071}
4072
// Remove the helper macros that are private to this header.
#undef HALF_UNUSED_NOERR
#undef constexpr_NOERR
#undef HALF_TWOS_COMPLEMENT_INT
#ifdef HALF_POP_WARNINGS
	#pragma warning(pop)
	#undef HALF_POP_WARNINGS
#endif

// Restore the diagnostic state pushed at the top of the file. Clang is tested first, mirroring the push
// sequence: Clang also defines __GNUC__, so testing __GNUC__ first would make the Clang branch unreachable.
// NOTE(review): the MSVC push matching the last branch is outside the visible part of the file - confirm it exists.
#if defined(__clang__)
#    pragma clang diagnostic pop
#elif defined(__GNUC__)
#    pragma GCC diagnostic pop
#elif defined(_MSC_VER)
#    pragma warning(pop)
#endif
4088
Half-precision floating-point type.
half(T rhs)
Conversion constructor.
friend constexpr bool isgreater(half, half)
Quiet comparison for greater than.
friend constexpr bool isgreaterequal(half, half)
Quiet comparison for greater equal.
friend half cosh(half)
Hyperbolic cosine.
friend long long llrint(half)
Nearest integer using half's internal rounding mode.
friend constexpr half fabs(half)
Absolute value.
friend half erf(half)
Error function.
friend half tgamma(half)
Gamma function.
friend half expm1(half)
Exponential minus one.
friend constexpr half operator+(half)
Identity.
half operator++(int)
Postfix increment.
friend constexpr int fpclassify(half)
Classify floating-point value.
friend constexpr bool islessequal(half, half)
Quiet comparison for less equal.
friend half fdim(half, half)
Positive difference.
friend half fma(half, half, half)
Fused multiply add.
friend half log10(half)
Common logarithm.
half & operator*=(half rhs)
Arithmetic assignment.
friend half log2(half)
Binary logarithm.
half & operator=(const T &rhs)
friend half modf(half, half *)
Extract integer and fractional parts.
friend constexpr half copysign(half, half)
Take sign.
friend half sinh(half)
Hyperbolic sine.
friend half atanh(half)
Hyperbolic area tangent.
friend constexpr bool signbit(half)
Check sign.
friend constexpr_NOERR half fmin(half, half)
Minimum of half expressions.
friend void sincos(half, half *, half *)
Compute sine and cosine simultaneously.
friend half acosh(half)
Hyperbolic area cosine.
friend half atan2(half, half)
Arc tangent function.
friend constexpr bool isfinite(half)
Check if finite number.
half & operator=(const float &rhs)
Assignment operator.
friend half log1p(half)
Natural logarithm plus one.
friend constexpr bool isinf(half)
Check for infinity.
friend half nextafter(half, half)
Next representable value.
friend half round(half)
Nearest integer.
friend half fmod(half, half)
Remainder of division.
friend long lround(half)
Nearest integer.
friend half sin(half)
Sine function.
friend constexpr bool isnan(half)
Check for NaN.
friend half floor(half)
Nearest integer not greater than half value.
half & operator--()
Prefix decrement.
half & operator+=(half rhs)
Arithmetic assignment.
friend half erfc(half)
Complementary error function.
friend half log(half)
Natural logarithm.
friend long long llround(half)
Nearest integer.
friend half acos(half)
Arc cosine function.
friend int ilogb(half)
Extract exponent.
friend constexpr bool isnormal(half)
Check if normal number.
friend half hypot(half, half)
Hypotenuse function.
friend half cos(half)
Cosine function.
friend half exp2(half)
Binary exponential.
friend half asinh(half)
Hyperbolic area sine.
detail::uint16 get_data() const
friend constexpr_NOERR bool operator==(half, half)
Comparison for equality.
friend half logb(half)
Extract exponent.
friend std::basic_ostream< charT, traits > & operator<<(std::basic_ostream< charT, traits > &, half)
Output operator.
friend half remquo(half, half, int *)
Remainder of division.
friend constexpr_NOERR half fmax(half, half)
Maximum of half expressions.
friend half tanh(half)
Hyperbolic tangent.
friend std::basic_istream< charT, traits > & operator>>(std::basic_istream< charT, traits > &, half &)
Input operator.
friend half scalbln(half, long)
Multiply by power of two.
friend half tan(half)
Tangent function.
friend half operator*(half, half)
Multiplication.
friend half operator/(half, half)
Division.
constexpr half() noexcept
Default constructor.
friend half frexp(half, int *)
Decompress floating-point number.
friend half rint(half)
Nearest integer using half's internal rounding mode.
friend half exp(half)
Exponential function.
friend half nearbyint(half)
Nearest integer using half's internal rounding mode.
half & operator-=(half rhs)
Arithmetic assignment.
friend half nexttoward(half, long double)
Next representable value.
friend half trunc(half)
Nearest integer not greater in magnitude than half value.
half & operator/=(half rhs)
Arithmetic assignment.
friend constexpr half operator-(half)
Negation.
friend constexpr bool islessgreater(half, half)
Quiet comparison for less or greater.
half & operator++()
Prefix increment.
friend half atan(half)
Arc tangent function.
friend half pow(half, half)
Power function.
friend half sqrt(half)
Square root.
friend half nanh(const char *)
Get NaN value.
half operator--(int)
Postfix decrement.
friend half ceil(half)
Nearest integer not less than half value.
friend half cbrt(half)
Cubic root.
friend long lrint(half)
Nearest integer using half's internal rounding mode.
friend half asin(half)
Arc sine.
friend half lgamma(half)
Natural logarithm of gamma function.
friend half remainder(half, half)
Remainder of division.
friend constexpr_NOERR std::partial_ordering operator<=>(half, half)
friend constexpr bool isless(half, half)
Quiet comparison for less than.
static constexpr bool is_modulo
Doesn't provide modulo arithmetic.
static constexpr float_round_style round_style
Rounding mode.
static constexpr int max_digits10
Required decimal digits to represent all possible values.
static constexpr bool tinyness_before
Does not support pre-rounding underflow detection.
static constexpr bool is_integer
Is not an integer type.
static constexpr int radix
Number base.
static constexpr half_float::half lowest() noexcept
Smallest finite value.
static constexpr half_float::half epsilon() noexcept
Difference between 1 and next representable value.
static constexpr int digits
Significant digits.
static constexpr bool traps
Traps only if HALF_ERRHANDLING_THROW_... is activated.
static constexpr int digits10
Significant decimal digits.
static constexpr int min_exponent
One more than smallest exponent.
static constexpr bool has_infinity
Supports infinity.
static constexpr bool is_iec559
IEEE conformant.
static constexpr int min_exponent10
Smallest normalized representable power of 10.
static constexpr half_float::half round_error() noexcept
Maximum rounding error in ULP (units in the last place).
static constexpr half_float::half infinity() noexcept
Positive infinity.
static constexpr float_denorm_style has_denorm
Supports subnormal values.
static constexpr bool has_denorm_loss
Supports no denormalization detection.
static constexpr half_float::half max() noexcept
Largest finite value.
static constexpr half_float::half denorm_min() noexcept
Smallest positive subnormal value.
static constexpr bool is_signed
Supports signed values.
static constexpr half_float::half min() noexcept
Smallest positive normal value.
static constexpr half_float::half quiet_NaN() noexcept
Quiet NaN.
static constexpr int max_exponent10
Largest finitely representable power of 10.
static constexpr half_float::half signaling_NaN() noexcept
Signaling NaN.
static constexpr bool has_signaling_NaN
Supports signaling NaNs.
static constexpr bool has_quiet_NaN
Supports quiet NaNs.
static constexpr bool is_bounded
Has a finite set of values.
static constexpr int max_exponent
One more than largest exponent.
static constexpr bool is_exact
Is not exact.
static constexpr bool is_specialized
Is template specialization.
#define FP_ILOGBNAN
#define FP_NAN
#define FP_ZERO
#define constexpr_NOERR
Definition float16_t.hpp:70
#define FP_INFINITE
#define FE_UNDERFLOW
#define FE_INVALID
#define FE_INEXACT
#define FP_ILOGB0
#define HALF_UNUSED_NOERR(name)
Definition float16_t.hpp:63
#define FE_DIVBYZERO
#define FP_NORMAL
#define FE_OVERFLOW
#define FP_SUBNORMAL
#define HALF_ROUND_STYLE
Default rounding mode.
#define HALF_ERRHANDLING_UNDERFLOW_TO_INEXACT
Raise INEXACT exception on underflow.
unsigned int integral(unsigned int value)
Round half-precision number to nearest integer value.
uint32 exp2(uint32 m, unsigned int n=32)
Fixed point binary exponential.
constexpr_NOERR unsigned int underflow(unsigned int sign=0)
Half-precision underflow.
bool builtin_signbit(T arg)
Check sign.
unsigned int hypot_post(uint32 r, int exp)
Hypotenuse square root and postprocessing.
constexpr_NOERR bool compsignal(unsigned int x, unsigned int y)
Check and signal for any NaN.
bool builtin_isnan(T arg)
Check for NaN.
constexpr binary_t binary
Tag for binary construction.
std::uint_fast32_t uint32
Fastest unsigned integer of (at least) 32 bits width.
unsigned int fixed2half(uint32 m, int exp=14, unsigned int sign=0, int s=0)
Convert fixed point to half-precision floating-point.
unsigned int mod(unsigned int x, unsigned int y, int *quo=NULL)
Half precision positive modulus.
uint32 sqrt(uint32 &r, int &exp)
Fixed point square root.
float half2float_impl(unsigned int value, float, true_type)
Convert half-precision to IEEE single-precision.
constexpr_NOERR unsigned int check_underflow(unsigned int arg)
Check value for underflow.
std::pair< uint32, uint32 > sincos(uint32 mz, unsigned int n=31)
Fixed point sine and cosine.
uint32 multiply64(uint32 x, uint32 y)
64-bit multiplication.
int & errflags()
Internal exception flags.
constexpr_NOERR unsigned int invalid()
Raise domain error and return NaN.
T half2int(unsigned int value)
Convert half-precision floating-point to integer.
uint32 mulhi(uint32 x, uint32 y)
Upper part of 64-bit multiplication.
uint32 atan2(uint32 my, uint32 mx, unsigned int n=31)
Fixed point arc tangent.
constexpr_NOERR unsigned int signal(unsigned int nan)
Signal and silence signaling NaN.
unsigned int float2half(T value)
Convert floating-point to half-precision.
std::pair< uint32, uint32 > hyperbolic_args(unsigned int abs, int &exp, unsigned int n=32)
Get exponentials for hyperbolic computation.
unsigned int gamma(unsigned int arg)
Gamma function and postprocessing.
constexpr_NOERR unsigned int select(unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
Select value or signaling NaN.
constexpr_NOERR unsigned int overflow(unsigned int sign=0)
Half-precision overflow.
constexpr_NOERR unsigned int pole(unsigned int sign=0)
Raise pole error and return infinity.
uint32 arithmetic_shift(uint32 arg, int i)
Platform-independent arithmetic right shift.
std::pair< uint32, uint32 > atan2_args(unsigned int abs)
Get arguments for atan2 function.
uint32 log2(uint32 m, unsigned int n=32)
Fixed point binary logarithm.
T half2float(unsigned int value)
Convert half-precision to floating-point.
uint32 divide64(uint32 x, uint32 y, int &s)
64-bit division.
void raise(int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true)
Raise floating-point exception.
uint32 angle_arg(unsigned int abs, int &k)
Reduce argument for trigonometric functions.
unsigned int log2_post(uint32 m, int ilog, int exp, unsigned int sign=0)
Postprocessing for binary logarithm.
unsigned int area(unsigned int arg)
Area function and postprocessing.
unsigned int exp2_post(uint32 m, int exp, bool esign, unsigned int sign=0)
Postprocessing for binary exponential.
std::int_fast32_t int32
Fastest signed integer of (at least) 32 bits width.
unsigned int float2half_impl(float value, true_type)
Convert IEEE single-precision to half-precision.
typename bits< T >::type bits_t
uint32 sign_mask(uint32 arg)
Platform-independent sign mask.
std::uint_least16_t uint16
Unsigned integer of (at least) 16 bits width.
bool builtin_isinf(T arg)
Check for infinity.
unsigned int erf(unsigned int arg)
Error function and postprocessing.
constexpr_NOERR unsigned int rounded(unsigned int value, int g, int s)
Round half-precision number.
unsigned int int2half(T value)
Convert integer to half-precision floating-point.
unsigned int tangent_post(uint32 my, uint32 mx, int exp, unsigned int sign=0)
Division and postprocessing for tangents.
Library-defined half-precision literals.
Main namespace for half-precision functionality.
half asinh(half arg)
Hyperbolic area sine.
half sinh(half arg)
Hyperbolic sine.
constexpr_NOERR half fmax(half x, half y)
Maximum of half expressions.
int feclearexcept(int excepts)
Clear exception flags.
half nextafter(half from, half to)
Next representable value.
half atan(half arg)
Arc tangent function.
half hypot(half x, half y)
Hypotenuse function.
half fdim(half x, half y)
Positive difference.
half remquo(half x, half y, int *quo)
Remainder of division.
int fegetexceptflag(int *flagp, int excepts)
Save exception flags.
constexpr bool isfinite(half arg)
Check if finite number.
int ilogb(half arg)
Extract exponent.
half lgamma(half arg)
Natural logarithm of gamma function.
int fesetexceptflag(const int *flagp, int excepts)
Restore exception flags.
half fma(half x, half y, half z)
Fused multiply add.
half nearbyint(half arg)
Nearest integer using half's internal rounding mode.
constexpr half abs(half arg)
Absolute value.
half expm1(half arg)
Exponential minus one.
half ldexp(half arg, int exp)
Multiply by power of two.
half sin(half arg)
Sine function.
half tanh(half arg)
Hyperbolic tangent.
half rint(half arg)
Nearest integer using half's internal rounding mode.
T half_cast(U arg)
Cast to or from half-precision floating-point number.
half fmod(half x, half y)
Remainder of division.
constexpr bool islessgreater(half x, half y)
Quiet comparison for less or greater.
half log(half arg)
Natural logarithm.
half cos(half arg)
Cosine function.
half scalbn(half arg, int exp)
Multiply by power of two.
half exp2(half arg)
Binary exponential.
constexpr bool isless(half x, half y)
Quiet comparison for less than.
half atanh(half arg)
Hyperbolic area tangent.
std::basic_istream< charT, traits > & operator>>(std::basic_istream< charT, traits > &in, half &arg)
Input operator.
long long llround(half arg)
Nearest integer.
half nexttoward(half from, long double to)
Next representable value.
half round(half arg)
Nearest integer.
half log2(half arg)
Binary logarithm.
half asin(half arg)
Arc sine.
half sqrt(half arg)
Square root.
half trunc(half arg)
Nearest integer not greater in magnitude than half value.
half erfc(half arg)
Complementary error function.
half tan(half arg)
Tangent function.
std::basic_ostream< charT, traits > & operator<<(std::basic_ostream< charT, traits > &out, half arg)
Output operator.
half log10(half arg)
Common logarithm.
half floor(half arg)
Nearest integer not greater than half value.
half acosh(half arg)
Hyperbolic area cosine.
constexpr bool isnan(half arg)
Check for NaN.
constexpr half operator-(half arg)
Negation.
half operator*(half x, half y)
Multiplication.
long long llrint(half arg)
Nearest integer using half's internal rounding mode.
half atan2(half y, half x)
Arc tangent function.
int feraiseexcept(int excepts)
Raise exception flags.
half scalbln(half arg, long exp)
Multiply by power of two.
half tgamma(half arg)
Gamma function.
constexpr bool signbit(half arg)
Check sign.
long lrint(half arg)
Nearest integer using half's internal rounding mode.
constexpr bool isinf(half arg)
Check for infinity.
constexpr half copysign(half x, half y)
Take sign.
half cosh(half arg)
Hyperbolic cosine.
half logb(half arg)
Extract exponent.
constexpr bool islessequal(half x, half y)
Quiet comparison for less equal.
half erf(half arg)
Error function.
void sincos(half arg, half *sin, half *cos)
Compute sine and cosine simultaneously.
half ceil(half arg)
Nearest integer not less than half value.
half frexp(half arg, int *exp)
Decompress floating-point number.
constexpr bool isunordered(half x, half y)
Quiet check if unordered.
half log1p(half arg)
Natural logarithm plus one.
constexpr_NOERR half fmin(half x, half y)
Minimum of half expressions.
constexpr int fpclassify(half arg)
Classify floating-point value.
long lround(half arg)
Nearest integer.
half acos(half arg)
Arc cosine function.
constexpr bool isgreater(half x, half y)
Quiet comparison for greater than.
half pow(half x, half y)
Power function.
half nanh(const char *arg)
Get NaN value.
half modf(half arg, half *iptr)
Extract integer and fractional parts.
half cbrt(half arg)
Cubic root.
constexpr bool isgreaterequal(half x, half y)
Quiet comparison for greater equal.
void fethrowexcept(int excepts, const char *msg="")
Throw C++ exceptions based on set exception flags.
int fetestexcept(int excepts)
Test exception flags.
half exp(half arg)
Exponential function.
constexpr_NOERR std::partial_ordering operator<=>(half x, half y)
constexpr bool isnormal(half arg)
Check if normal number.
half remainder(half x, half y)
Remainder of division.
half operator/(half x, half y)
Division.
constexpr half fabs(half arg)
Absolute value.
constexpr half operator+(half arg)
Identity.
constexpr_NOERR bool operator==(half x, half y)
Comparison for equality.
Extensions to the C++ standard library.
constexpr bool is_floating_point_v< half_float::half >
constexpr bool is_signed_v< half_float::half >
constexpr bool is_scalar_v< half_float::half >
Tag type for binary construction.
Type traits for floating-point bits.
Helper for tag dispatching.
Class for 1.31 unsigned floating-point computation.
friend f31 operator-(f31 a, f31 b)
Subtraction operator.
friend f31 operator*(f31 a, f31 b)
Multiplication operator.
friend f31 operator/(f31 a, f31 b)
Division operator.
constexpr f31(uint32 mant, int e)
Constructor.
friend f31 operator+(f31 a, f31 b)
Addition operator.
f31(unsigned int abs)
Constructor.
uint32 m
mantissa as 1.31.
Helper class for half casts.
Type traits for floating-point types.
result_type operator()(argument_type arg) const
Compute hash function.
size_t result_type
Function return type.
half_float::half argument_type
Type of function argument.