Classes
struct	binary_t
	Tag type for binary construction. More...

struct	bits
	Type traits for floating-point bits. More...

struct	bits< const T >

struct	bits< const volatile T >

struct	bits< double >
	Unsigned integer of (at least) 64 bits width. More...

struct	bits< float >
	Unsigned integer of (at least) 32 bits width. More...

struct	bits< volatile T >

struct	bool_type
	Helper for tag dispatching. More...

struct	conditional
	Conditional type. More...

struct	f31
	Class for 1.31 unsigned floating-point computation. More...

class	false_type

struct	half_caster
	Helper class for half casts. More...

struct	half_caster< half, half, R >

struct	half_caster< half, U, R >

struct	half_caster< T, half, R >

struct	is_float
	Type traits for floating-point types. More...

class	true_type

Concepts
concept	arithmetic

Typedefs
using	uint16 = std::uint_least16_t
	Unsigned integer of (at least) 16 bits width.

using	uint32 = std::uint_fast32_t
	Fastest unsigned integer of (at least) 32 bits width.

using	int32 = std::int_fast32_t
	Fastest signed integer of (at least) 32 bits width.

template<class T>
using	bits_t = typename bits<T>::type

Functions
Implementation defined classification and arithmetic
template<class T>
bool	builtin_isinf (T arg)
	Check for infinity.

template<class T>
bool	builtin_isnan (T arg)
	Check for NaN.

template<class T>
bool	builtin_signbit (T arg)
	Check sign.

uint32	sign_mask (uint32 arg)
	Platform-independent sign mask.

uint32	arithmetic_shift (uint32 arg, int i)
	Platform-independent arithmetic right shift.

Error handling
int &	errflags ()
	Internal exception flags.

void	raise (int HALF_UNUSED_NOERR(flags), bool HALF_UNUSED_NOERR(cond)=true)
	Raise floating-point exception.

constexpr_NOERR bool	compsignal (unsigned int x, unsigned int y)
	Check and signal for any NaN.

constexpr_NOERR unsigned int	signal (unsigned int nan)
	Signal and silence signaling NaN.

constexpr_NOERR unsigned int	signal (unsigned int x, unsigned int y)
	Signal and silence signaling NaNs.

constexpr_NOERR unsigned int	signal (unsigned int x, unsigned int y, unsigned int z)
	Signal and silence signaling NaNs.

constexpr_NOERR unsigned int	select (unsigned int x, unsigned int HALF_UNUSED_NOERR(y))
	Select value or signaling NaN.

constexpr_NOERR unsigned int	invalid ()
	Raise domain error and return NaN.

constexpr_NOERR unsigned int	pole (unsigned int sign=0)
	Raise pole error and return infinity.

constexpr_NOERR unsigned int	check_underflow (unsigned int arg)
	Check value for underflow.

Conversion and rounding
template<std::float_round_style R>
constexpr_NOERR unsigned int	overflow (unsigned int sign=0)
	Half-precision overflow.

template<std::float_round_style R>
constexpr_NOERR unsigned int	underflow (unsigned int sign=0)
	Half-precision underflow.

template<std::float_round_style R, bool I>
constexpr_NOERR unsigned int	rounded (unsigned int value, int g, int s)
	Round half-precision number.

template<std::float_round_style R, bool E, bool I>
unsigned int	integral (unsigned int value)
	Round half-precision number to nearest integer value.

template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>
unsigned int	fixed2half (uint32 m, int exp=14, unsigned int sign=0, int s=0)
	Convert fixed point to half-precision floating-point.

template<std::float_round_style R>
unsigned int	float2half_impl (float value, true_type)
	Convert IEEE single-precision to half-precision.

template<std::float_round_style R>
unsigned int	float2half_impl (double value, true_type)
	Convert IEEE double-precision to half-precision.

template<std::float_round_style R, class T>
unsigned int	float2half_impl (T value,...)
	Convert non-IEEE floating-point to half-precision.

template<std::float_round_style R, class T>
unsigned int	float2half (T value)
	Convert floating-point to half-precision.

template<class T>
unsigned int	float2half (T value)

template<std::float_round_style R, class T>
unsigned int	int2half (T value)
	Convert integer to half-precision floating-point.

float	half2float_impl (unsigned int value, float, true_type)
	Convert half-precision to IEEE single-precision.

double	half2float_impl (unsigned int value, double, true_type)
	Convert half-precision to IEEE double-precision.

template<class T>
T	half2float_impl (unsigned int value, T,...)
	Convert half-precision to non-IEEE floating-point.

template<class T>
T	half2float (unsigned int value)
	Convert half-precision to floating-point.

template<std::float_round_style R, bool E, bool I, class T>
T	half2int (unsigned int value)
	Convert half-precision floating-point to integer.

Mathematics
template<std::float_round_style R>
uint32	mulhi (uint32 x, uint32 y)
	Upper part of 64-bit multiplication.

uint32	multiply64 (uint32 x, uint32 y)
	64-bit multiplication.

uint32	divide64 (uint32 x, uint32 y, int &s)
	64-bit division.

template<bool Q, bool R>
unsigned int	mod (unsigned int x, unsigned int y, int *quo=NULL)
	Half precision positive modulus.

template<unsigned int F>
uint32	sqrt (uint32 &r, int &exp)
	Fixed point square root.

uint32	exp2 (uint32 m, unsigned int n=32)
	Fixed point binary exponential.

uint32	log2 (uint32 m, unsigned int n=32)
	Fixed point binary logarithm.

std::pair< uint32, uint32 >	sincos (uint32 mz, unsigned int n=31)
	Fixed point sine and cosine.

uint32	atan2 (uint32 my, uint32 mx, unsigned int n=31)
	Fixed point arc tangent.

uint32	angle_arg (unsigned int abs, int &k)
	Reduce argument for trigonometric functions.

std::pair< uint32, uint32 >	atan2_args (unsigned int abs)
	Get arguments for atan2 function.

std::pair< uint32, uint32 >	hyperbolic_args (unsigned int abs, int &exp, unsigned int n=32)
	Get exponentials for hyperbolic computation.

template<std::float_round_style R, bool I>
unsigned int	exp2_post (uint32 m, int exp, bool esign, unsigned int sign=0)
	Postprocessing for binary exponential.

template<std::float_round_style R, uint32 L>
unsigned int	log2_post (uint32 m, int ilog, int exp, unsigned int sign=0)
	Postprocessing for binary logarithm.

template<std::float_round_style R>
unsigned int	hypot_post (uint32 r, int exp)
	Hypotenuse square root and postprocessing.

template<std::float_round_style R>
unsigned int	tangent_post (uint32 my, uint32 mx, int exp, unsigned int sign=0)
	Division and postprocessing for tangents.

template<std::float_round_style R, bool S>
unsigned int	area (unsigned int arg)
	Area function and postprocessing.

template<std::float_round_style R, bool C>
unsigned int	erf (unsigned int arg)
	Error function and postprocessing.

template<std::float_round_style R, bool L>
unsigned int	gamma (unsigned int arg)
	Gamma function and postprocessing.

Variables
constexpr binary_t	binary = binary_t()
	Tag for binary construction.

Typedef Documentation

◆ bits_t

template<class T>

using half_float::detail::bits_t = typename bits<T>::type

Definition at line 280 of file float16_t.hpp.

◆ int32

using half_float::detail::int32 = std::int_fast32_t

Fastest signed integer of (at least) 32 bits width.

Definition at line 273 of file float16_t.hpp.

◆ uint16

using half_float::detail::uint16 = std::uint_least16_t

Unsigned integer of (at least) 16 bits width.

Definition at line 267 of file float16_t.hpp.

◆ uint32

using half_float::detail::uint32 = std::uint_fast32_t

Fastest unsigned integer of (at least) 32 bits width.

Definition at line 270 of file float16_t.hpp.

Function Documentation

◆ angle_arg()

uint32 half_float::detail::angle_arg	(	unsigned int	abs,
		int &	k )

inline

Reduce argument for trigonometric functions.

Parameters

abs	half-precision floating-point value
k	value to take quarter period

Returns: abs reduced to [-pi/4,pi/4] as Q0.30

Definition at line 1257 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ area()

template<std::float_round_style R, bool S>

unsigned int half_float::detail::area ( unsigned int arg )

Area function and postprocessing.

This computes the value directly in Q2.30 using the representations asinh(x) = log(x + sqrt(x^2 + 1)) and acosh(x) = log(x + sqrt(x^2 - 1)).

Template Parameters

R	rounding mode to use
S	`true` for asinh, `false` for acosh

Parameters

arg	half-precision argument

Returns: asinh|acosh(arg) converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1418 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ arithmetic_shift()

uint32 half_float::detail::arithmetic_shift	(	uint32	arg,
		int	i )

inline

Platform-independent arithmetic right shift.

Parameters

arg	integer value in two's complement
i	shift amount (at most 31)

Returns: arg right shifted for i bits with possible sign extension

Definition at line 334 of file float16_t.hpp.

Here is the caller graph for this function:

◆ atan2()

uint32 half_float::detail::atan2	(	uint32	my,
		uint32	mx,
		unsigned int	n = 31 )

inline

Fixed point arc tangent.

This uses the CORDIC algorithm in vectoring mode.

Parameters

my	y coordinate as Q0.30
mx	x coordinate as Q0.30
n	number of iterations (at most 31)

Returns: arc tangent of my / mx as Q1.30

Definition at line 1237 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ atan2_args()

std::pair< uint32, uint32 > half_float::detail::atan2_args ( unsigned int abs )

inline

Get arguments for atan2 function.

Parameters

abs	half-precision floating-point value

Returns: abs and sqrt(1 - abs^2) as Q0.30

Definition at line 1271 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ builtin_isinf()

template<class T>

bool half_float::detail::builtin_isinf ( T arg )

Check for infinity.

Template Parameters

T	argument type (builtin floating-point type)

Parameters

arg	value to query

Return values

true	if infinity
false	else

Definition at line 301 of file float16_t.hpp.

Here is the caller graph for this function:

◆ builtin_isnan()

template<class T>

bool half_float::detail::builtin_isnan ( T arg )

Check for NaN.

Template Parameters

T	argument type (builtin floating-point type)

Parameters

arg	value to query

Return values

true	if not a number
false	else

Definition at line 308 of file float16_t.hpp.

Here is the caller graph for this function:

◆ builtin_signbit()

template<class T>

bool half_float::detail::builtin_signbit ( T arg )

Check sign.

Template Parameters

T	argument type (builtin floating-point type)

Parameters

arg	value to query

Return values

true	if signbit set
false	else

Definition at line 315 of file float16_t.hpp.

Here is the caller graph for this function:

◆ check_underflow()

constexpr_NOERR unsigned int half_float::detail::check_underflow ( unsigned int arg )

inline

Check value for underflow.

Parameters

arg	non-zero half-precision value to check

Returns: arg

Exceptions

FE_UNDERFLOW if arg is subnormal

Definition at line 487 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ compsignal()

constexpr_NOERR bool half_float::detail::compsignal	(	unsigned int	x,
		unsigned int	y )

inline

Check and signal for any NaN.

Parameters

x	first half-precision value to check
y	second half-precision value to check

Return values

true	if either x or y is NaN
false	else

Exceptions

FE_INVALID if x or y is NaN

Definition at line 406 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ divide64()

uint32 half_float::detail::divide64	(	uint32	x,
		uint32	y,
		int &	s )

inline

64-bit division.

Parameters

x	upper 32 bit of dividend
y	divisor
s	variable to store sticky bit for rounding

Returns: (x << 32) / y

Definition at line 1074 of file float16_t.hpp.

Here is the caller graph for this function:

◆ erf()

template<std::float_round_style R, bool C>

unsigned int half_float::detail::erf ( unsigned int arg )

Error function and postprocessing.

This computes the value directly in Q1.31 using the approximations given here.

Template Parameters

R	rounding mode to use
C	`true` for complementary error function, `false` else

Parameters

arg	half-precision function argument

Returns: approximated value of error function in half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1532 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ errflags()

int & half_float::detail::errflags ( )

inline

Internal exception flags.

Returns: reference to global exception flags

Definition at line 348 of file float16_t.hpp.

Here is the caller graph for this function:

◆ exp2()

uint32 half_float::detail::exp2	(	uint32	m,
		unsigned int	n = 32 )

inline

Fixed point binary exponential.

This uses the BKM algorithm in E-mode.

Parameters

m	exponent in [0,1) as Q0.31
n	number of iterations (at most 32)

Returns: 2 ^ m as Q1.31

Definition at line 1167 of file float16_t.hpp.

Here is the caller graph for this function:

◆ exp2_post()

template<std::float_round_style R, bool I>

unsigned int half_float::detail::exp2_post	(	uint32	m,
		int	exp,
		bool	esign,
		unsigned int	sign = 0 )

Postprocessing for binary exponential.

Template Parameters

R	rounding mode to use
I	`true` to always raise INEXACT exception, `false` to raise only for rounded results

Parameters

m	mantissa as Q1.31
exp	absolute value of unbiased exponent
esign	sign of actual exponent
sign	sign bit of result

Returns: value converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded or I is `true`

Definition at line 1325 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ fixed2half()

template<std::float_round_style R, unsigned int F, bool S, bool N, bool I>

unsigned int half_float::detail::fixed2half	(	uint32	m,
		int	exp = 14,
		unsigned int	sign = 0,
		int	s = 0 )

Convert fixed point to half-precision floating-point.

Template Parameters

R	rounding mode to use
F	number of fractional bits (at least 11)
S	`true` for signed, `false` for unsigned
N	`true` for additional normalization step, `false` if already normalized to 1.F
I	`true` to always raise INEXACT exception, `false` to raise only for rounded results

Parameters

m	mantissa in Q1.F fixed point format
exp	exponent
sign	half-precision value with sign bit only
s	sticky bit (or of all but the most significant already discarded bits)

Returns: value converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded or I is `true`

Definition at line 598 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ float2half() [1/2]

template<std::float_round_style R, class T>

unsigned int half_float::detail::float2half ( T value )

Convert floating-point to half-precision.

Template Parameters

R	rounding mode to use
T	source type (builtin floating-point type)

Parameters

value floating-point value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 771 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ float2half() [2/2]

template<class T>

unsigned int half_float::detail::float2half ( T value )

Definition at line 774 of file float16_t.hpp.

Here is the call graph for this function:

◆ float2half_impl() [1/3]

template<std::float_round_style R>

unsigned int half_float::detail::float2half_impl	(	double	value,
		true_type	)

Convert IEEE double-precision to half-precision.

Template Parameters

R	rounding mode to use

Parameters

value double-precision value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 706 of file float16_t.hpp.

Here is the call graph for this function:

◆ float2half_impl() [2/3]

template<std::float_round_style R>

unsigned int half_float::detail::float2half_impl	(	float	value,
		true_type	)

Convert IEEE single-precision to half-precision.

Credit for this goes to Jeroen van der Zijp.

Template Parameters

R	rounding mode to use

Parameters

value single-precision value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 619 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ float2half_impl() [3/3]

template<std::float_round_style R, class T>

unsigned int half_float::detail::float2half_impl	(	T	value,
			... )

Convert non-IEEE floating-point to half-precision.

Template Parameters

R	rounding mode to use
T	source type (builtin floating-point type)

Parameters

value floating-point value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 740 of file float16_t.hpp.

Here is the call graph for this function:

◆ gamma()

template<std::float_round_style R, bool L>

unsigned int half_float::detail::gamma ( unsigned int arg )

Gamma function and postprocessing.

This approximates the value of either the gamma function or its logarithm directly in Q1.31.

Template Parameters

R	rounding mode to use
L	`true` for logarithm of gamma function, `false` for gamma function

Parameters

arg	half-precision floating-point value

Returns: lgamma/tgamma(arg) in half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if arg is not a positive integer

Definition at line 1550 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ half2float()

template<class T>

T half_float::detail::half2float ( unsigned int value )

Convert half-precision to floating-point.

Template Parameters

T	type to convert to (builtin floating-point type)

Parameters

value half-precision value to convert

Returns: floating-point value

Definition at line 1007 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ half2float_impl() [1/3]

double half_float::detail::half2float_impl	(	unsigned int	value,
		double	,
		true_type	)

inline

Convert half-precision to IEEE double-precision.

Parameters

value half-precision value to convert

Returns: double-precision value

Definition at line 966 of file float16_t.hpp.

Here is the call graph for this function:

◆ half2float_impl() [2/3]

float half_float::detail::half2float_impl	(	unsigned int	value,
		float	,
		true_type	)

inline

Convert half-precision to IEEE single-precision.

Credit for this goes to Jeroen van der Zijp.

Parameters

value half-precision value to convert

Returns: single-precision value

Definition at line 804 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ half2float_impl() [3/3]

template<class T>

T half_float::detail::half2float_impl	(	unsigned int	value,
		T	,
			... )

Convert half-precision to non-IEEE floating-point.

Template Parameters

T	type to convert to (builtin floating-point type)

Parameters

value half-precision value to convert

Returns: floating-point value

Definition at line 988 of file float16_t.hpp.

Here is the call graph for this function:

◆ half2int()

template<std::float_round_style R, bool E, bool I, class T>

T half_float::detail::half2int ( unsigned int value )

Convert half-precision floating-point to integer.

Template Parameters

R	rounding mode to use
E	`true` for round to even, `false` for round away from zero
I	`true` to raise INEXACT exception (if inexact), `false` to never raise it
T	type to convert to (builtin integer type with at least 16 bits precision, excluding any implicit sign bits)

Parameters

value half-precision value to convert

Returns: rounded integer value

Exceptions

FE_INVALID	if value is not representable in type T
FE_INEXACT	if value had to be rounded and I is `true`

Definition at line 1020 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ hyperbolic_args()

std::pair< uint32, uint32 > half_float::detail::hyperbolic_args	(	unsigned int	abs,
		int &	exp,
		unsigned int	n = 32 )

inline

Get exponentials for hyperbolic computation.

Parameters

abs	half-precision floating-point value
exp	variable to take unbiased exponent of larger result
n	number of BKM iterations (at most 32)

Returns: exp(abs) and exp(-abs) as Q1.31 with same exponent

Definition at line 1293 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ hypot_post()

template<std::float_round_style R>

unsigned int half_float::detail::hypot_post	(	uint32	r,
		int	exp )

Hypotenuse square root and postprocessing.

Template Parameters

R	rounding mode to use

Parameters

r	mantissa as Q2.30
exp	unbiased exponent

Returns: square root converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded

Definition at line 1377 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ int2half()

template<std::float_round_style R, class T>

unsigned int half_float::detail::int2half ( T value )

Convert integer to half-precision floating-point.

Template Parameters

R	rounding mode to use
T	type to convert (builtin integer type)

Parameters

value integral value to convert

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_INEXACT	if value had to be rounded

Definition at line 785 of file float16_t.hpp.

Here is the call graph for this function:

◆ integral()

template<std::float_round_style R, bool E, bool I>

unsigned int half_float::detail::integral ( unsigned int value )

Round half-precision number to nearest integer value.

Template Parameters

R	rounding mode to use
E	`true` for round to even, `false` for round away from zero
I	`true` to raise INEXACT exception (if inexact), `false` to never raise it

Parameters

value half-precision value to round

Returns: half-precision bits for nearest integral value

Exceptions

FE_INVALID	for signaling NaN
FE_INEXACT	if value had to be rounded and I is `true`

Definition at line 565 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ invalid()

constexpr_NOERR unsigned int half_float::detail::invalid ( )

inline

Raise domain error and return NaN.

Returns: quiet NaN

Exceptions

FE_INVALID

Definition at line 465 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ log2()

uint32 half_float::detail::log2	(	uint32	m,
		unsigned int	n = 32 )

inline

Fixed point binary logarithm.

This uses the BKM algorithm in L-mode.

Parameters

m	mantissa in [1,2) as Q1.30
n	number of iterations (at most 32)

Returns: log2(m) as Q0.31

Definition at line 1191 of file float16_t.hpp.

Here is the caller graph for this function:

◆ log2_post()

template<std::float_round_style R, uint32 L>

unsigned int half_float::detail::log2_post	(	uint32	m,
		int	ilog,
		int	exp,
		unsigned int	sign = 0 )

Postprocessing for binary logarithm.

Template Parameters

R	rounding mode to use
L	logarithm for base transformation as Q1.31

Parameters

m	fractional part of logarithm as Q0.31
ilog	signed integer part of logarithm
exp	biased exponent of result
sign	sign bit of result

Returns: value base-transformed and converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1353 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ mod()

template<bool Q, bool R>

unsigned int half_float::detail::mod	(	unsigned int	x,
		unsigned int	y,
		int *	quo = NULL )

Half precision positive modulus.

Template Parameters

Q	`true` to compute full quotient, `false` else
R	`true` to compute signed remainder, `false` for positive remainder

Parameters

x	first operand as positive finite half-precision value
y	second operand as positive finite half-precision value
quo	address to store quotient at, `nullptr` if Q is `false`

Returns: modulus of x / y

Definition at line 1086 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ mulhi()

template<std::float_round_style R>

uint32 half_float::detail::mulhi	(	uint32	x,
		uint32	y )

Upper part of 64-bit multiplication.

Template Parameters

R	rounding mode to use

Parameters

x	first factor
y	second factor

Returns: upper 32 bit of x * y

Definition at line 1055 of file float16_t.hpp.

◆ multiply64()

uint32 half_float::detail::multiply64	(	uint32	x,
		uint32	y )

inline

64-bit multiplication.

Parameters

x	first factor
y	second factor

Returns: upper 32 bit of x * y rounded to nearest

Definition at line 1065 of file float16_t.hpp.

Here is the caller graph for this function:

◆ overflow()

template<std::float_round_style R>

constexpr_NOERR unsigned int half_float::detail::overflow ( unsigned int sign = 0 )

Half-precision overflow.

Template Parameters

R	rounding mode to use

Parameters

sign	half-precision value with sign bit only

Returns: rounded overflowing half-precision value

Exceptions

FE_OVERFLOW

Definition at line 503 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ pole()

constexpr_NOERR unsigned int half_float::detail::pole ( unsigned int sign = 0 )

inline

Raise pole error and return infinity.

Parameters

sign	half-precision value with sign bit only

Returns: half-precision infinity with sign of sign

Exceptions

FE_DIVBYZERO

Definition at line 476 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ raise()

void half_float::detail::raise	(	int	HALF_UNUSED_NOERR(flags),
		bool	HALF_UNUSED_NOERR(cond) = true )

inline

Raise floating-point exception.

Parameters

flags	exceptions to raise
cond	condition to raise exceptions for

Definition at line 353 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ rounded()

template<std::float_round_style R, bool I>

constexpr_NOERR unsigned int half_float::detail::rounded	(	unsigned int	value,
		int	g,
		int	s )

Round half-precision number.

Template Parameters

R	rounding mode to use
I	`true` to always raise INEXACT exception, `false` to raise only for rounded results

Parameters

value	finite half-precision number to round
g	guard bit (most significant discarded bit)
s	sticky bit (or of all but the most significant discarded bits)

Returns: rounded half-precision value

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if value had to be rounded or I is `true`

Definition at line 537 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ select()

constexpr_NOERR unsigned int half_float::detail::select	(	unsigned int	x,
		unsigned int	HALF_UNUSED_NOERR(y) )

inline

Select value or signaling NaN.

Parameters

x	preferred half-precision value
y	ignored half-precision value except for signaling NaN

Returns: y if signaling NaN, x otherwise

Exceptions

FE_INVALID if y is signaling NaN

Definition at line 454 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ sign_mask()

uint32 half_float::detail::sign_mask ( uint32 arg )

inline

Platform-independent sign mask.

Parameters

arg	integer value in two's complement

Return values

-1	if arg negative
0	if arg positive

Definition at line 321 of file float16_t.hpp.

Here is the caller graph for this function:

◆ signal() [1/3]

constexpr_NOERR unsigned int half_float::detail::signal ( unsigned int nan )

inline

Signal and silence signaling NaN.

Parameters

nan	half-precision NaN value

Returns: quiet NaN

Exceptions

FE_INVALID if nan is signaling NaN

Definition at line 417 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ signal() [2/3]

constexpr_NOERR unsigned int half_float::detail::signal	(	unsigned int	x,
		unsigned int	y )

inline

Signal and silence signaling NaNs.

Parameters

x	first half-precision value to check
y	second half-precision value to check

Returns: quiet NaN

Exceptions

FE_INVALID if x or y is signaling NaN

Definition at line 429 of file float16_t.hpp.

Here is the call graph for this function:

◆ signal() [3/3]

constexpr_NOERR unsigned int half_float::detail::signal	(	unsigned int	x,
		unsigned int	y,
		unsigned int	z )

inline

Signal and silence signaling NaNs.

Parameters

x	first half-precision value to check
y	second half-precision value to check
z	third half-precision value to check

Returns: quiet NaN

Exceptions

FE_INVALID if x, y or z is signaling NaN

Definition at line 442 of file float16_t.hpp.

Here is the call graph for this function:

◆ sincos()

std::pair< uint32, uint32 > half_float::detail::sincos	(	uint32	mz,
		unsigned int	n = 31 )

inline

Fixed point sine and cosine.

This uses the CORDIC algorithm in rotation mode.

Parameters

mz	angle in [-pi/2,pi/2] as Q1.30
n	number of iterations (at most 31)

Returns: sine and cosine of mz as Q1.30

Definition at line 1215 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ sqrt()

template<unsigned int F>

uint32 half_float::detail::sqrt	(	uint32 &	r,
		int &	exp )

Fixed point square root.

Template Parameters

F	number of fractional bits

Parameters

r	radicand in Q1.F fixed point format
exp	exponent

Returns: square root as Q1.F/2

Definition at line 1146 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ tangent_post()

template<std::float_round_style R>

unsigned int half_float::detail::tangent_post	(	uint32	my,
		uint32	mx,
		int	exp,
		unsigned int	sign = 0 )

Division and postprocessing for tangents.

Template Parameters

R	rounding mode to use

Parameters

my	dividend as Q1.31
mx	divisor as Q1.31
exp	biased exponent of result
sign	sign bit of result

Returns: quotient converted to half-precision

Exceptions

FE_OVERFLOW	on overflows
FE_UNDERFLOW	on underflows
FE_INEXACT	if no other exception occurred

Definition at line 1398 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

◆ underflow()

template<std::float_round_style R>

constexpr_NOERR unsigned int half_float::detail::underflow ( unsigned int sign = 0 )

Half-precision underflow.

Template Parameters

R	rounding mode to use

Parameters

sign	half-precision value with sign bit only

Returns: rounded underflowing half-precision value

Exceptions

FE_UNDERFLOW

Definition at line 518 of file float16_t.hpp.

Here is the call graph for this function:

Here is the caller graph for this function:

Variable Documentation

◆ binary

binary_t half_float::detail::binary = binary_t()

constexpr

Tag for binary construction.

Definition at line 291 of file float16_t.hpp.

Classes

Concepts

Typedefs

Functions

Variables

Typedef Documentation

◆ bits_t

◆ int32

◆ uint16

◆ uint32

Function Documentation

◆ angle_arg()

◆ area()

◆ arithmetic_shift()

◆ atan2()

◆ atan2_args()

◆ builtin_isinf()

◆ builtin_isnan()

◆ builtin_signbit()

◆ check_underflow()

◆ compsignal()

◆ divide64()

◆ erf()

◆ errflags()

◆ exp2()

◆ exp2_post()

◆ fixed2half()

◆ float2half() [1/2]

◆ float2half() [2/2]

◆ float2half_impl() [1/3]

◆ float2half_impl() [2/3]

◆ float2half_impl() [3/3]

◆ gamma()

◆ half2float()

◆ half2float_impl() [1/3]

◆ half2float_impl() [2/3]

◆ half2float_impl() [3/3]

◆ half2int()

◆ hyperbolic_args()

◆ hypot_post()

◆ int2half()

◆ integral()

◆ invalid()

◆ log2()

◆ log2_post()

◆ mod()

◆ mulhi()

◆ multiply64()

◆ overflow()

◆ pole()

◆ raise()

◆ rounded()

◆ select()

◆ sign_mask()

◆ signal() [1/3]

◆ signal() [2/3]

◆ signal() [3/3]

◆ sincos()

◆ sqrt()

◆ tangent_post()

◆ underflow()

Variable Documentation

◆ binary