sparrow/data__type_8hpp_source.html

// Copyright 2024 Man Group Operations Limited

//

// Licensed under the Apache License, Version 2.0 (the "License");

// you may not use this file except in compliance with the License.

// You may obtain a copy of the License at

//

//     http://www.apache.org/licenses/LICENSE-2.0

//

// Unless required by applicable law or agreed to in writing, software

// distributed under the License is distributed on an "AS IS" BASIS,

// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

// See the License for the specific language governing permissions and

// limitations under the License.


#pragma once


#include <chrono>

#include <version>


#include "sparrow/layout/date_types.hpp"

#include "sparrow/layout/interval_types.hpp"

#include "sparrow/layout/time_types.hpp"

#include "sparrow/layout/timestamp_without_timezone_types.hpp"

#include "sparrow/utils/sequence_view.hpp"


#if defined(SPARROW_USE_DATE_POLYFILL)


#    include <date/tz.h>


#    if defined(__cpp_lib_format)

#        include <format>


/// Lets `std::format` print a `date::zoned_time<T>` by reusing its stream insertion operator.
template <typename T>
struct std::formatter<date::zoned_time<T>>
{
    /// No format specification is interpreted: parsing stops where it starts.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();
    }

    /// Renders `date` through `operator<<` and copies the resulting text to the output.
    auto format(const date::zoned_time<T>& date, std::format_context& ctx) const
    {
        std::ostringstream buffer;
        buffer << date;
        return std::format_to(ctx.out(), "{}", buffer.str());
    }
};

#    endif


#else

namespace date = std::chrono;

#endif


#include <climits>

#include <concepts>

#include <cstdint>

#include <cstring>

#include <sstream>

#include <string>


#include "sparrow/config/config.hpp"

#include "sparrow/utils/contracts.hpp"

#include "sparrow/utils/decimal.hpp"

#include "sparrow/utils/large_int.hpp"

#include "sparrow/utils/mp_utils.hpp"


#if __cplusplus > 202002L and defined(__STDCPP_FLOAT16_T__) and defined(__STDCPP_FLOAT32_T__) \

    and defined(__STDCPP_FLOAT64_T__)

#    define SPARROW_STD_FIXED_FLOAT_SUPPORT

#endif


// TODO: use exclusively `std::float16_t` etc. once we switch to c++23, see

// https://en.cppreference.com/w/cpp/types/floating-point

#if defined(SPARROW_STD_FIXED_FLOAT_SUPPORT)

#    include <stdfloat>

#else

#    include "sparrow/details/3rdparty/float16_t.hpp"

#endif


namespace sparrow

{


// Fixed-width floating point aliases.
// When SPARROW_STD_FIXED_FLOAT_SUPPORT is defined they map to the <stdfloat> types;
// otherwise float16_t is half_float::half and float32_t / float64_t are float / double.
// TODO: use exclusively `std::float16_t` etc. once we switch to c++23, see
// https://en.cppreference.com/w/cpp/types/floating-point
#if defined(SPARROW_STD_FIXED_FLOAT_SUPPORT)
    using float16_t = std::float16_t;
    using float32_t = std::float32_t;
    using float64_t = std::float64_t;
#else
    using float16_t = half_float::half;
    using float32_t = float;
    using float64_t = double;
#endif


    // P0355R7 (Extending chrono to Calendars and Time Zones) has not been entirely implemented in libc++ yet.
    // See: https://libcxx.llvm.org/Status/Cxx20.html#note-p0355
    // For now, we use HowardHinnant/date as a replacement if we are compiling with libc++.
    // TODO: use the following once libc++ has full support for P0355R7.
    // using timestamp = std::chrono::time_point<std::chrono::system_clock, std::chrono::nanoseconds>;

    // Timestamp value type: a `zoned_time` taken from the `date` namespace, which is
    // HowardHinnant/date when SPARROW_USE_DATE_POLYFILL is defined and an alias of
    // std::chrono otherwise. The time zone pointer defaults to `const date::time_zone*`.
    template <typename Duration, typename TimeZonePtr = const date::time_zone*>
    using timestamp = date::zoned_time<Duration, TimeZonePtr>;


    // We need to be sure the current target platform is set up to correctly support these types:
    // each alias must have exactly the width its name advertises, must be reported as a
    // floating point type by std::is_floating_point_v, and a byte must be 8 bits wide.
    static_assert(sizeof(float16_t) == 2);
    static_assert(sizeof(float32_t) == 4);
    static_assert(sizeof(float64_t) == 8);
    static_assert(std::is_floating_point_v<float16_t>);
    static_assert(std::is_floating_point_v<float32_t>);
    static_assert(std::is_floating_point_v<float64_t>);
    static_assert(CHAR_BIT == 8);


    // Type used to represent raw data.
    using byte_t = std::byte;  // For now we will use this to represent raw data TODO: evaluate later if it's
                               // the right choice, switch to char if not


    // Empty tag type; it is the first entry of all_base_types_t.
    // NOTE(review): presumably the C++ value type paired with data_type::NA — confirm.
    struct null_type
    {
    };


    // null_type carries no state, so any two instances compare equal.
    constexpr bool operator==(const null_type&, const null_type&) noexcept
    {
        return true;
    }


    // TODO: does not support all types specified by the Arrow specification
    // yet

    // Runtime identifier of the type of data held by an array, stored in a single byte.
    // Enumerators up to BINARY_VIEW carry explicit values (17 and 18 are not assigned);
    // the enumerators after BINARY_VIEW take the implicit successive values, starting at 28.
    enum class data_type : uint8_t
    {
        NA = 0,
        BOOL = 1,
        // Fixed-width integers
        UINT8 = 2,
        INT8 = 3,
        UINT16 = 4,
        INT16 = 5,
        UINT32 = 6,
        INT32 = 7,
        UINT64 = 8,
        INT64 = 9,
        // Floating point
        HALF_FLOAT = 10,
        FLOAT = 11,
        DOUBLE = 12,
        // UTF8 variable-length string
        STRING = 13,
        LARGE_STRING = 14,
        // Variable-length bytes (no guarantee of UTF8-ness)
        BINARY = 15,
        LARGE_BINARY = 16,
        // Nested types
        LIST = 19,
        LARGE_LIST = 20,
        LIST_VIEW = 21,
        LARGE_LIST_VIEW = 22,
        FIXED_SIZED_LIST = 23,
        STRUCT = 24,
        MAP = 25,
        // View variants of string and binary
        STRING_VIEW = 26,
        BINARY_VIEW = 27,
        // From here on the values are implicit
        DENSE_UNION,
        SPARSE_UNION,
        RUN_ENCODED,
        DECIMAL32,
        DECIMAL64,
        DECIMAL128,
        DECIMAL256,
        FIXED_WIDTH_BINARY,
        // Temporal types
        DATE_DAYS,
        DATE_MILLISECONDS,
        TIMESTAMP_SECONDS,
        TIMESTAMP_MILLISECONDS,
        TIMESTAMP_MICROSECONDS,
        TIMESTAMP_NANOSECONDS,
        TIME_SECONDS,
        TIME_MILLISECONDS,
        TIME_MICROSECONDS,
        TIME_NANOSECONDS,
        DURATION_SECONDS,
        DURATION_MILLISECONDS,
        DURATION_MICROSECONDS,
        DURATION_NANOSECONDS,
        INTERVAL_MONTHS,
        INTERVAL_DAYS_TIME,
        INTERVAL_MONTHS_DAYS_NANOSECONDS
    };


    /**
     * Checks whether a string consists solely of ASCII decimal digits.
     *
     * The test is a plain range comparison against '0'..'9', so it does not
     * depend on the locale and, unlike a call to `std::isdigit`, it can really
     * be evaluated in a constant expression as the `constexpr` specifier promises.
     *
     * @param s The characters to inspect.
     * @return `true` if `s` is non-empty and every character lies in '0'..'9',
     *         `false` otherwise (in particular for an empty string).
     */
    [[nodiscard]] constexpr bool all_digits(const std::string_view s)
    {
        if (s.empty())
        {
            return false;
        }
        for (const char c : s)
        {
            if (c < '0' || c > '9')
            {
                return false;
            }
        }
        return true;
    }


    // Get the number of bytes of the decimal value type described by `format`.
    // Only the declaration is visible here; format_to_data_type() maps a result of
    // 4, 8, 16 or 32 to DECIMAL32, DECIMAL64, DECIMAL128 or DECIMAL256 respectively.
    [[nodiscard]] SPARROW_API std::size_t num_bytes_for_decimal(const char* format);


    // TODO: consider returning an optional instead

    /**
     * Translates an Arrow C data interface format string into a data_type.
     *
     * @param format The format string to decode.
     * @return The matching data_type, or data_type::NA when the string is not recognised.
     * @throws std::runtime_error if a "d:" (decimal) format does not resolve to
     *         a 4, 8, 16 or 32 byte value type.
     */
    [[nodiscard]] constexpr data_type format_to_data_type(std::string_view format)
    {
        // TODO: add missing conversions from
        // https://arrow.apache.org/docs/dev/format/CDataInterface.html#data-type-description-format-strings

        // Single-character formats: null, boolean, numbers, strings and binaries.
        if (format.size() == 1)
        {
            switch (format.front())
            {
                case 'n': return data_type::NA;
                case 'b': return data_type::BOOL;
                case 'C': return data_type::UINT8;
                case 'c': return data_type::INT8;
                case 'S': return data_type::UINT16;
                case 's': return data_type::INT16;
                case 'I': return data_type::UINT32;
                case 'i': return data_type::INT32;
                case 'L': return data_type::UINT64;
                case 'l': return data_type::INT64;
                case 'e': return data_type::HALF_FLOAT;
                case 'f': return data_type::FLOAT;
                case 'g': return data_type::DOUBLE;
                case 'u': return data_type::STRING;
                case 'U': return data_type::LARGE_STRING;
                case 'z': return data_type::BINARY;
                case 'Z': return data_type::LARGE_BINARY;
                default: return data_type::NA;
            }
        }

        // View layouts.
        if (format == "vu") { return data_type::STRING_VIEW; }
        if (format == "vz") { return data_type::BINARY_VIEW; }

        // Temporal formats all start with 't'.
        // TODO: add proper timestamp support below
        if (format.starts_with("t"))
        {
            // Dates
            if (format == "tdD") { return data_type::DATE_DAYS; }
            if (format == "tdm") { return data_type::DATE_MILLISECONDS; }
            // Timestamps: whatever follows the ':' is not inspected here
            if (format.starts_with("tss:")) { return data_type::TIMESTAMP_SECONDS; }
            if (format.starts_with("tsm:")) { return data_type::TIMESTAMP_MILLISECONDS; }
            if (format.starts_with("tsu:")) { return data_type::TIMESTAMP_MICROSECONDS; }
            if (format.starts_with("tsn:")) { return data_type::TIMESTAMP_NANOSECONDS; }
            // Durations
            if (format == "tDs") { return data_type::DURATION_SECONDS; }
            if (format == "tDm") { return data_type::DURATION_MILLISECONDS; }
            if (format == "tDu") { return data_type::DURATION_MICROSECONDS; }
            if (format == "tDn") { return data_type::DURATION_NANOSECONDS; }
            // Intervals
            if (format == "tiM") { return data_type::INTERVAL_MONTHS; }
            if (format == "tiD") { return data_type::INTERVAL_DAYS_TIME; }
            if (format == "tin") { return data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS; }
            // Times of day
            if (format == "tts") { return data_type::TIME_SECONDS; }
            if (format == "ttm") { return data_type::TIME_MILLISECONDS; }
            if (format == "ttu") { return data_type::TIME_MICROSECONDS; }
            if (format == "ttn") { return data_type::TIME_NANOSECONDS; }
            // Unknown temporal format
            return data_type::NA;
        }

        // Nested formats.
        if (format == "+l") { return data_type::LIST; }
        if (format == "+L") { return data_type::LARGE_LIST; }
        if (format == "+vl") { return data_type::LIST_VIEW; }
        if (format == "+vL") { return data_type::LARGE_LIST_VIEW; }
        if (format.starts_with("+w:")) { return data_type::FIXED_SIZED_LIST; }
        if (format == "+s") { return data_type::STRUCT; }
        if (format == "+m") { return data_type::MAP; }
        if (format.starts_with("+ud:")) { return data_type::DENSE_UNION; }
        if (format.starts_with("+us:")) { return data_type::SPARSE_UNION; }
        if (format.starts_with("+r")) { return data_type::RUN_ENCODED; }

        // Decimals: the storage width reported for the format selects the enumerator.
        if (format.starts_with("d:"))
        {
            switch (num_bytes_for_decimal(format.data()))
            {
                case 4: return data_type::DECIMAL32;
                case 8: return data_type::DECIMAL64;
                case 16: return data_type::DECIMAL128;
                case 32: return data_type::DECIMAL256;
                default: throw std::runtime_error("Invalid format for decimal");
            }
        }

        if (format.starts_with("w:")) { return data_type::FIXED_WIDTH_BINARY; }

        return data_type::NA;
    }


    /**
     * Picks the floating point data_type whose width equals `sizeof(T)`.
     *
     * @tparam T A floating point type of 2 to 8 bytes; the argument value is ignored.
     * @return HALF_FLOAT for 2 bytes, FLOAT for 4 bytes, DOUBLE for 8 bytes.
     */
    template <std::floating_point T>
        requires(sizeof(T) >= 2 && sizeof(T) <= 8)
    [[nodiscard]] constexpr data_type data_type_from_size(T = {}) noexcept
    {
        constexpr std::size_t byte_count = sizeof(T);
        if constexpr (byte_count == 2)
        {
            return data_type::HALF_FLOAT;
        }
        else if constexpr (byte_count == 4)
        {
            return data_type::FLOAT;
        }
        else if constexpr (byte_count == 8)
        {
            return data_type::DOUBLE;
        }
        else
        {
            // No other width has a matching enumerator.
            mpl::unreachable();
        }
    }


    /**
     * Picks the integer (or boolean) data_type matching `T`.
     *
     * @tparam T An integral type of 1 to 8 bytes; the argument value is ignored.
     * @return BOOL for `bool`; otherwise the INT8/16/32/64 or UINT8/16/32/64
     *         enumerator selected by the signedness and `sizeof(T)`.
     */
    template <std::integral T>
        requires(sizeof(T) >= 1 && sizeof(T) <= 8)
    [[nodiscard]] constexpr data_type data_type_from_size(T = {}) noexcept
    {
        if constexpr (std::same_as<bool, T>)
        {
            return data_type::BOOL;
        }
        else
        {
            constexpr bool is_signed = std::signed_integral<T>;
            static_assert(is_signed || std::unsigned_integral<T>);

            // One switch serves both signednesses; the width selects the pair.
            switch (sizeof(T))
            {
                case 1:
                    return is_signed ? data_type::INT8 : data_type::UINT8;
                case 2:
                    return is_signed ? data_type::INT16 : data_type::UINT16;
                case 4:
                    return is_signed ? data_type::INT32 : data_type::UINT32;
                case 8:
                    return is_signed ? data_type::INT64 : data_type::UINT64;
            }
        }

        mpl::unreachable();
    }


    namespace detail
    {
        // Forward declaration only; no definition is visible in this part of the header.
        // NOTE(review): presumably maps an array type A to its data_type — confirm at the definition.
        template <class A>
        struct get_data_type_from_array;
    }


    // REMARK: this function is not applicable to the following types
    // - all decimal types because further information is needed (precision, scale)
    // - fixed-sized binary because further information is needed (element size)

    /**
     * Returns the Arrow C data interface format string of a data_type.
     *
     * Every type whose format string is fixed is supported, including the view
     * layouts (STRING_VIEW, BINARY_VIEW, LIST_VIEW, LARGE_LIST_VIEW), so that the
     * result can be fed back to format_to_data_type(). For timestamps only the
     * prefix up to and including the ':' is returned; no time zone is appended.
     *
     * @param type The data type to encode.
     * @return The format string for `type`.
     * @throws std::runtime_error for types that are not handled here, e.g. those
     *         whose format string needs extra information (see the remark above).
     */
    [[nodiscard]] constexpr std::string_view data_type_to_format(data_type type)
    {
        switch (type)
        {
            case data_type::NA:
                return "n";
            case data_type::BOOL:
                return "b";
            case data_type::UINT8:
                return "C";
            case data_type::INT8:
                return "c";
            case data_type::UINT16:
                return "S";
            case data_type::INT16:
                return "s";
            case data_type::UINT32:
                return "I";
            case data_type::INT32:
                return "i";
            case data_type::UINT64:
                return "L";
            case data_type::INT64:
                return "l";
            case data_type::HALF_FLOAT:
                return "e";
            case data_type::FLOAT:
                return "f";
            case data_type::DOUBLE:
                return "g";
            case data_type::STRING:
                return "u";
            case data_type::LARGE_STRING:
                return "U";
            case data_type::BINARY:
                return "z";
            case data_type::LARGE_BINARY:
                return "Z";
            case data_type::STRING_VIEW:
                return "vu";
            case data_type::BINARY_VIEW:
                return "vz";
            case data_type::DATE_DAYS:
                return "tdD";
            case data_type::DATE_MILLISECONDS:
                return "tdm";
            case data_type::TIMESTAMP_SECONDS:
                return "tss:";
            case data_type::TIMESTAMP_MILLISECONDS:
                return "tsm:";
            case data_type::TIMESTAMP_MICROSECONDS:
                return "tsu:";
            case data_type::TIMESTAMP_NANOSECONDS:
                return "tsn:";
            case data_type::DURATION_SECONDS:
                return "tDs";
            case data_type::DURATION_MILLISECONDS:
                return "tDm";
            case data_type::DURATION_MICROSECONDS:
                return "tDu";
            case data_type::DURATION_NANOSECONDS:
                return "tDn";
            case data_type::INTERVAL_MONTHS:
                return "tiM";
            case data_type::INTERVAL_DAYS_TIME:
                return "tiD";
            case data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS:
                return "tin";
            case data_type::TIME_SECONDS:
                return "tts";
            case data_type::TIME_MILLISECONDS:
                return "ttm";
            case data_type::TIME_MICROSECONDS:
                return "ttu";
            case data_type::TIME_NANOSECONDS:
                return "ttn";
            case data_type::LIST:
                return "+l";
            case data_type::LARGE_LIST:
                return "+L";
            case data_type::LIST_VIEW:
                return "+vl";
            case data_type::LARGE_LIST_VIEW:
                return "+vL";
            case data_type::STRUCT:
                return "+s";
            case data_type::MAP:
                return "+m";
            default:
                // TODO: add missing types
                throw std::runtime_error("Unsupported data type");
        }
    }


    /**
     * Tells whether `dt` is one of the primitive types.
     *
     * @return `true` for BOOL, the eight fixed-width integer types and the three
     *         floating point types; `false` for every other value.
     */
    [[nodiscard]] constexpr bool data_type_is_primitive(data_type dt) noexcept
    {
        return dt == data_type::BOOL
               || dt == data_type::UINT8 || dt == data_type::INT8
               || dt == data_type::UINT16 || dt == data_type::INT16
               || dt == data_type::UINT32 || dt == data_type::INT32
               || dt == data_type::UINT64 || dt == data_type::INT64
               || dt == data_type::HALF_FLOAT || dt == data_type::FLOAT || dt == data_type::DOUBLE;
    }


    /**
     * Tells whether `dt` is one of the fixed-width integer types.
     *
     * @return `true` for UINT8/INT8, UINT16/INT16, UINT32/INT32 and UINT64/INT64;
     *         `false` for every other value (BOOL included).
     */
    [[nodiscard]] constexpr bool data_type_is_integer(data_type dt) noexcept
    {
        return dt == data_type::UINT8 || dt == data_type::INT8
               || dt == data_type::UINT16 || dt == data_type::INT16
               || dt == data_type::UINT32 || dt == data_type::INT32
               || dt == data_type::UINT64 || dt == data_type::INT64;
    }


    // Forward declarations of the nested value types listed in all_base_types_t below;
    // their definitions live in other headers.
    class list_value;
    class struct_value;
    class map_value;


    // Typelist of the C++ types accepted as base Arrow value types
    // (this is the list is_arrow_base_type checks against).
    // NOTE: this needs to be in sync-order with `data_type`
    // NOTE(review): the entries visible here do not line up one-to-one with the
    // enumerators of `data_type` (e.g. a single std::string entry versus STRING and
    // LARGE_STRING) — confirm what "sync-order" is expected to guarantee.
    using all_base_types_t = mpl::typelist<
        null_type,
        bool,
        std::uint8_t,
        std::int8_t,
        std::uint16_t,
        std::int16_t,
        std::uint32_t,
        std::int32_t,
        std::uint64_t,
        std::int64_t,
        float16_t,
        float32_t,
        float64_t,
        std::string,
        std::vector<byte_t>,
        date_days,
        date_milliseconds,
        timestamp<std::chrono::seconds>,
        timestamp<std::chrono::milliseconds>,
        timestamp<std::chrono::microseconds>,
        timestamp<std::chrono::nanoseconds>,
        zoned_time_without_timezone_seconds,
        zoned_time_without_timezone_milliseconds,
        zoned_time_without_timezone_microseconds,
        zoned_time_without_timezone_nanoseconds,
        std::chrono::seconds,
        std::chrono::milliseconds,
        std::chrono::microseconds,
        std::chrono::nanoseconds,
        chrono::months,
        days_time_interval,
        month_day_nanoseconds_interval,
        chrono::time_seconds,
        chrono::time_milliseconds,
        chrono::time_microseconds,
        chrono::time_nanoseconds,
        // TODO: add missing fundamental types here
        list_value,
        struct_value,
        map_value,
        decimal<std::int32_t>,
        decimal<std::int64_t>,
        decimal<int128_t>,
        decimal<int256_t>>;


    // Matches C++ representation types which are supported by default,
    // i.e. the types contained in all_base_types_t.
    template <class T>
    concept is_arrow_base_type = mpl::contains<all_base_types_t, T>();


    // template <class T>
    // concept is_arrow_base_type_or_compound = is_arrow_base_type<T> || is_list_value_v<T>;

    // all_base_types_t with `char` and `std::string_view` appended.
    using all_base_types_extended_t = mpl::append_t<all_base_types_t, char, std::string_view>;


    // Checks if a type is an extended base type for Arrow,
    // i.e. one of the types contained in all_base_types_extended_t.
    template <class T>
    concept is_arrow_base_type_extended = mpl::contains<all_base_types_extended_t, T>();


    // Template alias to get the corresponding Arrow type for a given type:
    // std::string_view maps to std::string, every other type maps to itself.
    template <class T>
    using get_corresponding_arrow_type_t = std::conditional_t<std::same_as<T, std::string_view>, std::string, T>;


    // Primary template of the traits class, declared only; specializations are provided elsewhere.
    // The code below reads `value_type` and `default_layout` from its specializations.
    template <class T>
    struct arrow_traits;


    namespace detail
    {
        // Empty helper that can only be instantiated with a class template taking one type parameter.
        // Its only visible use is in the commented-out requirement inside is_arrow_traits.
        template <template <class> class>
        struct accepts_template
        {
        };
    }

    // Matches valid and complete arrow_traits specializations for type T.
    // As written, T must be an instance of arrow_traits and expose a nested `value_type`.
    template <class T>
    concept is_arrow_traits = mpl::is_type_instance_of_v<T, arrow_traits> and requires {
        typename T::value_type;

        // typename detail::accepts_template<T::template default_layout>;

        // TODO: add more interface requirements on the traits here
        // TODO: add conversion operations between bytes and the value type
    };


    // Matches types providing valid and complete arrow_traits specialization.
    template <class T>
    concept has_arrow_type_traits = requires { typename ::sparrow::arrow_traits<T>; }
                                    and is_arrow_traits<::sparrow::arrow_traits<T>>;


    // Matches any type which is either one of the base C++ types supported
    // (is_arrow_base_type) or satisfies has_arrow_type_traits.
    template <class T>
    concept any_arrow_type = is_arrow_base_type<T> or has_arrow_type_traits<T>;


    // Binary layout type to use by default for the given C++ representation T of an arrow value,
    // as named by arrow_traits<T>::default_layout.
    template <has_arrow_type_traits T>
    using default_layout_t = typename arrow_traits<T>::default_layout;


    // For now, a tiny wrapper around data_type
    // TODO: More data and functions to come

    /**
     * Holds the data_type identifying a piece of data.
     */
    class data_descriptor
    {
    public:

        /// Creates a descriptor whose id is data_type::UINT8.
        constexpr data_descriptor() noexcept
            : data_descriptor(data_type::UINT8)
        {
        }

        /**
         * Creates a descriptor from an Arrow format string.
         *
         * Not `noexcept`: format_to_data_type() throws std::runtime_error for an
         * invalid decimal format, and that exception must be able to reach the
         * caller instead of terminating the program.
         *
         * @param format Format string decoded with format_to_data_type().
         */
        data_descriptor(std::string_view format)
            : data_descriptor(format_to_data_type(format))
        {
        }

        /// Creates a descriptor holding `id`.
        constexpr explicit data_descriptor(data_type id) noexcept
            : m_id(id)
        {
        }

        /// Returns the data_type held by this descriptor.
        [[nodiscard]] constexpr data_type id() const noexcept
        {
            return m_id;
        }

    private:

        data_type m_id;
    };


    namespace impl
    {
        /// Exposes `C::inner_const_reference` as `type` when `is_const` is true,
        /// and `C::inner_reference` otherwise. Both nested types must exist in `C`.
        template <class C, bool is_const>
        struct get_inner_reference
        {
            using type = std::conditional_t<is_const, typename C::inner_const_reference, typename C::inner_reference>;
        };

        /// Shorthand for `get_inner_reference<C, is_const>::type`.
        template <class C, bool is_const>
        using get_inner_reference_t = typename get_inner_reference<C, is_const>::type;
    }  // namespace impl


    // Satisfied only by std::int32_t and std::int64_t.
    template <class T>
    concept layout_offset = std::same_as<T, std::int32_t> || std::same_as<T, std::int64_t>;

}


#if defined(__cpp_lib_format)


namespace std
{
    /// `std::format` support for sparrow::data_type: prints a human-readable name.
    template <>
    struct formatter<sparrow::data_type>
    {
        /// No format specification is interpreted: parsing stops where it starts.
        constexpr auto parse(std::format_parse_context& ctx)
        {
            return ctx.begin();
        }

        /// Writes the display name of `data_type` to the output.
        auto format(const sparrow::data_type& data_type, std::format_context& ctx) const
        {
            return std::format_to(ctx.out(), "{}", display_name(data_type));
        }

    private:

        /// Maps an enumerator to its display name; values without an entry yield "UNKNOWN".
        [[nodiscard]] static constexpr std::string_view display_name(sparrow::data_type dt)
        {
            using enum sparrow::data_type;
            switch (dt)
            {
                case NA: return "N/A";
                case BOOL: return "bool";
                case UINT8: return "uint8";
                case INT8: return "int8";
                case UINT16: return "uint16";
                case INT16: return "int16";
                case UINT32: return "uint32";
                case INT32: return "int32";
                case UINT64: return "uint64";
                case INT64: return "int64";
                case HALF_FLOAT: return "float16";
                case FLOAT: return "float32";
                case DOUBLE: return "double";
                case STRING: return "String";
                case LARGE_STRING: return "Large string";
                case BINARY: return "Binary";
                case LARGE_BINARY: return "Large binary";
                case DATE_DAYS: return "Date days";
                case DATE_MILLISECONDS: return "Date milliseconds";
                case TIMESTAMP_SECONDS: return "Timestamp seconds";
                case TIMESTAMP_MILLISECONDS: return "Timestamp milliseconds";
                case TIMESTAMP_MICROSECONDS: return "Timestamp microseconds";
                case TIMESTAMP_NANOSECONDS: return "Timestamp nanoseconds";
                case DURATION_SECONDS: return "Duration seconds";
                case DURATION_MILLISECONDS: return "Duration milliseconds";
                case DURATION_MICROSECONDS: return "Duration microseconds";
                case DURATION_NANOSECONDS: return "Duration nanoseconds";
                case INTERVAL_MONTHS: return "Interval months";
                case INTERVAL_DAYS_TIME: return "Interval days time";
                case INTERVAL_MONTHS_DAYS_NANOSECONDS: return "Interval months days nanoseconds";
                case TIME_SECONDS: return "Time seconds";
                case TIME_MILLISECONDS: return "Time milliseconds";
                case TIME_MICROSECONDS: return "Time microseconds";
                case TIME_NANOSECONDS: return "Time nanoseconds";
                case LIST: return "List";
                case LARGE_LIST: return "Large list";
                case LIST_VIEW: return "List view";
                case LARGE_LIST_VIEW: return "Large list view";
                case FIXED_SIZED_LIST: return "Fixed sized list";
                case STRUCT: return "Struct";
                case MAP: return "Map";
                case DENSE_UNION: return "Dense union";
                case SPARSE_UNION: return "Sparse union";
                case RUN_ENCODED: return "Run encoded";
                case DECIMAL32: return "Decimal32";
                case DECIMAL64: return "Decimal64";
                case DECIMAL128: return "Decimal128";
                case DECIMAL256: return "Decimal256";
                case FIXED_WIDTH_BINARY: return "Fixed width binary";
                case STRING_VIEW: return "String view";
                case BINARY_VIEW: return "Binary view";
            }
            return "UNKNOWN";
        }
    };
}


/// `std::format` support for sparrow::null_type: always prints "null_type".
template <>
struct std::formatter<sparrow::null_type>
{
    /// No format specification is interpreted: parsing stops where it starts.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();
    }

    /// The value carries no state, so the output is a fixed label.
    auto format(const sparrow::null_type&, std::format_context& ctx) const
    {
        constexpr std::string_view label = "null_type";
        return std::format_to(ctx.out(), "{}", label);
    }
};


/// Streams a sparrow::null_type as the literal text "null".
///
/// @param os Destination stream.
/// @return `os`, to allow chaining.
///
/// NOTE(review): this overload lives in the global namespace, not in `sparrow`, so
/// argument-dependent lookup on sparrow::null_type will not find it — confirm this is intended.
inline std::ostream& operator<<(std::ostream& os, const sparrow::null_type&)
{
    // Insert the literal directly: routing it through std::format would only
    // build a temporary std::string holding the very same characters.
    return os << "null";
}


/// `std::format` support for std::byte: prints the byte as a decimal integer.
template <>
struct std::formatter<std::byte>
{
    /// No format specification is interpreted: parsing stops where it starts.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();
    }

    /// Converts the byte to `int` and writes that number with the default integer format.
    auto format(const std::byte& b, std::format_context& ctx) const
    {
        const int numeric_value = std::to_integer<int>(b);
        return std::format_to(ctx.out(), "{}", numeric_value);
    }
};


#endif

sparrow::data_descriptor::id
constexpr data_type id() const noexcept
Definition data_type.hpp:779

sparrow::data_descriptor::data_descriptor
constexpr data_descriptor(data_type id) noexcept
Definition data_type.hpp:774

sparrow::data_descriptor::data_descriptor
constexpr data_descriptor() noexcept
Definition data_type.hpp:764

sparrow::data_descriptor::data_descriptor
data_descriptor(std::string_view format) noexcept
Definition data_type.hpp:769

sparrow::decimal
Definition decimal.hpp:62

sparrow::list_value
Definition list_value.hpp:200

sparrow::map_value
Definition map_value.hpp:27

sparrow::struct_value
Definition struct_value.hpp:31

sparrow::any_arrow_type
Matches any type which is one of the base C++ types supported or at least that provides an arrow_trai...
Definition data_type.hpp:751

sparrow::has_arrow_type_traits
Matches types providing valid and complete arrow_traits specialization.
Definition data_type.hpp:745

sparrow::is_arrow_base_type_extended
Checks if a type is an extended base type for Arrow.
Definition data_type.hpp:689

sparrow::is_arrow_base_type
Matches C++ representation types which are supported by default.
Definition data_type.hpp:673

sparrow::is_arrow_traits
Matches valid and complete arrow_traits specializations for type T.
Definition data_type.hpp:730

sparrow::layout_offset
Definition data_type.hpp:802

config.hpp

SPARROW_API
#define SPARROW_API
Definition config.hpp:38

contracts.hpp

date_types.hpp

decimal.hpp

interval_types.hpp

large_int.hpp

mp_utils.hpp

sparrow::chrono::months
std::chrono::duration< int32_t, std::ratio< 2629746 > > months
Definition temporal_types.hpp:22

sparrow::detail
Definition dynamic_bitset.hpp:237

sparrow::impl
Definition data_type.hpp:790

sparrow::impl::get_inner_reference_t
typename get_inner_reference< C, is_const >::type get_inner_reference_t
Definition data_type.hpp:798

sparrow::mpl::append_t
decltype(append(TypeList{}, Us{}...)) append_t
Type alias for appending types or typelists to a given typelist.
Definition mp_utils.hpp:198

sparrow::mpl::is_type_instance_of_v
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
Definition mp_utils.hpp:102

sparrow::mpl::unreachable
void unreachable()
Invokes undefined behavior for optimization purposes.
Definition mp_utils.hpp:882

sparrow::mpl::contains
consteval bool contains()
Checks if a typelist contains a specific type.
Definition mp_utils.hpp:633

sparrow
Definition array.hpp:21

sparrow::byte_t
std::byte byte_t
Definition data_type.hpp:115

sparrow::data_type_to_format
constexpr std::string_view data_type_to_format(data_type type)
Definition data_type.hpp:491

sparrow::operator==
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.

sparrow::data_type_is_primitive
constexpr bool data_type_is_primitive(data_type dt) noexcept
Definition data_type.hpp:578

sparrow::float16_t
half_float::half float16_t
Definition data_type.hpp:93

sparrow::num_bytes_for_decimal
SPARROW_API std::size_t num_bytes_for_decimal(const char *format)

sparrow::float32_t
float float32_t
Definition data_type.hpp:94

sparrow::all_base_types_extended_t
mpl::append_t< all_base_types_t, char, std::string_view > all_base_types_extended_t
Is an Arrow base type or an Arrow compound type (list<T>, struct<T>, etc.).
Definition data_type.hpp:678

sparrow::timestamp
date::zoned_time< Duration, TimeZonePtr > timestamp
Definition data_type.hpp:104

sparrow::float64_t
double float64_t
Definition data_type.hpp:95

sparrow::get_corresponding_arrow_type_t
std::conditional_t< std::same_as< T, std::string_view >, std::string, T > get_corresponding_arrow_type_t
Template alias to get the corresponding Arrow type for a given type.
Definition data_type.hpp:700

sparrow::default_layout_t
typename arrow_traits< T >::default_layout default_layout_t
Binary layout type to use by default for the given C++ representation T of an arrow value.
Definition data_type.hpp:756

sparrow::data_type_from_size
constexpr data_type data_type_from_size(T={}) noexcept
Definition data_type.hpp:403

sparrow::all_digits
constexpr bool all_digits(const std::string_view s)
Definition data_type.hpp:188

sparrow::date_milliseconds
std::chrono::time_point< std::chrono::system_clock, std::chrono::milliseconds > date_milliseconds
Definition date_types.hpp:24

sparrow::data_type_is_integer
constexpr bool data_type_is_integer(data_type dt) noexcept
Definition data_type.hpp:601

sparrow::all_base_types_t
mpl::typelist< null_type, bool, std::uint8_t, std::int8_t, std::uint16_t, std::int16_t, std::uint32_t, std::int32_t, std::uint64_t, std::int64_t, float16_t, float32_t, float64_t, std::string, std::vector< byte_t >, date_days, date_milliseconds, timestamp< std::chrono::seconds >, timestamp< std::chrono::milliseconds >, timestamp< std::chrono::microseconds >, timestamp< std::chrono::nanoseconds >, zoned_time_without_timezone_seconds, zoned_time_without_timezone_milliseconds, zoned_time_without_timezone_microseconds, zoned_time_without_timezone_nanoseconds, std::chrono::seconds, std::chrono::milliseconds, std::chrono::microseconds, std::chrono::nanoseconds, chrono::months, days_time_interval, month_day_nanoseconds_interval, chrono::time_seconds, chrono::time_milliseconds, chrono::time_microseconds, chrono::time_nanoseconds, list_value, struct_value, map_value, decimal< std::int32_t >, decimal< std::int64_t >, decimal< int128_t >, decimal< int256_t > > all_base_types_t
C++ value representation types matching Arrow types.
Definition data_type.hpp:625

sparrow::format_to_data_type
constexpr data_type format_to_data_type(std::string_view format)
Definition data_type.hpp:207

sparrow::data_type
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Definition data_type.hpp:131

sparrow::data_type::SPARSE_UNION
@ SPARSE_UNION
Definition data_type.hpp:161

sparrow::data_type::INTERVAL_DAYS_TIME
@ INTERVAL_DAYS_TIME
Definition data_type.hpp:183

sparrow::data_type::DECIMAL256
@ DECIMAL256
Definition data_type.hpp:166

sparrow::data_type::UINT32
@ UINT32
Definition data_type.hpp:138

sparrow::data_type::LIST
@ LIST
Definition data_type.hpp:151

sparrow::data_type::HALF_FLOAT
@ HALF_FLOAT
Definition data_type.hpp:142

sparrow::data_type::LIST_VIEW
@ LIST_VIEW
Definition data_type.hpp:153

sparrow::data_type::TIME_NANOSECONDS
@ TIME_NANOSECONDS
Definition data_type.hpp:177

sparrow::data_type::DATE_MILLISECONDS
@ DATE_MILLISECONDS
Definition data_type.hpp:169

sparrow::data_type::INTERVAL_MONTHS
@ INTERVAL_MONTHS
Definition data_type.hpp:182

sparrow::data_type::DURATION_MILLISECONDS
@ DURATION_MILLISECONDS
Definition data_type.hpp:179

sparrow::data_type::LARGE_LIST_VIEW
@ LARGE_LIST_VIEW
Definition data_type.hpp:154

sparrow::data_type::UINT16
@ UINT16
Definition data_type.hpp:136

sparrow::data_type::INT64
@ INT64
Definition data_type.hpp:141

sparrow::data_type::MAP
@ MAP
Definition data_type.hpp:157

sparrow::data_type::DECIMAL128
@ DECIMAL128
Definition data_type.hpp:165

sparrow::data_type::DATE_DAYS
@ DATE_DAYS
Definition data_type.hpp:168

sparrow::data_type::INT16
@ INT16
Definition data_type.hpp:137

sparrow::data_type::TIME_MICROSECONDS
@ TIME_MICROSECONDS
Definition data_type.hpp:176

sparrow::data_type::TIMESTAMP_MILLISECONDS
@ TIMESTAMP_MILLISECONDS
Definition data_type.hpp:171

sparrow::data_type::DURATION_NANOSECONDS
@ DURATION_NANOSECONDS
Definition data_type.hpp:181

sparrow::data_type::STRING
@ STRING
Definition data_type.hpp:146

sparrow::data_type::INT32
@ INT32
Definition data_type.hpp:139

sparrow::data_type::UINT64
@ UINT64
Definition data_type.hpp:140

sparrow::data_type::TIMESTAMP_MICROSECONDS
@ TIMESTAMP_MICROSECONDS
Definition data_type.hpp:172

sparrow::data_type::LARGE_LIST
@ LARGE_LIST
Definition data_type.hpp:152

sparrow::data_type::DURATION_SECONDS
@ DURATION_SECONDS
Definition data_type.hpp:178

sparrow::data_type::DURATION_MICROSECONDS
@ DURATION_MICROSECONDS
Definition data_type.hpp:180

sparrow::data_type::RUN_ENCODED
@ RUN_ENCODED
Definition data_type.hpp:162

sparrow::data_type::BINARY_VIEW
@ BINARY_VIEW
Definition data_type.hpp:159

sparrow::data_type::BINARY
@ BINARY
Definition data_type.hpp:149

sparrow::data_type::FIXED_WIDTH_BINARY
@ FIXED_WIDTH_BINARY
Definition data_type.hpp:167

sparrow::data_type::STRING_VIEW
@ STRING_VIEW
Definition data_type.hpp:158

sparrow::data_type::LARGE_STRING
@ LARGE_STRING
Definition data_type.hpp:147

sparrow::data_type::DENSE_UNION
@ DENSE_UNION
Definition data_type.hpp:160

sparrow::data_type::BOOL
@ BOOL
Definition data_type.hpp:133

sparrow::data_type::TIME_SECONDS
@ TIME_SECONDS
Definition data_type.hpp:174

sparrow::data_type::STRUCT
@ STRUCT
Definition data_type.hpp:156

sparrow::data_type::TIME_MILLISECONDS
@ TIME_MILLISECONDS
Definition data_type.hpp:175

sparrow::data_type::LARGE_BINARY
@ LARGE_BINARY
Definition data_type.hpp:150

sparrow::data_type::INTERVAL_MONTHS_DAYS_NANOSECONDS
@ INTERVAL_MONTHS_DAYS_NANOSECONDS
Definition data_type.hpp:184

sparrow::data_type::TIMESTAMP_SECONDS
@ TIMESTAMP_SECONDS
Definition data_type.hpp:170

sparrow::data_type::TIMESTAMP_NANOSECONDS
@ TIMESTAMP_NANOSECONDS
Definition data_type.hpp:173

sparrow::data_type::DECIMAL64
@ DECIMAL64
Definition data_type.hpp:164

sparrow::data_type::NA
@ NA
Definition data_type.hpp:132

sparrow::data_type::DECIMAL32
@ DECIMAL32
Definition data_type.hpp:163

sparrow::data_type::FLOAT
@ FLOAT
Definition data_type.hpp:143

sparrow::data_type::UINT8
@ UINT8
Definition data_type.hpp:134

sparrow::data_type::INT8
@ INT8
Definition data_type.hpp:135

sparrow::data_type::FIXED_SIZED_LIST
@ FIXED_SIZED_LIST
Definition data_type.hpp:155

sparrow::data_type::DOUBLE
@ DOUBLE
Definition data_type.hpp:144

sparrow::date_days
std::chrono::time_point< std::chrono::system_clock, chrono::days > date_days
Definition date_types.hpp:23

std
Definition fixed_width_binary_reference.hpp:256

operator<<
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
Definition nullable.hpp:1522

sequence_view.hpp

sparrow::arrow_traits
Provides compile-time information about Arrow data types.
Definition data_type.hpp:716

sparrow::chrono::time_microseconds
A duration representing time elapsed since midnight, in microseconds.
Definition time_types.hpp:56

sparrow::chrono::time_milliseconds
A duration representing time elapsed since midnight, in milliseconds.
Definition time_types.hpp:43

sparrow::chrono::time_nanoseconds
A duration representing time elapsed since midnight, in nanoseconds.
Definition time_types.hpp:69

sparrow::chrono::time_seconds
A duration representing time elapsed since midnight.
Definition time_types.hpp:30

sparrow::days_time_interval
Definition interval_types.hpp:31

sparrow::detail::accepts_template
Definition data_type.hpp:722

sparrow::detail::get_data_type_from_array
Metafunction for retrieving the data_type of a typed array.
Definition array_wrapper.hpp:35

sparrow::impl::get_inner_reference
Definition data_type.hpp:794

sparrow::month_day_nanoseconds_interval
Definition interval_types.hpp:46

sparrow::mpl::typelist
A sequence of types used for metaprogramming operations.
Definition mp_utils.hpp:123

sparrow::null_type
Definition data_type.hpp:119

sparrow::zoned_time_without_timezone_microseconds
A zoned time value without timezone, in microseconds.
Definition timestamp_without_timezone_types.hpp:51

sparrow::zoned_time_without_timezone_milliseconds
A zoned time value without timezone, in milliseconds.
Definition timestamp_without_timezone_types.hpp:38

sparrow::zoned_time_without_timezone_nanoseconds
A zoned time value without timezone, in nanoseconds.
Definition timestamp_without_timezone_types.hpp:64

sparrow::zoned_time_without_timezone_seconds
A zoned time value without timezone, in seconds.
Definition timestamp_without_timezone_types.hpp:25

time_types.hpp

timestamp_without_timezone_types.hpp