123 template <
class DERIVED>
/**
 * @brief Gets the optional name of the union array.
 *
 * @return The name as a string view, or an empty optional when no name is available.
 *         NOTE(review): presumably forwarded from the underlying Arrow proxy — the
 *         definition is not visible in this listing; confirm.
 */
150 [[nodiscard]]
constexpr std::optional<std::string_view>
name()
const;
/**
 * @brief Checks if the union array is empty.
 *
 * @return true when the array holds no elements, false otherwise.
 */
229 [[nodiscard]]
constexpr bool empty()
const;
387 template <std::ranges::input_range R>
406 template <std::ranges::input_range R>
407 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
408 static constexpr std::string
490#if defined(__cpp_lib_format)
491 friend struct std::formatter<DERIVED>;
564 template <
class... Args>
632 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
634 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
635 [[nodiscard]]
static auto create_proxy(
636 std::vector<array>&& children,
639 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
640 std::optional<std::string_view>
name = std::nullopt,
641 std::optional<METADATA_RANGE>
metadata = std::nullopt
663 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
664 std::ranges::input_range OFFSET_BUFFER_RANGE,
665 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
667 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
669 std::vector<array>&& children,
670 TYPE_ID_BUFFER_RANGE&& element_type,
671 OFFSET_BUFFER_RANGE&& offsets,
672 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
673 std::optional<std::string_view>
name = std::nullopt,
674 std::optional<METADATA_RANGE>
metadata = std::nullopt
680 return dense_union_array::create_proxy(
681 std::forward<std::vector<array>>(children),
682 std::move(element_type_buffer),
683 std::move(offsets_buffer),
684 std::move(type_mapping),
685 std::forward<std::optional<std::string_view>>(
name),
686 std::forward<std::optional<METADATA_RANGE>>(
metadata)
706 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
707 [[nodiscard]]
static arrow_proxy create_proxy_impl(
708 std::vector<array>&& children,
711 std::string&& format,
712 std::optional<std::string_view>
name = std::nullopt,
713 std::optional<METADATA_RANGE>
metadata = std::nullopt
733 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
734 std::ranges::input_range OFFSET_BUFFER_RANGE,
736 [[nodiscard]]
static arrow_proxy create_proxy_impl(
737 std::vector<array>&& children,
738 TYPE_ID_BUFFER_RANGE&& element_type,
739 OFFSET_BUFFER_RANGE&& offsets,
740 std::string&& format,
741 std::optional<std::string_view>
name = std::nullopt,
742 std::optional<METADATA_RANGE>
metadata = std::nullopt
745 SPARROW_ASSERT_TRUE(std::ranges::distance(element_type) == std::ranges::distance(offsets));
749 return dense_union_array::create_proxy_impl(
750 std::forward<std::vector<array>>(children),
751 std::move(element_type_buffer),
752 std::move(offsets_buffer),
753 std::forward<std::string>(format),
754 std::forward<std::optional<std::string_view>>(
name),
755 std::forward<std::optional<METADATA_RANGE>>(
metadata)
/**
 * @brief Returns the offset associated with the element at index @p i.
 *
 * Declared here only; the definition is not visible in this listing.
 * A CRTP base-class member obtains this value through
 * `this->derived_cast().element_offset(i)` right after reading the element's type id.
 *
 * NOTE(review): presumably looks the value up through `p_offsets` (declared next to
 * this function) — confirm against the implementation file.
 *
 * @param i Index of the element.
 * @return Offset for element @p i.
 */
769 SPARROW_API std::size_t element_offset(std::size_t i)
const;
771 const std::int32_t* p_offsets;
824 template <
class... Args>
867 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
869 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
870 static auto create_proxy(
871 std::vector<array>&& children,
873 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
874 std::optional<std::string_view>
name = std::nullopt,
875 std::optional<METADATA_RANGE>
metadata = std::nullopt
893 template <input_metadata_container METADATA_RANGE>
894 static auto create_proxy_impl(
895 std::vector<array>&& children,
897 std::string&& format,
898 std::optional<std::string_view>
name = std::nullopt,
899 std::optional<METADATA_RANGE>
metadata = std::nullopt
/**
 * @brief Returns the offset associated with the element at index @p i.
 *
 * Declared here only; the definition is not visible in this listing.
 * A CRTP base-class member obtains this value through
 * `this->derived_cast().element_offset(i)` right after reading the element's type id.
 *
 * NOTE(review): no offsets pointer is declared near this overload, so it presumably
 * derives the offset from @p i directly — confirm against the implementation file.
 *
 * @param i Index of the element.
 * @return Offset for element @p i.
 */
915 [[nodiscard]]
SPARROW_API std::size_t element_offset(std::size_t i)
const;
923 template <
class DERIVED>
929 format_string.remove_prefix(4);
931 constexpr std::string_view delim{
","};
932 std::size_t child_index = 0;
933 std::ranges::for_each(
934 format_string | std::views::split(delim),
937 const std::string str(
938 std::string_view{&*std::ranges::begin(s),
static_cast<size_t>(std::ranges::distance(s))}
940 const auto as_int = std::atoi(str.c_str());
941 ret[
static_cast<std::size_t
>(as_int)] =
static_cast<std::uint8_t
>(child_index);
948 template <
class DERIVED>
949 template <std::ranges::input_range R>
954 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> ret;
955 if (!child_index_to_type_id.has_value())
957 constexpr std::array<std::uint8_t, TYPE_ID_MAP_SIZE> default_mapping = []
959 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> arr{};
960 std::iota(arr.begin(), arr.end(), 0);
963 return default_mapping;
967 const std::size_t n = std::ranges::size(*child_index_to_type_id);
968 for (std::size_t i = 0; i < n; ++i)
970 ret[(*child_index_to_type_id)[
static_cast<std::uint8_t
>(i)]] =
static_cast<std::uint8_t
>(i);
976 template <
class DERIVED>
977 template <std::ranges::input_range R>
978 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
979 constexpr std::string
982 const auto range_size = range.has_value() ? std::ranges::size(*range) : 0;
985 std::string ret = dense ?
"+ud:" :
"+us:";
988 for (std::size_t i = 0; i < n; ++i)
990 ret += std::to_string(i) +
",";
995 for (
const auto& v : *range)
997 ret += std::to_string(v) +
",";
1005 throw std::invalid_argument(
"Invalid type-id map");
1009 template <
class DERIVED>
1015 template <
class DERIVED>
1021 template <
class DERIVED>
1027 template <
class DERIVED>
1033 template <
class DERIVED>
1042 template <
class DERIVED>
1048 template <
class DERIVED>
1061 template <
class DERIVED>
1064 const auto type_id =
static_cast<std::size_t
>(
p_type_ids[i]);
1066 const auto offset = this->
derived_cast().element_offset(i);
1070 template <
class DERIVED>
1076 template <
class DERIVED>
1082 template <
class DERIVED>
1088 template <
class DERIVED>
1094 template <
class DERIVED>
1100 template <
class DERIVED>
1106 template <
class DERIVED>
1112 template <
class DERIVED>
1118 template <
class DERIVED>
1124 template <
class DERIVED>
1130 template <
class DERIVED>
1136 template <
class DERIVED>
1142 template <
class DERIVED>
1148 template <
class DERIVED>
1154 template <
class DERIVED>
1157 return (*
this)[this->
size() - 1];
1160 template <
class DERIVED>
1164 for (std::size_t i = 0; i < children.size(); ++i)
1174 return std::ranges::equal(lhs, rhs);
1183 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
1185 std::vector<array>&& children,
1188 std::string&& format,
1189 std::optional<std::string_view> name,
1190 std::optional<METADATA_RANGE> metadata
1193 const auto n_children = children.size();
1197 for (std::size_t i = 0; i < n_children; ++i)
1199 auto& child = children[i];
1201 child_arrays[i] =
new ArrowArray(std::move(flat_arr));
1202 child_schemas[i] =
new ArrowSchema(std::move(flat_schema));
1207 child_schemas + n_children,
1214 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
1222 std::move(metadata),
1231 static_cast<std::int64_t
>(size),
1241 return arrow_proxy{std::move(arr), std::move(schema)};
1249 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1250 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1251 auto dense_union_array::create_proxy(
1252 std::vector<array>&& children,
1253 type_id_buffer_type&& element_type,
1254 offset_buffer_type&& offsets,
1255 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1256 std::optional<std::string_view> name,
1257 std::optional<METADATA_RANGE> metadata
1261 const auto n_children = children.size();
1263 std::string format = make_format_string(
true , n_children, child_index_to_type_id);
1265 return create_proxy_impl(
1266 std::move(children),
1267 std::move(element_type),
1275 template <input_metadata_container METADATA_RANGE>
1276 auto dense_union_array::create_proxy_impl(
1277 std::vector<array>&& children,
1280 std::string&& format,
1281 std::optional<std::string_view>
name,
1282 std::optional<METADATA_RANGE>
metadata
1286 const auto size = element_type.size();
1288 std::vector<buffer<std::uint8_t>> arr_buffs;
1289 arr_buffs.reserve(2);
1290 arr_buffs.emplace_back(std::move(element_type).extract_storage());
1291 arr_buffs.emplace_back(std::move(offsets).extract_storage());
1294 std::move(children),
1295 std::move(arr_buffs),
1307 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1308 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1309 auto sparse_union_array::create_proxy(
1310 std::vector<array>&& children,
1311 type_id_buffer_type&& element_type,
1312 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1313 std::optional<std::string_view> name,
1314 std::optional<METADATA_RANGE> metadata
1317 const auto n_children = children.size();
1318 if (child_index_to_type_id.has_value())
1323 std::string format = make_format_string(
false , n_children, child_index_to_type_id);
1325 return create_proxy_impl(
1326 std::move(children),
1327 std::move(element_type),
1334 template <input_metadata_container METADATA_RANGE>
1335 auto sparse_union_array::create_proxy_impl(
1336 std::vector<array>&& children,
1337 type_id_buffer_type&& element_type,
1338 std::string&& format,
1339 std::optional<std::string_view> name,
1340 std::optional<METADATA_RANGE> metadata
1343 for (
const auto& child : children)
1347 const auto size = element_type.size();
1349 std::vector<buffer<std::uint8_t>> arr_buffs;
1350 arr_buffs.reserve(1);
1351 arr_buffs.emplace_back(std::move(element_type).extract_storage());
1354 std::move(children),
1355 std::move(arr_buffs),
1364#if defined(__cpp_lib_format)
1374template <
typename U>
1375 requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
1376struct std::formatter<U>
1384 constexpr auto parse(std::format_parse_context& ctx)
1399 auto format(
const U& ar, std::format_context& ctx)
const
1401 if constexpr (std::is_same_v<U, sparrow::dense_union_array>)
1403 std::format_to(ctx.out(),
"DenseUnion");
1405 else if constexpr (std::is_same_v<U, sparrow::sparse_union_array>)
1407 std::format_to(ctx.out(),
"SparseUnion");
1411 static_assert(sparrow::mpl::dependent_false<U>::value,
"Unknown union array type");
1414 const auto& proxy = ar.get_arrow_proxy();
1415 std::format_to(ctx.out(),
" [name={} | size={}] <", proxy.name().value_or(
"nullptr"), proxy.length());
1419 std::prev(ar.cend()),
1420 [&ctx](
const auto& value)
1422 std::format_to(ctx.out(),
"{}, ", value);
1426 return std::format_to(ctx.out(),
"{}>", ar.back());
1442 template <
typename U>
1443 requires std::derived_from<U, union_array_crtp_base<U>>
1444 std::ostream&
operator<<(std::ostream& os,
const U& value)
1446 os << std::format(
"{}", value);
void sparse_union_array()
Object that owns a piece of contiguous memory.
Base class for CRTP base classes.
constexpr derived_type & derived_cast()
Dense union array implementation with offset buffer.
SPARROW_API dense_union_array & operator=(const dense_union_array &rhs)
Copy assignment operator.
u8_buffer< std::uint32_t > offset_buffer_type
union_array_crtp_base< dense_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
dense_union_array(Args &&... args)
Generic constructor for creating dense union arrays.
SPARROW_API dense_union_array(arrow_proxy proxy)
Constructs dense union array from Arrow proxy.
SPARROW_API dense_union_array(const dense_union_array &rhs)
Copy constructor.
dense_union_array(dense_union_array &&rhs)=default
dense_union_array & operator=(dense_union_array &&rhs)=default
A view that repeats a value a given number of times.
Sparse union array implementation without offset buffer.
SPARROW_API sparse_union_array & operator=(const sparse_union_array &)
sparse_union_array(Args &&... args)
Generic constructor for creating sparse union arrays.
union_array_crtp_base< sparse_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
SPARROW_API sparse_union_array(const sparse_union_array &)
SPARROW_API sparse_union_array(arrow_proxy proxy)
Constructs sparse union array from Arrow proxy.
This buffer class is used as storage buffer for all sparrow arrays.
CRTP base class providing shared functionality for union array implementations.
array_traits::inner_value_type inner_value_type
std::reverse_iterator< const_iterator > const_reverse_iterator
const std::uint8_t * p_type_ids
constexpr iterator begin()
Gets iterator to the beginning of the array.
detail::layout_bracket_functor< const derived_type, value_type > const_functor_type
constexpr std::optional< std::string_view > name() const
Gets the optional name of the union array.
array_traits::const_reference value_type
constexpr union_array_crtp_base(const self_type &rhs)
Copy constructor.
u8_buffer< std::uint8_t > type_id_buffer_type
static constexpr std::string make_format_string(bool dense, std::size_t n, const std::optional< R > &child_index_to_type_id)
Creates Arrow format string for union arrays.
SPARROW_CONSTEXPR_CLANG value_type front() const
Gets reference to the first element.
constexpr const_reverse_iterator crbegin() const
Gets const reverse iterator to the beginning of reversed array.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > m_type_id_map
constexpr arrow_proxy & get_arrow_proxy()
Gets mutable reference to the Arrow proxy.
union_array_crtp_base< DERIVED > self_type
constexpr self_type & operator=(self_type &&rhs)=default
union_array_crtp_base(arrow_proxy proxy)
Protected constructor from Arrow proxy.
array_traits::const_reference const_reference
detail::layout_bracket_functor< derived_type, value_type > functor_type
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i) const
Gets element at specified position without bounds checking.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > type_id_map
functor_index_iterator< const_functor_type > const_iterator
static constexpr type_id_map type_id_map_from_child_to_type_id(const std::optional< R > &child_index_to_type_id)
Creates type ID mapping from child index to type ID mapping.
constexpr const_reverse_iterator rbegin() const
Gets reverse iterator to the beginning of reversed array.
constexpr const_iterator begin() const
Gets const iterator to the beginning of the array.
SPARROW_CONSTEXPR_CLANG std::optional< key_value_view > metadata() const
Gets the metadata associated with the union array.
functor_index_iterator< functor_type > iterator
constexpr self_type & operator=(const self_type &rhs)
Copy assignment operator.
constexpr bool empty() const
Checks if the union array is empty.
SPARROW_CONSTEXPR_CLANG value_type at(size_type i) const
Gets element at specified position with bounds checking.
constexpr const_iterator cbegin() const
Gets const iterator to the beginning of the array.
constexpr const_reverse_iterator crend() const
Gets const reverse iterator to the end of reversed array.
constexpr const_iterator end() const
Gets const iterator to the end of the array.
static constexpr size_t TYPE_ID_MAP_SIZE
static constexpr type_id_map parse_type_id_map(std::string_view format_string)
Parses type ID mapping from Arrow format string.
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i)
Gets mutable element at specified position.
constexpr const_iterator cend() const
Gets const iterator to the end of the array.
SPARROW_CONSTEXPR_CLANG value_type back() const
Gets reference to the last element.
constexpr union_array_crtp_base(self_type &&rhs)=default
constexpr const arrow_proxy & get_arrow_proxy() const
Gets const reference to the Arrow proxy.
constexpr iterator end()
Gets iterator to the end of the array.
constexpr size_type size() const
Gets the number of elements in the union array.
constexpr const_reverse_iterator rend() const
Gets reverse iterator to the end of reversed array.
constexpr void zero_null_values(const inner_value_type &value)
Sets all null values to the specified value.
std::vector< cloning_ptr< array_wrapper > > children_type
constexpr children_type make_children(arrow_proxy &proxy)
Creates child array wrappers from Arrow proxy.
#define SPARROW_CONSTEXPR_CLANG
#define SPARROW_ASSERT_TRUE(expr__)
arrow_proxy create_union_proxy_impl(std::vector< array > &&children, std::vector< buffer< std::uint8_t > > &&buffers, std::size_t size, std::string &&format, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
void unreachable()
Invokes undefined behavior for optimization purposes.
constexpr bool is_dense_union_array_v
Type trait to check if a type is a dense_union_array.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema holding the provided data.
constexpr void zero_null_values(R &range, const T &default_value=T{})
Sets null values in a range to a default value.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr std::size_t range_size(R &&r)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
constexpr bool is_sparse_union_array_v
Type trait to check if a type is a sparse_union_array.
data_type
Runtime identifier of Arrow data types, usually associated with the raw bytes of the corresponding value.
std::unordered_set< ArrowFlag > to_set_of_ArrowFlags(int64_t flag_values)
Converts a bitfield of ArrowFlag values to a set of ArrowFlag values.
Extensions to the C++ standard library.
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.