123 template <
class DERIVED>
150 [[nodiscard]]
constexpr std::optional<std::string_view>
name()
const;
229 [[nodiscard]]
constexpr bool empty()
const;
387 template <std::ranges::input_range R>
406 template <std::ranges::input_range R>
407 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
408 static constexpr std::string
490#if defined(__cpp_lib_format)
491 friend struct std::formatter<DERIVED>;
564 template <
class... Args>
632 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
634 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
635 [[nodiscard]]
static auto create_proxy(
636 std::vector<array>&& children,
639 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
640 std::optional<std::string_view>
name = std::nullopt,
641 std::optional<METADATA_RANGE>
metadata = std::nullopt
663 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
664 std::ranges::input_range OFFSET_BUFFER_RANGE,
665 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
667 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
669 std::vector<array>&& children,
670 TYPE_ID_BUFFER_RANGE&& element_type,
671 OFFSET_BUFFER_RANGE&& offsets,
672 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
673 std::optional<std::string_view>
name = std::nullopt,
674 std::optional<METADATA_RANGE>
metadata = std::nullopt
680 return dense_union_array::create_proxy(
681 std::forward<std::vector<array>>(children),
682 std::move(element_type_buffer),
683 std::move(offsets_buffer),
684 std::move(type_mapping),
685 std::forward<std::optional<std::string_view>>(
name),
686 std::forward<std::optional<METADATA_RANGE>>(
metadata)
706 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
707 [[nodiscard]]
static arrow_proxy create_proxy_impl(
708 std::vector<array>&& children,
711 std::string&& format,
712 std::optional<std::string_view>
name = std::nullopt,
713 std::optional<METADATA_RANGE>
metadata = std::nullopt
733 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
734 std::ranges::input_range OFFSET_BUFFER_RANGE,
736 [[nodiscard]]
static arrow_proxy create_proxy_impl(
737 std::vector<array>&& children,
738 TYPE_ID_BUFFER_RANGE&& element_type,
739 OFFSET_BUFFER_RANGE&& offsets,
740 std::string&& format,
741 std::optional<std::string_view>
name = std::nullopt,
742 std::optional<METADATA_RANGE>
metadata = std::nullopt
745 SPARROW_ASSERT_TRUE(std::ranges::distance(element_type) == std::ranges::distance(offsets));
749 return dense_union_array::create_proxy_impl(
750 std::forward<std::vector<array>>(children),
751 std::move(element_type_buffer),
752 std::move(offsets_buffer),
753 std::forward<std::string>(format),
754 std::forward<std::optional<std::string_view>>(
name),
755 std::forward<std::optional<METADATA_RANGE>>(
metadata)
769 SPARROW_API std::size_t element_offset(std::size_t i)
const;
771 const std::int32_t* p_offsets;
824 template <
class... Args>
864 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
866 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
867 static auto create_proxy(
868 std::vector<array>&& children,
870 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
871 std::optional<std::string_view>
name = std::nullopt,
872 std::optional<METADATA_RANGE>
metadata = std::nullopt
890 template <input_metadata_container METADATA_RANGE>
891 static auto create_proxy_impl(
892 std::vector<array>&& children,
894 std::string&& format,
895 std::optional<std::string_view>
name = std::nullopt,
896 std::optional<METADATA_RANGE>
metadata = std::nullopt
912 [[nodiscard]]
SPARROW_API std::size_t element_offset(std::size_t i)
const;
920 template <
class DERIVED>
926 format_string.remove_prefix(4);
928 constexpr std::string_view delim{
","};
929 std::size_t child_index = 0;
930 std::ranges::for_each(
931 format_string | std::views::split(delim),
934 const std::string str(
935 std::string_view{&*std::ranges::begin(s),
static_cast<size_t>(std::ranges::distance(s))}
937 const auto as_int = std::atoi(str.c_str());
938 ret[
static_cast<std::size_t
>(as_int)] =
static_cast<std::uint8_t
>(child_index);
945 template <
class DERIVED>
946 template <std::ranges::input_range R>
951 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> ret;
952 if (!child_index_to_type_id.has_value())
954 constexpr std::array<std::uint8_t, TYPE_ID_MAP_SIZE> default_mapping = []
956 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> arr{};
957 std::iota(arr.begin(), arr.end(), 0);
960 return default_mapping;
964 const std::size_t n = std::ranges::size(*child_index_to_type_id);
965 for (std::size_t i = 0; i < n; ++i)
967 ret[(*child_index_to_type_id)[
static_cast<std::uint8_t
>(i)]] =
static_cast<std::uint8_t
>(i);
973 template <
class DERIVED>
974 template <std::ranges::input_range R>
975 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
976 constexpr std::string
979 const auto range_size = range.has_value() ? std::ranges::size(*range) : 0;
982 std::string ret = dense ?
"+ud:" :
"+us:";
985 for (std::size_t i = 0; i < n; ++i)
987 ret += std::to_string(i) +
",";
992 for (
const auto& v : *range)
994 ret += std::to_string(v) +
",";
1002 throw std::invalid_argument(
"Invalid type-id map");
1006 template <
class DERIVED>
1012 template <
class DERIVED>
1018 template <
class DERIVED>
1024 template <
class DERIVED>
1030 template <
class DERIVED>
1039 template <
class DERIVED>
1045 template <
class DERIVED>
1058 template <
class DERIVED>
1061 const auto type_id =
static_cast<std::size_t
>(
p_type_ids[i]);
1063 const auto offset = this->
derived_cast().element_offset(i);
1067 template <
class DERIVED>
1073 template <
class DERIVED>
1079 template <
class DERIVED>
1085 template <
class DERIVED>
1091 template <
class DERIVED>
1097 template <
class DERIVED>
1103 template <
class DERIVED>
1109 template <
class DERIVED>
1115 template <
class DERIVED>
1121 template <
class DERIVED>
1127 template <
class DERIVED>
1133 template <
class DERIVED>
1139 template <
class DERIVED>
1145 template <
class DERIVED>
1151 template <
class DERIVED>
1154 return (*
this)[this->
size() - 1];
1157 template <
class DERIVED>
1161 for (std::size_t i = 0; i < children.size(); ++i)
1171 return std::ranges::equal(lhs, rhs);
1178 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1179 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1180 auto dense_union_array::create_proxy(
1181 std::vector<array>&& children,
1182 type_id_buffer_type&& element_type,
1183 offset_buffer_type&& offsets,
1184 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1185 std::optional<std::string_view> name,
1186 std::optional<METADATA_RANGE> metadata
1190 const auto n_children = children.size();
1192 std::string format = make_format_string(
true , n_children, child_index_to_type_id);
1194 return create_proxy_impl(
1195 std::move(children),
1196 std::move(element_type),
1204 template <input_metadata_container METADATA_RANGE>
1205 auto dense_union_array::create_proxy_impl(
1206 std::vector<array>&& children,
1209 std::string&& format,
1210 std::optional<std::string_view>
name,
1211 std::optional<METADATA_RANGE>
metadata
1215 const auto n_children = children.size();
1218 const auto size = element_type.size();
1220 for (std::size_t i = 0; i < n_children; ++i)
1222 auto& child = children[i];
1224 child_arrays[i] =
new ArrowArray(std::move(flat_arr));
1225 child_schemas[i] =
new ArrowSchema(std::move(flat_schema));
1230 child_schemas + n_children,
1237 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
1253 std::vector<buffer<std::uint8_t>> arr_buffs = {
1254 std::move(element_type).extract_storage(),
1255 std::move(offsets).extract_storage()
1259 static_cast<std::int64_t
>(
size),
1262 std::move(arr_buffs),
1268 return arrow_proxy{std::move(arr), std::move(schema)};
1275 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1276 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1277 auto sparse_union_array::create_proxy(
1278 std::vector<array>&& children,
1279 type_id_buffer_type&& element_type,
1280 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1281 std::optional<std::string_view> name,
1282 std::optional<METADATA_RANGE> metadata
1285 const auto n_children = children.size();
1286 if (child_index_to_type_id.has_value())
1291 std::string format = make_format_string(
false , n_children, child_index_to_type_id);
1293 return create_proxy_impl(
1294 std::move(children),
1295 std::move(element_type),
1302 template <input_metadata_container METADATA_RANGE>
1303 auto sparse_union_array::create_proxy_impl(
1304 std::vector<array>&& children,
1305 type_id_buffer_type&& element_type,
1306 std::string&& format,
1307 std::optional<std::string_view> name,
1308 std::optional<METADATA_RANGE> metadata
1311 for (
const auto& child : children)
1315 const auto n_children = children.size();
1316 ArrowSchema** child_schemas =
new ArrowSchema*[n_children];
1317 ArrowArray** child_arrays =
new ArrowArray*[n_children];
1318 const auto size = element_type.size();
1320 for (std::size_t i = 0; i < n_children; ++i)
1322 auto& child = children[i];
1324 child_arrays[i] =
new ArrowArray(std::move(flat_arr));
1325 child_schemas[i] =
new ArrowSchema(std::move(flat_schema));
1330 child_schemas + n_children,
1331 [](
const ArrowSchema* schema)
1337 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
1345 std::move(metadata),
1353 std::vector<buffer<std::uint8_t>> arr_buffs = {std::move(element_type).extract_storage()};
1356 static_cast<std::int64_t
>(size),
1359 std::move(arr_buffs),
1365 return arrow_proxy{std::move(arr), std::move(schema)};
1369#if defined(__cpp_lib_format)
1379template <
typename U>
1380 requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
1381struct std::formatter<U>
1389 constexpr auto parse(std::format_parse_context& ctx)
1404 auto format(
const U& ar, std::format_context& ctx)
const
1406 if constexpr (std::is_same_v<U, sparrow::dense_union_array>)
1408 std::format_to(ctx.out(),
"DenseUnion");
1410 else if constexpr (std::is_same_v<U, sparrow::sparse_union_array>)
1412 std::format_to(ctx.out(),
"SparseUnion");
1416 static_assert(sparrow::mpl::dependent_false<U>::value,
"Unknown union array type");
1419 const auto& proxy = ar.get_arrow_proxy();
1420 std::format_to(ctx.out(),
" [name={} | size={}] <", proxy.name().value_or(
"nullptr"), proxy.length());
1424 std::prev(ar.cend()),
1425 [&ctx](
const auto& value)
1427 std::format_to(ctx.out(),
"{}, ", value);
1431 return std::format_to(ctx.out(),
"{}>", ar.back());
1447 template <
typename U>
1448 requires std::derived_from<U, union_array_crtp_base<U>>
1449 std::ostream&
operator<<(std::ostream& os,
const U& value)
1451 os << std::format(
"{}", value);
void sparse_union_array()
Base class for CRTP base classes.
constexpr derived_type & derived_cast()
Dense union array implementation with offset buffer.
SPARROW_API dense_union_array & operator=(const dense_union_array &rhs)
Copy assignment operator.
u8_buffer< std::uint32_t > offset_buffer_type
union_array_crtp_base< dense_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
dense_union_array(Args &&... args)
Generic constructor for creating dense union arrays.
SPARROW_API dense_union_array(arrow_proxy proxy)
Constructs dense union array from Arrow proxy.
SPARROW_API dense_union_array(const dense_union_array &rhs)
Copy constructor.
dense_union_array(dense_union_array &&rhs)=default
dense_union_array & operator=(dense_union_array &&rhs)=default
A view that repeats a value a given number of times.
Sparse union array implementation without offset buffer.
sparse_union_array(Args &&... args)
Generic constructor for creating sparse union arrays.
union_array_crtp_base< sparse_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
SPARROW_API sparse_union_array(arrow_proxy proxy)
Constructs sparse union array from Arrow proxy.
This buffer class is used as storage buffer for all sparrow arrays.
CRTP base class providing shared functionality for union array implementations.
array_traits::inner_value_type inner_value_type
std::reverse_iterator< const_iterator > const_reverse_iterator
const std::uint8_t * p_type_ids
constexpr iterator begin()
Gets iterator to the beginning of the array.
detail::layout_bracket_functor< const derived_type, value_type > const_functor_type
constexpr std::optional< std::string_view > name() const
Gets the optional name of the union array.
array_traits::const_reference value_type
constexpr union_array_crtp_base(const self_type &rhs)
Copy constructor.
u8_buffer< std::uint8_t > type_id_buffer_type
static constexpr std::string make_format_string(bool dense, std::size_t n, const std::optional< R > &child_index_to_type_id)
Creates Arrow format string for union arrays.
SPARROW_CONSTEXPR_CLANG value_type front() const
Gets reference to the first element.
constexpr const_reverse_iterator crbegin() const
Gets const reverse iterator to the beginning of the reversed array.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > m_type_id_map
constexpr arrow_proxy & get_arrow_proxy()
Gets mutable reference to the Arrow proxy.
union_array_crtp_base< DERIVED > self_type
constexpr self_type & operator=(self_type &&rhs)=default
union_array_crtp_base(arrow_proxy proxy)
Protected constructor from Arrow proxy.
array_traits::const_reference const_reference
detail::layout_bracket_functor< derived_type, value_type > functor_type
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i) const
Gets element at specified position without bounds checking.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > type_id_map
functor_index_iterator< const_functor_type > const_iterator
static constexpr type_id_map type_id_map_from_child_to_type_id(const std::optional< R > &child_index_to_type_id)
Creates type ID mapping from child index to type ID mapping.
constexpr const_reverse_iterator rbegin() const
Gets reverse iterator to the beginning of the reversed array.
constexpr const_iterator begin() const
Gets const iterator to the beginning of the array.
SPARROW_CONSTEXPR_CLANG std::optional< key_value_view > metadata() const
Gets the metadata associated with the union array.
functor_index_iterator< functor_type > iterator
constexpr self_type & operator=(const self_type &rhs)
Copy assignment operator.
constexpr bool empty() const
Checks if the union array is empty.
SPARROW_CONSTEXPR_CLANG value_type at(size_type i) const
Gets element at specified position with bounds checking.
constexpr const_iterator cbegin() const
Gets const iterator to the beginning of the array.
constexpr const_reverse_iterator crend() const
Gets const reverse iterator to the end of the reversed array.
constexpr const_iterator end() const
Gets const iterator to the end of the array.
static constexpr size_t TYPE_ID_MAP_SIZE
static constexpr type_id_map parse_type_id_map(std::string_view format_string)
Parses type ID mapping from Arrow format string.
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i)
Gets mutable element at specified position.
constexpr const_iterator cend() const
Gets const iterator to the end of the array.
SPARROW_CONSTEXPR_CLANG value_type back() const
Gets reference to the last element.
constexpr union_array_crtp_base(self_type &&rhs)=default
constexpr const arrow_proxy & get_arrow_proxy() const
Gets const reference to the Arrow proxy.
constexpr iterator end()
Gets iterator to the end of the array.
constexpr size_type size() const
Gets the number of elements in the union array.
constexpr const_reverse_iterator rend() const
Gets reverse iterator to the end of the reversed array.
constexpr void zero_null_values(const inner_value_type &value)
Sets all null values to the specified value.
std::vector< cloning_ptr< array_wrapper > > children_type
constexpr children_type make_children(arrow_proxy &proxy)
Creates child array wrappers from Arrow proxy.
#define SPARROW_CONSTEXPR_CLANG
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
void unreachable()
Invokes undefined behavior for optimization purposes.
constexpr bool is_dense_union_array_v
Type trait to check if a type is a dense_union_array.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema (returned by value) holding the provided data.
constexpr void zero_null_values(R &range, const T &default_value=T{})
Sets null values in a range to a default value.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr std::size_t range_size(R &&r)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
constexpr bool is_sparse_union_array_v
Type trait to check if a type is a sparse_union_array.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::unordered_set< ArrowFlag > to_set_of_ArrowFlags(int64_t flag_values)
Converts a bitfield of ArrowFlag values to a set of ArrowFlag values.
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.