73 template <
class DERIVED>
91 [[nodiscard]] std::optional<std::string_view>
name()
const;
92 [[nodiscard]] std::optional<key_value_view>
metadata()
const;
121 template <std::ranges::input_range R>
124 template <std::ranges::input_range R>
125 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
151#if defined(__cpp_lib_format)
152 friend struct std::formatter<DERIVED>;
167 template <
class... Args>
187 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
189 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
190 static auto create_proxy(
191 std::vector<array>&& children,
194 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{},
195 std::optional<std::string_view>
name = std::nullopt,
196 std::optional<METADATA_RANGE>
metadata = std::nullopt
199 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
200 static auto create_proxy_impl(
201 std::vector<array>&& children,
204 std::string&& format,
206 std::optional<std::string_view>
name = std::nullopt,
207 std::optional<METADATA_RANGE>
metadata = std::nullopt
210 SPARROW_API std::size_t element_offset(std::size_t i)
const;
212 const std::int32_t* p_offsets;
223 template <
class... Args>
236 template <std::ranges::input_range TYPE_MAPPING = std::vector<std::u
int8_t>>
237 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
238 static auto create_proxy(
239 std::vector<array>&& children,
241 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{}
245 std::vector<array>&& children,
247 std::string&& format,
259 template <
class DERIVED>
264 format_string.remove_prefix(4);
266 constexpr std::string_view delim{
","};
267 std::size_t child_index = 0;
268 std::ranges::for_each(
269 format_string | std::views::split(delim),
272 const std::string str(
273 std::string_view{&*std::ranges::begin(s),
static_cast<size_t>(std::ranges::distance(s))}
275 const auto as_int = std::atoi(str.c_str());
276 ret[
static_cast<std::size_t
>(as_int)] =
static_cast<std::uint8_t
>(child_index);
283 template <
class DERIVED>
284 template <std::ranges::input_range R>
288 const std::size_t n = std::ranges::size(child_index_to_type_id);
289 std::array<std::uint8_t, 256> ret;
292 for (std::size_t i = 0; i < 256; ++i)
294 ret[i] =
static_cast<std::uint8_t
>(i);
299 for (std::size_t i = 0; i < n; ++i)
301 ret[child_index_to_type_id[i]] =
static_cast<std::uint8_t
>(i);
307 template <
class DERIVED>
308 template <std::ranges::input_range R>
309 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
312 const auto range_size = std::ranges::size(range);
315 std::string ret = dense ?
"+ud:" :
"+us:";
318 for (std::size_t i = 0; i < n; ++i)
320 ret += std::to_string(i) +
",";
325 for (
const auto& v : range)
327 ret += std::to_string(v) +
",";
335 throw std::invalid_argument(
"Invalid type-id map");
339 template <
class DERIVED>
345 template <
class DERIVED>
351 template <
class DERIVED>
357 template <
class DERIVED>
363 template <
class DERIVED>
372 template <
class DERIVED>
378 template <
class DERIVED>
391 template <
class DERIVED>
394 const auto type_id =
static_cast<std::size_t
>(
p_type_ids[i]);
396 const auto offset = this->
derived_cast().element_offset(i);
400 template <
class DERIVED>
406 template <
class DERIVED>
412 template <
class DERIVED>
418 template <
class DERIVED>
424 template <
class DERIVED>
430 template <
class DERIVED>
436 template <
class DERIVED>
442 template <
class DERIVED>
448 template <
class DERIVED>
454 template <
class DERIVED>
460 template <
class DERIVED>
466 template <
class DERIVED>
472 template <
class DERIVED>
478 template <
class DERIVED>
484 template <
class DERIVED>
487 return (*
this)[this->
size() - 1];
490 template <
class DERIVED>
494 for (std::size_t i = 0; i < children.size(); ++i)
504 return std::ranges::equal(lhs, rhs);
511 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
512 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
513 auto dense_union_array::create_proxy(
514 std::vector<array>&& children,
515 type_id_buffer_type&& element_type,
516 offset_buffer_type&& offsets,
517 TYPE_MAPPING&& child_index_to_type_id,
518 std::optional<std::string_view> name,
519 std::optional<METADATA_RANGE> metadata
522 const auto n_children = children.size();
525 auto type_id_to_child_index = type_id_map_from_child_to_type_id(child_index_to_type_id);
527 std::string format = make_format_string(
530 std::forward<TYPE_MAPPING>(child_index_to_type_id)
533 return create_proxy_impl(
535 std::move(element_type),
538 std::move(type_id_to_child_index),
544 template <input_metadata_container METADATA_RANGE>
545 auto dense_union_array::create_proxy_impl(
546 std::vector<array>&& children,
547 type_id_buffer_type&& element_type,
548 offset_buffer_type&& offsets,
549 std::string&& format,
551 std::optional<std::string_view> name,
552 std::optional<METADATA_RANGE> metadata
555 const auto n_children = children.size();
556 ArrowSchema** child_schemas =
new ArrowSchema*[n_children];
557 ArrowArray** child_arrays =
new ArrowArray*[n_children];
558 const auto size = element_type.size();
561 int64_t null_count = 0;
562 for (std::size_t i = 0; i <
size; ++i)
565 const auto type_id =
static_cast<std::uint8_t
>(element_type[i]);
566 const auto child_index = tim[type_id];
567 const auto offset =
static_cast<std::size_t
>(offsets[i]);
569 if (!children[child_index][offset].has_value())
575 for (std::size_t i = 0; i < n_children; ++i)
577 auto& child = children[i];
579 child_arrays[i] =
new ArrowArray(std::move(flat_arr));
580 child_schemas[i] =
new ArrowSchema(std::move(flat_schema));
594 std::vector<buffer<std::uint8_t>> arr_buffs = {
595 std::move(element_type).extract_storage(),
596 std::move(offsets).extract_storage()
600 static_cast<std::int64_t
>(size),
601 static_cast<std::int64_t
>(null_count),
603 std::move(arr_buffs),
609 return arrow_proxy{std::move(arr), std::move(schema)};
616 template <std::ranges::input_range TYPE_MAPPING>
617 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
618 auto sparse_union_array::create_proxy(
619 std::vector<array>&& children,
620 type_id_buffer_type&& element_type,
621 TYPE_MAPPING&& child_index_to_type_id
624 const auto n_children = children.size();
627 auto type_id_to_child_index = type_id_map_from_child_to_type_id(child_index_to_type_id);
629 std::string format = make_format_string(
632 std::forward<TYPE_MAPPING>(child_index_to_type_id)
635 return create_proxy_impl(
637 std::move(element_type),
639 std::move(type_id_to_child_index)
644#if defined(__cpp_lib_format)
647 requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
648struct std::formatter<U>
650 constexpr auto parse(std::format_parse_context& ctx)
655 auto format(
const U& ar, std::format_context& ctx)
const
657 if constexpr (std::is_same_v<U, sparrow::dense_union_array>)
659 std::format_to(ctx.out(),
"DenseUnion");
661 else if constexpr (std::is_same_v<U, sparrow::sparse_union_array>)
663 std::format_to(ctx.out(),
"SparseUnion");
667 static_assert(sparrow::mpl::dependent_false<U>::value,
"Unknown union array type");
670 const auto& proxy = ar.get_arrow_proxy();
671 std::format_to(ctx.out(),
" [name={} | size={}] <", proxy.name().value_or(
"nullptr"), proxy.length());
675 std::prev(ar.cend()),
676 [&ctx](
const auto& value)
678 std::format_to(ctx.out(),
"{}, ", value);
682 return std::format_to(ctx.out(),
"{}>", ar.back());
688 template <
typename U>
689 requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
690 std::ostream&
operator<<(std::ostream& os,
const U& value)
692 os << std::format(
"{}", value);
void sparse_union_array()
Proxy class over ArrowArray and ArrowSchema.
Base class for CRTP base classes.
derived_type & derived_cast()
SPARROW_API dense_union_array & operator=(const dense_union_array &rhs)
u8_buffer< std::uint32_t > offset_buffer_type
union_array_crtp_base< dense_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
dense_union_array(Args &&... args)
SPARROW_API dense_union_array(arrow_proxy proxy)
SPARROW_API dense_union_array(const dense_union_array &rhs)
dense_union_array(dense_union_array &&rhs)=default
dense_union_array & operator=(dense_union_array &&rhs)=default
A view that repeats a value a given number of times.
sparse_union_array(Args &&... args)
union_array_crtp_base< sparse_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
SPARROW_API sparse_union_array(arrow_proxy proxy)
This buffer class is used as the storage buffer for all sparrow arrays.
array_traits::inner_value_type inner_value_type
std::reverse_iterator< const_iterator > const_reverse_iterator
std::array< std::uint8_t, 256 > type_id_map
const_iterator begin() const
self_type & operator=(self_type &&rhs)=default
const std::uint8_t * p_type_ids
union_array_crtp_base(const self_type &rhs)
static std::string make_format_string(bool dense, std::size_t n, R &&child_index_to_type_id)
detail::layout_bracket_functor< const derived_type, value_type > const_functor_type
union_array_crtp_base(self_type &&rhs)=default
array_traits::const_reference value_type
u8_buffer< std::uint8_t > type_id_buffer_type
const_reverse_iterator rend() const
std::array< std::uint8_t, 256 > m_type_id_map
self_type & operator=(const self_type &rhs)
const_iterator cbegin() const
union_array_crtp_base< DERIVED > self_type
const_iterator cend() const
union_array_crtp_base(arrow_proxy proxy)
value_type operator[](size_type i)
detail::layout_bracket_functor< derived_type, value_type > functor_type
const_reverse_iterator crbegin() const
const_reverse_iterator crend() const
static type_id_map parse_type_id_map(std::string_view format_string)
functor_index_iterator< const_functor_type > const_iterator
value_type operator[](size_type i) const
value_type at(size_type i) const
functor_index_iterator< functor_type > iterator
const_reverse_iterator rbegin() const
const arrow_proxy & get_arrow_proxy() const
std::optional< key_value_view > metadata() const
std::optional< std::string_view > name() const
static type_id_map type_id_map_from_child_to_type_id(R &&child_index_to_type_id)
arrow_proxy & get_arrow_proxy()
children_type make_children(arrow_proxy &proxy)
const_iterator end() const
std::vector< cloning_ptr< array_wrapper > > children_type
constexpr std::size_t size(typelist< T... >={})
constexpr bool excludes_copy_and_move_ctor_v
void unreachable()
Invokes undefined behavior.
constexpr bool is_dense_union_array_v
Checks whether T is a dense_union_array type.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
std::ostream & operator<<(std::ostream &stream, T n)
constexpr bool is_sparse_union_array_v
Checks whether T is a sparse_union_array type.
std::size_t range_size(R &&r)
data_type
Runtime identifier of Arrow data types, usually paired with the raw bytes that hold the associated value.
mpl::rename< mpl::transform< detail::array_const_reference_t, all_base_types_t >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()