76 template <
class DERIVED>
94 [[nodiscard]] std::optional<std::string_view>
name()
const;
95 [[nodiscard]] std::optional<key_value_view>
metadata()
const;
136 template <std::ranges::input_range R>
139 template <std::ranges::input_range R>
140 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
166#if defined(__cpp_lib_format)
167 friend struct std::formatter<DERIVED>;
182 template <
class... Args>
202 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
204 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
205 [[nodiscard]]
static auto create_proxy(
206 std::vector<array>&& children,
209 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{},
210 std::optional<std::string_view>
name = std::nullopt,
211 std::optional<METADATA_RANGE>
metadata = std::nullopt
215 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
216 std::ranges::input_range OFFSET_BUFFER_RANGE,
217 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
219 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
221 std::vector<array>&& children,
222 TYPE_ID_BUFFER_RANGE&& element_type,
223 OFFSET_BUFFER_RANGE&& offsets,
224 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{},
225 std::optional<std::string_view>
name = std::nullopt,
226 std::optional<METADATA_RANGE>
metadata = std::nullopt
231 return dense_union_array::create_proxy(
232 std::forward<std::vector<array>>(children),
233 std::move(element_type_buffer),
234 std::move(offsets_buffer),
235 std::forward<TYPE_MAPPING>(type_mapping),
236 std::forward<std::optional<std::string_view>>(
name),
237 std::forward<std::optional<METADATA_RANGE>>(
metadata)
241 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
242 [[nodiscard]]
static arrow_proxy create_proxy_impl(
243 std::vector<array>&& children,
246 std::string&& format,
248 std::optional<std::string_view>
name = std::nullopt,
249 std::optional<METADATA_RANGE>
metadata = std::nullopt
253 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
254 std::ranges::input_range OFFSET_BUFFER_RANGE,
255 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
256 [[nodiscard]]
static arrow_proxy create_proxy_impl(
257 std::vector<array>&& children,
258 TYPE_ID_BUFFER_RANGE&& element_type,
259 OFFSET_BUFFER_RANGE&& offsets,
260 std::string&& format,
262 std::optional<std::string_view>
name = std::nullopt,
263 std::optional<METADATA_RANGE>
metadata = std::nullopt
268 return dense_union_array::create_proxy_impl(
269 std::forward<std::vector<array>>(children),
270 std::move(element_type_buffer),
271 std::move(offsets_buffer),
272 std::forward<std::string>(format),
273 std::forward<type_id_map>(tim),
274 std::forward<std::optional<std::string_view>>(
name),
275 std::forward<std::optional<METADATA_RANGE>>(
metadata)
279 SPARROW_API std::size_t element_offset(std::size_t i)
const;
281 const std::int32_t* p_offsets;
292 template <
class... Args>
306 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
308 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
309 static auto create_proxy(
310 std::vector<array>&& children,
312 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{},
313 std::optional<std::string_view>
name = std::nullopt,
314 std::optional<METADATA_RANGE>
metadata = std::nullopt
317 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
318 static auto create_proxy_impl(
319 std::vector<array>&& children,
321 std::string&& format,
323 std::optional<std::string_view>
name = std::nullopt,
324 std::optional<METADATA_RANGE>
metadata = std::nullopt
335 template <
class DERIVED>
340 format_string.remove_prefix(4);
342 constexpr std::string_view delim{
","};
343 std::size_t child_index = 0;
344 std::ranges::for_each(
345 format_string | std::views::split(delim),
348 const std::string str(
349 std::string_view{&*std::ranges::begin(s),
static_cast<size_t>(std::ranges::distance(s))}
351 const auto as_int = std::atoi(str.c_str());
352 ret[
static_cast<std::size_t
>(as_int)] =
static_cast<std::uint8_t
>(child_index);
359 template <
class DERIVED>
360 template <std::ranges::input_range R>
364 const std::size_t n = std::ranges::size(child_index_to_type_id);
365 std::array<std::uint8_t, 256> ret;
368 for (std::size_t i = 0; i < 256; ++i)
370 ret[i] =
static_cast<std::uint8_t
>(i);
375 for (std::size_t i = 0; i < n; ++i)
377 ret[child_index_to_type_id[i]] =
static_cast<std::uint8_t
>(i);
383 template <
class DERIVED>
384 template <std::ranges::input_range R>
385 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
388 const auto range_size = std::ranges::size(range);
391 std::string ret = dense ?
"+ud:" :
"+us:";
394 for (std::size_t i = 0; i < n; ++i)
396 ret += std::to_string(i) +
",";
401 for (
const auto& v : range)
403 ret += std::to_string(v) +
",";
411 throw std::invalid_argument(
"Invalid type-id map");
415 template <
class DERIVED>
421 template <
class DERIVED>
427 template <
class DERIVED>
433 template <
class DERIVED>
439 template <
class DERIVED>
448 template <
class DERIVED>
454 template <
class DERIVED>
467 template <
class DERIVED>
470 const auto type_id =
static_cast<std::size_t
>(
p_type_ids[i]);
472 const auto offset = this->
derived_cast().element_offset(i);
476 template <
class DERIVED>
482 template <
class DERIVED>
488 template <
class DERIVED>
494 template <
class DERIVED>
500 template <
class DERIVED>
506 template <
class DERIVED>
512 template <
class DERIVED>
518 template <
class DERIVED>
524 template <
class DERIVED>
530 template <
class DERIVED>
536 template <
class DERIVED>
542 template <
class DERIVED>
548 template <
class DERIVED>
554 template <
class DERIVED>
560 template <
class DERIVED>
563 return (*
this)[this->
size() - 1];
566 template <
class DERIVED>
570 for (std::size_t i = 0; i < children.size(); ++i)
580 return std::ranges::equal(lhs, rhs);
587 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
588 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
589 auto dense_union_array::create_proxy(
590 std::vector<array>&& children,
591 type_id_buffer_type&& element_type,
592 offset_buffer_type&& offsets,
593 TYPE_MAPPING&& child_index_to_type_id,
594 std::optional<std::string_view> name,
595 std::optional<METADATA_RANGE> metadata
598 const auto n_children = children.size();
601 auto type_id_to_child_index = type_id_map_from_child_to_type_id(child_index_to_type_id);
603 std::string format = make_format_string(
606 std::forward<TYPE_MAPPING>(child_index_to_type_id)
609 return create_proxy_impl(
611 std::move(element_type),
614 std::move(type_id_to_child_index),
620 template <input_metadata_container METADATA_RANGE>
621 auto dense_union_array::create_proxy_impl(
622 std::vector<array>&& children,
625 std::string&& format,
627 std::optional<std::string_view>
name,
628 std::optional<METADATA_RANGE>
metadata
631 const auto n_children = children.size();
634 const auto size = element_type.size();
637 int64_t null_count = 0;
638 for (std::size_t i = 0; i <
size; ++i)
641 const auto type_id =
static_cast<std::uint8_t
>(element_type[i]);
642 const auto child_index = tim[type_id];
643 const auto offset =
static_cast<std::size_t
>(offsets[i]);
645 if (!children[child_index][offset].has_value())
651 for (std::size_t i = 0; i < n_children; ++i)
653 auto& child = children[i];
655 child_arrays[i] =
new ArrowArray(std::move(flat_arr));
656 child_schemas[i] =
new ArrowSchema(std::move(flat_schema));
659 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
672 std::vector<buffer<std::uint8_t>> arr_buffs = {
673 std::move(element_type).extract_storage(),
674 std::move(offsets).extract_storage()
678 static_cast<std::int64_t
>(
size),
679 static_cast<std::int64_t
>(null_count),
681 std::move(arr_buffs),
687 return arrow_proxy{std::move(arr), std::move(schema)};
694 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
695 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
696 auto sparse_union_array::create_proxy(
697 std::vector<array>&& children,
698 type_id_buffer_type&& element_type,
699 TYPE_MAPPING&& child_index_to_type_id,
700 std::optional<std::string_view> name,
701 std::optional<METADATA_RANGE> metadata
704 const auto n_children = children.size();
707 auto type_id_to_child_index = type_id_map_from_child_to_type_id(child_index_to_type_id);
709 std::string format = make_format_string(
712 std::forward<TYPE_MAPPING>(child_index_to_type_id)
715 return create_proxy_impl(
717 std::move(element_type),
719 std::move(type_id_to_child_index),
725 template <input_metadata_container METADATA_RANGE>
726 auto sparse_union_array::create_proxy_impl(
727 std::vector<array>&& children,
728 type_id_buffer_type&& element_type,
729 std::string&& format,
731 std::optional<std::string_view> name,
732 std::optional<METADATA_RANGE> metadata
735 const auto n_children = children.size();
738 const auto size = element_type.size();
741 int64_t null_count = 0;
742 for (std::size_t i = 0; i < size; ++i)
745 const auto type_id =
static_cast<std::uint8_t
>(element_type[i]);
746 const auto child_index = tim[type_id];
748 if (!children[child_index][i].has_value())
754 for (std::size_t i = 0; i < n_children; ++i)
756 auto& child = children[i];
758 child_arrays[i] =
new ArrowArray(std::move(flat_arr));
759 child_schemas[i] =
new ArrowSchema(std::move(flat_schema));
773 std::vector<buffer<std::uint8_t>> arr_buffs(1);
774 arr_buffs[0] = std::move(element_type).extract_storage();
777 static_cast<std::int64_t
>(size),
778 static_cast<std::int64_t
>(null_count),
780 std::move(arr_buffs),
786 return arrow_proxy{std::move(arr), std::move(schema)};
790#if defined(__cpp_lib_format)
793 requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
794struct std::formatter<U>
796 constexpr auto parse(std::format_parse_context& ctx)
801 auto format(
const U& ar, std::format_context& ctx)
const
803 if constexpr (std::is_same_v<U, sparrow::dense_union_array>)
805 std::format_to(ctx.out(),
"DenseUnion");
807 else if constexpr (std::is_same_v<U, sparrow::sparse_union_array>)
809 std::format_to(ctx.out(),
"SparseUnion");
813 static_assert(sparrow::mpl::dependent_false<U>::value,
"Unknown union array type");
816 const auto& proxy = ar.get_arrow_proxy();
817 std::format_to(ctx.out(),
" [name={} | size={}] <", proxy.name().value_or(
"nullptr"), proxy.length());
821 std::prev(ar.cend()),
822 [&ctx](
const auto& value)
824 std::format_to(ctx.out(),
"{}, ", value);
828 return std::format_to(ctx.out(),
"{}>", ar.back());
834 template <
typename U>
835 requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
836 std::ostream&
operator<<(std::ostream& os,
const U& value)
838 os << std::format(
"{}", value);
void sparse_union_array()
Proxy class over ArrowArray and ArrowSchema.
Base class for CRTP base classes.
derived_type & derived_cast()
SPARROW_API dense_union_array & operator=(const dense_union_array &rhs)
u8_buffer< std::uint32_t > offset_buffer_type
union_array_crtp_base< dense_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
dense_union_array(Args &&... args)
SPARROW_API dense_union_array(arrow_proxy proxy)
SPARROW_API dense_union_array(const dense_union_array &rhs)
dense_union_array(dense_union_array &&rhs)=default
dense_union_array & operator=(dense_union_array &&rhs)=default
A view that repeats a value a given number of times.
sparse_union_array(Args &&... args)
union_array_crtp_base< sparse_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
SPARROW_API sparse_union_array(arrow_proxy proxy)
This buffer class is used as the storage buffer for all sparrow arrays.
array_traits::inner_value_type inner_value_type
std::reverse_iterator< const_iterator > const_reverse_iterator
std::array< std::uint8_t, 256 > type_id_map
const_iterator begin() const
self_type & operator=(self_type &&rhs)=default
const std::uint8_t * p_type_ids
union_array_crtp_base(const self_type &rhs)
static std::string make_format_string(bool dense, std::size_t n, R &&child_index_to_type_id)
detail::layout_bracket_functor< const derived_type, value_type > const_functor_type
void zero_null_values(const inner_value_type &value)
Sets all null values to the specified value.
union_array_crtp_base(self_type &&rhs)=default
array_traits::const_reference value_type
u8_buffer< std::uint8_t > type_id_buffer_type
const_reverse_iterator rend() const
std::array< std::uint8_t, 256 > m_type_id_map
self_type & operator=(const self_type &rhs)
const_iterator cbegin() const
union_array_crtp_base< DERIVED > self_type
const_iterator cend() const
union_array_crtp_base(arrow_proxy proxy)
value_type operator[](size_type i)
detail::layout_bracket_functor< derived_type, value_type > functor_type
const_reverse_iterator crbegin() const
const_reverse_iterator crend() const
static type_id_map parse_type_id_map(std::string_view format_string)
functor_index_iterator< const_functor_type > const_iterator
value_type operator[](size_type i) const
value_type at(size_type i) const
functor_index_iterator< functor_type > iterator
const_reverse_iterator rbegin() const
const arrow_proxy & get_arrow_proxy() const
std::optional< key_value_view > metadata() const
std::optional< std::string_view > name() const
static type_id_map type_id_map_from_child_to_type_id(R &&child_index_to_type_id)
arrow_proxy & get_arrow_proxy()
children_type make_children(arrow_proxy &proxy)
const_iterator end() const
std::vector< cloning_ptr< array_wrapper > > children_type
constexpr bool excludes_copy_and_move_ctor_v
void unreachable()
Invokes undefined behavior.
constexpr bool is_dense_union_array_v
Checks whether T is a dense_union_array type.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr void zero_null_values(R &range, const T &default_value=T{})
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
constexpr bool is_sparse_union_array_v
Checks whether T is a sparse_union_array type.
std::size_t range_size(R &&r)
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
mpl::rename< mpl::transform< detail::array_const_reference_t, all_base_types_t >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()