38 template <
class DERIVED>
88 struct get_data_type_from_array;
187 template <
class DERIVED>
227 using list_size_type =
inner_types::list_size_type;
260 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
271 template <class... ARGS>
278 template <std::ranges::range SIZES_RANGE>
290 std::optional<std::string_view> name = std::nullopt,
291 std::optional<METADATA_RANGE> metadata = std::nullopt
296 std::ranges::input_range OFFSET_BUFFER_RANGE,
298 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>,
offset_type>
301 OFFSET_BUFFER_RANGE&& list_offsets_range,
303 std::optional<std::string_view> name = std::nullopt,
304 std::optional<METADATA_RANGE> metadata = std::nullopt
308 return list_array_impl<BIG>::create_proxy(
309 std::forward<array>(flat_values),
310 std::move(list_offsets),
311 std::forward<VB>(validity_input),
312 std::forward<std::optional<std::string_view>>(name),
313 std::forward<std::optional<METADATA_RANGE>>(metadata)
319 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
320 [[nodiscard]]
static arrow_proxy create_proxy(
322 offset_buffer_type&& list_offsets,
323 bool nullable =
true,
324 std::optional<std::string_view> name = std::nullopt,
325 std::optional<METADATA_RANGE> metadata = std::nullopt
330 std::ranges::input_range OFFSET_BUFFER_RANGE,
331 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
332 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
333 [[nodiscard]]
static arrow_proxy create_proxy(
335 OFFSET_BUFFER_RANGE&& list_offsets_range,
336 bool nullable =
true,
337 std::optional<std::string_view> name = std::nullopt,
338 std::optional<METADATA_RANGE> metadata = std::nullopt
341 offset_buffer_type list_offsets{std::move(list_offsets_range)};
342 return list_array_impl<BIG>::create_proxy(
343 std::forward<array>(flat_values),
344 std::move(list_offsets),
346 std::forward<std::optional<std::string_view>>(name),
347 std::forward<std::optional<METADATA_RANGE>>(metadata)
351 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
352 [[nodiscard]] std::pair<offset_type, offset_type> offset_range(
size_type i)
const;
354 [[nodiscard]] offset_type* make_list_offsets();
373 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
385 template <
class... ARGS>
388 :
self_type(create_proxy(std::forward<ARGS>(args)...))
395 std::ranges::input_range OFFSET_BUFFER_RANGE,
396 std::ranges::input_range SIZE_RANGE,
400 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
401 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
405 OFFSET_BUFFER_RANGE&& list_offsets,
406 SIZE_RANGE&& list_sizes,
408 std::optional<std::string_view> name = std::nullopt,
409 std::optional<METADATA_RANGE> metadata = std::nullopt
412 return list_view_array_impl<BIG>::create_proxy(
413 std::move(flat_values),
414 offset_buffer_type(std::move(list_offsets)),
415 size_buffer_type(std::move(list_sizes)),
416 std::forward<VB>(validity_input),
423 validity_bitmap_input VB = validity_bitmap,
424 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
425 [[nodiscard]]
static arrow_proxy create_proxy(
427 offset_buffer_type&& list_offsets,
428 size_buffer_type&& list_sizes,
430 std::optional<std::string_view> name = std::nullopt,
431 std::optional<METADATA_RANGE> metadata = std::nullopt
435 std::ranges::input_range OFFSET_BUFFER_RANGE,
436 std::ranges::input_range SIZE_RANGE,
437 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
439 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
440 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
442 [[nodiscard]]
static arrow_proxy create_proxy(
444 OFFSET_BUFFER_RANGE&& list_offsets,
445 SIZE_RANGE&& list_sizes,
446 bool nullable =
true,
447 std::optional<std::string_view> name = std::nullopt,
448 std::optional<METADATA_RANGE> metadata = std::nullopt
451 return list_view_array_impl<BIG>::create_proxy(
452 std::move(flat_values),
453 offset_buffer_type(std::move(list_offsets)),
454 size_buffer_type(std::move(list_sizes)),
461 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
462 [[nodiscard]]
static arrow_proxy create_proxy(
464 offset_buffer_type&& list_offsets,
465 size_buffer_type&& list_sizes,
466 bool nullable =
true,
467 std::optional<std::string_view> name = std::nullopt,
468 std::optional<METADATA_RANGE> metadata = std::nullopt
471 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
472 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
473 [[nodiscard]] std::pair<offset_type, offset_type> offset_range(size_type i)
const;
475 [[nodiscard]] offset_type* make_list_offsets();
476 [[nodiscard]] offset_type* make_list_sizes();
478 offset_type* p_list_offsets;
505 template <
class... ARGS>
508 :
self_type(create_proxy(std::forward<ARGS>(args)...))
518 std::uint64_t list_size,
521 std::optional<std::string_view> name = std::nullopt,
522 std::optional<METADATA_RANGE> metadata = std::nullopt
529 std::uint64_t list_size,
532 std::optional<std::string_view> name = std::nullopt,
533 std::optional<METADATA_RANGE> metadata = std::nullopt
536 [[nodiscard]]
static uint64_t list_size_from_format(
const std::string_view format);
537 [[nodiscard]] std::pair<offset_type, offset_type> offset_range(size_type i)
const;
539 uint64_t m_list_size;
550 template <
class DERIVED>
553 , p_flat_array(make_flat_array())
557 template <
class DERIVED>
560 , p_flat_array(make_flat_array())
564 template <
class DERIVED>
568 p_flat_array = make_flat_array();
572 template <
class DERIVED>
575 return p_flat_array.get();
578 template <
class DERIVED>
581 return p_flat_array.get();
584 template <
class DERIVED>
585 auto list_array_crtp_base<DERIVED>::value_begin() -> value_iterator
590 template <
class DERIVED>
591 auto list_array_crtp_base<DERIVED>::value_end() -> value_iterator
593 return value_iterator(
594 detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()),
599 template <
class DERIVED>
600 auto list_array_crtp_base<DERIVED>::value_cbegin() const -> const_value_iterator
602 return const_value_iterator(
608 template <
class DERIVED>
609 auto list_array_crtp_base<DERIVED>::value_cend() const -> const_value_iterator
611 return const_value_iterator(
617 template <
class DERIVED>
618 auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
620 const auto r = this->derived_cast().offset_range(i);
622 return list_value{p_flat_array.get(),
static_cast<st
>(r.first),
static_cast<st
>(r.second)};
625 template <
class DERIVED>
626 auto list_array_crtp_base<DERIVED>::value(size_type i)
const -> inner_const_reference
628 const auto r = this->derived_cast().offset_range(i);
630 return list_value{p_flat_array.get(),
static_cast<st
>(r.first),
static_cast<st
>(r.second)};
633 template <
class DERIVED>
636 return array_factory(this->get_arrow_proxy().children()[0].view());
644# pragma GCC diagnostic push
645# pragma GCC diagnostic ignored "-Wcast-align"
651 , p_list_offsets(make_list_offsets())
656 template <std::ranges::range SIZES_RANGE>
660 std::forward<SIZES_RANGE>(sizes)
665 template <val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
668 offset_buffer_type&& list_offsets,
670 std::optional<std::string_view> name,
671 std::optional<METADATA_RANGE> metadata
674 const auto size = list_offsets.size() - 1;
685 BIG ? std::string(
"+L") : std::string(
"+l"),
689 new ArrowSchema*[1]{
new ArrowSchema(std::move(flat_schema))},
695 std::vector<buffer<std::uint8_t>> arr_buffs = {
696 std::move(vbitmap).extract_storage(),
697 std::move(list_offsets).extract_storage()
701 static_cast<std::int64_t
>(size),
702 static_cast<int64_t
>(null_count),
704 std::move(arr_buffs),
705 new ArrowArray*[1]{
new ArrowArray(std::move(flat_arr))},
710 return arrow_proxy{std::move(arr), std::move(schema)};
714 template <val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
717 offset_buffer_type&& list_offsets,
719 std::optional<std::string_view> name,
720 std::optional<METADATA_RANGE> metadata
725 return list_array_impl<BIG>::create_proxy(
726 std::move(flat_values),
727 std::move(list_offsets),
735 const auto size = list_offsets.size() - 1;
740 BIG ? std::string(
"+L") : std::string(
"+l"),
744 new ArrowSchema*[1]{
new ArrowSchema(std::move(flat_schema))},
750 std::vector<buffer<std::uint8_t>> arr_buffs = {
752 std::move(list_offsets).extract_storage()
756 static_cast<std::int64_t
>(size),
759 std::move(arr_buffs),
760 new ArrowArray*[1]{
new ArrowArray(std::move(flat_arr))},
765 return arrow_proxy{std::move(arr), std::move(schema)};
772 , p_list_offsets(make_list_offsets())
782 p_list_offsets = make_list_offsets();
788 auto list_array_impl<BIG>::offset_range(size_type i)
const -> std::pair<offset_type, offset_type>
790 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
794 auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
796 return reinterpret_cast<offset_type*
>(
797 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
808 , p_list_offsets(make_list_offsets())
809 , p_list_sizes(make_list_sizes())
814 template <val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
815 arrow_proxy list_view_array_impl<BIG>::create_proxy(
817 offset_buffer_type&& list_offsets,
818 size_buffer_type&& list_sizes,
820 std::optional<std::string_view> name,
821 std::optional<METADATA_RANGE> metadata
824 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(),
"sizes and offset must have the same size");
825 const auto size = list_sizes.size();
833 BIG ? std::string(
"+vL") : std::string(
"+vl"),
837 new ArrowSchema*[1]{
new ArrowSchema(std::move(flat_schema))},
842 std::vector<buffer<std::uint8_t>> arr_buffs = {
843 std::move(vbitmap).extract_storage(),
844 std::move(list_offsets).extract_storage(),
845 std::move(list_sizes).extract_storage()
849 static_cast<std::int64_t
>(size),
850 static_cast<int64_t
>(null_count),
852 std::move(arr_buffs),
853 new ArrowArray*[1]{
new ArrowArray(std::move(flat_arr))},
858 return arrow_proxy{std::move(arr), std::move(schema)};
862 template <input_metadata_container METADATA_RANGE>
863 arrow_proxy list_view_array_impl<BIG>::create_proxy(
865 offset_buffer_type&& list_offsets,
866 size_buffer_type&& list_sizes,
868 std::optional<std::string_view> name,
869 std::optional<METADATA_RANGE> metadata
874 return list_view_array_impl<BIG>::create_proxy(
875 std::move(flat_values),
876 std::move(list_offsets),
877 std::move(list_sizes),
885 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(),
"sizes and offset must have the same size");
886 const auto size = list_sizes.size();
891 BIG ? std::string(
"+vL") : std::string(
"+vl"),
895 new ArrowSchema*[1]{
new ArrowSchema(std::move(flat_schema))},
900 std::vector<buffer<std::uint8_t>> arr_buffs = {
902 std::move(list_offsets).extract_storage(),
903 std::move(list_sizes).extract_storage()
907 static_cast<std::int64_t
>(size),
910 std::move(arr_buffs),
911 new ArrowArray*[1]{
new ArrowArray(std::move(flat_arr))},
916 return arrow_proxy{std::move(arr), std::move(schema)};
923 , p_list_offsets(make_list_offsets())
924 , p_list_sizes(make_list_sizes())
934 p_list_offsets = make_list_offsets();
935 p_list_sizes = make_list_sizes();
941 inline auto list_view_array_impl<BIG>::offset_range(size_type i)
const
942 -> std::pair<offset_type, offset_type>
944 const auto offset = p_list_offsets[i];
945 return std::make_pair(offset, offset + p_list_sizes[i]);
949 auto list_view_array_impl<BIG>::make_list_offsets() -> offset_type*
951 return reinterpret_cast<offset_type*
>(
952 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
957 auto list_view_array_impl<BIG>::make_list_sizes() -> offset_type*
959 return reinterpret_cast<offset_type*
>(
960 this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
965# pragma GCC diagnostic pop
972 inline auto fixed_sized_list_array::list_size_from_format(
const std::string_view format) -> uint64_t
975 const auto n_digits = format.size() - 3;
976 const auto list_size_str = format.substr(3, n_digits);
977 return std::stoull(std::string(list_size_str));
986 inline auto fixed_sized_list_array::offset_range(size_type i)
const -> std::pair<offset_type, offset_type>
988 const auto offset = i * m_list_size;
989 return std::make_pair(offset, offset + m_list_size);
992 template <val
idity_bitmap_input R, input_metadata_container METADATA_RANGE>
993 inline arrow_proxy fixed_sized_list_array::create_proxy(
994 std::uint64_t list_size,
997 std::optional<std::string_view> name,
998 std::optional<METADATA_RANGE> metadata
1001 const auto size = flat_values.size() /
static_cast<std::size_t
>(list_size);
1006 const auto null_count = vbitmap.
null_count();
1008 const repeat_view<bool> children_ownership{
true, 1};
1010 std::string format =
"+w:" + std::to_string(list_size);
1014 std::move(metadata),
1016 new ArrowSchema*[1]{
new ArrowSchema(std::move(flat_schema))},
1022 std::vector<buffer<std::uint8_t>> arr_buffs = {vbitmap.
extract_storage()};
1025 static_cast<std::int64_t
>(size),
1026 static_cast<int64_t
>(null_count),
1028 std::move(arr_buffs),
1029 new ArrowArray*[1]{
new ArrowArray(std::move(flat_arr))},
1034 return arrow_proxy{std::move(arr), std::move(schema)};
1037 template <val
idity_bitmap_input R, input_metadata_container METADATA_RANGE>
1038 inline arrow_proxy fixed_sized_list_array::create_proxy(
1039 std::uint64_t list_size,
1040 array&& flat_values,
1042 std::optional<std::string_view> name,
1043 std::optional<METADATA_RANGE> metadata
1048 return fixed_sized_list_array::create_proxy(
1050 std::move(flat_values),
1058 const auto size = flat_values.size() /
static_cast<std::size_t
>(list_size);
1060 const repeat_view<bool> children_ownership{
true, 1};
1062 std::string format =
"+w:" + std::to_string(list_size);
1066 std::move(metadata),
1068 new ArrowSchema*[1]{
new ArrowSchema(std::move(flat_schema))},
1074 std::vector<buffer<std::uint8_t>> arr_buffs = {
1075 buffer<std::uint8_t>{
nullptr, 0}
1079 static_cast<std::int64_t
>(size),
1082 std::move(arr_buffs),
1083 new ArrowArray*[1]{
new ArrowArray(std::move(flat_arr))},
1088 return arrow_proxy{std::move(arr), std::move(schema)};
typename base_type::const_bitmap_range const_bitmap_range
array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class defining common immutable interface for arrays with a bitmap.
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Proxy class over ArrowArray and ArrowSchema.
Object that owns a piece of contiguous memory.
Smart pointer behaving like a copiable std::unique_ptr.
storage_type extract_storage() noexcept
constexpr size_type null_count() const noexcept
fixed_sized_list_array & operator=(const self_type &)=default
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
fixed_sized_list_array(ARGS &&... args)
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
fixed_sized_list_array(self_type &&)=default
typename base_type::size_type size_type
fixed_sized_list_array(const self_type &)=default
std::uint64_t offset_type
typename base_type::const_bitmap_range const_bitmap_range
const array_wrapper * raw_flat_array() const
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
list_value inner_value_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::iterator_tag iterator_tag
list_array_crtp_base(arrow_proxy proxy)
array_wrapper * raw_flat_array()
typename inner_types::value_iterator value_iterator
list_array_crtp_base & operator=(const self_type &)
list_array_crtp_base(self_type &&) noexcept=default
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
list_array_crtp_base(const self_type &)
nullable< inner_value_type > value_type
list_value inner_const_reference
list_value inner_reference
array_bitmap_base< DERIVED > base_type
list_array_impl< BIG > self_type
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
list_array_impl(self_type &&) noexcept=default
list_array_impl(const self_type &)
array_inner_types< self_type > inner_types
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
list_array_impl & operator=(const self_type &)
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
list_view_array_impl & operator=(const self_type &)
typename base_type::size_type size_type
list_view_array_impl & operator=(self_type &&)=default
list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
list_view_array_impl(ARGS &&... args)
list_view_array_impl< BIG > self_type
list_view_array_impl(const self_type &)
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
A view that repeats a value a given number of times.
This buffer class is use as storage buffer for all sparrow arrays.
#define SPARROW_ASSERT(expr__, message__)
sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
constexpr bool excludes_copy_and_move_ctor_v
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
dynamic_bitset< std::uint8_t > validity_bitmap
constexpr bool is_list_array_v
Checks whether T is a list_array type.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
list_array_impl< true > big_list_array
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
list_value inner_value_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
std::random_access_iterator_tag iterator_tag
fixed_sized_list_array array_type
list_value inner_reference
list_value inner_const_reference
std::uint64_t list_size_type
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
std::random_access_iterator_tag iterator_tag
list_array_impl< BIG > array_type
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
list_value inner_value_type
list_value inner_const_reference
list_value inner_reference
list_value inner_value_type
list_value inner_const_reference
std::random_access_iterator_tag iterator_tag
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
list_value inner_reference
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
list_view_array_impl< BIG > array_type
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()