39 template <
class T,
class OT>
45 [[nodiscard]]
static std::string
format()
54 [[nodiscard]]
static std::string
format()
63 [[nodiscard]]
static std::string
format()
72 [[nodiscard]]
static std::string
format()
79 template <std::ranges::sized_range T,
class CR, layout_offset OT>
80 class variable_size_binary_array_impl;
88 binary_traits::value_type,
89 binary_traits::const_reference,
95 struct get_data_type_from_array;
158 template <std::ranges::sized_range T,
class CR, layout_offset OT>
206 template <std::ranges::sized_range T,
class CR, layout_offset OT>
213 sizeof(std::ranges::range_value_t<T>) ==
sizeof(std::uint8_t),
214 "Only sequences of types with the same size as uint8_t are supported"
258 template <
class... ARGS>
261 :
self_type(create_proxy(std::forward<ARGS>(args)...))
265 using base_type::get_arrow_proxy;
266 using base_type::size;
271 template <std::ranges::range SIZES_RANGE>
284 std::optional<std::string_view> name = std::nullopt,
285 std::optional<METADATA_RANGE> metadata = std::nullopt
289 std::ranges::input_range R,
293 std::ranges::input_range<std::ranges::range_value_t<R>> &&
301 std::optional<std::string_view> name = std::nullopt,
302 std::optional<METADATA_RANGE> metadata = std::nullopt
306 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
307 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
308 [[nodiscard]]
static arrow_proxy create_proxy(
310 std::optional<std::string_view> name = std::nullopt,
311 std::optional<METADATA_RANGE> metadata = std::nullopt
314 static constexpr size_t OFFSET_BUFFER_INDEX = 1;
315 static constexpr size_t DATA_BUFFER_INDEX = 2;
322 [[nodiscard]] value_iterator value_begin();
323 [[nodiscard]] value_iterator value_end();
335 template <std::ranges::sized_range U>
336 requires mpl::convertible_ranges<U, T>
337 void resize_values(size_type new_length, U
value);
339 void resize_offsets(size_type new_length,
offset_type offset_value);
341 template <std::ranges::sized_range U>
342 requires mpl::convertible_ranges<U, T>
347 template <mpl::iterator_of_type<T> InputIt>
350 template <mpl::iterator_of_type<OT> InputIt>
357 template <std::ranges::sized_range U>
358 requires mpl::convertible_ranges<U, T>
372 template <std::ranges::sized_range T,
class CR, layout_offset OT>
376 const auto type = this->get_arrow_proxy().data_type();
386 template <std::ranges::sized_range T,
class CR, layout_offset OT>
387 template <std::ranges::range SIZES_RANGE>
395 template <std::ranges::sized_range T,
class CR, layout_offset OT>
396 template <mpl::
char_like C, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
397 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
399 offset_buffer_type&& offsets,
401 std::optional<std::string_view> name,
402 std::optional<METADATA_RANGE> metadata
405 const auto size = offsets.size() - 1;
420 std::vector<buffer<std::uint8_t>> arr_buffs = {
421 std::move(vbitmap).extract_storage(),
422 std::move(offsets).extract_storage(),
423 std::move(data_buffer).extract_storage()
427 static_cast<std::int64_t
>(size),
428 static_cast<int64_t
>(null_count),
430 std::move(arr_buffs),
432 repeat_view<bool>(
true, 0),
436 return arrow_proxy{std::move(arr), std::move(schema)};
439 template <std::ranges::sized_range T,
class CR, layout_offset OT>
440 template <std::ranges::input_range R, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
442 std::ranges::input_range<std::ranges::range_value_t<R>> &&
446 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
449 std::optional<std::string_view> name,
450 std::optional<METADATA_RANGE> metadata
453 using values_inner_value_type = std::ranges::range_value_t<std::ranges::range_value_t<R>>;
455 auto size_range = values
456 | std::views::transform(
459 return std::ranges::size(v);
462 auto offset_buffer = offset_from_sizes(size_range);
465 std::move(data_buffer),
466 std::move(offset_buffer),
467 std::forward<VB>(validity_input),
468 std::forward<std::optional<std::string_view>>(name),
469 std::forward<std::optional<METADATA_RANGE>>(metadata)
473 template <std::ranges::sized_range T,
class CR, layout_offset OT>
474 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
475 requires std::is_same_v<std::ranges::range_value_t<R>,
nullable<T>>
476 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
478 std::optional<std::string_view> name,
479 std::optional<METADATA_RANGE> metadata
483 const auto values = range
484 | std::views::transform(
490 const auto is_non_null = range
491 | std::views::transform(
494 return v.has_value();
497 return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata));
500 template <std::ranges::sized_range T,
class CR, layout_offset OT>
501 auto variable_size_binary_array_impl<T, CR, OT>::data(size_type i) -> data_iterator
505 return proxy.buffers()[DATA_BUFFER_INDEX].template data<data_value_type>() + i;
508 template <std::ranges::sized_range T,
class CR, layout_offset OT>
509 auto variable_size_binary_array_impl<T, CR, OT>::data(size_type i)
const -> const_data_iterator
511 const arrow_proxy& proxy = this->get_arrow_proxy();
513 return proxy.buffers()[DATA_BUFFER_INDEX].template data<const data_value_type>() + i;
516 template <std::ranges::sized_range T,
class CR, layout_offset OT>
517 template <std::ranges::sized_range U>
519 void variable_size_binary_array_impl<T, CR, OT>::assign(U&& rhs, size_type index)
522 const auto offset_beg = *offset(index);
523 const auto offset_end = *offset(index + 1);
524 const auto initial_value_length = offset_end - offset_beg;
525 const auto new_value_length =
static_cast<OT
>(std::ranges::size(rhs));
526 const OT shift_byte_count = new_value_length - initial_value_length;
527 auto& data_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
528 if (shift_byte_count != 0)
530 const auto shift_val_abs =
static_cast<size_type
>(std::abs(shift_byte_count));
531 const auto new_data_buffer_size = shift_byte_count < 0 ? data_buffer.size() - shift_val_abs
532 : data_buffer.size() + shift_val_abs;
534 if (shift_byte_count > 0)
536 data_buffer.resize(new_data_buffer_size);
539 data_buffer.begin() + offset_end,
540 data_buffer.end() - shift_byte_count,
547 data_buffer.begin() + offset_end,
549 data_buffer.begin() + offset_end + shift_byte_count
551 data_buffer.resize(new_data_buffer_size);
557 [shift_byte_count](
auto& offset)
559 offset += shift_byte_count;
563 auto tmp = std::views::transform(
567 return static_cast<std::uint8_t
>(val);
571 std::copy(std::ranges::begin(tmp), std::ranges::end(tmp), data_buffer.begin() + offset_beg);
574 template <std::ranges::sized_range T,
class CR, layout_offset OT>
575 auto variable_size_binary_array_impl<T, CR, OT>::offset(size_type i) -> offset_iterator
578 return get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<OT>()
579 +
static_cast<size_type
>(this->get_arrow_proxy().offset()) + i;
582 template <std::ranges::sized_range T,
class CR, layout_offset OT>
583 auto variable_size_binary_array_impl<T, CR, OT>::offset(size_type i)
const -> const_offset_iterator
586 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<OT>()
587 +
static_cast<size_type
>(this->get_arrow_proxy().offset()) + i;
590 template <std::ranges::sized_range T,
class CR, layout_offset OT>
591 auto variable_size_binary_array_impl<T, CR, OT>::offsets_begin() -> offset_iterator
596 template <std::ranges::sized_range T,
class CR, layout_offset OT>
597 auto variable_size_binary_array_impl<T, CR, OT>::offsets_cbegin() const -> const_offset_iterator
602 template <std::ranges::sized_range T,
class CR, layout_offset OT>
603 auto variable_size_binary_array_impl<T, CR, OT>::offsets_end() -> offset_iterator
605 return offset(
size() + 1);
608 template <std::ranges::sized_range T,
class CR, layout_offset OT>
609 auto variable_size_binary_array_impl<T, CR, OT>::offsets_cend() const -> const_offset_iterator
611 return offset(
size() + 1);
614 template <std::ranges::sized_range T,
class CR, layout_offset OT>
621 template <std::ranges::sized_range T,
class CR, layout_offset OT>
625 const OT offset_begin = *offset(i);
627 const OT offset_end = *offset(i + 1);
634 template <std::ranges::sized_range T,
class CR, layout_offset OT>
635 auto variable_size_binary_array_impl<T, CR, OT>::value_begin() -> value_iterator
637 return value_iterator{
this, 0};
640 template <std::ranges::sized_range T,
class CR, layout_offset OT>
641 auto variable_size_binary_array_impl<T, CR, OT>::value_end() -> value_iterator
646 template <std::ranges::sized_range T,
class CR, layout_offset OT>
647 auto variable_size_binary_array_impl<T, CR, OT>::value_cbegin() const -> const_value_iterator
649 return const_value_iterator{
this, 0};
652 template <std::ranges::sized_range T,
class CR, layout_offset OT>
653 auto variable_size_binary_array_impl<T, CR, OT>::value_cend() const -> const_value_iterator
658 template <std::ranges::sized_range T,
class CR, layout_offset OT>
659 template <std::ranges::sized_range U>
661 void variable_size_binary_array_impl<T, CR, OT>::resize_values(size_type new_length, U value)
663 const size_t new_size = new_length +
static_cast<size_t>(this->get_arrow_proxy().offset());
664 auto& buffers = this->get_arrow_proxy().get_array_private_data()->buffers();
665 if (new_length <
size())
667 const auto offset_begin =
static_cast<size_t>(*offset(new_length));
668 auto& data_buffer = buffers[DATA_BUFFER_INDEX];
669 data_buffer.resize(offset_begin);
670 auto& offset_buffer = buffers[OFFSET_BUFFER_INDEX];
672 offset_buffer_adaptor.resize(new_size + 1);
674 else if (new_length >
size())
676 insert_value(value_cend(), value, new_length -
size());
680 template <std::ranges::sized_range T,
class CR, layout_offset OT>
681 template <std::ranges::sized_range U>
687 const auto idx =
static_cast<size_t>(std::distance(value_cbegin(), pos));
688 const OT offset_begin = *offset(idx);
689 const std::vector<uint8_t> casted_value{
value.cbegin(),
value.cend()};
691 const auto joined_repeated_value_range = std::ranges::views::join(my_repeat_view);
692 auto& data_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
693 const auto pos_to_insert =
sparrow::next(data_buffer.cbegin(), offset_begin);
694 data_buffer.insert(pos_to_insert, joined_repeated_value_range.begin(), joined_repeated_value_range.end());
695 insert_offset(offsets_cbegin() + idx + 1,
static_cast<offset_type>(
value.size()), count);
699 template <std::ranges::sized_range T,
class CR, layout_offset OT>
700 auto variable_size_binary_array_impl<T, CR, OT>::insert_offset(
701 const_offset_iterator pos,
702 offset_type value_size,
706 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
707 const auto idx =
static_cast<size_t>(std::distance(offsets_cbegin(), pos));
709 const offset_type cumulative_size = value_size *
static_cast<offset_type
>(count);
713 offset_buffer_adaptor.end(),
714 [cumulative_size](
auto& offset)
716 offset += cumulative_size;
719 offset_buffer_adaptor.insert(
sparrow::next(offset_buffer_adaptor.cbegin(), idx + 1), count, 0);
721 for (
size_t i = idx + 1; i < idx + 1 + count; ++i)
723 offset_buffer_adaptor[i] = offset_buffer_adaptor[i - 1] + value_size;
725 return offsets_begin() + idx;
728 template <std::ranges::sized_range T,
class CR, layout_offset OT>
729 template <mpl::iterator_of_type<T> InputIt>
731 variable_size_binary_array_impl<T, CR, OT>::insert_values(
const_value_iterator pos, InputIt first, InputIt last)
734 auto& data_buffer = get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
736 auto values = std::ranges::subrange(first, last);
737 const size_t cumulative_sizes = std::accumulate(
741 [](
size_t acc,
const T&
value)
743 return acc + value.size();
746 data_buffer_adaptor.resize(data_buffer_adaptor.size() + cumulative_sizes);
747 const auto idx =
static_cast<size_t>(std::distance(value_cbegin(), pos));
748 const OT offset_begin = *offset(idx);
749 auto insert_pos =
sparrow::next(data_buffer_adaptor.begin(), offset_begin);
755 data_buffer_adaptor.end()
758 for (
const T&
value : values)
760 std::copy(
value.begin(),
value.end(), insert_pos);
761 std::advance(insert_pos,
value.size());
764 const auto sizes_of_each_value = std::ranges::views::transform(
771 insert_offsets(offset(idx + 1), sizes_of_each_value.begin(), sizes_of_each_value.end());
775 template <std::ranges::sized_range T,
class CR, layout_offset OT>
776 template <mpl::iterator_of_type<OT> InputIt>
777 auto variable_size_binary_array_impl<T, CR, OT>::insert_offsets(
786 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
788 const auto idx = std::distance(offsets_cbegin(), pos);
789 const OT cumulative_sizes = std::reduce(first_sizes, last_sizes, OT(0));
790 const auto sizes_count = std::distance(first_sizes, last_sizes);
791 offset_buffer_adaptor.resize(offset_buffer_adaptor.size() +
static_cast<size_t>(sizes_count));
794 offset_buffer_adaptor.begin() + idx,
795 offset_buffer_adaptor.end() - sizes_count,
796 offset_buffer_adaptor.end()
800 offset_buffer_adaptor.begin() + idx + sizes_count,
801 offset_buffer_adaptor.end(),
802 [cumulative_sizes](
auto& offset)
804 offset += cumulative_sizes;
808 InputIt it = first_sizes;
809 for (
size_t i =
static_cast<size_t>(idx + 1); i < static_cast<size_t>(idx + sizes_count + 1); ++i)
811 offset_buffer_adaptor[i] = offset_buffer_adaptor[i - 1] + *it;
814 return offset(
static_cast<size_t>(idx));
817 template <std::ranges::sized_range T,
class CR, layout_offset OT>
818 auto variable_size_binary_array_impl<T, CR, OT>::erase_values(const_value_iterator pos, size_type count)
823 const size_t index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
828 auto& data_buffer = get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
829 const auto offset_begin = *offset(index);
830 const auto offset_end = *offset(index + count);
831 const size_t difference =
static_cast<size_t>(offset_end - offset_begin);
833 std::move(data_buffer.begin() + offset_end, data_buffer.end(), data_buffer.begin() + offset_begin);
834 data_buffer.resize(data_buffer.size() - difference);
836 erase_offsets(offset(index), count);
840 template <std::ranges::sized_range T,
class CR, layout_offset OT>
841 auto variable_size_binary_array_impl<T, CR, OT>::erase_offsets(const_offset_iterator pos, size_type count)
846 const size_t index =
static_cast<size_t>(std::distance(offsets_cbegin(), pos));
849 return offset(index);
851 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
853 const OT offset_start_value = *offset(index);
854 const OT offset_end_value = *offset(index + count);
855 const OT difference = offset_end_value - offset_start_value;
858 sparrow::next(offset_buffer_adaptor.begin(), index + count + 1),
859 offset_buffer_adaptor.end(),
862 offset_buffer_adaptor.resize(offset_buffer_adaptor.size() - count);
866 offset_buffer_adaptor.end(),
867 [difference](OT& offset)
869 offset -= difference;
872 return offset(index);
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
bitset_iterator< self_type, false > iterator
constexpr size_type null_count() const noexcept
bitset_iterator< self_type, true > const_iterator
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
A view that repeats a value a given number of times.
This buffer class is use as storage buffer for all sparrow arrays.
typename base_type::bitmap_type bitmap_type
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
typename inner_types::const_value_iterator const_value_iterator
typename inner_types::const_offset_iterator const_offset_iterator
typename base_type::bitmap_reference bitmap_reference
mutable_array_bitmap_base< self_type > base_type
typename inner_types::inner_value_type inner_value_type
typename base_type::const_bitmap_range const_bitmap_range
inner_reference value(size_type i)
typename inner_types::offset_type offset_type
typename inner_types::offset_iterator offset_iterator
typename inner_types::const_data_iterator const_data_iterator
u8_buffer< char > char_buffer_type
typename base_type::iterator_tag iterator_tag
array_inner_types< self_type > inner_types
typename inner_types::inner_reference inner_reference
typename base_type::size_type size_type
inner_const_reference value(size_type i) const
typename base_type::difference_type difference_type
nullable< inner_reference, bitmap_reference > reference
variable_size_binary_array_impl(ARGS &&... args)
u8_buffer< std::byte > byte_buffer_type
typename inner_types::value_iterator value_iterator
u8_buffer< std::uint8_t > uint8_buffer_type
variable_size_binary_array_impl< T, std::string_view, OT > self_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
variable_size_binary_array_impl(arrow_proxy)
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::data_iterator data_iterator
typename inner_types::data_value_type data_value_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
typename inner_types::inner_const_reference inner_const_reference
nullable< inner_value_type > value_type
Implementation of reference to inner type used for layout L.
Iterator over the data values of a variable size binary layout.
Concept for iterator types.
Matches range types From whose elements are convertible to elements of range type To.
#define SPARROW_ASSERT_TRUE(expr__)
sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
constexpr bool excludes_copy_and_move_ctor_v
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_big_string_array_v
Checks whether T is a big_string_array type.
constexpr bool is_string_array_v
Checks whether T is a string_array type.
variable_size_binary_array_impl< binary_traits::value_type, binary_traits::const_reference, std::int32_t > binary_array
constexpr InputIt next(InputIt it, Distance n)
variable_size_binary_array_impl< std::string, std::string_view, std::int32_t > string_array
constexpr bool is_big_binary_array_v
Checks whether T is a big_binary_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
constexpr bool is_binary_array_v
Checks whether T is a binary_array type.
auto make_buffer_adaptor(FromBufferRef &buf)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
variable_size_binary_array_impl< std::string, std::string_view, std::int64_t > big_string_array
variable_size_binary_array_impl< binary_traits::value_type, binary_traits::const_reference, std::int64_t > big_binary_array
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
arrow_traits< std::vector< byte_t > > binary_traits
array_inner_types< variable_size_binary_array_impl< T, CR, OT > >::iterator_tag iterator_tag
const_data_iterator value_iterator
inner_value_type value_type
inner_const_reference reference
const_bitmap_iterator bitmap_iterator
bitmap_type::iterator bitmap_iterator
array_inner_types< variable_size_binary_array_impl< T, CR, OT > >::iterator_tag iterator_tag
inner_value_type value_type
data_iterator value_iterator
inner_reference reference
variable_size_binary_reference< array_type > inner_reference
variable_size_binary_array_impl< T, CR, OT > array_type
std::random_access_iterator_tag iterator_tag
variable_size_binary_value_iterator< array_type, iterator_types > value_iterator
bitmap_type::const_iterator const_bitmap_iterator
const data_value_type * const_data_iterator
data_value_type * data_iterator
const OT * const_offset_iterator
typename T::value_type data_value_type
variable_size_binary_value_iterator< array_type, const_iterator_types > const_value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()