39 template <
class T,
class OT>
45 [[nodiscard]]
static std::string
format()
54 [[nodiscard]]
static std::string
format()
63 [[nodiscard]]
static std::string
format()
72 [[nodiscard]]
static std::string
format()
79 template <std::ranges::sized_range T,
class CR, layout_offset OT>
80 class variable_size_binary_array_impl;
88 binary_traits::value_type,
89 binary_traits::const_reference,
95 struct get_data_type_from_array;
158 template <std::ranges::sized_range T,
class CR, layout_offset OT>
206 template <std::ranges::sized_range T,
class CR, layout_offset OT>
213 sizeof(std::ranges::range_value_t<T>) ==
sizeof(std::uint8_t),
214 "Only sequences of types with the same size as uint8_t are supported"
258 template <
class... ARGS>
261 :
self_type(create_proxy(std::forward<ARGS>(args)...))
265 using base_type::get_arrow_proxy;
266 using base_type::size;
271 template <std::ranges::range SIZES_RANGE>
276 template <mpl::
char_like C, val
idity_bitmap_input VB = val
idity_bitmap>
281 std::optional<std::string_view> name = std::nullopt,
282 std::optional<std::string_view> metadata = std::nullopt
285 template <std::ranges::input_range R, val
idity_bitmap_input VB = val
idity_bitmap>
287 std::ranges::input_range<std::ranges::range_value_t<R>> &&
295 std::optional<std::string_view> name = std::nullopt,
296 std::optional<std::string_view> metadata = std::nullopt
300 template <std::ranges::input_range R>
301 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
302 [[nodiscard]]
static arrow_proxy create_proxy(
304 std::optional<std::string_view> name = std::nullopt,
305 std::optional<std::string_view> metadata = std::nullopt
308 static constexpr size_t OFFSET_BUFFER_INDEX = 1;
309 static constexpr size_t DATA_BUFFER_INDEX = 2;
316 [[nodiscard]] value_iterator value_begin();
317 [[nodiscard]] value_iterator value_end();
329 template <std::ranges::sized_range U>
330 requires mpl::convertible_ranges<U, T>
331 void resize_values(size_type new_length, U
value);
333 void resize_offsets(size_type new_length,
offset_type offset_value);
335 template <std::ranges::sized_range U>
336 requires mpl::convertible_ranges<U, T>
341 template <mpl::iterator_of_type<T> InputIt>
344 template <mpl::iterator_of_type<OT> InputIt>
351 template <std::ranges::sized_range U>
352 requires mpl::convertible_ranges<U, T>
366 template <std::ranges::sized_range T,
class CR, layout_offset OT>
370 const auto type = this->get_arrow_proxy().data_type();
380 template <std::ranges::sized_range T,
class CR, layout_offset OT>
381 template <std::ranges::range SIZES_RANGE>
389 template <std::ranges::sized_range T,
class CR, layout_offset OT>
390 template <mpl::
char_like C, val
idity_bitmap_input VB>
391 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
393 offset_buffer_type&& offsets,
395 std::optional<std::string_view> name,
396 std::optional<std::string_view> metadata
399 const auto size = offsets.size() - 1;
413 std::vector<buffer<std::uint8_t>> arr_buffs = {
414 std::move(vbitmap).extract_storage(),
415 std::move(offsets).extract_storage(),
416 std::move(data_buffer).extract_storage()
420 static_cast<std::int64_t
>(size),
421 static_cast<int64_t
>(null_count),
423 std::move(arr_buffs),
428 return arrow_proxy{std::move(arr), std::move(schema)};
431 template <std::ranges::sized_range T,
class CR, layout_offset OT>
432 template <std::ranges::input_range R, val
idity_bitmap_input VB>
434 std::ranges::input_range<std::ranges::range_value_t<R>> &&
438 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
441 std::optional<std::string_view> name,
442 std::optional<std::string_view> metadata
445 using values_inner_value_type = std::ranges::range_value_t<std::ranges::range_value_t<R>>;
447 auto size_range = values
448 | std::views::transform(
451 return std::ranges::size(v);
454 auto offset_buffer = offset_from_sizes(size_range);
457 std::move(data_buffer),
458 std::move(offset_buffer),
459 std::forward<VB>(validity_input),
460 std::forward<std::optional<std::string_view>>(name),
461 std::forward<std::optional<std::string_view>>(metadata)
465 template <std::ranges::sized_range T,
class CR, layout_offset OT>
466 template <std::ranges::input_range R>
467 requires std::is_same_v<std::ranges::range_value_t<R>,
nullable<T>>
468 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
470 std::optional<std::string_view> name,
471 std::optional<std::string_view> metadata
475 const auto values = range
476 | std::views::transform(
482 const auto is_non_null = range
483 | std::views::transform(
486 return v.has_value();
489 return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata));
492 template <std::ranges::sized_range T,
class CR, layout_offset OT>
493 auto variable_size_binary_array_impl<T, CR, OT>::data(size_type i) -> data_iterator
497 return proxy.buffers()[DATA_BUFFER_INDEX].template data<data_value_type>() + i;
500 template <std::ranges::sized_range T,
class CR, layout_offset OT>
501 auto variable_size_binary_array_impl<T, CR, OT>::data(size_type i)
const -> const_data_iterator
503 const arrow_proxy& proxy = this->get_arrow_proxy();
505 return proxy.buffers()[DATA_BUFFER_INDEX].template data<const data_value_type>() + i;
508 template <std::ranges::sized_range T,
class CR, layout_offset OT>
509 template <std::ranges::sized_range U>
511 void variable_size_binary_array_impl<T, CR, OT>::assign(U&& rhs, size_type index)
514 const auto offset_beg = *offset(index);
515 const auto offset_end = *offset(index + 1);
516 const auto initial_value_length = offset_end - offset_beg;
517 const auto new_value_length =
static_cast<OT
>(std::ranges::size(rhs));
518 const OT shift_byte_count = new_value_length - initial_value_length;
519 auto& data_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
520 if (shift_byte_count != 0)
522 const auto shift_val_abs =
static_cast<size_type
>(std::abs(shift_byte_count));
523 const auto new_data_buffer_size = shift_byte_count < 0 ? data_buffer.size() - shift_val_abs
524 : data_buffer.size() + shift_val_abs;
526 if (shift_byte_count > 0)
528 data_buffer.resize(new_data_buffer_size);
531 data_buffer.begin() + offset_end,
532 data_buffer.end() - shift_byte_count,
539 data_buffer.begin() + offset_end,
541 data_buffer.begin() + offset_end + shift_byte_count
543 data_buffer.resize(new_data_buffer_size);
549 [shift_byte_count](
auto& offset)
551 offset += shift_byte_count;
555 auto tmp = std::views::transform(
559 return static_cast<std::uint8_t
>(val);
563 std::copy(std::ranges::begin(tmp), std::ranges::end(tmp), data_buffer.begin() + offset_beg);
566 template <std::ranges::sized_range T,
class CR, layout_offset OT>
567 auto variable_size_binary_array_impl<T, CR, OT>::offset(size_type i) -> offset_iterator
570 return get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<OT>()
571 +
static_cast<size_type
>(this->get_arrow_proxy().offset()) + i;
574 template <std::ranges::sized_range T,
class CR, layout_offset OT>
575 auto variable_size_binary_array_impl<T, CR, OT>::offset(size_type i)
const -> const_offset_iterator
578 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<OT>()
579 +
static_cast<size_type
>(this->get_arrow_proxy().offset()) + i;
582 template <std::ranges::sized_range T,
class CR, layout_offset OT>
583 auto variable_size_binary_array_impl<T, CR, OT>::offsets_begin() -> offset_iterator
588 template <std::ranges::sized_range T,
class CR, layout_offset OT>
589 auto variable_size_binary_array_impl<T, CR, OT>::offsets_cbegin() const -> const_offset_iterator
594 template <std::ranges::sized_range T,
class CR, layout_offset OT>
595 auto variable_size_binary_array_impl<T, CR, OT>::offsets_end() -> offset_iterator
597 return offset(
size() + 1);
600 template <std::ranges::sized_range T,
class CR, layout_offset OT>
601 auto variable_size_binary_array_impl<T, CR, OT>::offsets_cend() const -> const_offset_iterator
603 return offset(
size() + 1);
606 template <std::ranges::sized_range T,
class CR, layout_offset OT>
613 template <std::ranges::sized_range T,
class CR, layout_offset OT>
617 const OT offset_begin = *offset(i);
619 const OT offset_end = *offset(i + 1);
626 template <std::ranges::sized_range T,
class CR, layout_offset OT>
627 auto variable_size_binary_array_impl<T, CR, OT>::value_begin() -> value_iterator
629 return value_iterator{
this, 0};
632 template <std::ranges::sized_range T,
class CR, layout_offset OT>
633 auto variable_size_binary_array_impl<T, CR, OT>::value_end() -> value_iterator
638 template <std::ranges::sized_range T,
class CR, layout_offset OT>
639 auto variable_size_binary_array_impl<T, CR, OT>::value_cbegin() const -> const_value_iterator
641 return const_value_iterator{
this, 0};
644 template <std::ranges::sized_range T,
class CR, layout_offset OT>
645 auto variable_size_binary_array_impl<T, CR, OT>::value_cend() const -> const_value_iterator
650 template <std::ranges::sized_range T,
class CR, layout_offset OT>
651 template <std::ranges::sized_range U>
653 void variable_size_binary_array_impl<T, CR, OT>::resize_values(size_type new_length, U value)
655 const size_t new_size = new_length +
static_cast<size_t>(this->get_arrow_proxy().offset());
656 auto& buffers = this->get_arrow_proxy().get_array_private_data()->buffers();
657 if (new_length <
size())
659 const auto offset_begin =
static_cast<size_t>(*offset(new_length));
660 auto& data_buffer = buffers[DATA_BUFFER_INDEX];
661 data_buffer.resize(offset_begin);
662 auto& offset_buffer = buffers[OFFSET_BUFFER_INDEX];
664 offset_buffer_adaptor.resize(new_size + 1);
666 else if (new_length >
size())
668 insert_value(value_cend(), value, new_length -
size());
672 template <std::ranges::sized_range T,
class CR, layout_offset OT>
673 template <std::ranges::sized_range U>
679 const auto idx =
static_cast<size_t>(std::distance(value_cbegin(), pos));
680 const OT offset_begin = *offset(idx);
681 const std::vector<uint8_t> casted_value{
value.cbegin(),
value.cend()};
683 const auto joined_repeated_value_range = std::ranges::views::join(my_repeat_view);
684 auto& data_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
685 const auto pos_to_insert =
sparrow::next(data_buffer.cbegin(), offset_begin);
686 data_buffer.insert(pos_to_insert, joined_repeated_value_range.begin(), joined_repeated_value_range.end());
687 insert_offset(offsets_cbegin() + idx + 1,
static_cast<offset_type>(
value.size()), count);
691 template <std::ranges::sized_range T,
class CR, layout_offset OT>
692 auto variable_size_binary_array_impl<T, CR, OT>::insert_offset(
693 const_offset_iterator pos,
694 offset_type value_size,
698 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
699 const auto idx =
static_cast<size_t>(std::distance(offsets_cbegin(), pos));
701 const offset_type cumulative_size = value_size *
static_cast<offset_type
>(count);
705 offset_buffer_adaptor.end(),
706 [cumulative_size](
auto& offset)
708 offset += cumulative_size;
711 offset_buffer_adaptor.insert(
sparrow::next(offset_buffer_adaptor.cbegin(), idx + 1), count, 0);
713 for (
size_t i = idx + 1; i < idx + 1 + count; ++i)
715 offset_buffer_adaptor[i] = offset_buffer_adaptor[i - 1] + value_size;
717 return offsets_begin() + idx;
720 template <std::ranges::sized_range T,
class CR, layout_offset OT>
721 template <mpl::iterator_of_type<T> InputIt>
723 variable_size_binary_array_impl<T, CR, OT>::insert_values(
const_value_iterator pos, InputIt first, InputIt last)
726 auto& data_buffer = get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
728 auto values = std::ranges::subrange(first, last);
729 const size_t cumulative_sizes = std::accumulate(
733 [](
size_t acc,
const T&
value)
735 return acc + value.size();
738 data_buffer_adaptor.resize(data_buffer_adaptor.size() + cumulative_sizes);
739 const auto idx =
static_cast<size_t>(std::distance(value_cbegin(), pos));
740 const OT offset_begin = *offset(idx);
741 auto insert_pos =
sparrow::next(data_buffer_adaptor.begin(), offset_begin);
747 data_buffer_adaptor.end()
750 for (
const T&
value : values)
752 std::copy(
value.begin(),
value.end(), insert_pos);
753 std::advance(insert_pos,
value.size());
756 const auto sizes_of_each_value = std::ranges::views::transform(
763 insert_offsets(offset(idx + 1), sizes_of_each_value.begin(), sizes_of_each_value.end());
767 template <std::ranges::sized_range T,
class CR, layout_offset OT>
768 template <mpl::iterator_of_type<OT> InputIt>
769 auto variable_size_binary_array_impl<T, CR, OT>::insert_offsets(
778 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
780 const auto idx = std::distance(offsets_cbegin(), pos);
781 const OT cumulative_sizes = std::reduce(first_sizes, last_sizes, OT(0));
782 const auto sizes_count = std::distance(first_sizes, last_sizes);
783 offset_buffer_adaptor.resize(offset_buffer_adaptor.size() +
static_cast<size_t>(sizes_count));
786 offset_buffer_adaptor.begin() + idx,
787 offset_buffer_adaptor.end() - sizes_count,
788 offset_buffer_adaptor.end()
792 offset_buffer_adaptor.begin() + idx + sizes_count,
793 offset_buffer_adaptor.end(),
794 [cumulative_sizes](
auto& offset)
796 offset += cumulative_sizes;
800 InputIt it = first_sizes;
801 for (
size_t i =
static_cast<size_t>(idx + 1); i < static_cast<size_t>(idx + sizes_count + 1); ++i)
803 offset_buffer_adaptor[i] = offset_buffer_adaptor[i - 1] + *it;
806 return offset(
static_cast<size_t>(idx));
809 template <std::ranges::sized_range T,
class CR, layout_offset OT>
810 auto variable_size_binary_array_impl<T, CR, OT>::erase_values(const_value_iterator pos, size_type count)
815 const size_t index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
820 auto& data_buffer = get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
821 const auto offset_begin = *offset(index);
822 const auto offset_end = *offset(index + count);
823 const size_t difference =
static_cast<size_t>(offset_end - offset_begin);
825 std::move(data_buffer.begin() + offset_end, data_buffer.end(), data_buffer.begin() + offset_begin);
826 data_buffer.resize(data_buffer.size() - difference);
828 erase_offsets(offset(index), count);
832 template <std::ranges::sized_range T,
class CR, layout_offset OT>
833 auto variable_size_binary_array_impl<T, CR, OT>::erase_offsets(const_offset_iterator pos, size_type count)
838 const size_t index =
static_cast<size_t>(std::distance(offsets_cbegin(), pos));
841 return offset(index);
843 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
845 const OT offset_start_value = *offset(index);
846 const OT offset_end_value = *offset(index + count);
847 const OT difference = offset_end_value - offset_start_value;
850 sparrow::next(offset_buffer_adaptor.begin(), index + count + 1),
851 offset_buffer_adaptor.end(),
854 offset_buffer_adaptor.resize(offset_buffer_adaptor.size() - count);
858 offset_buffer_adaptor.end(),
859 [difference](OT& offset)
861 offset -= difference;
864 return offset(index);
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
bitset_iterator< self_type, false > iterator
constexpr size_type null_count() const noexcept
bitset_iterator< self_type, true > const_iterator
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
A view that repeats a value a given number of times.
typename base_type::bitmap_type bitmap_type
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
typename inner_types::const_value_iterator const_value_iterator
typename inner_types::const_offset_iterator const_offset_iterator
typename base_type::bitmap_reference bitmap_reference
mutable_array_bitmap_base< self_type > base_type
typename inner_types::inner_value_type inner_value_type
typename base_type::const_bitmap_range const_bitmap_range
inner_reference value(size_type i)
typename inner_types::offset_type offset_type
typename inner_types::offset_iterator offset_iterator
typename inner_types::const_data_iterator const_data_iterator
u8_buffer< char > char_buffer_type
typename base_type::iterator_tag iterator_tag
array_inner_types< self_type > inner_types
typename inner_types::inner_reference inner_reference
typename base_type::size_type size_type
inner_const_reference value(size_type i) const
typename base_type::difference_type difference_type
nullable< inner_reference, bitmap_reference > reference
variable_size_binary_array_impl(ARGS &&... args)
u8_buffer< std::byte > byte_buffer_type
typename inner_types::value_iterator value_iterator
u8_buffer< std::uint8_t > uint8_buffer_type
variable_size_binary_array_impl< T, std::string_view, OT > self_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
variable_size_binary_array_impl(arrow_proxy)
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::data_iterator data_iterator
typename inner_types::data_value_type data_value_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
typename inner_types::inner_const_reference inner_const_reference
nullable< inner_value_type > value_type
Implementation of reference to inner type used for layout L.
Iterator over the data values of a variable size binary layout.
Concept for iterator types.
Matches range types From whose elements are convertible to elements of range type To.
#define SPARROW_ASSERT_TRUE(expr__)
sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
constexpr bool excludes_copy_and_move_ctor_v
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
constexpr bool is_big_string_array_v
Checks whether T is a big_string_array type.
ArrowSchema make_arrow_schema(F format, N name, M metadata, std::optional< ArrowFlag > flags, int64_t n_children, ArrowSchema **children, ArrowSchema *dictionary)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_string_array_v
Checks whether T is a string_array type.
variable_size_binary_array_impl< binary_traits::value_type, binary_traits::const_reference, std::int32_t > binary_array
constexpr InputIt next(InputIt it, Distance n)
variable_size_binary_array_impl< std::string, std::string_view, std::int32_t > string_array
constexpr bool is_big_binary_array_v
Checks whether T is a big_binary_array type.
dynamic_bitset< std::uint8_t > validity_bitmap
constexpr bool is_binary_array_v
Checks whether T is a binary_array type.
auto make_buffer_adaptor(FromBufferRef &buf)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
variable_size_binary_array_impl< std::string, std::string_view, std::int64_t > big_string_array
variable_size_binary_array_impl< binary_traits::value_type, binary_traits::const_reference, std::int64_t > big_binary_array
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Creates an ArrowArray.
arrow_traits< std::vector< byte_t > > binary_traits
array_inner_types< variable_size_binary_array_impl< T, CR, OT > >::iterator_tag iterator_tag
const_data_iterator value_iterator
inner_value_type value_type
inner_const_reference reference
const_bitmap_iterator bitmap_iterator
bitmap_type::iterator bitmap_iterator
array_inner_types< variable_size_binary_array_impl< T, CR, OT > >::iterator_tag iterator_tag
inner_value_type value_type
data_iterator value_iterator
inner_reference reference
variable_size_binary_reference< array_type > inner_reference
variable_size_binary_array_impl< T, CR, OT > array_type
std::random_access_iterator_tag iterator_tag
variable_size_binary_value_iterator< array_type, iterator_types > value_iterator
bitmap_type::const_iterator const_bitmap_iterator
const data_value_type * const_data_iterator
data_value_type * data_iterator
const OT * const_offset_iterator
typename T::value_type data_value_type
variable_size_binary_value_iterator< array_type, const_iterator_types > const_value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()