39 template <std::ranges::sized_range T,
class CR>
85 template <std::ranges::sized_range T,
class CR>
104 template <std::ranges::sized_range T,
class CR>
154 template <std::ranges::sized_range T,
class CR>
217 template <
class... Args>
/**
 * Builds the layout buffers from a range of values.
 *
 * @tparam R Input range whose value type is convertible to T.
 * @param range The values to encode.
 * @return A `buffers` object; the out-of-class definition further down in this
 *         file returns it as {length_buffer, long_string_storage, buffer_sizes}.
 */
259 template <std::ranges::input_range R>
260 requires std::convertible_to<std::ranges::range_value_t<R>, T>
261 static buffers create_buffers(R&& range);
/**
 * Creates an arrow_proxy from a range of values together with a validity input.
 *
 * @tparam R Input range whose value type is convertible to T.
 * @tparam VB Validity input type; defaults to validity_bitmap.
 * @tparam METADATA_RANGE Metadata container type.
 * @param name Optional name (std::string_view); defaults to std::nullopt.
 * @param metadata Optional metadata; defaults to std::nullopt.
 * @return The created arrow_proxy (marked [[nodiscard]]).
 *
 * NOTE(review): the leading parameters of this declaration are not visible in
 * this excerpt; presumably the value range and the validity input - confirm.
 */
282 template <std::ranges::input_range R, val
idity_bitmap_input VB = val
idity_bitmap, input_metadata_container METADATA_RANGE>
283 requires std::convertible_to<std::ranges::range_value_t<R>, T>
284 [[nodiscard]]
static arrow_proxy create_proxy(
287 std::optional<std::string_view> name = std::nullopt,
288 std::optional<METADATA_RANGE> metadata = std::nullopt
/**
 * Creates an arrow_proxy from a range of nullable values.
 *
 * @tparam NULLABLE_RANGE Input range whose value type is convertible to nullable<T>.
 * @tparam METADATA_RANGE Metadata container type.
 * @param nullable_range The nullable values; the definition in this file derives
 *        both the values (via value()) and the validity flags (via has_value())
 *        from it.
 * @param name Optional name (std::string_view); defaults to std::nullopt.
 * @param metadata Optional metadata; defaults to std::nullopt.
 * @return The created arrow_proxy (marked [[nodiscard]]).
 */
308 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
309 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T>>
310 [[nodiscard]]
static arrow_proxy create_proxy(
311 NULLABLE_RANGE&& nullable_range,
312 std::optional<std::string_view> name = std::nullopt,
313 std::optional<METADATA_RANGE> metadata = std::nullopt
/**
 * Creates an arrow_proxy from a range of values, without a validity-input
 * template parameter.
 *
 * @tparam R Input range whose value type is convertible to T.
 * @tparam METADATA_RANGE Metadata container type.
 * @param name Optional name (std::string_view); defaults to std::nullopt.
 * @param metadata Optional metadata; defaults to std::nullopt.
 * @return The created arrow_proxy (marked [[nodiscard]]).
 *
 * NOTE(review): the leading parameters of this declaration are not visible in
 * this excerpt - confirm them against the full header.
 */
332 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
333 requires std::convertible_to<std::ranges::range_value_t<R>, T>
334 [[nodiscard]]
static arrow_proxy create_proxy(
337 std::optional<std::string_view> name = std::nullopt,
338 std::optional<METADATA_RANGE> metadata = std::nullopt
/**
 * Creates an arrow_proxy from buffers that have already been built by the caller.
 *
 * @tparam VALUE_BUFFERS_RANGE Input range whose value type is convertible to
 *         u8_buffer<uint8_t>.
 * @tparam VB Validity input type.
 * @tparam METADATA_RANGE Metadata container type.
 * @param element_count Number of elements.
 * @param buffer_view Buffer taken by rvalue reference; the definition in this
 *        file places it right after the validity storage in the buffer list.
 * @param value_buffers Additional buffers; the definition appends each of them
 *        to the buffer list and records each one's size.
 * @param name Optional name (std::string_view); defaults to std::nullopt.
 * @param metadata Optional metadata; defaults to std::nullopt.
 * @return The created arrow_proxy (marked [[nodiscard]]).
 *
 * NOTE(review): one parameter between value_buffers and name is not visible in
 * this excerpt; presumably the validity input of type VB - confirm.
 */
360 template <std::ranges::input_range VALUE_BUFFERS_RANGE, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
361 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>, u8_buffer<uint8_t>>
362 [[nodiscard]]
static arrow_proxy create_proxy(
363 size_t element_count,
364 u8_buffer<uint8_t>&& buffer_view,
365 VALUE_BUFFERS_RANGE&& value_buffers,
367 std::optional<std::string_view> name = std::nullopt,
368 std::optional<METADATA_RANGE> metadata = std::nullopt
// Layout constants for the fixed-size per-element slot and for the array's buffer list.

// Index, in the array's buffer list, of the buffer holding the per-element slots;
// value() reads element i at buffers()[LENGTH_BUFFER_INDEX] + i * DATA_BUFFER_SIZE.
436 static constexpr size_type LENGTH_BUFFER_INDEX = 1;
// Size in bytes of one per-element slot (the stride between consecutive elements).
437 static constexpr std::size_t DATA_BUFFER_SIZE = 16;
// Largest length that takes the `length <= SHORT_STRING_SIZE` branch in
// create_buffers() and value(), i.e. the value is read directly from the slot.
438 static constexpr std::size_t SHORT_STRING_SIZE = 12;
// Number of leading elements taken as the prefix of a long value
// (std::ranges::views::take(PREFIX_SIZE) in create_buffers()).
439 static constexpr std::size_t PREFIX_SIZE = 4;
// NOTE(review): not referenced in the visible lines; presumably the byte offset of
// the prefix inside the slot - confirm.
440 static constexpr std::ptrdiff_t PREFIX_OFFSET = 4;
// Byte offset inside the slot used for short values in create_buffers(); value()
// uses a local `data_offset = 4` with the same value.
441 static constexpr std::ptrdiff_t SHORT_STRING_OFFSET = 4;
// Byte offset inside the slot of the int32 buffer index of a long value.
442 static constexpr std::ptrdiff_t BUFFER_INDEX_OFFSET = 8;
// Byte offset inside the slot of the int32 offset of a long value within its data buffer.
443 static constexpr std::ptrdiff_t BUFFER_OFFSET_OFFSET = 12;
// Added to the buffer index stored in a slot to obtain the position of the
// corresponding data buffer in the array's buffer list (see value()).
444 static constexpr std::size_t FIRST_VAR_DATA_BUFFER_INDEX = 2;
453 template <std::ranges::sized_range T,
class CR>
/**
 * Encodes a range of values into the buffers used by this layout.
 *
 * The visible code iterates `range` twice:
 *  - the first loop writes, for every value, its length (as int32) at the start
 *    of that value's DATA_BUFFER_SIZE-byte slot in `length_buffer`; for values
 *    longer than SHORT_STRING_SIZE it also writes buffer index 0 and the value's
 *    offset in the long-value storage, and accumulates the total long-value size;
 *  - the second loop revisits the values longer than SHORT_STRING_SIZE and
 *    advances `long_string_storage_offset` by each length.
 *
 * @tparam R Input range whose value type is convertible to T.
 * @param range The values to encode.
 * @return {length_buffer, long_string_storage, buffer_sizes}, each moved out.
 *
 * NOTE(review): several lines of the body (braces, declarations of
 * `length_buffer`, `long_string_storage`, `buffer_sizes` and `i`, and the calls
 * that copy the bytes) are not visible in this excerpt.
 * NOTE(review): `range` is iterated twice but R is only constrained as
 * std::ranges::input_range; a single-pass range would not survive the second
 * loop - consider requiring forward_range, confirm against callers.
 * NOTE(review): lengths and offsets are narrowed to std::int32_t without a
 * visible range check - confirm inputs cannot exceed INT32_MAX.
 */
459 template <std::ranges::sized_range T,
class CR>
460 template <std::ranges::input_range R>
461 requires std::convertible_to<std::ranges::range_value_t<R>, T>
462 auto variable_size_binary_view_array_impl<T, CR>::create_buffers(R&& range) -> buffers
// -Wcast-align is disabled until the matching `pop` at the end; presumably
// because of the reinterpret_cast<std::int32_t*> writes below.
465# pragma GCC diagnostic push
466# pragma GCC diagnostic ignored "-Wcast-align"
// Running total of the sizes of all values longer than SHORT_STRING_SIZE.
472 std::size_t long_string_storage_size = 0;
// First pass: fill the per-element slots.
474 for (
auto&& val : range)
// View of the value's elements converted to std::uint8_t.
476 auto val_casted = val
477 | std::ranges::views::transform(
480 return static_cast<std::uint8_t
>(v);
484 const auto length = val.size();
// Start of this element's slot (`i` is declared in a line not shown here).
485 auto length_ptr = length_buffer.
data() + (i * DATA_BUFFER_SIZE);
// The slot starts with the length, stored as int32.
488 *
reinterpret_cast<std::int32_t*
>(length_ptr) =
static_cast<std::int32_t
>(length);
// Short value: handled entirely inside the slot.
490 if (length <= SHORT_STRING_SIZE)
// Arguments delimiting the unused tail of the slot, from the end of the short
// value to the end of the slot (the call they belong to is not visible;
// presumably a zero-fill - confirm).
495 length_ptr + SHORT_STRING_OFFSET + length,
496 length_ptr + DATA_BUFFER_SIZE,
// Long value: only the first PREFIX_SIZE elements are taken as the prefix.
503 auto prefix_sub_range = val_casted | std::ranges::views::take(PREFIX_SIZE);
// Buffer index field is always written as 0 here.
507 *
reinterpret_cast<std::int32_t*
>(length_ptr + BUFFER_INDEX_OFFSET) = 0;
// Offset field: the running total before this value is added.
510 *
reinterpret_cast<std::int32_t*
>(
511 length_ptr + BUFFER_OFFSET_OFFSET
512 ) =
static_cast<std::int32_t
>(long_string_storage_size);
515 long_string_storage_size += length;
// Second pass: visit the long values again, tracking the write offset.
522 std::size_t long_string_storage_offset = 0;
523 for (
auto&& val : range)
525 const auto length = val.size();
526 if (length > SHORT_STRING_SIZE)
528 auto val_casted = val
529 | std::ranges::views::transform(
532 return static_cast<std::uint8_t
>(v);
537 long_string_storage_offset += length;
// Arguments (a count of 1 and the total long-value size as int64) of a
// construction whose start is not visible; presumably `buffer_sizes` - confirm.
546 static_cast<std::size_t
>(1),
547 static_cast<int64_t
>(long_string_storage_size)
550 return {std::move(length_buffer), std::move(long_string_storage), std::move(buffer_sizes)};
553# pragma GCC diagnostic pop
/**
 * Builds an arrow_proxy from a range of values and a validity input.
 *
 * The visible code encodes the values with create_buffers(), assembles a list of
 * four buffers (validity storage, length_buffer, long_string_storage,
 * buffer_sizes storage) and returns an arrow_proxy made from an array (`arr`)
 * and a schema (`schema`).
 *
 * @param name Optional name.
 * @param metadata Optional metadata.
 * @return arrow_proxy owning the moved-in `arr` and `schema`.
 *
 * NOTE(review): the leading parameters and the declarations of `vbitmap`,
 * `size`, `null_count`, `arr` and `schema` are not visible in this excerpt.
 */
557 template <std::ranges::sized_range T,
class CR>
558 template <std::ranges::input_range R, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
559 requires std::convertible_to<std::ranges::range_value_t<R>, T>
560 arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
563 std::optional<std::string_view> name,
564 std::optional<METADATA_RANGE> metadata
// Function-local static: the flag set {NULLABLE} is constructed once and reused.
573 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
// Format string "vz" for the second alternative of a conditional whose first
// part is not visible here.
578 : std::string_view(
"vz"),
589 auto buffers_parts = create_buffers(std::forward<R>(range));
// Buffer order: validity storage, per-element slots, long-value storage, sizes.
// NOTE(review): a braced vector init goes through std::initializer_list, whose
// elements are const, so these moved-in buffers are copied once more - consider
// reserve + emplace_back if the copy matters.
591 std::vector<buffer<uint8_t>> buffers{
592 std::move(vbitmap).extract_storage(),
593 std::move(buffers_parts.length_buffer),
594 std::move(buffers_parts.long_string_storage),
595 std::move(buffers_parts.buffer_sizes).extract_storage()
// Length and null count passed as 64-bit integers.
600 static_cast<std::int64_t
>(size),
601 static_cast<int64_t
>(null_count),
610 return arrow_proxy{std::move(arr), std::move(schema)};
/**
 * Builds an arrow_proxy from a range of nullable values.
 *
 * Splits `nullable_range` into two lazy views - the values (each converted to T
 * via value()) and the validity flags (has_value()) - and passes both on to
 * another call.
 *
 * @param nullable_range Range whose value type is convertible to nullable<T>.
 * @param name Optional name.
 * @param metadata Optional metadata.
 * @return The created arrow_proxy.
 *
 * NOTE(review): the call receiving `values` and `is_non_null` is not visible in
 * this excerpt; presumably the range + validity create_proxy overload - confirm.
 */
613 template <std::ranges::sized_range T,
class CR>
614 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
615 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>,
nullable<T>>
616 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
617 NULLABLE_RANGE&& nullable_range,
618 std::optional<std::string_view> name,
619 std::optional<METADATA_RANGE> metadata
// View yielding each element's value converted to T.
622 auto values = nullable_range
623 | std::views::transform(
626 return static_cast<T
>(v.value());
// View yielding true for elements that hold a value.
630 auto is_non_null = nullable_range
631 | std::views::transform(
634 return v.has_value();
// Both views are plain locals (not references), so std::forward<decltype(x)>(x)
// yields an rvalue here, i.e. it behaves like std::move.
639 std::forward<
decltype(values)>(values),
640 std::forward<
decltype(is_non_null)>(is_non_null),
/**
 * Builds an arrow_proxy from a range of values, with no validity input supplied
 * by the caller.
 *
 * Two paths are visible:
 *  - one returns the range + validity overload called with a
 *    default-constructed validity_bitmap;
 *  - the other encodes the values with create_buffers(), assembles the buffer
 *    list itself and passes a null count of 0.
 *
 * @param name Optional name.
 * @param metadata Optional metadata.
 * @return The created arrow_proxy.
 *
 * NOTE(review): the leading parameters and the condition selecting between the
 * two paths are not visible in this excerpt.
 */
646 template <std::ranges::sized_range T,
class CR>
647 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
648 requires std::convertible_to<std::ranges::range_value_t<R>, T>
649 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
652 std::optional<std::string_view> name,
653 std::optional<METADATA_RANGE> metadata
// Path 1: delegate, passing a default-constructed validity bitmap.
658 return create_proxy(std::forward<R>(range),
validity_bitmap{}, std::move(name), std::move(metadata));
// Path 2: format string "vz" for the second alternative of a conditional whose
// first part is not visible here.
666 : std::string_view(
"vz"),
677 auto buffers_parts = create_buffers(std::forward<R>(range));
// NOTE(review): the first initializer of this list is on a line not shown in
// this excerpt; presumably a placeholder for the validity buffer - confirm.
679 std::vector<buffer<uint8_t>> buffers{
681 std::move(buffers_parts.length_buffer),
682 std::move(buffers_parts.long_string_storage),
683 std::move(buffers_parts.buffer_sizes).extract_storage()
// Length, then a null count fixed at 0.
689 static_cast<std::int64_t
>(size),
690 static_cast<int64_t
>(0),
699 return arrow_proxy{std::move(arr), std::move(schema)};
/**
 * Builds an arrow_proxy from buffers supplied by the caller.
 *
 * The buffer list is assembled as: validity storage, `buffer_view`, every
 * element of `value_buffers`, and finally the storage of `buffer_sizes`, which
 * holds the size of each value buffer as int64.
 *
 * @param element_count Number of elements.
 * @param value_buffers Range whose value type is convertible to u8_buffer<uint8_t>.
 * @param name Optional name.
 * @param metadata Optional metadata.
 * @return arrow_proxy owning the moved-in `arr` and `schema`.
 *
 * NOTE(review): two parameters (presumably `buffer_view` and the validity
 * input) and the declarations of `bitmap`, `size`, `buffer_sizes`, `arr` and
 * `schema` are not visible in this excerpt.
 * NOTE(review): `bitmap.null_count()` is read after
 * `std::move(bitmap).extract_storage()`; confirm that the null count is still
 * valid on a bitmap whose storage has been extracted, or capture it beforehand.
 * NOTE(review): `value_buffers` is indexed with operator[] and queried with
 * size() although it is only constrained as std::ranges::input_range, and it is
 * read again after its elements were forwarded into `buffers` - confirm.
 */
703 template <std::ranges::sized_range T,
class CR>
704 template <std::ranges::input_range VALUE_BUFFERS_RANGE, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
705 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>,
u8_buffer<uint8_t>>
706 arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
707 size_t element_count,
709 VALUE_BUFFERS_RANGE&& value_buffers,
711 std::optional<std::string_view> name,
712 std::optional<METADATA_RANGE> metadata
// Function-local static: the flag set {NULLABLE} is constructed once and reused.
719 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
// Format string "vz" for the second alternative of a conditional whose first
// part is not visible here.
724 : std::string_view(
"vz"),
// The list starts with the validity storage followed by buffer_view.
735 std::vector<buffer<uint8_t>> buffers{std::move(bitmap).extract_storage(), std::move(
buffer_view)};
// Append every caller-provided value buffer.
736 for (
auto&& buf : value_buffers)
738 buffers.push_back(std::forward<
decltype(buf)>(buf));
// Record the size of each value buffer as int64.
743 for (std::size_t i = 0; i < value_buffers.size(); ++i)
745 buffer_sizes[i] =
static_cast<int64_t
>(value_buffers[i].size());
// The sizes buffer goes last in the list.
747 buffers.push_back(std::move(buffer_sizes).extract_storage());
// Length and null count passed as 64-bit integers.
750 static_cast<std::int64_t
>(size),
751 static_cast<std::int64_t
>(bitmap.null_count()),
760 return arrow_proxy{std::move(arr), std::move(schema)};
/**
 * Returns the value at index i (non-const overload).
 *
 * Delegates to the const overload by casting `this` to `const self_type*`, so
 * the decoding logic exists only once.
 *
 * @param i Index of the element.
 * @return The const overload's result, returned as inner_reference.
 */
763 template <std::ranges::sized_range T,
class CR>
764 constexpr auto variable_size_binary_view_array_impl<T, CR>::value(size_type i) -> inner_reference
766 return static_cast<const self_type*
>(
this)->value(i);
/**
 * Returns a view of the value at index i (const overload).
 *
 * Reads element i's DATA_BUFFER_SIZE-byte slot from the buffer at
 * LENGTH_BUFFER_INDEX. The slot starts with the length as int32. If the length
 * is at most SHORT_STRING_SIZE, the data is read from the slot itself at byte
 * offset 4. Otherwise the slot provides a buffer index and a buffer offset, and
 * the data is read from buffers()[buffer_index + FIRST_VAR_DATA_BUFFER_INDEX]
 * at that offset.
 *
 * @param i Index of the element.
 * @return inner_const_reference built from a pointer to the data and the length.
 *
 * NOTE(review): no bounds check on i, buffer_index or buffer_offset is visible
 * in this excerpt, and the int32 fields are cast to std::size_t directly, so a
 * negative stored value would wrap - confirm the invariants are enforced
 * elsewhere.
 */
769 template <std::ranges::sized_range T,
class CR>
770 constexpr auto variable_size_binary_view_array_impl<T, CR>::value(size_type i)
const
771 -> inner_const_reference
// -Wcast-align is disabled until the matching `pop` at the end; presumably
// because of the reinterpret_cast<const std::int32_t*> reads below.
774# pragma GCC diagnostic push
775# pragma GCC diagnostic ignored "-Wcast-align"
// Element type of the returned reference.
779 using char_or_byte =
typename inner_const_reference::value_type;
// Start of element i's slot, as a byte pointer.
781 auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data<uint8_t>()
782 + (i * DATA_BUFFER_SIZE);
// The slot starts with the length, stored as int32.
783 const auto length =
static_cast<std::size_t
>(*
reinterpret_cast<const std::int32_t*
>(data_ptr));
// Short value: the data lives inside the slot.
785 if (length <= SHORT_STRING_SIZE)
// Same value as SHORT_STRING_OFFSET.
787 constexpr std::ptrdiff_t data_offset = 4;
788 const auto ptr =
reinterpret_cast<const char_or_byte*
>(data_ptr);
// NOTE(review): the statement returning `ret` is not visible in this excerpt.
789 const auto ret = inner_const_reference(ptr + data_offset, length);
// Long value: the slot stores which data buffer holds the value...
794 const auto buffer_index =
static_cast<std::size_t
>(
795 *
reinterpret_cast<const std::int32_t*
>(data_ptr + BUFFER_INDEX_OFFSET)
// ...and where in that buffer the value starts.
797 const auto buffer_offset =
static_cast<std::size_t
>(
798 *
reinterpret_cast<const std::int32_t*
>(data_ptr + BUFFER_OFFSET_OFFSET)
// Data buffers follow the first FIRST_VAR_DATA_BUFFER_INDEX entries of the buffer list.
800 const auto buffer = this->get_arrow_proxy()
801 .buffers()[buffer_index + FIRST_VAR_DATA_BUFFER_INDEX]
802 .template data<const char_or_byte>();
803 return inner_const_reference(
buffer + buffer_offset, length);
807# pragma GCC diagnostic pop
811 template <std::ranges::sized_range T,
class CR>
812 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_begin() -> value_iterator
817 template <std::ranges::sized_range T,
class CR>
818 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_end() -> value_iterator
823 template <std::ranges::sized_range T,
class CR>
824 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_cbegin() const -> const_value_iterator
829 template <std::ranges::sized_range T,
class CR>
830 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_cend() const -> const_value_iterator
832 return const_value_iterator(
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
constexpr size_type size() const noexcept
Object that owns a piece of contiguous memory.
constexpr U * data() noexcept
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_type bitmap_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::bitmap_iterator bitmap_iterator
nullable< inner_const_reference, bitmap_const_reference > const_reference
nullable< inner_reference, bitmap_reference > reference
variable_size_binary_view_array_impl(arrow_proxy)
Constructs variable-size binary view array from Arrow proxy.
typename base_type::size_type size_type
variable_size_binary_view_array_impl(Args &&... args)
Generic constructor for creating variable-size binary view array.
variable_size_binary_view_array_impl< T, arrow_traits< std::string >::const_reference > self_type
typename base_type::iterator_tag iterator_tag
typename inner_types::inner_reference inner_reference
typename base_type::value_iterator value_iterator
typename inner_types::inner_value_type inner_value_type
mutable_array_bitmap_base< self_type > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::inner_const_reference inner_const_reference
typename base_type::difference_type difference_type
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::const_iterator const_iterator
typename base_type::iterator iterator
typename base_type::bitmap_range bitmap_range
array_inner_types< self_type > inner_types
nullable< inner_value_type > value_type
typename base_type::bitmap_reference bitmap_reference
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_variable_size_binary_view_array
Checks whether T is a variable_size_binary_view_array_impl type.
constexpr std::size_t range_size(R &&r)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
variable_size_binary_view_array_impl< arrow_traits< std::string >::value_type, arrow_traits< std::string >::const_reference > string_view_array
A variable-size string view layout implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
variable_size_binary_view_array_impl< arrow_traits< std::vector< byte_t > >::value_type, arrow_traits< std::vector< byte_t > >::const_reference > binary_view_array
A variable-size binary view layout implementation.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
variable_size_binary_view_array_impl< T, CR > array_type
std::random_access_iterator_tag iterator_tag
inner_reference inner_const_reference
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.