39 template <std::ranges::sized_range T,
class CR>
85 template <std::ranges::sized_range T,
class CR>
104 template <std::ranges::sized_range T,
class CR>
154 template <std::ranges::sized_range T,
class CR>
217 template <
class... Args>
246 [[nodiscard]]
static constexpr std::string_view get_arrow_format()
248 return std::is_same_v<T, arrow_traits<std::string>::value_type> ? std::string_view(
"vu")
249 :
std::string_view(
"vz");
261 template <input_metadata_container METADATA_RANGE>
262 [[nodiscard]]
static ArrowSchema create_arrow_schema(
263 std::optional<std::string_view> name,
264 std::optional<METADATA_RANGE> metadata,
265 std::optional<std::unordered_set<sparrow::ArrowFlag>> flags
268 constexpr repeat_view<bool> children_ownership(
true, 0);
299 template <std::ranges::input_range R>
300 requires std::convertible_to<std::ranges::range_value_t<R>, T>
301 static buffers create_buffers(R&& range);
322 template <std::ranges::input_range R, val
idity_bitmap_input VB = val
idity_bitmap, input_metadata_container METADATA_RANGE>
323 requires std::convertible_to<std::ranges::range_value_t<R>, T>
324 [[nodiscard]]
static arrow_proxy create_proxy(
327 std::optional<std::string_view> name = std::nullopt,
328 std::optional<METADATA_RANGE> metadata = std::nullopt
348 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
349 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T>>
350 [[nodiscard]]
static arrow_proxy create_proxy(
351 NULLABLE_RANGE&& nullable_range,
352 std::optional<std::string_view> name = std::nullopt,
353 std::optional<METADATA_RANGE> metadata = std::nullopt
372 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
373 requires std::convertible_to<std::ranges::range_value_t<R>, T>
374 [[nodiscard]]
static arrow_proxy create_proxy(
377 std::optional<std::string_view> name = std::nullopt,
378 std::optional<METADATA_RANGE> metadata = std::nullopt
400 template <std::ranges::input_range VALUE_BUFFERS_RANGE, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
401 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>, u8_buffer<uint8_t>>
402 [[nodiscard]]
static arrow_proxy create_proxy(
403 size_t element_count,
404 u8_buffer<uint8_t>&& buffer_view,
405 VALUE_BUFFERS_RANGE&& value_buffers,
407 std::optional<std::string_view> name = std::nullopt,
408 std::optional<METADATA_RANGE> metadata = std::nullopt
476 static constexpr size_type LENGTH_BUFFER_INDEX = 1;
477 static constexpr std::size_t DATA_BUFFER_SIZE = 16;
478 static constexpr std::size_t SHORT_STRING_SIZE = 12;
479 static constexpr std::size_t PREFIX_SIZE = 4;
480 static constexpr std::ptrdiff_t PREFIX_OFFSET = 4;
481 static constexpr std::ptrdiff_t SHORT_STRING_OFFSET = 4;
482 static constexpr std::ptrdiff_t BUFFER_INDEX_OFFSET = 8;
483 static constexpr std::ptrdiff_t BUFFER_OFFSET_OFFSET = 12;
484 static constexpr std::size_t FIRST_VAR_DATA_BUFFER_INDEX = 2;
493 template <std::ranges::sized_range T,
class CR>
499 template <std::ranges::sized_range T,
class CR>
500 template <std::ranges::input_range R>
501 requires std::convertible_to<std::ranges::range_value_t<R>, T>
502 auto variable_size_binary_view_array_impl<T, CR>::create_buffers(R&& range) -> buffers
505# pragma GCC diagnostic push
506# pragma GCC diagnostic ignored "-Wcast-align"
510 auto to_uint8 = [](
const auto& v)
512 return static_cast<std::uint8_t
>(v);
518 std::size_t long_string_storage_size = 0;
520 for (
auto&& val : range)
522 auto val_casted = val | std::ranges::views::transform(to_uint8);
524 const auto length = val.size();
525 auto length_ptr = length_buffer.
data() + (i * DATA_BUFFER_SIZE);
528 *
reinterpret_cast<std::int32_t*
>(length_ptr) =
static_cast<std::int32_t
>(length);
530 if (length <= SHORT_STRING_SIZE)
535 length_ptr + SHORT_STRING_OFFSET + length,
536 length_ptr + DATA_BUFFER_SIZE,
543 auto prefix_sub_range = val_casted | std::ranges::views::take(PREFIX_SIZE);
547 *
reinterpret_cast<std::int32_t*
>(length_ptr + BUFFER_INDEX_OFFSET) = 0;
550 *
reinterpret_cast<std::int32_t*
>(
551 length_ptr + BUFFER_OFFSET_OFFSET
552 ) =
static_cast<std::int32_t
>(long_string_storage_size);
555 long_string_storage_size += length;
562 std::size_t long_string_storage_offset = 0;
563 for (
auto&& val : range)
565 const auto length = val.size();
566 if (length > SHORT_STRING_SIZE)
568 auto val_casted = val | std::ranges::views::transform(to_uint8);
571 long_string_storage_offset += length;
580 static_cast<std::size_t
>(1),
581 static_cast<int64_t
>(long_string_storage_size)
584 return {std::move(length_buffer), std::move(long_string_storage), std::move(buffer_sizes)};
587# pragma GCC diagnostic pop
591 template <std::ranges::sized_range T,
class CR>
592 template <std::ranges::input_range R, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
593 requires std::convertible_to<std::ranges::range_value_t<R>, T>
594 arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
597 std::optional<std::string_view> name,
598 std::optional<METADATA_RANGE> metadata
605 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
608 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
611 auto buffers_parts = create_buffers(std::forward<R>(range));
613 std::vector<buffer<uint8_t>> buffers{
614 std::move(vbitmap).extract_storage(),
615 std::move(buffers_parts.length_buffer),
616 std::move(buffers_parts.long_string_storage),
617 std::move(buffers_parts.buffer_sizes).extract_storage()
624 static_cast<std::int64_t
>(size),
625 static_cast<int64_t
>(null_count),
634 return arrow_proxy{std::move(arr), std::move(schema)};
637 template <std::ranges::sized_range T,
class CR>
638 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
639 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>,
nullable<T>>
640 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
641 NULLABLE_RANGE&& nullable_range,
642 std::optional<std::string_view> name,
643 std::optional<METADATA_RANGE> metadata
646 auto values = nullable_range
647 | std::views::transform(
650 return static_cast<T
>(v.value());
654 auto is_non_null = nullable_range
655 | std::views::transform(
658 return v.has_value();
663 std::forward<
decltype(values)>(values),
664 std::forward<
decltype(is_non_null)>(is_non_null),
670 template <std::ranges::sized_range T,
class CR>
671 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
672 requires std::convertible_to<std::ranges::range_value_t<R>, T>
673 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
676 std::optional<std::string_view> name,
677 std::optional<METADATA_RANGE> metadata
682 return create_proxy(std::forward<R>(range),
validity_bitmap{}, std::move(name), std::move(metadata));
686 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), std::nullopt);
689 auto buffers_parts = create_buffers(std::forward<R>(range));
691 std::vector<buffer<uint8_t>> buffers{
693 std::move(buffers_parts.length_buffer),
694 std::move(buffers_parts.long_string_storage),
695 std::move(buffers_parts.buffer_sizes).extract_storage()
703 static_cast<std::int64_t
>(size),
704 static_cast<int64_t
>(0),
713 return arrow_proxy{std::move(arr), std::move(schema)};
716 template <std::ranges::sized_range T,
class CR>
717 template <std::ranges::input_range VALUE_BUFFERS_RANGE, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
718 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>,
u8_buffer<uint8_t>>
719 arrow_proxy variable_size_binary_view_array_impl<T, CR>::create_proxy(
720 size_t element_count,
722 VALUE_BUFFERS_RANGE&& value_buffers,
724 std::optional<std::string_view> name,
725 std::optional<METADATA_RANGE> metadata
731 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
733 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
736 std::vector<buffer<uint8_t>> buffers{std::move(bitmap).extract_storage(), std::move(
buffer_view)};
737 for (
auto&& buf : value_buffers)
739 buffers.push_back(std::forward<
decltype(buf)>(buf));
744 for (std::size_t i = 0; i < value_buffers.size(); ++i)
746 buffer_sizes[i] =
static_cast<int64_t
>(value_buffers[i].size());
748 buffers.push_back(std::move(buffer_sizes).extract_storage());
753 static_cast<std::int64_t
>(size),
754 static_cast<std::int64_t
>(bitmap.null_count()),
763 return arrow_proxy{std::move(arr), std::move(schema)};
766 template <std::ranges::sized_range T,
class CR>
767 constexpr auto variable_size_binary_view_array_impl<T, CR>::value(size_type i) -> inner_reference
769 return static_cast<const self_type*
>(
this)->value(i);
772 template <std::ranges::sized_range T,
class CR>
773 constexpr auto variable_size_binary_view_array_impl<T, CR>::value(size_type i)
const
774 -> inner_const_reference
777# pragma GCC diagnostic push
778# pragma GCC diagnostic ignored "-Wcast-align"
782 using char_or_byte =
typename inner_const_reference::value_type;
784 auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data<uint8_t>()
785 + (i * DATA_BUFFER_SIZE);
786 const auto length =
static_cast<std::size_t
>(*
reinterpret_cast<const std::int32_t*
>(data_ptr));
788 if (length <= SHORT_STRING_SIZE)
790 constexpr std::ptrdiff_t data_offset = 4;
791 const auto ptr =
reinterpret_cast<const char_or_byte*
>(data_ptr);
792 const auto ret = inner_const_reference(ptr + data_offset, length);
797 const auto buffer_index =
static_cast<std::size_t
>(
798 *
reinterpret_cast<const std::int32_t*
>(data_ptr + BUFFER_INDEX_OFFSET)
800 const auto buffer_offset =
static_cast<std::size_t
>(
801 *
reinterpret_cast<const std::int32_t*
>(data_ptr + BUFFER_OFFSET_OFFSET)
803 const auto buffer = this->get_arrow_proxy()
804 .buffers()[buffer_index + FIRST_VAR_DATA_BUFFER_INDEX]
805 .template data<const char_or_byte>();
806 return inner_const_reference(
buffer + buffer_offset, length);
810# pragma GCC diagnostic pop
814 template <std::ranges::sized_range T,
class CR>
815 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_begin() -> value_iterator
820 template <std::ranges::sized_range T,
class CR>
821 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_end() -> value_iterator
826 template <std::ranges::sized_range T,
class CR>
827 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_cbegin() const -> const_value_iterator
832 template <std::ranges::sized_range T,
class CR>
833 constexpr auto variable_size_binary_view_array_impl<T, CR>::value_cend() const -> const_value_iterator
835 return const_value_iterator(
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
constexpr size_type size() const noexcept
Object that owns a piece of contiguous memory.
constexpr U * data() noexcept
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_type bitmap_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::bitmap_iterator bitmap_iterator
nullable< inner_const_reference, bitmap_const_reference > const_reference
nullable< inner_reference, bitmap_reference > reference
variable_size_binary_view_array_impl(arrow_proxy)
Constructs variable-size binary view array from Arrow proxy.
typename base_type::size_type size_type
variable_size_binary_view_array_impl(Args &&... args)
Generic constructor for creating variable-size binary view array.
variable_size_binary_view_array_impl< T, arrow_traits< std::string >::const_reference > self_type
typename base_type::iterator_tag iterator_tag
typename inner_types::inner_reference inner_reference
typename base_type::value_iterator value_iterator
typename inner_types::inner_value_type inner_value_type
mutable_array_bitmap_base< self_type > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::inner_const_reference inner_const_reference
typename base_type::difference_type difference_type
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::const_iterator const_iterator
typename base_type::iterator iterator
typename base_type::bitmap_range bitmap_range
array_inner_types< self_type > inner_types
nullable< inner_value_type > value_type
typename base_type::bitmap_reference bitmap_reference
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_variable_size_binary_view_array
Checks whether T is a variable_size_binary_view_array_impl type.
constexpr std::size_t range_size(R &&r)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
variable_size_binary_view_array_impl< arrow_traits< std::string >::value_type, arrow_traits< std::string >::const_reference > string_view_array
A variable-size string view layout implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
variable_size_binary_view_array_impl< arrow_traits< std::vector< byte_t > >::value_type, arrow_traits< std::vector< byte_t > >::const_reference > binary_view_array
A variable-size binary view layout implementation.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
variable_size_binary_view_array_impl< T, CR > array_type
std::random_access_iterator_tag iterator_tag
inner_reference inner_const_reference
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.