40 || std::is_same_v<T, sequence_view<const std::byte>>;
43 template <variable_size_binary_view_impl_types T>
85 template <variable_size_binary_view_impl_types T>
99 template <variable_size_binary_view_impl_types T>
104 template <variable_size_binary_view_impl_types T>
112 template <variable_size_binary_view_impl_types T>
115 template <variable_size_binary_view_impl_types T>
153 template <
class... Args>
169 template <std::ranges::input_range R>
170 requires std::convertible_to<std::ranges::range_value_t<R>, T>
171 static buffers create_buffers(R&& range);
173 template <std::ranges::input_range R, val
idity_bitmap_input VB = val
idity_bitmap, input_metadata_container METADATA_RANGE>
174 requires std::convertible_to<std::ranges::range_value_t<R>, T>
175 [[nodiscard]]
static arrow_proxy create_proxy(
178 std::optional<std::string_view> name = std::nullopt,
179 std::optional<METADATA_RANGE> metadata = std::nullopt
182 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
183 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T>>
184 [[nodiscard]]
static arrow_proxy create_proxy(
185 NULLABLE_RANGE&& nullable_range,
186 std::optional<std::string_view> name = std::nullopt,
187 std::optional<METADATA_RANGE> metadata = std::nullopt
190 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
191 requires std::convertible_to<std::ranges::range_value_t<R>, T>
192 [[nodiscard]]
static arrow_proxy create_proxy(
195 std::optional<std::string_view> name = std::nullopt,
196 std::optional<METADATA_RANGE> metadata = std::nullopt
208 static constexpr size_type LENGTH_BUFFER_INDEX = 1;
209 static constexpr std::size_t DATA_BUFFER_SIZE = 16;
210 static constexpr std::size_t SHORT_STRING_SIZE = 12;
211 static constexpr std::size_t PREFIX_SIZE = 4;
212 static constexpr std::ptrdiff_t PREFIX_OFFSET = 4;
213 static constexpr std::ptrdiff_t SHORT_STRING_OFFSET = 4;
214 static constexpr std::ptrdiff_t BUFFER_INDEX_OFFSET = 8;
215 static constexpr std::ptrdiff_t BUFFER_OFFSET_OFFSET = 12;
216 static constexpr std::size_t FIRST_VAR_DATA_BUFFER_INDEX = 2;
224 template <variable_size_binary_view_impl_types T>
230 template <variable_size_binary_view_impl_types T>
231 template <std::ranges::input_range R>
232 requires std::convertible_to<std::ranges::range_value_t<R>, T>
233 auto variable_size_binary_view_array_impl<T>::create_buffers(R&& range) -> buffers
236# pragma GCC diagnostic push
237# pragma GCC diagnostic ignored "-Wcast-align"
243 std::size_t long_string_storage_size = 0;
245 for (
auto&& val : range)
247 auto val_casted = val
248 | std::ranges::views::transform(
251 return static_cast<std::uint8_t
>(v);
255 const auto length = val.size();
256 auto length_ptr = length_buffer.
data() + (i * DATA_BUFFER_SIZE);
259 *
reinterpret_cast<std::int32_t*
>(length_ptr) =
static_cast<std::int32_t
>(length);
261 if (length <= SHORT_STRING_SIZE)
269 auto prefix_sub_range = val_casted | std::ranges::views::take(PREFIX_SIZE);
273 *
reinterpret_cast<std::int32_t*
>(
274 length_ptr + BUFFER_INDEX_OFFSET
275 ) =
static_cast<std::int32_t
>(FIRST_VAR_DATA_BUFFER_INDEX);
278 *
reinterpret_cast<std::int32_t*
>(
279 length_ptr + BUFFER_OFFSET_OFFSET
280 ) =
static_cast<std::int32_t
>(long_string_storage_size);
283 long_string_storage_size += length;
290 std::size_t long_string_storage_offset = 0;
291 for (
auto&& val : range)
293 const auto length = val.size();
294 if (length > SHORT_STRING_SIZE)
296 auto val_casted = val
297 | std::ranges::views::transform(
300 return static_cast<std::uint8_t
>(v);
305 long_string_storage_offset += length;
314 static_cast<std::size_t
>(1),
315 static_cast<int64_t
>(long_string_storage_size)
318 return {std::move(length_buffer), std::move(long_string_storage), std::move(buffer_sizes)};
321# pragma GCC diagnostic pop
325 template <variable_size_binary_view_impl_types T>
326 template <std::ranges::input_range R, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
327 requires std::convertible_to<std::ranges::range_value_t<R>, T>
328 arrow_proxy variable_size_binary_view_array_impl<T>::create_proxy(
331 std::optional<std::string_view> name,
332 std::optional<METADATA_RANGE> metadata
341 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
345 std::is_same<T, std::string_view>::value ? std::string_view(
"vu") : std::string_view(
"vz"),
356 auto buffers_parts = create_buffers(std::forward<R>(range));
358 std::vector<buffer<uint8_t>> buffers{
359 std::move(vbitmap).extract_storage(),
360 std::move(buffers_parts.length_buffer),
361 std::move(buffers_parts.long_string_storage),
362 std::move(buffers_parts.buffer_sizes).extract_storage()
367 static_cast<std::int64_t
>(size),
368 static_cast<int64_t
>(null_count),
377 return arrow_proxy{std::move(arr), std::move(schema)};
380 template <variable_size_binary_view_impl_types T>
381 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
382 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>,
nullable<T>>
383 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T>::create_proxy(
384 NULLABLE_RANGE&& nullable_range,
385 std::optional<std::string_view> name,
386 std::optional<METADATA_RANGE> metadata
389 auto values = nullable_range
390 | std::views::transform(
393 return static_cast<std::string_view
>(v.value());
397 auto is_non_null = nullable_range
398 | std::views::transform(
401 return v.has_value();
406 std::forward<
decltype(values)>(values),
407 std::forward<
decltype(is_non_null)>(is_non_null),
413 template <variable_size_binary_view_impl_types T>
414 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
415 requires std::convertible_to<std::ranges::range_value_t<R>, T>
416 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T>::create_proxy(
419 std::optional<std::string_view> name,
420 std::optional<METADATA_RANGE> metadata
425 return create_proxy(std::forward<R>(range),
validity_bitmap{}, std::move(name), std::move(metadata));
432 std::is_same<T, std::string_view>::value ? std::string_view(
"vu") : std::string_view(
"vz"),
443 auto buffers_parts = create_buffers(std::forward<R>(range));
445 std::vector<buffer<uint8_t>> buffers{
447 std::move(buffers_parts.length_buffer),
448 std::move(buffers_parts.long_string_storage),
449 std::move(buffers_parts.buffer_sizes).extract_storage()
455 static_cast<std::int64_t
>(size),
456 static_cast<int64_t
>(0),
465 return arrow_proxy{std::move(arr), std::move(schema)};
469 template <variable_size_binary_view_impl_types T>
470 constexpr auto variable_size_binary_view_array_impl<T>::value(size_type i) -> inner_reference
472 return static_cast<const self_type*
>(
this)->value(i);
475 template <variable_size_binary_view_impl_types T>
476 constexpr auto variable_size_binary_view_array_impl<T>::value(size_type i)
const -> inner_const_reference
479# pragma GCC diagnostic push
480# pragma GCC diagnostic ignored "-Wcast-align"
485 constexpr std::size_t element_size = 16;
486 auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data<uint8_t>()
487 + (i * element_size);
489 auto length =
static_cast<std::size_t
>(*
reinterpret_cast<const std::int32_t*
>(data_ptr));
490 using char_or_byte =
typename inner_const_reference::value_type;
494 constexpr std::ptrdiff_t data_offset = 4;
495 auto ptr =
reinterpret_cast<const char_or_byte*
>(data_ptr);
496 const auto ret = inner_const_reference(ptr + data_offset, length);
501 constexpr std::ptrdiff_t buffer_index_offset = 8;
502 constexpr std::ptrdiff_t buffer_offset_offset = 12;
503 auto buffer_index =
static_cast<std::size_t
>(
504 *
reinterpret_cast<const std::int32_t*
>(data_ptr + buffer_index_offset)
506 auto buffer_offset =
static_cast<std::size_t
>(
507 *
reinterpret_cast<const std::int32_t*
>(data_ptr + buffer_offset_offset)
509 auto buffer = this->get_arrow_proxy().buffers()[buffer_index].template data<const char_or_byte>();
510 return inner_const_reference(
buffer + buffer_offset, length);
514# pragma GCC diagnostic pop
518 template <variable_size_binary_view_impl_types T>
519 constexpr auto variable_size_binary_view_array_impl<T>::value_begin() -> value_iterator
524 template <variable_size_binary_view_impl_types T>
525 constexpr auto variable_size_binary_view_array_impl<T>::value_end() -> value_iterator
530 template <variable_size_binary_view_impl_types T>
531 constexpr auto variable_size_binary_view_array_impl<T>::value_cbegin() const -> const_value_iterator
536 template <variable_size_binary_view_impl_types T>
537 constexpr auto variable_size_binary_view_array_impl<T>::value_cend() const -> const_value_iterator
539 return const_value_iterator(
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
Object that owns a piece of contiguous memory.
constexpr U * data() noexcept
constexpr size_type null_count() const noexcept
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
A view that repeats a value a given number of times.
This buffer class is use as storage buffer for all sparrow arrays.
variable_size_binary_view_array_impl(Args &&... args)
variable_size_binary_view_array_impl(arrow_proxy)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::size_type size_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::difference_type difference_type
typename base_type::iterator_tag iterator_tag
nullable< inner_reference, bitmap_reference > reference
typename base_type::bitmap_type bitmap_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::bitmap_iterator bitmap_iterator
array_inner_types< self_type > inner_types
typename inner_types::inner_reference inner_reference
typename inner_types::inner_const_reference inner_const_reference
variable_size_binary_view_array_impl< T > self_type
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::value_iterator value_iterator
typename base_type::bitmap_range bitmap_range
mutable_array_bitmap_base< self_type > base_type
nullable< inner_value_type > value_type
typename base_type::const_iterator const_iterator
typename base_type::bitmap_reference bitmap_reference
typename base_type::iterator iterator
typename inner_types::inner_value_type inner_value_type
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
constexpr bool excludes_copy_and_move_ctor_v
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_variable_size_binary_view_array
Checks whether T is a variable_size_binary_view_array_impl type.
constexpr std::size_t range_size(R &&r)
variable_size_binary_view_array_impl< std::string_view > string_view_array
A variable-size string view layout implementation.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
variable_size_binary_view_array_impl< sequence_view< const std::byte > > binary_view_array
A variable-size binary view layout implementation.
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
variable_size_binary_view_array_impl< T > array_type
inner_reference inner_const_reference
std::random_access_iterator_tag iterator_tag
functor_index_iterator< detail::layout_value_functor< const array_type, inner_reference > > const_value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.