sparrow 0.3.0
Loading...
Searching...
No Matches
variable_size_binary_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstdint>
18#include <iterator>
19#include <numeric>
20#include <ranges>
21#include <string>
22#include <vector>
23
34
35namespace sparrow
36{
37 namespace detail
38 {
39 template <class T, class OT>
41
42 template <>
43 struct variable_size_binary_format<std::string, std::int32_t>
44 {
45 [[nodiscard]] static std::string format()
46 {
47 return "u";
48 }
49 };
50
51 template <>
52 struct variable_size_binary_format<std::string, std::int64_t>
53 {
54 [[nodiscard]] static std::string format()
55 {
56 return "U";
57 }
58 };
59
60 template <>
61 struct variable_size_binary_format<std::vector<byte_t>, std::int32_t>
62 {
63 [[nodiscard]] static std::string format()
64 {
65 return "z";
66 }
67 };
68
69 template <>
70 struct variable_size_binary_format<std::vector<byte_t>, std::int64_t>
71 {
72 [[nodiscard]] static std::string format()
73 {
74 return "Z";
75 }
76 };
77 }
78
// Forward declaration of the array implementation template.
// T must model std::ranges::sized_range, OT must satisfy layout_offset;
// CR is unconstrained at this point.
79 template <std::ranges::sized_range T, class CR, layout_offset OT>
80 class variable_size_binary_array_impl;
81
83
88 binary_traits::value_type,
89 binary_traits::const_reference,
90 std::int64_t>;
91
// Trait declared for array types; each specialization below exposes a static
// constexpr get() returning a sparrow::data_type.
// NOTE(review): the specialization headers and the return statements are not
// present in this listing, so which array type maps to which enumerator
// cannot be confirmed from here.
92 namespace detail
93 {
// Primary template: declared only, never defined in the visible code.
94 template <class T>
95 struct get_data_type_from_array;
96
97 template <>
99 {
100 [[nodiscard]] static constexpr sparrow::data_type get()
101 {
103 }
104 };
105
106 template <>
108 {
109 [[nodiscard]] static constexpr sparrow::data_type get()
110 {
112 }
113 };
114
115 template <>
117 {
118 [[nodiscard]] static constexpr sparrow::data_type get()
119 {
121 }
122 };
123
124 template <>
126 {
127 [[nodiscard]] static constexpr sparrow::data_type get()
128 {
130 }
131 };
132 }
133
137 template <class T>
138 constexpr bool is_string_array_v = std::same_as<T, string_array>;
139
143 template <class T>
144 constexpr bool is_big_string_array_v = std::same_as<T, big_string_array>;
145
149 template <class T>
150 constexpr bool is_binary_array_v = std::same_as<T, binary_array>;
151
155 template <class T>
156 constexpr bool is_big_binary_array_v = std::same_as<T, big_binary_array>;
157
// Inner-type bundle for variable_size_binary_array_impl<T, CR, OT>.
// NOTE(review): the struct header and several aliases are missing from this
// listing; only the aliases shown below can be documented.
158 template <std::ranges::sized_range T, class CR, layout_offset OT>
160 {
162
// Offsets are stored as OT.
166 using offset_type = OT;
167
// Element type of the value range T.
168 using data_value_type = typename T::value_type;
169
// Offsets are traversed with raw pointers.
170 using offset_iterator = OT*;
171 using const_offset_iterator = const OT*;
172
175
176 using iterator_tag = std::random_access_iterator_tag;
177
179
188
190
199
201
// Disabled aliases kept for reference.
202 // using iterator = layout_iterator<array_type, false>;
203 // using const_iterator = layout_iterator<array_type, true, CR>;
204 };
205
// Array of variable-length byte sequences, built on mutable_array_bitmap_base.
// The data is reached through the arrow proxy's buffers: index 1 holds the
// offsets and index 2 the value bytes (see OFFSET_BUFFER_INDEX and
// DATA_BUFFER_INDEX below).
// NOTE(review): the class header line and several members are missing from
// this listing; comments cover only what is visible.
206 template <std::ranges::sized_range T, class CR, layout_offset OT>
208 : public mutable_array_bitmap_base<variable_size_binary_array_impl<T, CR, OT>>
209 {
210 private:
211
// The elements of T must be exactly one byte wide.
212 static_assert(
213 sizeof(std::ranges::range_value_t<T>) == sizeof(std::uint8_t),
214 "Only sequences of types with the same size as uint8_t are supported"
215 );
216
217 public:
218
221
223 using inner_value_type = typename inner_types::inner_value_type;
224 using inner_reference = typename inner_types::inner_reference;
225 using inner_const_reference = typename inner_types::inner_const_reference;
226
227 using offset_type = typename inner_types::offset_type;
232
234 using bitmap_reference = typename base_type::bitmap_reference;
237
241
242 using offset_iterator = typename inner_types::offset_iterator;
243 using const_offset_iterator = typename inner_types::const_offset_iterator;
244
248 using data_iterator = typename inner_types::data_iterator;
249
250 using const_data_iterator = typename inner_types::const_data_iterator;
251 using data_value_type = typename inner_types::data_value_type;
252
253 using value_iterator = typename inner_types::value_iterator;
254 using const_value_iterator = typename inner_types::const_value_iterator;
255
257
// Variadic constructor (signature line missing here): forwards its arguments
// to one of the create_proxy overloads and delegates to another constructor
// of this class with the resulting proxy.
258 template <class... ARGS>
261 : self_type(create_proxy(std::forward<ARGS>(args)...))
262 {
263 }
264
265 using base_type::get_arrow_proxy;
266 using base_type::size;
267
// Read-only access to the i-th value.
269 [[nodiscard]] inner_const_reference value(size_type i) const;
270
// Builds an offset buffer from a range of element sizes.
271 template <std::ranges::range SIZES_RANGE>
272 [[nodiscard]] static auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
273
274 private:
275
// Proxy factory taking ready-made data and offset buffers.
276 template <mpl::char_like C, validity_bitmap_input VB = validity_bitmap>
277 [[nodiscard]] static arrow_proxy create_proxy(
278 u8_buffer<C>&& data_buffer,
279 offset_buffer_type&& list_offsets,
280 VB&& validity_input = validity_bitmap{},
281 std::optional<std::string_view> name = std::nullopt,
282 std::optional<std::string_view> metadata = std::nullopt
283 );
284
// Proxy factory taking a range of ranges plus a validity input.
// NOTE(review): the second half of the requires-clause is missing from this listing.
285 template <std::ranges::input_range R, validity_bitmap_input VB = validity_bitmap>
286 requires(
287 std::ranges::input_range<std::ranges::range_value_t<R>> && // a range of ranges
289 // range of
290 // char-like
291 )
292 [[nodiscard]] static arrow_proxy create_proxy(
293 R&& values,
294 VB&& validity_input = validity_bitmap{},
295 std::optional<std::string_view> name = std::nullopt,
296 std::optional<std::string_view> metadata = std::nullopt
297 );
298
299 // range of nullable values
300 template <std::ranges::input_range R>
301 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
302 [[nodiscard]] static arrow_proxy create_proxy(
303 R&&,
304 std::optional<std::string_view> name = std::nullopt,
305 std::optional<std::string_view> metadata = std::nullopt
306 );
307
// Positions of the offset and data buffers in the proxy's buffer list.
308 static constexpr size_t OFFSET_BUFFER_INDEX = 1;
309 static constexpr size_t DATA_BUFFER_INDEX = 2;
310
// Mutable accessors over offsets, raw data and values.
311 [[nodiscard]] offset_iterator offset(size_type i);
312 [[nodiscard]] offset_iterator offsets_begin();
313 [[nodiscard]] offset_iterator offsets_end();
314 [[nodiscard]] data_iterator data(size_type i);
315
316 [[nodiscard]] value_iterator value_begin();
317 [[nodiscard]] value_iterator value_end();
318
319 [[nodiscard]] const_value_iterator value_cbegin() const;
320 [[nodiscard]] const_value_iterator value_cend() const;
321
// Const counterparts of the accessors above.
322 [[nodiscard]] const_offset_iterator offset(size_type i) const;
323 [[nodiscard]] const_offset_iterator offsets_cbegin() const;
324 [[nodiscard]] const_offset_iterator offsets_cend() const;
325 [[nodiscard]] const_data_iterator data(size_type i) const;
326
327 // Modifiers
328
329 template <std::ranges::sized_range U>
330 requires mpl::convertible_ranges<U, T>
331 void resize_values(size_type new_length, U value);
332
333 void resize_offsets(size_type new_length, offset_type offset_value);
334
335 template <std::ranges::sized_range U>
336 requires mpl::convertible_ranges<U, T>
337 value_iterator insert_value(const_value_iterator pos, U value, size_type count);
338
339 offset_iterator insert_offset(const_offset_iterator pos, offset_type size, size_type count);
340
341 template <mpl::iterator_of_type<T> InputIt>
342 value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
343
344 template <mpl::iterator_of_type<OT> InputIt>
345 offset_iterator insert_offsets(const_offset_iterator pos, InputIt first, InputIt last);
346
347 value_iterator erase_values(const_value_iterator pos, size_type count);
348
349 offset_iterator erase_offsets(const_offset_iterator pos, size_type count);
350
// Overwrites the value stored at `index` with `rhs`.
351 template <std::ranges::sized_range U>
352 requires mpl::convertible_ranges<U, T>
353 void assign(U&& rhs, size_type index);
354
// The base class is a friend, so it can reach the private members above.
357 friend base_type;
360 };
361
362 /*********************************************
363 * variable_size_binary_array_impl implementation *
364 *********************************************/
365
// Constructor taking an arrow proxy (the signature line is missing from this
// listing): moves the proxy into base_type, then asserts on the proxy's data type.
366 template <std::ranges::sized_range T, class CR, layout_offset OT>
368 : base_type(std::move(proxy))
369 {
370 const auto type = this->get_arrow_proxy().data_type();
// Only STRING and BINARY pass this assertion; per the TODO, the LARGE_*
// variants are not accepted yet.
371 SPARROW_ASSERT_TRUE(type == data_type::STRING || type == data_type::BINARY); // TODO: Add
372 // data_type::LARGE_STRING
373 // and
374 // data_type::LARGE_BINARY
// Second check (its opening line is missing here): STRING/BINARY must be
// paired with 32-bit offsets.
376 ((type == data_type::STRING || type == data_type::BINARY) && std::same_as<OT, int32_t>)
377 );
378 }
379
// offset_from_sizes (the signature lines are missing from this listing):
// forwards `sizes` to detail::offset_buffer_from_sizes, instantiated with the
// non-const offset type, and returns the result.
380 template <std::ranges::sized_range T, class CR, layout_offset OT>
381 template <std::ranges::range SIZES_RANGE>
384 {
385 return detail::offset_buffer_from_sizes<std::remove_const_t<offset_type>>(std::forward<SIZES_RANGE>(sizes
386 ));
387 }
388
// Builds an arrow_proxy from a data buffer, an offset buffer and a validity input.
// The element count is offsets.size() - 1.
// NOTE(review): an empty offsets buffer would make that subtraction wrap if
// size() is unsigned - confirm that callers always supply at least one offset.
// NOTE(review): the lines that open the schema and array construction are
// missing from this listing.
389 template <std::ranges::sized_range T, class CR, layout_offset OT>
390 template <mpl::char_like C, validity_bitmap_input VB>
391 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
392 u8_buffer<C>&& data_buffer,
393 offset_buffer_type&& offsets,
394 VB&& validity_input,
395 std::optional<std::string_view> name,
396 std::optional<std::string_view> metadata
397 )
398 {
399 const auto size = offsets.size() - 1;
// Turn the validity input into a bitmap sized to the element count.
400 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
401 const auto null_count = vbitmap.null_count();
402
405 std::move(name), // name
406 std::move(metadata), // metadata
407 std::nullopt, // flags,
408 0, // n_children
409 nullptr, // children
410 nullptr // dictionary
411
412 );
// Buffer order: validity bitmap, offsets, data - matching
// OFFSET_BUFFER_INDEX == 1 and DATA_BUFFER_INDEX == 2.
413 std::vector<buffer<std::uint8_t>> arr_buffs = {
414 std::move(vbitmap).extract_storage(),
415 std::move(offsets).extract_storage(),
416 std::move(data_buffer).extract_storage()
417 };
418
420 static_cast<std::int64_t>(size), // length
421 static_cast<int64_t>(null_count),
422 0, // offset
423 std::move(arr_buffs),
424 0, // n_children
425 nullptr, // children
426 nullptr // dictionary
427 );
428 return arrow_proxy{std::move(arr), std::move(schema)};
429 }
430
// Builds an arrow_proxy from a range of ranges: the offsets come from the
// size of each inner range and the data buffer from the joined inner ranges.
// The result is handed to the buffer-based create_proxy overload.
// NOTE(review): the second half of the requires-clause is missing from this listing.
431 template <std::ranges::sized_range T, class CR, layout_offset OT>
432 template <std::ranges::input_range R, validity_bitmap_input VB>
433 requires(
434 std::ranges::input_range<std::ranges::range_value_t<R>> && // a range of ranges
436 // range of char-like
437 )
438 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
439 R&& values,
440 VB&& validity_input,
441 std::optional<std::string_view> name,
442 std::optional<std::string_view> metadata
443 )
444 {
445 using values_inner_value_type = std::ranges::range_value_t<std::ranges::range_value_t<R>>;
446
// Lazily map each inner range to its size.
447 auto size_range = values
448 | std::views::transform(
449 [](const auto& v)
450 {
451 return std::ranges::size(v);
452 }
453 );
454 auto offset_buffer = offset_from_sizes(size_range);
// Flatten the inner ranges into one contiguous buffer.
455 auto data_buffer = u8_buffer<values_inner_value_type>(std::ranges::views::join(values));
// NOTE(review): name and metadata are by-value parameters, so std::forward
// here acts like std::move; the other overloads spell it std::move.
456 return create_proxy(
457 std::move(data_buffer),
458 std::move(offset_buffer),
459 std::forward<VB>(validity_input),
460 std::forward<std::optional<std::string_view>>(name),
461 std::forward<std::optional<std::string_view>>(metadata)
462 );
463 }
464
465 template <std::ranges::sized_range T, class CR, layout_offset OT>
466 template <std::ranges::input_range R>
467 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
468 arrow_proxy variable_size_binary_array_impl<T, CR, OT>::create_proxy(
469 R&& range,
470 std::optional<std::string_view> name,
471 std::optional<std::string_view> metadata
472 )
473 {
474 // split into values and is_non_null ranges
475 const auto values = range
476 | std::views::transform(
477 [](const auto& v)
478 {
479 return v.get();
480 }
481 );
482 const auto is_non_null = range
483 | std::views::transform(
484 [](const auto& v)
485 {
486 return v.has_value();
487 }
488 );
489 return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata));
490 }
491
492 template <std::ranges::sized_range T, class CR, layout_offset OT>
493 auto variable_size_binary_array_impl<T, CR, OT>::data(size_type i) -> data_iterator
494 {
495 arrow_proxy& proxy = get_arrow_proxy();
496 SPARROW_ASSERT_TRUE(proxy.buffers()[DATA_BUFFER_INDEX].size() >= i);
497 return proxy.buffers()[DATA_BUFFER_INDEX].template data<data_value_type>() + i;
498 }
499
500 template <std::ranges::sized_range T, class CR, layout_offset OT>
501 auto variable_size_binary_array_impl<T, CR, OT>::data(size_type i) const -> const_data_iterator
502 {
503 const arrow_proxy& proxy = this->get_arrow_proxy();
504 SPARROW_ASSERT_TRUE(proxy.buffers()[DATA_BUFFER_INDEX].size() >= i);
505 return proxy.buffers()[DATA_BUFFER_INDEX].template data<const data_value_type>() + i;
506 }
507
// Replaces the value stored at `index` with the bytes of `rhs`.
// If the new value has a different length, the bytes after the old value are
// shifted and every later offset is adjusted by the same amount.
// NOTE(review): the requires-clause line is missing from this listing.
508 template <std::ranges::sized_range T, class CR, layout_offset OT>
509 template <std::ranges::sized_range U>
511 void variable_size_binary_array_impl<T, CR, OT>::assign(U&& rhs, size_type index)
512 {
513 SPARROW_ASSERT_TRUE(index < size());
514 const auto offset_beg = *offset(index);
515 const auto offset_end = *offset(index + 1);
516 const auto initial_value_length = offset_end - offset_beg;
517 const auto new_value_length = static_cast<OT>(std::ranges::size(rhs));
// Positive: the value grows; negative: it shrinks.
518 const OT shift_byte_count = new_value_length - initial_value_length;
519 auto& data_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
520 if (shift_byte_count != 0)
521 {
522 const auto shift_val_abs = static_cast<size_type>(std::abs(shift_byte_count));
523 const auto new_data_buffer_size = shift_byte_count < 0 ? data_buffer.size() - shift_val_abs
524 : data_buffer.size() + shift_val_abs;
525
526 if (shift_byte_count > 0)
527 {
// Growing: enlarge the buffer first, then move the tail towards the end.
528 data_buffer.resize(new_data_buffer_size);
529 // Move elements to make space for the new value
530 std::move_backward(
531 data_buffer.begin() + offset_end,
532 data_buffer.end() - shift_byte_count,
533 data_buffer.end()
534 );
535 }
536 else
537 {
// Shrinking: move the tail towards the front first, then cut the buffer.
538 std::move(
539 data_buffer.begin() + offset_end,
540 data_buffer.end(),
541 data_buffer.begin() + offset_end + shift_byte_count
542 );
543 data_buffer.resize(new_data_buffer_size);
544 }
545 // Adjust offsets for subsequent elements
546 std::for_each(
547 offset(index + 1),
548 offset(size() + 1),
549 [shift_byte_count](auto& offset)
550 {
551 offset += shift_byte_count;
552 }
553 );
554 }
// View rhs as bytes before copying it into place.
555 auto tmp = std::views::transform(
556 rhs,
557 [](const auto& val)
558 {
559 return static_cast<std::uint8_t>(val);
560 }
561 );
562 // Copy the new value into the buffer
563 std::copy(std::ranges::begin(tmp), std::ranges::end(tmp), data_buffer.begin() + offset_beg);
564 }
565
566 template <std::ranges::sized_range T, class CR, layout_offset OT>
567 auto variable_size_binary_array_impl<T, CR, OT>::offset(size_type i) -> offset_iterator
568 {
569 SPARROW_ASSERT_TRUE(i <= size() + this->get_arrow_proxy().offset());
570 return get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<OT>()
571 + static_cast<size_type>(this->get_arrow_proxy().offset()) + i;
572 }
573
574 template <std::ranges::sized_range T, class CR, layout_offset OT>
575 auto variable_size_binary_array_impl<T, CR, OT>::offset(size_type i) const -> const_offset_iterator
576 {
577 SPARROW_ASSERT_TRUE(i <= this->size() + this->get_arrow_proxy().offset());
578 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<OT>()
579 + static_cast<size_type>(this->get_arrow_proxy().offset()) + i;
580 }
581
582 template <std::ranges::sized_range T, class CR, layout_offset OT>
583 auto variable_size_binary_array_impl<T, CR, OT>::offsets_begin() -> offset_iterator
584 {
585 return offset(0);
586 }
587
588 template <std::ranges::sized_range T, class CR, layout_offset OT>
589 auto variable_size_binary_array_impl<T, CR, OT>::offsets_cbegin() const -> const_offset_iterator
590 {
591 return offset(0);
592 }
593
594 template <std::ranges::sized_range T, class CR, layout_offset OT>
595 auto variable_size_binary_array_impl<T, CR, OT>::offsets_end() -> offset_iterator
596 {
597 return offset(size() + 1);
598 }
599
600 template <std::ranges::sized_range T, class CR, layout_offset OT>
601 auto variable_size_binary_array_impl<T, CR, OT>::offsets_cend() const -> const_offset_iterator
602 {
603 return offset(size() + 1);
604 }
605
606 template <std::ranges::sized_range T, class CR, layout_offset OT>
612
// Read-only access to the i-th value (the signature line is missing from this
// listing): looks up the two offsets delimiting the value and builds an
// inner_const_reference from the matching pair of data pointers.
613 template <std::ranges::sized_range T, class CR, layout_offset OT>
615 {
616 SPARROW_ASSERT_TRUE(i < this->size());
617 const OT offset_begin = *offset(i);
// Offsets are asserted non-negative before being cast to size_type.
618 SPARROW_ASSERT_TRUE(offset_begin >= 0);
619 const OT offset_end = *offset(i + 1);
620 SPARROW_ASSERT_TRUE(offset_end >= 0);
621 const const_data_iterator pointer_begin = data(static_cast<size_type>(offset_begin));
622 const const_data_iterator pointer_end = data(static_cast<size_type>(offset_end));
623 return inner_const_reference(pointer_begin, pointer_end);
624 }
625
626 template <std::ranges::sized_range T, class CR, layout_offset OT>
627 auto variable_size_binary_array_impl<T, CR, OT>::value_begin() -> value_iterator
628 {
629 return value_iterator{this, 0};
630 }
631
632 template <std::ranges::sized_range T, class CR, layout_offset OT>
633 auto variable_size_binary_array_impl<T, CR, OT>::value_end() -> value_iterator
634 {
635 return sparrow::next(value_begin(), size());
636 }
637
638 template <std::ranges::sized_range T, class CR, layout_offset OT>
639 auto variable_size_binary_array_impl<T, CR, OT>::value_cbegin() const -> const_value_iterator
640 {
641 return const_value_iterator{this, 0};
642 }
643
644 template <std::ranges::sized_range T, class CR, layout_offset OT>
645 auto variable_size_binary_array_impl<T, CR, OT>::value_cend() const -> const_value_iterator
646 {
647 return sparrow::next(value_cbegin(), this->size());
648 }
649
// Resizes the value storage to new_length elements.
// Shrinking truncates the data buffer at the offset of the first dropped
// value and cuts the offset buffer; growing appends copies of `value`.
// Nothing happens when new_length equals size().
// NOTE(review): the requires-clause line is missing from this listing.
650 template <std::ranges::sized_range T, class CR, layout_offset OT>
651 template <std::ranges::sized_range U>
653 void variable_size_binary_array_impl<T, CR, OT>::resize_values(size_type new_length, U value)
654 {
// Offset-buffer sizes include the proxy offset.
655 const size_t new_size = new_length + static_cast<size_t>(this->get_arrow_proxy().offset());
656 auto& buffers = this->get_arrow_proxy().get_array_private_data()->buffers();
657 if (new_length < size())
658 {
659 const auto offset_begin = static_cast<size_t>(*offset(new_length));
660 auto& data_buffer = buffers[DATA_BUFFER_INDEX];
661 data_buffer.resize(offset_begin);
662 auto& offset_buffer = buffers[OFFSET_BUFFER_INDEX];
663 auto offset_buffer_adaptor = make_buffer_adaptor<OT>(offset_buffer);
// N values need N + 1 offsets.
664 offset_buffer_adaptor.resize(new_size + 1);
665 }
666 else if (new_length > size())
667 {
668 insert_value(value_cend(), value, new_length - size());
669 }
670 }
671
// Inserts `count` copies of `value` before `pos`: the repeated bytes go into
// the data buffer at the offset of `pos`, then the offset buffer is updated
// through insert_offset. Returns a value iterator at the insertion index.
// NOTE(review): the requires-clause and trailing return type lines are
// missing from this listing.
672 template <std::ranges::sized_range T, class CR, layout_offset OT>
673 template <std::ranges::sized_range U>
675 auto
676 variable_size_binary_array_impl<T, CR, OT>::insert_value(const_value_iterator pos, U value, size_type count)
678 {
679 const auto idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
680 const OT offset_begin = *offset(idx);
// NOTE(review): cbegin()/cend()/size() are called on U directly, so U needs
// those members in addition to modelling sized_range.
681 const std::vector<uint8_t> casted_value{value.cbegin(), value.cend()};
682 const repeat_view<std::vector<uint8_t>> my_repeat_view{casted_value, count};
683 const auto joined_repeated_value_range = std::ranges::views::join(my_repeat_view);
684 auto& data_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
685 const auto pos_to_insert = sparrow::next(data_buffer.cbegin(), offset_begin);
686 data_buffer.insert(pos_to_insert, joined_repeated_value_range.begin(), joined_repeated_value_range.end());
687 insert_offset(offsets_cbegin() + idx + 1, static_cast<offset_type>(value.size()), count);
688 return sparrow::next(value_begin(), idx);
689 }
690
691 template <std::ranges::sized_range T, class CR, layout_offset OT>
692 auto variable_size_binary_array_impl<T, CR, OT>::insert_offset(
693 const_offset_iterator pos,
694 offset_type value_size,
695 size_type count
696 ) -> offset_iterator
697 {
698 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
699 const auto idx = static_cast<size_t>(std::distance(offsets_cbegin(), pos));
700 auto offset_buffer_adaptor = make_buffer_adaptor<OT>(offset_buffer);
701 const offset_type cumulative_size = value_size * static_cast<offset_type>(count);
702 // Adjust offsets for subsequent elements
703 std::for_each(
704 sparrow::next(offset_buffer_adaptor.begin(), idx + 1),
705 offset_buffer_adaptor.end(),
706 [cumulative_size](auto& offset)
707 {
708 offset += cumulative_size;
709 }
710 );
711 offset_buffer_adaptor.insert(sparrow::next(offset_buffer_adaptor.cbegin(), idx + 1), count, 0);
712 // Put the right values in the new offsets
713 for (size_t i = idx + 1; i < idx + 1 + count; ++i)
714 {
715 offset_buffer_adaptor[i] = offset_buffer_adaptor[i - 1] + value_size;
716 }
717 return offsets_begin() + idx;
718 }
719
// Inserts the values in [first, last) before `pos`: grows the data buffer by
// the total byte count, moves the existing tail out of the way, copies the
// new values in order, then records their sizes through insert_offsets.
// Returns a value iterator at the insertion index.
// NOTE(review): the trailing return type line is missing from this listing.
720 template <std::ranges::sized_range T, class CR, layout_offset OT>
721 template <mpl::iterator_of_type<T> InputIt>
722 auto
723 variable_size_binary_array_impl<T, CR, OT>::insert_values(const_value_iterator pos, InputIt first, InputIt last)
725 {
726 auto& data_buffer = get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
727 auto data_buffer_adaptor = make_buffer_adaptor<data_value_type>(data_buffer);
728 auto values = std::ranges::subrange(first, last);
// Total number of bytes across all inserted values.
729 const size_t cumulative_sizes = std::accumulate(
730 values.begin(),
731 values.end(),
732 size_t(0),
733 [](size_t acc, const T& value)
734 {
735 return acc + value.size();
736 }
737 );
738 data_buffer_adaptor.resize(data_buffer_adaptor.size() + cumulative_sizes);
739 const auto idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
740 const OT offset_begin = *offset(idx);
// insert_pos is taken after the resize above.
741 auto insert_pos = sparrow::next(data_buffer_adaptor.begin(), offset_begin);
742
743 // Move elements to make space for the new value
744 std::move_backward(
745 insert_pos,
746 sparrow::next(data_buffer_adaptor.end(), -static_cast<difference_type>(cumulative_sizes)),
747 data_buffer_adaptor.end()
748 );
749
// Copy each value into the gap, one after the other.
750 for (const T& value : values)
751 {
752 std::copy(value.begin(), value.end(), insert_pos);
753 std::advance(insert_pos, value.size());
754 }
755
756 const auto sizes_of_each_value = std::ranges::views::transform(
757 values,
758 [](const T& value) -> offset_type
759 {
760 return static_cast<offset_type>(value.size());
761 }
762 );
763 insert_offsets(offset(idx + 1), sizes_of_each_value.begin(), sizes_of_each_value.end());
764 return sparrow::next(value_begin(), idx);
765 }
766
// Inserts one offset per size in [first_sizes, last_sizes) at `pos`: grows
// the offset buffer, moves the existing tail back, adds the total of the
// sizes to the moved offsets, then fills each new slot with its predecessor
// plus the matching size. Returns the offset iterator at the insertion index.
// NOTE(review): the parameter line declaring `pos` is missing from this listing.
767 template <std::ranges::sized_range T, class CR, layout_offset OT>
768 template <mpl::iterator_of_type<OT> InputIt>
769 auto variable_size_binary_array_impl<T, CR, OT>::insert_offsets(
771 InputIt first_sizes,
772 InputIt last_sizes
773 ) -> offset_iterator
774 {
775 SPARROW_ASSERT_TRUE(pos >= offsets_cbegin());
776 SPARROW_ASSERT_TRUE(pos <= offsets_cend());
777 SPARROW_ASSERT_TRUE(first_sizes <= last_sizes);
778 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
779 auto offset_buffer_adaptor = make_buffer_adaptor<OT>(offset_buffer);
780 const auto idx = std::distance(offsets_cbegin(), pos);
// Total of all inserted sizes.
781 const OT cumulative_sizes = std::reduce(first_sizes, last_sizes, OT(0));
782 const auto sizes_count = std::distance(first_sizes, last_sizes);
783 offset_buffer_adaptor.resize(offset_buffer_adaptor.size() + static_cast<size_t>(sizes_count));
784 // Move the offsets to make space for the new offsets
785 std::move_backward(
786 offset_buffer_adaptor.begin() + idx,
787 offset_buffer_adaptor.end() - sizes_count,
788 offset_buffer_adaptor.end()
789 );
790 // Adjust offsets for subsequent elements
791 std::for_each(
792 offset_buffer_adaptor.begin() + idx + sizes_count,
793 offset_buffer_adaptor.end(),
794 [cumulative_sizes](auto& offset)
795 {
796 offset += cumulative_sizes;
797 }
798 );
799 // Put the right values in the new offsets
800 InputIt it = first_sizes;
801 for (size_t i = static_cast<size_t>(idx + 1); i < static_cast<size_t>(idx + sizes_count + 1); ++i)
802 {
803 offset_buffer_adaptor[i] = offset_buffer_adaptor[i - 1] + *it;
804 ++it;
805 }
806 return offset(static_cast<size_t>(idx));
807 }
808
809 template <std::ranges::sized_range T, class CR, layout_offset OT>
810 auto variable_size_binary_array_impl<T, CR, OT>::erase_values(const_value_iterator pos, size_type count)
811 -> value_iterator
812 {
813 SPARROW_ASSERT_TRUE(pos >= value_cbegin());
814 SPARROW_ASSERT_TRUE(pos <= value_cend());
815 const size_t index = static_cast<size_t>(std::distance(value_cbegin(), pos));
816 if (count == 0)
817 {
818 return sparrow::next(value_begin(), index);
819 }
820 auto& data_buffer = get_arrow_proxy().get_array_private_data()->buffers()[DATA_BUFFER_INDEX];
821 const auto offset_begin = *offset(index);
822 const auto offset_end = *offset(index + count);
823 const size_t difference = static_cast<size_t>(offset_end - offset_begin);
824 // move the values after the erased ones
825 std::move(data_buffer.begin() + offset_end, data_buffer.end(), data_buffer.begin() + offset_begin);
826 data_buffer.resize(data_buffer.size() - difference);
827 // adjust the offsets for the subsequent elements
828 erase_offsets(offset(index), count);
829 return sparrow::next(value_begin(), index);
830 }
831
832 template <std::ranges::sized_range T, class CR, layout_offset OT>
833 auto variable_size_binary_array_impl<T, CR, OT>::erase_offsets(const_offset_iterator pos, size_type count)
834 -> offset_iterator
835 {
836 SPARROW_ASSERT_TRUE(pos >= offsets_cbegin());
837 SPARROW_ASSERT_TRUE(pos <= offsets_cend());
838 const size_t index = static_cast<size_t>(std::distance(offsets_cbegin(), pos));
839 if (count == 0)
840 {
841 return offset(index);
842 }
843 auto& offset_buffer = get_arrow_proxy().get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
844 auto offset_buffer_adaptor = make_buffer_adaptor<OT>(offset_buffer);
845 const OT offset_start_value = *offset(index);
846 const OT offset_end_value = *offset(index + count);
847 const OT difference = offset_end_value - offset_start_value;
848 // move the offsets after the erased ones
849 std::move(
850 sparrow::next(offset_buffer_adaptor.begin(), index + count + 1),
851 offset_buffer_adaptor.end(),
852 sparrow::next(offset_buffer_adaptor.begin(), index + 1)
853 );
854 offset_buffer_adaptor.resize(offset_buffer_adaptor.size() - count);
855 // adjust the offsets for the subsequent elements
856 std::for_each(
857 sparrow::next(offset_buffer_adaptor.begin(), index + 1),
858 offset_buffer_adaptor.end(),
859 [difference](OT& offset)
860 {
861 offset -= difference;
862 }
863 );
864 return offset(index);
865 }
866
867}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
constexpr size_type null_count() const noexcept
bitset_iterator< self_type, true > const_iterator
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:280
A view that repeats a value a given number of times.
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
inner_const_reference value(size_type i) const
Implementation of reference to inner type used for layout L.
Iterator over the data values of a variable size binary layout.
Concept for iterator types.
Matches range types From whose elements are convertible to elements of range type To.
Definition mp_utils.hpp:450
#define SPARROW_ASSERT_TRUE(expr__)
sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
constexpr bool is_big_string_array_v
Checks whether T is a big_string_array type.
ArrowSchema make_arrow_schema(F format, N name, M metadata, std::optional< ArrowFlag > flags, int64_t n_children, ArrowSchema **children, ArrowSchema *dictionary)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_string_array_v
Checks whether T is a string_array type.
variable_size_binary_array_impl< binary_traits::value_type, binary_traits::const_reference, std::int32_t > binary_array
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
variable_size_binary_array_impl< std::string, std::string_view, std::int32_t > string_array
constexpr bool is_big_binary_array_v
Checks whether T is a big_binary_array type.
dynamic_bitset< std::uint8_t > validity_bitmap
constexpr bool is_binary_array_v
Checks whether T is a binary_array type.
auto make_buffer_adaptor(FromBufferRef &buf)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
variable_size_binary_array_impl< std::string, std::string_view, std::int64_t > big_string_array
variable_size_binary_array_impl< binary_traits::value_type, binary_traits::const_reference, std::int64_t > big_binary_array
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Creates an ArrowArray.
arrow_traits< std::vector< byte_t > > binary_traits
array_inner_types< variable_size_binary_array_impl< T, CR, OT > >::iterator_tag iterator_tag
array_inner_types< variable_size_binary_array_impl< T, CR, OT > >::iterator_tag iterator_tag
variable_size_binary_value_iterator< array_type, iterator_types > value_iterator
variable_size_binary_value_iterator< array_type, const_iterator_types > const_value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
Provides compile-time information about Arrow data types.