sparrow 2.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <limits>
18#include <ranges>
19#include <stdexcept>
20#include <string> // for std::stoull
21#include <type_traits>
22#include <utility>
23#include <vector>
24
25#include "sparrow/array_api.hpp"
38
39namespace sparrow
40{
41 template <class DERIVED>
43
44 template <bool BIG>
45 class list_array_impl;
46
47 template <bool BIG>
49
50 namespace copy_tracker
51 {
52 template <typename T>
53 requires std::same_as<T, list_array_impl<false>> || std::same_as<T, list_array_impl<true>>
54 std::string key()
55 {
56 return "list_array";
57 }
58
59 template <typename T>
60 requires std::same_as<T, list_view_array_impl<false>> || std::same_as<T, list_view_array_impl<true>>
61 std::string key()
62 {
63 return "list_view_array";
64 }
65 }
66
89
104
106
107 namespace copy_tracker
108 {
109 template <>
110 inline std::string key<fixed_sized_list_array>()
111 {
112 return "fixed_sized_list_array";
113 }
114 }
115
116
120 template <class T>
121 constexpr bool is_list_array_v = std::same_as<T, list_array>;
122
126 template <class T>
127 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
128
132 template <class T>
133 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
134
138 template <class T>
139 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
140
144 template <class T>
145 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
146
147 namespace detail
148 {
149 template <bool BIG>
151 {
152 [[nodiscard]] static constexpr sparrow::data_type get()
153 {
155 }
156 };
157
158 template <bool BIG>
160 {
161 [[nodiscard]] static constexpr sparrow::data_type get()
162 {
164 }
165 };
166
167 template <>
169 {
170 [[nodiscard]] static constexpr sparrow::data_type get()
171 {
173 }
174 };
175
176 // Helper to build arrow schema for list arrays
177 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
179 std::string format,
180 ArrowSchema&& flat_schema,
181 std::optional<std::string_view> name,
182 std::optional<METADATA_RANGE> metadata,
183 bool nullable
184 )
185 {
187 std::optional<std::unordered_set<ArrowFlag>>
188 flags = nullable ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::NULLABLE}}
189 : std::nullopt;
190 auto child_schemas = new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))};
191
192 return make_arrow_schema(
193 std::move(format),
194 name,
195 metadata,
196 flags,
197 child_schemas,
199 nullptr, // dictionary
200 true // dictionary ownership
201 );
202 }
203
204 // Helper to build arrow array for list arrays
206 std::int64_t size,
207 std::int64_t null_count,
208 std::vector<buffer<std::uint8_t>>&& arr_buffs,
209 ArrowArray&& flat_arr
210 )
211 {
213 auto child_arrays = new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))};
214 return make_arrow_array(
215 size,
216 null_count,
217 0, // offset
218 std::move(arr_buffs),
219 child_arrays,
221 nullptr, // dictionary
222 true // dictionary ownership
223 );
224 }
225 }
226
227 template <bool BIG>
240
241 template <bool BIG>
254
255 template <>
268
286 template <class DERIVED>
288 {
289 public:
290
294 using value_iterator = typename inner_types::value_iterator;
295 using const_value_iterator = typename inner_types::const_value_iterator;
297
299 using bitmap_reference = typename base_type::bitmap_reference;
301
303
305 using inner_reference = typename inner_types::inner_reference;
306 using inner_const_reference = typename inner_types::inner_const_reference;
307
312
320 [[nodiscard]] constexpr const array* raw_flat_array() const;
321
329 [[nodiscard]] constexpr array* raw_flat_array();
330
331 protected:
332
342
353
366
367 constexpr list_array_crtp_base(self_type&&) noexcept = default;
368 constexpr list_array_crtp_base& operator=(self_type&&) noexcept = default;
369
370 protected:
371
384 constexpr void throw_if_sliced_for_mutation(const char* operation) const;
385
386 [[nodiscard]] constexpr value_iterator value_begin();
387 [[nodiscard]] constexpr value_iterator value_end();
388 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
389 [[nodiscard]] constexpr const_value_iterator value_cend() const;
390
391 // Inserts `count` copies of value's flat elements into the flat array at flat_pos.
392 // No-op if value.size() == 0.
393 constexpr void insert_flat_elements(size_type flat_pos, const list_value& value, size_type count);
394
395 // Erases flat_count consecutive flat elements starting at flat_begin.
396 // No-op if flat_count == 0.
397 constexpr void erase_flat_elements(size_type flat_begin, size_type flat_count);
398
399 private:
400
401 using list_size_type = inner_types::list_size_type;
402
403 constexpr void resize_values(size_type new_length, const list_value& value);
404
405 template <std::input_iterator InputIt>
406 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
407 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
408
409 [[nodiscard]] constexpr inner_reference value(size_type i);
410 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
411
412 [[nodiscard]] array make_flat_array();
413
414 // data members
415 array p_flat_array;
416
417 // friend classes
418 friend class array_crtp_base<DERIVED>;
419 friend class mutable_array_base<DERIVED>;
420 friend class list_reference<DERIVED>;
421
422 // needs access to this->value(i)
423 friend class detail::layout_value_functor<DERIVED, inner_reference>;
424 friend class detail::layout_value_functor<const DERIVED, inner_const_reference>;
425 };
426
427 template <bool BIG>
429 {
430 public:
431
435 using list_size_type = inner_types::list_size_type;
439 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
441
454
464 constexpr list_array_impl(const self_type&);
465
478
479 constexpr list_array_impl(self_type&&) noexcept = default;
480 constexpr list_array_impl& operator=(self_type&&) noexcept = default;
481
482 constexpr void slice_inplace(size_type start, size_type end);
483
495 template <class... ARGS>
496 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
497 explicit list_array_impl(ARGS&&... args)
498 : self_type(create_proxy(std::forward<ARGS>(args)...))
499 {
500 }
501
519 template <std::ranges::range SIZES_RANGE>
520 [[nodiscard]] static constexpr auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
521
522 private:
523
543 template <
545 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
546 [[nodiscard]] static arrow_proxy create_proxy(
547 array&& flat_values,
548 offset_buffer_type&& list_offsets,
549 VB&& validity_input,
550 std::optional<std::string_view> name = std::nullopt,
551 std::optional<METADATA_RANGE> metadata = std::nullopt
552 );
553
572 template <
574 std::ranges::input_range OFFSET_BUFFER_RANGE,
575 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
576 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
577 [[nodiscard]] static arrow_proxy create_proxy(
578 array&& flat_values,
579 OFFSET_BUFFER_RANGE&& list_offsets_range,
580 VB&& validity_input,
581 std::optional<std::string_view> name = std::nullopt,
582 std::optional<METADATA_RANGE> metadata = std::nullopt
583 )
584 {
585 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
586 return list_array_impl<BIG>::create_proxy(
587 std::move(flat_values),
588 std::move(list_offsets),
589 std::forward<VB>(validity_input),
590 std::forward<std::optional<std::string_view>>(name),
591 std::forward<std::optional<METADATA_RANGE>>(metadata)
592 );
593 }
594
595 template <
596 validity_bitmap_input VB = validity_bitmap,
597 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
598 [[nodiscard]] static arrow_proxy create_proxy(
599 array&& flat_values,
600 offset_buffer_type&& list_offsets,
601 bool nullable = true,
602 std::optional<std::string_view> name = std::nullopt,
603 std::optional<METADATA_RANGE> metadata = std::nullopt
604 );
605
606 template <
607 validity_bitmap_input VB = validity_bitmap,
608 std::ranges::input_range OFFSET_BUFFER_RANGE,
609 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
610 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
611 [[nodiscard]] static arrow_proxy create_proxy(
612 array&& flat_values,
613 OFFSET_BUFFER_RANGE&& list_offsets_range,
614 bool nullable = true,
615 std::optional<std::string_view> name = std::nullopt,
616 std::optional<METADATA_RANGE> metadata = std::nullopt
617 )
618 {
619 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
620 return list_array_impl<BIG>::create_proxy(
621 std::move(flat_values),
622 std::move(list_offsets),
623 nullable,
624 std::forward<std::optional<std::string_view>>(name),
625 std::forward<std::optional<METADATA_RANGE>>(metadata)
626 );
627 }
628
629 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
630 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
631
632 [[nodiscard]] constexpr offset_type* make_list_offsets();
633
634 void replace_value(size_type index, const list_value& value);
635
636 constexpr value_iterator
637 insert_value(const_value_iterator pos, const list_value& value, size_type count);
638
639 constexpr value_iterator erase_values(const_value_iterator pos, size_type count);
640
641 offset_type* p_list_offsets;
642
643 // friend classes
644 friend class array_crtp_base<self_type>;
645 friend class mutable_array_base<self_type>;
646 friend class list_array_crtp_base<self_type>;
647 template <class L>
648 friend class list_reference;
649 };
650
651 template <bool BIG>
652 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
653 {
654 public:
655
659 using list_size_type = inner_types::list_size_type;
663 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
666
679
690
703
704 constexpr list_view_array_impl(self_type&&) = default;
705 constexpr list_view_array_impl& operator=(self_type&&) = default;
706
707 constexpr void slice_inplace(size_type start, size_type end);
708
721 template <class... ARGS>
723 list_view_array_impl(ARGS&&... args)
724 : self_type(create_proxy(std::forward<ARGS>(args)...))
725 {
726 }
727
728 private:
729
753 template <
754 std::ranges::input_range OFFSET_BUFFER_RANGE,
755 std::ranges::input_range SIZE_RANGE,
757 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
758 requires(
759 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
760 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
761 )
762 [[nodiscard]] static arrow_proxy create_proxy(
763 array&& flat_values,
764 OFFSET_BUFFER_RANGE&& list_offsets,
765 SIZE_RANGE&& list_sizes,
766 VB&& validity_input,
767 std::optional<std::string_view> name = std::nullopt,
768 std::optional<METADATA_RANGE> metadata = std::nullopt
769 )
770 {
771 return list_view_array_impl<BIG>::create_proxy(
772 std::move(flat_values),
773 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
774 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
775 std::forward<VB>(validity_input),
776 name,
777 metadata
778 );
779 }
780
781 template <
782 validity_bitmap_input VB = validity_bitmap,
783 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
784 [[nodiscard]] static arrow_proxy create_proxy(
785 array&& flat_values,
786 offset_buffer_type&& list_offsets,
787 size_buffer_type&& list_sizes,
788 VB&& validity_input,
789 std::optional<std::string_view> name = std::nullopt,
790 std::optional<METADATA_RANGE> metadata = std::nullopt
791 );
792
793 template <
794 std::ranges::input_range OFFSET_BUFFER_RANGE,
795 std::ranges::input_range SIZE_RANGE,
796 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
797 requires(
798 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
799 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
800 )
801 [[nodiscard]] static arrow_proxy create_proxy(
802 array&& flat_values,
803 OFFSET_BUFFER_RANGE&& list_offsets,
804 SIZE_RANGE&& list_sizes,
805 bool nullable = true,
806 std::optional<std::string_view> name = std::nullopt,
807 std::optional<METADATA_RANGE> metadata = std::nullopt
808 )
809 {
810 return list_view_array_impl<BIG>::create_proxy(
811 std::move(flat_values),
812 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
813 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
814 nullable,
815 name,
816 metadata
817 );
818 }
819
820 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
821 [[nodiscard]] static arrow_proxy create_proxy(
822 array&& flat_values,
823 offset_buffer_type&& list_offsets,
824 size_buffer_type&& list_sizes,
825 bool nullable = true,
826 std::optional<std::string_view> name = std::nullopt,
827 std::optional<METADATA_RANGE> metadata = std::nullopt
828 );
829
830 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
831 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
832 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
833
834 [[nodiscard]] constexpr offset_type* make_list_offsets() const;
835 [[nodiscard]] constexpr const list_size_type* make_list_sizes() const;
836
837 void replace_value(size_type index, const list_value& value);
838
839 constexpr value_iterator
840 insert_value(const_value_iterator pos, const list_value& value, size_type count);
841
842 template <std::input_iterator InputIt>
843 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
844 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
845
846 constexpr value_iterator erase_values(const_value_iterator pos, size_type count);
847
848 offset_type* p_list_offsets;
849 const list_size_type* p_list_sizes;
850
851 // friend classes
852 friend class array_crtp_base<self_type>;
853 friend class mutable_array_base<self_type>;
854 friend class list_array_crtp_base<self_type>;
855 template <class L>
856 friend class list_reference;
857 };
858
859 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
860 {
861 public:
862
866 using list_size_type = inner_types::list_size_type;
870 using offset_type = std::uint64_t;
871
883 explicit fixed_sized_list_array(arrow_proxy proxy);
884
887
890
902 template <class... ARGS>
905 : self_type(create_proxy(std::forward<ARGS>(args)...))
906 {
907 }
908
909 private:
910
929 template <
931 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
932 [[nodiscard]] static arrow_proxy create_proxy(
933 std::uint64_t list_size,
934 array&& flat_values,
935 R&& validity_input,
936 std::optional<std::string_view> name = std::nullopt,
937 std::optional<METADATA_RANGE> metadata = std::nullopt
938 );
939
958 template <
960 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
961 [[nodiscard]] static arrow_proxy create_proxy(
962 std::uint64_t list_size,
963 array&& flat_values,
964 bool nullable = true,
965 std::optional<std::string_view> name = std::nullopt,
966 std::optional<METADATA_RANGE> metadata = std::nullopt
967 );
968
981 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
982
993 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
994
995 [[nodiscard]] constexpr size_type flat_element_count(size_type list_count) const;
996
997 void replace_value(size_type index, const list_value& value);
998
999 value_iterator insert_value(const_value_iterator pos, const list_value& value, size_type count);
1000
1001 value_iterator erase_values(const_value_iterator pos, size_type count);
1002
1003 uint64_t m_list_size;
1004
1005 // friend classes
1006 friend class array_crtp_base<self_type>;
1007 friend class mutable_array_base<self_type>;
1008 friend class list_array_crtp_base<self_type>;
1009 template <class L>
1010 friend class list_reference;
1011 };
1012
1013 /***************************************
1014 * list_array_crtp_base implementation *
1015 ***************************************/
1016
1017 template <class DERIVED>
1019 : base_type(std::move(proxy))
1020 , p_flat_array(make_flat_array())
1021 {
1022 }
1023
1024 template <class DERIVED>
1026 : base_type(rhs)
1027 , p_flat_array(make_flat_array())
1028 {
1029 }
1030
1031 template <class DERIVED>
1033 {
1035 p_flat_array = make_flat_array();
1036 return *this;
1037 }
1038
1039 template <class DERIVED>
1041 {
1042 return &p_flat_array;
1043 }
1044
1045 template <class DERIVED>
1047 {
1048 return &p_flat_array;
1049 }
1050
1051 template <class DERIVED>
1052 constexpr void list_array_crtp_base<DERIVED>::throw_if_sliced_for_mutation(const char* operation) const
1053 {
1054 if (this->get_arrow_proxy().offset() != 0)
1055 {
1056 throw std::logic_error(std::string(operation) + " does not support sliced arrays (non-zero offset)");
1057 }
1058 }
1059
1060 template <class DERIVED>
1065
1066 template <class DERIVED>
1068 {
1069 return value_iterator(
1071 this->size()
1072 );
1073 }
1074
1075 template <class DERIVED>
1083
1084 template <class DERIVED>
1086 {
1087 return const_value_iterator(
1089 this->size()
1090 );
1091 }
1092
1093 template <class DERIVED>
1094 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
1095 {
1096 return inner_reference(&this->derived_cast(), i);
1097 }
1098
1099 template <class DERIVED>
1100 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
1101 {
1102 const auto r = this->derived_cast().offset_range(i);
1103 using st = typename list_value::size_type;
1104 return list_value{&p_flat_array, static_cast<st>(r.first), static_cast<st>(r.second)};
1105 }
1106
1107 template <class DERIVED>
1108 array list_array_crtp_base<DERIVED>::make_flat_array()
1109 {
1110 auto& child_proxy = this->get_arrow_proxy().children()[0];
1111 return array{&child_proxy.array(), &child_proxy.schema()};
1112 }
1113
1114 template <class DERIVED>
1115 constexpr void
1117 {
1118 if (value.size() == 0)
1119 {
1120 return;
1121 }
1122 SPARROW_ASSERT_TRUE(value.flat_array() != nullptr);
1123 array& flat_array = *raw_flat_array();
1124 flat_array.insert(
1125 flat_array.cbegin() + static_cast<std::ptrdiff_t>(flat_pos),
1126 value.flat_array()->cbegin() + static_cast<std::ptrdiff_t>(value.begin_index()),
1127 value.flat_array()->cbegin() + static_cast<std::ptrdiff_t>(value.end_index()),
1128 count
1129 );
1130 }
1131
1132 template <class DERIVED>
1133 constexpr void
1135 {
1136 if (flat_count == 0)
1137 {
1138 return;
1139 }
1140 array& flat_array = *raw_flat_array();
1141 flat_array.erase(
1142 flat_array.cbegin() + static_cast<std::ptrdiff_t>(flat_begin),
1143 flat_array.cbegin() + static_cast<std::ptrdiff_t>(flat_begin + flat_count)
1144 );
1145 }
1146
1147 template <class DERIVED>
1148 constexpr void list_array_crtp_base<DERIVED>::resize_values(size_type new_length, const list_value& value)
1149 {
1150 DERIVED& derived = static_cast<DERIVED&>(*this);
1151 const size_type n = this->size();
1152 if (new_length < n)
1153 {
1154 derived.erase_values(
1155 sparrow::next(this->value_cbegin(), static_cast<std::ptrdiff_t>(new_length)),
1156 n - new_length
1157 );
1158 }
1159 else if (new_length > n)
1160 {
1161 derived.insert_value(this->value_cend(), value, new_length - n);
1162 }
1163 }
1164
1165 template <class DERIVED>
1166 template <std::input_iterator InputIt>
1167 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
1168 constexpr auto
1169 list_array_crtp_base<DERIVED>::insert_values(const_value_iterator pos, InputIt first, InputIt last)
1171 {
1172 DERIVED& derived = static_cast<DERIVED&>(*this);
1173 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1174 size_type count = 0;
1175 for (auto it = first; it != last; ++it, ++count)
1176 {
1177 auto cur_pos = sparrow::next(this->value_cbegin(), static_cast<std::ptrdiff_t>(idx + count));
1178 derived.insert_value(cur_pos, *it, 1);
1179 }
1180 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1181 }
1182
1183 /**********************************
1184 * list_array_impl implementation *
1185 **********************************/
1186
1187#ifdef __GNUC__
1188# pragma GCC diagnostic push
1189# pragma GCC diagnostic ignored "-Wcast-align"
1190#endif
1191
1192 template <bool BIG>
1194 : base_type(std::move(proxy))
1195 , p_list_offsets(make_list_offsets())
1196 {
1197 }
1198
1199 template <bool BIG>
1200 template <std::ranges::range SIZES_RANGE>
1202 {
1204 std::forward<SIZES_RANGE>(sizes)
1205 );
1206 }
1207
1208 template <bool BIG>
1209 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1210 arrow_proxy list_array_impl<BIG>::create_proxy(
1211 array&& flat_values,
1212 offset_buffer_type&& list_offsets,
1213 VB&& validity_input,
1214 std::optional<std::string_view> name,
1215 std::optional<METADATA_RANGE> metadata
1216 )
1217 {
1218 const auto size = list_offsets.size() - 1;
1219 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1220 const auto null_count = vbitmap.null_count();
1221
1222 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1223
1225 BIG ? std::string("+L") : std::string("+l"),
1226 std::move(flat_schema),
1227 name,
1228 metadata,
1229 true // nullable
1230 );
1231
1232 std::vector<buffer<std::uint8_t>> arr_buffs;
1233 arr_buffs.reserve(2);
1234 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1235 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1236
1238 static_cast<std::int64_t>(size),
1239 static_cast<std::int64_t>(null_count),
1240 std::move(arr_buffs),
1241 std::move(flat_arr)
1242 );
1243
1244 return arrow_proxy{std::move(arr), std::move(schema)};
1245 }
1246
1247 template <bool BIG>
1248 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1249 arrow_proxy list_array_impl<BIG>::create_proxy(
1250 array&& flat_values,
1251 offset_buffer_type&& list_offsets,
1252 bool nullable,
1253 std::optional<std::string_view> name,
1254 std::optional<METADATA_RANGE> metadata
1255 )
1256 {
1257 if (nullable)
1258 {
1259 return list_array_impl<BIG>::create_proxy(
1260 std::move(flat_values),
1261 std::move(list_offsets),
1263 name,
1264 metadata
1265 );
1266 }
1267
1268 const auto size = list_offsets.size() - 1;
1269 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1270
1271 ArrowSchema schema = detail::make_list_arrow_schema(
1272 BIG ? std::string("+L") : std::string("+l"),
1273 std::move(flat_schema),
1274 name,
1275 metadata,
1276 false // not nullable
1277 );
1278
1279 std::vector<buffer<std::uint8_t>> arr_buffs;
1280 arr_buffs.reserve(2);
1281 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
1282 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1283
1284 ArrowArray arr = detail::make_list_arrow_array(
1285 static_cast<std::int64_t>(size),
1286 0, // null_count
1287 std::move(arr_buffs),
1288 std::move(flat_arr)
1289 );
1290
1291 return arrow_proxy{std::move(arr), std::move(schema)};
1292 }
1293
1294 template <bool BIG>
1296 : base_type(rhs)
1297 , p_list_offsets(make_list_offsets())
1298 {
1300 }
1301
1302 template <bool BIG>
1304 {
1306 if (this != &rhs)
1307 {
1309 p_list_offsets = make_list_offsets();
1310 }
1311 return *this;
1312 }
1313
1314 template <bool BIG>
1316 {
1317 base_type::slice_inplace(start, end);
1318 p_list_offsets = make_list_offsets();
1319 }
1320
1321 template <bool BIG>
1322 constexpr auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
1323 {
1324 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
1325 }
1326
1327 template <bool BIG>
1328 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
1329 {
1330 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<offset_type>()
1331 + static_cast<size_type>(this->get_arrow_proxy().offset());
1332 }
1333
1334 template <bool BIG>
1335 constexpr auto
1336 list_array_impl<BIG>::insert_value(const_value_iterator pos, const list_value& value, size_type count)
1337 -> value_iterator
1338 {
1339 using mutable_offset_type = std::remove_const_t<offset_type>;
1340 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1341 auto& proxy = this->get_arrow_proxy();
1342
1343 this->throw_if_sliced_for_mutation("list_array_impl::insert_value");
1344
1345 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1346 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1347 const size_type n = offset_adaptor.size() - 1;
1348
1349 // Insert flat elements: count copies of value's slice
1350 const auto flat_insert_pos = static_cast<size_type>(p_list_offsets[idx]);
1351 this->insert_flat_elements(flat_insert_pos, value, count);
1352
1353 // Update offset buffer
1354 const auto value_size = value.size();
1355 const auto count_size = count;
1356 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(value_size));
1357 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(count_size));
1358 const auto val_sz = static_cast<mutable_offset_type>(value_size);
1359 const auto count_mt = static_cast<mutable_offset_type>(count_size);
1360 const auto max_offset = std::numeric_limits<mutable_offset_type>::max();
1361 // Check multiplication overflow for total_delta = count * val_sz
1362 if (val_sz != mutable_offset_type{})
1363 {
1364 SPARROW_ASSERT_TRUE(count_mt <= max_offset / val_sz);
1365 }
1366 const mutable_offset_type total_delta = count_mt * val_sz;
1367 // Existing offsets should fit in the representable range once delta is applied
1368 const mutable_offset_type existing_max = offset_adaptor[n];
1369 SPARROW_ASSERT_TRUE(existing_max <= max_offset - total_delta);
1370
1371 offset_adaptor.resize(n + 1 + count, mutable_offset_type{});
1372
1373 // Shift existing entries [idx+1..n] to [idx+count+1..n+count], adding delta
1374 for (size_type i = n; i > idx; --i)
1375 {
1376 offset_adaptor[i + count] = offset_adaptor[i] + total_delta;
1377 }
1378
1379 const auto flat_insert_offset = static_cast<mutable_offset_type>(flat_insert_pos);
1380 SPARROW_ASSERT_TRUE(flat_insert_offset <= max_offset - total_delta);
1381 for (size_type k = 1; k <= count; ++k)
1382 {
1383 offset_adaptor[idx + k] = flat_insert_offset + static_cast<mutable_offset_type>(k) * val_sz;
1384 }
1385
1386 proxy.update_buffers();
1387 p_list_offsets = make_list_offsets();
1388 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1389 }
1390
1391 template <bool BIG>
1392 constexpr auto list_array_impl<BIG>::erase_values(const_value_iterator pos, size_type count)
1393 -> value_iterator
1394 {
1395 using mutable_offset_type = std::remove_const_t<offset_type>;
1396 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1397 const size_type n = this->size();
1398 auto& proxy = this->get_arrow_proxy();
1399
1400 if (count == 0)
1401 {
1402 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1403 }
1404
1405 this->throw_if_sliced_for_mutation("list_array_impl::erase_values");
1406
1407 // Erase flat elements
1408 const mutable_offset_type flat_erase_begin = p_list_offsets[idx];
1409 const mutable_offset_type flat_erase_end = p_list_offsets[idx + count];
1410 const mutable_offset_type flat_erase_count_val = flat_erase_end - flat_erase_begin;
1411
1412 this->erase_flat_elements(
1413 static_cast<size_type>(flat_erase_begin),
1414 static_cast<size_type>(flat_erase_count_val)
1415 );
1416
1417 // Update offset buffer: remove count entries, shift remaining down
1418 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1419 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1420 std::copy(
1421 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + count + 1),
1422 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + 1),
1423 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + 1)
1424 );
1425 auto delta_begin = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + 1);
1426 auto delta_end = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + 1 - count);
1427 std::transform(
1428 delta_begin,
1429 delta_end,
1430 delta_begin,
1431 [flat_erase_count_val](mutable_offset_type v)
1432 {
1433 return v - flat_erase_count_val;
1434 }
1435 );
1436 offset_adaptor.resize(n + 1 - count);
1437
1438 proxy.update_buffers();
1439 p_list_offsets = make_list_offsets();
1440 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1441 }
1442
1443 template <bool BIG>
1444 void list_array_impl<BIG>::replace_value(size_type index, const list_value& value)
1445 {
1446 using mutable_offset_type = std::remove_const_t<offset_type>;
1447
1448 this->throw_if_sliced_for_mutation("list_array_impl::replace_value");
1449 const size_type new_size = value.size();
1450 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(new_size));
1451
1452 const size_type n = this->size();
1453 const auto [flat_begin, flat_end] = offset_range(index);
1454 const auto old_size = static_cast<size_type>(flat_end - flat_begin);
1455 const auto old_size_mt = static_cast<mutable_offset_type>(old_size);
1456 const auto new_size_mt = static_cast<mutable_offset_type>(new_size);
1457
1458 array source_owner;
1459 const array* source = value.flat_array();
1460 if (source != nullptr && this->raw_flat_array() == source)
1461 {
1462 source_owner = *source;
1463 source = &source_owner;
1464 }
1465
1466 if (old_size == new_size)
1467 {
1468 if (new_size == 0)
1469 {
1470 return;
1471 }
1472 // Same element count: overwrite flat data in-place, no offset adjustment needed.
1473 SPARROW_ASSERT_TRUE(source != nullptr);
1474 const auto flat_begin_index = static_cast<size_type>(flat_begin);
1475 this->erase_flat_elements(flat_begin_index, old_size);
1476 this->insert_flat_elements(
1477 flat_begin_index,
1478 list_value{source, value.begin_index(), value.end_index()},
1479 1
1480 );
1481 auto& proxy_eq = this->get_arrow_proxy();
1482 proxy_eq.update_buffers();
1483 p_list_offsets = make_list_offsets();
1484 return;
1485 }
1486
1487 this->erase_flat_elements(static_cast<size_type>(flat_begin), old_size);
1488 if (new_size > 0)
1489 {
1490 SPARROW_ASSERT_TRUE(source != nullptr);
1491 this->insert_flat_elements(
1492 static_cast<size_type>(flat_begin),
1493 list_value{source, value.begin_index(), value.end_index()},
1494 1
1495 );
1496 }
1497
1498 auto& proxy = this->get_arrow_proxy();
1499 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1500 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1501
1502 // delta != 0 here (equal-size case already returned above)
1503 auto tail_begin = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(index + 1);
1504 auto tail_end = offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + 1);
1505 if (new_size_mt > old_size_mt)
1506 {
1507 const mutable_offset_type delta = new_size_mt - old_size_mt;
1508 const auto max_offset = std::numeric_limits<mutable_offset_type>::max();
1509 SPARROW_ASSERT_TRUE(offset_adaptor[n] <= max_offset - delta);
1510 std::transform(
1511 tail_begin,
1512 tail_end,
1513 tail_begin,
1514 [delta](mutable_offset_type v)
1515 {
1516 return v + delta;
1517 }
1518 );
1519 }
1520 else
1521 {
1522 const mutable_offset_type delta = old_size_mt - new_size_mt;
1523 std::transform(
1524 tail_begin,
1525 tail_end,
1526 tail_begin,
1527 [delta](mutable_offset_type v)
1528 {
1529 return v - delta;
1530 }
1531 );
1532 }
1533
1534 proxy.update_buffers();
1535 p_list_offsets = make_list_offsets();
1536 }
1537
1538 /***************************************
1539 * list_view_array_impl implementation *
1540 ***************************************/
1541
1542 template <bool BIG>
1544 : base_type(std::move(proxy))
1545 , p_list_offsets(make_list_offsets())
1546 , p_list_sizes(make_list_sizes())
1547 {
1548 }
1549
1550 template <bool BIG>
1551 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1552 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1553 array&& flat_values,
1554 offset_buffer_type&& list_offsets,
1555 size_buffer_type&& list_sizes,
1556 VB&& validity_input,
1557 std::optional<std::string_view> name,
1558 std::optional<METADATA_RANGE> metadata
1559 )
1560 {
1561 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1562 const auto size = list_sizes.size();
1563 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1564 const auto null_count = vbitmap.null_count();
1565
1566 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1567
1569 BIG ? std::string("+vL") : std::string("+vl"),
1570 std::move(flat_schema),
1571 name,
1572 metadata,
1573 true // nullable
1574 );
1575
1576 std::vector<buffer<std::uint8_t>> arr_buffs;
1577 arr_buffs.reserve(3);
1578 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1579 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1580 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1581
1583 static_cast<std::int64_t>(size),
1584 static_cast<std::int64_t>(null_count),
1585 std::move(arr_buffs),
1586 std::move(flat_arr)
1587 );
1588
1589 return arrow_proxy{std::move(arr), std::move(schema)};
1590 }
1591
1592 template <bool BIG>
1593 template <input_metadata_container METADATA_RANGE>
1594 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1595 array&& flat_values,
1596 offset_buffer_type&& list_offsets,
1597 size_buffer_type&& list_sizes,
1598 bool nullable,
1599 std::optional<std::string_view> name,
1600 std::optional<METADATA_RANGE> metadata
1601 )
1602 {
1603 if (nullable)
1604 {
1605 return list_view_array_impl<BIG>::create_proxy(
1606 std::move(flat_values),
1607 std::move(list_offsets),
1608 std::move(list_sizes),
1610 name,
1611 metadata
1612 );
1613 }
1614
1615 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1616 const auto size = list_sizes.size();
1617 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1618
1619 ArrowSchema schema = detail::make_list_arrow_schema(
1620 BIG ? std::string("+vL") : std::string("+vl"),
1621 std::move(flat_schema),
1622 name,
1623 metadata,
1624 false // not nullable
1625 );
1626
1627 std::vector<buffer<std::uint8_t>> arr_buffs;
1628 arr_buffs.reserve(3);
1629 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
1630 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1631 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1632
1633 ArrowArray arr = detail::make_list_arrow_array(
1634 static_cast<std::int64_t>(size),
1635 0, // null_count
1636 std::move(arr_buffs),
1637 std::move(flat_arr)
1638 );
1639
1640 return arrow_proxy{std::move(arr), std::move(schema)};
1641 }
1642
1643 template <bool BIG>
1645 : base_type(rhs)
1646 , p_list_offsets(make_list_offsets())
1647 , p_list_sizes(make_list_sizes())
1648 {
1650 }
1651
1652 template <bool BIG>
1654 {
1656 if (this != &rhs)
1657 {
1659 p_list_offsets = make_list_offsets();
1660 p_list_sizes = make_list_sizes();
1661 }
1662 return *this;
1663 }
1664
1665 template <bool BIG>
1667 {
1668 base_type::slice_inplace(start, end);
1669 p_list_offsets = make_list_offsets();
1670 p_list_sizes = make_list_sizes();
1671 }
1672
1673 template <bool BIG>
1674 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i) const
1675 -> std::pair<offset_type, offset_type>
1676 {
1677 const auto offset = p_list_offsets[i];
1678 SPARROW_ASSERT_TRUE(std::in_range<std::remove_const_t<offset_type>>(p_list_sizes[i]));
1679 return std::make_pair(offset, offset + static_cast<offset_type>(p_list_sizes[i]));
1680 }
1681
1682 template <bool BIG>
1683 constexpr auto list_view_array_impl<BIG>::make_list_offsets() const -> offset_type*
1684 {
1685 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<offset_type>()
1686 + static_cast<size_type>(this->get_arrow_proxy().offset());
1687 }
1688
1689 template <bool BIG>
1690 constexpr auto list_view_array_impl<BIG>::make_list_sizes() const -> const list_size_type*
1691 {
1692 return this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].template data<list_size_type>()
1693 + static_cast<size_type>(this->get_arrow_proxy().offset());
1694 }
1695
1696 template <bool BIG>
1697 constexpr auto
1698 list_view_array_impl<BIG>::insert_value(const_value_iterator pos, const list_value& value, size_type count)
1699 -> value_iterator
1700 {
1701 using mutable_offset_type = std::remove_const_t<offset_type>;
1702 using mutable_size_type = std::remove_const_t<list_size_type>;
1703 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1704
1705 this->throw_if_sliced_for_mutation("list_view_array_impl::insert_value");
1706
1707 // Append new element(s) at the end of the flat array
1708 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(value.size()));
1709 SPARROW_ASSERT_TRUE(std::in_range<mutable_size_type>(value.size()));
1710 const mutable_offset_type val_sz = static_cast<mutable_offset_type>(value.size());
1711 const size_type flat_append_pos = this->raw_flat_array()->size();
1712
1713 this->insert_flat_elements(flat_append_pos, value, count);
1714
1715 auto& proxy = this->get_arrow_proxy();
1716
1717 // Insert count new (offset, size) entries in the offset and sizes buffers at position idx
1718 const size_type n = this->size();
1719
1720 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1721 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1722 offset_adaptor.resize(n + count);
1723
1724 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1725 auto sizes_adaptor = make_buffer_adaptor<mutable_size_type>(sizes_buffer);
1726 sizes_adaptor.resize(n + count);
1727
1728 // Shift existing entries [idx..n) → [idx+count..n+count): pure backward copy, no delta.
1729 // std::copy_backward lowers to memmove for trivially-copyable offset/size types.
1730 std::copy_backward(
1731 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx),
1732 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n),
1733 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(n + count)
1734 );
1735 std::copy_backward(
1736 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(idx),
1737 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(n),
1738 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(n + count)
1739 );
1740
1741 // Write new entries for the count inserted lists
1742 // Before writing, assert that the computed offsets are representable
1743 if (count > 0 && val_sz > 0)
1744 {
1745 const auto base_offset_ull = static_cast<unsigned long long>(flat_append_pos);
1746 const auto step_ull = static_cast<unsigned long long>(val_sz);
1747 const auto max_k_ull = static_cast<unsigned long long>(count - 1);
1748 const auto max_offset_ull = static_cast<unsigned long long>(
1749 std::numeric_limits<mutable_offset_type>::max()
1750 );
1751 const auto last_offset_ull = base_offset_ull + max_k_ull * step_ull;
1752 SPARROW_ASSERT_TRUE(base_offset_ull <= max_offset_ull);
1753 SPARROW_ASSERT_TRUE(last_offset_ull <= max_offset_ull);
1754 }
1755 for (size_type k = 0; k < count; ++k)
1756 {
1757 offset_adaptor[idx + k] = static_cast<mutable_offset_type>(flat_append_pos)
1758 + static_cast<mutable_offset_type>(k) * val_sz;
1759 sizes_adaptor[idx + k] = static_cast<mutable_size_type>(val_sz);
1760 }
1761
1762 proxy.update_buffers();
1763 p_list_offsets = make_list_offsets();
1764 p_list_sizes = make_list_sizes();
1765 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1766 }
1767
1768 template <bool BIG>
1769 template <std::input_iterator InputIt>
1770 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
1771 constexpr auto
1772 list_view_array_impl<BIG>::insert_values(const_value_iterator pos, InputIt first, InputIt last)
1774 {
1775 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1776 auto& proxy = this->get_arrow_proxy();
1777 const size_type original_size = this->size();
1778 if constexpr (std::forward_iterator<InputIt>)
1779 {
1780 const auto count = static_cast<size_type>(std::distance(first, last));
1781 if (count == 0)
1782 {
1783 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1784 }
1785 size_type inserted_count = 0;
1786 // Temporarily extend logical length once for the whole bulk insertion.
1787 proxy.set_length(original_size + count);
1788 for (auto it = first; it != last; ++it)
1789 {
1790 auto cur_pos = sparrow::next(
1791 this->value_cbegin(),
1792 static_cast<std::ptrdiff_t>(idx + inserted_count)
1793 );
1794 insert_value(cur_pos, *it, 1);
1795 ++inserted_count;
1796 }
1797 proxy.set_length(original_size);
1798 }
1799 else
1800 {
1801 size_type inserted_count = 0;
1802 for (auto it = first; it != last; ++it)
1803 {
1804 auto cur_pos = sparrow::next(
1805 this->value_cbegin(),
1806 static_cast<std::ptrdiff_t>(idx + inserted_count)
1807 );
1808 insert_value(cur_pos, *it, 1);
1809 ++inserted_count;
1810 proxy.set_length(original_size + inserted_count);
1811 }
1812 proxy.set_length(original_size);
1813 }
1814 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1815 }
1816
1817 template <bool BIG>
1818 constexpr auto list_view_array_impl<BIG>::erase_values(const_value_iterator pos, size_type count)
1819 -> value_iterator
1820 {
1821 using mutable_offset_type = std::remove_const_t<offset_type>;
1822 using mutable_size_type = std::remove_const_t<list_size_type>;
1823 const size_type idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1824 const size_type n = this->size();
1825
1826 if (count == 0)
1827 {
1828 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1829 }
1830
1831 this->throw_if_sliced_for_mutation("list_view_array_impl::erase_values");
1832
1833 auto& proxy = this->get_arrow_proxy();
1834 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1835 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1836
1837 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1838 auto sizes_adaptor = make_buffer_adaptor<mutable_size_type>(sizes_buffer);
1839
1840 // Shift entries [idx+count..n) to [idx..n-count)
1841 std::copy(
1842 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + count),
1843 offset_adaptor.end(),
1844 offset_adaptor.begin() + static_cast<std::ptrdiff_t>(idx)
1845 );
1846 std::copy(
1847 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(idx + count),
1848 sizes_adaptor.end(),
1849 sizes_adaptor.begin() + static_cast<std::ptrdiff_t>(idx)
1850 );
1851 offset_adaptor.resize(n - count);
1852 sizes_adaptor.resize(n - count);
1853 proxy.update_buffers();
1854 p_list_offsets = make_list_offsets();
1855 p_list_sizes = make_list_sizes();
1856 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1857 }
1858
1859 template <bool BIG>
1860 void list_view_array_impl<BIG>::replace_value(size_type index, const list_value& value)
1861 {
1862 using mutable_offset_type = std::remove_const_t<offset_type>;
1863 using mutable_size_type = std::remove_const_t<list_size_type>;
1864
1865 this->throw_if_sliced_for_mutation("list_view_array_impl::replace_value");
1866
1867 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(value.size()));
1868 SPARROW_ASSERT_TRUE(std::in_range<mutable_size_type>(value.size()));
1869
1870 const auto value_size = static_cast<mutable_offset_type>(value.size());
1871 const size_type flat_append_pos = this->raw_flat_array()->size();
1872 SPARROW_ASSERT_TRUE(std::in_range<mutable_offset_type>(flat_append_pos));
1873
1874 this->insert_flat_elements(flat_append_pos, value, 1);
1875
1876 auto& proxy = this->get_arrow_proxy();
1877 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1878 auto offset_adaptor = make_buffer_adaptor<mutable_offset_type>(offset_buffer);
1879 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1880 auto sizes_adaptor = make_buffer_adaptor<mutable_size_type>(sizes_buffer);
1881 offset_adaptor[index] = static_cast<mutable_offset_type>(flat_append_pos);
1882 sizes_adaptor[index] = static_cast<mutable_size_type>(value_size);
1883 proxy.update_buffers();
1884 p_list_offsets = make_list_offsets();
1885 p_list_sizes = make_list_sizes();
1886 }
1887
1888#ifdef __GNUC__
1889# pragma GCC diagnostic pop
1890#endif
1891
1892 /*****************************************
1893 * fixed_sized_list_array implementation *
1894 *****************************************/
1895
1896 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
1897 {
1898 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
1899 const auto n_digits = format.size() - 3;
1900 const auto list_size_str = format.substr(3, n_digits);
1901 return std::stoull(std::string(list_size_str));
1902 }
1903
1905 : base_type(std::move(proxy))
1906 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
1907 {
1908 }
1909
1911 : base_type(rhs)
1912 , m_list_size(rhs.m_list_size)
1913 {
1915 }
1916
1918 {
1920 if (this != &rhs)
1921 {
1923 m_list_size = rhs.m_list_size;
1924 }
1925 return *this;
1926 }
1927
1928 constexpr auto fixed_sized_list_array::offset_range(size_type i) const
1929 -> std::pair<offset_type, offset_type>
1930 {
1931 const auto offset = (i + this->offset()) * m_list_size;
1932 return std::make_pair(offset, offset + m_list_size);
1933 }
1934
1935 constexpr auto fixed_sized_list_array::flat_element_count(size_type list_count) const -> size_type
1936 {
1937 const offset_type max_flat_count = static_cast<offset_type>(std::numeric_limits<size_type>::max());
1939 m_list_size == 0 || static_cast<offset_type>(list_count) <= max_flat_count / m_list_size
1940 );
1941 return static_cast<size_type>(static_cast<offset_type>(list_count) * m_list_size);
1942 }
1943
1944 inline auto
1945 fixed_sized_list_array::insert_value(const_value_iterator pos, const list_value& value, size_type count)
1946 -> value_iterator
1947 {
1948 SPARROW_ASSERT_TRUE(value.size() == m_list_size);
1949 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1950
1951 this->throw_if_sliced_for_mutation("fixed_sized_list_array::insert_value");
1952
1953 const size_type flat_insert_pos = flat_element_count(idx);
1954
1955 this->insert_flat_elements(flat_insert_pos, value, count);
1956
1957 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1958 }
1959
1960 inline auto fixed_sized_list_array::erase_values(const_value_iterator pos, size_type count) -> value_iterator
1961 {
1962 const auto idx = static_cast<size_type>(std::distance(this->value_cbegin(), pos));
1963 if (count == 0)
1964 {
1965 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1966 }
1967
1968 this->throw_if_sliced_for_mutation("fixed_sized_list_array::erase_values");
1969
1970 this->erase_flat_elements(flat_element_count(idx), flat_element_count(count));
1971 return sparrow::next(this->value_begin(), static_cast<std::ptrdiff_t>(idx));
1972 }
1973
1974 inline void fixed_sized_list_array::replace_value(size_type index, const list_value& value)
1975 {
1976 SPARROW_ASSERT_TRUE(value.size() == m_list_size);
1977
1978 this->throw_if_sliced_for_mutation("fixed_sized_list_array::replace_value");
1979
1980 if (m_list_size == 0)
1981 {
1982 return;
1983 }
1984
1985 array source_owner;
1986 const array* source = value.flat_array();
1987 if (source != nullptr && this->raw_flat_array() == source)
1988 {
1989 source_owner = *source;
1990 source = &source_owner;
1991 }
1992
1993 SPARROW_ASSERT_TRUE(source != nullptr);
1994 const size_type flat_index = flat_element_count(index);
1995 this->erase_flat_elements(flat_index, static_cast<size_type>(m_list_size));
1996 this->insert_flat_elements(flat_index, list_value{source, value.begin_index(), value.end_index()}, 1);
1997 }
1998
1999 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
2000 inline arrow_proxy fixed_sized_list_array::create_proxy(
2001 std::uint64_t list_size,
2002 array&& flat_values,
2003 R&& validity_input,
2004 std::optional<std::string_view> name,
2005 std::optional<METADATA_RANGE> metadata
2006 )
2007 {
2008 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
2009 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
2010 const auto null_count = vbitmap.null_count();
2011
2012 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
2013
2014 std::string format = "+w:" + std::to_string(list_size);
2015 ArrowSchema schema = detail::make_list_arrow_schema(
2016 std::move(format),
2017 std::move(flat_schema),
2018 name,
2019 metadata,
2020 true // nullable
2021 );
2022
2023 std::vector<buffer<std::uint8_t>> arr_buffs;
2024 arr_buffs.reserve(1);
2025 arr_buffs.emplace_back(vbitmap.extract_storage());
2026
2027 ArrowArray arr = detail::make_list_arrow_array(
2028 static_cast<std::int64_t>(size),
2029 static_cast<std::int64_t>(null_count),
2030 std::move(arr_buffs),
2031 std::move(flat_arr)
2032 );
2033
2034 return arrow_proxy{std::move(arr), std::move(schema)};
2035 }
2036
2037 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
2038 inline arrow_proxy fixed_sized_list_array::create_proxy(
2039 std::uint64_t list_size,
2040 array&& flat_values,
2041 bool nullable,
2042 std::optional<std::string_view> name,
2043 std::optional<METADATA_RANGE> metadata
2044 )
2045 {
2046 if (nullable)
2047 {
2048 return fixed_sized_list_array::create_proxy(
2049 list_size,
2050 std::move(flat_values),
2052 name,
2053 metadata
2054 );
2055 }
2056
2057 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
2058 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
2059
2060 std::string format = "+w:" + std::to_string(list_size);
2061 ArrowSchema schema = detail::make_list_arrow_schema(
2062 std::move(format),
2063 std::move(flat_schema),
2064 name,
2065 metadata,
2066 false // not nullable
2067 );
2068
2069 std::vector<buffer<std::uint8_t>> arr_buffs;
2070 arr_buffs.reserve(1);
2071 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
2072
2073 ArrowArray arr = detail::make_list_arrow_array(
2074 static_cast<std::int64_t>(size),
2075 0, // null_count
2076 std::move(arr_buffs),
2077 std::move(flat_arr)
2078 );
2079
2080 return arrow_proxy{std::move(arr), std::move(schema)};
2081 }
2082}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:50
SPARROW_API iterator erase(const_iterator pos)
Inserts a copy of value before pos in the array, repeating the insertion count times.
auto children() const
SPARROW_API iterator insert(const_iterator pos, const_iterator first, const_iterator last)
Inserts elements from the range [first, last) before the element at the specified position.
SPARROW_API const_iterator cbegin() const
Returns a constant iterator to the beginning of the array.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:131
xsimd::aligned_allocator< T > default_allocator
Definition buffer.hpp:144
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
typename base_type::value_iterator value_iterator
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
Constructs fixed size list array from Arrow proxy.
fixed_sized_list_array(ARGS &&... args)
Generic constructor for creating fixed size list array.
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
typename base_type::const_value_iterator const_value_iterator
fixed_sized_list_array(self_type &&)=default
fixed_sized_list_array & operator=(const self_type &)
typename base_type::size_type size_type
CRTP base class for all list array implementations.
constexpr void throw_if_sliced_for_mutation(const char *operation) const
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
Copy assignment operator.
constexpr array * raw_flat_array()
Gets mutable access to the underlying flat array.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
constexpr void insert_flat_elements(size_type flat_pos, const list_value &value, size_type count)
typename inner_types::inner_const_reference inner_const_reference
typename base_type::iterator_tag iterator_tag
mutable_array_bitmap_base< DERIVED > base_type
typename base_type::bitmap_reference bitmap_reference
list_array_crtp_base(arrow_proxy proxy)
Constructs list array base from Arrow proxy.
constexpr list_array_crtp_base(const self_type &)
Copy constructor.
constexpr const_value_iterator value_cbegin() const
constexpr void erase_flat_elements(size_type flat_begin, size_type flat_count)
typename inner_types::value_iterator value_iterator
constexpr const_value_iterator value_cend() const
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_reference, bitmap_reference > reference
nullable< inner_value_type > value_type
typename inner_types::inner_reference inner_reference
constexpr const array * raw_flat_array() const
Gets read-only access to the underlying flat array.
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr void slice_inplace(size_type start, size_type end)
constexpr list_array_impl(const self_type &)
Copy constructor.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
Copy assignment operator.
typename base_type::value_iterator value_iterator
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from list sizes.
typename base_type::const_value_iterator const_value_iterator
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
Constructs list array from Arrow proxy.
size_type size() const
Gets the number of elements in the list.
std::size_t size_type
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr void slice_inplace(size_type start, size_type end)
constexpr list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
Constructs list view array from Arrow proxy.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::value_iterator value_iterator
list_view_array_impl(ARGS &&... args)
Generic constructor for creating list view array from various inputs.
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
Copy constructor.
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
Copy assignment operator.
Base class definining common interface for arrays with a bitmap.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT(expr__, message__)
SPARROW_API void increase(const std::string &key)
std::string key< fixed_sized_list_array >()
SPARROW_API int count(const std::string &key, int disabled_value=0)
std::string key()
Definition buffer.hpp:49
ArrowArray make_list_arrow_array(std::int64_t size, std::int64_t null_count, std::vector< buffer< std::uint8_t > > &&arr_buffs, ArrowArray &&flat_arr)
ArrowSchema make_list_arrow_schema(std::string format, ArrowSchema &&flat_schema, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata, bool nullable)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:605
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:110
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_list_array_v
Checks whether T is a list_array type.
list_array_impl< true > big_list_array
A big list array implementation.
auto make_buffer_adaptor(FromBufferRef &buf)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.