sparrow 1.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string> // for std::stoull
19#include <type_traits>
20#include <vector>
21
22#include "sparrow/array_api.hpp"
35
36namespace sparrow
37{
38 template <class DERIVED>
40
41 template <bool BIG>
42 class list_array_impl;
43
44 template <bool BIG>
46
69
84
86
90 template <class T>
91 constexpr bool is_list_array_v = std::same_as<T, list_array>;
92
96 template <class T>
97 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
98
102 template <class T>
103 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
104
108 template <class T>
109 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
110
114 template <class T>
115 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
116
117 namespace detail
118 {
119 template <bool BIG>
121 {
122 [[nodiscard]] static constexpr sparrow::data_type get()
123 {
125 }
126 };
127
128 template <bool BIG>
130 {
131 [[nodiscard]] static constexpr sparrow::data_type get()
132 {
134 }
135 };
136
137 template <>
139 {
140 [[nodiscard]] static constexpr sparrow::data_type get()
141 {
143 }
144 };
145
146 // Helper to build arrow schema for list arrays
147 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
149 std::string format,
150 ArrowSchema&& flat_schema,
151 std::optional<std::string_view> name,
152 std::optional<METADATA_RANGE> metadata,
153 bool nullable
154 )
155 {
157 std::optional<std::unordered_set<ArrowFlag>>
158 flags = nullable ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::NULLABLE}}
159 : std::nullopt;
160
161 return make_arrow_schema(
162 std::move(format),
163 name,
164 metadata,
165 flags,
166 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))},
168 nullptr, // dictionary
169 true // dictionary ownership
170 );
171 }
172
173 // Helper to build arrow array for list arrays
175 std::int64_t size,
176 std::int64_t null_count,
177 std::vector<buffer<std::uint8_t>>&& arr_buffs,
178 ArrowArray&& flat_arr
179 )
180 {
182 return make_arrow_array(
183 size,
184 null_count,
185 0, // offset
186 std::move(arr_buffs),
187 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))},
189 nullptr, // dictionary
190 true // dictionary ownership
191 );
192 }
193 }
194
195 template <bool BIG>
208
209 template <bool BIG>
222
223 template <>
236
251 template <class DERIVED>
253 {
254 public:
255
259 using value_iterator = typename inner_types::value_iterator;
260 using const_value_iterator = typename inner_types::const_value_iterator;
262
265
267
271
275
283 [[nodiscard]] constexpr const array_wrapper* raw_flat_array() const;
284
292 [[nodiscard]] constexpr array_wrapper* raw_flat_array();
293
294 protected:
295
305
316
329
330 constexpr list_array_crtp_base(self_type&&) noexcept = default;
331 constexpr list_array_crtp_base& operator=(self_type&&) noexcept = default;
332
333 private:
334
335 using list_size_type = inner_types::list_size_type;
336
337 [[nodiscard]] constexpr value_iterator value_begin();
338 [[nodiscard]] constexpr value_iterator value_end();
339 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
340 [[nodiscard]] constexpr const_value_iterator value_cend() const;
341
342 [[nodiscard]] constexpr inner_reference value(size_type i);
343 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
344
345 [[nodiscard]] cloning_ptr<array_wrapper> make_flat_array();
346
347 // data members
349
350 // friend classes
351 friend class array_crtp_base<DERIVED>;
352
353 // needs access to this->value(i)
354 friend class detail::layout_value_functor<DERIVED, inner_value_type>;
355 friend class detail::layout_value_functor<const DERIVED, inner_value_type>;
356 };
357
358 template <bool BIG>
360 {
361 public:
362
366 using list_size_type = inner_types::list_size_type;
368 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
370
383
393 constexpr list_array_impl(const self_type&);
394
407
408 constexpr list_array_impl(self_type&&) noexcept = default;
409 constexpr list_array_impl& operator=(self_type&&) noexcept = default;
410
422 template <class... ARGS>
423 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
424 explicit list_array_impl(ARGS&&... args)
425 : self_type(create_proxy(std::forward<ARGS>(args)...))
426 {
427 }
428
446 template <std::ranges::range SIZES_RANGE>
447 [[nodiscard]] static constexpr auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
448
449 private:
450
470 template <
472 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
473 [[nodiscard]] static arrow_proxy create_proxy(
474 array&& flat_values,
475 offset_buffer_type&& list_offsets,
476 VB&& validity_input,
477 std::optional<std::string_view> name = std::nullopt,
478 std::optional<METADATA_RANGE> metadata = std::nullopt
479 );
480
499 template <
501 std::ranges::input_range OFFSET_BUFFER_RANGE,
502 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
503 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
504 [[nodiscard]] static arrow_proxy create_proxy(
505 array&& flat_values,
506 OFFSET_BUFFER_RANGE&& list_offsets_range,
507 VB&& validity_input,
508 std::optional<std::string_view> name = std::nullopt,
509 std::optional<METADATA_RANGE> metadata = std::nullopt
510 )
511 {
512 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
513 return list_array_impl<BIG>::create_proxy(
514 std::move(flat_values),
515 std::move(list_offsets),
516 std::forward<VB>(validity_input),
517 std::forward<std::optional<std::string_view>>(name),
518 std::forward<std::optional<METADATA_RANGE>>(metadata)
519 );
520 }
521
522 template <
523 validity_bitmap_input VB = validity_bitmap,
524 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
525 [[nodiscard]] static arrow_proxy create_proxy(
526 array&& flat_values,
527 offset_buffer_type&& list_offsets,
528 bool nullable = true,
529 std::optional<std::string_view> name = std::nullopt,
530 std::optional<METADATA_RANGE> metadata = std::nullopt
531 );
532
533 template <
534 validity_bitmap_input VB = validity_bitmap,
535 std::ranges::input_range OFFSET_BUFFER_RANGE,
536 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
537 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
538 [[nodiscard]] static arrow_proxy create_proxy(
539 array&& flat_values,
540 OFFSET_BUFFER_RANGE&& list_offsets_range,
541 bool nullable = true,
542 std::optional<std::string_view> name = std::nullopt,
543 std::optional<METADATA_RANGE> metadata = std::nullopt
544 )
545 {
546 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
547 return list_array_impl<BIG>::create_proxy(
548 std::move(flat_values),
549 std::move(list_offsets),
550 nullable,
551 std::forward<std::optional<std::string_view>>(name),
552 std::forward<std::optional<METADATA_RANGE>>(metadata)
553 );
554 }
555
556 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
557 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
558
559 [[nodiscard]] constexpr offset_type* make_list_offsets();
560
561 offset_type* p_list_offsets;
562
563 // friend classes
564 friend class array_crtp_base<self_type>;
565 friend class list_array_crtp_base<self_type>;
566 };
567
568 template <bool BIG>
569 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
570 {
571 public:
572
576 using list_size_type = inner_types::list_size_type;
578 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
581
594
605
618
619 constexpr list_view_array_impl(self_type&&) = default;
620 constexpr list_view_array_impl& operator=(self_type&&) = default;
621
634 template <class... ARGS>
636 list_view_array_impl(ARGS&&... args)
637 : self_type(create_proxy(std::forward<ARGS>(args)...))
638 {
639 }
640
641 private:
642
666 template <
667 std::ranges::input_range OFFSET_BUFFER_RANGE,
668 std::ranges::input_range SIZE_RANGE,
670 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
671 requires(
672 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
673 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
674 )
675 [[nodiscard]] static arrow_proxy create_proxy(
676 array&& flat_values,
677 OFFSET_BUFFER_RANGE&& list_offsets,
678 SIZE_RANGE&& list_sizes,
679 VB&& validity_input,
680 std::optional<std::string_view> name = std::nullopt,
681 std::optional<METADATA_RANGE> metadata = std::nullopt
682 )
683 {
684 return list_view_array_impl<BIG>::create_proxy(
685 std::move(flat_values),
686 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
687 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
688 std::forward<VB>(validity_input),
689 name,
690 metadata
691 );
692 }
693
694 template <
695 validity_bitmap_input VB = validity_bitmap,
696 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
697 [[nodiscard]] static arrow_proxy create_proxy(
698 array&& flat_values,
699 offset_buffer_type&& list_offsets,
700 size_buffer_type&& list_sizes,
701 VB&& validity_input,
702 std::optional<std::string_view> name = std::nullopt,
703 std::optional<METADATA_RANGE> metadata = std::nullopt
704 );
705
706 template <
707 std::ranges::input_range OFFSET_BUFFER_RANGE,
708 std::ranges::input_range SIZE_RANGE,
709 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
710 requires(
711 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
712 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
713 )
714 [[nodiscard]] static arrow_proxy create_proxy(
715 array&& flat_values,
716 OFFSET_BUFFER_RANGE&& list_offsets,
717 SIZE_RANGE&& list_sizes,
718 bool nullable = true,
719 std::optional<std::string_view> name = std::nullopt,
720 std::optional<METADATA_RANGE> metadata = std::nullopt
721 )
722 {
723 return list_view_array_impl<BIG>::create_proxy(
724 std::move(flat_values),
725 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
726 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
727 nullable,
728 name,
729 metadata
730 );
731 }
732
733 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
734 [[nodiscard]] static arrow_proxy create_proxy(
735 array&& flat_values,
736 offset_buffer_type&& list_offsets,
737 size_buffer_type&& list_sizes,
738 bool nullable = true,
739 std::optional<std::string_view> name = std::nullopt,
740 std::optional<METADATA_RANGE> metadata = std::nullopt
741 );
742
743 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
744 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
745 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
746
747 [[nodiscard]] constexpr offset_type* make_list_offsets();
748 [[nodiscard]] constexpr offset_type* make_list_sizes();
749
750 offset_type* p_list_offsets;
751 offset_type* p_list_sizes;
752
753 // friend classes
754 friend class array_crtp_base<self_type>;
755 friend class list_array_crtp_base<self_type>;
756 };
757
758 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
759 {
760 public:
761
765 using list_size_type = inner_types::list_size_type;
767 using offset_type = std::uint64_t;
768
780 explicit fixed_sized_list_array(arrow_proxy proxy);
781
782 constexpr fixed_sized_list_array(const self_type&) = default;
784
787
799 template <class... ARGS>
802 : self_type(create_proxy(std::forward<ARGS>(args)...))
803 {
804 }
805
806 private:
807
826 template <
828 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
829 [[nodiscard]] static arrow_proxy create_proxy(
830 std::uint64_t list_size,
831 array&& flat_values,
832 R&& validity_input,
833 std::optional<std::string_view> name = std::nullopt,
834 std::optional<METADATA_RANGE> metadata = std::nullopt
835 );
836
855 template <
857 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
858 [[nodiscard]] static arrow_proxy create_proxy(
859 std::uint64_t list_size,
860 array&& flat_values,
861 bool nullable = true,
862 std::optional<std::string_view> name = std::nullopt,
863 std::optional<METADATA_RANGE> metadata = std::nullopt
864 );
865
878 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
879
890 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
891
892 uint64_t m_list_size;
893
894 // friend classes
895 friend class array_crtp_base<self_type>;
896 friend class list_array_crtp_base<self_type>;
897 };
898
899 /***************************************
900 * list_array_crtp_base implementation *
901 ***************************************/
902
903 template <class DERIVED>
905 : base_type(std::move(proxy))
906 , p_flat_array(make_flat_array())
907 {
908 }
909
910 template <class DERIVED>
912 : base_type(rhs)
913 , p_flat_array(make_flat_array())
914 {
915 }
916
917 template <class DERIVED>
919 {
921 p_flat_array = make_flat_array();
922 return *this;
923 }
924
925 template <class DERIVED>
927 {
928 return p_flat_array.get();
929 }
930
931 template <class DERIVED>
933 {
934 return p_flat_array.get();
935 }
936
937 template <class DERIVED>
938 constexpr auto list_array_crtp_base<DERIVED>::value_begin() -> value_iterator
939 {
940 return value_iterator(detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()), 0);
941 }
942
943 template <class DERIVED>
944 constexpr auto list_array_crtp_base<DERIVED>::value_end() -> value_iterator
945 {
946 return value_iterator(
947 detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()),
948 this->size()
949 );
950 }
951
952 template <class DERIVED>
953 constexpr auto list_array_crtp_base<DERIVED>::value_cbegin() const -> const_value_iterator
954 {
955 return const_value_iterator(
957 0
958 );
959 }
960
961 template <class DERIVED>
962 constexpr auto list_array_crtp_base<DERIVED>::value_cend() const -> const_value_iterator
963 {
964 return const_value_iterator(
966 this->size()
967 );
968 }
969
970 template <class DERIVED>
971 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
972 {
973 const auto r = this->derived_cast().offset_range(i);
974 using st = typename list_value::size_type;
975 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
976 }
977
978 template <class DERIVED>
979 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
980 {
981 const auto r = this->derived_cast().offset_range(i);
982 using st = typename list_value::size_type;
983 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
984 }
985
986 template <class DERIVED>
987 cloning_ptr<array_wrapper> list_array_crtp_base<DERIVED>::make_flat_array()
988 {
989 return array_factory(this->get_arrow_proxy().children()[0].view());
990 }
991
992 /**********************************
993 * list_array_impl implementation *
994 **********************************/
995
996#ifdef __GNUC__
997# pragma GCC diagnostic push
998# pragma GCC diagnostic ignored "-Wcast-align"
999#endif
1000
1001 template <bool BIG>
1003 : base_type(std::move(proxy))
1004 , p_list_offsets(make_list_offsets())
1005 {
1006 }
1007
1008 template <bool BIG>
1009 template <std::ranges::range SIZES_RANGE>
1011 {
1013 std::forward<SIZES_RANGE>(sizes)
1014 );
1015 }
1016
1017 template <bool BIG>
1018 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1019 arrow_proxy list_array_impl<BIG>::create_proxy(
1020 array&& flat_values,
1021 offset_buffer_type&& list_offsets,
1022 VB&& validity_input,
1023 std::optional<std::string_view> name,
1024 std::optional<METADATA_RANGE> metadata
1025 )
1026 {
1027 const auto size = list_offsets.size() - 1;
1028 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1029 const auto null_count = vbitmap.null_count();
1030
1031 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1032
1034 BIG ? std::string("+L") : std::string("+l"),
1035 std::move(flat_schema),
1036 name,
1037 metadata,
1038 true // nullable
1039 );
1040
1041 std::vector<buffer<std::uint8_t>> arr_buffs = {
1042 std::move(vbitmap).extract_storage(),
1043 std::move(list_offsets).extract_storage()
1044 };
1045
1047 static_cast<std::int64_t>(size),
1048 static_cast<std::int64_t>(null_count),
1049 std::move(arr_buffs),
1050 std::move(flat_arr)
1051 );
1052
1053 return arrow_proxy{std::move(arr), std::move(schema)};
1054 }
1055
1056 template <bool BIG>
1057 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1058 arrow_proxy list_array_impl<BIG>::create_proxy(
1059 array&& flat_values,
1060 offset_buffer_type&& list_offsets,
1061 bool nullable,
1062 std::optional<std::string_view> name,
1063 std::optional<METADATA_RANGE> metadata
1064 )
1065 {
1066 if (nullable)
1067 {
1068 return list_array_impl<BIG>::create_proxy(
1069 std::move(flat_values),
1070 std::move(list_offsets),
1072 name,
1073 metadata
1074 );
1075 }
1076
1077 const auto size = list_offsets.size() - 1;
1078 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1079
1080 ArrowSchema schema = detail::make_list_arrow_schema(
1081 BIG ? std::string("+L") : std::string("+l"),
1082 std::move(flat_schema),
1083 name,
1084 metadata,
1085 false // not nullable
1086 );
1087
1088 std::vector<buffer<std::uint8_t>> arr_buffs = {
1089 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
1090 std::move(list_offsets).extract_storage()
1091 };
1092
1093 ArrowArray arr = detail::make_list_arrow_array(
1094 static_cast<std::int64_t>(size),
1095 0, // null_count
1096 std::move(arr_buffs),
1097 std::move(flat_arr)
1098 );
1099
1100 return arrow_proxy{std::move(arr), std::move(schema)};
1101 }
1102
1103 template <bool BIG>
1105 : base_type(rhs)
1106 , p_list_offsets(make_list_offsets())
1107 {
1108 }
1109
1110 template <bool BIG>
1112 {
1113 if (this != &rhs)
1114 {
1116 p_list_offsets = make_list_offsets();
1117 }
1118 return *this;
1119 }
1120
1121 template <bool BIG>
1122 constexpr auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
1123 {
1124 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
1125 }
1126
1127 template <bool BIG>
1128 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
1129 {
1130 return reinterpret_cast<offset_type*>(
1131 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1132 );
1133 }
1134
1135 /***************************************
1136 * list_view_array_impl implementation *
1137 ***************************************/
1138
1139 template <bool BIG>
1141 : base_type(std::move(proxy))
1142 , p_list_offsets(make_list_offsets())
1143 , p_list_sizes(make_list_sizes())
1144 {
1145 }
1146
1147 template <bool BIG>
1148 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1149 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1150 array&& flat_values,
1151 offset_buffer_type&& list_offsets,
1152 size_buffer_type&& list_sizes,
1153 VB&& validity_input,
1154 std::optional<std::string_view> name,
1155 std::optional<METADATA_RANGE> metadata
1156 )
1157 {
1158 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1159 const auto size = list_sizes.size();
1160 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1161 const auto null_count = vbitmap.null_count();
1162
1163 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1164
1166 BIG ? std::string("+vL") : std::string("+vl"),
1167 std::move(flat_schema),
1168 name,
1169 metadata,
1170 true // nullable
1171 );
1172
1173 std::vector<buffer<std::uint8_t>> arr_buffs = {
1174 std::move(vbitmap).extract_storage(),
1175 std::move(list_offsets).extract_storage(),
1176 std::move(list_sizes).extract_storage()
1177 };
1178
1180 static_cast<std::int64_t>(size),
1181 static_cast<std::int64_t>(null_count),
1182 std::move(arr_buffs),
1183 std::move(flat_arr)
1184 );
1185
1186 return arrow_proxy{std::move(arr), std::move(schema)};
1187 }
1188
1189 template <bool BIG>
1190 template <input_metadata_container METADATA_RANGE>
1191 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1192 array&& flat_values,
1193 offset_buffer_type&& list_offsets,
1194 size_buffer_type&& list_sizes,
1195 bool nullable,
1196 std::optional<std::string_view> name,
1197 std::optional<METADATA_RANGE> metadata
1198 )
1199 {
1200 if (nullable)
1201 {
1202 return list_view_array_impl<BIG>::create_proxy(
1203 std::move(flat_values),
1204 std::move(list_offsets),
1205 std::move(list_sizes),
1207 name,
1208 metadata
1209 );
1210 }
1211
1212 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1213 const auto size = list_sizes.size();
1214 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1215
1216 ArrowSchema schema = detail::make_list_arrow_schema(
1217 BIG ? std::string("+vL") : std::string("+vl"),
1218 std::move(flat_schema),
1219 name,
1220 metadata,
1221 false // not nullable
1222 );
1223
1224 std::vector<buffer<std::uint8_t>> arr_buffs = {
1225 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
1226 std::move(list_offsets).extract_storage(),
1227 std::move(list_sizes).extract_storage()
1228 };
1229
1230 ArrowArray arr = detail::make_list_arrow_array(
1231 static_cast<std::int64_t>(size),
1232 0, // null_count
1233 std::move(arr_buffs),
1234 std::move(flat_arr)
1235 );
1236
1237 return arrow_proxy{std::move(arr), std::move(schema)};
1238 }
1239
1240 template <bool BIG>
1242 : base_type(rhs)
1243 , p_list_offsets(make_list_offsets())
1244 , p_list_sizes(make_list_sizes())
1245 {
1246 }
1247
1248 template <bool BIG>
1250 {
1251 if (this != &rhs)
1252 {
1254 p_list_offsets = make_list_offsets();
1255 p_list_sizes = make_list_sizes();
1256 }
1257 return *this;
1258 }
1259
1260 template <bool BIG>
1261 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i) const
1262 -> std::pair<offset_type, offset_type>
1263 {
1264 const auto offset = p_list_offsets[i];
1265 return std::make_pair(offset, offset + p_list_sizes[i]);
1266 }
1267
1268 template <bool BIG>
1269 constexpr auto list_view_array_impl<BIG>::make_list_offsets() -> offset_type*
1270 {
1271 return reinterpret_cast<offset_type*>(
1272 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1273 );
1274 }
1275
1276 template <bool BIG>
1277 constexpr auto list_view_array_impl<BIG>::make_list_sizes() -> offset_type*
1278 {
1279 return reinterpret_cast<offset_type*>(
1280 this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1281 );
1282 }
1283
1284#ifdef __GNUC__
1285# pragma GCC diagnostic pop
1286#endif
1287
1288 /*****************************************
1289 * fixed_sized_list_array implementation *
1290 *****************************************/
1291
1292 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
1293 {
1294 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
1295 const auto n_digits = format.size() - 3;
1296 const auto list_size_str = format.substr(3, n_digits);
1297 return std::stoull(std::string(list_size_str));
1298 }
1299
1301 : base_type(std::move(proxy))
1302 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
1303 {
1304 }
1305
1306 constexpr auto fixed_sized_list_array::offset_range(size_type i) const
1307 -> std::pair<offset_type, offset_type>
1308 {
1309 const auto offset = i * m_list_size;
1310 return std::make_pair(offset, offset + m_list_size);
1311 }
1312
1313 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1314 inline arrow_proxy fixed_sized_list_array::create_proxy(
1315 std::uint64_t list_size,
1316 array&& flat_values,
1317 R&& validity_input,
1318 std::optional<std::string_view> name,
1319 std::optional<METADATA_RANGE> metadata
1320 )
1321 {
1322 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1323 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
1324 const auto null_count = vbitmap.null_count();
1325
1326 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1327
1328 std::string format = "+w:" + std::to_string(list_size);
1330 std::move(format),
1331 std::move(flat_schema),
1332 name,
1333 metadata,
1334 true // nullable
1335 );
1336
1337 std::vector<buffer<std::uint8_t>> arr_buffs = {vbitmap.extract_storage()};
1338
1339 ArrowArray arr = detail::make_list_arrow_array(
1340 static_cast<std::int64_t>(size),
1341 static_cast<std::int64_t>(null_count),
1342 std::move(arr_buffs),
1343 std::move(flat_arr)
1344 );
1345
1346 return arrow_proxy{std::move(arr), std::move(schema)};
1347 }
1348
1349 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1350 inline arrow_proxy fixed_sized_list_array::create_proxy(
1351 std::uint64_t list_size,
1352 array&& flat_values,
1353 bool nullable,
1354 std::optional<std::string_view> name,
1355 std::optional<METADATA_RANGE> metadata
1356 )
1357 {
1358 if (nullable)
1359 {
1360 return fixed_sized_list_array::create_proxy(
1361 list_size,
1362 std::move(flat_values),
1364 name,
1365 metadata
1366 );
1367 }
1368
1369 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1370 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1371
1372 std::string format = "+w:" + std::to_string(list_size);
1373 ArrowSchema schema = detail::make_list_arrow_schema(
1374 std::move(format),
1375 std::move(flat_schema),
1376 name,
1377 metadata,
1378 false // not nullable
1379 );
1380
1381 std::vector<buffer<std::uint8_t>> arr_buffs = {
1382 buffer<std::uint8_t>{nullptr, 0} // no validity bitmap
1383 };
1384
1385 ArrowArray arr = detail::make_list_arrow_array(
1386 static_cast<std::int64_t>(size),
1387 0, // null_count
1388 std::move(arr_buffs),
1389 std::move(flat_arr)
1390 );
1391
1392 return arrow_proxy{std::move(arr), std::move(schema)};
1393 }
1394}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:43
Object that owns a piece of contiguous memory.
Definition buffer.hpp:113
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
fixed_sized_list_array & operator=(const self_type &)=default
constexpr fixed_sized_list_array(const self_type &)=default
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
Constructs fixed size list array from Arrow proxy.
fixed_sized_list_array(ARGS &&... args)
Generic constructor for creating fixed size list array.
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
fixed_sized_list_array(self_type &&)=default
typename base_type::size_type size_type
CRTP base class for all list array implementations.
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
Copy assignment operator.
constexpr array_wrapper * raw_flat_array()
Gets mutable access to the underlying flat array.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::iterator_tag iterator_tag
list_array_crtp_base(arrow_proxy proxy)
Constructs list array base from Arrow proxy.
constexpr list_array_crtp_base(const self_type &)
Copy constructor.
typename inner_types::value_iterator value_iterator
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_value_type > value_type
constexpr const array_wrapper * raw_flat_array() const
Gets read-only access to the underlying flat array.
array_bitmap_base< DERIVED > base_type
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr list_array_impl(const self_type &)
Copy constructor.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
Copy assignment operator.
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from list sizes.
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
Constructs list array from Arrow proxy.
std::size_t size_type
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
Constructs list view array from Arrow proxy.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
list_view_array_impl(ARGS &&... args)
Generic constructor for creating list view array from various inputs.
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
Copy constructor.
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
Copy assignment operator.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT(expr__, message__)
ArrowArray make_list_arrow_array(std::int64_t size, std::int64_t null_count, std::vector< buffer< std::uint8_t > > &&arr_buffs, ArrowArray &&flat_arr)
ArrowSchema make_list_arrow_schema(std::string format, ArrowSchema &&flat_schema, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata, bool nullable)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:110
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_list_array_v
Checks whether T is a list_array type.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
list_array_impl< true > big_list_array
A big list array implementation.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.