sparrow 2.2.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string> // for std::stoull
19#include <type_traits>
20#include <vector>
21
22#include "sparrow/array_api.hpp"
35
36namespace sparrow
37{
38 template <class DERIVED>
40
41 template <bool BIG>
42 class list_array_impl;
43
44 template <bool BIG>
46
69
84
86
90 template <class T>
91 constexpr bool is_list_array_v = std::same_as<T, list_array>;
92
96 template <class T>
97 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
98
102 template <class T>
103 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
104
108 template <class T>
109 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
110
114 template <class T>
115 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
116
117 namespace detail
118 {
119 template <bool BIG>
121 {
122 [[nodiscard]] static constexpr sparrow::data_type get()
123 {
125 }
126 };
127
128 template <bool BIG>
130 {
131 [[nodiscard]] static constexpr sparrow::data_type get()
132 {
134 }
135 };
136
137 template <>
139 {
140 [[nodiscard]] static constexpr sparrow::data_type get()
141 {
143 }
144 };
145
146 // Helper to build arrow schema for list arrays
147 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
149 std::string format,
150 ArrowSchema&& flat_schema,
151 std::optional<std::string_view> name,
152 std::optional<METADATA_RANGE> metadata,
153 bool nullable
154 )
155 {
157 std::optional<std::unordered_set<ArrowFlag>>
158 flags = nullable ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::NULLABLE}}
159 : std::nullopt;
160
161 return make_arrow_schema(
162 std::move(format),
163 name,
164 metadata,
165 flags,
166 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))},
168 nullptr, // dictionary
169 true // dictionary ownership
170 );
171 }
172
173 // Helper to build arrow array for list arrays
175 std::int64_t size,
176 std::int64_t null_count,
177 std::vector<buffer<std::uint8_t>>&& arr_buffs,
178 ArrowArray&& flat_arr
179 )
180 {
182 return make_arrow_array(
183 size,
184 null_count,
185 0, // offset
186 std::move(arr_buffs),
187 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))},
189 nullptr, // dictionary
190 true // dictionary ownership
191 );
192 }
193 }
194
195 template <bool BIG>
208
209 template <bool BIG>
222
223 template <>
236
251 template <class DERIVED>
253 {
254 public:
255
259 using value_iterator = typename inner_types::value_iterator;
260 using const_value_iterator = typename inner_types::const_value_iterator;
262
265
267
271
275
283 [[nodiscard]] constexpr const array_wrapper* raw_flat_array() const;
284
292 [[nodiscard]] constexpr array_wrapper* raw_flat_array();
293
294 protected:
295
305
316
329
330 constexpr list_array_crtp_base(self_type&&) noexcept = default;
331 constexpr list_array_crtp_base& operator=(self_type&&) noexcept = default;
332
333 private:
334
335 using list_size_type = inner_types::list_size_type;
336
337 [[nodiscard]] constexpr value_iterator value_begin();
338 [[nodiscard]] constexpr value_iterator value_end();
339 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
340 [[nodiscard]] constexpr const_value_iterator value_cend() const;
341
342 [[nodiscard]] constexpr inner_reference value(size_type i);
343 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
344
345 [[nodiscard]] cloning_ptr<array_wrapper> make_flat_array();
346
347 // data members
349
350 // friend classes
351 friend class array_crtp_base<DERIVED>;
352
353 // needs access to this->value(i)
354 friend class detail::layout_value_functor<DERIVED, inner_value_type>;
355 friend class detail::layout_value_functor<const DERIVED, inner_value_type>;
356 };
357
358 template <bool BIG>
360 {
361 public:
362
366 using list_size_type = inner_types::list_size_type;
368 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
370
383
393 constexpr list_array_impl(const self_type&);
394
407
408 constexpr list_array_impl(self_type&&) noexcept = default;
409 constexpr list_array_impl& operator=(self_type&&) noexcept = default;
410
422 template <class... ARGS>
423 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
424 explicit list_array_impl(ARGS&&... args)
425 : self_type(create_proxy(std::forward<ARGS>(args)...))
426 {
427 }
428
446 template <std::ranges::range SIZES_RANGE>
447 [[nodiscard]] static constexpr auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
448
449 private:
450
470 template <
472 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
473 [[nodiscard]] static arrow_proxy create_proxy(
474 array&& flat_values,
475 offset_buffer_type&& list_offsets,
476 VB&& validity_input,
477 std::optional<std::string_view> name = std::nullopt,
478 std::optional<METADATA_RANGE> metadata = std::nullopt
479 );
480
499 template <
501 std::ranges::input_range OFFSET_BUFFER_RANGE,
502 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
503 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
504 [[nodiscard]] static arrow_proxy create_proxy(
505 array&& flat_values,
506 OFFSET_BUFFER_RANGE&& list_offsets_range,
507 VB&& validity_input,
508 std::optional<std::string_view> name = std::nullopt,
509 std::optional<METADATA_RANGE> metadata = std::nullopt
510 )
511 {
512 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
513 return list_array_impl<BIG>::create_proxy(
514 std::move(flat_values),
515 std::move(list_offsets),
516 std::forward<VB>(validity_input),
517 std::forward<std::optional<std::string_view>>(name),
518 std::forward<std::optional<METADATA_RANGE>>(metadata)
519 );
520 }
521
522 template <
523 validity_bitmap_input VB = validity_bitmap,
524 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
525 [[nodiscard]] static arrow_proxy create_proxy(
526 array&& flat_values,
527 offset_buffer_type&& list_offsets,
528 bool nullable = true,
529 std::optional<std::string_view> name = std::nullopt,
530 std::optional<METADATA_RANGE> metadata = std::nullopt
531 );
532
533 template <
534 validity_bitmap_input VB = validity_bitmap,
535 std::ranges::input_range OFFSET_BUFFER_RANGE,
536 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
537 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
538 [[nodiscard]] static arrow_proxy create_proxy(
539 array&& flat_values,
540 OFFSET_BUFFER_RANGE&& list_offsets_range,
541 bool nullable = true,
542 std::optional<std::string_view> name = std::nullopt,
543 std::optional<METADATA_RANGE> metadata = std::nullopt
544 )
545 {
546 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
547 return list_array_impl<BIG>::create_proxy(
548 std::move(flat_values),
549 std::move(list_offsets),
550 nullable,
551 std::forward<std::optional<std::string_view>>(name),
552 std::forward<std::optional<METADATA_RANGE>>(metadata)
553 );
554 }
555
556 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
557 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
558
559 [[nodiscard]] constexpr offset_type* make_list_offsets();
560
561 offset_type* p_list_offsets;
562
563 // friend classes
564 friend class array_crtp_base<self_type>;
565 friend class list_array_crtp_base<self_type>;
566 };
567
568 template <bool BIG>
569 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
570 {
571 public:
572
576 using list_size_type = inner_types::list_size_type;
578 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
581
594
605
618
619 constexpr list_view_array_impl(self_type&&) = default;
620 constexpr list_view_array_impl& operator=(self_type&&) = default;
621
634 template <class... ARGS>
636 list_view_array_impl(ARGS&&... args)
637 : self_type(create_proxy(std::forward<ARGS>(args)...))
638 {
639 }
640
641 private:
642
666 template <
667 std::ranges::input_range OFFSET_BUFFER_RANGE,
668 std::ranges::input_range SIZE_RANGE,
670 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
671 requires(
672 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
673 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
674 )
675 [[nodiscard]] static arrow_proxy create_proxy(
676 array&& flat_values,
677 OFFSET_BUFFER_RANGE&& list_offsets,
678 SIZE_RANGE&& list_sizes,
679 VB&& validity_input,
680 std::optional<std::string_view> name = std::nullopt,
681 std::optional<METADATA_RANGE> metadata = std::nullopt
682 )
683 {
684 return list_view_array_impl<BIG>::create_proxy(
685 std::move(flat_values),
686 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
687 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
688 std::forward<VB>(validity_input),
689 name,
690 metadata
691 );
692 }
693
694 template <
695 validity_bitmap_input VB = validity_bitmap,
696 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
697 [[nodiscard]] static arrow_proxy create_proxy(
698 array&& flat_values,
699 offset_buffer_type&& list_offsets,
700 size_buffer_type&& list_sizes,
701 VB&& validity_input,
702 std::optional<std::string_view> name = std::nullopt,
703 std::optional<METADATA_RANGE> metadata = std::nullopt
704 );
705
706 template <
707 std::ranges::input_range OFFSET_BUFFER_RANGE,
708 std::ranges::input_range SIZE_RANGE,
709 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
710 requires(
711 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
712 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
713 )
714 [[nodiscard]] static arrow_proxy create_proxy(
715 array&& flat_values,
716 OFFSET_BUFFER_RANGE&& list_offsets,
717 SIZE_RANGE&& list_sizes,
718 bool nullable = true,
719 std::optional<std::string_view> name = std::nullopt,
720 std::optional<METADATA_RANGE> metadata = std::nullopt
721 )
722 {
723 return list_view_array_impl<BIG>::create_proxy(
724 std::move(flat_values),
725 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
726 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
727 nullable,
728 name,
729 metadata
730 );
731 }
732
733 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
734 [[nodiscard]] static arrow_proxy create_proxy(
735 array&& flat_values,
736 offset_buffer_type&& list_offsets,
737 size_buffer_type&& list_sizes,
738 bool nullable = true,
739 std::optional<std::string_view> name = std::nullopt,
740 std::optional<METADATA_RANGE> metadata = std::nullopt
741 );
742
743 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
744 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
745 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
746
747 [[nodiscard]] constexpr offset_type* make_list_offsets();
748 [[nodiscard]] constexpr offset_type* make_list_sizes();
749
750 offset_type* p_list_offsets;
751 offset_type* p_list_sizes;
752
753 // friend classes
754 friend class array_crtp_base<self_type>;
755 friend class list_array_crtp_base<self_type>;
756 };
757
758 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
759 {
760 public:
761
765 using list_size_type = inner_types::list_size_type;
767 using offset_type = std::uint64_t;
768
780 explicit fixed_sized_list_array(arrow_proxy proxy);
781
782 constexpr fixed_sized_list_array(const self_type&) = default;
784
787
799 template <class... ARGS>
802 : self_type(create_proxy(std::forward<ARGS>(args)...))
803 {
804 }
805
806 private:
807
826 template <
828 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
829 [[nodiscard]] static arrow_proxy create_proxy(
830 std::uint64_t list_size,
831 array&& flat_values,
832 R&& validity_input,
833 std::optional<std::string_view> name = std::nullopt,
834 std::optional<METADATA_RANGE> metadata = std::nullopt
835 );
836
855 template <
857 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
858 [[nodiscard]] static arrow_proxy create_proxy(
859 std::uint64_t list_size,
860 array&& flat_values,
861 bool nullable = true,
862 std::optional<std::string_view> name = std::nullopt,
863 std::optional<METADATA_RANGE> metadata = std::nullopt
864 );
865
878 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
879
890 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
891
892 uint64_t m_list_size;
893
894 // friend classes
895 friend class array_crtp_base<self_type>;
896 friend class list_array_crtp_base<self_type>;
897 };
898
899 /***************************************
900 * list_array_crtp_base implementation *
901 ***************************************/
902
903 template <class DERIVED>
905 : base_type(std::move(proxy))
906 , p_flat_array(make_flat_array())
907 {
908 }
909
910 template <class DERIVED>
912 : base_type(rhs)
913 , p_flat_array(make_flat_array())
914 {
915 }
916
917 template <class DERIVED>
919 {
921 p_flat_array = make_flat_array();
922 return *this;
923 }
924
925 template <class DERIVED>
927 {
928 return p_flat_array.get();
929 }
930
931 template <class DERIVED>
933 {
934 return p_flat_array.get();
935 }
936
937 template <class DERIVED>
938 constexpr auto list_array_crtp_base<DERIVED>::value_begin() -> value_iterator
939 {
940 return value_iterator(detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()), 0);
941 }
942
943 template <class DERIVED>
944 constexpr auto list_array_crtp_base<DERIVED>::value_end() -> value_iterator
945 {
946 return value_iterator(
947 detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()),
948 this->size()
949 );
950 }
951
952 template <class DERIVED>
953 constexpr auto list_array_crtp_base<DERIVED>::value_cbegin() const -> const_value_iterator
954 {
955 return const_value_iterator(
957 0
958 );
959 }
960
961 template <class DERIVED>
962 constexpr auto list_array_crtp_base<DERIVED>::value_cend() const -> const_value_iterator
963 {
964 return const_value_iterator(
966 this->size()
967 );
968 }
969
970 template <class DERIVED>
971 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
972 {
973 const auto r = this->derived_cast().offset_range(i);
974 using st = typename list_value::size_type;
975 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
976 }
977
978 template <class DERIVED>
979 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
980 {
981 const auto r = this->derived_cast().offset_range(i);
982 using st = typename list_value::size_type;
983 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
984 }
985
986 template <class DERIVED>
987 cloning_ptr<array_wrapper> list_array_crtp_base<DERIVED>::make_flat_array()
988 {
989 return array_factory(this->get_arrow_proxy().children()[0].view());
990 }
991
992 /**********************************
993 * list_array_impl implementation *
994 **********************************/
995
996#ifdef __GNUC__
997# pragma GCC diagnostic push
998# pragma GCC diagnostic ignored "-Wcast-align"
999#endif
1000
1001 template <bool BIG>
1003 : base_type(std::move(proxy))
1004 , p_list_offsets(make_list_offsets())
1005 {
1006 }
1007
1008 template <bool BIG>
1009 template <std::ranges::range SIZES_RANGE>
1011 {
1013 std::forward<SIZES_RANGE>(sizes)
1014 );
1015 }
1016
1017 template <bool BIG>
1018 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1019 arrow_proxy list_array_impl<BIG>::create_proxy(
1020 array&& flat_values,
1021 offset_buffer_type&& list_offsets,
1022 VB&& validity_input,
1023 std::optional<std::string_view> name,
1024 std::optional<METADATA_RANGE> metadata
1025 )
1026 {
1027 const auto size = list_offsets.size() - 1;
1028 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1029 const auto null_count = vbitmap.null_count();
1030
1031 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1032
1034 BIG ? std::string("+L") : std::string("+l"),
1035 std::move(flat_schema),
1036 name,
1037 metadata,
1038 true // nullable
1039 );
1040
1041 std::vector<buffer<std::uint8_t>> arr_buffs = {
1042 std::move(vbitmap).extract_storage(),
1043 std::move(list_offsets).extract_storage()
1044 };
1045
1047 static_cast<std::int64_t>(size),
1048 static_cast<std::int64_t>(null_count),
1049 std::move(arr_buffs),
1050 std::move(flat_arr)
1051 );
1052
1053 return arrow_proxy{std::move(arr), std::move(schema)};
1054 }
1055
1056 template <bool BIG>
1057 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1058 arrow_proxy list_array_impl<BIG>::create_proxy(
1059 array&& flat_values,
1060 offset_buffer_type&& list_offsets,
1061 bool nullable,
1062 std::optional<std::string_view> name,
1063 std::optional<METADATA_RANGE> metadata
1064 )
1065 {
1066 if (nullable)
1067 {
1068 return list_array_impl<BIG>::create_proxy(
1069 std::move(flat_values),
1070 std::move(list_offsets),
1072 name,
1073 metadata
1074 );
1075 }
1076
1077 const auto size = list_offsets.size() - 1;
1078 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1079
1080 ArrowSchema schema = detail::make_list_arrow_schema(
1081 BIG ? std::string("+L") : std::string("+l"),
1082 std::move(flat_schema),
1083 name,
1084 metadata,
1085 false // not nullable
1086 );
1087
1088 std::vector<buffer<std::uint8_t>> arr_buffs = {
1090 // bitmap
1091 std::move(list_offsets).extract_storage()
1092 };
1093
1094 ArrowArray arr = detail::make_list_arrow_array(
1095 static_cast<std::int64_t>(size),
1096 0, // null_count
1097 std::move(arr_buffs),
1098 std::move(flat_arr)
1099 );
1100
1101 return arrow_proxy{std::move(arr), std::move(schema)};
1102 }
1103
1104 template <bool BIG>
1106 : base_type(rhs)
1107 , p_list_offsets(make_list_offsets())
1108 {
1109 }
1110
1111 template <bool BIG>
1113 {
1114 if (this != &rhs)
1115 {
1117 p_list_offsets = make_list_offsets();
1118 }
1119 return *this;
1120 }
1121
1122 template <bool BIG>
1123 constexpr auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
1124 {
1125 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
1126 }
1127
1128 template <bool BIG>
1129 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
1130 {
1131 return reinterpret_cast<offset_type*>(
1132 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1133 );
1134 }
1135
1136 /***************************************
1137 * list_view_array_impl implementation *
1138 ***************************************/
1139
1140 template <bool BIG>
1142 : base_type(std::move(proxy))
1143 , p_list_offsets(make_list_offsets())
1144 , p_list_sizes(make_list_sizes())
1145 {
1146 }
1147
1148 template <bool BIG>
1149 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1150 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1151 array&& flat_values,
1152 offset_buffer_type&& list_offsets,
1153 size_buffer_type&& list_sizes,
1154 VB&& validity_input,
1155 std::optional<std::string_view> name,
1156 std::optional<METADATA_RANGE> metadata
1157 )
1158 {
1159 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1160 const auto size = list_sizes.size();
1161 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1162 const auto null_count = vbitmap.null_count();
1163
1164 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1165
1167 BIG ? std::string("+vL") : std::string("+vl"),
1168 std::move(flat_schema),
1169 name,
1170 metadata,
1171 true // nullable
1172 );
1173
1174 std::vector<buffer<std::uint8_t>> arr_buffs = {
1175 std::move(vbitmap).extract_storage(),
1176 std::move(list_offsets).extract_storage(),
1177 std::move(list_sizes).extract_storage()
1178 };
1179
1181 static_cast<std::int64_t>(size),
1182 static_cast<std::int64_t>(null_count),
1183 std::move(arr_buffs),
1184 std::move(flat_arr)
1185 );
1186
1187 return arrow_proxy{std::move(arr), std::move(schema)};
1188 }
1189
1190 template <bool BIG>
1191 template <input_metadata_container METADATA_RANGE>
1192 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1193 array&& flat_values,
1194 offset_buffer_type&& list_offsets,
1195 size_buffer_type&& list_sizes,
1196 bool nullable,
1197 std::optional<std::string_view> name,
1198 std::optional<METADATA_RANGE> metadata
1199 )
1200 {
1201 if (nullable)
1202 {
1203 return list_view_array_impl<BIG>::create_proxy(
1204 std::move(flat_values),
1205 std::move(list_offsets),
1206 std::move(list_sizes),
1208 name,
1209 metadata
1210 );
1211 }
1212
1213 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1214 const auto size = list_sizes.size();
1215 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1216
1217 ArrowSchema schema = detail::make_list_arrow_schema(
1218 BIG ? std::string("+vL") : std::string("+vl"),
1219 std::move(flat_schema),
1220 name,
1221 metadata,
1222 false // not nullable
1223 );
1224
1225 std::vector<buffer<std::uint8_t>> arr_buffs = {
1227 // bitmap
1228 std::move(list_offsets).extract_storage(),
1229 std::move(list_sizes).extract_storage()
1230 };
1231
1232 ArrowArray arr = detail::make_list_arrow_array(
1233 static_cast<std::int64_t>(size),
1234 0, // null_count
1235 std::move(arr_buffs),
1236 std::move(flat_arr)
1237 );
1238
1239 return arrow_proxy{std::move(arr), std::move(schema)};
1240 }
1241
1242 template <bool BIG>
1244 : base_type(rhs)
1245 , p_list_offsets(make_list_offsets())
1246 , p_list_sizes(make_list_sizes())
1247 {
1248 }
1249
1250 template <bool BIG>
1252 {
1253 if (this != &rhs)
1254 {
1256 p_list_offsets = make_list_offsets();
1257 p_list_sizes = make_list_sizes();
1258 }
1259 return *this;
1260 }
1261
1262 template <bool BIG>
1263 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i) const
1264 -> std::pair<offset_type, offset_type>
1265 {
1266 const auto offset = p_list_offsets[i];
1267 return std::make_pair(offset, offset + p_list_sizes[i]);
1268 }
1269
1270 template <bool BIG>
1271 constexpr auto list_view_array_impl<BIG>::make_list_offsets() -> offset_type*
1272 {
1273 return reinterpret_cast<offset_type*>(
1274 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1275 );
1276 }
1277
1278 template <bool BIG>
1279 constexpr auto list_view_array_impl<BIG>::make_list_sizes() -> offset_type*
1280 {
1281 return reinterpret_cast<offset_type*>(
1282 this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1283 );
1284 }
1285
1286#ifdef __GNUC__
1287# pragma GCC diagnostic pop
1288#endif
1289
1290 /*****************************************
1291 * fixed_sized_list_array implementation *
1292 *****************************************/
1293
1294 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
1295 {
1296 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
1297 const auto n_digits = format.size() - 3;
1298 const auto list_size_str = format.substr(3, n_digits);
1299 return std::stoull(std::string(list_size_str));
1300 }
1301
1303 : base_type(std::move(proxy))
1304 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
1305 {
1306 }
1307
1308 constexpr auto fixed_sized_list_array::offset_range(size_type i) const
1309 -> std::pair<offset_type, offset_type>
1310 {
1311 const auto offset = i * m_list_size;
1312 return std::make_pair(offset, offset + m_list_size);
1313 }
1314
1315 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1316 inline arrow_proxy fixed_sized_list_array::create_proxy(
1317 std::uint64_t list_size,
1318 array&& flat_values,
1319 R&& validity_input,
1320 std::optional<std::string_view> name,
1321 std::optional<METADATA_RANGE> metadata
1322 )
1323 {
1324 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1325 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
1326 const auto null_count = vbitmap.null_count();
1327
1328 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1329
1330 std::string format = "+w:" + std::to_string(list_size);
1332 std::move(format),
1333 std::move(flat_schema),
1334 name,
1335 metadata,
1336 true // nullable
1337 );
1338
1339 std::vector<buffer<std::uint8_t>> arr_buffs = {vbitmap.extract_storage()};
1340
1341 ArrowArray arr = detail::make_list_arrow_array(
1342 static_cast<std::int64_t>(size),
1343 static_cast<std::int64_t>(null_count),
1344 std::move(arr_buffs),
1345 std::move(flat_arr)
1346 );
1347
1348 return arrow_proxy{std::move(arr), std::move(schema)};
1349 }
1350
1351 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1352 inline arrow_proxy fixed_sized_list_array::create_proxy(
1353 std::uint64_t list_size,
1354 array&& flat_values,
1355 bool nullable,
1356 std::optional<std::string_view> name,
1357 std::optional<METADATA_RANGE> metadata
1358 )
1359 {
1360 if (nullable)
1361 {
1362 return fixed_sized_list_array::create_proxy(
1363 list_size,
1364 std::move(flat_values),
1366 name,
1367 metadata
1368 );
1369 }
1370
1371 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1372 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1373
1374 std::string format = "+w:" + std::to_string(list_size);
1375 ArrowSchema schema = detail::make_list_arrow_schema(
1376 std::move(format),
1377 std::move(flat_schema),
1378 name,
1379 metadata,
1380 false // not nullable
1381 );
1382
1383 std::vector<buffer<std::uint8_t>> arr_buffs = {
1384 buffer<std::uint8_t>{nullptr, 0, buffer<std::uint8_t>::default_allocator()} // no validity bitmap
1385 };
1386
1387 ArrowArray arr = detail::make_list_arrow_array(
1388 static_cast<std::int64_t>(size),
1389 0, // null_count
1390 std::move(arr_buffs),
1391 std::move(flat_arr)
1392 );
1393
1394 return arrow_proxy{std::move(arr), std::move(schema)};
1395 }
1396}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:43
Object that owns a piece of contiguous memory.
Definition buffer.hpp:114
xsimd::aligned_allocator< T > default_allocator
Definition buffer.hpp:126
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
fixed_sized_list_array & operator=(const self_type &)=default
constexpr fixed_sized_list_array(const self_type &)=default
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
Constructs fixed size list array from Arrow proxy.
fixed_sized_list_array(ARGS &&... args)
Generic constructor for creating fixed size list array.
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
fixed_sized_list_array(self_type &&)=default
typename base_type::size_type size_type
CRTP base class for all list array implementations.
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
Copy assignment operator.
constexpr array_wrapper * raw_flat_array()
Gets mutable access to the underlying flat array.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::iterator_tag iterator_tag
list_array_crtp_base(arrow_proxy proxy)
Constructs list array base from Arrow proxy.
constexpr list_array_crtp_base(const self_type &)
Copy constructor.
typename inner_types::value_iterator value_iterator
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_value_type > value_type
constexpr const array_wrapper * raw_flat_array() const
Gets read-only access to the underlying flat array.
array_bitmap_base< DERIVED > base_type
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr list_array_impl(const self_type &)
Copy constructor.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
Copy assignment operator.
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from list sizes.
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
Constructs list array from Arrow proxy.
std::size_t size_type
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
Constructs list view array from Arrow proxy.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
list_view_array_impl(ARGS &&... args)
Generic constructor for creating list view array from various inputs.
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
Copy constructor.
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
Copy assignment operator.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT(expr__, message__)
ArrowArray make_list_arrow_array(std::int64_t size, std::int64_t null_count, std::vector< buffer< std::uint8_t > > &&arr_buffs, ArrowArray &&flat_arr)
ArrowSchema make_list_arrow_schema(std::string format, ArrowSchema &&flat_schema, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata, bool nullable)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:110
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_list_array_v
Checks whether T is a list_array type.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
list_array_impl< true > big_list_array
A big list array implementation.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.