sparrow 2.3.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string> // for std::stoull
19#include <type_traits>
20#include <vector>
21
22#include "sparrow/array_api.hpp"
36
37namespace sparrow
38{
39 template <class DERIVED>
41
42 template <bool BIG>
43 class list_array_impl;
44
45 template <bool BIG>
47
48 namespace copy_tracker
49 {
50 template <typename T>
51 requires std::same_as<T, list_array_impl<false>> || std::same_as<T, list_array_impl<true>>
52 std::string key()
53 {
54 return "list_array";
55 }
56
57 template <typename T>
58 requires std::same_as<T, list_view_array_impl<false>> || std::same_as<T, list_view_array_impl<true>>
59 std::string key()
60 {
61 return "list_view_array";
62 }
63 }
64
87
102
104
105 namespace copy_tracker
106 {
107 template <>
108 inline std::string key<fixed_sized_list_array>()
109 {
110 return "fixed_sized_list_array";
111 }
112 }
113
114
118 template <class T>
119 constexpr bool is_list_array_v = std::same_as<T, list_array>;
120
124 template <class T>
125 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
126
130 template <class T>
131 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
132
136 template <class T>
137 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
138
142 template <class T>
143 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
144
145 namespace detail
146 {
147 template <bool BIG>
149 {
150 [[nodiscard]] static constexpr sparrow::data_type get()
151 {
153 }
154 };
155
156 template <bool BIG>
158 {
159 [[nodiscard]] static constexpr sparrow::data_type get()
160 {
162 }
163 };
164
165 template <>
167 {
168 [[nodiscard]] static constexpr sparrow::data_type get()
169 {
171 }
172 };
173
174 // Helper to build arrow schema for list arrays
175 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
177 std::string format,
178 ArrowSchema&& flat_schema,
179 std::optional<std::string_view> name,
180 std::optional<METADATA_RANGE> metadata,
181 bool nullable
182 )
183 {
185 std::optional<std::unordered_set<ArrowFlag>>
186 flags = nullable ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::NULLABLE}}
187 : std::nullopt;
188
189 return make_arrow_schema(
190 std::move(format),
191 name,
192 metadata,
193 flags,
194 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))},
196 nullptr, // dictionary
197 true // dictionary ownership
198 );
199 }
200
201 // Helper to build arrow array for list arrays
203 std::int64_t size,
204 std::int64_t null_count,
205 std::vector<buffer<std::uint8_t>>&& arr_buffs,
206 ArrowArray&& flat_arr
207 )
208 {
210 return make_arrow_array(
211 size,
212 null_count,
213 0, // offset
214 std::move(arr_buffs),
215 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))},
217 nullptr, // dictionary
218 true // dictionary ownership
219 );
220 }
221 }
222
223 template <bool BIG>
236
237 template <bool BIG>
250
251 template <>
264
279 template <class DERIVED>
281 {
282 public:
283
287 using value_iterator = typename inner_types::value_iterator;
288 using const_value_iterator = typename inner_types::const_value_iterator;
290
293
295
299
303
311 [[nodiscard]] constexpr const array_wrapper* raw_flat_array() const;
312
320 [[nodiscard]] constexpr array_wrapper* raw_flat_array();
321
322 protected:
323
333
344
357
358 constexpr list_array_crtp_base(self_type&&) noexcept = default;
359 constexpr list_array_crtp_base& operator=(self_type&&) noexcept = default;
360
361 private:
362
363 using list_size_type = inner_types::list_size_type;
364
365 [[nodiscard]] constexpr value_iterator value_begin();
366 [[nodiscard]] constexpr value_iterator value_end();
367 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
368 [[nodiscard]] constexpr const_value_iterator value_cend() const;
369
370 [[nodiscard]] constexpr inner_reference value(size_type i);
371 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
372
373 [[nodiscard]] cloning_ptr<array_wrapper> make_flat_array();
374
375 // data members
377
378 // friend classes
379 friend class array_crtp_base<DERIVED>;
380
381 // needs access to this->value(i)
382 friend class detail::layout_value_functor<DERIVED, inner_value_type>;
383 friend class detail::layout_value_functor<const DERIVED, inner_value_type>;
384 };
385
386 template <bool BIG>
388 {
389 public:
390
394 using list_size_type = inner_types::list_size_type;
396 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
398
411
421 constexpr list_array_impl(const self_type&);
422
435
436 constexpr list_array_impl(self_type&&) noexcept = default;
437 constexpr list_array_impl& operator=(self_type&&) noexcept = default;
438
450 template <class... ARGS>
451 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
452 explicit list_array_impl(ARGS&&... args)
453 : self_type(create_proxy(std::forward<ARGS>(args)...))
454 {
455 }
456
474 template <std::ranges::range SIZES_RANGE>
475 [[nodiscard]] static constexpr auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
476
477 private:
478
498 template <
500 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
501 [[nodiscard]] static arrow_proxy create_proxy(
502 array&& flat_values,
503 offset_buffer_type&& list_offsets,
504 VB&& validity_input,
505 std::optional<std::string_view> name = std::nullopt,
506 std::optional<METADATA_RANGE> metadata = std::nullopt
507 );
508
527 template <
529 std::ranges::input_range OFFSET_BUFFER_RANGE,
530 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
531 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
532 [[nodiscard]] static arrow_proxy create_proxy(
533 array&& flat_values,
534 OFFSET_BUFFER_RANGE&& list_offsets_range,
535 VB&& validity_input,
536 std::optional<std::string_view> name = std::nullopt,
537 std::optional<METADATA_RANGE> metadata = std::nullopt
538 )
539 {
540 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
541 return list_array_impl<BIG>::create_proxy(
542 std::move(flat_values),
543 std::move(list_offsets),
544 std::forward<VB>(validity_input),
545 std::forward<std::optional<std::string_view>>(name),
546 std::forward<std::optional<METADATA_RANGE>>(metadata)
547 );
548 }
549
550 template <
551 validity_bitmap_input VB = validity_bitmap,
552 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
553 [[nodiscard]] static arrow_proxy create_proxy(
554 array&& flat_values,
555 offset_buffer_type&& list_offsets,
556 bool nullable = true,
557 std::optional<std::string_view> name = std::nullopt,
558 std::optional<METADATA_RANGE> metadata = std::nullopt
559 );
560
561 template <
562 validity_bitmap_input VB = validity_bitmap,
563 std::ranges::input_range OFFSET_BUFFER_RANGE,
564 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
565 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
566 [[nodiscard]] static arrow_proxy create_proxy(
567 array&& flat_values,
568 OFFSET_BUFFER_RANGE&& list_offsets_range,
569 bool nullable = true,
570 std::optional<std::string_view> name = std::nullopt,
571 std::optional<METADATA_RANGE> metadata = std::nullopt
572 )
573 {
574 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
575 return list_array_impl<BIG>::create_proxy(
576 std::move(flat_values),
577 std::move(list_offsets),
578 nullable,
579 std::forward<std::optional<std::string_view>>(name),
580 std::forward<std::optional<METADATA_RANGE>>(metadata)
581 );
582 }
583
584 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
585 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
586
587 [[nodiscard]] constexpr offset_type* make_list_offsets();
588
589 offset_type* p_list_offsets;
590
591 // friend classes
592 friend class array_crtp_base<self_type>;
593 friend class list_array_crtp_base<self_type>;
594 };
595
596 template <bool BIG>
597 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
598 {
599 public:
600
604 using list_size_type = inner_types::list_size_type;
606 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
609
622
633
646
647 constexpr list_view_array_impl(self_type&&) = default;
648 constexpr list_view_array_impl& operator=(self_type&&) = default;
649
662 template <class... ARGS>
664 list_view_array_impl(ARGS&&... args)
665 : self_type(create_proxy(std::forward<ARGS>(args)...))
666 {
667 }
668
669 private:
670
694 template <
695 std::ranges::input_range OFFSET_BUFFER_RANGE,
696 std::ranges::input_range SIZE_RANGE,
698 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
699 requires(
700 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
701 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
702 )
703 [[nodiscard]] static arrow_proxy create_proxy(
704 array&& flat_values,
705 OFFSET_BUFFER_RANGE&& list_offsets,
706 SIZE_RANGE&& list_sizes,
707 VB&& validity_input,
708 std::optional<std::string_view> name = std::nullopt,
709 std::optional<METADATA_RANGE> metadata = std::nullopt
710 )
711 {
712 return list_view_array_impl<BIG>::create_proxy(
713 std::move(flat_values),
714 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
715 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
716 std::forward<VB>(validity_input),
717 name,
718 metadata
719 );
720 }
721
722 template <
723 validity_bitmap_input VB = validity_bitmap,
724 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
725 [[nodiscard]] static arrow_proxy create_proxy(
726 array&& flat_values,
727 offset_buffer_type&& list_offsets,
728 size_buffer_type&& list_sizes,
729 VB&& validity_input,
730 std::optional<std::string_view> name = std::nullopt,
731 std::optional<METADATA_RANGE> metadata = std::nullopt
732 );
733
734 template <
735 std::ranges::input_range OFFSET_BUFFER_RANGE,
736 std::ranges::input_range SIZE_RANGE,
737 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
738 requires(
739 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
740 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
741 )
742 [[nodiscard]] static arrow_proxy create_proxy(
743 array&& flat_values,
744 OFFSET_BUFFER_RANGE&& list_offsets,
745 SIZE_RANGE&& list_sizes,
746 bool nullable = true,
747 std::optional<std::string_view> name = std::nullopt,
748 std::optional<METADATA_RANGE> metadata = std::nullopt
749 )
750 {
751 return list_view_array_impl<BIG>::create_proxy(
752 std::move(flat_values),
753 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
754 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
755 nullable,
756 name,
757 metadata
758 );
759 }
760
761 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
762 [[nodiscard]] static arrow_proxy create_proxy(
763 array&& flat_values,
764 offset_buffer_type&& list_offsets,
765 size_buffer_type&& list_sizes,
766 bool nullable = true,
767 std::optional<std::string_view> name = std::nullopt,
768 std::optional<METADATA_RANGE> metadata = std::nullopt
769 );
770
771 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
772 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
773 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
774
775 [[nodiscard]] constexpr offset_type* make_list_offsets();
776 [[nodiscard]] constexpr offset_type* make_list_sizes();
777
778 offset_type* p_list_offsets;
779 offset_type* p_list_sizes;
780
781 // friend classes
782 friend class array_crtp_base<self_type>;
783 friend class list_array_crtp_base<self_type>;
784 };
785
786 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
787 {
788 public:
789
793 using list_size_type = inner_types::list_size_type;
795 using offset_type = std::uint64_t;
796
808 explicit fixed_sized_list_array(arrow_proxy proxy);
809
812
815
827 template <class... ARGS>
830 : self_type(create_proxy(std::forward<ARGS>(args)...))
831 {
832 }
833
834 private:
835
854 template <
856 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
857 [[nodiscard]] static arrow_proxy create_proxy(
858 std::uint64_t list_size,
859 array&& flat_values,
860 R&& validity_input,
861 std::optional<std::string_view> name = std::nullopt,
862 std::optional<METADATA_RANGE> metadata = std::nullopt
863 );
864
883 template <
885 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
886 [[nodiscard]] static arrow_proxy create_proxy(
887 std::uint64_t list_size,
888 array&& flat_values,
889 bool nullable = true,
890 std::optional<std::string_view> name = std::nullopt,
891 std::optional<METADATA_RANGE> metadata = std::nullopt
892 );
893
906 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
907
918 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
919
920 uint64_t m_list_size;
921
922 // friend classes
923 friend class array_crtp_base<self_type>;
924 friend class list_array_crtp_base<self_type>;
925 };
926
927 /***************************************
928 * list_array_crtp_base implementation *
929 ***************************************/
930
931 template <class DERIVED>
933 : base_type(std::move(proxy))
934 , p_flat_array(make_flat_array())
935 {
936 }
937
938 template <class DERIVED>
940 : base_type(rhs)
941 , p_flat_array(make_flat_array())
942 {
943 }
944
945 template <class DERIVED>
947 {
949 p_flat_array = make_flat_array();
950 return *this;
951 }
952
953 template <class DERIVED>
955 {
956 return p_flat_array.get();
957 }
958
959 template <class DERIVED>
961 {
962 return p_flat_array.get();
963 }
964
965 template <class DERIVED>
966 constexpr auto list_array_crtp_base<DERIVED>::value_begin() -> value_iterator
967 {
968 return value_iterator(detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()), 0);
969 }
970
971 template <class DERIVED>
972 constexpr auto list_array_crtp_base<DERIVED>::value_end() -> value_iterator
973 {
974 return value_iterator(
975 detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()),
976 this->size()
977 );
978 }
979
980 template <class DERIVED>
981 constexpr auto list_array_crtp_base<DERIVED>::value_cbegin() const -> const_value_iterator
982 {
983 return const_value_iterator(
985 0
986 );
987 }
988
989 template <class DERIVED>
990 constexpr auto list_array_crtp_base<DERIVED>::value_cend() const -> const_value_iterator
991 {
992 return const_value_iterator(
994 this->size()
995 );
996 }
997
998 template <class DERIVED>
999 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
1000 {
1001 const auto r = this->derived_cast().offset_range(i);
1002 using st = typename list_value::size_type;
1003 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
1004 }
1005
1006 template <class DERIVED>
1007 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
1008 {
1009 const auto r = this->derived_cast().offset_range(i);
1010 using st = typename list_value::size_type;
1011 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
1012 }
1013
1014 template <class DERIVED>
1015 cloning_ptr<array_wrapper> list_array_crtp_base<DERIVED>::make_flat_array()
1016 {
1017 return array_factory(this->get_arrow_proxy().children()[0].view());
1018 }
1019
1020 /**********************************
1021 * list_array_impl implementation *
1022 **********************************/
1023
1024#ifdef __GNUC__
1025# pragma GCC diagnostic push
1026# pragma GCC diagnostic ignored "-Wcast-align"
1027#endif
1028
1029 template <bool BIG>
1031 : base_type(std::move(proxy))
1032 , p_list_offsets(make_list_offsets())
1033 {
1034 }
1035
1036 template <bool BIG>
1037 template <std::ranges::range SIZES_RANGE>
1039 {
1041 std::forward<SIZES_RANGE>(sizes)
1042 );
1043 }
1044
1045 template <bool BIG>
1046 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1047 arrow_proxy list_array_impl<BIG>::create_proxy(
1048 array&& flat_values,
1049 offset_buffer_type&& list_offsets,
1050 VB&& validity_input,
1051 std::optional<std::string_view> name,
1052 std::optional<METADATA_RANGE> metadata
1053 )
1054 {
1055 const auto size = list_offsets.size() - 1;
1056 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1057 const auto null_count = vbitmap.null_count();
1058
1059 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1060
1062 BIG ? std::string("+L") : std::string("+l"),
1063 std::move(flat_schema),
1064 name,
1065 metadata,
1066 true // nullable
1067 );
1068
1069 std::vector<buffer<std::uint8_t>> arr_buffs;
1070 arr_buffs.reserve(2);
1071 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1072 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1073
1075 static_cast<std::int64_t>(size),
1076 static_cast<std::int64_t>(null_count),
1077 std::move(arr_buffs),
1078 std::move(flat_arr)
1079 );
1080
1081 return arrow_proxy{std::move(arr), std::move(schema)};
1082 }
1083
1084 template <bool BIG>
1085 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1086 arrow_proxy list_array_impl<BIG>::create_proxy(
1087 array&& flat_values,
1088 offset_buffer_type&& list_offsets,
1089 bool nullable,
1090 std::optional<std::string_view> name,
1091 std::optional<METADATA_RANGE> metadata
1092 )
1093 {
1094 if (nullable)
1095 {
1096 return list_array_impl<BIG>::create_proxy(
1097 std::move(flat_values),
1098 std::move(list_offsets),
1100 name,
1101 metadata
1102 );
1103 }
1104
1105 const auto size = list_offsets.size() - 1;
1106 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1107
1108 ArrowSchema schema = detail::make_list_arrow_schema(
1109 BIG ? std::string("+L") : std::string("+l"),
1110 std::move(flat_schema),
1111 name,
1112 metadata,
1113 false // not nullable
1114 );
1115
1116 std::vector<buffer<std::uint8_t>> arr_buffs;
1117 arr_buffs.reserve(2);
1118 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
1119 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1120
1121 ArrowArray arr = detail::make_list_arrow_array(
1122 static_cast<std::int64_t>(size),
1123 0, // null_count
1124 std::move(arr_buffs),
1125 std::move(flat_arr)
1126 );
1127
1128 return arrow_proxy{std::move(arr), std::move(schema)};
1129 }
1130
1131 template <bool BIG>
1133 : base_type(rhs)
1134 , p_list_offsets(make_list_offsets())
1135 {
1137 }
1138
1139 template <bool BIG>
1141 {
1143 if (this != &rhs)
1144 {
1146 p_list_offsets = make_list_offsets();
1147 }
1148 return *this;
1149 }
1150
1151 template <bool BIG>
1152 constexpr auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
1153 {
1154 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
1155 }
1156
1157 template <bool BIG>
1158 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
1159 {
1160 return reinterpret_cast<offset_type*>(
1161 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1162 );
1163 }
1164
1165 /***************************************
1166 * list_view_array_impl implementation *
1167 ***************************************/
1168
1169 template <bool BIG>
1171 : base_type(std::move(proxy))
1172 , p_list_offsets(make_list_offsets())
1173 , p_list_sizes(make_list_sizes())
1174 {
1175 }
1176
1177 template <bool BIG>
1178 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1179 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1180 array&& flat_values,
1181 offset_buffer_type&& list_offsets,
1182 size_buffer_type&& list_sizes,
1183 VB&& validity_input,
1184 std::optional<std::string_view> name,
1185 std::optional<METADATA_RANGE> metadata
1186 )
1187 {
1188 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1189 const auto size = list_sizes.size();
1190 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1191 const auto null_count = vbitmap.null_count();
1192
1193 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1194
1196 BIG ? std::string("+vL") : std::string("+vl"),
1197 std::move(flat_schema),
1198 name,
1199 metadata,
1200 true // nullable
1201 );
1202
1203 std::vector<buffer<std::uint8_t>> arr_buffs;
1204 arr_buffs.reserve(3);
1205 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1206 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1207 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1208
1210 static_cast<std::int64_t>(size),
1211 static_cast<std::int64_t>(null_count),
1212 std::move(arr_buffs),
1213 std::move(flat_arr)
1214 );
1215
1216 return arrow_proxy{std::move(arr), std::move(schema)};
1217 }
1218
1219 template <bool BIG>
1220 template <input_metadata_container METADATA_RANGE>
1221 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1222 array&& flat_values,
1223 offset_buffer_type&& list_offsets,
1224 size_buffer_type&& list_sizes,
1225 bool nullable,
1226 std::optional<std::string_view> name,
1227 std::optional<METADATA_RANGE> metadata
1228 )
1229 {
1230 if (nullable)
1231 {
1232 return list_view_array_impl<BIG>::create_proxy(
1233 std::move(flat_values),
1234 std::move(list_offsets),
1235 std::move(list_sizes),
1237 name,
1238 metadata
1239 );
1240 }
1241
1242 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1243 const auto size = list_sizes.size();
1244 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1245
1246 ArrowSchema schema = detail::make_list_arrow_schema(
1247 BIG ? std::string("+vL") : std::string("+vl"),
1248 std::move(flat_schema),
1249 name,
1250 metadata,
1251 false // not nullable
1252 );
1253
1254 std::vector<buffer<std::uint8_t>> arr_buffs;
1255 arr_buffs.reserve(3);
1256 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
1257 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1258 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1259
1260 ArrowArray arr = detail::make_list_arrow_array(
1261 static_cast<std::int64_t>(size),
1262 0, // null_count
1263 std::move(arr_buffs),
1264 std::move(flat_arr)
1265 );
1266
1267 return arrow_proxy{std::move(arr), std::move(schema)};
1268 }
1269
1270 template <bool BIG>
1272 : base_type(rhs)
1273 , p_list_offsets(make_list_offsets())
1274 , p_list_sizes(make_list_sizes())
1275 {
1277 }
1278
1279 template <bool BIG>
1281 {
1283 if (this != &rhs)
1284 {
1286 p_list_offsets = make_list_offsets();
1287 p_list_sizes = make_list_sizes();
1288 }
1289 return *this;
1290 }
1291
1292 template <bool BIG>
1293 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i) const
1294 -> std::pair<offset_type, offset_type>
1295 {
1296 const auto offset = p_list_offsets[i];
1297 return std::make_pair(offset, offset + p_list_sizes[i]);
1298 }
1299
1300 template <bool BIG>
1301 constexpr auto list_view_array_impl<BIG>::make_list_offsets() -> offset_type*
1302 {
1303 return reinterpret_cast<offset_type*>(
1304 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1305 );
1306 }
1307
1308 template <bool BIG>
1309 constexpr auto list_view_array_impl<BIG>::make_list_sizes() -> offset_type*
1310 {
1311 return reinterpret_cast<offset_type*>(
1312 this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1313 );
1314 }
1315
1316#ifdef __GNUC__
1317# pragma GCC diagnostic pop
1318#endif
1319
1320 /*****************************************
1321 * fixed_sized_list_array implementation *
1322 *****************************************/
1323
1324 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
1325 {
1326 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
1327 const auto n_digits = format.size() - 3;
1328 const auto list_size_str = format.substr(3, n_digits);
1329 return std::stoull(std::string(list_size_str));
1330 }
1331
1333 : base_type(std::move(proxy))
1334 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
1335 {
1336 }
1337
1339 : base_type(rhs)
1340 , m_list_size(rhs.m_list_size)
1341 {
1343 }
1344
1346 {
1348 if (this != &rhs)
1349 {
1351 m_list_size = rhs.m_list_size;
1352 }
1353 return *this;
1354 }
1355
1356 constexpr auto fixed_sized_list_array::offset_range(size_type i) const
1357 -> std::pair<offset_type, offset_type>
1358 {
1359 const auto offset = i * m_list_size;
1360 return std::make_pair(offset, offset + m_list_size);
1361 }
1362
1363 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1364 inline arrow_proxy fixed_sized_list_array::create_proxy(
1365 std::uint64_t list_size,
1366 array&& flat_values,
1367 R&& validity_input,
1368 std::optional<std::string_view> name,
1369 std::optional<METADATA_RANGE> metadata
1370 )
1371 {
1372 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1373 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
1374 const auto null_count = vbitmap.null_count();
1375
1376 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1377
1378 std::string format = "+w:" + std::to_string(list_size);
1380 std::move(format),
1381 std::move(flat_schema),
1382 name,
1383 metadata,
1384 true // nullable
1385 );
1386
1387 std::vector<buffer<std::uint8_t>> arr_buffs;
1388 arr_buffs.reserve(1);
1389 arr_buffs.emplace_back(vbitmap.extract_storage());
1390
1392 static_cast<std::int64_t>(size),
1393 static_cast<std::int64_t>(null_count),
1394 std::move(arr_buffs),
1395 std::move(flat_arr)
1396 );
1397
1398 return arrow_proxy{std::move(arr), std::move(schema)};
1399 }
1400
1401 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1402 inline arrow_proxy fixed_sized_list_array::create_proxy(
1403 std::uint64_t list_size,
1404 array&& flat_values,
1405 bool nullable,
1406 std::optional<std::string_view> name,
1407 std::optional<METADATA_RANGE> metadata
1408 )
1409 {
1410 if (nullable)
1411 {
1412 return fixed_sized_list_array::create_proxy(
1413 list_size,
1414 std::move(flat_values),
1416 name,
1417 metadata
1418 );
1419 }
1420
1421 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1422 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1423
1424 std::string format = "+w:" + std::to_string(list_size);
1425 ArrowSchema schema = detail::make_list_arrow_schema(
1426 std::move(format),
1427 std::move(flat_schema),
1428 name,
1429 metadata,
1430 false // not nullable
1431 );
1432
1433 std::vector<buffer<std::uint8_t>> arr_buffs;
1434 arr_buffs.reserve(1);
1435 arr_buffs.emplace_back(nullptr, 0, buffer<std::uint8_t>::default_allocator()); // no validity bitmap
1436
1437 ArrowArray arr = detail::make_list_arrow_array(
1438 static_cast<std::int64_t>(size),
1439 0, // null_count
1440 std::move(arr_buffs),
1441 std::move(flat_arr)
1442 );
1443
1444 return arrow_proxy{std::move(arr), std::move(schema)};
1445 }
1446}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:43
Object that owns a piece of contiguous memory.
Definition buffer.hpp:131
xsimd::aligned_allocator< T > default_allocator
Definition buffer.hpp:144
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
Constructs fixed size list array from Arrow proxy.
fixed_sized_list_array(ARGS &&... args)
Generic constructor for creating fixed size list array.
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
fixed_sized_list_array(self_type &&)=default
fixed_sized_list_array & operator=(const self_type &)
typename base_type::size_type size_type
CRTP base class for all list array implementations.
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
Copy assignment operator.
constexpr array_wrapper * raw_flat_array()
Gets mutable access to the underlying flat array.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::iterator_tag iterator_tag
list_array_crtp_base(arrow_proxy proxy)
Constructs list array base from Arrow proxy.
constexpr list_array_crtp_base(const self_type &)
Copy constructor.
typename inner_types::value_iterator value_iterator
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_value_type > value_type
constexpr const array_wrapper * raw_flat_array() const
Gets read-only access to the underlying flat array.
array_bitmap_base< DERIVED > base_type
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr list_array_impl(const self_type &)
Copy constructor.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
Copy assignment operator.
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from list sizes.
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
Constructs list array from Arrow proxy.
std::size_t size_type
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
Constructs list view array from Arrow proxy.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
list_view_array_impl(ARGS &&... args)
Generic constructor for creating list view array from various inputs.
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
Copy constructor.
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
Copy assignment operator.
A view that repeats a value a given number of times.
This buffer class is used as the storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT(expr__, message__)
SPARROW_API void increase(const std::string &key)
std::string key< fixed_sized_list_array >()
std::string key()
Definition buffer.hpp:49
ArrowArray make_list_arrow_array(std::int64_t size, std::int64_t null_count, std::vector< buffer< std::uint8_t > > &&arr_buffs, ArrowArray &&flat_arr)
ArrowSchema make_list_arrow_schema(std::string format, ArrowSchema &&flat_schema, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata, bool nullable)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema holding the provided data; the schema is returned by value.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:110
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_list_array_v
Checks whether T is a list_array type.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
list_array_impl< true > big_list_array
A big list array implementation.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Builds a validity bitmap of the specified size from any of the supported validity input types.
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of Arrow data types, usually paired with the raw bytes of the associated value.
Extensions to the C++ standard library.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.