sparrow ..
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string> // for std::stoull
19#include <type_traits>
20#include <vector>
21
22#include "sparrow/array_api.hpp"
35
36namespace sparrow
37{
38 template <class DERIVED>
40
41 template <bool BIG>
42 class list_array_impl;
43
44 template <bool BIG>
46
69
84
86
90 template <class T>
91 constexpr bool is_list_array_v = std::same_as<T, list_array>;
92
96 template <class T>
97 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
98
102 template <class T>
103 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
104
108 template <class T>
109 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
110
114 template <class T>
115 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
116
117 namespace detail
118 {
119 template <>
121 {
122 [[nodiscard]] static constexpr sparrow::data_type get()
123 {
125 }
126 };
127
128 template <>
130 {
131 [[nodiscard]] static constexpr sparrow::data_type get()
132 {
134 }
135 };
136
137 template <>
139 {
140 [[nodiscard]] static constexpr sparrow::data_type get()
141 {
143 }
144 };
145
146 template <>
148 {
149 [[nodiscard]] static constexpr sparrow::data_type get()
150 {
152 }
153 };
154
155 template <>
157 {
158 [[nodiscard]] static constexpr sparrow::data_type get()
159 {
161 }
162 };
163 }
164
165 template <bool BIG>
178
179 template <bool BIG>
192
193 template <>
206
221 template <class DERIVED>
223 {
224 public:
225
229 using value_iterator = typename inner_types::value_iterator;
230 using const_value_iterator = typename inner_types::const_value_iterator;
232
235
237
241
245
253 [[nodiscard]] constexpr const array_wrapper* raw_flat_array() const;
254
262 [[nodiscard]] constexpr array_wrapper* raw_flat_array();
263
264 protected:
265
275
286
299
300 constexpr list_array_crtp_base(self_type&&) noexcept = default;
301 constexpr list_array_crtp_base& operator=(self_type&&) noexcept = default;
302
303 private:
304
305 using list_size_type = inner_types::list_size_type;
306
307 [[nodiscard]] constexpr value_iterator value_begin();
308 [[nodiscard]] constexpr value_iterator value_end();
309 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
310 [[nodiscard]] constexpr const_value_iterator value_cend() const;
311
312 [[nodiscard]] constexpr inner_reference value(size_type i);
313 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
314
315 [[nodiscard]] cloning_ptr<array_wrapper> make_flat_array();
316
317 // data members
319
320 // friend classes
321 friend class array_crtp_base<DERIVED>;
322
323 // needs access to this->value(i)
324 friend class detail::layout_value_functor<DERIVED, inner_value_type>;
325 friend class detail::layout_value_functor<const DERIVED, inner_value_type>;
326 };
327
328 template <bool BIG>
330 {
331 public:
332
336 using list_size_type = inner_types::list_size_type;
338 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
340
353
363 constexpr list_array_impl(const self_type&);
364
377
378 constexpr list_array_impl(self_type&&) noexcept = default;
379 constexpr list_array_impl& operator=(self_type&&) noexcept = default;
380
392 template <class... ARGS>
393 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
394 explicit list_array_impl(ARGS&&... args)
395 : self_type(create_proxy(std::forward<ARGS>(args)...))
396 {
397 }
398
416 template <std::ranges::range SIZES_RANGE>
417 [[nodiscard]] static constexpr auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
418
419 private:
420
440 template <
442 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
443 [[nodiscard]] static arrow_proxy create_proxy(
444 array&& flat_values,
445 offset_buffer_type&& list_offsets,
446 VB&& validity_input,
447 std::optional<std::string_view> name = std::nullopt,
448 std::optional<METADATA_RANGE> metadata = std::nullopt
449 );
450
469 template <
471 std::ranges::input_range OFFSET_BUFFER_RANGE,
472 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
473 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
474 [[nodiscard]] static arrow_proxy create_proxy(
475 array&& flat_values,
476 OFFSET_BUFFER_RANGE&& list_offsets_range,
477 VB&& validity_input,
478 std::optional<std::string_view> name = std::nullopt,
479 std::optional<METADATA_RANGE> metadata = std::nullopt
480 )
481 {
482 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
483 return list_array_impl<BIG>::create_proxy(
484 std::move(flat_values),
485 std::move(list_offsets),
486 std::forward<VB>(validity_input),
487 std::forward<std::optional<std::string_view>>(name),
488 std::forward<std::optional<METADATA_RANGE>>(metadata)
489 );
490 }
491
492 template <
493 validity_bitmap_input VB = validity_bitmap,
494 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
495 [[nodiscard]] static arrow_proxy create_proxy(
496 array&& flat_values,
497 offset_buffer_type&& list_offsets,
498 bool nullable = true,
499 std::optional<std::string_view> name = std::nullopt,
500 std::optional<METADATA_RANGE> metadata = std::nullopt
501 );
502
503 template <
504 validity_bitmap_input VB = validity_bitmap,
505 std::ranges::input_range OFFSET_BUFFER_RANGE,
506 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
507 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
508 [[nodiscard]] static arrow_proxy create_proxy(
509 array&& flat_values,
510 OFFSET_BUFFER_RANGE&& list_offsets_range,
511 bool nullable = true,
512 std::optional<std::string_view> name = std::nullopt,
513 std::optional<METADATA_RANGE> metadata = std::nullopt
514 )
515 {
516 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
517 return list_array_impl<BIG>::create_proxy(
518 std::move(flat_values),
519 std::move(list_offsets),
520 nullable,
521 std::forward<std::optional<std::string_view>>(name),
522 std::forward<std::optional<METADATA_RANGE>>(metadata)
523 );
524 }
525
526 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
527 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
528
529 [[nodiscard]] constexpr offset_type* make_list_offsets();
530
531 offset_type* p_list_offsets;
532
533 // friend classes
534 friend class array_crtp_base<self_type>;
535 friend class list_array_crtp_base<self_type>;
536 };
537
538 template <bool BIG>
539 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
540 {
541 public:
542
546 using list_size_type = inner_types::list_size_type;
548 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
551
564
575
588
589 constexpr list_view_array_impl(self_type&&) = default;
590 constexpr list_view_array_impl& operator=(self_type&&) = default;
591
604 template <class... ARGS>
606 list_view_array_impl(ARGS&&... args)
607 : self_type(create_proxy(std::forward<ARGS>(args)...))
608 {
609 }
610
611 private:
612
636 template <
637 std::ranges::input_range OFFSET_BUFFER_RANGE,
638 std::ranges::input_range SIZE_RANGE,
640 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
641 requires(
642 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
643 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
644 )
645 [[nodiscard]] static arrow_proxy create_proxy(
646 array&& flat_values,
647 OFFSET_BUFFER_RANGE&& list_offsets,
648 SIZE_RANGE&& list_sizes,
649 VB&& validity_input,
650 std::optional<std::string_view> name = std::nullopt,
651 std::optional<METADATA_RANGE> metadata = std::nullopt
652 )
653 {
654 return list_view_array_impl<BIG>::create_proxy(
655 std::move(flat_values),
656 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
657 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
658 std::forward<VB>(validity_input),
659 name,
660 metadata
661 );
662 }
663
664 template <
665 validity_bitmap_input VB = validity_bitmap,
666 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
667 [[nodiscard]] static arrow_proxy create_proxy(
668 array&& flat_values,
669 offset_buffer_type&& list_offsets,
670 size_buffer_type&& list_sizes,
671 VB&& validity_input,
672 std::optional<std::string_view> name = std::nullopt,
673 std::optional<METADATA_RANGE> metadata = std::nullopt
674 );
675
676 template <
677 std::ranges::input_range OFFSET_BUFFER_RANGE,
678 std::ranges::input_range SIZE_RANGE,
679 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
680 requires(
681 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
682 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
683 )
684 [[nodiscard]] static arrow_proxy create_proxy(
685 array&& flat_values,
686 OFFSET_BUFFER_RANGE&& list_offsets,
687 SIZE_RANGE&& list_sizes,
688 bool nullable = true,
689 std::optional<std::string_view> name = std::nullopt,
690 std::optional<METADATA_RANGE> metadata = std::nullopt
691 )
692 {
693 return list_view_array_impl<BIG>::create_proxy(
694 std::move(flat_values),
695 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
696 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
697 nullable,
698 name,
699 metadata
700 );
701 }
702
703 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
704 [[nodiscard]] static arrow_proxy create_proxy(
705 array&& flat_values,
706 offset_buffer_type&& list_offsets,
707 size_buffer_type&& list_sizes,
708 bool nullable = true,
709 std::optional<std::string_view> name = std::nullopt,
710 std::optional<METADATA_RANGE> metadata = std::nullopt
711 );
712
713 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
714 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
715 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
716
717 [[nodiscard]] constexpr offset_type* make_list_offsets();
718 [[nodiscard]] constexpr offset_type* make_list_sizes();
719
720 offset_type* p_list_offsets;
721 offset_type* p_list_sizes;
722
723 // friend classes
724 friend class array_crtp_base<self_type>;
725 friend class list_array_crtp_base<self_type>;
726 };
727
728 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
729 {
730 public:
731
735 using list_size_type = inner_types::list_size_type;
737 using offset_type = std::uint64_t;
738
750 explicit fixed_sized_list_array(arrow_proxy proxy);
751
752 constexpr fixed_sized_list_array(const self_type&) = default;
754
757
769 template <class... ARGS>
772 : self_type(create_proxy(std::forward<ARGS>(args)...))
773 {
774 }
775
776 private:
777
796 template <
798 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
799 [[nodiscard]] static arrow_proxy create_proxy(
800 std::uint64_t list_size,
801 array&& flat_values,
802 R&& validity_input,
803 std::optional<std::string_view> name = std::nullopt,
804 std::optional<METADATA_RANGE> metadata = std::nullopt
805 );
806
825 template <
827 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
828 [[nodiscard]] static arrow_proxy create_proxy(
829 std::uint64_t list_size,
830 array&& flat_values,
831 bool nullable = true,
832 std::optional<std::string_view> name = std::nullopt,
833 std::optional<METADATA_RANGE> metadata = std::nullopt
834 );
835
848 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
849
860 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
861
862 uint64_t m_list_size;
863
864 // friend classes
865 friend class array_crtp_base<self_type>;
866 friend class list_array_crtp_base<self_type>;
867 };
868
869 /***************************************
870 * list_array_crtp_base implementation *
871 ***************************************/
872
873 template <class DERIVED>
875 : base_type(std::move(proxy))
876 , p_flat_array(make_flat_array())
877 {
878 }
879
880 template <class DERIVED>
882 : base_type(rhs)
883 , p_flat_array(make_flat_array())
884 {
885 }
886
887 template <class DERIVED>
889 {
891 p_flat_array = make_flat_array();
892 return *this;
893 }
894
895 template <class DERIVED>
897 {
898 return p_flat_array.get();
899 }
900
901 template <class DERIVED>
903 {
904 return p_flat_array.get();
905 }
906
907 template <class DERIVED>
908 constexpr auto list_array_crtp_base<DERIVED>::value_begin() -> value_iterator
909 {
910 return value_iterator(detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()), 0);
911 }
912
913 template <class DERIVED>
914 constexpr auto list_array_crtp_base<DERIVED>::value_end() -> value_iterator
915 {
916 return value_iterator(
917 detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()),
918 this->size()
919 );
920 }
921
922 template <class DERIVED>
923 constexpr auto list_array_crtp_base<DERIVED>::value_cbegin() const -> const_value_iterator
924 {
925 return const_value_iterator(
927 0
928 );
929 }
930
931 template <class DERIVED>
932 constexpr auto list_array_crtp_base<DERIVED>::value_cend() const -> const_value_iterator
933 {
934 return const_value_iterator(
936 this->size()
937 );
938 }
939
940 template <class DERIVED>
941 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
942 {
943 const auto r = this->derived_cast().offset_range(i);
944 using st = typename list_value::size_type;
945 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
946 }
947
948 template <class DERIVED>
949 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
950 {
951 const auto r = this->derived_cast().offset_range(i);
952 using st = typename list_value::size_type;
953 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
954 }
955
956 template <class DERIVED>
957 cloning_ptr<array_wrapper> list_array_crtp_base<DERIVED>::make_flat_array()
958 {
959 return array_factory(this->get_arrow_proxy().children()[0].view());
960 }
961
962 /**********************************
963 * list_array_impl implementation *
964 **********************************/
965
966#ifdef __GNUC__
967# pragma GCC diagnostic push
968# pragma GCC diagnostic ignored "-Wcast-align"
969#endif
970
971 template <bool BIG>
973 : base_type(std::move(proxy))
974 , p_list_offsets(make_list_offsets())
975 {
976 }
977
978 template <bool BIG>
979 template <std::ranges::range SIZES_RANGE>
980 constexpr auto list_array_impl<BIG>::offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type
981 {
983 std::forward<SIZES_RANGE>(sizes)
984 );
985 }
986
987 template <bool BIG>
988 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
989 arrow_proxy list_array_impl<BIG>::create_proxy(
990 array&& flat_values,
991 offset_buffer_type&& list_offsets,
992 VB&& validity_input,
993 std::optional<std::string_view> name,
994 std::optional<METADATA_RANGE> metadata
995 )
996 {
997 const auto size = list_offsets.size() - 1;
998 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
999
1000 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1001
1002 const auto null_count = vbitmap.null_count();
1004
1005 static const std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
1006
1007 ArrowSchema schema = make_arrow_schema(
1008 BIG ? std::string("+L") : std::string("+l"), // format
1009 name, // name
1010 metadata, // metadata
1011 flags, // flags,
1012 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1013 children_ownership, // children ownership
1014 nullptr, // dictionary
1015 true // dictionary ownership
1016
1017 );
1018 std::vector<buffer<std::uint8_t>> arr_buffs = {
1019 std::move(vbitmap).extract_storage(),
1020 std::move(list_offsets).extract_storage()
1021 };
1022
1023 ArrowArray arr = make_arrow_array(
1024 static_cast<std::int64_t>(size), // length
1025 static_cast<int64_t>(null_count),
1026 0, // offset
1027 std::move(arr_buffs),
1028 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1029 children_ownership, // children ownership
1030 nullptr, // dictionary
1031 true // dictionary ownership
1032 );
1033 return arrow_proxy{std::move(arr), std::move(schema)};
1034 }
1035
1036 template <bool BIG>
1037 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1038 arrow_proxy list_array_impl<BIG>::create_proxy(
1039 array&& flat_values,
1040 offset_buffer_type&& list_offsets,
1041 bool nullable,
1042 std::optional<std::string_view> name,
1043 std::optional<METADATA_RANGE> metadata
1044 )
1045 {
1046 if (nullable)
1047 {
1048 return list_array_impl<BIG>::create_proxy(
1049 std::move(flat_values),
1050 std::move(list_offsets),
1052 name,
1053 metadata
1054 );
1055 }
1056 else
1057 {
1058 const auto size = list_offsets.size() - 1;
1059 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1061
1062 ArrowSchema schema = make_arrow_schema(
1063 BIG ? std::string("+L") : std::string("+l"), // format
1064 name, // name
1065 metadata, // metadata
1066 std::nullopt, // flags,
1067 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1068 children_ownership, // children ownership
1069 nullptr, // dictionary
1070 true // dictionary ownership
1071
1072 );
1073 std::vector<buffer<std::uint8_t>> arr_buffs = {
1074 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
1075 std::move(list_offsets).extract_storage()
1076 };
1077
1078 ArrowArray arr = make_arrow_array(
1079 static_cast<std::int64_t>(size), // length
1080 0,
1081 0, // offset
1082 std::move(arr_buffs),
1083 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1084 children_ownership, // children ownership
1085 nullptr, // dictionary
1086 true // dictionary ownership
1087 );
1088 return arrow_proxy{std::move(arr), std::move(schema)};
1089 }
1090 }
1091
1092 template <bool BIG>
1094 : base_type(rhs)
1095 , p_list_offsets(make_list_offsets())
1096 {
1097 }
1098
1099 template <bool BIG>
1101 {
1102 if (this != &rhs)
1103 {
1105 p_list_offsets = make_list_offsets();
1106 }
1107 return *this;
1108 }
1109
1110 template <bool BIG>
1111 constexpr auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
1112 {
1113 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
1114 }
1115
1116 template <bool BIG>
1117 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
1118 {
1119 return reinterpret_cast<offset_type*>(
1120 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1121 );
1122 }
1123
1124 /***************************************
1125 * list_view_array_impl implementation *
1126 ***************************************/
1127
1128 template <bool BIG>
1130 : base_type(std::move(proxy))
1131 , p_list_offsets(make_list_offsets())
1132 , p_list_sizes(make_list_sizes())
1133 {
1134 }
1135
1136 template <bool BIG>
1137 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1138 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1139 array&& flat_values,
1140 offset_buffer_type&& list_offsets,
1141 size_buffer_type&& list_sizes,
1142 VB&& validity_input,
1143 std::optional<std::string_view> name,
1144 std::optional<METADATA_RANGE> metadata
1145 )
1146 {
1147 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1148 const auto size = list_sizes.size();
1149 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
1150 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1151 const auto null_count = vbitmap.null_count();
1153 static const std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
1154
1155 ArrowSchema schema = make_arrow_schema(
1156 BIG ? std::string("+vL") : std::string("+vl"), // format
1157 name, // name
1158 metadata, // metadata
1159 flags, // flags,
1160 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1162 nullptr, // dictionary
1163 true
1164 );
1165 std::vector<buffer<std::uint8_t>> arr_buffs = {
1166 std::move(vbitmap).extract_storage(),
1167 std::move(list_offsets).extract_storage(),
1168 std::move(list_sizes).extract_storage()
1169 };
1170
1171 ArrowArray arr = make_arrow_array(
1172 static_cast<std::int64_t>(size), // length
1173 static_cast<int64_t>(null_count),
1174 0, // offset
1175 std::move(arr_buffs),
1176 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1178 nullptr, // dictionary
1179 true
1180 );
1181 return arrow_proxy{std::move(arr), std::move(schema)};
1182 }
1183
1184 template <bool BIG>
1185 template <input_metadata_container METADATA_RANGE>
1186 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1187 array&& flat_values,
1188 offset_buffer_type&& list_offsets,
1189 size_buffer_type&& list_sizes,
1190 bool nullable,
1191 std::optional<std::string_view> name,
1192 std::optional<METADATA_RANGE> metadata
1193 )
1194 {
1195 if (nullable)
1196 {
1197 return list_view_array_impl<BIG>::create_proxy(
1198 std::move(flat_values),
1199 std::move(list_offsets),
1200 std::move(list_sizes),
1202 name,
1203 metadata
1204 );
1205 }
1206 else
1207 {
1208 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
1209 const auto size = list_sizes.size();
1210 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1212
1213 ArrowSchema schema = make_arrow_schema(
1214 BIG ? std::string("+vL") : std::string("+vl"), // format
1215 name, // name
1216 metadata, // metadata
1217 std::nullopt, // flags,
1218 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1220 nullptr, // dictionary
1221 true
1222 );
1223 std::vector<buffer<std::uint8_t>> arr_buffs = {
1224 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
1225 std::move(list_offsets).extract_storage(),
1226 std::move(list_sizes).extract_storage()
1227 };
1228
1229 ArrowArray arr = make_arrow_array(
1230 static_cast<std::int64_t>(size), // length
1231 0, // null_count
1232 0, // offset
1233 std::move(arr_buffs),
1234 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1236 nullptr, // dictionary
1237 true
1238 );
1239 return arrow_proxy{std::move(arr), std::move(schema)};
1240 }
1241 }
1242
1243 template <bool BIG>
1245 : base_type(rhs)
1246 , p_list_offsets(make_list_offsets())
1247 , p_list_sizes(make_list_sizes())
1248 {
1249 }
1250
1251 template <bool BIG>
1253 {
1254 if (this != &rhs)
1255 {
1257 p_list_offsets = make_list_offsets();
1258 p_list_sizes = make_list_sizes();
1259 }
1260 return *this;
1261 }
1262
1263 template <bool BIG>
1264 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i) const
1265 -> std::pair<offset_type, offset_type>
1266 {
1267 const auto offset = p_list_offsets[i];
1268 return std::make_pair(offset, offset + p_list_sizes[i]);
1269 }
1270
1271 template <bool BIG>
1272 constexpr auto list_view_array_impl<BIG>::make_list_offsets() -> offset_type*
1273 {
1274 return reinterpret_cast<offset_type*>(
1275 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1276 );
1277 }
1278
1279 template <bool BIG>
1280 constexpr auto list_view_array_impl<BIG>::make_list_sizes() -> offset_type*
1281 {
1282 return reinterpret_cast<offset_type*>(
1283 this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
1284 );
1285 }
1286
1287#ifdef __GNUC__
1288# pragma GCC diagnostic pop
1289#endif
1290
1291 /*****************************************
1292 * fixed_sized_list_array implementation *
1293 *****************************************/
1294
1295 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
1296 {
1297 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
1298 const auto n_digits = format.size() - 3;
1299 const auto list_size_str = format.substr(3, n_digits);
1300 return std::stoull(std::string(list_size_str));
1301 }
1302
1304 : base_type(std::move(proxy))
1305 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
1306 {
1307 }
1308
1309 constexpr auto fixed_sized_list_array::offset_range(size_type i) const
1310 -> std::pair<offset_type, offset_type>
1311 {
1312 const auto offset = i * m_list_size;
1313 return std::make_pair(offset, offset + m_list_size);
1314 }
1315
1316 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1317 inline arrow_proxy fixed_sized_list_array::create_proxy(
1318 std::uint64_t list_size,
1319 array&& flat_values,
1320 R&& validity_input,
1321 std::optional<std::string_view> name,
1322 std::optional<METADATA_RANGE> metadata
1323 )
1324 {
1325 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1326 auto vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
1327
1328 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1329
1330 const auto null_count = vbitmap.null_count();
1331
1332 const repeat_view<bool> children_ownership{true, 1};
1333
1334 std::string format = "+w:" + std::to_string(list_size);
1335 ArrowSchema schema = make_arrow_schema(
1336 format,
1337 std::move(name), // name
1338 std::move(metadata), // metadata
1339 std::nullopt, // flags,
1340 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1341 children_ownership, // children ownership
1342 nullptr, // dictionary
1343 true // dictionary ownership
1344
1345 );
1346 std::vector<buffer<std::uint8_t>> arr_buffs = {vbitmap.extract_storage()};
1347
1348 ArrowArray arr = make_arrow_array(
1349 static_cast<std::int64_t>(size), // length
1350 static_cast<int64_t>(null_count),
1351 0, // offset
1352 std::move(arr_buffs),
1353 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1354 children_ownership, // children ownership
1355 nullptr, // dictionary
1356 true // dictionary ownership
1357 );
1358 return arrow_proxy{std::move(arr), std::move(schema)};
1359 }
1360
1361 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1362 inline arrow_proxy fixed_sized_list_array::create_proxy(
1363 std::uint64_t list_size,
1364 array&& flat_values,
1365 bool nullable,
1366 std::optional<std::string_view> name,
1367 std::optional<METADATA_RANGE> metadata
1368 )
1369 {
1370 if (nullable)
1371 {
1372 return fixed_sized_list_array::create_proxy(
1373 list_size,
1374 std::move(flat_values),
1376 name,
1377 metadata
1378 );
1379 }
1380 else
1381 {
1382 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1383 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1384 const repeat_view<bool> children_ownership{true, 1};
1385
1386 std::string format = "+w:" + std::to_string(list_size);
1387 ArrowSchema schema = make_arrow_schema(
1388 format,
1389 std::move(name), // name
1390 std::move(metadata), // metadata
1391 std::nullopt, // flags,
1392 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1393 children_ownership, // children ownership
1394 nullptr, // dictionary
1395 true // dictionary ownership
1396
1397 );
1398 std::vector<buffer<std::uint8_t>> arr_buffs = {
1399 buffer<std::uint8_t>{nullptr, 0} // no validity bitmap
1400 };
1401
1402 ArrowArray arr = make_arrow_array(
1403 static_cast<std::int64_t>(size), // length
1404 0, // null_count
1405 0, // offset
1406 std::move(arr_buffs),
1407 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1408 children_ownership, // children ownership
1409 nullptr, // dictionary
1410 true // dictionary ownership
1411 );
1412 return arrow_proxy{std::move(arr), std::move(schema)};
1413 }
1414 }
1415}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:41
Object that owns a piece of contiguous memory.
Definition buffer.hpp:113
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
storage_type extract_storage() noexcept
Extracts the underlying storage (move operation).
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
fixed_sized_list_array & operator=(const self_type &)=default
constexpr fixed_sized_list_array(const self_type &)=default
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
Constructs fixed size list array from Arrow proxy.
fixed_sized_list_array(ARGS &&... args)
Generic constructor for creating fixed size list array.
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
fixed_sized_list_array(self_type &&)=default
typename base_type::size_type size_type
CRTP base class for all list array implementations.
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
Copy assignment operator.
constexpr array_wrapper * raw_flat_array()
Gets mutable access to the underlying flat array.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::iterator_tag iterator_tag
list_array_crtp_base(arrow_proxy proxy)
Constructs list array base from Arrow proxy.
constexpr list_array_crtp_base(const self_type &)
Copy constructor.
typename inner_types::value_iterator value_iterator
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_value_type > value_type
constexpr const array_wrapper * raw_flat_array() const
Gets read-only access to the underlying flat array.
array_bitmap_base< DERIVED > base_type
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr list_array_impl(const self_type &)
Copy constructor.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
Copy assignment operator.
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from list sizes.
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
Constructs list array from Arrow proxy.
std::size_t size_type
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
Constructs list view array from Arrow proxy.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
list_view_array_impl(ARGS &&... args)
Generic constructor for creating list view array from various inputs.
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
Copy constructor.
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
Copy assignment operator.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:304
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT(expr__, message__)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:98
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_list_array_v
Checks whether T is a list_array type.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
list_array_impl< true > big_list_array
A big list array implementation.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Returns a validity bitmap of the specified size, built from one of several supported validity input types.
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of Arrow data types, usually paired with the raw bytes that hold the associated value.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.