sparrow 0.9.0
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string> // for std::stoull
19#include <type_traits>
20#include <vector>
21
22#include "sparrow/array_api.hpp"
35
36namespace sparrow
37{
38 template <class DERIVED>
40
41 template <bool BIG>
42 class list_array_impl;
43
44 template <bool BIG>
46
63
74
76
80 template <class T>
81 constexpr bool is_list_array_v = std::same_as<T, list_array>;
82
86 template <class T>
87 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
88
92 template <class T>
93 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
94
98 template <class T>
99 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
100
104 template <class T>
105 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
106
107 namespace detail
108 {
109 template <class T>
110 struct get_data_type_from_array;
111
112 template <>
114 {
115 [[nodiscard]] static constexpr sparrow::data_type get()
116 {
118 }
119 };
120
121 template <>
123 {
124 [[nodiscard]] static constexpr sparrow::data_type get()
125 {
127 }
128 };
129
130 template <>
132 {
133 [[nodiscard]] static constexpr sparrow::data_type get()
134 {
136 }
137 };
138
139 template <>
141 {
142 [[nodiscard]] static constexpr sparrow::data_type get()
143 {
145 }
146 };
147
148 template <>
150 {
151 [[nodiscard]] static constexpr sparrow::data_type get()
152 {
154 }
155 };
156 }
157
158 template <bool BIG>
171
172 template <bool BIG>
185
186 template <>
199
200 // using list_array = list_array_crtp_base<false>;
201 // using big_list_array = list_array_crtp_base<true>;
202
203 // this is the base class for
204 // - list-array
205 // - big-list-array
206 // - list-view-array
207 // - big-list-view-array
208 // - fixed-size-list-array
209 template <class DERIVED>
211 {
212 public:
213
217 using value_iterator = typename inner_types::value_iterator;
218 using const_value_iterator = typename inner_types::const_value_iterator;
220
223
225
229
233
234 [[nodiscard]] constexpr const array_wrapper* raw_flat_array() const;
235 [[nodiscard]] constexpr array_wrapper* raw_flat_array();
236
237 protected:
238
240
243
244 constexpr list_array_crtp_base(self_type&&) noexcept = default;
245 constexpr list_array_crtp_base& operator=(self_type&&) noexcept = default;
246
247 private:
248
249 using list_size_type = inner_types::list_size_type;
250
251 [[nodiscard]] constexpr value_iterator value_begin();
252 [[nodiscard]] constexpr value_iterator value_end();
253 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
254 [[nodiscard]] constexpr const_value_iterator value_cend() const;
255
256 [[nodiscard]] constexpr inner_reference value(size_type i);
257 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
258
259 [[nodiscard]] cloning_ptr<array_wrapper> make_flat_array();
260
261 // data members
263
264 // friend classes
265 friend class array_crtp_base<DERIVED>;
266
267 // needs access to this->value(i)
268 friend class detail::layout_value_functor<DERIVED, inner_value_type>;
269 friend class detail::layout_value_functor<const DERIVED, inner_value_type>;
270 };
271
272 template <bool BIG>
274 {
275 public:
276
280 using list_size_type = inner_types::list_size_type;
282 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
284
286
287 constexpr list_array_impl(const self_type&);
289
290 constexpr list_array_impl(self_type&&) noexcept = default;
291 constexpr list_array_impl& operator=(self_type&&) noexcept = default;
292
293 template <class... ARGS>
294 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
295 explicit list_array_impl(ARGS&&... args)
296 : self_type(create_proxy(std::forward<ARGS>(args)...))
297 {
298 }
299
300 template <std::ranges::range SIZES_RANGE>
301 [[nodiscard]] static constexpr auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
302
303 private:
304
305 template <
307 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
308 [[nodiscard]] static arrow_proxy create_proxy(
309 array&& flat_values,
310 offset_buffer_type&& list_offsets,
311 VB&& validity_input,
312 std::optional<std::string_view> name = std::nullopt,
313 std::optional<METADATA_RANGE> metadata = std::nullopt
314 );
315
316 template <
318 std::ranges::input_range OFFSET_BUFFER_RANGE,
319 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
320 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
321 [[nodiscard]] static arrow_proxy create_proxy(
322 array&& flat_values,
323 OFFSET_BUFFER_RANGE&& list_offsets_range,
324 VB&& validity_input,
325 std::optional<std::string_view> name = std::nullopt,
326 std::optional<METADATA_RANGE> metadata = std::nullopt
327 )
328 {
329 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
330 return list_array_impl<BIG>::create_proxy(
331 std::move(flat_values),
332 std::move(list_offsets),
333 std::forward<VB>(validity_input),
334 std::forward<std::optional<std::string_view>>(name),
335 std::forward<std::optional<METADATA_RANGE>>(metadata)
336 );
337 }
338
339 template <
340 validity_bitmap_input VB = validity_bitmap,
341 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
342 [[nodiscard]] static arrow_proxy create_proxy(
343 array&& flat_values,
344 offset_buffer_type&& list_offsets,
345 bool nullable = true,
346 std::optional<std::string_view> name = std::nullopt,
347 std::optional<METADATA_RANGE> metadata = std::nullopt
348 );
349
350 template <
351 validity_bitmap_input VB = validity_bitmap,
352 std::ranges::input_range OFFSET_BUFFER_RANGE,
353 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
354 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
355 [[nodiscard]] static arrow_proxy create_proxy(
356 array&& flat_values,
357 OFFSET_BUFFER_RANGE&& list_offsets_range,
358 bool nullable = true,
359 std::optional<std::string_view> name = std::nullopt,
360 std::optional<METADATA_RANGE> metadata = std::nullopt
361 )
362 {
363 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
364 return list_array_impl<BIG>::create_proxy(
365 std::move(flat_values),
366 std::move(list_offsets),
367 nullable,
368 std::forward<std::optional<std::string_view>>(name),
369 std::forward<std::optional<METADATA_RANGE>>(metadata)
370 );
371 }
372
373 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
374 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
375
376 [[nodiscard]] constexpr offset_type* make_list_offsets();
377
378 offset_type* p_list_offsets;
379
380 // friend classes
381 friend class array_crtp_base<self_type>;
382 friend class list_array_crtp_base<self_type>;
383 };
384
385 template <bool BIG>
386 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
387 {
388 public:
389
393 using list_size_type = inner_types::list_size_type;
395 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
398
400
403
404 constexpr list_view_array_impl(self_type&&) = default;
405 constexpr list_view_array_impl& operator=(self_type&&) = default;
406
407 template <class... ARGS>
409 list_view_array_impl(ARGS&&... args)
410 : self_type(create_proxy(std::forward<ARGS>(args)...))
411 {
412 }
413
414 private:
415
416 template <
417 std::ranges::input_range OFFSET_BUFFER_RANGE,
418 std::ranges::input_range SIZE_RANGE,
420 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
421 requires(
422 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
423 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
424 )
425 [[nodiscard]] static arrow_proxy create_proxy(
426 array&& flat_values,
427 OFFSET_BUFFER_RANGE&& list_offsets,
428 SIZE_RANGE&& list_sizes,
429 VB&& validity_input,
430 std::optional<std::string_view> name = std::nullopt,
431 std::optional<METADATA_RANGE> metadata = std::nullopt
432 )
433 {
434 return list_view_array_impl<BIG>::create_proxy(
435 std::move(flat_values),
436 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
437 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
438 std::forward<VB>(validity_input),
439 name,
440 metadata
441 );
442 }
443
444 template <
445 validity_bitmap_input VB = validity_bitmap,
446 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
447 [[nodiscard]] static arrow_proxy create_proxy(
448 array&& flat_values,
449 offset_buffer_type&& list_offsets,
450 size_buffer_type&& list_sizes,
451 VB&& validity_input,
452 std::optional<std::string_view> name = std::nullopt,
453 std::optional<METADATA_RANGE> metadata = std::nullopt
454 );
455
456 template <
457 std::ranges::input_range OFFSET_BUFFER_RANGE,
458 std::ranges::input_range SIZE_RANGE,
459 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
460 requires(
461 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
462 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
463 )
464 [[nodiscard]] static arrow_proxy create_proxy(
465 array&& flat_values,
466 OFFSET_BUFFER_RANGE&& list_offsets,
467 SIZE_RANGE&& list_sizes,
468 bool nullable = true,
469 std::optional<std::string_view> name = std::nullopt,
470 std::optional<METADATA_RANGE> metadata = std::nullopt
471 )
472 {
473 return list_view_array_impl<BIG>::create_proxy(
474 std::move(flat_values),
475 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
476 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
477 nullable,
478 name,
479 metadata
480 );
481 }
482
483 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
484 [[nodiscard]] static arrow_proxy create_proxy(
485 array&& flat_values,
486 offset_buffer_type&& list_offsets,
487 size_buffer_type&& list_sizes,
488 bool nullable = true,
489 std::optional<std::string_view> name = std::nullopt,
490 std::optional<METADATA_RANGE> metadata = std::nullopt
491 );
492
493 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
494 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
495 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
496
497 [[nodiscard]] constexpr offset_type* make_list_offsets();
498 [[nodiscard]] constexpr offset_type* make_list_sizes();
499
500 offset_type* p_list_offsets;
501 offset_type* p_list_sizes;
502
503 // friend classes
504 friend class array_crtp_base<self_type>;
505 friend class list_array_crtp_base<self_type>;
506 };
507
508 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
509 {
510 public:
511
515 using list_size_type = inner_types::list_size_type;
517 using offset_type = std::uint64_t;
518
519 explicit fixed_sized_list_array(arrow_proxy proxy);
520
521 constexpr fixed_sized_list_array(const self_type&) = default;
523
526
527 template <class... ARGS>
530 : self_type(create_proxy(std::forward<ARGS>(args)...))
531 {
532 }
533
534 private:
535
536 template <
538 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
539 [[nodiscard]] static arrow_proxy create_proxy(
540 std::uint64_t list_size,
541 array&& flat_values,
542 R&& validity_input,
543 std::optional<std::string_view> name = std::nullopt,
544 std::optional<METADATA_RANGE> metadata = std::nullopt
545 );
546
547 template <
549 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
550 [[nodiscard]] static arrow_proxy create_proxy(
551 std::uint64_t list_size,
552 array&& flat_values,
553 bool nullable = true,
554 std::optional<std::string_view> name = std::nullopt,
555 std::optional<METADATA_RANGE> metadata = std::nullopt
556 );
557
558 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
559 [[nodiscard]] constexpr std::pair<offset_type, offset_type> offset_range(size_type i) const;
560
561 uint64_t m_list_size;
562
563 // friend classes
564 friend class array_crtp_base<self_type>;
565 friend class list_array_crtp_base<self_type>;
566 };
567
568 /***************************************
569 * list_array_crtp_base implementation *
570 ***************************************/
571
572 template <class DERIVED>
574 : base_type(std::move(proxy))
575 , p_flat_array(make_flat_array())
576 {
577 }
578
579 template <class DERIVED>
581 : base_type(rhs)
582 , p_flat_array(make_flat_array())
583 {
584 }
585
586 template <class DERIVED>
588 {
590 p_flat_array = make_flat_array();
591 return *this;
592 }
593
594 template <class DERIVED>
596 {
597 return p_flat_array.get();
598 }
599
600 template <class DERIVED>
602 {
603 return p_flat_array.get();
604 }
605
606 template <class DERIVED>
607 constexpr auto list_array_crtp_base<DERIVED>::value_begin() -> value_iterator
608 {
609 return value_iterator(detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()), 0);
610 }
611
612 template <class DERIVED>
613 constexpr auto list_array_crtp_base<DERIVED>::value_end() -> value_iterator
614 {
615 return value_iterator(
616 detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()),
617 this->size()
618 );
619 }
620
621 template <class DERIVED>
622 constexpr auto list_array_crtp_base<DERIVED>::value_cbegin() const -> const_value_iterator
623 {
624 return const_value_iterator(
626 0
627 );
628 }
629
630 template <class DERIVED>
631 constexpr auto list_array_crtp_base<DERIVED>::value_cend() const -> const_value_iterator
632 {
633 return const_value_iterator(
635 this->size()
636 );
637 }
638
639 template <class DERIVED>
640 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
641 {
642 const auto r = this->derived_cast().offset_range(i);
643 using st = typename list_value::size_type;
644 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
645 }
646
647 template <class DERIVED>
648 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
649 {
650 const auto r = this->derived_cast().offset_range(i);
651 using st = typename list_value::size_type;
652 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
653 }
654
655 template <class DERIVED>
656 cloning_ptr<array_wrapper> list_array_crtp_base<DERIVED>::make_flat_array()
657 {
658 return array_factory(this->get_arrow_proxy().children()[0].view());
659 }
660
661 /**********************************
662 * list_array_impl implementation *
663 **********************************/
664
665#ifdef __GNUC__
666# pragma GCC diagnostic push
667# pragma GCC diagnostic ignored "-Wcast-align"
668#endif
669
670 template <bool BIG>
672 : base_type(std::move(proxy))
673 , p_list_offsets(make_list_offsets())
674 {
675 }
676
677 template <bool BIG>
678 template <std::ranges::range SIZES_RANGE>
679 constexpr auto list_array_impl<BIG>::offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type
680 {
682 std::forward<SIZES_RANGE>(sizes)
683 );
684 }
685
686 template <bool BIG>
687 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
688 arrow_proxy list_array_impl<BIG>::create_proxy(
689 array&& flat_values,
690 offset_buffer_type&& list_offsets,
691 VB&& validity_input,
692 std::optional<std::string_view> name,
693 std::optional<METADATA_RANGE> metadata
694 )
695 {
696 const auto size = list_offsets.size() - 1;
697 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
698
699 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
700
701 const auto null_count = vbitmap.null_count();
703
704 static const std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
705
706 ArrowSchema schema = make_arrow_schema(
707 BIG ? std::string("+L") : std::string("+l"), // format
708 name, // name
709 metadata, // metadata
710 flags, // flags,
711 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
712 children_ownership, // children ownership
713 nullptr, // dictionary
714 true // dictionary ownership
715
716 );
717 std::vector<buffer<std::uint8_t>> arr_buffs = {
718 std::move(vbitmap).extract_storage(),
719 std::move(list_offsets).extract_storage()
720 };
721
722 ArrowArray arr = make_arrow_array(
723 static_cast<std::int64_t>(size), // length
724 static_cast<int64_t>(null_count),
725 0, // offset
726 std::move(arr_buffs),
727 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
728 children_ownership, // children ownership
729 nullptr, // dictionary
730 true // dictionary ownership
731 );
732 return arrow_proxy{std::move(arr), std::move(schema)};
733 }
734
735 template <bool BIG>
736 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
737 arrow_proxy list_array_impl<BIG>::create_proxy(
738 array&& flat_values,
739 offset_buffer_type&& list_offsets,
740 bool nullable,
741 std::optional<std::string_view> name,
742 std::optional<METADATA_RANGE> metadata
743 )
744 {
745 if (nullable)
746 {
747 return list_array_impl<BIG>::create_proxy(
748 std::move(flat_values),
749 std::move(list_offsets),
751 name,
752 metadata
753 );
754 }
755 else
756 {
757 const auto size = list_offsets.size() - 1;
758 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
760
761 ArrowSchema schema = make_arrow_schema(
762 BIG ? std::string("+L") : std::string("+l"), // format
763 name, // name
764 metadata, // metadata
765 std::nullopt, // flags,
766 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
767 children_ownership, // children ownership
768 nullptr, // dictionary
769 true // dictionary ownership
770
771 );
772 std::vector<buffer<std::uint8_t>> arr_buffs = {
773 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
774 std::move(list_offsets).extract_storage()
775 };
776
777 ArrowArray arr = make_arrow_array(
778 static_cast<std::int64_t>(size), // length
779 0,
780 0, // offset
781 std::move(arr_buffs),
782 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
783 children_ownership, // children ownership
784 nullptr, // dictionary
785 true // dictionary ownership
786 );
787 return arrow_proxy{std::move(arr), std::move(schema)};
788 }
789 }
790
791 template <bool BIG>
793 : base_type(rhs)
794 , p_list_offsets(make_list_offsets())
795 {
796 }
797
798 template <bool BIG>
800 {
801 if (this != &rhs)
802 {
804 p_list_offsets = make_list_offsets();
805 }
806 return *this;
807 }
808
809 template <bool BIG>
810 constexpr auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
811 {
812 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
813 }
814
815 template <bool BIG>
816 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
817 {
818 return reinterpret_cast<offset_type*>(
819 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
820 );
821 }
822
823 /***************************************
824 * list_view_array_impl implementation *
825 ***************************************/
826
827 template <bool BIG>
829 : base_type(std::move(proxy))
830 , p_list_offsets(make_list_offsets())
831 , p_list_sizes(make_list_sizes())
832 {
833 }
834
835 template <bool BIG>
836 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
837 arrow_proxy list_view_array_impl<BIG>::create_proxy(
838 array&& flat_values,
839 offset_buffer_type&& list_offsets,
840 size_buffer_type&& list_sizes,
841 VB&& validity_input,
842 std::optional<std::string_view> name,
843 std::optional<METADATA_RANGE> metadata
844 )
845 {
846 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
847 const auto size = list_sizes.size();
848 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
849 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
850 const auto null_count = vbitmap.null_count();
852 static const std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
853
854 ArrowSchema schema = make_arrow_schema(
855 BIG ? std::string("+vL") : std::string("+vl"), // format
856 name, // name
857 metadata, // metadata
858 flags, // flags,
859 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
861 nullptr, // dictionary
862 true
863 );
864 std::vector<buffer<std::uint8_t>> arr_buffs = {
865 std::move(vbitmap).extract_storage(),
866 std::move(list_offsets).extract_storage(),
867 std::move(list_sizes).extract_storage()
868 };
869
870 ArrowArray arr = make_arrow_array(
871 static_cast<std::int64_t>(size), // length
872 static_cast<int64_t>(null_count),
873 0, // offset
874 std::move(arr_buffs),
875 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
877 nullptr, // dictionary
878 true
879 );
880 return arrow_proxy{std::move(arr), std::move(schema)};
881 }
882
883 template <bool BIG>
884 template <input_metadata_container METADATA_RANGE>
885 arrow_proxy list_view_array_impl<BIG>::create_proxy(
886 array&& flat_values,
887 offset_buffer_type&& list_offsets,
888 size_buffer_type&& list_sizes,
889 bool nullable,
890 std::optional<std::string_view> name,
891 std::optional<METADATA_RANGE> metadata
892 )
893 {
894 if (nullable)
895 {
896 return list_view_array_impl<BIG>::create_proxy(
897 std::move(flat_values),
898 std::move(list_offsets),
899 std::move(list_sizes),
901 name,
902 metadata
903 );
904 }
905 else
906 {
907 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
908 const auto size = list_sizes.size();
909 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
911
912 ArrowSchema schema = make_arrow_schema(
913 BIG ? std::string("+vL") : std::string("+vl"), // format
914 name, // name
915 metadata, // metadata
916 std::nullopt, // flags,
917 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
919 nullptr, // dictionary
920 true
921 );
922 std::vector<buffer<std::uint8_t>> arr_buffs = {
923 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
924 std::move(list_offsets).extract_storage(),
925 std::move(list_sizes).extract_storage()
926 };
927
928 ArrowArray arr = make_arrow_array(
929 static_cast<std::int64_t>(size), // length
930 0, // null_count
931 0, // offset
932 std::move(arr_buffs),
933 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
935 nullptr, // dictionary
936 true
937 );
938 return arrow_proxy{std::move(arr), std::move(schema)};
939 }
940 }
941
942 template <bool BIG>
944 : base_type(rhs)
945 , p_list_offsets(make_list_offsets())
946 , p_list_sizes(make_list_sizes())
947 {
948 }
949
950 template <bool BIG>
952 {
953 if (this != &rhs)
954 {
956 p_list_offsets = make_list_offsets();
957 p_list_sizes = make_list_sizes();
958 }
959 return *this;
960 }
961
962 template <bool BIG>
963 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i) const
964 -> std::pair<offset_type, offset_type>
965 {
966 const auto offset = p_list_offsets[i];
967 return std::make_pair(offset, offset + p_list_sizes[i]);
968 }
969
970 template <bool BIG>
971 constexpr auto list_view_array_impl<BIG>::make_list_offsets() -> offset_type*
972 {
973 return reinterpret_cast<offset_type*>(
974 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
975 );
976 }
977
978 template <bool BIG>
979 constexpr auto list_view_array_impl<BIG>::make_list_sizes() -> offset_type*
980 {
981 return reinterpret_cast<offset_type*>(
982 this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
983 );
984 }
985
986#ifdef __GNUC__
987# pragma GCC diagnostic pop
988#endif
989
990 /*****************************************
991 * fixed_sized_list_array implementation *
992 *****************************************/
993
994 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
995 {
996 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
997 const auto n_digits = format.size() - 3;
998 const auto list_size_str = format.substr(3, n_digits);
999 return std::stoull(std::string(list_size_str));
1000 }
1001
1003 : base_type(std::move(proxy))
1004 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
1005 {
1006 }
1007
1008 constexpr auto fixed_sized_list_array::offset_range(size_type i) const
1009 -> std::pair<offset_type, offset_type>
1010 {
1011 const auto offset = i * m_list_size;
1012 return std::make_pair(offset, offset + m_list_size);
1013 }
1014
1015 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1016 inline arrow_proxy fixed_sized_list_array::create_proxy(
1017 std::uint64_t list_size,
1018 array&& flat_values,
1019 R&& validity_input,
1020 std::optional<std::string_view> name,
1021 std::optional<METADATA_RANGE> metadata
1022 )
1023 {
1024 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1025 auto vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
1026
1027 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1028
1029 const auto null_count = vbitmap.null_count();
1030
1031 const repeat_view<bool> children_ownership{true, 1};
1032
1033 std::string format = "+w:" + std::to_string(list_size);
1034 ArrowSchema schema = make_arrow_schema(
1035 format,
1036 std::move(name), // name
1037 std::move(metadata), // metadata
1038 std::nullopt, // flags,
1039 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1040 children_ownership, // children ownership
1041 nullptr, // dictionary
1042 true // dictionary ownership
1043
1044 );
1045 std::vector<buffer<std::uint8_t>> arr_buffs = {vbitmap.extract_storage()};
1046
1047 ArrowArray arr = make_arrow_array(
1048 static_cast<std::int64_t>(size), // length
1049 static_cast<int64_t>(null_count),
1050 0, // offset
1051 std::move(arr_buffs),
1052 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1053 children_ownership, // children ownership
1054 nullptr, // dictionary
1055 true // dictionary ownership
1056 );
1057 return arrow_proxy{std::move(arr), std::move(schema)};
1058 }
1059
1060 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1061 inline arrow_proxy fixed_sized_list_array::create_proxy(
1062 std::uint64_t list_size,
1063 array&& flat_values,
1064 bool nullable,
1065 std::optional<std::string_view> name,
1066 std::optional<METADATA_RANGE> metadata
1067 )
1068 {
1069 if (nullable)
1070 {
1071 return fixed_sized_list_array::create_proxy(
1072 list_size,
1073 std::move(flat_values),
1075 name,
1076 metadata
1077 );
1078 }
1079 else
1080 {
1081 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1082 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1083 const repeat_view<bool> children_ownership{true, 1};
1084
1085 std::string format = "+w:" + std::to_string(list_size);
1086 ArrowSchema schema = make_arrow_schema(
1087 format,
1088 std::move(name), // name
1089 std::move(metadata), // metadata
1090 std::nullopt, // flags,
1091 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1092 children_ownership, // children ownership
1093 nullptr, // dictionary
1094 true // dictionary ownership
1095
1096 );
1097 std::vector<buffer<std::uint8_t>> arr_buffs = {
1098 buffer<std::uint8_t>{nullptr, 0} // no validity bitmap
1099 };
1100
1101 ArrowArray arr = make_arrow_array(
1102 static_cast<std::int64_t>(size), // length
1103 0, // null_count
1104 0, // offset
1105 std::move(arr_buffs),
1106 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1107 children_ownership, // children ownership
1108 nullptr, // dictionary
1109 true // dictionary ownership
1110 );
1111 return arrow_proxy{std::move(arr), std::move(schema)};
1112 }
1113 }
1114}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class defining common immutable interface for arrays with a bitmap.
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Proxy class over ArrowArray and ArrowSchema.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
storage_type extract_storage() noexcept
constexpr size_type null_count() const noexcept
fixed_sized_list_array & operator=(const self_type &)=default
constexpr fixed_sized_list_array(const self_type &)=default
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
fixed_sized_list_array(ARGS &&... args)
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
fixed_sized_list_array(self_type &&)=default
typename base_type::size_type size_type
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
constexpr array_wrapper * raw_flat_array()
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::iterator_tag iterator_tag
list_array_crtp_base(arrow_proxy proxy)
constexpr list_array_crtp_base(const self_type &)
typename inner_types::value_iterator value_iterator
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_value_type > value_type
constexpr const array_wrapper * raw_flat_array() const
array_bitmap_base< DERIVED > base_type
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr list_array_impl(const self_type &)
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
std::size_t size_type
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
list_view_array_impl(ARGS &&... args)
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:278
A view that repeats a value a given number of times.
This buffer class is used as the storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_ASSERT(expr__, message__)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:106
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:574
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
constexpr bool is_list_array_v
Checks whether T is a list_array type.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
list_array_impl< true > big_list_array
A big list array implementation.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.