sparrow 0.9.0
Loading...
Searching...
No Matches
list_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string> // for std::stoull
19#include <type_traits>
20#include <vector>
21
22#include "sparrow/array_api.hpp"
35
36namespace sparrow
37{
38 template <class DERIVED>
40
41 template <bool BIG>
42 class list_array_impl;
43
44 template <bool BIG>
46
49
52
54
58 template <class T>
59 constexpr bool is_list_array_v = std::same_as<T, list_array>;
60
64 template <class T>
65 constexpr bool is_big_list_array_v = std::same_as<T, big_list_array>;
66
70 template <class T>
71 constexpr bool is_list_view_array_v = std::same_as<T, list_view_array>;
72
76 template <class T>
77 constexpr bool is_big_list_view_array_v = std::same_as<T, big_list_view_array>;
78
82 template <class T>
83 constexpr bool is_fixed_sized_list_array_v = std::same_as<T, fixed_sized_list_array>;
84
85 namespace detail
86 {
87 template <class T>
88 struct get_data_type_from_array;
89
90 template <>
92 {
93 [[nodiscard]] static constexpr sparrow::data_type get()
94 {
96 }
97 };
98
99 template <>
101 {
102 [[nodiscard]] static constexpr sparrow::data_type get()
103 {
105 }
106 };
107
108 template <>
110 {
111 [[nodiscard]] static constexpr sparrow::data_type get()
112 {
114 }
115 };
116
117 template <>
119 {
120 [[nodiscard]] static constexpr sparrow::data_type get()
121 {
123 }
124 };
125
126 template <>
128 {
129 [[nodiscard]] static constexpr sparrow::data_type get()
130 {
132 }
133 };
134 }
135
136 template <bool BIG>
149
150 template <bool BIG>
163
164 template <>
177
178 // using list_array = list_array_crtp_base<false>;
179 // using big_list_array = list_array_crtp_base<true>;
180
181 // this is the base class for
182 // - list-array
183 // - big-list-array
184 // - list-view-array
185 // - big-list-view-array
186 // - fixed-size-list-array
187 template <class DERIVED>
189 {
190 public:
191
195 using value_iterator = typename inner_types::value_iterator;
196 using const_value_iterator = typename inner_types::const_value_iterator;
198
201
203
207
211
212 [[nodiscard]] const array_wrapper* raw_flat_array() const;
214
215 protected:
216
218
221
222 list_array_crtp_base(self_type&&) noexcept = default;
223 list_array_crtp_base& operator=(self_type&&) noexcept = default;
224
225 private:
226
227 using list_size_type = inner_types::list_size_type;
228
229 [[nodiscard]] value_iterator value_begin();
230 [[nodiscard]] value_iterator value_end();
231 [[nodiscard]] const_value_iterator value_cbegin() const;
232 [[nodiscard]] const_value_iterator value_cend() const;
233
234 [[nodiscard]] inner_reference value(size_type i);
235 [[nodiscard]] inner_const_reference value(size_type i) const;
236
237 [[nodiscard]] cloning_ptr<array_wrapper> make_flat_array();
238
239 // data members
241
242 // friend classes
243 friend class array_crtp_base<DERIVED>;
244
245 // needs access to this->value(i)
246 friend class detail::layout_value_functor<DERIVED, inner_value_type>;
247 friend class detail::layout_value_functor<const DERIVED, inner_value_type>;
248 };
249
250 template <bool BIG>
252 {
253 public:
254
258 using list_size_type = inner_types::list_size_type;
260 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
262
264
267
268 list_array_impl(self_type&&) noexcept = default;
269 list_array_impl& operator=(self_type&&) noexcept = default;
270
271 template <class... ARGS>
272 requires(mpl::excludes_copy_and_move_ctor_v<list_array_impl<BIG>, ARGS...>)
273 explicit list_array_impl(ARGS&&... args)
274 : self_type(create_proxy(std::forward<ARGS>(args)...))
275 {
276 }
277
278 template <std::ranges::range SIZES_RANGE>
279 [[nodiscard]] static auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
280
281 private:
282
283 template <
285 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
286 [[nodiscard]] static arrow_proxy create_proxy(
287 array&& flat_values,
288 offset_buffer_type&& list_offsets,
289 VB&& validity_input,
290 std::optional<std::string_view> name = std::nullopt,
291 std::optional<METADATA_RANGE> metadata = std::nullopt
292 );
293
294 template <
296 std::ranges::input_range OFFSET_BUFFER_RANGE,
297 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
298 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
299 [[nodiscard]] static arrow_proxy create_proxy(
300 array&& flat_values,
301 OFFSET_BUFFER_RANGE&& list_offsets_range,
302 VB&& validity_input,
303 std::optional<std::string_view> name = std::nullopt,
304 std::optional<METADATA_RANGE> metadata = std::nullopt
305 )
306 {
307 offset_buffer_type list_offsets{std::move(list_offsets_range)};
308 return list_array_impl<BIG>::create_proxy(
309 std::forward<array>(flat_values),
310 std::move(list_offsets),
311 std::forward<VB>(validity_input),
312 std::forward<std::optional<std::string_view>>(name),
313 std::forward<std::optional<METADATA_RANGE>>(metadata)
314 );
315 }
316
317 template <
318 validity_bitmap_input VB = validity_bitmap,
319 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
320 [[nodiscard]] static arrow_proxy create_proxy(
321 array&& flat_values,
322 offset_buffer_type&& list_offsets,
323 bool nullable = true,
324 std::optional<std::string_view> name = std::nullopt,
325 std::optional<METADATA_RANGE> metadata = std::nullopt
326 );
327
328 template <
329 validity_bitmap_input VB = validity_bitmap,
330 std::ranges::input_range OFFSET_BUFFER_RANGE,
331 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
332 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
333 [[nodiscard]] static arrow_proxy create_proxy(
334 array&& flat_values,
335 OFFSET_BUFFER_RANGE&& list_offsets_range,
336 bool nullable = true,
337 std::optional<std::string_view> name = std::nullopt,
338 std::optional<METADATA_RANGE> metadata = std::nullopt
339 )
340 {
341 offset_buffer_type list_offsets{std::move(list_offsets_range)};
342 return list_array_impl<BIG>::create_proxy(
343 std::forward<array>(flat_values),
344 std::move(list_offsets),
345 nullable,
346 std::forward<std::optional<std::string_view>>(name),
347 std::forward<std::optional<METADATA_RANGE>>(metadata)
348 );
349 }
350
351 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
352 [[nodiscard]] std::pair<offset_type, offset_type> offset_range(size_type i) const;
353
354 [[nodiscard]] offset_type* make_list_offsets();
355
356 offset_type* p_list_offsets;
357
358 // friend classes
359 friend class array_crtp_base<self_type>;
360 friend class list_array_crtp_base<self_type>;
361 };
362
363 template <bool BIG>
364 class list_view_array_impl final : public list_array_crtp_base<list_view_array_impl<BIG>>
365 {
366 public:
367
371 using list_size_type = inner_types::list_size_type;
373 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
376
378
381
384
385 template <class... ARGS>
387 list_view_array_impl(ARGS&&... args)
388 : self_type(create_proxy(std::forward<ARGS>(args)...))
389 {
390 }
391
392 private:
393
394 template <
395 std::ranges::input_range OFFSET_BUFFER_RANGE,
396 std::ranges::input_range SIZE_RANGE,
398 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
399 requires(
400 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
401 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
402 )
403 [[nodiscard]] static arrow_proxy create_proxy(
404 array&& flat_values,
405 OFFSET_BUFFER_RANGE&& list_offsets,
406 SIZE_RANGE&& list_sizes,
407 VB&& validity_input,
408 std::optional<std::string_view> name = std::nullopt,
409 std::optional<METADATA_RANGE> metadata = std::nullopt
410 )
411 {
412 return list_view_array_impl<BIG>::create_proxy(
413 std::move(flat_values),
414 offset_buffer_type(std::move(list_offsets)),
415 size_buffer_type(std::move(list_sizes)),
416 std::forward<VB>(validity_input),
417 name,
418 metadata
419 );
420 }
421
422 template <
423 validity_bitmap_input VB = validity_bitmap,
424 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
425 [[nodiscard]] static arrow_proxy create_proxy(
426 array&& flat_values,
427 offset_buffer_type&& list_offsets,
428 size_buffer_type&& list_sizes,
429 VB&& validity_input,
430 std::optional<std::string_view> name = std::nullopt,
431 std::optional<METADATA_RANGE> metadata = std::nullopt
432 );
433
434 template <
435 std::ranges::input_range OFFSET_BUFFER_RANGE,
436 std::ranges::input_range SIZE_RANGE,
437 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
438 requires(
439 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
440 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
441 )
442 [[nodiscard]] static arrow_proxy create_proxy(
443 array&& flat_values,
444 OFFSET_BUFFER_RANGE&& list_offsets,
445 SIZE_RANGE&& list_sizes,
446 bool nullable = true,
447 std::optional<std::string_view> name = std::nullopt,
448 std::optional<METADATA_RANGE> metadata = std::nullopt
449 )
450 {
451 return list_view_array_impl<BIG>::create_proxy(
452 std::move(flat_values),
453 offset_buffer_type(std::move(list_offsets)),
454 size_buffer_type(std::move(list_sizes)),
455 nullable,
456 name,
457 metadata
458 );
459 }
460
461 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
462 [[nodiscard]] static arrow_proxy create_proxy(
463 array&& flat_values,
464 offset_buffer_type&& list_offsets,
465 size_buffer_type&& list_sizes,
466 bool nullable = true,
467 std::optional<std::string_view> name = std::nullopt,
468 std::optional<METADATA_RANGE> metadata = std::nullopt
469 );
470
471 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
472 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
473 [[nodiscard]] std::pair<offset_type, offset_type> offset_range(size_type i) const;
474
475 [[nodiscard]] offset_type* make_list_offsets();
476 [[nodiscard]] offset_type* make_list_sizes();
477
478 offset_type* p_list_offsets;
479 offset_type* p_list_sizes;
480
481 // friend classes
482 friend class array_crtp_base<self_type>;
483 friend class list_array_crtp_base<self_type>;
484 };
485
486 class fixed_sized_list_array final : public list_array_crtp_base<fixed_sized_list_array>
487 {
488 public:
489
493 using list_size_type = inner_types::list_size_type;
495 using offset_type = std::uint64_t;
496
497 explicit fixed_sized_list_array(arrow_proxy proxy);
498
501
504
505 template <class... ARGS>
508 : self_type(create_proxy(std::forward<ARGS>(args)...))
509 {
510 }
511
512 private:
513
514 template <
516 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
517 [[nodiscard]] static arrow_proxy create_proxy(
518 std::uint64_t list_size,
519 array&& flat_values,
520 R&& validity_input,
521 std::optional<std::string_view> name = std::nullopt,
522 std::optional<METADATA_RANGE> metadata = std::nullopt
523 );
524
525 template <
527 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
528 [[nodiscard]] static arrow_proxy create_proxy(
529 std::uint64_t list_size,
530 array&& flat_values,
531 bool nullable = true,
532 std::optional<std::string_view> name = std::nullopt,
533 std::optional<METADATA_RANGE> metadata = std::nullopt
534 );
535
536 [[nodiscard]] static uint64_t list_size_from_format(const std::string_view format);
537 [[nodiscard]] std::pair<offset_type, offset_type> offset_range(size_type i) const;
538
539 uint64_t m_list_size;
540
541 // friend classes
542 friend class array_crtp_base<self_type>;
543 friend class list_array_crtp_base<self_type>;
544 };
545
546 /***************************************
547 * list_array_crtp_base implementation *
548 ***************************************/
549
550 template <class DERIVED>
552 : base_type(std::move(proxy))
553 , p_flat_array(make_flat_array())
554 {
555 }
556
557 template <class DERIVED>
559 : base_type(rhs)
560 , p_flat_array(make_flat_array())
561 {
562 }
563
564 template <class DERIVED>
566 {
568 p_flat_array = make_flat_array();
569 return *this;
570 }
571
572 template <class DERIVED>
574 {
575 return p_flat_array.get();
576 }
577
578 template <class DERIVED>
580 {
581 return p_flat_array.get();
582 }
583
584 template <class DERIVED>
585 auto list_array_crtp_base<DERIVED>::value_begin() -> value_iterator
586 {
587 return value_iterator(detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()), 0);
588 }
589
590 template <class DERIVED>
591 auto list_array_crtp_base<DERIVED>::value_end() -> value_iterator
592 {
593 return value_iterator(
594 detail::layout_value_functor<DERIVED, inner_value_type>(&this->derived_cast()),
595 this->size()
596 );
597 }
598
599 template <class DERIVED>
600 auto list_array_crtp_base<DERIVED>::value_cbegin() const -> const_value_iterator
601 {
602 return const_value_iterator(
604 0
605 );
606 }
607
608 template <class DERIVED>
609 auto list_array_crtp_base<DERIVED>::value_cend() const -> const_value_iterator
610 {
611 return const_value_iterator(
613 this->size()
614 );
615 }
616
617 template <class DERIVED>
618 auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
619 {
620 const auto r = this->derived_cast().offset_range(i);
621 using st = typename list_value::size_type;
622 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
623 }
624
625 template <class DERIVED>
626 auto list_array_crtp_base<DERIVED>::value(size_type i) const -> inner_const_reference
627 {
628 const auto r = this->derived_cast().offset_range(i);
629 using st = typename list_value::size_type;
630 return list_value{p_flat_array.get(), static_cast<st>(r.first), static_cast<st>(r.second)};
631 }
632
633 template <class DERIVED>
634 cloning_ptr<array_wrapper> list_array_crtp_base<DERIVED>::make_flat_array()
635 {
636 return array_factory(this->get_arrow_proxy().children()[0].view());
637 }
638
639 /**********************************
640 * list_array_impl implementation *
641 **********************************/
642
643#ifdef __GNUC__
644# pragma GCC diagnostic push
645# pragma GCC diagnostic ignored "-Wcast-align"
646#endif
647
648 template <bool BIG>
650 : base_type(std::move(proxy))
651 , p_list_offsets(make_list_offsets())
652 {
653 }
654
655 template <bool BIG>
656 template <std::ranges::range SIZES_RANGE>
658 {
660 std::forward<SIZES_RANGE>(sizes)
661 );
662 }
663
664 template <bool BIG>
665 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
666 arrow_proxy list_array_impl<BIG>::create_proxy(
667 array&& flat_values,
668 offset_buffer_type&& list_offsets,
669 VB&& validity_input,
670 std::optional<std::string_view> name,
671 std::optional<METADATA_RANGE> metadata
672 )
673 {
674 const auto size = list_offsets.size() - 1;
675 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
676
677 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
678
679 const auto null_count = vbitmap.null_count();
681
682 static const std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
683
684 ArrowSchema schema = make_arrow_schema(
685 BIG ? std::string("+L") : std::string("+l"), // format
686 name, // name
687 metadata, // metadata
688 flags, // flags,
689 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
690 children_ownership, // children ownership
691 nullptr, // dictionary
692 true // dictionary ownership
693
694 );
695 std::vector<buffer<std::uint8_t>> arr_buffs = {
696 std::move(vbitmap).extract_storage(),
697 std::move(list_offsets).extract_storage()
698 };
699
700 ArrowArray arr = make_arrow_array(
701 static_cast<std::int64_t>(size), // length
702 static_cast<int64_t>(null_count),
703 0, // offset
704 std::move(arr_buffs),
705 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
706 children_ownership, // children ownership
707 nullptr, // dictionary
708 true // dictionary ownership
709 );
710 return arrow_proxy{std::move(arr), std::move(schema)};
711 }
712
713 template <bool BIG>
714 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
715 arrow_proxy list_array_impl<BIG>::create_proxy(
716 array&& flat_values,
717 offset_buffer_type&& list_offsets,
718 bool nullable,
719 std::optional<std::string_view> name,
720 std::optional<METADATA_RANGE> metadata
721 )
722 {
723 if (nullable)
724 {
725 return list_array_impl<BIG>::create_proxy(
726 std::move(flat_values),
727 std::move(list_offsets),
729 name,
730 metadata
731 );
732 }
733 else
734 {
735 const auto size = list_offsets.size() - 1;
736 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
738
739 ArrowSchema schema = make_arrow_schema(
740 BIG ? std::string("+L") : std::string("+l"), // format
741 name, // name
742 metadata, // metadata
743 std::nullopt, // flags,
744 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
745 children_ownership, // children ownership
746 nullptr, // dictionary
747 true // dictionary ownership
748
749 );
750 std::vector<buffer<std::uint8_t>> arr_buffs = {
751 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
752 std::move(list_offsets).extract_storage()
753 };
754
755 ArrowArray arr = make_arrow_array(
756 static_cast<std::int64_t>(size), // length
757 0,
758 0, // offset
759 std::move(arr_buffs),
760 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
761 children_ownership, // children ownership
762 nullptr, // dictionary
763 true // dictionary ownership
764 );
765 return arrow_proxy{std::move(arr), std::move(schema)};
766 }
767 }
768
769 template <bool BIG>
771 : base_type(rhs)
772 , p_list_offsets(make_list_offsets())
773 {
774 }
775
776 template <bool BIG>
778 {
779 if (this != &rhs)
780 {
782 p_list_offsets = make_list_offsets();
783 }
784 return *this;
785 }
786
787 template <bool BIG>
788 auto list_array_impl<BIG>::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
789 {
790 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
791 }
792
793 template <bool BIG>
794 auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
795 {
796 return reinterpret_cast<offset_type*>(
797 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
798 );
799 }
800
801 /***************************************
802 * list_view_array_impl implementation *
803 ***************************************/
804
805 template <bool BIG>
807 : base_type(std::move(proxy))
808 , p_list_offsets(make_list_offsets())
809 , p_list_sizes(make_list_sizes())
810 {
811 }
812
813 template <bool BIG>
814 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
815 arrow_proxy list_view_array_impl<BIG>::create_proxy(
816 array&& flat_values,
817 offset_buffer_type&& list_offsets,
818 size_buffer_type&& list_sizes,
819 VB&& validity_input,
820 std::optional<std::string_view> name,
821 std::optional<METADATA_RANGE> metadata
822 )
823 {
824 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
825 const auto size = list_sizes.size();
826 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
827 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
828 const auto null_count = vbitmap.null_count();
830 static const std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
831
832 ArrowSchema schema = make_arrow_schema(
833 BIG ? std::string("+vL") : std::string("+vl"), // format
834 name, // name
835 metadata, // metadata
836 flags, // flags,
837 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
839 nullptr, // dictionary
840 true
841 );
842 std::vector<buffer<std::uint8_t>> arr_buffs = {
843 std::move(vbitmap).extract_storage(),
844 std::move(list_offsets).extract_storage(),
845 std::move(list_sizes).extract_storage()
846 };
847
848 ArrowArray arr = make_arrow_array(
849 static_cast<std::int64_t>(size), // length
850 static_cast<int64_t>(null_count),
851 0, // offset
852 std::move(arr_buffs),
853 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
855 nullptr, // dictionary
856 true
857 );
858 return arrow_proxy{std::move(arr), std::move(schema)};
859 }
860
861 template <bool BIG>
862 template <input_metadata_container METADATA_RANGE>
863 arrow_proxy list_view_array_impl<BIG>::create_proxy(
864 array&& flat_values,
865 offset_buffer_type&& list_offsets,
866 size_buffer_type&& list_sizes,
867 bool nullable,
868 std::optional<std::string_view> name,
869 std::optional<METADATA_RANGE> metadata
870 )
871 {
872 if (nullable)
873 {
874 return list_view_array_impl<BIG>::create_proxy(
875 std::move(flat_values),
876 std::move(list_offsets),
877 std::move(list_sizes),
879 name,
880 metadata
881 );
882 }
883 else
884 {
885 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(), "sizes and offset must have the same size");
886 const auto size = list_sizes.size();
887 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
889
890 ArrowSchema schema = make_arrow_schema(
891 BIG ? std::string("+vL") : std::string("+vl"), // format
892 name, // name
893 metadata, // metadata
894 std::nullopt, // flags,
895 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
897 nullptr, // dictionary
898 true
899 );
900 std::vector<buffer<std::uint8_t>> arr_buffs = {
901 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
902 std::move(list_offsets).extract_storage(),
903 std::move(list_sizes).extract_storage()
904 };
905
906 ArrowArray arr = make_arrow_array(
907 static_cast<std::int64_t>(size), // length
908 0, // null_count
909 0, // offset
910 std::move(arr_buffs),
911 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
913 nullptr, // dictionary
914 true
915 );
916 return arrow_proxy{std::move(arr), std::move(schema)};
917 }
918 }
919
920 template <bool BIG>
922 : base_type(rhs)
923 , p_list_offsets(make_list_offsets())
924 , p_list_sizes(make_list_sizes())
925 {
926 }
927
928 template <bool BIG>
930 {
931 if (this != &rhs)
932 {
934 p_list_offsets = make_list_offsets();
935 p_list_sizes = make_list_sizes();
936 }
937 return *this;
938 }
939
940 template <bool BIG>
941 inline auto list_view_array_impl<BIG>::offset_range(size_type i) const
942 -> std::pair<offset_type, offset_type>
943 {
944 const auto offset = p_list_offsets[i];
945 return std::make_pair(offset, offset + p_list_sizes[i]);
946 }
947
948 template <bool BIG>
949 auto list_view_array_impl<BIG>::make_list_offsets() -> offset_type*
950 {
951 return reinterpret_cast<offset_type*>(
952 this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
953 );
954 }
955
956 template <bool BIG>
957 auto list_view_array_impl<BIG>::make_list_sizes() -> offset_type*
958 {
959 return reinterpret_cast<offset_type*>(
960 this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].data() + this->get_arrow_proxy().offset()
961 );
962 }
963
964#ifdef __GNUC__
965# pragma GCC diagnostic pop
966#endif
967
968 /*****************************************
969 * fixed_sized_list_array implementation *
970 *****************************************/
971
972 inline auto fixed_sized_list_array::list_size_from_format(const std::string_view format) -> uint64_t
973 {
974 SPARROW_ASSERT(format.size() >= 3, "Invalid format string");
975 const auto n_digits = format.size() - 3;
976 const auto list_size_str = format.substr(3, n_digits);
977 return std::stoull(std::string(list_size_str));
978 }
979
981 : base_type(std::move(proxy))
982 , m_list_size(fixed_sized_list_array::list_size_from_format(this->get_arrow_proxy().format()))
983 {
984 }
985
986 inline auto fixed_sized_list_array::offset_range(size_type i) const -> std::pair<offset_type, offset_type>
987 {
988 const auto offset = i * m_list_size;
989 return std::make_pair(offset, offset + m_list_size);
990 }
991
992 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
993 inline arrow_proxy fixed_sized_list_array::create_proxy(
994 std::uint64_t list_size,
995 array&& flat_values,
996 R&& validity_input,
997 std::optional<std::string_view> name,
998 std::optional<METADATA_RANGE> metadata
999 )
1000 {
1001 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1002 auto vbitmap = ensure_validity_bitmap(size, std::forward<R>(validity_input));
1003
1004 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1005
1006 const auto null_count = vbitmap.null_count();
1007
1008 const repeat_view<bool> children_ownership{true, 1};
1009
1010 std::string format = "+w:" + std::to_string(list_size);
1011 ArrowSchema schema = make_arrow_schema(
1012 format,
1013 std::move(name), // name
1014 std::move(metadata), // metadata
1015 std::nullopt, // flags,
1016 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1017 children_ownership, // children ownership
1018 nullptr, // dictionary
1019 true // dictionary ownership
1020
1021 );
1022 std::vector<buffer<std::uint8_t>> arr_buffs = {vbitmap.extract_storage()};
1023
1024 ArrowArray arr = make_arrow_array(
1025 static_cast<std::int64_t>(size), // length
1026 static_cast<int64_t>(null_count),
1027 0, // offset
1028 std::move(arr_buffs),
1029 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1030 children_ownership, // children ownership
1031 nullptr, // dictionary
1032 true // dictionary ownership
1033 );
1034 return arrow_proxy{std::move(arr), std::move(schema)};
1035 }
1036
1037 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
1038 inline arrow_proxy fixed_sized_list_array::create_proxy(
1039 std::uint64_t list_size,
1040 array&& flat_values,
1041 bool nullable,
1042 std::optional<std::string_view> name,
1043 std::optional<METADATA_RANGE> metadata
1044 )
1045 {
1046 if (nullable)
1047 {
1048 return fixed_sized_list_array::create_proxy(
1049 list_size,
1050 std::move(flat_values),
1052 name,
1053 metadata
1054 );
1055 }
1056 else
1057 {
1058 const auto size = flat_values.size() / static_cast<std::size_t>(list_size);
1059 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(flat_values));
1060 const repeat_view<bool> children_ownership{true, 1};
1061
1062 std::string format = "+w:" + std::to_string(list_size);
1063 ArrowSchema schema = make_arrow_schema(
1064 format,
1065 std::move(name), // name
1066 std::move(metadata), // metadata
1067 std::nullopt, // flags,
1068 new ArrowSchema*[1]{new ArrowSchema(std::move(flat_schema))}, // children
1069 children_ownership, // children ownership
1070 nullptr, // dictionary
1071 true // dictionary ownership
1072
1073 );
1074 std::vector<buffer<std::uint8_t>> arr_buffs = {
1075 buffer<std::uint8_t>{nullptr, 0} // no validity bitmap
1076 };
1077
1078 ArrowArray arr = make_arrow_array(
1079 static_cast<std::int64_t>(size), // length
1080 0, // null_count
1081 0, // offset
1082 std::move(arr_buffs),
1083 new ArrowArray*[1]{new ArrowArray(std::move(flat_arr))}, // children
1084 children_ownership, // children ownership
1085 nullptr, // dictionary
1086 true // dictionary ownership
1087 );
1088 return arrow_proxy{std::move(arr), std::move(schema)};
1089 }
1090 }
1091}
typename base_type::const_bitmap_range const_bitmap_range
array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class defining common immutable interface for arrays with a bitmap.
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Proxy class over ArrowArray and ArrowSchema.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
storage_type extract_storage() noexcept
constexpr size_type null_count() const noexcept
fixed_sized_list_array & operator=(const self_type &)=default
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
fixed_sized_list_array(ARGS &&... args)
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
fixed_sized_list_array(self_type &&)=default
typename base_type::size_type size_type
fixed_sized_list_array(const self_type &)=default
typename base_type::const_bitmap_range const_bitmap_range
const array_wrapper * raw_flat_array() const
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::iterator_tag iterator_tag
list_array_crtp_base(arrow_proxy proxy)
array_wrapper * raw_flat_array()
typename inner_types::value_iterator value_iterator
list_array_crtp_base & operator=(const self_type &)
list_array_crtp_base(self_type &&) noexcept=default
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
list_array_crtp_base(const self_type &)
nullable< inner_value_type > value_type
array_bitmap_base< DERIVED > base_type
list_array_impl< BIG > self_type
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
typename base_type::size_type size_type
list_array_impl(self_type &&) noexcept=default
list_array_impl(const self_type &)
array_inner_types< self_type > inner_types
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
list_array_impl & operator=(const self_type &)
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
std::size_t size_type
list_view_array_impl & operator=(const self_type &)
typename base_type::size_type size_type
list_view_array_impl & operator=(self_type &&)=default
list_view_array_impl(self_type &&)=default
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
list_view_array_impl(ARGS &&... args)
list_view_array_impl< BIG > self_type
list_view_array_impl(const self_type &)
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:281
A view that repeats a value a given number of times.
This buffer class is used as the storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_ASSERT(expr__, message__)
sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
dynamic_bitset< std::uint8_t > validity_bitmap
constexpr bool is_list_array_v
Checks whether T is a list_array type.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
list_array_impl< true > big_list_array
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.