sparrow 1.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
primitive_array_impl.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15
16#pragma once
17
24#include "sparrow/u8_buffer.hpp"
28
29namespace sparrow
30{
31 template <trivial_copyable_type T, typename Ext = empty_extension, trivial_copyable_type T2 = T>
33
34 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
54
55 namespace detail
56 {
57 template <class T>
59
60 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
62 {
63 [[nodiscard]] static constexpr sparrow::data_type get()
64 {
66 }
67 };
68 }
69
110 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
111 class primitive_array_impl final : public mutable_array_bitmap_base<primitive_array_impl<T, Ext, T2>>,
112 private details::primitive_data_access<T, T2>,
113 public Ext
114 {
115 public:
116
120 using size_type = std::size_t;
121
123 using inner_value_type = typename inner_types::inner_value_type;
124 using inner_reference = typename inner_types::inner_reference;
125 using inner_const_reference = typename inner_types::inner_const_reference;
126
127 using pointer = typename inner_types::pointer;
128 using const_pointer = typename inner_types::const_pointer;
129
130 using value_iterator = typename base_type::value_iterator;
131 using const_value_iterator = typename base_type::const_value_iterator;
132
144
165 template <class... Args>
167 explicit primitive_array_impl(Args&&... args)
168 : base_type(create_proxy(std::forward<Args>(args)...))
169 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
170 {
171 }
172
187 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
189 std::initializer_list<inner_value_type> init,
190 bool nullable = true,
191 std::optional<std::string_view> name = std::nullopt,
192 std::optional<METADATA_RANGE> metadata = std::nullopt
193 )
194 : base_type(create_proxy(init, nullable, std::move(name), std::move(metadata)))
195 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
196 {
197 }
198
209
222
233
245 constexpr primitive_array_impl& operator=(primitive_array_impl&&) noexcept;
246
247 private:
248
265 template <
266 validity_bitmap_input VALIDITY_RANGE = validity_bitmap,
267 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
268 [[nodiscard]] static auto create_proxy(
269 u8_buffer<T2>&& data_buffer,
270 size_t size,
271 VALIDITY_RANGE&& bitmaps,
272 std::optional<std::string_view> name = std::nullopt,
273 std::optional<METADATA_RANGE> metadata = std::nullopt
274 ) -> arrow_proxy;
275
294 template <
295 validity_bitmap_input VALIDITY_RANGE = validity_bitmap,
296 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
297 [[nodiscard]] static auto create_proxy(
298 u8_buffer<T2>&& data_buffer,
299 size_t size,
300 bool nullable = true,
301 std::optional<std::string_view> name = std::nullopt,
302 std::optional<METADATA_RANGE> metadata = std::nullopt
303 ) -> arrow_proxy;
304
323 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
324 requires(
325 std::convertible_to<std::ranges::range_value_t<R>, T2>
326 && !mpl::is_type_instance_of_v<R, u8_buffer>
327 )
328 [[nodiscard]] static auto create_proxy(
329 R&& range,
330 bool nullable = true,
331 std::optional<std::string_view> name = std::nullopt,
332 std::optional<METADATA_RANGE> metadata = std::nullopt
333 ) -> arrow_proxy;
334
353 template <class U, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
354 requires std::convertible_to<U, T2>
355 [[nodiscard]] static arrow_proxy create_proxy(
356 size_type n,
357 const U& value = U{},
358 bool nullable = true,
359 std::optional<std::string_view> name = std::nullopt,
360 std::optional<METADATA_RANGE> metadata = std::nullopt
361 );
362
384 template <
385 std::ranges::input_range R,
387 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
388 requires(std::convertible_to<std::ranges::range_value_t<R>, T2>)
389 [[nodiscard]] static arrow_proxy create_proxy(
390 R&&,
391 R2&&,
392 std::optional<std::string_view> name = std::nullopt,
393 std::optional<METADATA_RANGE> metadata = std::nullopt
394 );
395
413 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
414 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T2>>
415 [[nodiscard]] static arrow_proxy create_proxy(
416 NULLABLE_RANGE&&,
417 std::optional<std::string_view> name = std::nullopt,
418 std::optional<METADATA_RANGE> metadata = std::nullopt
419 );
420
439 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
440 [[nodiscard]] static arrow_proxy create_proxy_impl(
441 u8_buffer<T2>&& data_buffer,
442 size_t size,
443 std::optional<validity_bitmap>&& bitmap,
444 std::optional<std::string_view> name = std::nullopt,
445 std::optional<METADATA_RANGE> metadata = std::nullopt
446 );
447
453
454 // Modifiers
455
460
461 static constexpr size_type DATA_BUFFER_INDEX = 1;
462
464 friend base_type;
467 };
468
469 /********************************************************
470 * primitive_array_impl implementation *
471 ********************************************************/
472
473 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
475 : base_type(std::move(proxy_param))
476 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
477 {
478 }
479
480 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
482 : base_type(rhs)
483 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
484 {
485 }
486
487 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
490 {
492 access_class_type::reset_proxy(this->get_arrow_proxy());
493 return *this;
494 }
495
496 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
498 : base_type(std::move(rhs))
499 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
500 {
501 }
502
503 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
506 {
507 base_type::operator=(std::move(rhs));
508 access_class_type::reset_proxy(this->get_arrow_proxy());
509 return *this;
510 }
511
512 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
513 template <validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
514 auto primitive_array_impl<T, Ext, T2>::create_proxy(
515 u8_buffer<T2>&& data_buffer,
516 size_t size,
517 VALIDITY_RANGE&& bitmap_input,
518 std::optional<std::string_view> name,
519 std::optional<METADATA_RANGE> metadata
520 ) -> arrow_proxy
521 {
522 return create_proxy_impl(
523 std::forward<u8_buffer<T2>>(data_buffer),
524 size,
525 ensure_validity_bitmap(size, std::forward<VALIDITY_RANGE>(bitmap_input)),
526 std::move(name),
527 std::move(metadata)
528 );
529 }
530
531 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
532 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
533 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T2>)
534 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
535 VALUE_RANGE&& values,
536 VALIDITY_RANGE&& validity_input,
537 std::optional<std::string_view> name,
538 std::optional<METADATA_RANGE> metadata
539 )
540 {
541 auto size = static_cast<size_t>(std::ranges::distance(values));
543 std::forward<VALUE_RANGE>(values)
544 );
545 return create_proxy(
546 std::move(data_buffer),
547 size,
548 std::forward<VALIDITY_RANGE>(validity_input),
549 std::move(name),
550 std::move(metadata)
551 );
552 }
553
554 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
555 template <class U, input_metadata_container METADATA_RANGE>
556 requires std::convertible_to<U, T2>
557 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
558 size_type n,
559 const U& value,
560 bool nullable,
561 std::optional<std::string_view> name,
562 std::optional<METADATA_RANGE> metadata
563 )
564 {
565 // create data_buffer
566 u8_buffer<T2> data_buffer(n, value);
567 return create_proxy_impl(
568 std::move(data_buffer),
569 n,
570 nullable ? std::make_optional<validity_bitmap>(nullptr, 0) : std::nullopt,
571 std::move(name),
572 std::move(metadata)
573 );
574 }
575
576 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
577 template <validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
578 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
579 u8_buffer<T2>&& data_buffer,
580 size_t size,
581 bool nullable,
582 std::optional<std::string_view> name,
583 std::optional<METADATA_RANGE> metadata
584 )
585 {
586 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0)
587 : std::nullopt;
588 return create_proxy_impl(
589 std::move(data_buffer),
590 size,
591 std::move(bitmap),
592 std::move(name),
593 std::move(metadata)
594 );
595 }
596
597 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
598 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
599 requires(std::convertible_to<std::ranges::range_value_t<R>, T2> && !mpl::is_type_instance_of_v<R, u8_buffer>)
600 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
601 R&& range,
602 bool nullable,
603 std::optional<std::string_view> name,
604 std::optional<METADATA_RANGE> metadata
605 )
606 {
607 auto data_buffer = details::primitive_data_access<T, T2>::make_data_buffer(std::forward<R>(range));
608 auto distance = static_cast<size_t>(std::ranges::distance(range));
609 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0)
610 : std::nullopt;
611 return create_proxy_impl(
612 std::move(data_buffer),
613 distance,
614 std::move(bitmap),
615 std::move(name),
616 std::move(metadata)
617 );
618 }
619
620 // range of nullable values
621 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
622 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
623 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T2>>
624 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
625 NULLABLE_RANGE&& nullable_range,
626 std::optional<std::string_view> name,
627 std::optional<METADATA_RANGE> metadata
628 )
629 {
630 // split into values and is_non_null ranges
631 auto values = nullable_range
632 | std::views::transform(
633 [](const auto& v)
634 {
635 return v.get();
636 }
637 );
638 auto is_non_null = nullable_range
639 | std::views::transform(
640 [](const auto& v)
641 {
642 return v.has_value();
643 }
644 );
645 return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata));
646 }
647
648 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
649 template <input_metadata_container METADATA_RANGE>
650 [[nodiscard]] arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy_impl(
651 u8_buffer<T2>&& data_buffer,
652 size_t size,
653 std::optional<validity_bitmap>&& bitmap,
654 std::optional<std::string_view> name,
655 std::optional<METADATA_RANGE> metadata
656 )
657 {
658 const bool bitmap_has_value = bitmap.has_value();
659 const auto null_count = bitmap_has_value ? bitmap->null_count() : 0;
660 const auto flags = bitmap_has_value
661 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
662 : std::nullopt;
663
664 // create arrow schema and array
665 ArrowSchema schema = make_arrow_schema(
667 std::move(name), // name
668 std::move(metadata), // metadata
669 flags, // flags
670 nullptr, // children
671 repeat_view<bool>(true, 0), // children_ownership
672 nullptr, // dictionary
673 true // dictionary ownership
674 );
675
676 buffer<uint8_t> bitmap_buffer = bitmap_has_value ? std::move(*bitmap).extract_storage()
677 : buffer<uint8_t>{nullptr, 0};
678
679 std::vector<buffer<uint8_t>> buffers(2);
680 buffers[0] = std::move(bitmap_buffer);
681 buffers[1] = std::move(data_buffer).extract_storage();
682
683 // create arrow array
684 ArrowArray arr = make_arrow_array(
685 static_cast<std::int64_t>(size), // length
686 static_cast<int64_t>(null_count),
687 0, // offset
688 std::move(buffers),
689 nullptr, // children
690 repeat_view<bool>(true, 0), // children_ownership
691 nullptr, // dictionary,
692 true // dictionary ownership
693 );
694 arrow_proxy proxy(std::move(arr), std::move(schema));
695 Ext::init(proxy);
696 return proxy;
697 }
698}
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
Object that owns a piece of contiguous memory.
Definition buffer.hpp:113
Provides access to primitive data stored in Arrow format buffers.
constexpr inner_reference value(size_t i)
pointer_iterator< inner_const_pointer > const_value_iterator
std::conditional_t< std::is_same_v< T2, bool >, T2, const T2 & > inner_const_reference
constexpr const_value_iterator value_cbegin() const
constexpr value_iterator insert_value(const_value_iterator pos, T2 value, size_t count)
static constexpr u8_buffer< T2 > make_data_buffer(RANGE &&r)
constexpr const_value_iterator value_cend() const
constexpr void reset_proxy(arrow_proxy &proxy)
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
constexpr void resize_values(size_t new_length, const T2 &value)
constexpr value_iterator erase_values(const_value_iterator pos, size_t count)
constexpr primitive_array_impl(primitive_array_impl &&) noexcept
typename inner_types::inner_reference inner_reference
typename inner_types::inner_value_type inner_value_type
typename base_type::const_value_iterator const_value_iterator
primitive_array_impl(arrow_proxy)
Constructs a primitive array from an existing Arrow proxy.
typename inner_types::inner_const_reference inner_const_reference
primitive_array_impl(std::initializer_list< inner_value_type > init, bool nullable=true, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
constexpr primitive_array_impl & operator=(const primitive_array_impl &)
constexpr primitive_array_impl(const primitive_array_impl &)
typename inner_types::const_pointer const_pointer
details::primitive_data_access< T, T2 > access_class_type
mutable_array_bitmap_base< primitive_array_impl< T, Ext, T2 > > base_type
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
Definition mp_utils.hpp:102
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr std::string_view data_type_to_format(data_type type)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
std::pair< metadata_key, metadata_value > metadata_pair
Type alias for metadata key-value pairs.
Definition metadata.hpp:61
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
typename data_access_type::inner_const_pointer const_pointer
typename data_access_type::const_value_iterator const_value_iterator
typename data_access_type::inner_const_reference inner_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.