sparrow 2.3.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
primitive_array_impl.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15
16#pragma once
17
25#include "sparrow/u8_buffer.hpp"
29
30namespace sparrow
31{
32 template <trivial_copyable_type T, typename Ext = empty_extension, trivial_copyable_type T2 = T>
34
35 namespace copy_tracker
36 {
37 template <typename T>
39 std::string key()
40 {
41 return "primitive_array";
42 }
43 }
44
45 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
65
66 namespace detail
67 {
68 template <class T>
70
71 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
73 {
74 [[nodiscard]] static constexpr sparrow::data_type get()
75 {
77 }
78 };
79 }
80
121 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
122 class primitive_array_impl final : public mutable_array_bitmap_base<primitive_array_impl<T, Ext, T2>>,
123 private details::primitive_data_access<T, T2>,
124 public Ext
125 {
126 public:
127
131 using size_type = std::size_t;
132
134 using inner_value_type = typename inner_types::inner_value_type;
135 using inner_reference = typename inner_types::inner_reference;
136 using inner_const_reference = typename inner_types::inner_const_reference;
137
138 using pointer = typename inner_types::pointer;
139 using const_pointer = typename inner_types::const_pointer;
140
141 using value_iterator = typename base_type::value_iterator;
142 using const_value_iterator = typename base_type::const_value_iterator;
143
155
176 template <class... Args>
178 explicit primitive_array_impl(Args&&... args)
179 : base_type(create_proxy(std::forward<Args>(args)...))
180 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
181 {
182 }
183
198 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
200 std::initializer_list<inner_value_type> init,
201 bool nullable = true,
202 std::optional<std::string_view> name = std::nullopt,
203 std::optional<METADATA_RANGE> metadata = std::nullopt
204 )
205 : base_type(create_proxy(init, nullable, std::move(name), std::move(metadata)))
206 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
207 {
208 }
209
220
233
244
256 constexpr primitive_array_impl& operator=(primitive_array_impl&&) noexcept;
257
258 private:
259
276 template <
277 validity_bitmap_input VALIDITY_RANGE = validity_bitmap,
278 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
279 [[nodiscard]] static auto create_proxy(
280 u8_buffer<T2>&& data_buffer,
281 size_t size,
282 VALIDITY_RANGE&& bitmaps,
283 std::optional<std::string_view> name = std::nullopt,
284 std::optional<METADATA_RANGE> metadata = std::nullopt
285 ) -> arrow_proxy;
286
305 template <
306 validity_bitmap_input VALIDITY_RANGE = validity_bitmap,
307 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
308 [[nodiscard]] static auto create_proxy(
309 u8_buffer<T2>&& data_buffer,
310 size_t size,
311 bool nullable = true,
312 std::optional<std::string_view> name = std::nullopt,
313 std::optional<METADATA_RANGE> metadata = std::nullopt
314 ) -> arrow_proxy;
315
334 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
335 requires(
336 std::convertible_to<std::ranges::range_value_t<R>, T2>
337 && !mpl::is_type_instance_of_v<R, u8_buffer>
338 )
339 [[nodiscard]] static auto create_proxy(
340 R&& range,
341 bool nullable = true,
342 std::optional<std::string_view> name = std::nullopt,
343 std::optional<METADATA_RANGE> metadata = std::nullopt
344 ) -> arrow_proxy;
345
364 template <class U, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
365 requires std::convertible_to<U, T2>
366 [[nodiscard]] static arrow_proxy create_proxy(
367 size_type n,
368 const U& value = U{},
369 bool nullable = true,
370 std::optional<std::string_view> name = std::nullopt,
371 std::optional<METADATA_RANGE> metadata = std::nullopt
372 );
373
395 template <
396 std::ranges::input_range R,
398 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
399 requires(std::convertible_to<std::ranges::range_value_t<R>, T2>)
400 [[nodiscard]] static arrow_proxy create_proxy(
401 R&&,
402 R2&&,
403 std::optional<std::string_view> name = std::nullopt,
404 std::optional<METADATA_RANGE> metadata = std::nullopt
405 );
406
424 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
425 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T2>>
426 [[nodiscard]] static arrow_proxy create_proxy(
427 NULLABLE_RANGE&&,
428 std::optional<std::string_view> name = std::nullopt,
429 std::optional<METADATA_RANGE> metadata = std::nullopt
430 );
431
450 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
451 [[nodiscard]] static arrow_proxy create_proxy_impl(
452 u8_buffer<T2>&& data_buffer,
453 size_t size,
454 std::optional<validity_bitmap>&& bitmap,
455 std::optional<std::string_view> name = std::nullopt,
456 std::optional<METADATA_RANGE> metadata = std::nullopt
457 );
458
464
465 // Modifiers
466
471
472 static constexpr size_type DATA_BUFFER_INDEX = 1;
473
475 friend base_type;
478 };
479
480 /********************************************************
481 * primitive_array_impl implementation *
482 ********************************************************/
483
484 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
486 : base_type(std::move(proxy_param))
487 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
488 {
489 }
490
491 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
493 : base_type(rhs)
494 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
495 {
497 }
498
499 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
508
509 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
511 : base_type(std::move(rhs))
512 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
513 {
514 }
515
516 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
519 {
520 base_type::operator=(std::move(rhs));
521 access_class_type::reset_proxy(this->get_arrow_proxy());
522 return *this;
523 }
524
525 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
526 template <validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
527 auto primitive_array_impl<T, Ext, T2>::create_proxy(
528 u8_buffer<T2>&& data_buffer,
529 size_t size,
530 VALIDITY_RANGE&& bitmap_input,
531 std::optional<std::string_view> name,
532 std::optional<METADATA_RANGE> metadata
533 ) -> arrow_proxy
534 {
535 return create_proxy_impl(
536 std::forward<u8_buffer<T2>>(data_buffer),
537 size,
538 ensure_validity_bitmap(size, std::forward<VALIDITY_RANGE>(bitmap_input)),
539 std::move(name),
540 std::move(metadata)
541 );
542 }
543
// Builds the Arrow proxy from a range of values plus a separate validity input.
//
// The element count is taken with std::ranges::distance(values) before `values` is
// forwarded, and the resulting `data_buffer` is then delegated, together with the
// validity input, to the create_proxy overload that takes (data_buffer, size, validity, ...).
//
// NOTE(review): the listing jumps from line 554 to line 556, so the statement that
// declares `data_buffer` and opens the call closed on line 557 is not visible in this
// extract. Presumably it is a make_data_buffer(...) call, as in the (range, nullable)
// overload -- confirm against the real header.
544 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
545 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
546 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T2>)
547 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
548 VALUE_RANGE&& values,
549 VALIDITY_RANGE&& validity_input,
550 std::optional<std::string_view> name,
551 std::optional<METADATA_RANGE> metadata
552 )
553 {
// Measure the range while it is still untouched; it is forwarded right after.
554 auto size = static_cast<size_t>(std::ranges::distance(values));
556 std::forward<VALUE_RANGE>(values)
557 );
558 return create_proxy(
559 std::move(data_buffer),
560 size,
561 std::forward<VALIDITY_RANGE>(validity_input),
562 std::move(name),
563 std::move(metadata)
564 );
565 }
566
567 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
568 template <class U, input_metadata_container METADATA_RANGE>
569 requires std::convertible_to<U, T2>
570 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
571 size_type n,
572 const U& value,
573 bool nullable,
574 std::optional<std::string_view> name,
575 std::optional<METADATA_RANGE> metadata
576 )
577 {
578 // create data_buffer
579 u8_buffer<T2> data_buffer(n, value);
580
581 return create_proxy_impl(
582 std::move(data_buffer),
583 n,
584 nullable ? std::make_optional<validity_bitmap>(nullptr, 0, validity_bitmap::default_allocator())
585 : std::nullopt,
586 std::move(name),
587 std::move(metadata)
588 );
589 }
590
// Builds the Arrow proxy from an already-populated data buffer and a nullable flag.
//
// When `nullable` is true a validity_bitmap is constructed in place and passed on;
// when it is false std::nullopt is passed, so create_proxy_impl receives no bitmap.
// The buffer, size, name and metadata are handed to create_proxy_impl unchanged.
//
// NOTE(review): the listing jumps from line 603 to line 605, so the third
// make_optional argument is not visible in this extract. The (n, value) overload
// passes validity_bitmap::default_allocator() in that position -- presumably the
// same here; confirm against the real header.
591 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
592 template <validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
593 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
594 u8_buffer<T2>&& data_buffer,
595 size_t size,
596 bool nullable,
597 std::optional<std::string_view> name,
598 std::optional<METADATA_RANGE> metadata
599 )
600 {
601 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(
602 nullptr,
603 0,
605 )
606 : std::nullopt;
607 return create_proxy_impl(
608 std::move(data_buffer),
609 size,
610 std::move(bitmap),
611 std::move(name),
612 std::move(metadata)
613 );
614 }
615
616 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
617 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
618 requires(std::convertible_to<std::ranges::range_value_t<R>, T2> && !mpl::is_type_instance_of_v<R, u8_buffer>)
619 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
620 R&& range,
621 bool nullable,
622 std::optional<std::string_view> name,
623 std::optional<METADATA_RANGE> metadata
624 )
625 {
626 auto data_buffer = details::primitive_data_access<T, T2>::make_data_buffer(std::forward<R>(range));
627 auto distance = static_cast<size_t>(std::ranges::distance(range));
628
629 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(
630 nullptr,
631 0,
633 )
634 : std::nullopt;
635 return create_proxy_impl(
636 std::move(data_buffer),
637 distance,
638 std::move(bitmap),
639 std::move(name),
640 std::move(metadata)
641 );
642 }
643
644 // range of nullable values
645 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
646 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
647 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T2>>
648 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
649 NULLABLE_RANGE&& nullable_range,
650 std::optional<std::string_view> name,
651 std::optional<METADATA_RANGE> metadata
652 )
653 {
654 // split into values and is_non_null ranges
655 auto values = nullable_range
656 | std::views::transform(
657 [](const auto& v)
658 {
659 return v.get();
660 }
661 );
662 auto is_non_null = nullable_range
663 | std::views::transform(
664 [](const auto& v)
665 {
666 return v.has_value();
667 }
668 );
669 return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata));
670 }
671
// Common back end of the create_proxy overloads: assembles an ArrowSchema and an
// ArrowArray and wraps both in an arrow_proxy.
//
// - bitmap: when it holds a value, the schema is given the ArrowFlag::NULLABLE flag and
//   the null count is read from bitmap->null_count(); when it is empty, no flags are
//   passed and the null count is 0.
// - The array receives exactly two buffers, in this order: the validity bitmap storage
//   (or an empty buffer created with the data buffer's allocator), then the data storage.
// - size becomes the array length; the offset is always 0; no children or dictionary
//   are attached.
// - Ext::init(proxy) is called on the finished proxy before it is returned.
//
// NOTE(review): the listing jumps from line 689 to line 691, so the first argument of
// make_arrow_schema is not visible in this extract (presumably the format string --
// confirm against the real header).
672 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
673 template <input_metadata_container METADATA_RANGE>
674 [[nodiscard]] arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy_impl(
675 u8_buffer<T2>&& data_buffer,
676 size_t size,
677 std::optional<validity_bitmap>&& bitmap,
678 std::optional<std::string_view> name,
679 std::optional<METADATA_RANGE> metadata
680 )
681 {
// Everything that depends on the bitmap is read before its storage is extracted below.
682 const bool bitmap_has_value = bitmap.has_value();
683 const auto null_count = bitmap_has_value ? bitmap->null_count() : 0;
684 const auto flags = bitmap_has_value
685 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
686 : std::nullopt;
687
688 // create arrow schema and array
689 ArrowSchema schema = make_arrow_schema(
691 std::move(name), // name
692 std::move(metadata), // metadata
693 flags, // flags
694 nullptr, // children
695 repeat_view<bool>(true, 0), // children_ownership
696 nullptr, // dictionary
697 true // dictionary ownership
698 );
699
700 // Extract storage from data_buffer and bitmap (if present)
701 buffer<uint8_t> extracted_data_buffer = std::move(data_buffer).extract_storage();
702 buffer<uint8_t> bitmap_buffer = bitmap_has_value
703 ? std::move(*bitmap).extract_storage()
704 : buffer<uint8_t>{nullptr, 0, extracted_data_buffer.get_allocator()};
705
// Buffer order matters: index 0 is the bitmap, index 1 is the data
// (the class addresses the data buffer through DATA_BUFFER_INDEX = 1).
706 std::vector<buffer<uint8_t>> buffers;
707 buffers.reserve(2);
708 buffers.emplace_back(std::move(bitmap_buffer));
709 buffers.emplace_back(std::move(extracted_data_buffer));
710
711
712 // create arrow array
713 ArrowArray arr = make_arrow_array(
714 static_cast<std::int64_t>(size), // length
715 static_cast<int64_t>(null_count),
716 0, // offset
717 std::move(buffers),
718 nullptr, // children
719 repeat_view<bool>(true, 0), // children_ownership
720 nullptr, // dictionary,
721 true // dictionary ownership
722 );
723 arrow_proxy proxy(std::move(arr), std::move(schema));
// Let the extension policy act on the proxy before it is handed back.
724 Ext::init(proxy);
725 return proxy;
726 }
727}
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
Object that owns a piece of contiguous memory.
Definition buffer.hpp:131
constexpr void reserve(size_type new_cap)
Definition buffer.hpp:787
Provides access to primitive data stored in Arrow format buffers.
constexpr inner_reference value(size_t i)
pointer_iterator< inner_const_pointer > const_value_iterator
std::conditional_t< std::is_same_v< T2, bool >, T2, const T2 & > inner_const_reference
constexpr const_value_iterator value_cbegin() const
constexpr value_iterator insert_value(const_value_iterator pos, T2 value, size_t count)
static constexpr u8_buffer< T2 > make_data_buffer(RANGE &&r)
constexpr const_value_iterator value_cend() const
constexpr void reset_proxy(arrow_proxy &proxy)
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
constexpr void resize_values(size_t new_length, const T2 &value)
constexpr value_iterator erase_values(const_value_iterator pos, size_t count)
typename storage_type::default_allocator default_allocator
constexpr primitive_array_impl(primitive_array_impl &&) noexcept
typename inner_types::inner_reference inner_reference
typename inner_types::inner_value_type inner_value_type
typename base_type::const_value_iterator const_value_iterator
primitive_array_impl(arrow_proxy)
Constructs a primitive array from an existing Arrow proxy.
typename inner_types::inner_const_reference inner_const_reference
primitive_array_impl(std::initializer_list< inner_value_type > init, bool nullable=true, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
constexpr primitive_array_impl & operator=(const primitive_array_impl &)
constexpr primitive_array_impl(const primitive_array_impl &)
typename inner_types::const_pointer const_pointer
details::primitive_data_access< T, T2 > access_class_type
mutable_array_bitmap_base< primitive_array_impl< T, Ext, T2 > > base_type
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
SPARROW_API void increase(const std::string &key)
std::string key()
Definition buffer.hpp:49
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
Definition mp_utils.hpp:102
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr std::string_view data_type_to_format(data_type type)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
std::pair< metadata_key, metadata_value > metadata_pair
Type alias for metadata key-value pairs.
Definition metadata.hpp:61
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
typename data_access_type::inner_const_pointer const_pointer
typename data_access_type::const_value_iterator const_value_iterator
typename data_access_type::inner_const_reference inner_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.