sparrow 2.1.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
primitive_array_impl.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15
16#pragma once
17
24#include "sparrow/u8_buffer.hpp"
28
29namespace sparrow
30{
31 template <trivial_copyable_type T, typename Ext = empty_extension, trivial_copyable_type T2 = T>
33
34 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
54
55 namespace detail
56 {
57 template <class T>
59
60 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
62 {
63 [[nodiscard]] static constexpr sparrow::data_type get()
64 {
66 }
67 };
68 }
69
110 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
111 class primitive_array_impl final : public mutable_array_bitmap_base<primitive_array_impl<T, Ext, T2>>,
112 private details::primitive_data_access<T, T2>,
113 public Ext
114 {
115 public:
116
120 using size_type = std::size_t;
121
123 using inner_value_type = typename inner_types::inner_value_type;
124 using inner_reference = typename inner_types::inner_reference;
125 using inner_const_reference = typename inner_types::inner_const_reference;
126
127 using pointer = typename inner_types::pointer;
128 using const_pointer = typename inner_types::const_pointer;
129
130 using value_iterator = typename base_type::value_iterator;
131 using const_value_iterator = typename base_type::const_value_iterator;
132
144
165 template <class... Args>
167 explicit primitive_array_impl(Args&&... args)
168 : base_type(create_proxy(std::forward<Args>(args)...))
169 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
170 {
171 }
172
187 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
189 std::initializer_list<inner_value_type> init,
190 bool nullable = true,
191 std::optional<std::string_view> name = std::nullopt,
192 std::optional<METADATA_RANGE> metadata = std::nullopt
193 )
194 : base_type(create_proxy(init, nullable, std::move(name), std::move(metadata)))
195 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
196 {
197 }
198
209
222
233
245 constexpr primitive_array_impl& operator=(primitive_array_impl&&) noexcept;
246
247 private:
248
265 template <
266 validity_bitmap_input VALIDITY_RANGE = validity_bitmap,
267 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
268 [[nodiscard]] static auto create_proxy(
269 u8_buffer<T2>&& data_buffer,
270 size_t size,
271 VALIDITY_RANGE&& bitmaps,
272 std::optional<std::string_view> name = std::nullopt,
273 std::optional<METADATA_RANGE> metadata = std::nullopt
274 ) -> arrow_proxy;
275
294 template <
295 validity_bitmap_input VALIDITY_RANGE = validity_bitmap,
296 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
297 [[nodiscard]] static auto create_proxy(
298 u8_buffer<T2>&& data_buffer,
299 size_t size,
300 bool nullable = true,
301 std::optional<std::string_view> name = std::nullopt,
302 std::optional<METADATA_RANGE> metadata = std::nullopt
303 ) -> arrow_proxy;
304
323 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
324 requires(
325 std::convertible_to<std::ranges::range_value_t<R>, T2>
326 && !mpl::is_type_instance_of_v<R, u8_buffer>
327 )
328 [[nodiscard]] static auto create_proxy(
329 R&& range,
330 bool nullable = true,
331 std::optional<std::string_view> name = std::nullopt,
332 std::optional<METADATA_RANGE> metadata = std::nullopt
333 ) -> arrow_proxy;
334
353 template <class U, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
354 requires std::convertible_to<U, T2>
355 [[nodiscard]] static arrow_proxy create_proxy(
356 size_type n,
357 const U& value = U{},
358 bool nullable = true,
359 std::optional<std::string_view> name = std::nullopt,
360 std::optional<METADATA_RANGE> metadata = std::nullopt
361 );
362
384 template <
385 std::ranges::input_range R,
387 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
388 requires(std::convertible_to<std::ranges::range_value_t<R>, T2>)
389 [[nodiscard]] static arrow_proxy create_proxy(
390 R&&,
391 R2&&,
392 std::optional<std::string_view> name = std::nullopt,
393 std::optional<METADATA_RANGE> metadata = std::nullopt
394 );
395
413 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
414 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T2>>
415 [[nodiscard]] static arrow_proxy create_proxy(
416 NULLABLE_RANGE&&,
417 std::optional<std::string_view> name = std::nullopt,
418 std::optional<METADATA_RANGE> metadata = std::nullopt
419 );
420
439 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
440 [[nodiscard]] static arrow_proxy create_proxy_impl(
441 u8_buffer<T2>&& data_buffer,
442 size_t size,
443 std::optional<validity_bitmap>&& bitmap,
444 std::optional<std::string_view> name = std::nullopt,
445 std::optional<METADATA_RANGE> metadata = std::nullopt
446 );
447
453
454 // Modifiers
455
460
461 static constexpr size_type DATA_BUFFER_INDEX = 1;
462
464 friend base_type;
467 };
468
469 /********************************************************
470 * primitive_array_impl implementation *
471 ********************************************************/
472
473 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
475 : base_type(std::move(proxy_param))
476 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
477 {
478 }
479
480 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
482 : base_type(rhs)
483 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
484 {
485 }
486
487 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
490 {
492 access_class_type::reset_proxy(this->get_arrow_proxy());
493 return *this;
494 }
495
496 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
498 : base_type(std::move(rhs))
499 , access_class_type(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
500 {
501 }
502
503 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
506 {
507 base_type::operator=(std::move(rhs));
508 access_class_type::reset_proxy(this->get_arrow_proxy());
509 return *this;
510 }
511
512 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
513 template <validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
514 auto primitive_array_impl<T, Ext, T2>::create_proxy(
515 u8_buffer<T2>&& data_buffer,
516 size_t size,
517 VALIDITY_RANGE&& bitmap_input,
518 std::optional<std::string_view> name,
519 std::optional<METADATA_RANGE> metadata
520 ) -> arrow_proxy
521 {
522 return create_proxy_impl(
523 std::forward<u8_buffer<T2>>(data_buffer),
524 size,
525 ensure_validity_bitmap(size, std::forward<VALIDITY_RANGE>(bitmap_input)),
526 std::move(name),
527 std::move(metadata)
528 );
529 }
530
531 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
532 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
533 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T2>)
534 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
535 VALUE_RANGE&& values,
536 VALIDITY_RANGE&& validity_input,
537 std::optional<std::string_view> name,
538 std::optional<METADATA_RANGE> metadata
539 )
540 {
541 auto size = static_cast<size_t>(std::ranges::distance(values));
543 std::forward<VALUE_RANGE>(values)
544 );
545 return create_proxy(
546 std::move(data_buffer),
547 size,
548 std::forward<VALIDITY_RANGE>(validity_input),
549 std::move(name),
550 std::move(metadata)
551 );
552 }
553
554 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
555 template <class U, input_metadata_container METADATA_RANGE>
556 requires std::convertible_to<U, T2>
557 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
558 size_type n,
559 const U& value,
560 bool nullable,
561 std::optional<std::string_view> name,
562 std::optional<METADATA_RANGE> metadata
563 )
564 {
565 // create data_buffer
566 u8_buffer<T2> data_buffer(n, value);
567 return create_proxy_impl(
568 std::move(data_buffer),
569 n,
570 nullable ? std::make_optional<validity_bitmap>(nullptr, 0, validity_bitmap::default_allocator())
571 : std::nullopt,
572 std::move(name),
573 std::move(metadata)
574 );
575 }
576
577 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
578 template <validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
579 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
580 u8_buffer<T2>&& data_buffer,
581 size_t size,
582 bool nullable,
583 std::optional<std::string_view> name,
584 std::optional<METADATA_RANGE> metadata
585 )
586 {
587 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(
588 nullptr,
589 0,
591 )
592 : std::nullopt;
593 return create_proxy_impl(
594 std::move(data_buffer),
595 size,
596 std::move(bitmap),
597 std::move(name),
598 std::move(metadata)
599 );
600 }
601
602 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
603 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
604 requires(std::convertible_to<std::ranges::range_value_t<R>, T2> && !mpl::is_type_instance_of_v<R, u8_buffer>)
605 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
606 R&& range,
607 bool nullable,
608 std::optional<std::string_view> name,
609 std::optional<METADATA_RANGE> metadata
610 )
611 {
612 auto data_buffer = details::primitive_data_access<T, T2>::make_data_buffer(std::forward<R>(range));
613 auto distance = static_cast<size_t>(std::ranges::distance(range));
614 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(
615 nullptr,
616 0,
618 )
619 : std::nullopt;
620 return create_proxy_impl(
621 std::move(data_buffer),
622 distance,
623 std::move(bitmap),
624 std::move(name),
625 std::move(metadata)
626 );
627 }
628
629 // range of nullable values
630 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
631 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
632 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T2>>
633 arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy(
634 NULLABLE_RANGE&& nullable_range,
635 std::optional<std::string_view> name,
636 std::optional<METADATA_RANGE> metadata
637 )
638 {
639 // split into values and is_non_null ranges
640 auto values = nullable_range
641 | std::views::transform(
642 [](const auto& v)
643 {
644 return v.get();
645 }
646 );
647 auto is_non_null = nullable_range
648 | std::views::transform(
649 [](const auto& v)
650 {
651 return v.has_value();
652 }
653 );
654 return self_type::create_proxy(values, is_non_null, std::move(name), std::move(metadata));
655 }
656
657 template <trivial_copyable_type T, typename Ext, trivial_copyable_type T2>
658 template <input_metadata_container METADATA_RANGE>
659 [[nodiscard]] arrow_proxy primitive_array_impl<T, Ext, T2>::create_proxy_impl(
660 u8_buffer<T2>&& data_buffer,
661 size_t size,
662 std::optional<validity_bitmap>&& bitmap,
663 std::optional<std::string_view> name,
664 std::optional<METADATA_RANGE> metadata
665 )
666 {
667 const bool bitmap_has_value = bitmap.has_value();
668 const auto null_count = bitmap_has_value ? bitmap->null_count() : 0;
669 const auto flags = bitmap_has_value
670 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
671 : std::nullopt;
672
673 // create arrow schema and array
674 ArrowSchema schema = make_arrow_schema(
676 std::move(name), // name
677 std::move(metadata), // metadata
678 flags, // flags
679 nullptr, // children
680 repeat_view<bool>(true, 0), // children_ownership
681 nullptr, // dictionary
682 true // dictionary ownership
683 );
684
685 buffer<uint8_t> bitmap_buffer = bitmap_has_value
686 ? std::move(*bitmap).extract_storage()
688
689 std::vector<buffer<uint8_t>> buffers(2);
690 buffers[0] = std::move(bitmap_buffer);
691 buffers[1] = std::move(data_buffer).extract_storage();
692
693 // create arrow array
694 ArrowArray arr = make_arrow_array(
695 static_cast<std::int64_t>(size), // length
696 static_cast<int64_t>(null_count),
697 0, // offset
698 std::move(buffers),
699 nullptr, // children
700 repeat_view<bool>(true, 0), // children_ownership
701 nullptr, // dictionary,
702 true // dictionary ownership
703 );
704 arrow_proxy proxy(std::move(arr), std::move(schema));
705 Ext::init(proxy);
706 return proxy;
707 }
708}
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
Object that owns a piece of contiguous memory.
Definition buffer.hpp:114
xsimd::aligned_allocator< T > default_allocator
Definition buffer.hpp:126
Provides access to primitive data stored in Arrow format buffers.
constexpr inner_reference value(size_t i)
pointer_iterator< inner_const_pointer > const_value_iterator
std::conditional_t< std::is_same_v< T2, bool >, T2, const T2 & > inner_const_reference
constexpr const_value_iterator value_cbegin() const
constexpr value_iterator insert_value(const_value_iterator pos, T2 value, size_t count)
static constexpr u8_buffer< T2 > make_data_buffer(RANGE &&r)
constexpr const_value_iterator value_cend() const
constexpr void reset_proxy(arrow_proxy &proxy)
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
constexpr void resize_values(size_t new_length, const T2 &value)
constexpr value_iterator erase_values(const_value_iterator pos, size_t count)
typename storage_type::default_allocator default_allocator
constexpr primitive_array_impl(primitive_array_impl &&) noexcept
typename inner_types::inner_reference inner_reference
typename inner_types::inner_value_type inner_value_type
typename base_type::const_value_iterator const_value_iterator
primitive_array_impl(arrow_proxy)
Constructs a primitive array from an existing Arrow proxy.
typename inner_types::inner_const_reference inner_const_reference
primitive_array_impl(std::initializer_list< inner_value_type > init, bool nullable=true, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
constexpr primitive_array_impl & operator=(const primitive_array_impl &)
constexpr primitive_array_impl(const primitive_array_impl &)
typename inner_types::const_pointer const_pointer
details::primitive_data_access< T, T2 > access_class_type
mutable_array_bitmap_base< primitive_array_impl< T, Ext, T2 > > base_type
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
Definition mp_utils.hpp:102
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr std::string_view data_type_to_format(data_type type)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
std::pair< metadata_key, metadata_value > metadata_pair
Type alias for metadata key-value pairs.
Definition metadata.hpp:61
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
typename data_access_type::inner_const_pointer const_pointer
typename data_access_type::const_value_iterator const_value_iterator
typename data_access_type::inner_const_reference inner_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.