sparrow 2.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
array_base.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <algorithm>
18#include <cstddef>
19#include <cstdint>
20#include <ranges>
21#include <utility>
22
31
32namespace sparrow
33{
48
70 template <class D>
72
107 template <class D>
108 class array_crtp_base : public crtp_base<D>
109 {
110 public:
111
113 using derived_type = D;
114
116
117 using size_type = std::size_t;
118 using difference_type = std::ptrdiff_t;
119
120 using bitmap_type = typename inner_types::bitmap_type;
121 using const_bitmap_type = typename inner_types::const_bitmap_type;
122 using bitmap_const_reference = bitmap_type::const_reference;
123 using bitmap_iterator = bitmap_type::iterator;
124 using const_bitmap_iterator = const_bitmap_type::const_iterator;
125 using const_bitmap_range = std::ranges::subrange<const_bitmap_iterator>;
126
127 using inner_value_type = typename inner_types::inner_value_type;
129
130 using inner_const_reference = typename inner_types::inner_const_reference;
132
133 using const_value_iterator = typename inner_types::const_value_iterator;
134 using const_value_range = std::ranges::subrange<const_value_iterator>;
135
136 using iterator_tag = typename inner_types::iterator_tag;
137
146
148 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
149
158 [[nodiscard]] constexpr std::optional<std::string_view> name() const;
159
168 [[nodiscard]] std::optional<key_value_view> metadata() const;
169
178 [[nodiscard]] constexpr bool empty() const;
179
188 [[nodiscard]] constexpr size_type size() const;
189
198 [[nodiscard]] constexpr size_type offset() const;
199
208 [[nodiscard]] constexpr std::int64_t null_count() const;
209
222 [[nodiscard]] constexpr const_reference at(size_type i) const;
223
236 [[nodiscard]] constexpr const_reference operator[](size_type i) const;
237
250 [[nodiscard]] constexpr const_reference front() const;
251
264 [[nodiscard]] constexpr const_reference back() const;
265
274 [[nodiscard]] constexpr const_iterator begin() const;
275
284 [[nodiscard]] constexpr const_iterator end() const;
285
294 [[nodiscard]] constexpr const_iterator cbegin() const;
295
304 [[nodiscard]] constexpr const_iterator cend() const;
305
314 [[nodiscard]] constexpr const_reverse_iterator rbegin() const;
315
324 [[nodiscard]] constexpr const_reverse_iterator rend() const;
325
334 [[nodiscard]] constexpr const_reverse_iterator crbegin() const;
335
344 [[nodiscard]] constexpr const_reverse_iterator crend() const;
345
355 [[nodiscard]] constexpr const_bitmap_range bitmap() const;
356
366 [[nodiscard]] constexpr const_value_range values() const;
367
389 [[nodiscard]] constexpr D slice(size_type start, size_type end) const;
390
411 [[nodiscard]] constexpr D slice_view(size_type start, size_type end) const;
412
426 constexpr void slice_inplace(size_type start, size_type end);
427
428 protected:
429
440
441 constexpr array_crtp_base(const array_crtp_base&) = default;
442 constexpr array_crtp_base& operator=(const array_crtp_base&) = default;
443
444 constexpr array_crtp_base(array_crtp_base&&) noexcept = default;
445 constexpr array_crtp_base& operator=(array_crtp_base&&) noexcept = default;
446
454 [[nodiscard]] constexpr arrow_proxy& get_arrow_proxy() noexcept;
455
463 [[nodiscard]] constexpr const arrow_proxy& get_arrow_proxy() const noexcept;
464
478
487 constexpr const_bitmap_iterator bitmap_begin() const;
488
496 constexpr const_bitmap_iterator bitmap_end() const;
497
506 constexpr const_bitmap_iterator bitmap_cbegin() const;
507
516 constexpr const_bitmap_iterator bitmap_cend() const;
517
518 private:
519
520 arrow_proxy m_proxy;
521
522 // friend classes
523 friend class layout_iterator<iterator_types>;
524 friend class detail::array_access;
525#if defined(__cpp_lib_format)
526 friend struct std::formatter<D>;
527#endif
528 };
529
530 template <class D>
531 constexpr bool operator==(const array_crtp_base<D>& lhs, const array_crtp_base<D>& rhs);
532
533 /**********************************
534 * array_crtp_base implementation *
535 **********************************/
536
537 template <class D>
538 constexpr std::optional<std::string_view> array_crtp_base<D>::name() const
539 {
540 return get_arrow_proxy().name();
541 }
542
543 template <class D>
544 std::optional<key_value_view> array_crtp_base<D>::metadata() const
545 {
546 return get_arrow_proxy().metadata();
547 }
548
549 template <class D>
550 constexpr bool array_crtp_base<D>::empty() const
551 {
552 return size() == size_type(0);
553 }
554
555 template <class D>
556 constexpr auto array_crtp_base<D>::size() const -> size_type
557 {
558 return static_cast<size_type>(get_arrow_proxy().length());
559 }
560
561 template <class D>
562 constexpr auto array_crtp_base<D>::offset() const -> size_type
563 {
564 return static_cast<size_type>(get_arrow_proxy().offset());
565 }
566
567 template <class D>
568 constexpr std::int64_t array_crtp_base<D>::null_count() const
569 {
570 return get_arrow_proxy().null_count();
571 }
572
573 template <class D>
575 {
576 if (i >= size())
577 {
578 const std::string error_message = "Index " + std::to_string(i)
579 + " is greater or equal to size of array ("
580 + std::to_string(size()) + ")";
581 throw std::out_of_range(error_message);
582 }
583 return (*this)[i];
584 }
585
586 template <class D>
588 {
589 auto& derived_cast = this->derived_cast();
591 return const_reference(inner_const_reference(derived_cast.value(i)), derived_cast.has_value(i));
592 }
593
594 template <class D>
596 {
598 return (*this)[size_type(0)];
599 }
600
601 template <class D>
603 {
605 return (*this)[size() - 1];
606 }
607
608 template <class D>
610 {
611 return cbegin();
612 }
613
614 template <class D>
615 constexpr auto array_crtp_base<D>::end() const -> const_iterator
616 {
617 return cend();
618 }
619
620 template <class D>
622 {
623 return const_iterator(this->derived_cast().value_cbegin(), bitmap_begin());
624 }
625
626 template <class D>
627 constexpr auto array_crtp_base<D>::cend() const -> const_iterator
628 {
629 return const_iterator(this->derived_cast().value_cend(), bitmap_end());
630 }
631
632 template <class D>
634 {
635 return crbegin();
636 }
637
638 template <class D>
640 {
641 return crend();
642 }
643
644 template <class D>
646 {
648 }
649
650 template <class D>
652 {
654 }
655
656 template <class D>
658 {
660 }
661
662 template <class D>
664 {
665 return const_value_range(this->derived_cast().value_cbegin(), this->derived_cast().value_cend());
666 }
667
668 template <class D>
670 : m_proxy(std::move(proxy))
671 {
672 }
673
674 template <class D>
676 {
677 return m_proxy;
678 }
679
680 template <class D>
681 constexpr auto array_crtp_base<D>::get_arrow_proxy() const noexcept -> const arrow_proxy&
682 {
683 return m_proxy;
684 }
685
686 template <class D>
688 {
690 return *sparrow::next(bitmap_begin(), i);
691 }
692
693 template <class D>
695 {
696 return this->derived_cast().get_bitmap().cbegin();
697 }
698
699 template <class D>
701 {
702 return sparrow::next(bitmap_begin(), size());
703 }
704
705 template <class D>
707 {
708 return bitmap_begin();
709 }
710
711 template <class D>
713 {
714 return bitmap_end();
715 }
716
717 template <class D>
719 {
720 SPARROW_ASSERT_TRUE(start <= end);
721 return D{get_arrow_proxy().slice(start, end)};
722 }
723
724 template <class D>
726 {
727 SPARROW_ASSERT_TRUE(start <= end);
728 return D{get_arrow_proxy().slice_view(start, end)};
729 }
730
731 template <class D>
733 {
734 get_arrow_proxy().slice_inplace(start, end);
735 }
736
737 /*
738 * @brief Equality comparison operator for arrays.
739 *
740 * Compares two arrays element-wise, including both values and validity flags.
741 *
742 * @tparam D Array type
743 * @param lhs First array to compare
744 * @param rhs Second array to compare
745 * @return true if arrays are element-wise equal, false otherwise
746 *
747 * @post Returns true iff arrays have same size and all elements compare equal
748 * @post Comparison includes both values and validity states
749 */
750 template <class D>
751 constexpr bool operator==(const array_crtp_base<D>& lhs, const array_crtp_base<D>& rhs)
752 {
753 return std::ranges::equal(lhs, rhs);
754 }
755}
756
757#if defined(__cpp_lib_format)
758
/**
 * @brief std::formatter specialization for every array type deriving from
 *        sparrow::array_crtp_base.
 *
 * The produced text has the shape
 * `<type> [name=<name> | size=<length>] <v0, v1, ..., vN>`, where `<type>` is
 * the proxy data type, wrapped as `Dictionary<...>` when the proxy holds a
 * dictionary, and `<name>` is the literal `nullptr` when the array is unnamed.
 * An empty array is rendered with an empty element list (`<>`).
 */
template <typename D>
    requires std::derived_from<D, sparrow::array_crtp_base<D>>
struct std::formatter<D>
{
    /**
     * @brief No format specification is interpreted.
     * @return ctx.begin(), unchanged.
     */
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * @brief Writes the textual representation of @p ar to the context output.
     * @param ar Array to format.
     * @param ctx Format context receiving the output.
     * @return Output iterator past the last written character.
     */
    auto format(const D& ar, std::format_context& ctx) const
    {
        const auto& proxy = ar.get_arrow_proxy();
        if (proxy.dictionary())
        {
            std::format_to(ctx.out(), "Dictionary<{}>", proxy.dictionary()->data_type());
        }
        else
        {
            std::format_to(ctx.out(), "{}", proxy.data_type());
        }
        std::format_to(ctx.out(), " [name={} | size={}] <", ar.name().value_or("nullptr"), proxy.length());

        // Emit "first, second, ..." without ever stepping before begin or
        // dereferencing back(): both are undefined on an empty array.
        auto it = ar.cbegin();
        const auto last = ar.cend();
        if (it != last)
        {
            std::format_to(ctx.out(), "{}", *it);
            for (++it; it != last; ++it)
            {
                std::format_to(ctx.out(), ", {}", *it);
            }
        }
        return std::format_to(ctx.out(), ">");
    }
};
793
namespace sparrow
{
    /**
     * @brief Streams an array using its std::format representation.
     *
     * @tparam D Array type deriving from array_crtp_base<D>
     * @param os Output stream to write to
     * @param value Array to print
     * @return Reference to @p os
     */
    template <typename D>
        requires std::derived_from<D, array_crtp_base<D>>
    std::ostream& operator<<(std::ostream& os, const D& value)
    {
        return os << std::format("{}", value);
    }
}
804
805#endif
typename inner_types::const_bitmap_type const_bitmap_type
constexpr const_bitmap_iterator bitmap_cbegin() const
Gets const bitmap iterator to the beginning.
constexpr const_value_range values() const
Gets the raw values as a range.
array_crtp_base(arrow_proxy)
Protected constructor from Arrow proxy.
constexpr const_reverse_iterator rbegin() const
Gets reverse iterator to the beginning of reversed array.
constexpr D slice_view(size_type start, size_type end) const
Creates a sliced view of the array.
constexpr const_reference back() const
Gets reference to the last element.
typename inner_types::const_value_iterator const_value_iterator
bitmap_type::const_reference bitmap_const_reference
std::ranges::subrange< const_value_iterator > const_value_range
bitmap_type::iterator bitmap_iterator
constexpr const_reference front() const
Gets reference to the first element.
constexpr const_bitmap_iterator bitmap_end() const
Gets bitmap iterator to the end.
constexpr const_bitmap_iterator bitmap_cend() const
Gets const bitmap iterator to the end.
constexpr bool empty() const
Checks if the array is empty.
nullable< inner_const_reference, bitmap_const_reference > const_reference
constexpr const_iterator cbegin() const
Gets const iterator to the beginning of the array.
constexpr const_reference at(size_type i) const
Gets element at specified position with bounds checking.
typename inner_types::iterator_tag iterator_tag
constexpr bitmap_const_reference has_value(size_type i) const
Checks if element at index i has a valid value.
constexpr array_crtp_base(const array_crtp_base &)=default
constexpr size_type offset() const
Gets the starting offset within the buffers.
std::ranges::subrange< const_bitmap_iterator > const_bitmap_range
friend class detail::array_access
typename inner_types::bitmap_type bitmap_type
const_bitmap_type::const_iterator const_bitmap_iterator
constexpr const_reverse_iterator crbegin() const
Gets const reverse iterator to the beginning of reversed array.
typename inner_types::inner_const_reference inner_const_reference
constexpr const_reverse_iterator crend() const
Gets const reverse iterator to the end of reversed array.
nullable< inner_value_type > value_type
std::ptrdiff_t difference_type
constexpr const_reference operator[](size_type i) const
Gets element at specified position without bounds checking.
layout_iterator< iterator_types > const_iterator
constexpr arrow_proxy & get_arrow_proxy() noexcept
Gets mutable reference to the Arrow proxy.
array_crtp_base< D > self_type
std::reverse_iterator< const_iterator > const_reverse_iterator
constexpr const_iterator cend() const
Gets const iterator to the end of the array.
constexpr array_crtp_base & operator=(const array_crtp_base &)=default
constexpr const_bitmap_iterator bitmap_begin() const
Gets bitmap iterator to the beginning.
constexpr void slice_inplace(size_type start, size_type end)
Slices the array in place.
constexpr std::int64_t null_count() const
Gets the count of null elements in the array.
array_inner_types< derived_type > inner_types
constexpr const_reverse_iterator rend() const
Gets reverse iterator to the end of reversed array.
constexpr std::optional< std::string_view > name() const
Gets the optional name of the array.
constexpr const_iterator end() const
Gets iterator to the end of the array.
constexpr D slice(size_type start, size_type end) const
Creates a sliced copy of the array.
constexpr array_crtp_base(array_crtp_base &&) noexcept=default
constexpr const_iterator begin() const
Gets iterator to the beginning of the array.
constexpr const_bitmap_range bitmap() const
Gets the validity bitmap as a range.
std::optional< key_value_view > metadata() const
Gets the metadata associated with the array.
typename inner_types::inner_value_type inner_value_type
constexpr size_type size() const
Gets the number of elements in the array.
Base class for CRTP base classes.
Definition crtp_base.hpp:29
constexpr derived_type & derived_cast()
Definition crtp_base.hpp:39
A non-owning view to a dynamic size sequence of bits stored in external memory.
Layout iterator class.
constexpr bitmap_iterator bitmap_end()
base_type::const_reference const_reference
bitmap_type::const_reference bitmap_const_reference
base_type::const_iterator const_iterator
constexpr bitmap_reference has_value(size_type i)
base_type::const_bitmap_range const_bitmap_range
const_bitmap_type::const_iterator const_bitmap_iterator
constexpr iterator end()
Returns an iterator to the element following the last element of the array.
constexpr bitmap_iterator bitmap_begin()
Concept for iterator types.
#define SPARROW_ASSERT_TRUE(expr__)
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:605
std::ostream & operator<<(std::ostream &os, const nullval_t &)
Extensions to the C++ standard library.
self_type::const_reference reference
self_type::const_bitmap_iterator bitmap_iterator
self_type::const_value_iterator value_iterator
Base class for array_inner_types specializations.
non_owning_dynamic_bitset< std::uint8_t > bitmap_type
dynamic_bitset_view< const std::uint8_t > const_bitmap_type
Traits class that must be specialized by array implementations.