44 template <
class Layout,
bool is_const>
50 using storage_type = std::conditional_t<is_const, const layout_type*, layout_type>;
52 conditional_t<is_const, typename layout_type::const_reference, typename layout_type::reference>;
77 return p_layout->operator[](i);
91 template <std::
integral IT>
92 class dictionary_encoded_array;
96 template <std::
integral IT>
115 template <std::
integral IT>
123 [[nodiscard]]
static constexpr bool get() noexcept
148 template <std::
integral IT>
215 [[nodiscard]]
constexpr std::optional<std::string_view>
name()
const;
222 [[nodiscard]] std::optional<key_value_view>
metadata()
const;
236 [[nodiscard]]
constexpr bool empty()
const;
350 template <
class... Args>
396 [[nodiscard]]
static auto create_proxy(
400 std::optional<std::string_view>
name = std::nullopt,
401 std::optional<METADATA_RANGE>
metadata = std::nullopt
419 [[nodiscard]]
static auto create_proxy(
423 std::optional<std::string_view>
name = std::nullopt,
424 std::optional<METADATA_RANGE>
metadata = std::nullopt
438 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
439 [[nodiscard]]
static auto create_proxy_impl(
442 std::optional<validity_bitmap> validity = std::nullopt,
443 std::optional<std::string_view>
name = std::nullopt,
444 std::optional<METADATA_RANGE>
metadata = std::nullopt
461 std::ranges::input_range KEY_RANGE,
465 !std::same_as<KEY_RANGE, keys_buffer_type>
466 and std::same_as<IT, std::ranges::range_value_t<KEY_RANGE>>
472 std::optional<std::string_view>
name = std::nullopt,
473 std::optional<METADATA_RANGE>
metadata = std::nullopt
478 std::move(keys_buffer),
479 std::forward<array>(values),
480 std::forward<R>(bitmaps),
498 std::ranges::input_range NULLABLE_KEY_RANGE,
499 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
500 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>, nullable<IT>>
501 static arrow_proxy create_proxy(
502 NULLABLE_KEY_RANGE&& nullable_keys,
504 std::optional<std::string_view>
name = std::nullopt,
505 std::optional<METADATA_RANGE>
metadata = std::nullopt
509 using values_layout = cloning_ptr<array_wrapper>;
530 [[nodiscard]]
static constexpr keys_layout create_keys_layout(arrow_proxy& proxy);
538 [[nodiscard]]
static values_layout create_values_layout(arrow_proxy& proxy);
545 [[nodiscard]]
constexpr arrow_proxy& get_arrow_proxy();
552 [[nodiscard]]
constexpr const arrow_proxy& get_arrow_proxy()
const;
557 keys_layout m_keys_layout;
559 values_layout p_values_layout;
579 template <std::
integral IT>
581 : m_proxy(
std::move(proxy))
582 , m_keys_layout(create_keys_layout(m_proxy))
583 , p_values_layout(create_values_layout(m_proxy))
588 template <std::
integral IT>
590 : m_proxy(rhs.m_proxy)
591 , m_keys_layout(create_keys_layout(m_proxy))
592 , p_values_layout(create_values_layout(m_proxy))
596 template <std::
integral IT>
601 m_proxy = rhs.m_proxy;
602 m_keys_layout = create_keys_layout(m_proxy);
603 p_values_layout = create_values_layout(m_proxy);
608 template <std::
integral IT>
610 : m_proxy(
std::move(rhs.m_proxy))
611 , m_keys_layout(create_keys_layout(m_proxy))
612 , p_values_layout(create_values_layout(m_proxy))
616 template <std::
integral IT>
622 swap(m_proxy, rhs.m_proxy);
623 m_keys_layout = create_keys_layout(m_proxy);
624 p_values_layout = create_values_layout(m_proxy);
629 template <std::
integral IT>
630 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
631 auto dictionary_encoded_array<IT>::create_proxy(
634 VBI&& validity_input,
635 std::optional<std::string_view>
name,
636 std::optional<METADATA_RANGE>
metadata
639 const auto size = keys.size();
641 return create_proxy_impl(
642 std::forward<keys_buffer_type>(keys),
643 std::forward<array>(values),
644 std::make_optional<validity_bitmap>(std::move(vbitmap)),
650 template <std::
integral IT>
651 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
652 auto dictionary_encoded_array<IT>::create_proxy(
653 keys_buffer_type&& keys,
656 std::optional<std::string_view> name,
657 std::optional<METADATA_RANGE> metadata
660 const auto size = keys.size();
661 return create_proxy_impl(
662 std::forward<keys_buffer_type>(keys),
663 std::forward<array>(values),
664 nullable ? std::make_optional<validity_bitmap>(
nullptr, size) : std::nullopt,
670 template <std::
integral IT>
671 template <input_metadata_container METADATA_RANGE>
672 [[nodiscard]]
arrow_proxy dictionary_encoded_array<IT>::create_proxy_impl(
675 std::optional<validity_bitmap> validity,
676 std::optional<std::string_view>
name,
677 std::optional<METADATA_RANGE>
metadata
680 const auto size = keys.size();
684 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
685 flags = validity.has_value()
701 const size_t null_count = validity.has_value() ? validity->null_count() : 0;
703 std::vector<buffer<uint8_t>> buffers(2);
704 buffers[0] = validity.has_value() ? std::move(*validity).extract_storage()
706 buffers[1] = std::move(keys).extract_storage();
709 static_cast<std::int64_t
>(
size),
710 static_cast<std::int64_t
>(null_count),
718 return arrow_proxy(std::move(arr), std::move(schema));
721 template <std::
integral IT>
722 template <std::ranges::input_range NULLABLE_KEY_RANGE, input_metadata_container METADATA_RANGE>
723 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>,
nullable<IT>>
724 arrow_proxy dictionary_encoded_array<IT>::create_proxy(
725 NULLABLE_KEY_RANGE&& nullable_keys,
727 std::optional<std::string_view> name,
728 std::optional<METADATA_RANGE> metadata
731 auto keys = nullable_keys
732 | std::views::transform(
738 auto is_non_null = nullable_keys
739 | std::views::transform(
742 return v.has_value();
747 std::forward<array>(values),
748 std::move(is_non_null),
754 template <std::
integral IT>
757 return m_proxy.name();
760 template <std::
integral IT>
763 return m_proxy.metadata();
766 template <std::
integral IT>
769 return m_proxy.length();
772 template <std::
integral IT>
778 template <std::
integral IT>
783 const auto index = m_keys_layout[i];
785 if (index.has_value())
788 return array_element(*p_values_layout,
static_cast<std::size_t
>(index.value()));
792 return dummy_const_reference();
796 template <std::
integral IT>
802 template <std::
integral IT>
808 template <std::
integral IT>
814 template <std::
integral IT>
820 template <std::
integral IT>
826 template <std::
integral IT>
832 template <std::
integral IT>
838 template <std::
integral IT>
844 template <std::
integral IT>
850 template <std::
integral IT>
856 template <std::
integral IT>
862 template <std::
integral IT>
868 template <std::
integral IT>
875 template <std::
integral IT>
882 template <std::
integral IT>
883 auto dictionary_encoded_array<IT>::dummy_inner_value() const -> const inner_value_type&
889 template <std::
integral IT>
896 template <std::
integral IT>
900 return self_type{get_arrow_proxy().slice_view(start,
end)};
911 template <std::
integral IT>
912 auto dictionary_encoded_array<IT>::dummy_const_reference() const -> const_reference
914 static const const_reference instance = std::visit(
915 [](
const auto& val) -> const_reference
917 using inner_ref =
typename arrow_traits<std::decay_t<
decltype(val)>>::const_reference;
925 template <std::
integral IT>
926 typename dictionary_encoded_array<IT>::values_layout
927 dictionary_encoded_array<IT>::create_values_layout(
arrow_proxy& proxy)
929 const auto& dictionary = proxy.dictionary();
931 arrow_proxy ar_dictionary{&(dictionary->array()), &(dictionary->schema())};
935 template <std::
integral IT>
936 constexpr auto dictionary_encoded_array<IT>::create_keys_layout(
arrow_proxy& proxy) -> keys_layout
938 return keys_layout{
arrow_proxy{&proxy.array(), &proxy.schema()}};
941 template <std::
integral IT>
942 constexpr auto dictionary_encoded_array<IT>::get_arrow_proxy() ->
arrow_proxy&
947 template <std::
integral IT>
948 constexpr auto dictionary_encoded_array<IT>::get_arrow_proxy() const -> const
arrow_proxy&
956 return std::ranges::equal(lhs, rhs);
960#if defined(__cpp_lib_format)
961template <std::
integral IT>
962struct std::formatter<
sparrow::dictionary_encoded_array<IT>>
964 constexpr auto parse(std::format_parse_context& ctx)
969 auto format(
const sparrow::dictionary_encoded_array<IT>& ar, std::format_context& ctx)
const
971 std::format_to(ctx.out(),
"Dictionary [size={}] <", ar.
size());
974 std::prev(ar.
cend()),
975 [&ctx](
const auto& value)
977 std::format_to(ctx.out(),
"{}, ", value);
980 std::format_to(ctx.out(),
"{}>", ar.
back());
985template <std::
integral IT>
988 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Object that owns a piece of contiguous memory.
Forward declaration of dictionary_encoded_array.
array_traits::inner_value_type inner_value_type
u8_buffer< IT > keys_buffer_type
constexpr iterator begin()
Gets an iterator to the beginning of the array.
dictionary_encoded_array< IT > self_type
functor_index_iterator< const_functor_type > const_iterator
SPARROW_CONSTEXPR_CLANG_17 const_reference operator[](size_type i) const
Access operator for getting element at index.
array_traits::const_reference const_reference
constexpr self_type & operator=(const self_type &other)
Copy assignment operator.
constexpr const_reverse_iterator crbegin() const
Gets a constant reverse iterator to the beginning of the array.
constexpr self_type & operator=(self_type &&other)
Move assignment operator.
array_traits::value_type value_type
std::optional< key_value_view > metadata() const
Gets the metadata of the array.
constexpr self_type slice_view(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
layout_element_functor< self_type, true > functor_type
SPARROW_CONSTEXPR_CLANG_17 const_reference front() const
Gets a reference to the first element.
constexpr const_reverse_iterator rend() const
Gets a constant reverse iterator to the end of the array.
constexpr const_reverse_iterator crend() const
Gets a constant reverse iterator to the end of the array.
constexpr self_type slice(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
constexpr iterator end()
Gets an iterator to the end of the array.
constexpr const_iterator cend() const
Gets a constant iterator to the end of the array.
dictionary_encoded_array(arrow_proxy proxy)
Constructs a dictionary encoded array from an arrow proxy.
constexpr const_iterator cbegin() const
Gets a constant iterator to the beginning of the array.
layout_element_functor< self_type, true > const_functor_type
constexpr reverse_iterator rend()
Gets a reverse iterator to the end of the array.
dictionary_encoded_array(Args &&... args)
Constructs a dictionary encoded array with the given arguments.
constexpr const_iterator begin() const
Gets a constant iterator to the beginning of the array.
constexpr const_reverse_iterator rbegin() const
Gets a constant reverse iterator to the beginning of the array.
array_traits::const_reference reference
constexpr std::optional< std::string_view > name() const
Gets the name of the array.
constexpr bool empty() const
Checks if the array is empty.
constexpr dictionary_encoded_array(self_type &&other)
Move constructor.
constexpr size_type size() const
Gets the number of elements in the array.
std::reverse_iterator< iterator > reverse_iterator
constexpr const_iterator end() const
Gets a constant iterator to the end of the array.
SPARROW_CONSTEXPR_CLANG_17 const_reference back() const
Gets a reference to the last element.
std::ptrdiff_t difference_type
constexpr reverse_iterator rbegin()
Gets a reverse iterator to the beginning of the array.
functor_index_iterator< functor_type > iterator
constexpr dictionary_encoded_array(const self_type &other)
Copy constructor.
std::reverse_iterator< const_iterator > const_reverse_iterator
Functor for accessing elements in a layout.
constexpr layout_element_functor(storage_type layout_)
Constructs a functor with the given layout.
std:: conditional_t< is_const, typename layout_type::const_reference, typename layout_type::reference > return_type
constexpr layout_element_functor()=default
Default constructor.
std::conditional_t< is_const, const layout_type *, layout_type > storage_type
constexpr return_type operator()(std::size_t i) const
Access operator for getting element at index.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
#define SPARROW_CONSTEXPR_CLANG_17
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema holding the provided data.
constexpr std::string_view data_type_to_format(data_type type)
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_dictionary_encoded_array_v
Checks whether T is a dictionary_encoded_array type.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
SPARROW_API array_traits::inner_value_type array_default_element_value(const array_wrapper &ar)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
primitive_array_impl< T > primitive_array
Array of values whose type has a fixed binary size.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool data_type_is_integer(data_type dt) noexcept
data_type
Runtime identifier of Arrow data types, usually paired with the raw bytes of the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get() noexcept
Gets the data type for the dictionary keys.
Metafunction for retrieving the data_type of a typed array.
static constexpr bool get() noexcept
Returns true for dictionary_encoded_array types.
static constexpr bool get() noexcept