45 template <
class Layout,
bool is_const>
51 using storage_type = std::conditional_t<is_const, const layout_type*, layout_type>;
53 conditional_t<is_const, typename layout_type::const_reference, typename layout_type::reference>;
78 return p_layout->operator[](i);
92 template <std::
integral IT>
93 class dictionary_encoded_array;
95 namespace copy_tracker
101 return "dictionary_encoded_array";
107 template <std::
integral IT>
126 template <std::
integral IT>
134 [[nodiscard]]
static constexpr bool get() noexcept
159 template <std::
integral IT>
226 [[nodiscard]]
constexpr std::optional<std::string_view>
name()
const;
233 [[nodiscard]] std::optional<key_value_view>
metadata()
const;
247 [[nodiscard]]
constexpr bool empty()
const;
361 template <
class... Args>
407 [[nodiscard]]
static auto create_proxy(
411 std::optional<std::string_view>
name = std::nullopt,
412 std::optional<METADATA_RANGE>
metadata = std::nullopt
430 [[nodiscard]]
static auto create_proxy(
434 std::optional<std::string_view>
name = std::nullopt,
435 std::optional<METADATA_RANGE>
metadata = std::nullopt
449 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
450 [[nodiscard]]
static auto create_proxy_impl(
453 std::optional<validity_bitmap> validity = std::nullopt,
454 std::optional<std::string_view>
name = std::nullopt,
455 std::optional<METADATA_RANGE>
metadata = std::nullopt
472 std::ranges::input_range KEY_RANGE,
476 !std::same_as<KEY_RANGE, keys_buffer_type>
477 and std::same_as<IT, std::ranges::range_value_t<KEY_RANGE>>
483 std::optional<std::string_view>
name = std::nullopt,
484 std::optional<METADATA_RANGE>
metadata = std::nullopt
489 std::move(keys_buffer),
490 std::forward<array>(values),
491 std::forward<R>(bitmaps),
509 std::ranges::input_range NULLABLE_KEY_RANGE,
510 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
511 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>, nullable<IT>>
512 static arrow_proxy create_proxy(
513 NULLABLE_KEY_RANGE&& nullable_keys,
515 std::optional<std::string_view>
name = std::nullopt,
516 std::optional<METADATA_RANGE>
metadata = std::nullopt
520 using values_layout = cloning_ptr<array_wrapper>;
541 [[nodiscard]]
static constexpr keys_layout create_keys_layout(arrow_proxy& proxy);
549 [[nodiscard]]
static values_layout create_values_layout(arrow_proxy& proxy);
556 [[nodiscard]]
constexpr arrow_proxy& get_arrow_proxy();
563 [[nodiscard]]
constexpr const arrow_proxy& get_arrow_proxy()
const;
568 keys_layout m_keys_layout;
570 values_layout p_values_layout;
590 template <std::
integral IT>
592 : m_proxy(
std::move(proxy))
593 , m_keys_layout(create_keys_layout(m_proxy))
594 , p_values_layout(create_values_layout(m_proxy))
599 template <std::
integral IT>
601 : m_proxy(rhs.m_proxy)
602 , m_keys_layout(create_keys_layout(m_proxy))
603 , p_values_layout(create_values_layout(m_proxy))
608 template <std::
integral IT>
614 m_proxy = rhs.m_proxy;
615 m_keys_layout = create_keys_layout(m_proxy);
616 p_values_layout = create_values_layout(m_proxy);
621 template <std::
integral IT>
623 : m_proxy(
std::move(rhs.m_proxy))
624 , m_keys_layout(create_keys_layout(m_proxy))
625 , p_values_layout(create_values_layout(m_proxy))
629 template <std::
integral IT>
635 swap(m_proxy, rhs.m_proxy);
636 m_keys_layout = create_keys_layout(m_proxy);
637 p_values_layout = create_values_layout(m_proxy);
642 template <std::
integral IT>
643 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
644 auto dictionary_encoded_array<IT>::create_proxy(
647 VBI&& validity_input,
648 std::optional<std::string_view>
name,
649 std::optional<METADATA_RANGE>
metadata
652 const auto size = keys.size();
654 return create_proxy_impl(
655 std::forward<keys_buffer_type>(keys),
656 std::forward<array>(values),
657 std::make_optional<validity_bitmap>(std::move(vbitmap)),
663 template <std::
integral IT>
664 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
665 auto dictionary_encoded_array<IT>::create_proxy(
666 keys_buffer_type&& keys,
669 std::optional<std::string_view> name,
670 std::optional<METADATA_RANGE> metadata
673 const auto size = keys.size();
674 return create_proxy_impl(
675 std::forward<keys_buffer_type>(keys),
676 std::forward<array>(values),
684 template <std::
integral IT>
685 template <input_metadata_container METADATA_RANGE>
686 [[nodiscard]]
arrow_proxy dictionary_encoded_array<IT>::create_proxy_impl(
689 std::optional<validity_bitmap> validity,
690 std::optional<std::string_view>
name,
691 std::optional<METADATA_RANGE>
metadata
694 const auto size = keys.size();
698 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
699 flags = validity.has_value()
715 const size_t null_count = validity.has_value() ? validity->null_count() : 0;
717 std::vector<buffer<uint8_t>> buffers;
719 buffers.emplace_back(
720 validity.has_value() ? std::move(*validity).extract_storage()
723 buffers.emplace_back(std::move(keys).extract_storage());
726 static_cast<std::int64_t
>(
size),
727 static_cast<std::int64_t
>(null_count),
735 return arrow_proxy(std::move(arr), std::move(schema));
738 template <std::
integral IT>
739 template <std::ranges::input_range NULLABLE_KEY_RANGE, input_metadata_container METADATA_RANGE>
740 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>,
nullable<IT>>
741 arrow_proxy dictionary_encoded_array<IT>::create_proxy(
742 NULLABLE_KEY_RANGE&& nullable_keys,
744 std::optional<std::string_view> name,
745 std::optional<METADATA_RANGE> metadata
748 auto keys = nullable_keys
749 | std::views::transform(
755 auto is_non_null = nullable_keys
756 | std::views::transform(
759 return v.has_value();
764 std::forward<array>(values),
765 std::move(is_non_null),
771 template <std::
integral IT>
774 return m_proxy.name();
777 template <std::
integral IT>
780 return m_proxy.metadata();
783 template <std::
integral IT>
786 return m_proxy.length();
789 template <std::
integral IT>
795 template <std::
integral IT>
799 const auto index = m_keys_layout[i];
801 if (index.has_value())
804 return array_element(*p_values_layout,
static_cast<std::size_t
>(index.value()));
808 return dummy_const_reference();
812 template <std::
integral IT>
818 template <std::
integral IT>
824 template <std::
integral IT>
830 template <std::
integral IT>
836 template <std::
integral IT>
842 template <std::
integral IT>
848 template <std::
integral IT>
854 template <std::
integral IT>
860 template <std::
integral IT>
866 template <std::
integral IT>
872 template <std::
integral IT>
878 template <std::
integral IT>
884 template <std::
integral IT>
891 template <std::
integral IT>
898 template <std::
integral IT>
899 auto dictionary_encoded_array<IT>::dummy_inner_value() const -> const inner_value_type&
905 template <std::
integral IT>
912 template <std::
integral IT>
916 return self_type{get_arrow_proxy().slice_view(start,
end)};
927 template <std::
integral IT>
928 auto dictionary_encoded_array<IT>::dummy_const_reference() const -> const_reference
930 static const const_reference instance = std::visit(
931 [](
const auto& val) -> const_reference
933 using inner_ref =
typename arrow_traits<std::decay_t<
decltype(val)>>::const_reference;
941 template <std::
integral IT>
942 typename dictionary_encoded_array<IT>::values_layout
943 dictionary_encoded_array<IT>::create_values_layout(
arrow_proxy& proxy)
945 const auto& dictionary = proxy.dictionary();
947 arrow_proxy ar_dictionary{&(dictionary->array()), &(dictionary->schema())};
951 template <std::
integral IT>
952 constexpr auto dictionary_encoded_array<IT>::create_keys_layout(
arrow_proxy& proxy) -> keys_layout
954 return keys_layout{
arrow_proxy{&proxy.array(), &proxy.schema()}};
957 template <std::
integral IT>
958 constexpr auto dictionary_encoded_array<IT>::get_arrow_proxy() ->
arrow_proxy&
963 template <std::
integral IT>
964 constexpr auto dictionary_encoded_array<IT>::get_arrow_proxy() const -> const
arrow_proxy&
972 return std::ranges::equal(lhs, rhs);
976#if defined(__cpp_lib_format)
977template <std::
integral IT>
978struct std::formatter<
sparrow::dictionary_encoded_array<IT>>
980 constexpr auto parse(std::format_parse_context& ctx)
985 auto format(
const sparrow::dictionary_encoded_array<IT>& ar, std::format_context& ctx)
const
987 std::format_to(ctx.out(),
"Dictionary [size={}] <", ar.
size());
990 std::prev(ar.
cend()),
991 [&ctx](
const auto& value)
993 std::format_to(ctx.out(),
"{}, ", value);
996 std::format_to(ctx.out(),
"{}>", ar.
back());
1003 template <std::
integral IT>
1006 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Object that owns a piece of contiguous memory.
Forward declaration of dictionary_encoded_array.
array_traits::inner_value_type inner_value_type
u8_buffer< IT > keys_buffer_type
constexpr iterator begin()
Gets an iterator to the beginning of the array.
dictionary_encoded_array< IT > self_type
functor_index_iterator< const_functor_type > const_iterator
array_traits::const_reference const_reference
SPARROW_CONSTEXPR_CLANG const_reference operator[](size_type i) const
Access operator for getting element at index.
constexpr self_type & operator=(const self_type &other)
Copy assignment operator.
constexpr const_reverse_iterator crbegin() const
Gets a constant reverse iterator to the beginning of the array.
constexpr self_type & operator=(self_type &&other)
Move assignment operator.
SPARROW_CONSTEXPR_CLANG const_reference back() const
Gets a reference to the last element.
array_traits::value_type value_type
std::optional< key_value_view > metadata() const
Gets the metadata of the array.
constexpr self_type slice_view(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
layout_element_functor< self_type, true > functor_type
constexpr const_reverse_iterator rend() const
Gets a constant reverse iterator to the end of the array.
constexpr const_reverse_iterator crend() const
Gets a constant reverse iterator to the end of the array.
constexpr self_type slice(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
constexpr iterator end()
Gets an iterator to the end of the array.
constexpr const_iterator cend() const
Gets a constant iterator to the end of the array.
dictionary_encoded_array(arrow_proxy proxy)
Constructs a dictionary encoded array from an arrow proxy.
constexpr const_iterator cbegin() const
Gets a constant iterator to the beginning of the array.
layout_element_functor< self_type, true > const_functor_type
constexpr reverse_iterator rend()
Gets a reverse iterator to the end of the array.
dictionary_encoded_array(Args &&... args)
Constructs a dictionary encoded array with the given arguments.
constexpr const_iterator begin() const
Gets a constant iterator to the beginning of the array.
constexpr const_reverse_iterator rbegin() const
Gets a constant reverse iterator to the beginning of the array.
SPARROW_CONSTEXPR_CLANG const_reference front() const
Gets a reference to the first element.
array_traits::const_reference reference
constexpr std::optional< std::string_view > name() const
Gets the name of the array.
constexpr bool empty() const
Checks if the array is empty.
constexpr dictionary_encoded_array(self_type &&other)
Move constructor.
constexpr size_type size() const
Gets the number of elements in the array.
std::reverse_iterator< iterator > reverse_iterator
constexpr const_iterator end() const
Gets a constant iterator to the end of the array.
std::ptrdiff_t difference_type
constexpr reverse_iterator rbegin()
Gets a reverse iterator to the beginning of the array.
functor_index_iterator< functor_type > iterator
constexpr dictionary_encoded_array(const self_type &other)
Copy constructor.
std::reverse_iterator< const_iterator > const_reverse_iterator
typename storage_type::default_allocator default_allocator
Functor for accessing elements in a layout.
constexpr layout_element_functor(storage_type layout_)
Constructs a functor with the given layout.
std:: conditional_t< is_const, typename layout_type::const_reference, typename layout_type::reference > return_type
constexpr layout_element_functor()=default
Default constructor.
std::conditional_t< is_const, const layout_type *, layout_type > storage_type
constexpr return_type operator()(std::size_t i) const
Access operator for getting element at index.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
#define SPARROW_CONSTEXPR_CLANG
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
SPARROW_API void increase(const std::string &key)
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema holding the provided data.
constexpr std::string_view data_type_to_format(data_type type)
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_dictionary_encoded_array_v
Checks whether T is a dictionary_encoded_array type.
SPARROW_API array_traits::inner_value_type array_default_element_value(const array_wrapper &ar)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
primitive_array_impl< T, Ext, T2 > primitive_array
Array of values whose type has a fixed binary size.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs) noexcept
Swaps the contents of the two ArrowArray objects.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool data_type_is_integer(data_type dt) noexcept
data_type
Runtime identifier of Arrow data types, usually associated with the raw bytes of the corresponding value.
Extensions to the C++ standard library.
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get() noexcept
Gets the data type for the dictionary keys.
Metafunction for retrieving the data_type of a typed array.
static constexpr bool get() noexcept
Returns true for dictionary_encoded_array types.
static constexpr bool get() noexcept