45 template <
class Layout,
bool is_const>
51 using storage_type = std::conditional_t<is_const, const layout_type*, layout_type>;
53 conditional_t<is_const, typename layout_type::const_reference, typename layout_type::reference>;
78 return p_layout->operator[](i);
84 storage_type p_layout;
92 template <std::
integral IT>
93 class dictionary_encoded_array;
95 namespace copy_tracker
101 return "dictionary_encoded_array";
107 template <std::
integral IT>
126 template <std::
integral IT>
134 [[nodiscard]]
static constexpr bool get() noexcept
159 template <std::
integral IT>
226 [[nodiscard]]
constexpr std::optional<std::string_view>
name()
const;
233 [[nodiscard]] std::optional<key_value_view>
metadata()
const;
247 [[nodiscard]]
constexpr bool empty()
const;
361 template <
class... Args>
421 [[nodiscard]]
static auto create_proxy(
425 std::optional<std::string_view>
name = std::nullopt,
426 std::optional<METADATA_RANGE>
metadata = std::nullopt
444 [[nodiscard]]
static auto create_proxy(
448 std::optional<std::string_view>
name = std::nullopt,
449 std::optional<METADATA_RANGE>
metadata = std::nullopt
463 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
464 [[nodiscard]]
static auto create_proxy_impl(
467 std::optional<validity_bitmap> validity = std::nullopt,
468 std::optional<std::string_view>
name = std::nullopt,
469 std::optional<METADATA_RANGE>
metadata = std::nullopt
486 std::ranges::input_range KEY_RANGE,
490 !std::same_as<KEY_RANGE, keys_buffer_type>
491 and std::same_as<IT, std::ranges::range_value_t<KEY_RANGE>>
497 std::optional<std::string_view>
name = std::nullopt,
498 std::optional<METADATA_RANGE>
metadata = std::nullopt
503 std::move(keys_buffer),
504 std::forward<array>(values),
505 std::forward<R>(bitmaps),
523 std::ranges::input_range NULLABLE_KEY_RANGE,
524 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
525 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>, nullable<IT>>
526 static arrow_proxy create_proxy(
527 NULLABLE_KEY_RANGE&& nullable_keys,
529 std::optional<std::string_view>
name = std::nullopt,
530 std::optional<METADATA_RANGE>
metadata = std::nullopt
534 using values_layout = cloning_ptr<array_wrapper>;
555 [[nodiscard]]
static constexpr keys_layout create_keys_layout(arrow_proxy& proxy);
563 [[nodiscard]]
static values_layout create_values_layout(arrow_proxy& proxy);
570 [[nodiscard]]
constexpr arrow_proxy& get_arrow_proxy();
577 [[nodiscard]]
constexpr const arrow_proxy& get_arrow_proxy()
const;
582 keys_layout m_keys_layout;
584 values_layout p_values_layout;
604 template <std::
integral IT>
606 : m_proxy(
std::move(proxy))
607 , m_keys_layout(create_keys_layout(m_proxy))
608 , p_values_layout(create_values_layout(m_proxy))
613 template <std::
integral IT>
615 : m_proxy(rhs.m_proxy)
616 , m_keys_layout(create_keys_layout(m_proxy))
617 , p_values_layout(create_values_layout(m_proxy))
622 template <std::
integral IT>
628 m_proxy = rhs.m_proxy;
629 m_keys_layout = create_keys_layout(m_proxy);
630 p_values_layout = create_values_layout(m_proxy);
635 template <std::
integral IT>
637 : m_proxy(
std::move(rhs.m_proxy))
638 , m_keys_layout(create_keys_layout(m_proxy))
639 , p_values_layout(create_values_layout(m_proxy))
643 template <std::
integral IT>
649 swap(m_proxy, rhs.m_proxy);
650 m_keys_layout = create_keys_layout(m_proxy);
651 p_values_layout = create_values_layout(m_proxy);
656 template <std::
integral IT>
657 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
658 auto dictionary_encoded_array<IT>::create_proxy(
661 VBI&& validity_input,
662 std::optional<std::string_view>
name,
663 std::optional<METADATA_RANGE>
metadata
666 const auto size = keys.size();
668 return create_proxy_impl(
669 std::forward<keys_buffer_type>(keys),
670 std::forward<array>(values),
671 std::make_optional<validity_bitmap>(std::move(vbitmap)),
677 template <std::
integral IT>
678 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
679 auto dictionary_encoded_array<IT>::create_proxy(
680 keys_buffer_type&& keys,
683 std::optional<std::string_view> name,
684 std::optional<METADATA_RANGE> metadata
687 const auto size = keys.size();
688 return create_proxy_impl(
689 std::forward<keys_buffer_type>(keys),
690 std::forward<array>(values),
698 template <std::
integral IT>
699 template <input_metadata_container METADATA_RANGE>
700 [[nodiscard]]
arrow_proxy dictionary_encoded_array<IT>::create_proxy_impl(
703 std::optional<validity_bitmap> validity,
704 std::optional<std::string_view>
name,
705 std::optional<METADATA_RANGE>
metadata
708 const auto size = keys.size();
712 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
713 flags = validity.has_value()
729 const size_t null_count = validity.has_value() ? validity->null_count() : 0;
731 std::vector<buffer<uint8_t>> buffers;
733 buffers.emplace_back(
734 validity.has_value() ? std::move(*validity).extract_storage()
737 buffers.emplace_back(std::move(keys).extract_storage());
740 static_cast<std::int64_t
>(
size),
741 static_cast<std::int64_t
>(null_count),
749 return arrow_proxy(std::move(arr), std::move(schema));
752 template <std::
integral IT>
753 template <std::ranges::input_range NULLABLE_KEY_RANGE, input_metadata_container METADATA_RANGE>
754 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>,
nullable<IT>>
755 arrow_proxy dictionary_encoded_array<IT>::create_proxy(
756 NULLABLE_KEY_RANGE&& nullable_keys,
758 std::optional<std::string_view> name,
759 std::optional<METADATA_RANGE> metadata
762 auto keys = nullable_keys | std::views::transform(
nullable_get);
763 auto is_non_null = nullable_keys
764 | std::views::transform(
767 return v.has_value();
772 std::forward<array>(values),
773 std::move(is_non_null),
779 template <std::
integral IT>
782 return m_proxy.name();
785 template <std::
integral IT>
788 return m_proxy.metadata();
791 template <std::
integral IT>
794 return m_proxy.length();
797 template <std::
integral IT>
803 template <std::
integral IT>
807 const auto index = m_keys_layout[i];
809 if (index.has_value())
812 return array_element(*p_values_layout,
static_cast<std::size_t
>(index.value()));
816 return dummy_const_reference();
820 template <std::
integral IT>
826 template <std::
integral IT>
832 template <std::
integral IT>
838 template <std::
integral IT>
844 template <std::
integral IT>
850 template <std::
integral IT>
856 template <std::
integral IT>
862 template <std::
integral IT>
868 template <std::
integral IT>
874 template <std::
integral IT>
880 template <std::
integral IT>
886 template <std::
integral IT>
892 template <std::
integral IT>
899 template <std::
integral IT>
906 template <std::
integral IT>
907 auto dictionary_encoded_array<IT>::dummy_inner_value() const -> const inner_value_type&
913 template <std::
integral IT>
920 template <std::
integral IT>
924 return self_type{get_arrow_proxy().slice_view(start,
end)};
927 template <std::
integral IT>
931 get_arrow_proxy().slice_inplace(start,
end);
934 template <std::
integral IT>
937 return static_cast<size_type>(get_arrow_proxy().offset());
948 template <std::
integral IT>
949 auto dictionary_encoded_array<IT>::dummy_const_reference() const -> const_reference
951 static const const_reference instance = std::visit(
952 [](
const auto& val) -> const_reference
954 using inner_ref =
typename arrow_traits<std::decay_t<
decltype(val)>>::const_reference;
962 template <std::
integral IT>
963 typename dictionary_encoded_array<IT>::values_layout
964 dictionary_encoded_array<IT>::create_values_layout(
arrow_proxy& proxy)
966 const auto& dictionary = proxy.dictionary();
968 arrow_proxy ar_dictionary{&(dictionary->array()), &(dictionary->schema())};
972 template <std::
integral IT>
973 constexpr auto dictionary_encoded_array<IT>::create_keys_layout(
arrow_proxy& proxy) -> keys_layout
975 return keys_layout{
arrow_proxy{&proxy.array(), &proxy.schema()}};
978 template <std::
integral IT>
979 constexpr auto dictionary_encoded_array<IT>::get_arrow_proxy() ->
arrow_proxy&
984 template <std::
integral IT>
985 constexpr auto dictionary_encoded_array<IT>::get_arrow_proxy() const -> const
arrow_proxy&
993 return std::ranges::equal(lhs, rhs);
997#if defined(__cpp_lib_format)
998template <std::
integral IT>
999struct std::formatter<
sparrow::dictionary_encoded_array<IT>>
1001 constexpr auto parse(std::format_parse_context& ctx)
1006 auto format(
const sparrow::dictionary_encoded_array<IT>& ar, std::format_context& ctx)
const
1008 std::format_to(ctx.out(),
"Dictionary [size={}] <", ar.
size());
1011 std::prev(ar.
cend()),
1012 [&ctx](
const auto& value)
1014 std::format_to(ctx.out(),
"{}, ", value);
1017 std::format_to(ctx.out(),
"{}>", ar.
back());
1024 template <std::
integral IT>
1027 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Object that owns a piece of contiguous memory.
Forward declaration of dictionary_encoded_array.
array_traits::inner_value_type inner_value_type
u8_buffer< IT > keys_buffer_type
constexpr iterator begin()
Gets an iterator to the beginning of the array.
dictionary_encoded_array< IT > self_type
functor_index_iterator< const_functor_type > const_iterator
array_traits::const_reference const_reference
SPARROW_CONSTEXPR_CLANG const_reference operator[](size_type i) const
Access operator for getting element at index.
constexpr self_type & operator=(const self_type &other)
Copy assignment operator.
constexpr const_reverse_iterator crbegin() const
Gets a constant reverse iterator to the beginning of the array.
constexpr self_type & operator=(self_type &&other)
Move assignment operator.
SPARROW_CONSTEXPR_CLANG const_reference back() const
Gets a reference to the last element.
array_traits::value_type value_type
constexpr void slice_inplace(size_type start, size_type end)
Slices the array in place to keep only the elements between the given start and end.
std::optional< key_value_view > metadata() const
Gets the metadata of the array.
constexpr self_type slice_view(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
layout_element_functor< self_type, true > functor_type
constexpr size_type offset() const
constexpr const_reverse_iterator rend() const
Gets a constant reverse iterator to the end of the array.
constexpr const_reverse_iterator crend() const
Gets a constant reverse iterator to the end of the array.
constexpr self_type slice(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
constexpr iterator end()
Gets an iterator to the end of the array.
constexpr const_iterator cend() const
Gets a constant iterator to the end of the array.
dictionary_encoded_array(arrow_proxy proxy)
Constructs a dictionary encoded array from an arrow proxy.
constexpr const_iterator cbegin() const
Gets a constant iterator to the beginning of the array.
layout_element_functor< self_type, true > const_functor_type
constexpr reverse_iterator rend()
Gets a reverse iterator to the end of the array.
dictionary_encoded_array(Args &&... args)
Constructs a dictionary encoded array with the given arguments.
constexpr const_iterator begin() const
Gets a constant iterator to the beginning of the array.
constexpr const_reverse_iterator rbegin() const
Gets a constant reverse iterator to the beginning of the array.
SPARROW_CONSTEXPR_CLANG const_reference front() const
Gets a reference to the first element.
array_traits::const_reference reference
constexpr std::optional< std::string_view > name() const
Gets the name of the array.
constexpr bool empty() const
Checks if the array is empty.
constexpr dictionary_encoded_array(self_type &&other)
Move constructor.
constexpr size_type size() const
Gets the number of elements in the array.
std::reverse_iterator< iterator > reverse_iterator
constexpr const_iterator end() const
Gets a constant iterator to the end of the array.
std::ptrdiff_t difference_type
constexpr reverse_iterator rbegin()
Gets a reverse iterator to the beginning of the array.
functor_index_iterator< functor_type > iterator
constexpr dictionary_encoded_array(const self_type &other)
Copy constructor.
std::reverse_iterator< const_iterator > const_reverse_iterator
typename storage_type::default_allocator default_allocator
Functor for accessing elements in a layout.
constexpr layout_element_functor(storage_type layout_)
Constructs a functor with the given layout.
std:: conditional_t< is_const, typename layout_type::const_reference, typename layout_type::reference > return_type
constexpr layout_element_functor()=default
Default constructor.
std::conditional_t< is_const, const layout_type *, layout_type > storage_type
constexpr return_type operator()(std::size_t i) const
Access operator for getting element at index.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
#define SPARROW_CONSTEXPR_CLANG
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
SPARROW_API void increase(const std::string &key)
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr std::string_view data_type_to_format(data_type type)
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_dictionary_encoded_array_v
Checks whether T is a dictionary_encoded_array type.
SPARROW_API array_traits::inner_value_type array_default_element_value(const array_wrapper &ar)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
constexpr nullable_get_fn nullable_get
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
primitive_array_impl< T, Ext, T2 > primitive_array
Array of values whose type has a fixed binary size.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs) noexcept
Swaps the contents of the two ArrowArray objects.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool data_type_is_integer(data_type dt) noexcept
data_type
Runtime identifier of Arrow data types, usually associated with the raw bytes of the corresponding value.
Extensions to the C++ standard library.
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get() noexcept
Gets the data type for the dictionary keys.
Metafunction for retrieving the data_type of a typed array.
static constexpr bool get() noexcept
Returns true for dictionary_encoded_array types.
static constexpr bool get() noexcept