34 template <
class Layout,
bool is_const>
40 using storage_type = std::conditional_t<is_const, const layout_type*, layout_type>;
42 conditional_t<is_const, typename layout_type::const_reference, typename layout_type::reference>;
53 return p_layout->operator[](i);
61 template <std::
integral IT>
62 class dictionary_encoded_array;
67 struct get_data_type_from_array;
69 template <std::
integral IT>
78 template <std::
integral IT>
81 [[nodiscard]]
static constexpr bool get()
94 template <std::
integral IT>
125 [[nodiscard]] std::optional<std::string_view>
name()
const;
126 [[nodiscard]] std::optional<key_value_view>
metadata()
const;
145 template <
class... Args>
179 [[nodiscard]]
static auto create_proxy(
183 std::optional<std::string_view>
name = std::nullopt,
184 std::optional<METADATA_RANGE>
metadata = std::nullopt
194 [[nodiscard]]
static keys_layout create_keys_layout(
arrow_proxy& proxy);
195 [[nodiscard]]
static values_layout create_values_layout(
arrow_proxy& proxy);
198 [[nodiscard]]
const arrow_proxy& get_arrow_proxy()
const;
201 keys_layout m_keys_layout;
202 values_layout p_values_layout;
214 template <std::
integral IT>
216 : m_proxy(
std::move(proxy))
217 , m_keys_layout(create_keys_layout(m_proxy))
218 , p_values_layout(create_values_layout(m_proxy))
223 template <std::
integral IT>
225 : m_proxy(rhs.m_proxy)
226 , m_keys_layout(create_keys_layout(m_proxy))
227 , p_values_layout(create_values_layout(m_proxy))
231 template <std::
integral IT>
236 m_proxy = rhs.m_proxy;
237 m_keys_layout = create_keys_layout(m_proxy);
238 p_values_layout = create_values_layout(m_proxy);
243 template <std::
integral IT>
245 : m_proxy(
std::move(rhs.m_proxy))
246 , m_keys_layout(create_keys_layout(m_proxy))
247 , p_values_layout(create_values_layout(m_proxy))
251 template <std::
integral IT>
257 swap(m_proxy, rhs.m_proxy);
258 m_keys_layout = create_keys_layout(m_proxy);
259 p_values_layout = create_values_layout(m_proxy);
264 template <std::
integral IT>
265 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
266 auto dictionary_encoded_array<IT>::create_proxy(
269 VBI&& validity_input,
270 std::optional<std::string_view>
name,
271 std::optional<METADATA_RANGE>
metadata
274 const auto size = keys.size();
294 std::vector<buffer<uint8_t>> buffers(2);
295 buffers[0] = std::move(vbitmap).extract_storage();
296 buffers[1] = std::move(keys).extract_storage();
300 static_cast<std::int64_t
>(
size),
301 static_cast<int64_t
>(null_count),
309 return arrow_proxy(std::move(arr), std::move(schema));
312 template <std::
integral IT>
315 return m_proxy.
name();
318 template <std::
integral IT>
321 return m_proxy.metadata();
324 template <std::
integral IT>
327 return m_proxy.length();
330 template <std::
integral IT>
336 template <std::
integral IT>
340 const auto index = m_keys_layout[i];
342 if (index.has_value())
345 return array_element(*p_values_layout,
static_cast<std::size_t
>(index.value()));
349 return dummy_const_reference();
353 template <std::
integral IT>
359 template <std::
integral IT>
365 template <std::
integral IT>
371 template <std::
integral IT>
377 template <std::
integral IT>
383 template <std::
integral IT>
389 template <std::
integral IT>
396 template <std::
integral IT>
403 template <std::
integral IT>
404 auto dictionary_encoded_array<IT>::dummy_inner_value() const -> const inner_value_type&
410 template <std::
integral IT>
417 template <std::
integral IT>
421 return self_type{get_arrow_proxy().slice_view(start,
end)};
432 template <std::
integral IT>
433 auto dictionary_encoded_array<IT>::dummy_const_reference() const -> const_reference
435 static const const_reference instance = std::visit(
436 [](
const auto& val) -> const_reference
438 using inner_ref =
typename arrow_traits<std::decay_t<
decltype(val)>>::const_reference;
446 template <std::
integral IT>
447 typename dictionary_encoded_array<IT>::values_layout
448 dictionary_encoded_array<IT>::create_values_layout(
arrow_proxy& proxy)
450 const auto& dictionary = proxy.dictionary();
452 arrow_proxy ar_dictionary{&(dictionary->array()), &(dictionary->schema())};
456 template <std::
integral IT>
457 auto dictionary_encoded_array<IT>::create_keys_layout(
arrow_proxy& proxy) -> keys_layout
459 return keys_layout{
arrow_proxy{&proxy.array(), &proxy.schema()}};
462 template <std::
integral IT>
463 auto dictionary_encoded_array<IT>::get_arrow_proxy() ->
arrow_proxy&
468 template <std::
integral IT>
469 auto dictionary_encoded_array<IT>::get_arrow_proxy() const -> const
arrow_proxy&
477 return std::ranges::equal(lhs, rhs);
481#if defined(__cpp_lib_format)
482template <std::
integral IT>
483struct std::formatter<
sparrow::dictionary_encoded_array<IT>>
485 constexpr auto parse(std::format_parse_context& ctx)
490 auto format(
const sparrow::dictionary_encoded_array<IT>& ar, std::format_context& ctx)
const
492 std::format_to(ctx.out(),
"Dictionary [size={}] <", ar.
size());
495 std::prev(ar.
cend()),
496 [&ctx](
const auto& value)
498 std::format_to(ctx.out(),
"{}, ", value);
501 std::format_to(ctx.out(),
"{}>", ar.
back());
506template <std::
integral IT>
509 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Proxy class over ArrowArray and ArrowSchema.
SPARROW_API std::optional< std::string_view > name() const
Smart pointer behaving like a copiable std::unique_ptr.
array_traits::inner_value_type inner_value_type
const_iterator end() const
u8_buffer< IT > keys_buffer_type
dictionary_encoded_array(arrow_proxy)
const_iterator begin() const
dictionary_encoded_array< IT > self_type
const_iterator cbegin() const
dictionary_encoded_array(const self_type &)
self_type & operator=(const self_type &)
functor_index_iterator< const_functor_type > const_iterator
array_traits::const_reference const_reference
const_iterator cend() const
array_traits::value_type value_type
self_type slice_view(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
std::optional< key_value_view > metadata() const
layout_element_functor< self_type, true > functor_type
std::optional< std::string_view > name() const
layout_element_functor< self_type, true > const_functor_type
dictionary_encoded_array(Args &&... args)
self_type slice(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
array_traits::const_reference reference
std::ptrdiff_t difference_type
const_reference back() const
functor_index_iterator< functor_type > iterator
self_type & operator=(self_type &&)
dictionary_encoded_array(self_type &&)
const_reference front() const
const_reference operator[](size_type i) const
constexpr size_type null_count() const noexcept
constexpr layout_element_functor(storage_type layout_)
std:: conditional_t< is_const, typename layout_type::const_reference, typename layout_type::reference > return_type
constexpr layout_element_functor()=default
std::conditional_t< is_const, const layout_type *, layout_type > storage_type
return_type operator()(std::size_t i) const
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
A view that repeats a value a given number of times.
This buffer class is used as the storage buffer for all sparrow arrays.
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
constexpr bool excludes_copy_and_move_ctor_v
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr std::string_view data_type_format_of()
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_dictionary_encoded_array_v
Checks whether T is a dictionary_encoded_array type.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
SPARROW_API array_traits::inner_value_type array_default_element_value(const array_wrapper &ar)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
primitive_array_impl< T > primitive_array
Array of values whose type has a fixed binary size.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
constexpr bool data_type_is_integer(data_type dt)
dynamic_bitset< std::uint8_t > validity_bitmap
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
mpl::rename< mpl::transform< detail::array_const_reference_t, all_base_types_t >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr bool get()
static constexpr bool get()