38 template <
class Layout,
bool is_const>
44 using storage_type = std::conditional_t<is_const, const layout_type*, layout_type>;
46 conditional_t<is_const, typename layout_type::const_reference, typename layout_type::reference>;
57 return p_layout->operator[](i);
65 template <std::
integral IT>
66 class dictionary_encoded_array;
71 struct get_data_type_from_array;
73 template <std::
integral IT>
82 template <std::
integral IT>
85 [[nodiscard]]
static constexpr bool get()
98 template <std::
integral IT>
129 [[nodiscard]] std::optional<std::string_view>
name()
const;
130 [[nodiscard]] std::optional<key_value_view>
metadata()
const;
149 template <
class... Args>
183 [[nodiscard]]
static auto create_proxy(
187 std::optional<std::string_view>
name = std::nullopt,
188 std::optional<METADATA_RANGE>
metadata = std::nullopt
194 [[nodiscard]]
static auto create_proxy(
198 std::optional<std::string_view>
name = std::nullopt,
199 std::optional<METADATA_RANGE>
metadata = std::nullopt
202 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
203 [[nodiscard]]
static auto create_proxy_impl(
206 std::optional<validity_bitmap> validity = std::nullopt,
207 std::optional<std::string_view>
name = std::nullopt,
208 std::optional<METADATA_RANGE>
metadata = std::nullopt
212 std::ranges::input_range KEY_RANGE,
216 !std::same_as<KEY_RANGE, keys_buffer_type>
217 and std::same_as<IT, std::ranges::range_value_t<KEY_RANGE>>
223 std::optional<std::string_view>
name = std::nullopt,
224 std::optional<METADATA_RANGE>
metadata = std::nullopt
229 std::move(keys_buffer),
230 std::forward<array>(values),
231 std::forward<R>(bitmaps),
239 std::ranges::input_range NULLABLE_KEY_RANGE,
240 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
241 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>, nullable<IT>>
242 static arrow_proxy create_proxy(
243 NULLABLE_KEY_RANGE&& nullable_keys,
245 std::optional<std::string_view>
name = std::nullopt,
246 std::optional<METADATA_RANGE>
metadata = std::nullopt
250 using values_layout = cloning_ptr<array_wrapper>;
255 [[nodiscard]]
static keys_layout create_keys_layout(arrow_proxy& proxy);
256 [[nodiscard]]
static values_layout create_values_layout(arrow_proxy& proxy);
258 [[nodiscard]] arrow_proxy& get_arrow_proxy();
259 [[nodiscard]]
const arrow_proxy& get_arrow_proxy()
const;
262 keys_layout m_keys_layout;
263 values_layout p_values_layout;
275 template <std::
integral IT>
277 : m_proxy(
std::move(proxy))
278 , m_keys_layout(create_keys_layout(m_proxy))
279 , p_values_layout(create_values_layout(m_proxy))
284 template <std::
integral IT>
286 : m_proxy(rhs.m_proxy)
287 , m_keys_layout(create_keys_layout(m_proxy))
288 , p_values_layout(create_values_layout(m_proxy))
292 template <std::
integral IT>
297 m_proxy = rhs.m_proxy;
298 m_keys_layout = create_keys_layout(m_proxy);
299 p_values_layout = create_values_layout(m_proxy);
304 template <std::
integral IT>
306 : m_proxy(
std::move(rhs.m_proxy))
307 , m_keys_layout(create_keys_layout(m_proxy))
308 , p_values_layout(create_values_layout(m_proxy))
312 template <std::
integral IT>
318 swap(m_proxy, rhs.m_proxy);
319 m_keys_layout = create_keys_layout(m_proxy);
320 p_values_layout = create_values_layout(m_proxy);
325 template <std::
integral IT>
326 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
327 auto dictionary_encoded_array<IT>::create_proxy(
330 VBI&& validity_input,
331 std::optional<std::string_view>
name,
332 std::optional<METADATA_RANGE>
metadata
335 const auto size = keys.size();
337 return create_proxy_impl(
338 std::forward<keys_buffer_type>(keys),
339 std::forward<array>(values),
340 std::make_optional<validity_bitmap>(std::move(vbitmap)),
346 template <std::
integral IT>
347 template <val
idity_bitmap_input VBI, input_metadata_container METADATA_RANGE>
348 auto dictionary_encoded_array<IT>::create_proxy(
349 keys_buffer_type&& keys,
352 std::optional<std::string_view> name,
353 std::optional<METADATA_RANGE> metadata
356 const auto size = keys.size();
357 return create_proxy_impl(
358 std::forward<keys_buffer_type>(keys),
359 std::forward<array>(values),
360 nullable ? std::make_optional<validity_bitmap>(
nullptr, size) : std::nullopt,
366 template <std::
integral IT>
367 template <input_metadata_container METADATA_RANGE>
368 [[nodiscard]]
arrow_proxy dictionary_encoded_array<IT>::create_proxy_impl(
371 std::optional<validity_bitmap> validity,
372 std::optional<std::string_view>
name,
373 std::optional<METADATA_RANGE>
metadata
376 const auto size = keys.size();
380 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
381 flags = validity.has_value()
397 const size_t null_count = validity.has_value() ? validity->null_count() : 0;
399 std::vector<buffer<uint8_t>> buffers(2);
400 buffers[0] = validity.has_value() ? std::move(*validity).extract_storage()
402 buffers[1] = std::move(keys).extract_storage();
405 static_cast<std::int64_t
>(
size),
406 static_cast<std::int64_t
>(null_count),
414 return arrow_proxy(std::move(arr), std::move(schema));
417 template <std::
integral IT>
418 template <std::ranges::input_range NULLABLE_KEY_RANGE, input_metadata_container METADATA_RANGE>
419 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_KEY_RANGE>,
nullable<IT>>
420 arrow_proxy dictionary_encoded_array<IT>::create_proxy(
421 NULLABLE_KEY_RANGE&& nullable_keys,
423 std::optional<std::string_view> name,
424 std::optional<METADATA_RANGE> metadata
427 auto keys = nullable_keys
428 | std::views::transform(
434 auto is_non_null = nullable_keys
435 | std::views::transform(
438 return v.has_value();
443 std::forward<array>(values),
444 std::move(is_non_null),
450 template <std::
integral IT>
453 return m_proxy.name();
456 template <std::
integral IT>
459 return m_proxy.metadata();
462 template <std::
integral IT>
465 return m_proxy.length();
468 template <std::
integral IT>
474 template <std::
integral IT>
478 const auto index = m_keys_layout[i];
480 if (index.has_value())
483 return array_element(*p_values_layout,
static_cast<std::size_t
>(index.value()));
487 return dummy_const_reference();
491 template <std::
integral IT>
497 template <std::
integral IT>
503 template <std::
integral IT>
509 template <std::
integral IT>
515 template <std::
integral IT>
521 template <std::
integral IT>
527 template <std::
integral IT>
534 template <std::
integral IT>
541 template <std::
integral IT>
542 auto dictionary_encoded_array<IT>::dummy_inner_value() const -> const inner_value_type&
548 template <std::
integral IT>
555 template <std::
integral IT>
559 return self_type{get_arrow_proxy().slice_view(start,
end)};
570 template <std::
integral IT>
571 auto dictionary_encoded_array<IT>::dummy_const_reference() const -> const_reference
573 static const const_reference instance = std::visit(
574 [](
const auto& val) -> const_reference
576 using inner_ref =
typename arrow_traits<std::decay_t<
decltype(val)>>::const_reference;
584 template <std::
integral IT>
585 typename dictionary_encoded_array<IT>::values_layout
586 dictionary_encoded_array<IT>::create_values_layout(
arrow_proxy& proxy)
588 const auto& dictionary = proxy.dictionary();
590 arrow_proxy ar_dictionary{&(dictionary->array()), &(dictionary->schema())};
594 template <std::
integral IT>
595 auto dictionary_encoded_array<IT>::create_keys_layout(
arrow_proxy& proxy) -> keys_layout
597 return keys_layout{
arrow_proxy{&proxy.array(), &proxy.schema()}};
600 template <std::
integral IT>
601 auto dictionary_encoded_array<IT>::get_arrow_proxy() ->
arrow_proxy&
606 template <std::
integral IT>
607 auto dictionary_encoded_array<IT>::get_arrow_proxy() const -> const
arrow_proxy&
615 return std::ranges::equal(lhs, rhs);
619#if defined(__cpp_lib_format)
620template <std::
integral IT>
621struct std::formatter<
sparrow::dictionary_encoded_array<IT>>
623 constexpr auto parse(std::format_parse_context& ctx)
628 auto format(
const sparrow::dictionary_encoded_array<IT>& ar, std::format_context& ctx)
const
630 std::format_to(ctx.out(),
"Dictionary [size={}] <", ar.
size());
633 std::prev(ar.
cend()),
634 [&ctx](
const auto& value)
636 std::format_to(ctx.out(),
"{}, ", value);
639 std::format_to(ctx.out(),
"{}>", ar.
back());
644template <std::
integral IT>
647 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Proxy class over ArrowArray and ArrowSchema.
Object that owns a piece of contiguous memory.
array_traits::inner_value_type inner_value_type
const_iterator end() const
u8_buffer< IT > keys_buffer_type
dictionary_encoded_array(arrow_proxy)
const_iterator begin() const
dictionary_encoded_array< IT > self_type
const_iterator cbegin() const
dictionary_encoded_array(const self_type &)
self_type & operator=(const self_type &)
functor_index_iterator< const_functor_type > const_iterator
array_traits::const_reference const_reference
const_iterator cend() const
array_traits::value_type value_type
self_type slice_view(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
std::optional< key_value_view > metadata() const
layout_element_functor< self_type, true > functor_type
std::optional< std::string_view > name() const
layout_element_functor< self_type, true > const_functor_type
dictionary_encoded_array(Args &&... args)
self_type slice(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
array_traits::const_reference reference
std::ptrdiff_t difference_type
const_reference back() const
functor_index_iterator< functor_type > iterator
self_type & operator=(self_type &&)
dictionary_encoded_array(self_type &&)
const_reference front() const
const_reference operator[](size_type i) const
constexpr layout_element_functor(storage_type layout_)
std:: conditional_t< is_const, typename layout_type::const_reference, typename layout_type::reference > return_type
constexpr layout_element_functor()=default
std::conditional_t< is_const, const layout_type *, layout_type > storage_type
return_type operator()(std::size_t i) const
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
A view that repeats a value a given number of times.
This buffer class is used as the storage buffer for all sparrow arrays.
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
constexpr bool excludes_copy_and_move_ctor_v
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr std::string_view data_type_format_of()
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_dictionary_encoded_array_v
Checks whether T is a dictionary_encoded_array type.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
SPARROW_API array_traits::inner_value_type array_default_element_value(const array_wrapper &ar)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
primitive_array_impl< T > primitive_array
Array of values whose type has a fixed binary size.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
constexpr bool data_type_is_integer(data_type dt)
dynamic_bitset< std::uint8_t > validity_bitmap
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
mpl::rename< mpl::transform< detail::array_const_reference_t, all_base_types_t >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr bool get()
static constexpr bool get()