33 template <
class Layout,
bool is_const>
39 using storage_type = std::conditional_t<is_const, const layout_type*, layout_type>;
41 conditional_t<is_const, typename layout_type::const_reference, typename layout_type::reference>;
52 return p_layout->operator[](i);
60 template <std::
integral IT>
61 class dictionary_encoded_array;
66 struct get_data_type_from_array;
68 template <std::
integral IT>
77 template <std::
integral IT>
80 [[nodiscard]]
static constexpr bool get()
93 template <std::
integral IT>
124 [[nodiscard]] std::optional<std::string_view>
name()
const;
125 [[nodiscard]] std::optional<std::string_view>
metadata()
const;
144 template <
class... Args>
175 template <val
idity_bitmap_input R = val
idity_bitmap>
176 [[nodiscard]]
static auto create_proxy(
180 std::optional<std::string_view>
name = std::nullopt,
181 std::optional<std::string_view>
metadata = std::nullopt
191 [[nodiscard]]
static keys_layout create_keys_layout(
arrow_proxy& proxy);
192 [[nodiscard]]
static values_layout create_values_layout(
arrow_proxy& proxy);
195 [[nodiscard]]
const arrow_proxy& get_arrow_proxy()
const;
198 keys_layout m_keys_layout;
199 values_layout p_values_layout;
211 template <std::
integral IT>
213 : m_proxy(
std::move(proxy))
214 , m_keys_layout(create_keys_layout(m_proxy))
215 , p_values_layout(create_values_layout(m_proxy))
220 template <std::
integral IT>
222 : m_proxy(rhs.m_proxy)
223 , m_keys_layout(create_keys_layout(m_proxy))
224 , p_values_layout(create_values_layout(m_proxy))
228 template <std::
integral IT>
233 m_proxy = rhs.m_proxy;
234 m_keys_layout = create_keys_layout(m_proxy);
235 p_values_layout = create_values_layout(m_proxy);
240 template <std::
integral IT>
242 : m_proxy(
std::move(rhs.m_proxy))
243 , m_keys_layout(create_keys_layout(m_proxy))
244 , p_values_layout(create_values_layout(m_proxy))
248 template <std::
integral IT>
254 swap(m_proxy, rhs.m_proxy);
255 m_keys_layout = create_keys_layout(m_proxy);
256 p_values_layout = create_values_layout(m_proxy);
261 template <std::
integral IT>
262 template <val
idity_bitmap_input VBI>
263 auto dictionary_encoded_array<IT>::create_proxy(
266 VBI&& validity_input,
267 std::optional<std::string_view>
name,
268 std::optional<std::string_view>
metadata
271 const auto size = keys.size();
288 std::vector<buffer<uint8_t>> buffers(2);
289 buffers[0] = std::move(vbitmap).extract_storage();
290 buffers[1] = std::move(keys).extract_storage();
294 static_cast<std::int64_t
>(
size),
295 static_cast<int64_t
>(null_count),
302 return arrow_proxy(std::move(arr), std::move(schema));
305 template <std::
integral IT>
308 return m_proxy.
name();
311 template <std::
integral IT>
314 return m_proxy.metadata();
317 template <std::
integral IT>
320 return m_proxy.length();
323 template <std::
integral IT>
329 template <std::
integral IT>
333 const auto index = m_keys_layout[i];
335 if (index.has_value())
338 return array_element(*p_values_layout,
static_cast<std::size_t
>(index.value()));
342 return dummy_const_reference();
346 template <std::
integral IT>
352 template <std::
integral IT>
358 template <std::
integral IT>
364 template <std::
integral IT>
370 template <std::
integral IT>
376 template <std::
integral IT>
382 template <std::
integral IT>
389 template <std::
integral IT>
396 template <std::
integral IT>
397 auto dictionary_encoded_array<IT>::dummy_inner_value() const -> const inner_value_type&
403 template <std::
integral IT>
410 template <std::
integral IT>
414 return self_type{get_arrow_proxy().slice_view(start,
end)};
425 template <std::
integral IT>
426 auto dictionary_encoded_array<IT>::dummy_const_reference() const -> const_reference
428 static const const_reference instance = std::visit(
429 [](
const auto& val) -> const_reference
431 using inner_ref =
typename arrow_traits<std::decay_t<
decltype(val)>>::const_reference;
439 template <std::
integral IT>
440 typename dictionary_encoded_array<IT>::values_layout
441 dictionary_encoded_array<IT>::create_values_layout(
arrow_proxy& proxy)
443 const auto& dictionary = proxy.dictionary();
445 arrow_proxy ar_dictionary{&(dictionary->array()), &(dictionary->schema())};
449 template <std::
integral IT>
450 auto dictionary_encoded_array<IT>::create_keys_layout(
arrow_proxy& proxy) -> keys_layout
452 return keys_layout{
arrow_proxy{&proxy.array(), &proxy.schema()}};
455 template <std::
integral IT>
456 auto dictionary_encoded_array<IT>::get_arrow_proxy() ->
arrow_proxy&
461 template <std::
integral IT>
462 auto dictionary_encoded_array<IT>::get_arrow_proxy() const -> const
arrow_proxy&
470 return std::ranges::equal(lhs, rhs);
474#if defined(__cpp_lib_format)
475template <std::
integral IT>
476struct std::formatter<
sparrow::dictionary_encoded_array<IT>>
478 constexpr auto parse(std::format_parse_context& ctx)
483 auto format(
const sparrow::dictionary_encoded_array<IT>& ar, std::format_context& ctx)
const
485 std::format_to(ctx.out(),
"Dictionary [size={}] <", ar.
size());
488 std::prev(ar.
cend()),
489 [&ctx](
const auto& value)
491 std::format_to(ctx.out(),
"{}, ", value);
494 std::format_to(ctx.out(),
"{}>", ar.
back());
499template <std::
integral IT>
502 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Proxy class over ArrowArray and ArrowSchema.
SPARROW_API std::optional< std::string_view > name() const
Smart pointer behaving like a copiable std::unique_ptr.
array_traits::inner_value_type inner_value_type
const_iterator end() const
u8_buffer< IT > keys_buffer_type
dictionary_encoded_array(arrow_proxy)
const_iterator begin() const
dictionary_encoded_array< IT > self_type
const_iterator cbegin() const
dictionary_encoded_array(const self_type &)
self_type & operator=(const self_type &)
functor_index_iterator< const_functor_type > const_iterator
array_traits::const_reference const_reference
const_iterator cend() const
std::optional< std::string_view > metadata() const
array_traits::value_type value_type
self_type slice_view(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
layout_element_functor< self_type, true > functor_type
std::optional< std::string_view > name() const
layout_element_functor< self_type, true > const_functor_type
dictionary_encoded_array(Args &&... args)
self_type slice(size_type start, size_type end) const
Slices the array to keep only the elements between the given start and end.
array_traits::const_reference reference
std::ptrdiff_t difference_type
const_reference back() const
functor_index_iterator< functor_type > iterator
self_type & operator=(self_type &&)
dictionary_encoded_array(self_type &&)
const_reference front() const
const_reference operator[](size_type i) const
constexpr size_type null_count() const noexcept
constexpr layout_element_functor(storage_type layout_)
std:: conditional_t< is_const, typename layout_type::const_reference, typename layout_type::reference > return_type
constexpr layout_element_functor()=default
std::conditional_t< is_const, const layout_type *, layout_type > storage_type
return_type operator()(std::size_t i) const
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
constexpr bool excludes_copy_and_move_ctor_v
constexpr std::string_view data_type_format_of()
ArrowSchema make_arrow_schema(F format, N name, M metadata, std::optional< ArrowFlag > flags, int64_t n_children, ArrowSchema **children, ArrowSchema *dictionary)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_dictionary_encoded_array_v
Checks whether T is a dictionary_encoded_array type.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
SPARROW_API array_traits::inner_value_type array_default_element_value(const array_wrapper &ar)
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
constexpr bool data_type_is_integer(data_type dt)
dynamic_bitset< std::uint8_t > validity_bitmap
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
std::ostream & operator<<(std::ostream &stream, T n)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
array_trivial_copyable< T > primitive_array
Array of values whose type has a fixed binary size.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Creates an ArrowArray.
mpl::rename< mpl::transform< detail::array_const_reference_t, all_base_types_t >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr bool get()
static constexpr bool get()