Map array implementation for storing key-value pairs in a list-like structure.
This class implements an Arrow-compatible array for storing maps (associative arrays), where each element is a collection of key-value pairs. The implementation uses a struct array with two fields (keys and values) as the child array, combined with an offset buffer that delimits the individual maps within the flat structure.
#pragma once
{
class map_array;
template <>
struct array_inner_types<map_array> : array_inner_types_base
{
using array_type = map_array;
using inner_value_type = map_value;
using inner_reference = map_value;
using inner_const_reference = map_value;
using value_iterator = functor_index_iterator<detail::layout_value_functor<array_type, inner_value_type>>;
using const_value_iterator = functor_index_iterator<
detail::layout_value_functor<const array_type, inner_value_type>>;
using iterator_tag = std::random_access_iterator_tag;
};
template <class T>
// Implementation details: trait specializations for map_array.
namespace detail
{
// Specialization of get_data_type_from_array for sparrow::map_array.
template <>
struct get_data_type_from_array<
sparrow::map_array>
{
// NOTE(review): only an empty brace pair is visible here. The member function that
// should own this body (presumably the accessor returning the map data type) has no
// signature and no return statement in this view — restore it before relying on this trait.
{
}
};
}
{
public:
// Mutable value iterator type, taken from the inner_types traits.
using value_iterator = inner_types::value_iterator;
// Read-only element reference: nullable instantiated with inner_const_reference and bitmap_const_reference.
using const_reference = nullable<inner_const_reference, bitmap_const_reference>;
// Variadic forwarding constructor: forwards its arguments to create_proxy and delegates
// to the self_type constructor with the result.
// NOTE(review): the line carrying the constructor's name, its `args` parameter pack and
// any constraint is not visible between the template header and the initializer list.
template <class... Args>
:
self_type(create_proxy(std::forward<Args>(args)...))
{
}
template <std::ranges::range SIZES_RANGE>
private:
/// Non-const value iterator accessor. By its name it presumably yields the iterator to the
/// first map of the array; the definition is not visible in this header section.
[[nodiscard]] SPARROW_API value_iterator value_begin();
/// Returns a cloning_ptr to an array_wrapper without modifying the map array.
/// Presumably wraps the entries (key/value struct) child array and feeds p_entries_array —
/// the definition is not visible here, confirm against the implementation file.
[[nodiscard]] SPARROW_API cloning_ptr<array_wrapper> make_entries_array() const;
/// Const query returning a bool. Judging by the name, it reports whether the keys of each
/// map are sorted; the definition is not visible here, confirm where the flag is read from.
[[nodiscard]] SPARROW_API bool get_keys_sorted() const;
template <
input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
[[nodiscard]] static arrow_proxy create_proxy(
array&& flat_keys,
array&& flat_items,
VB&& validity_input,
std::optional<std::string_view> name = std::nullopt,
std::optional<METADATA_RANGE> metadata = std::nullopt
);
/**
 * Builds the arrow_proxy of a map array from flat keys/items, an arbitrary range of
 * offsets and a validity input.
 *
 * The range is first materialized into an offset_buffer_type, then the call is delegated
 * to the create_proxy overload that takes the offsets as `offset_buffer_type&&`.
 *
 * @tparam OFFSET_BUFFER_RANGE Input range whose values convert to offset_type.
 * @tparam VB Validity input type (constrained by validity_bitmap_input).
 * @tparam METADATA_RANGE Metadata container type.
 * @param flat_keys Flattened keys of all maps (moved from).
 * @param flat_items Flattened values of all maps (moved from).
 * @param list_offsets_range Offsets delimiting each map inside the flat arrays.
 * @param validity_input Validity information, perfectly forwarded.
 * @param name Optional array name.
 * @param metadata Optional metadata.
 * @return The arrow_proxy produced by the delegated overload.
 */
template <
    std::ranges::input_range OFFSET_BUFFER_RANGE,
    validity_bitmap_input VB = validity_bitmap,
    input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
    requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
[[nodiscard]] static arrow_proxy create_proxy(
    array&& flat_keys,
    array&& flat_items,
    OFFSET_BUFFER_RANGE&& list_offsets_range,
    VB&& validity_input,
    std::optional<std::string_view> name = std::nullopt,
    std::optional<METADATA_RANGE> metadata = std::nullopt
)
{
    // Materialize the caller's range into the concrete offset buffer the delegate expects.
    // NOTE(review): relies on offset_buffer_type being constructible from the range — confirm.
    offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
    return map_array::create_proxy(
        std::move(flat_keys),
        std::move(flat_items),
        std::move(list_offsets),
        std::forward<VB>(validity_input),
        // name/metadata are by-value parameters: a plain move is the right cast here.
        std::move(name),
        std::move(metadata)
    );
}
template <
input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
[[nodiscard]] static arrow_proxy create_proxy(
array&& flat_keys,
array&& flat_values,
bool nullable = true,
std::optional<std::string_view> name = std::nullopt,
std::optional<METADATA_RANGE> metadata = std::nullopt
);
/**
 * Builds the arrow_proxy of a map array from flat keys/items and an arbitrary range of
 * offsets, choosing with a flag whether the array carries a validity bitmap.
 *
 * The range is first materialized into an offset_buffer_type, then the call is delegated
 * to the create_proxy overload that takes `offset_buffer_type&&` and `bool nullable`.
 *
 * @tparam OFFSET_BUFFER_RANGE Input range whose values convert to offset_type.
 * @tparam METADATA_RANGE Metadata container type.
 * @param flat_keys Flattened keys of all maps (moved from).
 * @param flat_items Flattened values of all maps (moved from).
 * @param list_offsets_range Offsets delimiting each map inside the flat arrays.
 * @param nullable Passed through unchanged to the delegated overload.
 * @param name Optional array name.
 * @param metadata Optional metadata.
 * @return The arrow_proxy produced by the delegated overload.
 */
template <
    std::ranges::input_range OFFSET_BUFFER_RANGE,
    input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
    requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
[[nodiscard]] static arrow_proxy create_proxy(
    array&& flat_keys,
    array&& flat_items,
    OFFSET_BUFFER_RANGE&& list_offsets_range,
    bool nullable = true,
    std::optional<std::string_view> name = std::nullopt,
    std::optional<METADATA_RANGE> metadata = std::nullopt
)
{
    // Materialize the caller's range into the concrete offset buffer the delegate expects.
    // NOTE(review): relies on offset_buffer_type being constructible from the range — confirm.
    offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
    return map_array::create_proxy(
        std::move(flat_keys),
        std::move(flat_items),
        std::move(list_offsets),
        nullable,
        // name/metadata are by-value parameters: a plain move is the right cast here.
        std::move(name),
        std::move(metadata)
    );
}
// Index of the offsets buffer among the array's buffers; create_proxy stores the validity
// storage first and the offsets storage second.
static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
// Owning pointer to an array_wrapper; presumably the cached result of make_entries_array() — confirm.
cloning_ptr<array_wrapper> p_entries_array;
// Presumably the cached result of get_keys_sorted() — confirm where it is initialized.
bool m_keys_sorted;
// Grants the const value functor access to the private members of map_array.
friend class detail::layout_value_functor<const
map_array, map_value>;
};
template <std::ranges::range SIZES_RANGE>
{
std::forward<SIZES_RANGE>(sizes)
);
}
/**
 * Builds the arrow_proxy of a map array with an explicit validity input.
 *
 * The flat keys and items are bundled into a two-field struct_array named "entries",
 * the schema arguments use the format string "+m", and the array buffers are the
 * validity storage followed by the offsets storage. The number of maps is
 * `list_offsets.size() - 1`, so list_offsets is assumed to be non-empty.
 *
 * NOTE(review): as shown, several statements of this body are missing (see the inline
 * notes): `vbitmap`, `flags`, `schema` and `arr` are used but never declared, and two
 * argument lists have no call expression in front of them.
 *
 * @param flat_keys Flattened keys of all maps (moved into the entries struct).
 * @param flat_items Flattened values of all maps (moved into the entries struct).
 * @param list_offsets Offsets delimiting each map; its storage becomes the second buffer.
 * @param validity_input Validity information; not referenced by any visible statement.
 * @param name Optional array name, passed with the schema arguments.
 * @param metadata Optional metadata, passed with the schema arguments.
 * @return arrow_proxy built from `arr` and `schema`.
 */
template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
arrow_proxy map_array::create_proxy(
array&& flat_keys,
array&& flat_items,
offset_buffer_type&& list_offsets,
VB&& validity_input,
std::optional<std::string_view> name,
std::optional<METADATA_RANGE> metadata
)
{
// One more offset than there are maps.
const auto size = list_offsets.size() - 1;
// NOTE(review): `vbitmap` is never declared in the visible code; presumably it is built
// from `size` and `validity_input` (e.g. via ensure_validity_bitmap) right here.
bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
// NOTE(review): this branch is empty and `flags` (used below) is never declared;
// presumably the keys-sorted flag set was meant to be built here.
if (keys_sorted)
{
}
// Keys and items become the two children of the "entries" struct array.
std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
// NOTE(review): the meaning of the `false` argument is not visible here (presumably the
// entries struct is non-nullable) — confirm against struct_array's constructor.
struct_array entries(std::move(struct_children),
false, std::string(
"entries"));
// NOTE(review): `entries` is not used again in the visible code; the step turning it
// into the child ArrowArray/ArrowSchema is presumably missing.
const auto null_count = vbitmap.null_count();
// Ownership flag repeated once: a single child, owned.
const repeat_view<bool> children_ownership{true, 1};
// NOTE(review): the lines below are an argument list without its call; presumably
// `ArrowSchema schema = make_arrow_schema(` precedes them and the children argument
// between `flags` and `children_ownership` is also missing.
std::string("+m"),
name,
metadata,
flags,
children_ownership,
nullptr,
true
);
// Buffer 0: validity storage; buffer 1: offsets storage.
std::vector<buffer<std::uint8_t>> arr_buffs = {
std::move(vbitmap).extract_storage(),
std::move(list_offsets).extract_storage()
};
// NOTE(review): argument list without its call; presumably `ArrowArray arr = make_arrow_array(`
// precedes it and the children argument before `children_ownership` is missing.
static_cast<std::int64_t>(size),
static_cast<std::int64_t>(null_count),
0,
std::move(arr_buffs),
children_ownership,
nullptr,
true
);
return arrow_proxy{std::move(arr), std::move(schema)};
}
/**
 * Builds the arrow_proxy of a map array, with a flag selecting whether the array carries
 * a validity bitmap.
 *
 * When `nullable` is true the work is delegated to another create_proxy overload.
 * Otherwise the proxy is assembled here with an empty validity buffer
 * (`buffer<std::uint8_t>{nullptr, 0}`) and a null count of 0; the number of maps is
 * `list_offsets.size() - 1`, so list_offsets is assumed to be non-empty.
 *
 * NOTE(review): VB appears in the template header but in none of the parameters, so it
 * cannot be deduced from a call. As shown, several statements of this body are also
 * missing (see the inline notes).
 *
 * @param flat_keys Flattened keys of all maps.
 * @param flat_items Flattened values of all maps.
 * @param list_offsets Offsets delimiting each map; its storage becomes the second buffer.
 * @param nullable Selects the delegating branch (true) or the bitmap-less branch (false).
 * @param name Optional array name.
 * @param metadata Optional metadata.
 * @return The resulting arrow_proxy.
 */
template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
arrow_proxy map_array::create_proxy(
array&& flat_keys,
array&& flat_items,
offset_buffer_type&& list_offsets,
bool nullable,
std::optional<std::string_view> name,
std::optional<METADATA_RANGE> metadata
)
{
if (nullable)
{
// NOTE(review): no validity argument is passed between the offsets and `name`;
// presumably a default-constructed validity bitmap argument is missing here.
return map_array::create_proxy(
std::move(flat_keys),
std::move(flat_items),
std::move(list_offsets),
name,
metadata
);
}
else
{
bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
// NOTE(review): the conditional expression has no `?` branch in the visible code;
// presumably the optional flag set for sorted keys belongs between these two lines.
auto flags = keys_sorted
: std::nullopt;
// One more offset than there are maps.
const auto size = list_offsets.size() - 1;
// Keys and items become the two children of the "entries" struct array.
std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
struct_array entries(std::move(struct_children),
false, std::string(
"entries"));
// NOTE(review): `entries` is not used again in the visible code; the step turning it
// into the child ArrowArray/ArrowSchema is presumably missing.
// Ownership flag repeated once: a single child, owned.
const repeat_view<bool> children_ownership{true, 1};
// NOTE(review): argument list without its call; presumably `ArrowSchema schema = make_arrow_schema(`
// precedes it and the children argument between `flags` and `children_ownership` is missing.
std::string_view("+m"),
name,
metadata,
flags,
children_ownership,
nullptr,
true
);
// Buffer 0: empty validity buffer (no bitmap); buffer 1: offsets storage.
std::vector<buffer<std::uint8_t>> arr_buffs = {
buffer<std::uint8_t>{nullptr, 0},
std::move(list_offsets).extract_storage()
};
// NOTE(review): argument list without its call; presumably `ArrowArray arr = make_arrow_array(`
// precedes it (length, null count 0, offset 0, ...) and the children argument is missing.
static_cast<std::int64_t>(size),
0,
0,
std::move(arr_buffs),
children_ownership,
nullptr,
true
);
return arrow_proxy{std::move(arr), std::move(schema)};
}
}
}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
inner_types::inner_const_reference inner_const_reference
inner_types::const_value_iterator const_value_iterator
typename base_type::iterator_tag iterator_tag
nullable< inner_value_type > value_type
SPARROW_API map_array & operator=(const self_type &rhs)
Copy assignment operator.
inner_types::inner_value_type inner_value_type
typename base_type::const_bitmap_range const_bitmap_range
SPARROW_API const array_wrapper * raw_items_array() const
Gets read-only access to the values array.
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from map sizes.
SPARROW_API const array_wrapper * raw_keys_array() const
Gets read-only access to the keys array.
inner_types::inner_reference inner_reference
const std::int32_t offset_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
array_bitmap_base< self_type > base_type
SPARROW_API map_array(arrow_proxy proxy)
Constructs map array from Arrow proxy.
nullable< inner_const_reference, bitmap_const_reference > const_reference
array_inner_types< self_type > inner_types
typename base_type::bitmap_const_reference bitmap_const_reference
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_map_array_v
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
data_type
Runtime identifier of Arrow data types, usually stored alongside the raw bytes of the associated value.