Array implementation for storing structured data with named fields.
The struct_array provides a columnar storage format for structured data, where each struct element consists of multiple named fields (child arrays). This is similar to database records or C structs, but optimized for analytical workloads through a columnar memory layout.
#pragma once
#include <ranges>
#include <string_view>
#include <version>
#if defined(__cpp_lib_format)
#endif
{
namespace detail
{
template <>
{
{
}
};
}
/**
 * Specialization of array_inner_types for struct_array.
 *
 * Declares the element-level types used by the struct layout: the value,
 * reference and const reference types are all struct_value, and the value
 * iterators are functor_index_iterator instances driven by
 * detail::layout_value_functor.
 *
 * NOTE(review): array_type is used by the iterator aliases below but is not
 * declared in the lines visible here - confirm where it is introduced.
 */
template <>
struct array_inner_types<
struct_array> : array_inner_types_base
{
// Value, reference and const reference all use the same struct_value type.
using inner_value_type = struct_value;
using inner_reference = struct_value;
using inner_const_reference = struct_value;
// Mutable iteration: index-based iterator whose functor is bound to a non-const array_type.
using value_iterator = functor_index_iterator<detail::layout_value_functor<array_type, inner_value_type>>;
// Const iteration: same functor, bound to a const array_type.
using const_value_iterator = functor_index_iterator<
detail::layout_value_functor<const array_type, inner_value_type>>;
// Iterators over this layout are advertised as random access.
using iterator_tag = std::random_access_iterator_tag;
};
template <class T>
{
public:
using value_iterator = typename inner_types::value_iterator;
using const_reference = nullable<inner_const_reference, bitmap_const_reference>;
template <class... Args>
{
}
private:
/**
 * Creates the arrow_proxy of a struct array from child arrays and a validity input.
 *
 * @tparam CHILDREN_RANGE Input range whose value type is array.
 * @tparam METADATA_RANGE Metadata container type; defaults to std::vector<metadata_pair>.
 * @param children Child arrays (the fields of the struct).
 * @param bitmaps Validity input, taken as a forwarding reference.
 * @param name Optional name; defaults to std::nullopt.
 * @param metadata Optional metadata; defaults to std::nullopt.
 * @return The resulting arrow_proxy.
 *
 * NOTE(review): VB is used in the parameter list but is not declared in the
 * template header visible here; the out-of-class definition declares it as
 * `validity_bitmap_input VB` between CHILDREN_RANGE and METADATA_RANGE -
 * confirm that this declaration matches.
 */
template <
std::ranges::input_range CHILDREN_RANGE,
input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
[[nodiscard]] static auto create_proxy(
CHILDREN_RANGE&& children,
VB&& bitmaps,
std::optional<std::string_view> name = std::nullopt,
std::optional<METADATA_RANGE> metadata = std::nullopt
) -> arrow_proxy;
/**
 * Creates the arrow_proxy of a struct array from child arrays and a nullability flag.
 *
 * @tparam CHILDREN_RANGE Input range whose value type is array.
 * @tparam METADATA_RANGE Metadata container type; defaults to std::vector<metadata_pair>.
 * @param children Child arrays (the fields of the struct).
 * @param nullable When true (the default), a validity bitmap is created for the
 *                 array; when false, no bitmap is passed on.
 * @param name Optional name; defaults to std::nullopt.
 * @param metadata Optional metadata; defaults to std::nullopt.
 * @return The resulting arrow_proxy.
 */
template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
[[nodiscard]] static auto create_proxy(
CHILDREN_RANGE&& children,
bool nullable = true,
std::optional<std::string_view> name = std::nullopt,
std::optional<METADATA_RANGE> metadata = std::nullopt
) -> arrow_proxy;
/**
 * Common implementation used by the create_proxy overloads.
 *
 * @tparam CHILDREN_RANGE Input range whose value type is array.
 * @tparam METADATA_RANGE Metadata container type; defaults to std::vector<metadata_pair>.
 * @param children Child arrays (the fields of the struct).
 * @param bitmap Optional validity bitmap, taken by rvalue reference. When it
 *               holds no value, the null count is 0 and an empty validity
 *               buffer is stored.
 * @param name Optional name; defaults to std::nullopt.
 * @param metadata Optional metadata; defaults to std::nullopt.
 * @return The resulting arrow_proxy.
 */
template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
[[nodiscard]] static auto create_proxy_impl(
CHILDREN_RANGE&& children,
std::optional<validity_bitmap>&& bitmap,
std::optional<std::string_view> name = std::nullopt,
std::optional<METADATA_RANGE> metadata = std::nullopt
) -> arrow_proxy;
// Storage type for the child arrays: one cloning_ptr<array_wrapper> per child.
using children_type = std::vector<cloning_ptr<array_wrapper>>;
// Returns the value iterator for the start of the array.
// NOTE(review): defined outside the lines visible here - presumably positioned on the first element; confirm.
[[nodiscard]]
SPARROW_API value_iterator value_begin();
// Builds the children_type collection.
// NOTE(review): defined outside the lines visible here - presumably wraps the children of the underlying arrow proxy; confirm.
[[nodiscard]]
SPARROW_API children_type make_children();
// Child array wrappers held by this struct_array.
children_type m_children;
};
/**
 * Creates the arrow_proxy of a struct array from child arrays and a validity input.
 *
 * Delegates to create_proxy_impl, forwarding the children and moving the
 * bitmap, name and metadata.
 *
 * @param children Child arrays (the fields of the struct).
 * @param validity_input Validity input satisfying validity_bitmap_input.
 * @param name Optional name.
 * @param metadata Optional metadata.
 * @return The arrow_proxy produced by create_proxy_impl.
 */
template <std::ranges::input_range CHILDREN_RANGE, validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
auto struct_array::create_proxy(
CHILDREN_RANGE&& children,
VB&& validity_input,
std::optional<std::string_view> name,
std::optional<METADATA_RANGE> metadata
) -> arrow_proxy
{
// Length of the struct array: the size of the first child, or 0 when there are no children.
const auto size = children.empty() ? 0 : children[0].size();
// NOTE(review): `vbitmap` is not defined in the lines visible here, and `size` and
// `validity_input` are otherwise unused. Presumably a statement such as
// `validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));`
// belongs at this point - confirm against the original header.
return create_proxy_impl(
std::forward<CHILDREN_RANGE>(children),
std::move(vbitmap),
std::move(name),
std::move(metadata)
);
}
/**
 * Creates the arrow_proxy of a struct array from child arrays and a nullability flag.
 *
 * The array length is the size of the first child (0 when there are no
 * children). When @p nullable is true, a validity bitmap is constructed from
 * (nullptr, length) and handed to create_proxy_impl; otherwise no bitmap is
 * passed.
 *
 * @param children Child arrays (the fields of the struct).
 * @param nullable Whether a validity bitmap is created for the array.
 * @param name Optional name.
 * @param metadata Optional metadata.
 * @return The arrow_proxy produced by create_proxy_impl.
 */
template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
    requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
auto struct_array::create_proxy(
    CHILDREN_RANGE&& children,
    bool nullable,
    std::optional<std::string_view> name,
    std::optional<METADATA_RANGE> metadata
) -> arrow_proxy
{
    // The first child dictates the length; no children means an empty array.
    size_t length = 0;
    if (!children.empty())
    {
        length = children[0].size();
    }

    // Only nullable arrays carry a validity bitmap.
    std::optional<validity_bitmap> validity;
    if (nullable)
    {
        validity.emplace(nullptr, length);
    }

    return create_proxy_impl(
        std::forward<CHILDREN_RANGE>(children),
        std::move(validity),
        std::move(name),
        std::move(metadata)
    );
}
/**
 * Common implementation behind the create_proxy overloads.
 *
 * Turns each child into a heap-allocated ArrowArray / ArrowSchema pair, derives
 * the null count from the optional bitmap, and returns an arrow_proxy built
 * from the resulting array and schema.
 *
 * NOTE(review): several statements of this body are not visible here (the
 * declarations of child_arrays / child_schemas, flat_arr / flat_schema, the
 * value used for `flags` when a bitmap is present, and the heads of the calls
 * that produce `schema` and `arr`). The comments below describe only what the
 * visible lines establish.
 *
 * @param children Child arrays (the fields of the struct).
 * @param bitmap Optional validity bitmap; its storage is extracted when present.
 * @param name Optional name, moved into the schema arguments.
 * @param metadata Optional metadata, moved into the schema arguments.
 * @return arrow_proxy constructed from the moved `arr` and `schema`.
 */
template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
auto struct_array::create_proxy_impl(
CHILDREN_RANGE&& children,
std::optional<validity_bitmap>&& bitmap,
std::optional<std::string_view> name,
std::optional<METADATA_RANGE> metadata
) -> arrow_proxy
{
const auto n_children = children.size();
// Length of the struct array: the size of the first child, or 0 when there are no children.
const auto size = children.empty() ? 0 : children[0].size();
// One ArrowArray and one ArrowSchema are allocated with `new` per child and stored
// as raw pointers in child_arrays / child_schemas.
for (std::size_t i = 0; i < n_children; ++i)
{
auto& child = children[i];
child_arrays[i] =
new ArrowArray(std::move(flat_arr));
child_schemas[i] =
new ArrowSchema(std::move(flat_schema));
}
// Without a bitmap the null count is 0 and the flags are std::nullopt.
const bool bitmap_has_value = bitmap.has_value();
const auto null_count = bitmap_has_value ? bitmap->null_count() : 0;
const auto flags = bitmap_has_value
: std::nullopt;
// Schema arguments; presumably the argument list of make_arrow_schema (format, name,
// metadata, flags, children, children ownership, dictionary, dictionary ownership).
// "+s" is the Arrow format string for a struct; every child is flagged `true` in the
// ownership range and no dictionary is supplied.
std::string("+s"),
std::move(name),
std::move(metadata),
flags,
child_schemas,
repeat_view<bool>(true, n_children),
nullptr,
true
);
// The single buffer slot holds the validity bitmap storage, or a (nullptr, 0) buffer
// when no bitmap was supplied.
buffer<uint8_t> bitmap_buffer = bitmap_has_value ? std::move(*bitmap).extract_storage()
: buffer<uint8_t>{nullptr, 0};
std::vector<buffer<std::uint8_t>> arr_buffs(1);
arr_buffs[0] = std::move(bitmap_buffer);
// Array arguments; presumably the argument list of make_arrow_array (length, null count,
// offset, buffers, children, children ownership, dictionary, dictionary ownership).
// The offset is 0, every child is flagged `true` in the ownership range and no
// dictionary is supplied.
static_cast<std::int64_t>(size),
static_cast<std::int64_t>(null_count),
0,
std::move(arr_buffs),
child_arrays,
repeat_view<bool>(true, n_children),
nullptr,
true
);
return arrow_proxy{std::move(arr), std::move(schema)};
}
}
#if defined(__cpp_lib_format)
template <>
{
/// Parses nothing: the iterator at the beginning of the parse context is
/// returned unchanged.
constexpr auto parse(std::format_parse_context& ctx)
{
    auto spec_begin = ctx.begin();
    return spec_begin;
}
-> decltype(ctx.out());
};
{
}
#endif
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
array_inner_types< self_type > inner_types
SPARROW_API const array_wrapper * raw_child(std::size_t i) const
Gets const pointer to child array at specified index.
nullable< inner_value_type > value_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
struct_value inner_reference
base_type::iterator_tag iterator_tag
struct_value inner_value_type
struct_value inner_const_reference
SPARROW_API size_type children_count() const
Gets the number of child arrays (fields).
base_type::const_bitmap_range const_bitmap_range
SPARROW_API struct_array & operator=(const struct_array &rhs)
Copy assignment operator.
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::const_value_iterator const_value_iterator
array_bitmap_base< self_type > base_type
#define SPARROW_ASSERT_TRUE(expr__)
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
constexpr bool is_struc_array_v
Type trait to check if a type is a struct_array.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
data_type
Runtime identifier of Arrow data types, usually paired with the raw bytes that hold the associated value.