sparrow 1.2.0
Loading...
Searching...
No Matches
struct_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string_view>
19#include <version>
20
22
23#if defined(__cpp_lib_format)
25#endif
26
27#include "sparrow/array_api.hpp"
38
39namespace sparrow
40{
41 class struct_array;
42
// Implementation-detail namespace holding a full specialization whose static get()
// returns a sparrow::data_type.
// NOTE(review): the specialization header (listing line 46) and the return statement
// (listing line 50) are not present in this listing; presumably this specializes the
// "data_type of a typed array" metafunction for struct_array - confirm against the header.
43 namespace detail
44 {
45 template <>
47 {
// Compile-time accessor; the result must not be discarded ([[nodiscard]]).
48 [[nodiscard]] static constexpr sparrow::data_type get()
49 {
51 }
52 };
53 }
54
55 template <>
67
// Type trait: true exactly when T is the same type as struct_array.
// std::same_as is an exact match, so cv- or reference-qualified variants of
// struct_array yield false.
73 template <class T>
74 constexpr bool is_struc_array_v = std::same_as<T, struct_array>;
75
// Array layout for struct values: each field of the struct is stored as a child array.
// Derives (CRTP) from array_bitmap_base<struct_array> and cannot be derived from (final).
// NOTE(review): this listing omits many member declarations (type aliases, copy/move
// members, value accessors, data members); only the lines shown here are documented.
124 class struct_array final : public array_bitmap_base<struct_array>
125 {
126 public:
127
// Iterator types over the struct values, taken from inner_types.
131 using value_iterator = typename inner_types::value_iterator;
132 using const_value_iterator = typename inner_types::const_value_iterator;
134
137
139
143
147
161
// Generic constructor: forwards all arguments to create_proxy and delegates to the
// constructor taking the resulting proxy.
// NOTE(review): listing line 178 (likely a constraint on Args) is not shown - confirm.
177 template <class... Args>
179 explicit struct_array(Args&&... args)
180 : struct_array(create_proxy(std::forward<Args>(args)...))
181 {
182 }
183
195
208
211
// Number of child arrays (fields).
220 [[nodiscard]] SPARROW_API size_type children_count() const;
221
// Const pointer to the child array at index i.
232 [[nodiscard]] SPARROW_API const array_wrapper* raw_child(std::size_t i) const;
233
// Mutable pointer to the child array at index i.
244 [[nodiscard]] SPARROW_API array_wrapper* raw_child(std::size_t i);
245
// Names of all child arrays, produced as a std::views::transform view over the
// proxy's children (each element is the result of child.name()); nothing is
// materialized into a container here.
251 [[nodiscard]] auto names() const
252 {
253 return get_arrow_proxy().children()
254 | std::views::transform(
255 [](const auto& child)
256 {
257 return child.name();
258 }
259 );
260 }
261
// Appends one child array (field) to the struct. Defined after the class.
269 template <layout_or_array A>
270 void add_child(A&& child);
271
// Appends every element of `children` as a child array. Defined after the class.
// NOTE(review): listing line 287 (presumably a requires-clause) is not shown.
286 template <std::ranges::input_range R>
288 void add_children(R&& children);
289
// Replaces the child array at position `index`. Defined after the class.
300 template <layout_or_array A>
301 void set_child(A&& child, size_t index);
302
// Removes the last n children from the struct.
312 SPARROW_API void pop_children(size_t n);
313
314 protected:
315
// Builds an arrow_proxy from child arrays plus an explicit validity input (`bitmaps`),
// with optional name and metadata.
// NOTE(review): listing line 339, which must introduce the VB template parameter used
// below, is not shown.
337 template <
338 std::ranges::input_range CHILDREN_RANGE,
340 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
341 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
342 [[nodiscard]] static auto create_proxy(
343 CHILDREN_RANGE&& children,
344 VB&& bitmaps,
345 std::optional<std::string_view> name = std::nullopt,
346 std::optional<METADATA_RANGE> metadata = std::nullopt
347 ) -> arrow_proxy;
348
// Builds an arrow_proxy from child arrays; `nullable` (default true) selects whether a
// validity bitmap is created for the struct array.
366 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
367 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
368 [[nodiscard]] static auto create_proxy(
369 CHILDREN_RANGE&& children,
370 bool nullable = true,
371 std::optional<std::string_view> name = std::nullopt,
372 std::optional<METADATA_RANGE> metadata = std::nullopt
373 ) -> arrow_proxy;
374
// Shared implementation used by both create_proxy overloads: takes the validity bitmap
// as an optional (std::nullopt means no bitmap was supplied).
394 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
395 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
396 [[nodiscard]] static auto create_proxy_impl(
397 CHILDREN_RANGE&& children,
398 std::optional<validity_bitmap>&& bitmap,
399 std::optional<std::string_view> name = std::nullopt,
400 std::optional<METADATA_RANGE> metadata = std::nullopt
401 ) -> arrow_proxy;
402
// Container type used for the child array wrappers.
403 using children_type = std::vector<cloning_ptr<array_wrapper>>;
404
413
422
431
440
451
462
472
473
474 // data members
476
477 // friend classes
// The CRTP base is granted access to this class's non-public members.
478 friend class array_crtp_base<self_type>;
479
480 // needs access to this->value(i)
483 };
484
// Out-of-class definition of the create_proxy overload that takes an explicit validity
// input. (The line naming the function, listing line 487, is not present in this listing.)
// Parameters:
//   children        - child arrays, one per struct field
//   validity_input  - any validity_bitmap_input; converted to a validity_bitmap below
//   name, metadata  - optional name and metadata, forwarded unchanged
// Returns the arrow_proxy produced by create_proxy_impl.
// NOTE(review): children.empty() and children[0] are used, which std::ranges::input_range
// alone does not guarantee - confirm callers only pass indexable containers.
485 template <std::ranges::input_range CHILDREN_RANGE, validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
486 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
488 CHILDREN_RANGE&& children,
489 VB&& validity_input,
490 std::optional<std::string_view> name,
491 std::optional<METADATA_RANGE> metadata
492 ) -> arrow_proxy
493 {
// The struct array's length is the first child's length, or 0 when there are no children.
494 const auto size = children.empty() ? 0 : children[0].size();
// Turn whatever validity input was given into a bitmap of exactly `size` entries.
495 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
// The moved bitmap is converted to the std::optional<validity_bitmap> the helper expects.
496 return create_proxy_impl(
497 std::forward<CHILDREN_RANGE>(children),
498 std::move(vbitmap),
499 std::move(name),
500 std::move(metadata)
501 );
502 }
503
// Out-of-class definition of the create_proxy overload driven by a `nullable` flag.
// (The line naming the function, listing line 506, is not present in this listing.)
// Parameters:
//   children        - child arrays, one per struct field
//   nullable        - when true a validity bitmap is passed on; when false std::nullopt is
//   name, metadata  - optional name and metadata, forwarded unchanged
// Returns the arrow_proxy produced by create_proxy_impl.
// NOTE(review): children.empty() and children[0] are used, which std::ranges::input_range
// alone does not guarantee - confirm callers only pass indexable containers.
504 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
505 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
507 CHILDREN_RANGE&& children,
508 bool nullable,
509 std::optional<std::string_view> name,
510 std::optional<METADATA_RANGE> metadata
511 ) -> arrow_proxy
512 {
// The struct array's length is the first child's length, or 0 when there are no children.
513 const size_t size = children.empty() ? 0 : children[0].size();
514 return create_proxy_impl(
515 std::forward<CHILDREN_RANGE>(children),
// Nullable: build a validity_bitmap in place from (nullptr, size).
// NOTE(review): presumably this means "no storage, all entries valid" - confirm
// against the validity_bitmap constructor.
516 nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
517 std::move(name),
518 std::move(metadata)
519 );
520 }
521
// Out-of-class definition of the create_proxy_impl helper declared in struct_array.
// (The line naming the function, listing line 524, is not present in this listing.)
// Extracts the Arrow array/schema pair from every child, builds a parent schema with
// format string "+s" and a parent array holding one buffer (the validity bitmap), and
// returns both wrapped in an arrow_proxy.
// Parameters:
//   children        - child arrays; every child must have the same size as the first
//   bitmap          - validity bitmap, or std::nullopt when none is supplied
//   name, metadata  - optional name and metadata stored in the schema
// NOTE(review): children.size(), children.empty() and children[i] are used, which
// std::ranges::input_range alone does not guarantee - confirm callers only pass
// indexable, sized containers.
522 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
523 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
525 CHILDREN_RANGE&& children,
526 std::optional<validity_bitmap>&& bitmap,
527 std::optional<std::string_view> name,
528 std::optional<METADATA_RANGE> metadata
529 ) -> arrow_proxy
530 {
531 const auto n_children = children.size();
// Raw pointer tables passed below as the `children` argument of the schema and array.
// NOTE(review): nothing in this function frees them or the elements allocated in the
// loop; presumably ownership passes to the Arrow structures built below (children
// ownership flags are all true) - confirm, and check the behaviour if a later step throws.
532 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
533 ArrowArray** child_arrays = new ArrowArray*[n_children];
534
// Length of the struct array: the first child's length, or 0 without children.
535 const auto size = children.empty() ? 0 : children[0].size();
536
537 for (std::size_t i = 0; i < n_children; ++i)
538 {
539 auto& child = children[i];
// All fields must have the same number of elements.
540 SPARROW_ASSERT_TRUE(child.size() == size);
// Take the child's ArrowArray/ArrowSchema out of it (the child is moved from) and
// store heap-allocated copies in the pointer tables.
541 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
542 child_arrays[i] = new ArrowArray(std::move(flat_arr));
543 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
544 }
545
// Without a bitmap the null count is 0 and no flags are set; with one, the null count
// comes from the bitmap and the schema is flagged NULLABLE.
546 const bool bitmap_has_value = bitmap.has_value();
547 const auto null_count = bitmap_has_value ? bitmap->null_count() : 0;
548 const auto flags = bitmap_has_value
549 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
550 : std::nullopt;
551
// NOTE(review): the line opening this call (listing line 552) is not shown; the
// argument comments match a schema-building call whose result is later used as `schema`.
553 std::string("+s"), // format
554 std::move(name), // name
555 std::move(metadata), // metadata
556 flags, // flags,
557 child_schemas, // children
558 repeat_view<bool>(true, n_children), // children_ownership
559 nullptr, // dictionary
560 true // dictionary ownership
561 );
562
// Move the bitmap's storage out when present; otherwise use a buffer built from (nullptr, 0).
563 buffer<uint8_t> bitmap_buffer = bitmap_has_value ? std::move(*bitmap).extract_storage()
564 : buffer<uint8_t>{nullptr, 0};
565
// The parent array carries exactly one buffer: the validity bitmap.
566 std::vector<buffer<std::uint8_t>> arr_buffs(1);
567 arr_buffs[0] = std::move(bitmap_buffer);
568
// NOTE(review): the line opening this call (listing line 569) is not shown; the
// argument comments match an array-building call whose result is later used as `arr`.
570 static_cast<std::int64_t>(size), // length
571 static_cast<std::int64_t>(null_count), // null_count
572 0, // offset
573 std::move(arr_buffs),
574 child_arrays, // children
575 repeat_view<bool>(true, n_children), // children_ownership
576 nullptr, // dictionary
577 true // dictionary ownership
578 );
579 return arrow_proxy{std::move(arr), std::move(schema)};
580 }
581
// Out-of-class definition of struct_array::add_child: appends one child array (field).
// (The line naming the function, listing line 583, is not present in this listing.)
//   child - a layout or array whose size must equal this struct array's size
582 template <layout_or_array A>
584 {
// A new field must have as many elements as the struct array itself.
585 SPARROW_ASSERT_TRUE(child.size() == size());
// Take the Arrow array/schema pair out of the child and hand both to the proxy.
586 auto [array, schema] = extract_arrow_structures(std::forward<A>(child));
587 get_arrow_proxy().add_child(std::move(array), std::move(schema));
// Keep m_children in step: wrap a view of the proxy's newly added last child.
588 m_children.emplace_back(array_factory(get_arrow_proxy().children().back().view()));
589 }
590
// Appends every element of `children` as a child array (field) of the struct.
//   children - range of children; each element's size must equal this array's size
// NOTE(review): listing line 592 (presumably a requires-clause) is not shown. The body
// traverses `children` twice and calls children.size(), which std::ranges::input_range
// alone does not guarantee - confirm the actual constraint.
591 template <std::ranges::input_range R>
593 void struct_array::add_children(R&& children)
594 {
// Check every size up front, before the first child is added.
595 for (const auto& child : children)
596 {
597 SPARROW_ASSERT_TRUE(child.size() == size());
598 }
// Reserve room for the incoming wrappers ahead of the per-child emplace_back in add_child.
599 m_children.reserve(m_children.size() + children.size());
600 for (auto&& child : children)
601 {
// Forward each element with its own value category; add_child repeats the size check.
602 add_child(std::forward<decltype(child)>(child));
603 }
604 }
605
// Replaces the child array (field) at position `index`.
//   child - a layout or array whose size must equal this struct array's size
//   index - position of the child to replace
// NOTE(review): `index` is not range-checked in this function before m_children[index]
// is written; whether the proxy's set_child validates it is not visible here - confirm.
606 template <layout_or_array A>
607 void struct_array::set_child(A&& child, size_t index)
608 {
// The replacement field must have as many elements as the struct array itself.
609 SPARROW_ASSERT_TRUE(child.size() == size());
// Take the Arrow array/schema pair out of the child and install it in the proxy.
610 auto [array, schema] = extract_arrow_structures(std::forward<A>(child));
611 get_arrow_proxy().set_child(index, std::move(array), std::move(schema));
// Rebuild the matching wrapper from a view of the proxy's child at that position.
612 m_children[index] = array_factory(get_arrow_proxy().children()[index].view());
613 }
614}
615
616#if defined(__cpp_lib_format)
617
618
// std::formatter specialization for sparrow::struct_array.
// Only compiled when __cpp_lib_format is defined (see the enclosing #if).
619 template <>
620 struct std::formatter<sparrow::struct_array>
621 {
// Returns the start of the parse context unchanged: no format-spec characters are consumed.
622 constexpr auto parse(std::format_parse_context& ctx)
623 {
624 return ctx.begin();
625 }
626
// Writes `struct_array` to the context's output and returns the output iterator.
// Declared here only; the definition is not part of this listing.
627 SPARROW_API auto format(const sparrow::struct_array& struct_array, std::format_context& ctx) const
628 -> decltype(ctx.out());
629 };
630
631 namespace sparrow
632 {
// Stream insertion for struct_array; returns the stream it was given a reference to.
// Declared here only (definition not in this listing) and, like the formatter above,
// only available when __cpp_lib_format is defined.
633 SPARROW_API std::ostream& operator<<(std::ostream& os, const struct_array& value);
634 }
635
636#endif
void struct_array()
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:43
Object that owns a piece of contiguous memory.
Definition buffer.hpp:113
A view that repeats a value a given number of times.
SPARROW_API inner_reference value(size_type i)
Gets mutable reference to struct at specified index.
array_inner_types< self_type > inner_types
SPARROW_API children_type make_children()
Creates the children array wrappers.
void add_child(A &&child)
Adds a child array to the struct.
static auto create_proxy_impl(CHILDREN_RANGE &&children, std::optional< validity_bitmap > &&bitmap, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt) -> arrow_proxy
Implementation helper for creating Arrow proxy from components.
SPARROW_API const_value_iterator value_cbegin() const
Gets const iterator to beginning of value range.
SPARROW_API struct_array(const struct_array &rhs)
Copy constructor.
SPARROW_API struct_array(arrow_proxy proxy)
Constructs struct array from Arrow proxy.
SPARROW_API value_iterator value_end()
Gets iterator to end of value range.
SPARROW_API void pop_children(size_t n)
Removes the last n children from the struct.
auto names() const
Gets the names of all child arrays.
void set_child(A &&child, size_t index)
Sets a child array at the specified index.
SPARROW_API inner_const_reference value(size_type i) const
Gets const reference to struct at specified index.
children_type m_children
Collection of child arrays (fields)
SPARROW_API array_wrapper * raw_child(std::size_t i)
Gets mutable pointer to child array at specified index.
void add_children(R &&children)
Adds multiple children to the struct array.
SPARROW_API const array_wrapper * raw_child(std::size_t i) const
Gets const pointer to child array at specified index.
nullable< inner_value_type > value_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
SPARROW_API value_iterator value_begin()
Gets iterator to beginning of value range.
struct_value inner_reference
struct_array(Args &&... args)
Generic constructor for creating struct array from various inputs.
typename base_type::size_type size_type
typename inner_types::value_iterator value_iterator
base_type::iterator_tag iterator_tag
struct_value inner_value_type
std::vector< cloning_ptr< array_wrapper > > children_type
typename base_type::bitmap_type bitmap_type
struct_array(struct_array &&)=default
struct_array & operator=(struct_array &&)=default
static auto create_proxy(CHILDREN_RANGE &&children, VB &&bitmaps, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt) -> arrow_proxy
Creates Arrow proxy from children arrays with explicit validity bitmap.
struct_value inner_const_reference
SPARROW_API size_type children_count() const
Gets the number of child arrays (fields).
base_type::const_bitmap_range const_bitmap_range
SPARROW_API const_value_iterator value_cend() const
Gets const iterator to end of value range.
SPARROW_API struct_array & operator=(const struct_array &rhs)
Copy assignment operator.
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::const_value_iterator const_value_iterator
array_bitmap_base< self_type > base_type
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:304
Concept defining valid input types for validity bitmap creation.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
constexpr bool is_struc_array_v
Type trait to check if a type is a struct_array.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:110
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::random_access_iterator_tag iterator_tag
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.