sparrow 0.3.0
Loading...
Searching...
No Matches
struct_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <string_view>
18#include <version>
19#if defined(__cpp_lib_format)
21#endif
22#include <ranges>
23
24#include "sparrow/array_api.hpp"
36
37namespace sparrow
38{
39 class struct_array;
40
41 template <>
53
// Checks whether T is a struct_array type: true only when T is exactly
// struct_array (std::same_as), so cv-qualified or reference types do not match.
57 template <class T>
58 constexpr bool is_struc_array_v = std::same_as<T, struct_array>;
59
// Array class for the struct layout: create_proxy() builds its Arrow schema with
// the format string "+s", and every child is kept as a type-erased array_wrapper
// in m_children. It derives (CRTP) from array_bitmap_base<struct_array>.
// NOTE(review): this listing omits several original lines (type aliases,
// constructors, assignment operators); consult the real header for the full API.
60 class struct_array final : public array_bitmap_base<struct_array>
61 {
62 public:
63
// Iterator types over the values, taken from inner_types.
67 using value_iterator = typename inner_types::value_iterator;
68 using const_value_iterator = typename inner_types::const_value_iterator;
70
73
75
79
83
85
// Variadic constructor: forwards every argument to create_proxy() and delegates
// to another struct_array constructor with the result (an arrow_proxy).
// NOTE(review): original line 87, between the template header and the signature,
// is missing from this listing — presumably a constraint on Args; confirm.
86 template <class... Args>
88 explicit struct_array(Args&&... args)
89 : struct_array(create_proxy(std::forward<Args>(args)...))
90 {
91 }
92
95
98
// Presumably the number of child arrays — defined out of line, confirm there.
99 [[nodiscard]] SPARROW_API size_type children_count() const;
100
// Pointer to the type-erased wrapper of child i (const and mutable overloads).
// NOTE(review): bounds handling and ownership are not visible here — presumably
// non-owning; confirm against the implementation.
101 [[nodiscard]] SPARROW_API const array_wrapper* raw_child(std::size_t i) const;
102 [[nodiscard]] SPARROW_API array_wrapper* raw_child(std::size_t i);
103
104 private:
105
// Builds the arrow_proxy consumed by the variadic constructor above.
// Defaults: a default-constructed validity_bitmap, no name, no metadata.
// The second parameter is called `bitmaps` here and `validity_input` in the
// out-of-line definition; both denote the same argument.
106 template <validity_bitmap_input VB = validity_bitmap>
107 [[nodiscard]] static auto create_proxy(
108 std::vector<array>&& children,
109 VB&& bitmaps = validity_bitmap{},
110 std::optional<std::string_view> name = std::nullopt,
111 std::optional<std::string_view> metadata = std::nullopt
112 ) -> arrow_proxy;
113
// Storage type for the children: one cloning_ptr<array_wrapper> per child.
114 using children_type = std::vector<cloning_ptr<array_wrapper>>;
115
// Value-access hooks. They are private; array_crtp_base<self_type> reaches them
// through the friend declaration below.
116 [[nodiscard]] SPARROW_API value_iterator value_begin();
117 [[nodiscard]] SPARROW_API value_iterator value_end();
118 [[nodiscard]] SPARROW_API const_value_iterator value_cbegin() const;
119 [[nodiscard]] SPARROW_API const_value_iterator value_cend() const;
120 [[nodiscard]] SPARROW_API inner_reference value(size_type i);
121 [[nodiscard]] SPARROW_API inner_const_reference value(size_type i) const;
122
// Presumably creates the wrappers stored in m_children — defined out of line; confirm.
123 [[nodiscard]] SPARROW_API children_type make_children();
124
125 // data members
126 children_type m_children;
127
128 // friend classes
129 friend class array_crtp_base<self_type>;
130
131 // needs access to this->value(i)
134 };
135
136 template <validity_bitmap_input VB>
137 auto struct_array::create_proxy(
138 std::vector<array>&& children,
139 VB&& validity_input,
140 std::optional<std::string_view> name,
141 std::optional<std::string_view> metadata
142 ) -> arrow_proxy
143 {
144 const auto n_children = children.size();
145 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
146 ArrowArray** child_arrays = new ArrowArray*[n_children];
147
148 const auto size = children[0].size();
149
150 for (std::size_t i = 0; i < n_children; ++i)
151 {
152 auto& child = children[i];
153 SPARROW_ASSERT_TRUE(child.size() == size);
154 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
155 child_arrays[i] = new ArrowArray(std::move(flat_arr));
156 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
157 }
158
159 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
160 const auto null_count = vbitmap.null_count();
161
163 std::string("+s"), // format
164 std::move(name), // name
165 std::move(metadata), // metadata
166 std::nullopt, // flags,
167 static_cast<int64_t>(n_children),
168 child_schemas, // children
169 nullptr // dictionary
170 );
171
172 std::vector<buffer<std::uint8_t>> arr_buffs = {std::move(vbitmap).extract_storage()};
173
174 ArrowArray arr = make_arrow_array(
175 static_cast<std::int64_t>(size), // length
176 static_cast<std::int64_t>(null_count),
177 0, // offset
178 std::move(arr_buffs),
179 static_cast<std::size_t>(n_children), // n_children
180 child_arrays, // children
181 nullptr // dictionary
182 );
183 return arrow_proxy{std::move(arr), std::move(schema)};
184 }
185}
186
187#if defined(__cpp_lib_format)
188
/// std::formatter specialization for sparrow::struct_array.
template <>
struct std::formatter<sparrow::struct_array>
{
    /// Interprets no format specifier: returns the beginning of the parse context as is.
    constexpr auto parse(std::format_parse_context& ctx) -> decltype(ctx.begin())
    {
        return ctx.begin();
    }

    /// Formats struct_array into ctx; only declared here, the definition lives out of line.
    SPARROW_API auto format(const sparrow::struct_array& struct_array, std::format_context& ctx) const
        -> decltype(ctx.out());
};
200
201namespace sparrow
202{
203 SPARROW_API std::ostream& operator<<(std::ostream& os, const sparrow::struct_array& value);
204}
205
206#endif
void struct_array()
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class defining common immutable interface for arrays with a bitmap.
Base class for array type erasure.
Proxy class over ArrowArray and ArrowSchema.
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:280
array_inner_types< self_type > inner_types
SPARROW_API struct_array(arrow_proxy proxy)
SPARROW_API struct_array(const struct_array &)
SPARROW_API array_wrapper * raw_child(std::size_t i)
SPARROW_API const array_wrapper * raw_child(std::size_t i) const
nullable< inner_value_type > value_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
struct_value inner_reference
struct_array(Args &&... args)
typename base_type::size_type size_type
typename inner_types::value_iterator value_iterator
base_type::iterator_tag iterator_tag
struct_value inner_value_type
typename base_type::bitmap_type bitmap_type
struct_array(struct_array &&)=default
struct_array & operator=(struct_array &&)=default
struct_value inner_const_reference
SPARROW_API struct_array & operator=(const struct_array &)
SPARROW_API size_type children_count() const
base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::const_value_iterator const_value_iterator
array_bitmap_base< self_type > base_type
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
ArrowSchema make_arrow_schema(F format, N name, M metadata, std::optional< ArrowFlag > flags, int64_t n_children, ArrowSchema **children, ArrowSchema *dictionary)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
constexpr bool is_struc_array_v
Checks whether T is a struct_array type.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
dynamic_bitset< std::uint8_t > validity_bitmap
std::ostream & operator<<(std::ostream &stream, T n)
Definition large_int.hpp:93
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Creates an ArrowArray.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::random_access_iterator_tag iterator_tag
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.