sparrow 0.9.0
Loading...
Searching...
No Matches
struct_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string_view>
19#include <version>
20
22
23#if defined(__cpp_lib_format)
25#endif
26
27#include "sparrow/array_api.hpp"
37
38namespace sparrow
39{
40 class struct_array;
41
42 template <>
54
// Checks whether T is a struct_array type.
// The test uses std::same_as, so only the exact type struct_array matches;
// reference- or cv-qualified variants of it evaluate to false.
// NOTE(review): the identifier is spelled "struc" (no 't'); presumably a typo, but it is
// part of the public name, so renaming it would break existing users.
58 template <class T>
59 constexpr bool is_struc_array_v = std::same_as<T, struct_array>;
60
// Array class for struct values, built on the CRTP base array_bitmap_base<struct_array>.
// The children are held as a vector of cloning_ptr<array_wrapper> (m_children).
// NOTE(review): this listing skips several source lines (see the gaps in the embedded
// line numbers), so some type aliases, constructors and friend declarations of the
// class are not visible here.
61 class struct_array final : public array_bitmap_base<struct_array>
62 {
63 public:
64
68 using value_iterator = typename inner_types::value_iterator;
69 using const_value_iterator = typename inner_types::const_value_iterator;
71
74
76
80
84
86
// Generic constructor: perfectly forwards its arguments to one of the private
// create_proxy overloads and delegates to another struct_array constructor with the result.
// NOTE(review): source line 88 is not shown in this listing; it presumably holds a
// constraint on Args - confirm against the real header.
87 template <class... Args>
89 explicit struct_array(Args&&... args)
90 : struct_array(create_proxy(std::forward<Args>(args)...))
91 {
92 }
93
96
99
// Declared here, defined out of line (exported with SPARROW_API).
100 [[nodiscard]] SPARROW_API size_type children_count() const;
101
// Const and non-const access to the i-th child as a raw array_wrapper pointer.
// NOTE(review): behaviour for an out-of-range i is not visible from this header.
102 [[nodiscard]] SPARROW_API const array_wrapper* raw_child(std::size_t i) const;
103 [[nodiscard]] SPARROW_API array_wrapper* raw_child(std::size_t i);
104
105
106 private:
107
// create_proxy overload taking explicit validity information (parameter `bitmaps`).
// NOTE(review): the template parameter VB used below is declared on source line 110,
// which this listing omits; the out-of-class definition declares it as
// `validity_bitmap_input VB`.
108 template <
109 std::ranges::input_range CHILDREN_RANGE,
111 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
112 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
113 [[nodiscard]] static auto create_proxy(
114 CHILDREN_RANGE&& children,
115 VB&& bitmaps,
116 std::optional<std::string_view> name = std::nullopt,
117 std::optional<METADATA_RANGE> metadata = std::nullopt
118 ) -> arrow_proxy;
119
// create_proxy overload taking only a `nullable` flag (defaults to true) instead of
// explicit validity information.
120 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
121 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
122 [[nodiscard]] static auto create_proxy(
123 CHILDREN_RANGE&& children,
124 bool nullable = true,
125 std::optional<std::string_view> name = std::nullopt,
126 std::optional<METADATA_RANGE> metadata = std::nullopt
127 ) -> arrow_proxy;
128
// Shared implementation used by both create_proxy overloads; the validity bitmap is
// optional here (an empty optional means no bitmap was supplied).
129 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
130 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
131 [[nodiscard]] static auto create_proxy_impl(
132 CHILDREN_RANGE&& children,
133 std::optional<validity_bitmap>&& bitmap,
134 std::optional<std::string_view> name = std::nullopt,
135 std::optional<METADATA_RANGE> metadata = std::nullopt
136 ) -> arrow_proxy;
137
// Storage type for the child arrays.
138 using children_type = std::vector<cloning_ptr<array_wrapper>>;
139
// Value iteration / element access hooks. They are private; array_crtp_base<self_type>
// is declared a friend below, presumably so the base class can call them.
140 [[nodiscard]] SPARROW_API value_iterator value_begin();
141 [[nodiscard]] SPARROW_API value_iterator value_end();
142 [[nodiscard]] SPARROW_API const_value_iterator value_cbegin() const;
143 [[nodiscard]] SPARROW_API const_value_iterator value_cend() const;
144 [[nodiscard]] SPARROW_API inner_reference value(size_type i);
145 [[nodiscard]] SPARROW_API inner_const_reference value(size_type i) const;
146
// Builds the children_type container; defined out of line.
147 [[nodiscard]] SPARROW_API children_type make_children();
148
149 // data members
150 children_type m_children;
151
152 // friend classes
153 friend class array_crtp_base<self_type>;
154
// NOTE(review): the friend declaration(s) this comment refers to (source lines 156-157)
// are not shown in this listing.
155 // needs access to this->value(i)
158 };
159
160 template <std::ranges::input_range CHILDREN_RANGE, validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
161 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
162 auto struct_array::create_proxy(
163 CHILDREN_RANGE&& children,
164 VB&& validity_input,
165 std::optional<std::string_view> name,
166 std::optional<METADATA_RANGE> metadata
167 ) -> arrow_proxy
168 {
169 const auto size = children.empty() ? 0 : children[0].size();
170 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
171 return create_proxy_impl(
172 std::forward<std::vector<array>>(children),
173 std::move(vbitmap),
174 std::move(name),
175 std::move(metadata)
176 );
177 }
178
179 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
180 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
181 auto struct_array::create_proxy(
182 CHILDREN_RANGE&& children,
183 bool nullable,
184 std::optional<std::string_view> name,
185 std::optional<METADATA_RANGE> metadata
186 ) -> arrow_proxy
187 {
188 const size_t size = children.empty() ? 0 : children[0].size();
189 return create_proxy_impl(
190 std::forward<std::vector<array>>(children),
191 nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
192 std::move(name),
193 std::move(metadata)
194 );
195 }
196
197 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
198 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
199 auto struct_array::create_proxy_impl(
200 CHILDREN_RANGE&& children,
201 std::optional<validity_bitmap>&& bitmap,
202 std::optional<std::string_view> name,
203 std::optional<METADATA_RANGE> metadata
204 ) -> arrow_proxy
205 {
206 const auto n_children = children.size();
207 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
208 ArrowArray** child_arrays = new ArrowArray*[n_children];
209
210 const auto size = children.empty() ? 0 : children[0].size();
211
212 for (std::size_t i = 0; i < n_children; ++i)
213 {
214 auto& child = children[i];
215 SPARROW_ASSERT_TRUE(child.size() == size);
216 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
217 child_arrays[i] = new ArrowArray(std::move(flat_arr));
218 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
219 }
220
221 const bool bitmap_has_value = bitmap.has_value();
222 const auto null_count = bitmap_has_value ? bitmap->null_count() : 0;
223 const auto flags = bitmap_has_value
224 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
225 : std::nullopt;
226
227 ArrowSchema schema = make_arrow_schema(
228 std::string("+s"), // format
229 std::move(name), // name
230 std::move(metadata), // metadata
231 flags, // flags,
232 child_schemas, // children
233 repeat_view<bool>(true, n_children), // children_ownership
234 nullptr, // dictionary
235 true // dictionary ownership
236 );
237
238 buffer<uint8_t> bitmap_buffer = bitmap_has_value ? std::move(*bitmap).extract_storage()
239 : buffer<uint8_t>{nullptr, 0};
240
241 std::vector<buffer<std::uint8_t>> arr_buffs(1);
242 arr_buffs[0] = std::move(bitmap_buffer);
243
244 ArrowArray arr = make_arrow_array(
245 static_cast<std::int64_t>(size), // length
246 static_cast<std::int64_t>(null_count), // null_count
247 0, // offset
248 std::move(arr_buffs),
249 child_arrays, // children
250 repeat_view<bool>(true, n_children), // children_ownership
251 nullptr, // dictionary
252 true // dictionary ownership
253 );
254 return arrow_proxy{std::move(arr), std::move(schema)};
255 }
256}
257
258#if defined(__cpp_lib_format)
259
/// std::formatter specialization enabling std::format for sparrow::struct_array.
template <>
struct std::formatter<sparrow::struct_array>
{
    /// Consumes nothing from the format specification: the iterator returned is the
    /// beginning of the parse context.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        auto spec_begin = ctx.begin();
        return spec_begin;
    }

    /// Writes @p struct_array to the output of @p ctx and returns the resulting output
    /// iterator. Declared here only; the definition is exported with SPARROW_API.
    SPARROW_API auto format(const sparrow::struct_array& struct_array, std::format_context& ctx) const
        -> decltype(ctx.out());
};
271
272namespace sparrow
273{
// Stream insertion operator for struct_array; declared here, defined out of line.
// Note that this declaration sits inside the `#if defined(__cpp_lib_format)` region, so
// it is only available when the standard library provides <format>.
274 SPARROW_API std::ostream& operator<<(std::ostream& os, const sparrow::struct_array& value);
275}
276
277#endif
void struct_array()
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class defining common immutable interface for arrays with a bitmap.
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Proxy class over ArrowArray and ArrowSchema.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:281
A view that repeats a value a given number of times.
array_inner_types< self_type > inner_types
SPARROW_API struct_array(arrow_proxy proxy)
SPARROW_API struct_array(const struct_array &)
SPARROW_API array_wrapper * raw_child(std::size_t i)
SPARROW_API const array_wrapper * raw_child(std::size_t i) const
nullable< inner_value_type > value_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
struct_value inner_reference
struct_array(Args &&... args)
typename base_type::size_type size_type
typename inner_types::value_iterator value_iterator
base_type::iterator_tag iterator_tag
struct_value inner_value_type
typename base_type::bitmap_type bitmap_type
struct_array(struct_array &&)=default
struct_array & operator=(struct_array &&)=default
struct_value inner_const_reference
SPARROW_API struct_array & operator=(const struct_array &)
SPARROW_API size_type children_count() const
base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::const_value_iterator const_value_iterator
array_bitmap_base< self_type > base_type
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
constexpr bool is_struc_array_v
Checks whether T is a struct_array type.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
Definition nullable.hpp:933
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::random_access_iterator_tag iterator_tag
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.