sparrow 2.3.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
arrow_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstdint>
18#include <ranges>
19#include <type_traits>
20
21#if defined(__cpp_lib_format)
22# include <format>
23#endif
24
30
31namespace sparrow
32{
33 namespace copy_tracker
34 {
35 template <>
36 inline std::string key<ArrowArray>()
37 {
38 return "ArrowArray";
39 }
40 }
41
/**
 * Creates an ArrowArray.
 *
 * Forward declaration; a definition with the same template header follows
 * further down in this header.
 *
 * The definition asserts (via SPARROW_ASSERT_TRUE) that `length >= 0`,
 * `null_count >= -1`, `offset >= 0`, and that `children_ownership` is empty
 * exactly when `children` is `nullptr`.
 *
 * @tparam B Type of the buffer collection; `arrow_array_private_data::BufferType`
 *           must be constructible from it.
 * @tparam CHILDREN_OWNERSHIP Input range whose value type is exactly `bool`.
 * @param length Must be non-negative.
 * @param null_count Must be at least -1.
 * @param offset Must be non-negative.
 * @param buffers Buffer collection, taken by value.
 * @param children Array of child pointers; `nullptr` when there are no children.
 * @param children_ownership Range of flags, empty when `children` is `nullptr`.
 *        NOTE(review): presumably one ownership flag per child - confirm against
 *        `arrow_array_private_data`.
 * @param dictionary Pointer to a dictionary array.
 *        NOTE(review): presumably may be `nullptr` - `make_empty_arrow_array`
 *        passes `nullptr` here.
 * @param dictionary_ownership Flag accompanying `dictionary`.
 *        NOTE(review): presumably whether the created array owns the dictionary - confirm.
 * @return The created ArrowArray, by value.
 */
60 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
61 requires std::constructible_from<arrow_array_private_data::BufferType, B>
62 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
63 [[nodiscard]] ArrowArray make_arrow_array(
64 int64_t length,
65 int64_t null_count,
66 int64_t offset,
67 B buffers,
68 ArrowArray** children,
69 const CHILDREN_OWNERSHIP& children_ownership,
70 ArrowArray* dictionary,
71 bool dictionary_ownership
72 );
73
78
84
/**
 * Fill an ArrowArray object.
 *
 * NOTE(review): this listing omits several lines of the definition (the line
 * carrying the function name and first parameter, the last parameter, and two
 * arguments of the `arrow_array_private_data` constructor call). The symbol
 * index of this page gives the signature as
 * `void fill_arrow_array(ArrowArray& array, int64_t length, int64_t null_count,
 * int64_t offset, B buffers, ArrowArray** children,
 * const CHILDREN_OWNERSHIP& children_ownership, ArrowArray* dictionary,
 * bool dictionary_ownership)` - confirm against the real header.
 *
 * Checked with SPARROW_ASSERT_TRUE: `length >= 0`, `null_count >= -1`,
 * `offset >= 0`, and `children_ownership` is empty exactly when `children`
 * is `nullptr`.
 *
 * The visible body writes `length`, `null_count`, `offset`, `n_buffers`,
 * `private_data`, `buffers`, `n_children`, `children`, `dictionary` and
 * `release` on `array`.
 */
103 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
104 requires(
105 std::constructible_from<arrow_array_private_data::BufferType, B>
106 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
107 )
110 int64_t length,
111 int64_t null_count,
112 int64_t offset,
113 B buffers,
114 ArrowArray** children,
115 const CHILDREN_OWNERSHIP& children_ownership,
116 ArrowArray* dictionary,
118 )
119 {
120 SPARROW_ASSERT_TRUE(length >= 0);
121 SPARROW_ASSERT_TRUE(null_count >= -1);
122 SPARROW_ASSERT_TRUE(offset >= 0);
// An empty ownership range and a null children pointer must go together.
123 SPARROW_ASSERT_TRUE((children_ownership.size() == 0) == (children == nullptr));
124
125 array.length = length;
126 array.null_count = null_count;
127 array.offset = offset;
// Take the buffer count here: `buffers` is moved from in the statement below.
128 array.n_buffers = sparrow::ssize(buffers);
// Heap-allocated object stored behind the `private_data` pointer; it is
// constructed from the moved buffers (remaining constructor arguments are not
// visible in this listing).
// NOTE(review): presumably freed by `release_arrow_array` - confirm.
129 array.private_data = new arrow_array_private_data(
130 std::move(buffers),
133 );
134 const auto private_data = static_cast<arrow_array_private_data*>(array.private_data);
// The buffer pointer table exposed on the array comes from the private data object.
135 array.buffers = private_data->buffers_ptrs<void>();
// The child count is taken from the size of the ownership range.
136 array.n_children = static_cast<int64_t>(children_ownership.size());
137 array.children = children;
138 array.dictionary = dictionary;
// Install the release callback for this array.
139 array.release = release_arrow_array;
140 }
141
/**
 * Creates an ArrowArray (definition of the template declared earlier in this header).
 *
 * NOTE(review): this listing omits several lines of the definition (the line
 * carrying the return type and function name, the last parameter, the
 * statements that declare `array` and begin the call whose arguments are shown
 * below, and two of that call's arguments). The symbol index of this page
 * gives the signature as `ArrowArray make_arrow_array(int64_t length,
 * int64_t null_count, int64_t offset, B buffers, ArrowArray** children,
 * const CHILDREN_OWNERSHIP& children_ownership, ArrowArray* dictionary,
 * bool dictionary_ownership)` - confirm against the real header.
 *
 * Checked with SPARROW_ASSERT_TRUE: `length >= 0`, `null_count >= -1`,
 * `offset >= 0`, `buffers.size() >= 0`, and `children_ownership` is empty
 * exactly when `children` is `nullptr`.
 *
 * @return `array`, by value.
 */
142 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
143 requires std::constructible_from<arrow_array_private_data::BufferType, B>
144 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
146 int64_t length,
147 int64_t null_count,
148 int64_t offset,
149 B buffers,
150 ArrowArray** children,
151 const CHILDREN_OWNERSHIP& children_ownership,
152 ArrowArray* dictionary,
154 )
155 {
156 SPARROW_ASSERT_TRUE(length >= 0);
157 SPARROW_ASSERT_TRUE(null_count >= -1);
158 SPARROW_ASSERT_TRUE(offset >= 0);
// NOTE(review): if `B::size()` returns an unsigned type this check can never
// fail - confirm whether it is intentional.
159 SPARROW_ASSERT_TRUE(buffers.size() >= 0);
// An empty ownership range and a null children pointer must go together.
160 SPARROW_ASSERT_TRUE((children_ownership.size() == 0) == (children == nullptr));
161
// Arguments of a call whose callee line is missing from this listing.
// NOTE(review): presumably `fill_arrow_array` - confirm. `buffers` is moved
// into that call.
164 array,
165 length,
166 null_count,
167 offset,
168 std::move(buffers),
169 children,
171 dictionary,
173 );
174 return array;
175 }
176
177 [[nodiscard]] inline ArrowArray make_empty_arrow_array()
178 {
179 using buffer_type = arrow_array_private_data::BufferType;
180 return make_arrow_array(0, 0, 0, buffer_type{}, nullptr, repeat_view<bool>(true, 0), nullptr, false);
181 }
182
184
186
// NOTE(review): the declarator line that should follow this return type is
// missing from this listing. The symbol index of this page lists
// `get_arrow_array_buffers(const ArrowArray& array, const ArrowSchema& schema)`
// with this return type - confirm against the real header.
187 [[nodiscard]] SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>
189
/**
 * Swaps the contents of the two ArrowArray objects.
 *
 * Declared `noexcept`; the definition is not part of this header.
 */
193 SPARROW_API void swap(ArrowArray& lhs, ArrowArray& rhs) noexcept;
194
/**
 * Fill the target ArrowArray with a deep copy of the data from the source ArrowArray.
 *
 * @param source_array Array to copy from.
 * @param source_schema Schema passed together with `source_array`.
 *        NOTE(review): presumably describes the layout of `source_array` - confirm.
 * @param target Array that receives the copy.
 */
201 SPARROW_API void
202 copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema, ArrowArray& target);
203
207 [[nodiscard]] inline ArrowArray copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema)
208 {
209 ArrowArray target{};
210 copy_array(source_array, source_schema, target);
211 return target;
212 }
213
218 [[nodiscard]] inline ArrowArray move_array(ArrowArray&& source)
219 {
220 ArrowArray target = source;
221 source = ArrowArray{};
222 return target;
223 }
224
229 [[nodiscard]] inline ArrowArray move_array(ArrowArray& source)
230 {
231 return move_array(std::move(source));
232 }
233
238
/// `std::unique_ptr` to an ArrowArray that uses `arrow_array_deleter` as its deleter.
239 using array_unique_ptr = std::unique_ptr<ArrowArray, arrow_array_deleter>;
240};
241
242#if defined(__cpp_lib_format)
243
/**
 * `std::formatter` specialization that renders an ArrowArray as a multi-line,
 * human-readable dump: the object's address, `length`, `null_count`, `offset`,
 * `n_buffers` with the buffer pointers, `n_children` with the child pointers,
 * and the `dictionary` pointer.
 *
 * Only pointer values are printed; neither children, buffers nor the
 * dictionary are dereferenced beyond reading the pointer tables themselves.
 */
template <>
struct std::formatter<ArrowArray>
{
    /// Accepts the format specification as-is; no options are parsed.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * Writes the dump of `obj` to the output iterator of `ctx`.
     *
     * @param obj Array to describe.
     * @param ctx Formatting context providing the output iterator.
     * @return The output iterator past the written text.
     */
    auto format(const ArrowArray& obj, std::format_context& ctx) const
    {
        std::string children_str = std::format("{}", static_cast<const void*>(obj.children));
        // Skip the table when the pointer is null, so that a malformed array
        // (null table, non-zero count) can still be printed for debugging.
        if (obj.children != nullptr)
        {
            // The counter has the same type as `n_children` (int64_t) to avoid overflow.
            for (int64_t i = 0; i < obj.n_children; ++i)
            {
                children_str += std::format("\n-{}", static_cast<const void*>(obj.children[i]));
            }
        }

        std::string buffer_str = std::format("{}", static_cast<const void*>(obj.buffers));
        if (obj.buffers != nullptr)
        {
            for (int64_t i = 0; i < obj.n_buffers; ++i)
            {
                buffer_str += std::format("\n\t- {}", obj.buffers[i]);
            }
        }

        return std::format_to(
            ctx.out(),
            "ArrowArray - ptr address: {}\n- length: {}\n- null_count: {}\n- offset: {}\n- n_buffers: {}\n- buffers: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n",
            static_cast<const void*>(&obj),
            obj.length,
            obj.null_count,
            obj.offset,
            obj.n_buffers,
            buffer_str,
            obj.n_children,
            children_str,
            static_cast<const void*>(obj.dictionary)
        );
    }
};
281
/**
 * Streams the `std::format("{}", value)` representation of an ArrowArray.
 *
 * @param os Output stream to write to.
 * @param value Array to print.
 * @return `os`, to allow chaining.
 */
inline std::ostream& operator<<(std::ostream& os, const ArrowArray& value)
{
    return os << std::format("{}", value);
}
287
288#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:43
auto children() const
SPARROW_API std::int64_t null_count() const
SPARROW_API size_type offset() const
SPARROW_API std::optional< array > dictionary() const
Retrieves the dictionary array associated with this array, if it exists.
Private data for ArrowArray.
std::vector< buffer< std::uint8_t > > BufferType
A view that repeats a value a given number of times.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
std::ostream & operator<<(std::ostream &stream, primesum::uint128_t n)
The __int128_t type (GCC/Clang) is not well supported by the C++ standard library (in 2016) so we hav...
Definition int128_t.hpp:48
std::string key< ArrowArray >()
ArrowArray make_empty_arrow_array()
void fill_arrow_array(ArrowArray &array, int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Fill an ArrowArray object.
constexpr int64_t ssize(const T &value)
Get the size of a range, a tuple or an optional.
SPARROW_API void copy_array(const ArrowArray &source_array, const ArrowSchema &source_schema, ArrowArray &target)
Fill the target ArrowArray with a deep copy of the data from the source ArrowArray.
SPARROW_API void empty_release_arrow_array(ArrowArray *array)
Empty release function to use for the ArrowArray.release member.
std::unique_ptr< ArrowArray, arrow_array_deleter > array_unique_ptr
SPARROW_API sparrow::buffer_view< uint8_t > get_bitmap_buffer(const ArrowArray &array)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > get_arrow_array_buffers(const ArrowArray &array, const ArrowSchema &schema)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs) noexcept
Swaps the contents of the two ArrowArray objects.
SPARROW_API void release_arrow_array(ArrowArray *array)
Release function to use for the ArrowArray.release member.
ArrowArray move_array(ArrowArray &&source)
Moves the content of source into a stack-allocated array, and resets the source to an empty ArrowArray...
int64_t n_children
int64_t null_count
struct ArrowArray ** children
int64_t offset
struct ArrowArray * dictionary
const void ** buffers
int64_t n_buffers
int64_t length
SPARROW_API void operator()(ArrowArray *array) const