sparrow ..
Loading...
Searching...
No Matches
arrow_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstddef>
18#include <cstdint>
19#include <ranges>
20#include <type_traits>
21
22#if defined(__cpp_lib_format)
23# include <format>
24#endif
25
30
31namespace sparrow
32{
51 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
52 requires std::constructible_from<arrow_array_private_data::BufferType, B>
53 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
54 [[nodiscard]] ArrowArray make_arrow_array(
55 int64_t length,
56 int64_t null_count,
57 int64_t offset,
58 B buffers,
59 ArrowArray** children,
60 const CHILDREN_OWNERSHIP& children_ownership,
61 ArrowArray* dictionary,
63 );
64
69
75
94 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
95 requires(
96 std::constructible_from<arrow_array_private_data::BufferType, B>
97 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
98 )
101 int64_t length,
102 int64_t null_count,
103 int64_t offset,
104 B buffers,
105 ArrowArray** children,
106 const CHILDREN_OWNERSHIP& children_ownership,
107 ArrowArray* dictionary,
109 )
110 {
111 SPARROW_ASSERT_TRUE(length >= 0);
112 SPARROW_ASSERT_TRUE(null_count >= -1);
113 SPARROW_ASSERT_TRUE(offset >= 0);
114 SPARROW_ASSERT_TRUE((children_ownership.size() == 0) == (children == nullptr));
115
116 array.length = length;
117 array.null_count = null_count;
118 array.offset = offset;
119 array.n_buffers = sparrow::ssize(buffers);
120 array.private_data = new arrow_array_private_data(
121 std::move(buffers),
124 );
125 const auto private_data = static_cast<arrow_array_private_data*>(array.private_data);
126 array.buffers = private_data->buffers_ptrs<void>();
127 array.n_children = static_cast<int64_t>(children_ownership.size());
128 array.children = children;
129 array.dictionary = dictionary;
130 array.release = release_arrow_array;
131 }
132
133 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
134 requires std::constructible_from<arrow_array_private_data::BufferType, B>
135 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
137 int64_t length,
138 int64_t null_count,
139 int64_t offset,
140 B buffers,
141 ArrowArray** children,
142 const CHILDREN_OWNERSHIP& children_ownership,
143 ArrowArray* dictionary,
145 )
146 {
147 SPARROW_ASSERT_TRUE(length >= 0);
148 SPARROW_ASSERT_TRUE(null_count >= -1);
149 SPARROW_ASSERT_TRUE(offset >= 0);
150 SPARROW_ASSERT_TRUE(buffers.size() >= 0);
151 SPARROW_ASSERT_TRUE((children_ownership.size() == 0) == (children == nullptr));
152
155 array,
156 length,
157 null_count,
158 offset,
159 std::move(buffers),
160 children,
162 dictionary,
164 );
165 return array;
166 }
167
168 [[nodiscard]] inline ArrowArray make_empty_arrow_array()
169 {
170 using buffer_type = arrow_array_private_data::BufferType;
171 return make_arrow_array(0, 0, 0, buffer_type{}, nullptr, repeat_view<bool>(true, 0), nullptr, false);
172 }
173
175
177
178 [[nodiscard]] SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>
180
185
192 SPARROW_API void
193 copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema, ArrowArray& target);
194
198 [[nodiscard]] inline ArrowArray copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema)
199 {
200 ArrowArray target{};
201 copy_array(source_array, source_schema, target);
202 return target;
203 }
204
209 [[nodiscard]] inline ArrowArray move_array(ArrowArray&& source)
210 {
212 swap(source, target);
213 source.release(&source);
214 return target;
215 }
216
221 [[nodiscard]] inline ArrowArray move_array(ArrowArray& source)
222 {
223 return move_array(std::move(source));
224 }
225};
226
227#if defined(__cpp_lib_format)
228
/**
 * std::formatter specialization that prints the fields of an ArrowArray
 * for debugging: its address, length, null_count, offset, the buffer
 * pointers, the child pointers and the dictionary pointer.
 */
template <>
struct std::formatter<ArrowArray>
{
    /// No format specification is parsed; returns the start of the range.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * Writes a multi-line description of `obj` to the context's output.
     *
     * @param obj The ArrowArray to describe.
     * @param ctx The format context to write into.
     * @return The output iterator returned by std::format_to.
     */
    auto format(const ArrowArray& obj, std::format_context& ctx) const
    {
        // The counters are int64_t to match n_children / n_buffers and avoid
        // overflowing a narrower index when the counts exceed INT_MAX.
        std::string children_str = std::format("{}", static_cast<void*>(obj.children));
        for (int64_t i = 0; i < obj.n_children; ++i)
        {
            children_str += std::format("\n-{}", static_cast<void*>(obj.children[i]));
        }

        std::string buffer_str = std::format("{}", static_cast<void*>(obj.buffers));
        for (int64_t i = 0; i < obj.n_buffers; ++i)
        {
            buffer_str += std::format("\n\t- {}", obj.buffers[i]);
        }

        return std::format_to(
            ctx.out(),
            "ArrowArray - ptr address: {}\n- length: {}\n- null_count: {}\n- offset: {}\n- n_buffers: {}\n- buffers: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n",
            static_cast<const void*>(&obj),
            obj.length,
            obj.null_count,
            obj.offset,
            obj.n_buffers,
            buffer_str,
            obj.n_children,
            children_str,
            static_cast<const void*>(obj.dictionary)
        );
    }
};
266
/**
 * Streams the std::format representation of an ArrowArray.
 *
 * @param os The output stream to write to.
 * @param value The ArrowArray to print.
 * @return `os`, to allow chaining.
 */
inline std::ostream& operator<<(std::ostream& os, const ArrowArray& value)
{
    return os << std::format("{}", value);
}
272
273#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:41
Private data for ArrowArray.
std::vector< buffer< std::uint8_t > > BufferType
A view that repeats a value a given number of times.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
ArrowArray make_empty_arrow_array()
void fill_arrow_array(ArrowArray &array, int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Fill an ArrowArray object.
constexpr int64_t ssize(const T &value)
Get the size of a range, a tuple or an optional.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
SPARROW_API void copy_array(const ArrowArray &source_array, const ArrowSchema &source_schema, ArrowArray &target)
Fill the target ArrowArray with a deep copy of the data from the source ArrowArray.
SPARROW_API void empty_release_arrow_array(ArrowArray *array)
Empty release function to use for the ArrowArray.release member.
SPARROW_API sparrow::buffer_view< uint8_t > get_bitmap_buffer(const ArrowArray &array)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > get_arrow_array_buffers(const ArrowArray &array, const ArrowSchema &schema)
SPARROW_API void release_arrow_array(ArrowArray *array)
Release function to use for the ArrowArray.release member.
ArrowArray move_array(ArrowArray &&source)
Moves the content of source into a stack-allocated array, and resets the source to an empty ArrowArray.
int64_t n_children
int64_t null_count
struct ArrowArray ** children
int64_t offset
struct ArrowArray * dictionary
const void ** buffers
int64_t n_buffers
int64_t length