sparrow 0.6.0
Loading...
Searching...
No Matches
arrow_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstddef>
18#include <cstdint>
19#include <ranges>
20#include <type_traits>
21
22#if defined(__cpp_lib_format)
23# include <format>
24#endif
25
30
31namespace sparrow
32{
// Forward declaration of make_arrow_array ("Creates an ArrowArray.").
//
// Template constraints, as written below:
//  - B must be usable to construct arrow_array_private_data::BufferType;
//  - CHILDREN_OWNERSHIP must be an input range whose value type is bool.
//
// NOTE(review): the embedded listing numbers jump from 58 to 60, so one source line is
// missing from this view. The full signature shown in this page's symbol summary ends with
// a `bool dictionary_ownership` parameter, which is presumably what the missing line holds.
48 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
49 requires std::constructible_from<arrow_array_private_data::BufferType, B>
50 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
51 [[nodiscard]] ArrowArray make_arrow_array(
52 int64_t length,
53 int64_t null_count,
54 int64_t offset,
55 B buffers,
56 ArrowArray** children,
57 const CHILDREN_OWNERSHIP& children_ownership,
58 ArrowArray* dictionary,
60 );
61
66
72
// Definition of the function that fills an existing ArrowArray (`array`) in place.
//
// NOTE(review): this listing omits several source lines (embedded numbers 93-94, 102 and
// 116-117 are absent). Going by the signature in this page's symbol summary, the missing
// header lines presumably read `void fill_arrow_array(ArrowArray& array,` and
// `bool dictionary_ownership`; the missing constructor arguments are not visible at all.
//
// Template constraints, as written below:
//  - B must be usable to construct arrow_array_private_data::BufferType;
//  - CHILDREN_OWNERSHIP must be an input range whose value type is bool.
88 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
89 requires(
90 std::constructible_from<arrow_array_private_data::BufferType, B>
91 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
92 )
95 int64_t length,
96 int64_t null_count,
97 int64_t offset,
98 B buffers,
99 ArrowArray** children,
100 const CHILDREN_OWNERSHIP& children_ownership,
101 ArrowArray* dictionary,
103 )
104 {
// Preconditions. null_count may be -1 (presumably the Arrow C data interface marker for
// "not computed" - confirm against the specification).
105 SPARROW_ASSERT_TRUE(length >= 0);
106 SPARROW_ASSERT_TRUE(null_count >= -1);
107 SPARROW_ASSERT_TRUE(offset >= 0);
// An empty ownership range must go with a null children pointer, and vice versa.
108 SPARROW_ASSERT_TRUE((children_ownership.size() == 0) == (children == nullptr));
109
110 array.length = length;
111 array.null_count = null_count;
112 array.offset = offset;
// The buffer count is read before `buffers` is moved into the private data below.
113 array.n_buffers = sparrow::ssize(buffers);
// The private data object is allocated with `new` and stored as an untyped pointer in the
// array. NOTE(review): presumably release_arrow_array is what deletes it - confirm.
114 array.private_data = new arrow_array_private_data(
115 std::move(buffers),
118 );
// Recover the typed pointer so the buffer pointer table exposed by the array can be taken
// from the private data that now holds the buffers.
119 const auto private_data = static_cast<arrow_array_private_data*>(array.private_data);
120 array.buffers = private_data->buffers_ptrs<void>();
// The number of children is taken from the size of the ownership range.
121 array.n_children = static_cast<int64_t>(children_ownership.size());
122 array.children = children;
123 array.dictionary = dictionary;
124 array.release = release_arrow_array;
125 }
126
// Definition of the function that builds an ArrowArray and returns it by value
// ("Creates an ArrowArray." in this page's symbol summary).
//
// NOTE(review): this listing omits several source lines (embedded numbers 130, 138,
// 147-148, 155 and 157 are absent): the line naming the function, the last parameter, the
// declaration of the local `array` together with the name of the function being called,
// and two of the call arguments. The callee is presumably fill_arrow_array - confirm
// against the real header.
127 template <class B, std::ranges::input_range CHILDREN_OWNERSHIP>
128 requires std::constructible_from<arrow_array_private_data::BufferType, B>
129 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
131 int64_t length,
132 int64_t null_count,
133 int64_t offset,
134 B buffers,
135 ArrowArray** children,
136 const CHILDREN_OWNERSHIP& children_ownership,
137 ArrowArray* dictionary,
139 )
140 {
// Preconditions on the scalar arguments; null_count may be -1.
141 SPARROW_ASSERT_TRUE(length >= 0);
142 SPARROW_ASSERT_TRUE(null_count >= -1);
143 SPARROW_ASSERT_TRUE(offset >= 0);
// NOTE(review): if B::size() returns an unsigned type (as it does for a std::vector), this
// comparison is always true and the assertion checks nothing.
144 SPARROW_ASSERT_TRUE(buffers.size() >= 0);
// An empty ownership range must go with a null children pointer, and vice versa.
145 SPARROW_ASSERT_TRUE((children_ownership.size() == 0) == (children == nullptr));
146
// Arguments of the call that populates `array`; `buffers` is moved into the callee.
149 array,
150 length,
151 null_count,
152 offset,
153 std::move(buffers),
154 children,
156 dictionary,
158 );
159 return array;
160 }
161
162 [[nodiscard]] inline ArrowArray make_empty_arrow_array()
163 {
164 using buffer_type = arrow_array_private_data::BufferType;
165 return make_arrow_array(0, 0, 0, buffer_type{}, nullptr, repeat_view<bool>(true, 0), nullptr, false);
166 }
167
169
171
// Return type of a declaration whose remaining line is missing from this listing (embedded
// number 173 is absent). NOTE(review): going by this page's symbol summary, the missing line
// is presumably `get_arrow_array_buffers(const ArrowArray& array, const ArrowSchema& schema);`.
172 [[nodiscard]] SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>
174
179
186 SPARROW_API void
187 copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema, ArrowArray& target);
188
192 [[nodiscard]] inline ArrowArray copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema)
193 {
194 ArrowArray target{};
195 copy_array(source_array, source_schema, target);
196 return target;
197 }
198
// Moves the content of `source` into an ArrowArray that is returned by value.
//
// NOTE(review): the line declaring `target` is missing from this listing (embedded number
// 205 is absent); presumably it initialises `target` as an empty ArrowArray - confirm
// against the real header.
203 [[nodiscard]] inline ArrowArray move_array(ArrowArray&& source)
204 {
// After the swap `target` holds what `source` held, and `source` holds the former `target`.
206 swap(source, target);
// The release callback is invoked unconditionally on what `source` now holds.
// NOTE(review): this assumes that callback is non-null - depends on how `target` was created.
207 source.release(&source);
208 return target;
209 }
210
215 [[nodiscard]] inline ArrowArray move_array(ArrowArray& source)
216 {
217 return move_array(std::move(source));
218 }
219};
220
221#if defined(__cpp_lib_format)
222
/**
 * std::formatter specialization that renders an ArrowArray as a multi-line dump.
 *
 * The output starts with the address of the object, followed by its length, null_count,
 * offset, the buffer pointer table, the children pointer table and the dictionary pointer.
 * Only pointer values are printed: the pointer tables are read, but the buffers and child
 * arrays they point to are not accessed.
 */
template <>
struct std::formatter<ArrowArray>
{
    /// Consumes nothing from the format specification and returns its beginning.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * Writes the textual description of @p obj to the output of @p ctx.
     *
     * @param obj The array to describe.
     * @param ctx The formatting context providing the output iterator.
     * @return The iterator returned by std::format_to.
     */
    auto format(const ArrowArray& obj, std::format_context& ctx) const
    {
        // n_children and n_buffers are int64_t. The loop counters use the same type so the
        // comparisons are not mixed-width and the counter cannot overflow before reaching
        // the count.
        std::string children_str = std::format("{}", static_cast<void*>(obj.children));
        for (int64_t i = 0; i < obj.n_children; ++i)
        {
            children_str += std::format("\n-{}", static_cast<void*>(obj.children[i]));
        }

        std::string buffer_str = std::format("{}", static_cast<void*>(obj.buffers));
        for (int64_t i = 0; i < obj.n_buffers; ++i)
        {
            buffer_str += std::format("\n\t- {}", obj.buffers[i]);
        }

        return std::format_to(
            ctx.out(),
            "ArrowArray - ptr address: {}\n- length: {}\n- null_count: {}\n- offset: {}\n- n_buffers: {}\n- buffers: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n",
            static_cast<const void*>(&obj),
            obj.length,
            obj.null_count,
            obj.offset,
            obj.n_buffers,
            buffer_str,
            obj.n_children,
            children_str,
            static_cast<const void*>(obj.dictionary)
        );
    }
};
260
/**
 * Streams the textual description of an ArrowArray.
 *
 * The text is produced by formatting @p value with the "{}" format string and is then
 * inserted into @p os.
 *
 * @param os The output stream to write to.
 * @param value The array to describe.
 * @return A reference to @p os.
 */
inline std::ostream& operator<<(std::ostream& os, const ArrowArray& value)
{
    return os << std::format("{}", value);
}
266
267#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Private data for ArrowArray.
std::vector< buffer< std::uint8_t > > BufferType
A view that repeats a value a given number of times.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
ArrowArray make_empty_arrow_array()
void fill_arrow_array(ArrowArray &array, int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Fill an ArrowArray object.
constexpr int64_t ssize(const T &value)
Get the size of a range, a tuple or an optional.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
SPARROW_API void copy_array(const ArrowArray &source_array, const ArrowSchema &source_schema, ArrowArray &target)
Fill the target ArrowArray with a deep copy of the data from the source ArrowArray.
SPARROW_API void empty_release_arrow_array(ArrowArray *array)
Empty release function to use for the ArrowArray.release member.
SPARROW_API sparrow::buffer_view< uint8_t > get_bitmap_buffer(const ArrowArray &array)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > get_arrow_array_buffers(const ArrowArray &array, const ArrowSchema &schema)
SPARROW_API void release_arrow_array(ArrowArray *array)
Release function to use for the ArrowArray.release member.
ArrowArray move_array(ArrowArray &&source)
Moves the content of source into a stack-allocated array, and resets the source to an empty ArrowArray...
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
Definition nullable.hpp:900
int64_t n_children
int64_t null_count
struct ArrowArray ** children
int64_t offset
struct ArrowArray * dictionary
const void ** buffers
int64_t n_buffers
int64_t length