sparrow 0.3.0
Loading...
Searching...
No Matches
arrow_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstddef>
18#include <cstdint>
19#if defined(__cpp_lib_format)
20# include <format>
21#endif
22
26
27namespace sparrow
28{
44 template <class B>
45 requires std::constructible_from<arrow_array_private_data::BufferType, B>
46 [[nodiscard]] ArrowArray make_arrow_array(
47 int64_t length,
48 int64_t null_count,
49 int64_t offset,
50 B buffers,
51 size_t n_children,
52 ArrowArray** children,
53 ArrowArray* dictionary
54 );
55
60
66
82 template <class B>
83 requires std::constructible_from<arrow_array_private_data::BufferType, B>
86 int64_t length,
87 int64_t null_count,
88 int64_t offset,
89 B buffers,
90 size_t n_children,
91 ArrowArray** children,
92 ArrowArray* dictionary
93 )
94 {
95 SPARROW_ASSERT_TRUE(length >= 0);
96 SPARROW_ASSERT_TRUE(null_count >= -1);
97 SPARROW_ASSERT_TRUE(offset >= 0);
98 SPARROW_ASSERT_TRUE((n_children == 0) == (children == nullptr));
99
100 array.length = length;
101 array.null_count = null_count;
102 array.offset = offset;
103 array.n_buffers = sparrow::ssize(buffers);
104 array.private_data = new arrow_array_private_data(std::move(buffers), n_children);
105 const auto private_data = static_cast<arrow_array_private_data*>(array.private_data);
106 array.buffers = private_data->buffers_ptrs<void>();
107 array.n_children = static_cast<int64_t>(n_children);
108 array.children = children;
109 array.dictionary = dictionary;
110 array.release = release_arrow_array;
111 }
112
113 template <class B>
114 requires std::constructible_from<arrow_array_private_data::BufferType, B>
116 int64_t length,
117 int64_t null_count,
118 int64_t offset,
119 B buffers,
120 size_t n_children,
121 ArrowArray** children,
122 ArrowArray* dictionary
123 )
124 {
125 SPARROW_ASSERT_TRUE(length >= 0);
126 SPARROW_ASSERT_TRUE(null_count >= -1);
127 SPARROW_ASSERT_TRUE(offset >= 0);
128 SPARROW_ASSERT_TRUE(buffers.size() >= 0);
129 SPARROW_ASSERT_TRUE((n_children == 0) == (children == nullptr));
130
132 fill_arrow_array(array, length, null_count, offset, std::move(buffers), n_children, children, dictionary);
133 return array;
134 }
135
136 [[nodiscard]] inline ArrowArray make_empty_arrow_array()
137 {
138 using buffer_type = arrow_array_private_data::BufferType;
139 return make_arrow_array(0, 0, 0, buffer_type{}, 0u, nullptr, nullptr);
140 }
141
143
145
146 [[nodiscard]] SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>
148
153
157 SPARROW_API void
158 copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema, ArrowArray& target);
159
163 [[nodiscard]] inline ArrowArray copy_array(const ArrowArray& source_array, const ArrowSchema& source_schema)
164 {
165 ArrowArray target{};
166 copy_array(source_array, source_schema, target);
167 return target;
168 }
169
174 [[nodiscard]] inline ArrowArray move_array(ArrowArray&& source)
175 {
177 swap(source, target);
178 return target;
179 }
180
185 [[nodiscard]] inline ArrowArray move_array(ArrowArray& source)
186 {
187 return move_array(std::move(source));
188 }
189};
190
191#if defined(__cpp_lib_format)
192
/**
 * std::formatter specialization that renders an ArrowArray as a multi-line,
 * human-readable dump of its members (pointer values are printed, the
 * pointed-to data is not).
 */
template <>
struct std::formatter<ArrowArray>
{
    /**
     * Parses the format specification.
     *
     * No specifier characters are consumed: the iterator to the beginning of
     * the specification is returned unchanged.
     */
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * Writes the textual representation of `obj` to the output of `ctx`.
     *
     * @param obj The ArrowArray to describe.
     * @param ctx The format context providing the output iterator.
     * @return The output iterator past the written text.
     */
    auto format(const ArrowArray& obj, std::format_context& ctx) const
    {
        // Address of the children table, followed by one line per child pointer.
        std::string children_str = std::format("{}", static_cast<void*>(obj.children));
        if (obj.children != nullptr)  // never index through a null table
        {
            // The counter has the same type as n_children (int64_t) so that
            // it can neither narrow nor overflow before reaching the bound.
            for (int64_t i = 0; i < obj.n_children; ++i)
            {
                children_str += std::format("\n-{}", static_cast<void*>(obj.children[i]));
            }
        }

        // Address of the buffers table, followed by one line per buffer pointer.
        std::string buffer_str = std::format("{}", static_cast<void*>(obj.buffers));
        if (obj.buffers != nullptr)  // never index through a null table
        {
            for (int64_t i = 0; i < obj.n_buffers; ++i)
            {
                buffer_str += std::format("\n\t- {}", obj.buffers[i]);
            }
        }

        return std::format_to(
            ctx.out(),
            "ArrowArray - ptr address: {}\n- length: {}\n- null_count: {}\n- offset: {}\n- n_buffers: {}\n- buffers: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n",
            static_cast<const void*>(&obj),
            obj.length,
            obj.null_count,
            obj.offset,
            obj.n_buffers,
            buffer_str,
            obj.n_children,
            children_str,
            static_cast<const void*>(obj.dictionary)
        );
    }
};
230
/**
 * Streams the `std::format("{}", value)` representation of an ArrowArray.
 *
 * @param os The output stream to write to.
 * @param value The ArrowArray to print.
 * @return A reference to `os`.
 */
inline std::ostream& operator<<(std::ostream& os, const ArrowArray& value)
{
    const std::string rendered = std::format("{}", value);
    return os << rendered;
}
236
237#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Private data for ArrowArray.
std::vector< buffer< std::uint8_t > > BufferType
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
ArrowArray make_empty_arrow_array()
constexpr int64_t ssize(const T &value)
Get the size of a range, a tuple or an optional.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
SPARROW_API void copy_array(const ArrowArray &source_array, const ArrowSchema &source_schema, ArrowArray &target)
Fill the target ArrowArray with a deep copy of the data from the source ArrowArray.
SPARROW_API void empty_release_arrow_array(ArrowArray *array)
Empty release function to use for the ArrowArray.release member.
void fill_arrow_array(ArrowArray &array, int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Fill an ArrowArray object.
SPARROW_API sparrow::buffer_view< uint8_t > get_bitmap_buffer(const ArrowArray &array)
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > get_arrow_array_buffers(const ArrowArray &array, const ArrowSchema &schema)
std::ostream & operator<<(std::ostream &stream, T n)
Definition large_int.hpp:93
SPARROW_API void release_arrow_array(ArrowArray *array)
Release function to use for the ArrowArray.release member.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Creates an ArrowArray.
ArrowArray move_array(ArrowArray &&source)
Moves the content of source into a stack-allocated array, and resets the source to an empty ArrowArray.
int64_t n_children
int64_t null_count
struct ArrowArray ** children
int64_t offset
struct ArrowArray * dictionary
const void ** buffers
int64_t n_buffers
int64_t length