sparrow 2.2.1
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
arrow_schema.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <array>
18#include <concepts>
19#include <cstdint>
20#include <memory>
21#include <optional>
22#include <ranges>
23#include <unordered_set>
24
25#if defined(__cpp_lib_format)
26# include <format>
27# include <ostream>
28#endif
29
36
37namespace sparrow
38{
39 namespace copy_tracker
40 {
41 template <>
42 inline std::string key<ArrowSchema>()
43 {
44 return "ArrowSchema";
45 }
46 }
47
// Declaration of make_arrow_schema: returns an ArrowSchema by value, built from the given
// format, name, optional metadata, optional flag set, children array and dictionary.
// Template constraints: F and N must be able to construct the private data's FormatType and
// NameType respectively, and CHILDREN_OWNERSHIP must be an input range whose value type is bool.
// The return value is [[nodiscard]].
// NOTE(review): children_ownership presumably carries one ownership flag per entry of
// `children`, and dictionary_ownership the same for `dictionary` - confirm against the
// implementation of the private data.
70 template <class F, class N, input_metadata_container M = std::vector<metadata_pair>, std::ranges::input_range CHILDREN_OWNERSHIP>
71 requires std::constructible_from<arrow_schema_private_data::FormatType, F>
72 && std::constructible_from<arrow_schema_private_data::NameType, N>
73 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
74 [[nodiscard]] ArrowSchema make_arrow_schema(
75 F format,
76 N name,
77 std::optional<M> metadata,
78 std::optional<std::unordered_set<ArrowFlag>> flags,
79 ArrowSchema** children,
80 const CHILDREN_OWNERSHIP& children_ownership,
81 ArrowSchema* dictionary,
82 bool dictionary_ownership
83 );
84
89
95
// Definition of fill_arrow_schema: fills the caller-provided `schema` in place from the given
// format, name, optional metadata, optional flag set, children array and dictionary.
// NOTE(review): this listing omits several source lines of the definition (see the gaps in the
// embedded numbering), among them the line that names the function and the statement that
// creates the private data. The comments below describe only the statements that are visible.
// NOTE(review): the body calls children_ownership.size(), which the std::ranges::input_range
// constraint alone does not guarantee - a sized range is effectively required.
96 template <class F, class N, input_metadata_container M = std::vector<metadata_pair>, std::ranges::input_range CHILDREN_OWNERSHIP>
97 requires std::constructible_from<arrow_schema_private_data::FormatType, F>
98 && std::constructible_from<arrow_schema_private_data::NameType, N>
99 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
101 ArrowSchema& schema,
102 F format,
103 N name,
104 std::optional<M> metadata,
105 std::optional<std::unordered_set<ArrowFlag>> flags,
106 ArrowSchema** children,
107 const CHILDREN_OWNERSHIP& children_ownership,
108 ArrowSchema* dictionary,
110 )
111 {
// `children` must be non-null exactly when `children_ownership` is non-empty.
113 SPARROW_ASSERT_TRUE(children_ownership.size() > 0 ? children != nullptr : children == nullptr);
// An empty format string is rejected.
114 SPARROW_ASSERT_FALSE(format.empty());
115 if (children)
116 {
// Every slot counted by `children_ownership` must hold a non-null child pointer.
117 for (size_t i = 0; i < children_ownership.size(); ++i)
118 {
119 SPARROW_ASSERT_FALSE(children[i] == nullptr);
120 }
121 }
122
// Start from an empty bitmask and OR in the integer value of each requested flag.
123 schema.flags = 0;
124 if (flags.has_value())
125 {
126 for (const auto& flag : *flags)
127 {
128 schema.flags |= static_cast<int64_t>(flag);
129 }
130 }
// The child count comes from the size of the ownership range, not from `children` itself.
131 schema.n_children = static_cast<int64_t>(children_ownership.size());
132
// metadata_str stays std::nullopt when the caller supplied no metadata.
133 std::optional<std::string> metadata_str = metadata.has_value()
134 ? std::make_optional(
136 )
137 : std::nullopt;
138
// format, name and the metadata string are handed over by move.
140 std::move(format),
141 std::move(name),
142 std::move(metadata_str),
145 );
146
// The format/name/metadata pointers stored in the schema are obtained from the private data object.
147 const auto private_data = static_cast<arrow_schema_private_data*>(schema.private_data);
148 schema.format = private_data->format_ptr();
149 schema.name = private_data->name_ptr();
150 schema.metadata = private_data->metadata_ptr();
// children and dictionary pointers are stored as given.
151 schema.children = children;
152 schema.dictionary = dictionary;
154 }
155
// Definition of make_arrow_schema: value-initializes a local ArrowSchema, fills it from the
// given arguments and returns it by value.
// NOTE(review): this listing omits several source lines of the definition (see the gaps in the
// embedded numbering), among them the line that names the function and the start of the call
// that fills `schema`. The comments below describe only the statements that are visible.
156 template <class F, class N, input_metadata_container M, std::ranges::input_range CHILDREN_OWNERSHIP>
157 requires std::constructible_from<arrow_schema_private_data::FormatType, F>
158 && std::constructible_from<arrow_schema_private_data::NameType, N>
159 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
161 F format,
162 N name,
163 std::optional<M> metadata,
164 std::optional<std::unordered_set<ArrowFlag>> flags,
165 ArrowSchema** children,
166 const CHILDREN_OWNERSHIP& children_ownership,
167 ArrowSchema* dictionary,
169 )
170 {
// Same argument checks as the ones visible in fill_arrow_schema: `children` must be non-null
// exactly when `children_ownership` is non-empty, the format must not be empty, and no child
// pointer may be null. They run before format is moved from.
172 SPARROW_ASSERT_TRUE(children_ownership.size() > 0 ? children != nullptr : children == nullptr);
173 SPARROW_ASSERT_FALSE(format.empty());
174 if (children)
175 {
176 for (size_t i = 0; i < children_ownership.size(); ++i)
177 {
178 SPARROW_ASSERT_FALSE(children[i] == nullptr);
179 }
180 }
181
// Value-initialized schema that the following call populates.
182 ArrowSchema schema{};
184 schema,
185 std::move(format),
186 std::move(name),
187 std::move(metadata),
// NOTE(review): unlike the three arguments above, `flags` is passed without std::move.
188 flags,
189 children,
191 dictionary,
193 );
194 return schema;
// NOTE(review): the `;` after the closing brace is an empty declaration and has no effect.
195 };
196
// Body of a function whose signature line and call line are not visible in this listing
// (NOTE(review): presumably make_empty_arrow_schema returning make_arrow_schema(...) - confirm).
// The visible arguments describe a schema with format "n", an empty name, no metadata, no flags,
// no children (null pointer plus an empty ownership array), and no dictionary.
198 {
// Enables the `sv` string_view literal suffix used below.
199 using namespace std::literals;
201 "n"sv,
202 ""sv,
203 std::nullopt,
204 std::nullopt,
205 nullptr,
206 std::array<bool, 0>{},
207 nullptr,
208 false
209 );
210 }
211
// Non-throwing swap overload for two ArrowSchema objects; defined outside this header.
// NOTE(review): presumably exchanges the full contents of lhs and rhs - confirm in the definition.
215 SPARROW_API void swap(ArrowSchema& lhs, ArrowSchema& rhs) noexcept;
216
// Fills `target` with a deep copy of the data from `source`; defined outside this header.
220 SPARROW_API void copy_schema(const ArrowSchema& source, ArrowSchema& target);
221
228 [[nodiscard]]
229 inline ArrowSchema copy_schema(const ArrowSchema& source)
230 {
231 ArrowSchema target{};
232 copy_schema(source, target);
233 return target;
234 }
235
// Body of a function whose signature line is not visible in this listing
// (NOTE(review): presumably move_schema(ArrowSchema&&) - confirm).
241 {
// Plain struct copy: `target` receives every field, including the pointers, held by `source`.
242 ArrowSchema target = source;
// Reset `source` to a value-initialized ArrowSchema so it no longer refers to the transferred data.
243 source = ArrowSchema{};
244 return target;
245 }
246
// Body of a function whose signature line is not visible in this listing
// (NOTE(review): presumably an lvalue-reference overload of move_schema - confirm).
252 {
// Forwards to move_schema with `source` cast to an rvalue.
253 return move_schema(std::move(source));
254 }
255
// Takes two schemas by const reference and reports a bool; defined outside this header.
// NOTE(review): the exact compatibility criteria are not visible here - see the definition.
256 bool SPARROW_API check_compatible_schema(const ArrowSchema& schema1, const ArrowSchema& schema2);
257
// Body of a type whose header line is not visible in this listing
// (NOTE(review): presumably `struct arrow_schema_deleter`, the deleter named by schema_unique_ptr - confirm).
259 {
// Call operator receiving the ArrowSchema pointer to dispose of; defined outside this header.
260 SPARROW_API void operator()(ArrowSchema* schema) const;
261 };
262
// std::unique_ptr to an ArrowSchema that uses arrow_schema_deleter as its deleter.
263 using schema_unique_ptr = std::unique_ptr<ArrowSchema, arrow_schema_deleter>;
264}
265
266#if defined(__cpp_lib_format)
267
/// std::formatter specialization that renders an ArrowSchema as a multi-line diagnostic dump.
///
/// No format specification is interpreted: `parse` consumes nothing, so the type is meant to be
/// used with a plain `{}` replacement field.
template <>
struct std::formatter<ArrowSchema>
{
    /// Returns the beginning of the format specification without consuming any of it.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /// Writes one line per ArrowSchema field to `ctx.out()`.
    ///
    /// @param obj Schema to describe. Its string fields may be null, in which case the text
    ///            "nullptr" is printed in their place.
    /// @param ctx Format context that receives the output.
    /// @return The output iterator past the last written character.
    auto format(const ArrowSchema& obj, std::format_context& ctx) const
    {
        std::string children_str = std::format("{}", static_cast<void*>(obj.children));
        // A schema that is only partially filled can carry a non-zero n_children together with
        // a null children array; do not index into it in that case.
        if (obj.children != nullptr)
        {
            // n_children is an int64_t, so the index uses the same type to avoid a mixed-width
            // comparison and a possible overflow of a narrower counter.
            for (std::int64_t i = 0; i < obj.n_children; ++i)
            {
                children_str += std::format("\n-{}", static_cast<void*>(obj.children[i]));
            }
        }

        // C strings are formatted directly; no temporary std::string copies are needed.
        // NOTE(review): metadata is printed as a C string, so the output stops at its first
        // zero byte - confirm this is acceptable for the encoded metadata layout.
        const char* const format_text = obj.format ? obj.format : "nullptr";
        const char* const name_text = obj.name ? obj.name : "nullptr";
        const char* const metadata_text = obj.metadata ? obj.metadata : "nullptr";

        return std::format_to(
            ctx.out(),
            "ArrowSchema - ptr address: {}\n- format: {}\n- name: {}\n- metadata: {}\n- flags: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n- release: {}\n- private_data: {}\n",
            static_cast<const void*>(&obj),
            format_text,
            name_text,
            metadata_text,
            obj.flags,
            obj.n_children,
            children_str,
            static_cast<const void*>(obj.dictionary),
            // Print the release callback itself (null when none is set), not the address of
            // the struct member that stores it.
            reinterpret_cast<const void*>(obj.release),
            obj.private_data
        );
    }
};
304
/// Streams the `std::format("{}", value)` rendering of an ArrowSchema into `os`.
///
/// @return `os`, to allow chaining.
inline std::ostream& operator<<(std::ostream& os, const ArrowSchema& value)
{
    return os << std::format("{}", value);
}
310
311#endif
Private data for ArrowSchema.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
std::ostream & operator<<(std::ostream &stream, primesum::uint128_t n)
The __int128_t type (GCC/Clang) is not well supported by the C++ standard library (in 2016) so we hav...
Definition int128_t.hpp:48
std::string key< ArrowSchema >()
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema holding the provided data and returns it by value.
SPARROW_API void copy_schema(const ArrowSchema &source, ArrowSchema &target)
Fills the target ArrowSchema with a deep copy of the data from the source ArrowSchema.
SPARROW_API void release_arrow_schema(ArrowSchema *schema)
Release function to use for the ArrowSchema.release member.
std::unique_ptr< ArrowSchema, arrow_schema_deleter > schema_unique_ptr
std::string get_metadata_from_key_values(const T &metadata)
Converts a container of key-value pairs to binary metadata format.
Definition metadata.hpp:367
SPARROW_API void empty_release_arrow_schema(ArrowSchema *schema)
Empty release function to use for the ArrowSchema.release member.
ArrowSchema move_schema(ArrowSchema &&source)
Moves the content of source into a stack-allocated ArrowSchema, and resets the source to an empty ArrowSchema.
bool SPARROW_API check_compatible_schema(const ArrowSchema &schema1, const ArrowSchema &schema2)
void fill_arrow_schema(ArrowSchema &schema, F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs) noexcept
Swaps the contents of the two ArrowArray objects.
ArrowSchema make_empty_arrow_schema()
int64_t flags
const char * metadata
int64_t n_children
const char * name
void * private_data
const char * format
struct ArrowSchema * dictionary
void(* release)(struct ArrowSchema *)
struct ArrowSchema ** children
SPARROW_API void operator()(ArrowSchema *schema) const