sparrow 0.7.0
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages Concepts
arrow_schema.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <array>
18#include <concepts>
19#include <cstdint>
20#include <memory>
21#include <optional>
22#include <ranges>
23#include <unordered_set>
24
26
27
28#if defined(__cpp_lib_format)
29# include <format>
30# include <ostream>
31#endif
32
38
39namespace sparrow
40{
// Forward declaration of make_arrow_schema, which returns an ArrowSchema by value.
//
// Template constraints visible here:
//  - arrow_schema_private_data::FormatType must be constructible from F,
//  - arrow_schema_private_data::NameType must be constructible from N,
//  - M must model input_metadata_container (defaults to std::vector<metadata_pair>),
//  - CHILDREN_OWNERSHIP must be an input range whose value type is exactly bool.
//
// Parameters: the format and name values, optional metadata, an optional set of
// ArrowFlag values, the children pointer array, one bool per child in
// children_ownership, and the dictionary pointer.
60 template <class F, class N, input_metadata_container M = std::vector<metadata_pair>, std::ranges::input_range CHILDREN_OWNERSHIP>
61 requires std::constructible_from<arrow_schema_private_data::FormatType, F>
62 && std::constructible_from<arrow_schema_private_data::NameType, N>
63 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
64 [[nodiscard]] ArrowSchema make_arrow_schema(
65 F format,
66 N name,
67 std::optional<M> metadata,
68 std::optional<std::unordered_set<ArrowFlag>> flags,
69 ArrowSchema** children,
70 const CHILDREN_OWNERSHIP& children_ownership,
71 ArrowSchema* dictionary,
// NOTE(review): listing line 72 is absent from this extract; the symbol index of this
// page shows a final `bool dictionary_ownership` parameter — confirm against the header.
73 );
74
79
85
// fill_arrow_schema: populates an existing ArrowSchema (`schema`) in place from the
// given format, name, optional metadata, optional flags, children and dictionary.
//
// Template constraints are the same as for make_arrow_schema: FormatType / NameType
// must be constructible from F / N, and CHILDREN_OWNERSHIP must be an input range of bool.
//
// NOTE(review): several listing lines (90, 99, 102, 128, 132-133, 142) are absent from
// this extract; each gap is flagged below. Confirm the missing code against the header.
86 template <class F, class N, input_metadata_container M = std::vector<metadata_pair>, std::ranges::input_range CHILDREN_OWNERSHIP>
87 requires std::constructible_from<arrow_schema_private_data::FormatType, F>
88 && std::constructible_from<arrow_schema_private_data::NameType, N>
89 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
// NOTE(review): listing line 90 is missing; per the symbol index of this page it is
// the `void fill_arrow_schema(` line — confirm.
91 ArrowSchema& schema,
92 F format,
93 N name,
94 std::optional<M> metadata,
95 std::optional<std::unordered_set<ArrowFlag>> flags,
96 ArrowSchema** children,
97 const CHILDREN_OWNERSHIP& children_ownership,
98 ArrowSchema* dictionary,
// NOTE(review): listing line 99 is missing; the symbol index shows a final
// `bool dictionary_ownership` parameter — confirm.
100 )
101 {
// Precondition: `children` is non-null exactly when children_ownership is non-empty.
103 SPARROW_ASSERT_TRUE(children_ownership.size() > 0 ? children != nullptr : children == nullptr);
// Precondition: the format must not be empty.
104 SPARROW_ASSERT_FALSE(format.empty());
// Precondition: every child slot covered by children_ownership holds a non-null pointer.
105 if (children)
106 {
107 for (size_t i = 0; i < children_ownership.size(); ++i)
108 {
109 SPARROW_ASSERT_FALSE(children[i] == nullptr);
110 }
111 }
112
// Start from no flags, then OR in every flag of the optional set (if provided).
113 schema.flags = 0;
114 if (flags.has_value())
115 {
116 for (const auto& flag : *flags)
117 {
118 schema.flags |= static_cast<int64_t>(flag);
119 }
120 }
// The child count is taken from the ownership range, not from the pointer array.
121 schema.n_children = static_cast<int64_t>(children_ownership.size());
122
// Serialize the key/value metadata to a string when present; otherwise keep nullopt.
123 std::optional<std::string> metadata_str = metadata.has_value()
124 ? std::make_optional(get_metadata_from_key_values(*metadata
125 ))
126 : std::nullopt;
127
// NOTE(review): listing line 128 is missing; presumably it begins the expression that
// creates the object stored in schema.private_data (read back below) — confirm.
129 std::move(format),
130 std::move(name),
131 std::move(metadata_str),
// NOTE(review): listing lines 132-133 (remaining arguments) are missing — confirm.
134 );
135
// Point the schema's C string fields at storage exposed by the private data object,
// and store the children / dictionary pointers as given.
136 const auto private_data = static_cast<arrow_schema_private_data*>(schema.private_data);
137 schema.format = private_data->format_ptr();
138 schema.name = private_data->name_ptr();
139 schema.metadata = private_data->metadata_ptr();
140 schema.children = children;
141 schema.dictionary = dictionary;
// NOTE(review): listing line 142 is missing; presumably it sets schema.release — confirm.
143 }
144
// Definition of make_arrow_schema: validates its arguments, value-initializes a local
// ArrowSchema, forwards the arguments to a filling call and returns the schema by value.
//
// NOTE(review): listing lines 149, 157, 160, 172, 179 and 181 are absent from this
// extract; each gap is flagged below. Confirm the missing code against the header.
145 template <class F, class N, input_metadata_container M, std::ranges::input_range CHILDREN_OWNERSHIP>
146 requires std::constructible_from<arrow_schema_private_data::FormatType, F>
147 && std::constructible_from<arrow_schema_private_data::NameType, N>
148 && std::is_same_v<std::ranges::range_value_t<CHILDREN_OWNERSHIP>, bool>
// NOTE(review): listing line 149 is missing; per the symbol index of this page it is
// the `ArrowSchema make_arrow_schema(` line — confirm.
150 F format,
151 N name,
152 std::optional<M> metadata,
153 std::optional<std::unordered_set<ArrowFlag>> flags,
154 ArrowSchema** children,
155 const CHILDREN_OWNERSHIP& children_ownership,
156 ArrowSchema* dictionary,
// NOTE(review): listing line 157 is missing; the symbol index shows a final
// `bool dictionary_ownership` parameter — confirm.
158 )
159 {
// Precondition: `children` is non-null exactly when children_ownership is non-empty.
161 SPARROW_ASSERT_TRUE(children_ownership.size() > 0 ? children != nullptr : children == nullptr);
// Precondition: the format must not be empty.
162 SPARROW_ASSERT_FALSE(format.empty());
// Precondition: every child slot covered by children_ownership holds a non-null pointer.
163 if (children)
164 {
165 for (size_t i = 0; i < children_ownership.size(); ++i)
166 {
167 SPARROW_ASSERT_FALSE(children[i] == nullptr);
168 }
169 }
170
// Value-initialized schema that receives the data and is returned by value.
171 ArrowSchema schema{};
// NOTE(review): listing line 172 is missing; presumably it is the `fill_arrow_schema(`
// call that the arguments below belong to — confirm.
173 schema,
174 std::move(format),
175 std::move(name),
176 std::move(metadata),
177 flags,
178 children,
// NOTE(review): listing line 179 is missing (presumably `children_ownership,`) — confirm.
180 dictionary,
// NOTE(review): listing line 181 is missing (presumably `dictionary_ownership`) — confirm.
182 );
183 return schema;
184 };
185
// Body of make_empty_arrow_schema (name taken from the symbol index of this page).
// NOTE(review): the signature (listing line 186) and the start of the call expression
// (listing line 189) are absent from this extract — confirm against the header.
// The visible arguments are: format "n", an empty name, no metadata, no flags,
// no children (null pointer plus an empty ownership array), no dictionary, and `false`
// as the last argument.
187 {
// Enables the ""sv string_view literals used below.
188 using namespace std::literals;
// NOTE(review): listing line 189 is missing; presumably `return make_arrow_schema(` — confirm.
190 "n"sv,
191 ""sv,
192 std::nullopt,
193 std::nullopt,
194 nullptr,
195 std::array<bool, 0>{},
196 nullptr,
197 false
198 );
199 }
200
205
// Fills `target` with a deep copy of the data from `source` (as stated by the
// documentation index of this page). Only declared here; the definition is not
// part of this header listing.
209 SPARROW_API void copy_schema(const ArrowSchema& source, ArrowSchema& target);
210
217 [[nodiscard]]
218 inline ArrowSchema copy_schema(const ArrowSchema& source)
219 {
220 ArrowSchema target{};
221 copy_schema(source, target);
222 return target;
223 }
224
// Body of move_schema(ArrowSchema&& source) (signature taken from the symbol index
// of this page; listing line 229 is absent from this extract).
230 {
// NOTE(review): listing line 231 is missing; it must declare `target` used below —
// presumably initialized as an empty schema. Confirm against the header.
// Exchange the contents of source and target, so target now holds source's data.
232 swap(source, target);
// Invoke the release callback now stored in source on source itself.
233 source.release(&source);
234 return target;
235 }
236
// Body of a second move_schema overload.
// NOTE(review): the signature (listing line 241) is absent from this extract; presumably
// it takes `source` by lvalue reference — confirm against the header.
242 {
// Casts source to an rvalue and delegates to the rvalue-reference overload.
243 return move_schema(std::move(source));
244 }
245}
246
247#if defined(__cpp_lib_format)
248
/// std::formatter specialization that renders an ArrowSchema as a multi-line,
/// human-readable dump: the address of the struct, its format / name / metadata
/// strings (or the text "nullptr" when a pointer is null), flags, child count,
/// the children array pointer followed by one "-<pointer>" line per child, and the
/// dictionary, release and private_data pointers.
template <>
struct std::formatter<ArrowSchema>
{
    /// No format options are supported; parsing leaves the context where it started.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /// Writes the textual dump of `obj` to the output iterator of `ctx` and returns
    /// the advanced iterator.
    auto format(const ArrowSchema& obj, std::format_context& ctx) const
    {
        std::string children_str = std::format("{}", static_cast<void*>(obj.children));
        // Only walk the array when it exists: a schema with a null children pointer
        // must not be dereferenced, whatever n_children says.
        if (obj.children != nullptr)
        {
            // n_children is an int64_t; use the same type for the index so the counter
            // cannot overflow a narrower int.
            for (int64_t i = 0; i < obj.n_children; ++i)
            {
                children_str += std::format("\n-{}", static_cast<void*>(obj.children[i]));
            }
        }

        // Null C strings are rendered as the literal text "nullptr".
        const std::string format_str = obj.format ? obj.format : "nullptr";
        const std::string name_str = obj.name ? obj.name : "nullptr";
        const std::string metadata_str = obj.metadata ? obj.metadata : "nullptr";

        return std::format_to(
            ctx.out(),
            "ArrowSchema - ptr address: {}\n- format: {}\n- name: {}\n- metadata: {}\n- flags: {}\n- n_children: {}\n- children: {}\n- dictionary: {}\n- release: {}\n- private_data: {}\n",
            static_cast<const void*>(&obj),
            format_str,
            name_str,
            metadata_str,
            obj.flags,
            obj.n_children,
            children_str,
            static_cast<const void*>(obj.dictionary),
            // Note: this prints the address of the `release` member itself,
            // not the address of the callback it points to.
            static_cast<const void*>(std::addressof(obj.release)),
            obj.private_data
        );
    }
};
285
/// Stream insertion for ArrowSchema: writes the std::format("{}") rendering of
/// `value` to `os` and returns the stream.
inline std::ostream& operator<<(std::ostream& os, const ArrowSchema& value)
{
    return os << std::format("{}", value);
}
291
292#endif
Private data for ArrowSchema.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT_FALSE(expr__)
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema, returned by value, holding the provided data.
SPARROW_API void copy_schema(const ArrowSchema &source, ArrowSchema &target)
Fills the target ArrowSchema with a deep copy of the data from the source ArrowSchema.
SPARROW_API void release_arrow_schema(ArrowSchema *schema)
Release function to use for the ArrowSchema.release member.
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
std::string get_metadata_from_key_values(const T &metadata)
Definition metadata.hpp:115
SPARROW_API void empty_release_arrow_schema(ArrowSchema *schema)
Empty release function to use for the ArrowSchema.release member.
ArrowSchema move_schema(ArrowSchema &&source)
Moves the content of source into a stack-allocated ArrowSchema, and resets the source to an empty ArrowSchema.
void fill_arrow_schema(ArrowSchema &schema, F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
ArrowSchema make_empty_arrow_schema()
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
Definition nullable.hpp:900
int64_t flags
const char * metadata
int64_t n_children
const char * name
void * private_data
const char * format
struct ArrowSchema * dictionary
void(* release)(struct ArrowSchema *)
struct ArrowSchema ** children