sparrow 2.2.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
arrow_array_stream_proxy.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18
19#include "sparrow/array.hpp"
20#include "sparrow/array_api.hpp"
26
27namespace sparrow
28{
57 {
58 public:
59
70
83
98
99 // explicit arrow_array_stream_proxy(ArrowSchema* schema_ptr);
100
103
108
116
// Reports whether this proxy has ownership of its internal ArrowArrayStream.
// NOTE(review): the member variant can hold either an ArrowArrayStream* or an
// ArrowArrayStream by value; presumably this is true for the by-value case — confirm in the .cpp.
120 [[nodiscard]] SPARROW_API bool owns_stream() const;
121
137
150 template <std::ranges::input_range R>
152 void push(R&& arrays)
153 {
154 arrow_array_stream_private_data& private_data = get_private_data();
155
156 // Check if we need to create schema from first array
157 if (private_data.schema() == nullptr)
158 {
160 copy_schema(*get_arrow_schema(*std::ranges::begin(arrays)), *schema);
161 private_data.import_schema(std::move(schema));
162 }
163
164 // Validate schema compatibility for all arrays
165 for (const auto& array : arrays)
166 {
167 if (!check_compatible_schema(*private_data.schema(), *get_arrow_schema(array)))
168 {
169 throw std::runtime_error("Incompatible schema when adding array to ArrowArrayStream");
170 }
171 }
172
173 // Import all arrays
174 for (auto&& array : std::forward<R>(arrays))
175 {
176 ArrowArray extracted_array = extract_arrow_array(std::move(array));
178 swap(*array_ptr, extracted_array);
179 private_data.import_array(std::move(array_ptr));
180 }
181 }
182
195 template <layout_or_array A>
196 void push(A&& array)
197 {
198 push(std::ranges::single_view(std::forward<A>(array)));
199 }
200
// Retrieves the next array from the stream.
// NOTE(review): the std::optional return type suggests an empty optional is
// returned when no further array is available — confirm in the .cpp.
214 SPARROW_API std::optional<array> pop();
215
216 private:
217
// Underlying stream: either a pointer to an ArrowArrayStream or an
// ArrowArrayStream stored by value inside the proxy.
218 std::variant<ArrowArrayStream*, ArrowArrayStream> m_stream;
219
// Returns a pointer to the ArrowArrayStream this proxy works on.
// NOTE(review): presumably resolves whichever alternative m_stream holds — confirm in the .cpp.
225 [[nodiscard]] ArrowArrayStream* get_stream_ptr();
226
// Const overload of get_stream_ptr().
232 [[nodiscard]] const ArrowArrayStream* get_stream_ptr() const;
233
// NOTE(review): judging by the name, presumably throws when the stream must
// not be modified — the condition is not visible in this header; confirm in the .cpp.
243 void throw_if_immutable() const;
244
// Returns the stream's private data. push() uses it to read the current
// schema and to import the schema and the arrays.
250 [[nodiscard]] SPARROW_API arrow_array_stream_private_data& get_private_data();
251 };
252}
Implementation of the Arrow C Stream Interface for streaming data exchange.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:43
void import_schema(schema_unique_ptr &&out_schema)
void import_array(array_unique_ptr &&array)
arrow_array_stream_proxy(const arrow_array_stream_proxy &)=delete
void push(R &&arrays)
Adds a range of arrays to the stream.
SPARROW_API ArrowArrayStream * export_stream()
Export the stream pointer.
SPARROW_API arrow_array_stream_proxy(ArrowArrayStream &&stream)
Constructs from an existing ArrowArrayStream by taking ownership.
SPARROW_API bool owns_stream() const
Check whether the proxy has ownership of its internal ArrowArrayStream.
SPARROW_API arrow_array_stream_proxy(arrow_array_stream_proxy &&other) noexcept
SPARROW_API ~arrow_array_stream_proxy()
Destructor that releases all resources.
SPARROW_API arrow_array_stream_proxy()
Constructs a new ArrowArrayStream producer.
SPARROW_API std::optional< array > pop()
Retrieves the next array from the stream.
SPARROW_API arrow_array_stream_proxy & operator=(arrow_array_stream_proxy &&other) noexcept
arrow_array_stream_proxy & operator=(const arrow_array_stream_proxy &)=delete
SPARROW_API arrow_array_stream_proxy(ArrowArrayStream *stream)
Constructs from an existing ArrowArrayStream pointer by referencing it.
void push(A &&array)
Adds a single array to the stream.
#define SPARROW_API
Definition config.hpp:38
SPARROW_API void copy_schema(const ArrowSchema &source, ArrowSchema &target)
Fills the target ArrowSchema with a deep copy of the data from the source ArrowSchema.
std::unique_ptr< ArrowSchema, arrow_schema_deleter > schema_unique_ptr
ArrowArray extract_arrow_array(A &&a)
Extracts the internal ArrowArray structure from the given Array or typed layout.
Definition array.hpp:98
std::unique_ptr< ArrowArray, arrow_array_deleter > array_unique_ptr
bool SPARROW_API check_compatible_schema(const ArrowSchema &schema1, const ArrowSchema &schema2)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs) noexcept
Swaps the contents of the two ArrowArray objects.
ArrowSchema * get_arrow_schema(A &a)
Returns a pointer to the internal ArrowSchema of the given array or layout.
Definition array.hpp:72