sparrow 0.3.0
Loading...
Searching...
No Matches
record_batch.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <algorithm>
18#include <initializer_list>
19#include <ranges>
20#include <string>
21#include <unordered_map>
22#include <vector>
23
24#include "sparrow/array.hpp"
27
28#if defined(__cpp_lib_format)
30#endif
31
32namespace sparrow
33{
47 {
48 public:
49
50 using name_type = std::string;
51 using size_type = std::size_t;
52 using initializer_type = std::initializer_list<std::pair<name_type, array>>;
53
54 using name_range = std::ranges::ref_view<const std::vector<name_type>>;
55 using column_range = std::ranges::ref_view<const std::vector<array>>;
56
65 template <std::ranges::input_range NR, std::ranges::input_range CR>
66 requires(std::convertible_to<std::ranges::range_value_t<NR>, std::string> and std::same_as<std::ranges::range_value_t<CR>, array>)
67 record_batch(NR&& names, CR&& columns);
68
69 /**
70 * Constructs a @ref record_batch from a range of arrays. Each array
71 * must have a name: if \c arr is an array, then \c arr.name() must
72 * not return an empty string.
73 *
74 * @param columns An input range of arrays
75 */
76 template <std::ranges::input_range CR>
77 requires std::same_as<std::ranges::range_value_t<CR>, array>
79
86
94
97
100
105
111
119 SPARROW_API bool contains_column(const name_type& key) const;
120
127
133 SPARROW_API const array& get_column(const name_type& key) const;
134
140
145
151
158
167
175
176 private:
177
178 template <class U, class R>
179 [[nodiscard]] std::vector<U> to_vector(R&& range) const;
180
181 SPARROW_API void update_array_map_cache() const;
182
183 [[nodiscard]] SPARROW_API bool check_consistency() const;
184
185 std::vector<name_type> m_name_list;
186 std::vector<array> m_array_list;
187 mutable std::unordered_map<name_type, const array*> m_array_map;
188 mutable bool m_dirty_map = true;
189 };
190
200 bool operator==(const record_batch& lhs, const record_batch& rhs);
201
202 /*******************************
203 * record_batch implementation *
204 *******************************/
205
206 template <std::ranges::input_range NR, std::ranges::input_range CR>
207 requires(std::convertible_to<std::ranges::range_value_t<NR>, std::string>
208 and std::same_as<std::ranges::range_value_t<CR>, array>)
210 : m_name_list(to_vector<name_type>(std::move(names)))
211 , m_array_list(to_vector<array>(std::move(columns)))
212 {
213 update_array_map_cache();
214 }
215
216 namespace detail
217 {
218 inline std::vector<record_batch::name_type> get_names(const std::vector<array>& array_list)
219 {
220 const auto names = array_list
221 | std::views::transform(
222 [](const array& ar)
223 {
224 return ar.name().value();
225 }
226 );
227 return {names.begin(), names.end()};
228 }
229 }
230
231 template <std::ranges::input_range CR>
232 requires std::same_as<std::ranges::range_value_t<CR>, array>
234 : m_name_list(detail::get_names(columns))
235 , m_array_list(to_vector<array>(std::move(columns)))
236 {
237 update_array_map_cache();
238 }
239
240 template <class U, class R>
241 std::vector<U> record_batch::to_vector(R&& range) const
242 {
243 std::vector<U> v;
244 if constexpr (std::ranges::sized_range<decltype(range)>)
245 {
246 v.reserve(std::ranges::size(range));
247 }
248 std::ranges::move(range, std::back_inserter(v));
249 return v;
250 }
251}
252
253#if defined(__cpp_lib_format)
/**
 * \c std::formatter specialization for \c sparrow::record_batch.
 *
 * Formatting is delegated to \c sparrow::to_table_with_columns, which receives
 * the column names as headers and, for each column, a lazy view of its values.
 */
template <>
struct std::formatter<sparrow::record_batch>
{
    /**
     * No format specification is interpreted: the parse position is returned
     * unchanged.
     */
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * Writes \c rb to the output of \c ctx.
     *
     * @param rb The record batch to render.
     * @param ctx The formatting context providing the output iterator.
     * @return The output iterator of \c ctx.
     */
    auto format(const sparrow::record_batch& rb, std::format_context& ctx) const
    {
        using size_type = sparrow::record_batch::size_type;

        const auto values_by_columns = rb.columns()
                                       | std::views::transform(
                                           [&rb](const auto& ar)
                                           {
                                               // The counter has the same type as nb_rows(): a narrower
                                               // `0u` counter would wrap before reaching a bound larger
                                               // than UINT_MAX and never compare equal to it.
                                               return std::views::iota(size_type{0}, rb.nb_rows())
                                                      | std::views::transform(
                                                          [&ar](const auto i)
                                                          {
                                                              return ar[i];
                                                          }
                                                      );
                                           }
                                       );

        // NOTE(review): to_table_with_columns takes the iterator by value and returns
        // nothing, so the iterator returned below is the one held by ctx, not an
        // advanced copy — confirm this is fine for every output iterator in use.
        sparrow::to_table_with_columns(ctx.out(), rb.names(), values_by_columns);
        return ctx.out();
    }
};
282
/**
 * Streams a \c sparrow::record_batch by inserting its \c std::format
 * rendering into \c os.
 *
 * @param os The destination stream.
 * @param value The record batch to write.
 * @return \c os, so that insertions can be chained.
 */
inline std::ostream& operator<<(std::ostream& os, const sparrow::record_batch& value)
{
    const std::string rendered = std::format("{}", value);
    return os << rendered;
}
288
289#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
SPARROW_API std::optional< std::string_view > name() const
Table-like data structure.
std::initializer_list< std::pair< name_type, array > > initializer_type
SPARROW_API void add_column(array column)
Appends the array column to the record batch, and maps it to its internal name.
SPARROW_API record_batch & operator=(const record_batch &)
SPARROW_API const array & get_column(const name_type &key) const
SPARROW_API record_batch(struct_array &&ar)
Construct a record batch from the given struct array.
SPARROW_API name_range names() const
SPARROW_API record_batch(const record_batch &)
record_batch & operator=(record_batch &&)=default
SPARROW_API bool contains_column(const name_type &key) const
Checks if the record_batch contains a column mapped to the specified name.
SPARROW_API struct_array extract_struct_array()
Moves the internal columns of the record batch into a struct_array object.
record_batch(NR &&names, CR &&columns)
Constructs a record_batch from a range of names and a range of arrays.
SPARROW_API column_range columns() const
SPARROW_API const name_type & get_column_name(size_type index) const
SPARROW_API void add_column(name_type name, array column)
Appends the array column to the record batch, and maps it with name.
SPARROW_API size_type nb_rows() const
SPARROW_API const array & get_column(size_type index) const
SPARROW_API size_type nb_columns() const
record_batch(record_batch &&)=default
SPARROW_API record_batch(initializer_type init)
Constructs a record_batch from a list of std::pair<name_type, array>.
std::ranges::ref_view< const std::vector< array > > column_range
std::ranges::ref_view< const std::vector< name_type > > name_range
#define SPARROW_API
Definition config.hpp:38
std::vector< record_batch::name_type > get_names(const std::vector< array > &array_list)
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr void to_table_with_columns(OutputIt out, const Headers &headers, const Columns &columns)
Definition format.hpp:139
std::ostream & operator<<(std::ostream &stream, T n)
Definition large_int.hpp:93