sparrow 0.6.0
Loading...
Searching...
No Matches
record_batch.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <algorithm>
18#include <initializer_list>
19#include <ranges>
20#include <string>
21#include <unordered_map>
22#include <vector>
23
24#include "sparrow/array.hpp"
27
28#if defined(__cpp_lib_format)
30#endif
31
32namespace sparrow
33{
47 {
48 public:
49
50 using name_type = std::string;
51 using size_type = std::size_t;
52 using initializer_type = std::initializer_list<std::pair<name_type, array>>;
53
54 using name_range = std::ranges::ref_view<const std::vector<name_type>>;
55 using column_range = std::ranges::ref_view<const std::vector<array>>;
56
65 template <std::ranges::input_range NR, std::ranges::input_range CR>
66 requires(
67 std::convertible_to<std::ranges::range_value_t<NR>, std::string>
68 and std::same_as<std::ranges::range_value_t<CR>, array>
69 )
70 record_batch(NR&& names, CR&& columns);
71
72 /**
73 * Constructs a @ref record_batch from a range of arrays. Each array
74 * must have a name: if \c arr is an array, then \c arr.name() must
75 * not return an empty string.
76 *
77 * @param columns An input range of arrays
78 */
79 template <std::ranges::input_range CR>
80 requires std::same_as<std::ranges::range_value_t<CR>, array>
82
89
97
100
103
108
114
122 SPARROW_API bool contains_column(const name_type& key) const;
123
130
136 SPARROW_API const array& get_column(const name_type& key) const;
137
143
148
154
161
170
178
179 private:
180
181 template <class U, class R>
182 [[nodiscard]] std::vector<U> to_vector(R&& range) const;
183
184 SPARROW_API void update_array_map_cache() const;
185
186 [[nodiscard]] SPARROW_API bool check_consistency() const;
187
188 std::vector<name_type> m_name_list;
189 std::vector<array> m_array_list;
190 mutable std::unordered_map<name_type, const array*> m_array_map;
191 mutable bool m_dirty_map = true;
192 };
193
203 bool operator==(const record_batch& lhs, const record_batch& rhs);
204
205 /*******************************
206 * record_batch implementation *
207 *******************************/
208
209 template <std::ranges::input_range NR, std::ranges::input_range CR>
210 requires(std::convertible_to<std::ranges::range_value_t<NR>, std::string>
211 and std::same_as<std::ranges::range_value_t<CR>, array>)
213 : m_name_list(to_vector<name_type>(std::move(names)))
214 , m_array_list(to_vector<array>(std::move(columns)))
215 {
216 update_array_map_cache();
217 }
218
219 namespace detail
220 {
221 inline std::vector<record_batch::name_type> get_names(const std::vector<array>& array_list)
222 {
223 const auto names = array_list
224 | std::views::transform(
225 [](const array& ar)
226 {
227 return ar.name().value();
228 }
229 );
230 return {names.begin(), names.end()};
231 }
232 }
233
234 template <std::ranges::input_range CR>
235 requires std::same_as<std::ranges::range_value_t<CR>, array>
237 : m_name_list(detail::get_names(columns))
238 , m_array_list(to_vector<array>(std::move(columns)))
239 {
240 update_array_map_cache();
241 }
242
243 template <class U, class R>
244 std::vector<U> record_batch::to_vector(R&& range) const
245 {
246 std::vector<U> v;
247 if constexpr (std::ranges::sized_range<decltype(range)>)
248 {
249 v.reserve(std::ranges::size(range));
250 }
251 std::ranges::move(range, std::back_inserter(v));
252 return v;
253 }
254}
255
256#if defined(__cpp_lib_format)
/**
 * std::formatter specialization rendering a sparrow::record_batch as a
 * table, with the column names as headers.
 */
template <>
struct std::formatter<sparrow::record_batch>
{
    /**
     * Accepts the format specification without interpreting it.
     *
     * @param ctx The parse context.
     * @return The beginning of the specification: nothing is consumed, so no
     *         formatting option is supported.
     */
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();
    }

    /**
     * Writes \c rb to the output of \c ctx through
     * sparrow::to_table_with_columns.
     *
     * @param rb The record batch to format.
     * @param ctx The format context receiving the output.
     * @return The output iterator of \c ctx.
     */
    auto format(const sparrow::record_batch& rb, std::format_context& ctx) const
    {
        using size_type = sparrow::record_batch::size_type;

        // One lazy view per column, enumerating the values of that column
        // row by row.
        const auto values_by_columns = rb.columns()
                                       | std::views::transform(
                                           [&rb](const auto& ar)
                                           {
                                               // The counter has the same type as nb_rows(): a
                                               // narrower unsigned counter would wrap around
                                               // before reaching a bound larger than its range.
                                               return std::views::iota(size_type{0}, rb.nb_rows())
                                                      | std::views::transform(
                                                          [&ar](const auto i)
                                                          {
                                                              return ar[i];
                                                          }
                                                      );
                                           }
                                       );

        sparrow::to_table_with_columns(ctx.out(), rb.names(), values_by_columns);
        return ctx.out();
    }
};
285
/**
 * Streams the std::format representation ("{}") of a record batch.
 *
 * @param os The output stream to write to.
 * @param value The record batch to print.
 * @return \c os, to allow chaining.
 */
inline std::ostream& operator<<(std::ostream& os, const sparrow::record_batch& value)
{
    return os << std::format("{}", value);
}
291
292#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
SPARROW_API std::optional< std::string_view > name() const
Table-like data structure.
std::initializer_list< std::pair< name_type, array > > initializer_type
SPARROW_API void add_column(array column)
Appends the array column to the record batch, and maps it to its internal name.
SPARROW_API record_batch & operator=(const record_batch &)
SPARROW_API const array & get_column(const name_type &key) const
SPARROW_API record_batch(struct_array &&ar)
Construct a record batch from the given struct array.
SPARROW_API name_range names() const
SPARROW_API record_batch(const record_batch &)
record_batch & operator=(record_batch &&)=default
SPARROW_API bool contains_column(const name_type &key) const
Checks if the record_batch contains a column mapped to the specified name.
SPARROW_API struct_array extract_struct_array()
Moves the internal columns of the record batch into a struct_array object.
SPARROW_API column_range columns() const
SPARROW_API const name_type & get_column_name(size_type index) const
SPARROW_API void add_column(name_type name, array column)
Appends the array column to the record batch, and maps it with name.
SPARROW_API size_type nb_rows() const
SPARROW_API const array & get_column(size_type index) const
SPARROW_API size_type nb_columns() const
record_batch(record_batch &&)=default
record_batch(NR &&names, CR &&columns)
Constructs a record_batch from a range of names and a range of arrays.
SPARROW_API record_batch(initializer_type init)
Constructs a record_batch from a list of std::pair<name_type, array>.
std::ranges::ref_view< const std::vector< array > > column_range
std::ranges::ref_view< const std::vector< name_type > > name_range
#define SPARROW_API
Definition config.hpp:38
std::vector< record_batch::name_type > get_names(const std::vector< array > &array_list)
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr void to_table_with_columns(OutputIt out, const Headers &headers, const Columns &columns)
Definition format.hpp:139
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
Definition nullable.hpp:900