sparrow 0.9.0
Loading...
Searching...
No Matches
record_batch.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <algorithm>
18#include <initializer_list>
19#include <optional>
20#include <ranges>
21#include <string>
22#include <unordered_map>
23#include <vector>
24
25#include "sparrow/array.hpp"
28
29#if defined(__cpp_lib_format)
31#endif
32
33namespace sparrow
34{
48 {
49 public:
50
51 using name_type = std::string;
52 using size_type = std::size_t;
53 using initializer_type = std::initializer_list<std::pair<name_type, array>>;
54
55 using name_range = std::ranges::ref_view<const std::vector<name_type>>;
56 using column_range = std::ranges::ref_view<const std::vector<array>>;
57
66 template <std::ranges::input_range NR, std::ranges::input_range CR>
67 requires(
68 std::convertible_to<std::ranges::range_value_t<NR>, std::string>
69 and std::same_as<std::ranges::range_value_t<CR>, array>
70 )
71 record_batch(NR&& names, CR&& columns, std::optional<std::string_view> name = std::nullopt);
72
73 /**
74 * Constructs a @ref record_batch from a range of arrays. Each array
75 * must have a name: if \c arr is an array, then \c arr.name() must
76 * not return an empty string.
77 *
78 * @param columns An input range of arrays
79 */
80 template <std::ranges::input_range CR>
81 requires std::same_as<std::ranges::range_value_t<CR>, array>
82 record_batch(CR&& columns, std::optional<std::string_view> name = std::nullopt);
83
90
98
101
104
109
115
123 SPARROW_API bool contains_column(const name_type& key) const;
124
131
137 SPARROW_API const array& get_column(const name_type& key) const;
138
144
148 SPARROW_API const std::optional<name_type>& name() const;
149
154
160
167
176
184
185 private:
186
187 template <class U, class R>
188 [[nodiscard]] std::vector<U> to_vector(R&& range) const;
189
190 SPARROW_API void update_array_map_cache() const;
191
192 [[nodiscard]] SPARROW_API bool check_consistency() const;
193
194 std::optional<name_type> m_name;
195 std::vector<name_type> m_name_list;
196 std::vector<array> m_array_list;
197 mutable std::unordered_map<name_type, const array*> m_array_map;
198 mutable bool m_dirty_map = true;
199 };
200
210 bool operator==(const record_batch& lhs, const record_batch& rhs);
211
212 /*******************************
213 * record_batch implementation *
214 *******************************/
215
216 template <std::ranges::input_range NR, std::ranges::input_range CR>
217 requires(std::convertible_to<std::ranges::range_value_t<NR>, std::string>
218 and std::same_as<std::ranges::range_value_t<CR>, array>)
219 record_batch::record_batch(NR&& names, CR&& columns, std::optional<std::string_view> name)
220 : m_name(name)
221 , m_name_list(to_vector<name_type>(std::forward<NR>(names)))
222 , m_array_list(to_vector<array>(std::forward<CR>(columns)))
223 {
224 update_array_map_cache();
225 }
226
227 namespace detail
228 {
229 inline std::vector<record_batch::name_type> get_names(const std::vector<array>& array_list)
230 {
231 const auto names = array_list
232 | std::views::transform(
233 [](const array& ar)
234 {
235 return ar.name().value();
236 }
237 );
238 return {names.begin(), names.end()};
239 }
240 }
241
242 template <std::ranges::input_range CR>
243 requires std::same_as<std::ranges::range_value_t<CR>, array>
244 record_batch::record_batch(CR&& columns, std::optional<std::string_view> name)
245 : m_name(name)
246 , m_name_list(detail::get_names(columns))
247 , m_array_list(to_vector<array>(std::move(columns)))
248 {
249 update_array_map_cache();
250 }
251
252 template <class U, class R>
253 std::vector<U> record_batch::to_vector(R&& range) const
254 {
255 std::vector<U> v;
256 if constexpr (std::ranges::sized_range<decltype(range)>)
257 {
258 v.reserve(std::ranges::size(range));
259 }
260 std::ranges::move(range, std::back_inserter(v));
261 return v;
262 }
263}
264
265#if defined(__cpp_lib_format)
// std::format support for sparrow::record_batch: renders the batch through
// sparrow::to_table_with_columns, using the column names as headers.
template <>
struct std::formatter<sparrow::record_batch>
{
    // No format specification is interpreted; the parse position is returned
    // unchanged.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();
    }

    // Writes the record batch to ctx.out() and returns the context's iterator.
    auto format(const sparrow::record_batch& rb, std::format_context& ctx) const
    {
        using size_type = sparrow::record_batch::size_type;

        // Use the batch's own size type for both bounds so the row index is
        // not narrowed to unsigned int.
        const size_type row_count = rb.nb_rows();

        // Lazy view: for each column, the sequence of its values by row index.
        const auto values_by_columns = rb.columns()
                                       | std::views::transform(
                                           [row_count](const auto& ar)
                                           {
                                               return std::views::iota(size_type{0}, row_count)
                                                      | std::views::transform(
                                                          [&ar](const auto i)
                                                          {
                                                              return ar[i];
                                                          }
                                                      );
                                           }
                                       );

        sparrow::to_table_with_columns(ctx.out(), rb.names(), values_by_columns);
        return ctx.out();
    }
};
294
// Streams a record batch by formatting it with std::format("{}", ...) and
// inserting the resulting string into os. Returns os.
inline std::ostream& operator<<(std::ostream& os, const sparrow::record_batch& value)
{
    return os << std::format("{}", value);
}
300
301#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
SPARROW_API std::optional< std::string_view > name() const
Table-like data structure.
SPARROW_API const std::optional< name_type > & name() const
std::initializer_list< std::pair< name_type, array > > initializer_type
SPARROW_API void add_column(array column)
Appends the array column to the record batch, and maps it to its internal name.
SPARROW_API record_batch & operator=(const record_batch &)
record_batch(NR &&names, CR &&columns, std::optional< std::string_view > name=std::nullopt)
Constructs a record_batch from a range of names and a range of arrays.
SPARROW_API const array & get_column(const name_type &key) const
SPARROW_API record_batch(struct_array &&ar)
Construct a record batch from the given struct array.
SPARROW_API name_range names() const
SPARROW_API record_batch(const record_batch &)
record_batch & operator=(record_batch &&)=default
SPARROW_API bool contains_column(const name_type &key) const
Checks if the record_batch contains a column mapped to the specified name.
SPARROW_API struct_array extract_struct_array()
Moves the internal columns of the record batch into a struct_array object.
SPARROW_API column_range columns() const
SPARROW_API const name_type & get_column_name(size_type index) const
SPARROW_API void add_column(name_type name, array column)
Appends the array column to the record batch, and maps it with name.
SPARROW_API size_type nb_rows() const
SPARROW_API const array & get_column(size_type index) const
SPARROW_API size_type nb_columns() const
record_batch(record_batch &&)=default
SPARROW_API record_batch(initializer_type init)
Constructs a record_batch from a list of std::pair<name_type, array>.
std::ranges::ref_view< const std::vector< array > > column_range
std::ranges::ref_view< const std::vector< name_type > > name_range
#define SPARROW_API
Definition config.hpp:38
std::vector< record_batch::name_type > get_names(const std::vector< array > &array_list)
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr void to_table_with_columns(OutputIt out, const Headers &headers, const Columns &columns)
Definition format.hpp:139
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
Definition nullable.hpp:933