sparrow 0.9.0
Loading...
Searching...
No Matches
struct_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18#include <string_view>
19#include <version>
20
22
23#if defined(__cpp_lib_format)
25#endif
26
27#include "sparrow/array_api.hpp"
37
38namespace sparrow
39{
40 class struct_array;
41
42 namespace detail
43 {
44 template <>
46 {
47 [[nodiscard]] static constexpr sparrow::data_type get()
48 {
50 }
51 };
52 }
53
54 template <>
66
// Type trait: true exactly when T is struct_array (checked with std::same_as,
// so cv-qualified or reference types do not match).
// NOTE(review): the identifier is spelled "struc" rather than "struct"; it is
// left unchanged because it is a public name.
72 template <class T>
73 constexpr bool is_struc_array_v = std::same_as<T, struct_array>;
74
// Array layout for Arrow struct data: a final class deriving from
// array_bitmap_base<struct_array> that keeps one array_wrapper per child
// (field) in m_children.
// NOTE: this listing omits several declarations (see the gaps in the embedded
// line numbering), e.g. type aliases and the copy/move/proxy constructors.
123 class struct_array final : public array_bitmap_base<struct_array>
124 {
125 public:
126
    // Iterator types over the struct values, taken from inner_types.
130 using value_iterator = typename inner_types::value_iterator;
131 using const_value_iterator = typename inner_types::const_value_iterator;
133
136
138
142
146
160
    // Generic constructor: forwards all arguments to one of the create_proxy
    // overloads and delegates to the constructor taking the resulting
    // arrow_proxy.
    // NOTE(review): a constraint line appears to be stripped between the
    // template header and the constructor (numbering gap) — confirm in the
    // original header.
176 template <class... Args>
178 explicit struct_array(Args&&... args)
179 : struct_array(create_proxy(std::forward<Args>(args)...))
180 {
181 }
182
194
207
210
    // Gets the number of child arrays (fields).
219 [[nodiscard]] SPARROW_API size_type children_count() const;
220
    // Gets a const pointer to the child array at index i.
231 [[nodiscard]] SPARROW_API const array_wrapper* raw_child(std::size_t i) const;
232
    // Gets a mutable pointer to the child array at index i.
243 [[nodiscard]] SPARROW_API array_wrapper* raw_child(std::size_t i);
244
245 private:
246
    // Builds an arrow_proxy from a range of child arrays plus a validity input
    // (VB), with optional name and metadata.
    // NOTE: the template-parameter line declaring VB is missing from this
    // listing (numbering gap between 269 and 271); the out-of-class definition
    // shows it as `validity_bitmap_input VB`.
268 template <
269 std::ranges::input_range CHILDREN_RANGE,
271 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
272 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
273 [[nodiscard]] static auto create_proxy(
274 CHILDREN_RANGE&& children,
275 VB&& bitmaps,
276 std::optional<std::string_view> name = std::nullopt,
277 std::optional<METADATA_RANGE> metadata = std::nullopt
278 ) -> arrow_proxy;
279
    // Builds an arrow_proxy from a range of child arrays; `nullable` (default
    // true) selects whether a validity bitmap is attached.
297 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
298 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
299 [[nodiscard]] static auto create_proxy(
300 CHILDREN_RANGE&& children,
301 bool nullable = true,
302 std::optional<std::string_view> name = std::nullopt,
303 std::optional<METADATA_RANGE> metadata = std::nullopt
304 ) -> arrow_proxy;
305
    // Shared implementation behind both create_proxy overloads: takes the
    // children and an optional validity bitmap (std::nullopt means no bitmap)
    // and assembles the ArrowSchema/ArrowArray pair wrapped in an arrow_proxy.
325 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
326 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
327 [[nodiscard]] static auto create_proxy_impl(
328 CHILDREN_RANGE&& children,
329 std::optional<validity_bitmap>&& bitmap,
330 std::optional<std::string_view> name = std::nullopt,
331 std::optional<METADATA_RANGE> metadata = std::nullopt
332 ) -> arrow_proxy;
333
    // Storage type for the children: one cloning_ptr<array_wrapper> per child.
334 using children_type = std::vector<cloning_ptr<array_wrapper>>;
335
    // Value iterator accessors (mutable and const); only declared here.
    // Presumably begin/end of the value range, as the names suggest — the
    // definitions are not part of this listing.
343 [[nodiscard]] SPARROW_API value_iterator value_begin();
344
352 [[nodiscard]] SPARROW_API value_iterator value_end();
353
361 [[nodiscard]] SPARROW_API const_value_iterator value_cbegin() const;
362
370 [[nodiscard]] SPARROW_API const_value_iterator value_cend() const;
371
    // Element access by index i, returning inner_reference (mutable overload)
    // or inner_const_reference (const overload).
381 [[nodiscard]] SPARROW_API inner_reference value(size_type i);
382
392 [[nodiscard]] SPARROW_API inner_const_reference value(size_type i) const;
393
    // Returns a children_type; presumably used to (re)build m_children from
    // the underlying proxy — TODO confirm against the definition.
402 [[nodiscard]] SPARROW_API children_type make_children();
403
404 // data members
405 children_type m_children;
406
407 // friend classes
    // The CRTP base is granted access to the private members above.
408 friend class array_crtp_base<self_type>;
409
410 // needs access to this->value(i)
    // NOTE: the friend declaration(s) this comment refers to are stripped from
    // this listing (numbering gap 411-412).
413 };
414
// Builds the arrow_proxy of a struct array from its children and a validity
// input.
//
// children        range of `array` values, one per field
// validity_input  any validity_bitmap_input; converted to a validity_bitmap
//                 sized like the first child
// name, metadata  optional schema name and metadata, passed through unchanged
//
// Returns the arrow_proxy produced by create_proxy_impl.
// NOTE(review): uses children.empty() and children[0], which std::ranges::
// input_range alone does not guarantee — the range must also provide them.
415 template <std::ranges::input_range CHILDREN_RANGE, validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
416 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
417 auto struct_array::create_proxy(
418 CHILDREN_RANGE&& children,
419 VB&& validity_input,
420 std::optional<std::string_view> name,
421 std::optional<METADATA_RANGE> metadata
422 ) -> arrow_proxy
423 {
    // The struct length is the first child's length; 0 when there are no
    // children. Read before `children` is forwarded below.
424 const auto size = children.empty() ? 0 : children[0].size();
425 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
    // std::move(vbitmap) initialises the std::optional<validity_bitmap>&&
    // parameter, so the impl always sees an engaged optional on this path.
426 return create_proxy_impl(
427 std::forward<CHILDREN_RANGE>(children),
428 std::move(vbitmap),
429 std::move(name),
430 std::move(metadata)
431 );
432 }
433
// Builds the arrow_proxy of a struct array from its children and a nullable
// flag instead of an explicit validity input.
//
// children        range of `array` values, one per field
// nullable        true: attach a validity bitmap; false: pass std::nullopt
// name, metadata  optional schema name and metadata, passed through unchanged
//
// Returns the arrow_proxy produced by create_proxy_impl.
434 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
435 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
436 auto struct_array::create_proxy(
437 CHILDREN_RANGE&& children,
438 bool nullable,
439 std::optional<std::string_view> name,
440 std::optional<METADATA_RANGE> metadata
441 ) -> arrow_proxy
442 {
    // The struct length is the first child's length; 0 when there are no
    // children. Read before `children` is forwarded below.
443 const size_t size = children.empty() ? 0 : children[0].size();
    // When nullable, the bitmap is constructed from (nullptr, size).
    // NOTE(review): presumably that yields a bitmap of `size` bits with no
    // allocated storage, i.e. no nulls — confirm against validity_bitmap's
    // constructor.
444 return create_proxy_impl(
445 std::forward<CHILDREN_RANGE>(children),
446 nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
447 std::move(name),
448 std::move(metadata)
449 );
450 }
451
// Assembles the ArrowSchema / ArrowArray pair for a struct array and wraps
// them in an arrow_proxy.
//
// children        range of `array` values, one per field; each element is
//                 moved from while its Arrow structures are extracted
// bitmap          validity bitmap, or std::nullopt for no bitmap (in which
//                 case no NULLABLE flag is set and null_count is 0)
// name, metadata  optional schema name and metadata
//
// NOTE(review): uses children.size(), children.empty() and children[i], which
// std::ranges::input_range alone does not guarantee.
// NOTE(review): elements are moved from even if CHILDREN_RANGE is deduced as an
// lvalue reference, leaving the caller's arrays moved-from — confirm intended.
// NOTE(review): the pointer tables and per-child structures are raw `new`
// allocations with no guard; if anything throws before they are handed to
// make_arrow_schema / make_arrow_array they would leak.
452 template <std::ranges::input_range CHILDREN_RANGE, input_metadata_container METADATA_RANGE>
453 requires std::same_as<std::ranges::range_value_t<CHILDREN_RANGE>, array>
454 auto struct_array::create_proxy_impl(
455 CHILDREN_RANGE&& children,
456 std::optional<validity_bitmap>&& bitmap,
457 std::optional<std::string_view> name,
458 std::optional<METADATA_RANGE> metadata
459 ) -> arrow_proxy
460 {
    // One pointer slot per child in each table.
461 const auto n_children = children.size();
462 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
463 ArrowArray** child_arrays = new ArrowArray*[n_children];
464
    // Struct length = length of the first child (0 without children).
465 const auto size = children.empty() ? 0 : children[0].size();
466
467 for (std::size_t i = 0; i < n_children; ++i)
468 {
469 auto& child = children[i];
    // Every child must have the same length as the first one.
470 SPARROW_ASSERT_TRUE(child.size() == size);
    // Take the child's ArrowArray/ArrowSchema out of it and move each
    // into its own heap allocation referenced by the tables.
471 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
472 child_arrays[i] = new ArrowArray(std::move(flat_arr));
473 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
474 }
475
    // A present bitmap makes the array nullable; without one there are no
    // nulls to count and no flags to set.
476 const bool bitmap_has_value = bitmap.has_value();
477 const auto null_count = bitmap_has_value ? bitmap->null_count() : 0;
478 const auto flags = bitmap_has_value
479 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
480 : std::nullopt;
481
    // "+s" is the Arrow C data interface format string for a struct. Every
    // child is flagged as owned (repeat_view of `true`), and there is no
    // dictionary.
482 ArrowSchema schema = make_arrow_schema(
483 std::string("+s"), // format
484 std::move(name), // name
485 std::move(metadata), // metadata
486 flags, // flags,
487 child_schemas, // children
488 repeat_view<bool>(true, n_children), // children_ownership
489 nullptr, // dictionary
490 true // dictionary ownership
491 );
492
    // Move the bitmap's storage out, or use an empty (nullptr, 0) buffer when
    // there is no bitmap.
493 buffer<uint8_t> bitmap_buffer = bitmap_has_value ? std::move(*bitmap).extract_storage()
494 : buffer<uint8_t>{nullptr, 0};
495
    // The array carries exactly one buffer: the validity bitmap.
496 std::vector<buffer<std::uint8_t>> arr_buffs(1);
497 arr_buffs[0] = std::move(bitmap_buffer);
498
499 ArrowArray arr = make_arrow_array(
500 static_cast<std::int64_t>(size), // length
501 static_cast<std::int64_t>(null_count), // null_count
502 0, // offset
503 std::move(arr_buffs),
504 child_arrays, // children
505 repeat_view<bool>(true, n_children), // children_ownership
506 nullptr, // dictionary
507 true // dictionary ownership
508 );
509 return arrow_proxy{std::move(arr), std::move(schema)};
510 }
511}
512
513#if defined(__cpp_lib_format)
514
515
// std::formatter specialization for sparrow::struct_array. Only compiled when
// __cpp_lib_format is defined (see the enclosing #if).
516template <>
517struct std::formatter<sparrow::struct_array>
518{
    // Consumes nothing from the format specification: returns the start of
    // the parse range unchanged.
519 constexpr auto parse(std::format_parse_context& ctx)
520 {
521 return ctx.begin();
522 }
523
    // Formats `struct_array` into the context and returns the output iterator
    // (type of ctx.out()). Declared only; the definition is not in this
    // listing.
524 SPARROW_API auto format(const sparrow::struct_array& struct_array, std::format_context& ctx) const
525 -> decltype(ctx.out());
526};
527
528namespace sparrow
529{
    // Stream insertion for struct_array; returns the stream it was given a
    // reference type of (std::ostream&). Declared only — the definition is not
    // in this listing.
530 SPARROW_API std::ostream& operator<<(std::ostream& os, const sparrow::struct_array& value);
531}
532
533#endif
void struct_array()
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
A view that repeats a value a given number of times.
array_inner_types< self_type > inner_types
SPARROW_API struct_array(const struct_array &rhs)
Copy constructor.
SPARROW_API struct_array(arrow_proxy proxy)
Constructs struct array from Arrow proxy.
SPARROW_API array_wrapper * raw_child(std::size_t i)
Gets mutable pointer to child array at specified index.
SPARROW_API const array_wrapper * raw_child(std::size_t i) const
Gets const pointer to child array at specified index.
nullable< inner_value_type > value_type
nullable< inner_const_reference, bitmap_const_reference > const_reference
struct_value inner_reference
struct_array(Args &&... args)
Generic constructor for creating struct array from various inputs.
typename base_type::size_type size_type
typename inner_types::value_iterator value_iterator
base_type::iterator_tag iterator_tag
struct_value inner_value_type
typename base_type::bitmap_type bitmap_type
struct_array(struct_array &&)=default
struct_array & operator=(struct_array &&)=default
struct_value inner_const_reference
SPARROW_API size_type children_count() const
Gets the number of child arrays (fields).
base_type::const_bitmap_range const_bitmap_range
SPARROW_API struct_array & operator=(const struct_array &rhs)
Copy assignment operator.
typename base_type::bitmap_const_reference bitmap_const_reference
typename inner_types::const_value_iterator const_value_iterator
array_bitmap_base< self_type > base_type
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:291
Concept defining valid input types for validity bitmap creation.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
constexpr bool is_struc_array_v
Type trait to check if a type is a struct_array.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
std::random_access_iterator_tag iterator_tag
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.