sparrow 1.2.0
Loading...
Searching...
No Matches
map_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include "sparrow/array_api.hpp"
25
26namespace sparrow
27{
28 class map_array;
29
30 template <>
42
43 template <class T>
44 constexpr bool is_map_array_v = std::same_as<T, map_array>;
45
46 namespace detail
47 {
48 template <>
50 {
51 [[nodiscard]] static constexpr sparrow::data_type get()
52 {
54 }
55 };
56 }
57
87 class map_array final : public array_bitmap_base<map_array>
88 {
89 public:
90
94 using value_iterator = inner_types::value_iterator;
95 using const_value_iterator = inner_types::const_value_iterator;
97 using offset_type = const std::int32_t;
99
102
104
105 using inner_value_type = inner_types::inner_value_type;
106 using inner_reference = inner_types::inner_reference;
107 using inner_const_reference = inner_types::inner_const_reference;
108
112
127
143 template <class... Args>
145 explicit map_array(Args&&... args)
146 : self_type(create_proxy(std::forward<Args>(args)...))
147 {
148 }
149
160
173
174 map_array(map_array&&) noexcept = default;
175 map_array& operator=(map_array&&) noexcept = default;
176
184 [[nodiscard]] SPARROW_API const array_wrapper* raw_keys_array() const;
185
194
202 [[nodiscard]] SPARROW_API const array_wrapper* raw_items_array() const;
203
212
230 template <std::ranges::range SIZES_RANGE>
231 [[nodiscard]] static auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
232
233 private:
234
242 [[nodiscard]] SPARROW_API value_iterator value_begin();
243
251 [[nodiscard]] SPARROW_API value_iterator value_end();
252
260 [[nodiscard]] SPARROW_API const_value_iterator value_cbegin() const;
261
269 [[nodiscard]] SPARROW_API const_value_iterator value_cend() const;
270
280 [[nodiscard]] SPARROW_API inner_reference value(size_type i);
281
291 [[nodiscard]] SPARROW_API inner_const_reference value(size_type i) const;
292
301 [[nodiscard]] SPARROW_API offset_type* make_list_offsets() const;
302
311 [[nodiscard]] SPARROW_API cloning_ptr<array_wrapper> make_entries_array() const;
312
320 [[nodiscard]] SPARROW_API bool get_keys_sorted() const;
321
337 [[nodiscard]] SPARROW_API static bool
338 check_keys_sorted(const array& flat_keys, const offset_buffer_type& offsets);
339
358 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
359 [[nodiscard]] static arrow_proxy create_proxy_impl(
360 array&& flat_keys,
361 array&& flat_items,
362 offset_buffer_type&& list_offsets,
363 buffer<std::uint8_t>&& validity_buffer,
364 std::int64_t null_count,
365 std::optional<std::unordered_set<ArrowFlag>> flags,
366 std::optional<std::string_view> name,
367 std::optional<METADATA_RANGE> metadata
368 );
369
393 template <
395 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
396 [[nodiscard]] static arrow_proxy create_proxy(
397 array&& flat_keys,
398 array&& flat_items,
399 offset_buffer_type&& list_offsets,
400 VB&& validity_input,
401 std::optional<std::string_view> name = std::nullopt,
402 std::optional<METADATA_RANGE> metadata = std::nullopt
403 );
404
427 template <
429 std::ranges::input_range OFFSET_BUFFER_RANGE,
430 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
431 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
432 [[nodiscard]] static arrow_proxy create_proxy(
433 array&& flat_keys,
434 array&& flat_items,
435 OFFSET_BUFFER_RANGE&& list_offsets_range,
436 VB&& validity_input,
437 std::optional<std::string_view> name = std::nullopt,
438 std::optional<METADATA_RANGE> metadata = std::nullopt
439 )
440 {
441 offset_buffer_type list_offsets{std::move(list_offsets_range)};
442 return map_array::create_proxy(
443 std::move(flat_keys),
444 std::move(flat_items),
445 std::move(list_offsets),
446 std::forward<VB>(validity_input),
447 std::forward<std::optional<std::string_view>>(name),
448 std::forward<std::optional<METADATA_RANGE>>(metadata)
449 );
450 }
451
/**
 * @brief Creates an Arrow proxy from keys, values, offsets, and a nullable flag.
 *
 * @tparam METADATA_RANGE Type of the metadata container
 * @param flat_keys Array containing all map keys
 * @param flat_values Array containing all map values
 * @param list_offsets Buffer of offsets indicating map boundaries
 * @param nullable Whether the array should support null values
 * @param name Optional name for the array
 * @param metadata Optional metadata for the array
 * @return Arrow proxy containing the map array data and schema
 *
 * @pre flat_keys must be a valid array
 * @pre flat_values must be a valid array
 * @pre flat_keys.size() must equal flat_values.size()
 * @pre list_offsets.size() must be >= 1
 * @pre The last offset must not exceed flat_keys.size()
 * @post If nullable is true, the array supports null values (though none are initially set)
 * @post If nullable is false, the array does not support null values
 * @post The keys-sorted flag is determined from the actual key ordering
 */
473 template <
475 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
476 [[nodiscard]] static arrow_proxy create_proxy(
477 array&& flat_keys,
478 array&& flat_values,
479 offset_buffer_type&& list_offsets,
480 bool nullable = true,
481 std::optional<std::string_view> name = std::nullopt,
482 std::optional<METADATA_RANGE> metadata = std::nullopt
483 );
484
507 template <
509 std::ranges::input_range OFFSET_BUFFER_RANGE,
510 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
511 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
512 [[nodiscard]] static arrow_proxy create_proxy(
513 array&& flat_keys,
514 array&& flat_items,
515 OFFSET_BUFFER_RANGE&& list_offsets_range,
516 bool nullable = true,
517 std::optional<std::string_view> name = std::nullopt,
518 std::optional<METADATA_RANGE> metadata = std::nullopt
519 )
520 {
521 offset_buffer_type list_offsets{std::move(list_offsets_range)};
522 return map_array::create_proxy(
523 std::move(flat_keys),
524 std::move(flat_items),
525 std::move(list_offsets),
526 nullable,
527 std::forward<std::optional<std::string_view>>(name),
528 std::forward<std::optional<METADATA_RANGE>>(metadata)
529 );
530 }
531
532 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
533 offset_type* p_list_offsets;
534
535 cloning_ptr<array_wrapper> p_entries_array;
536 bool m_keys_sorted;
537
538 // friend classes
539 friend class array_crtp_base<map_array>;
541 };
542
543 template <std::ranges::range SIZES_RANGE>
545 {
547 std::forward<SIZES_RANGE>(sizes)
548 );
549 }
550
551 template <input_metadata_container METADATA_RANGE>
552 arrow_proxy map_array::create_proxy_impl(
553 array&& flat_keys,
554 array&& flat_items,
555 offset_buffer_type&& list_offsets,
556 buffer<std::uint8_t>&& validity_buffer,
557 std::int64_t null_count,
558 std::optional<std::unordered_set<ArrowFlag>> flags,
559 std::optional<std::string_view> name,
560 std::optional<METADATA_RANGE> metadata
561 )
562 {
563 const auto size = list_offsets.size() - 1;
564
565 std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
566 struct_array entries(std::move(struct_children), false, std::string("entries"));
567
568 auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries));
569
570 const repeat_view<bool> children_ownership{true, 1};
571
572 ArrowSchema schema = make_arrow_schema(
573 std::string("+m"),
574 name, // name
575 metadata, // metadata
576 flags, // flags,
577 new ArrowSchema*[1]{new ArrowSchema(std::move(entries_schema))},
578 children_ownership, // children ownership
579 nullptr, // dictionary
580 true // dictionary ownership
581
582 );
583
584 std::vector<buffer<std::uint8_t>> arr_buffs = {
585 std::move(validity_buffer),
586 std::move(list_offsets).extract_storage()
587 };
588
589 ArrowArray arr = make_arrow_array(
590 static_cast<std::int64_t>(size), // length
591 null_count,
592 0, // offset
593 std::move(arr_buffs),
594 new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))},
595 children_ownership, // children ownership
596 nullptr, // dictionary
597 true // dictionary ownership
598 );
599 return arrow_proxy{std::move(arr), std::move(schema)};
600 }
601
602 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
603 arrow_proxy map_array::create_proxy(
604 array&& flat_keys,
605 array&& flat_items,
606 offset_buffer_type&& list_offsets,
607 VB&& validity_input,
608 std::optional<std::string_view> name,
609 std::optional<METADATA_RANGE> metadata
610 )
611 {
612 const auto size = list_offsets.size() - 1;
613 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
614
615 std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
616 bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
617 if (keys_sorted)
618 {
619 flags.value().insert(ArrowFlag::MAP_KEYS_SORTED);
620 }
621
622 const auto null_count = vbitmap.null_count();
623 buffer<std::uint8_t> validity_buffer = std::move(vbitmap).extract_storage();
624
625 return create_proxy_impl(
626 std::move(flat_keys),
627 std::move(flat_items),
628 std::move(list_offsets),
629 std::move(validity_buffer),
630 static_cast<std::int64_t>(null_count),
631 std::move(flags),
632 name,
633 std::forward<std::optional<METADATA_RANGE>>(metadata)
634 );
635 }
636
637 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
638 arrow_proxy map_array::create_proxy(
639 array&& flat_keys,
640 array&& flat_items,
641 offset_buffer_type&& list_offsets,
642 bool nullable,
643 std::optional<std::string_view> name,
644 std::optional<METADATA_RANGE> metadata
645 )
646 {
647 if (nullable)
648 {
649 return map_array::create_proxy(
650 std::move(flat_keys),
651 std::move(flat_items),
652 std::move(list_offsets),
654 name,
655 metadata
656 );
657 }
658 else
659 {
660 bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
661 auto flags = keys_sorted
662 ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::MAP_KEYS_SORTED}}
663 : std::nullopt;
664
665 return create_proxy_impl(
666 std::move(flat_keys),
667 std::move(flat_items),
668 std::move(list_offsets),
669 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
670 0, // null_count
671 std::move(flags),
672 name,
673 std::forward<std::optional<METADATA_RANGE>>(metadata)
674 );
675 }
676 }
677}
void struct_array()
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:41
Object that owns a piece of contiguous memory.
Definition buffer.hpp:113
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
inner_types::inner_const_reference inner_const_reference
inner_types::const_value_iterator const_value_iterator
Definition map_array.hpp:95
typename base_type::iterator_tag iterator_tag
nullable< inner_value_type > value_type
SPARROW_API map_array & operator=(const self_type &rhs)
Copy assignment operator.
SPARROW_API map_array(const self_type &rhs)
Copy constructor.
inner_types::inner_value_type inner_value_type
map_array(Args &&... args)
Generic constructor for creating map array from various inputs.
map_array self_type
Definition map_array.hpp:91
typename base_type::const_bitmap_range const_bitmap_range
SPARROW_API const array_wrapper * raw_items_array() const
Gets read-only access to the values array.
inner_types::value_iterator value_iterator
Definition map_array.hpp:94
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from map sizes.
map_array(map_array &&) noexcept=default
SPARROW_API const array_wrapper * raw_keys_array() const
Gets read-only access to the keys array.
inner_types::inner_reference inner_reference
const std::int32_t offset_type
Definition map_array.hpp:97
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
Definition map_array.hpp:98
array_bitmap_base< self_type > base_type
Definition map_array.hpp:92
typename base_type::bitmap_type bitmap_type
SPARROW_API map_array(arrow_proxy proxy)
Constructs map array from Arrow proxy.
nullable< inner_const_reference, bitmap_const_reference > const_reference
array_inner_types< self_type > inner_types
Definition map_array.hpp:93
typename base_type::size_type size_type
Definition map_array.hpp:96
typename base_type::bitmap_const_reference bitmap_const_reference
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:304
Concept defining valid input types for validity bitmap creation.
#define SPARROW_API
Definition config.hpp:38
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:98
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_map_array_v
Definition map_array.hpp:44
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
std::pair< metadata_key, metadata_value > metadata_pair
Type alias for metadata key-value pairs.
Definition metadata.hpp:61
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
Definition map_array.hpp:37
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Definition map_array.hpp:38
std::random_access_iterator_tag iterator_tag
Definition map_array.hpp:40
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.