sparrow ..
Loading...
Searching...
No Matches
map_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include "sparrow/array_api.hpp"
25
26namespace sparrow
27{
28 class map_array;
29
30 template <>
42
43 template <class T>
44 constexpr bool is_map_array_v = std::same_as<T, map_array>;
45
46 namespace detail
47 {
48 template <>
50 {
51 [[nodiscard]] static constexpr sparrow::data_type get()
52 {
54 }
55 };
56 }
57
87 class map_array final : public array_bitmap_base<map_array>
88 {
89 public:
90
94 using value_iterator = inner_types::value_iterator;
95 using const_value_iterator = inner_types::const_value_iterator;
97 using offset_type = const std::int32_t;
99
102
104
105 using inner_value_type = inner_types::inner_value_type;
106 using inner_reference = inner_types::inner_reference;
107 using inner_const_reference = inner_types::inner_const_reference;
108
112
127
143 template <class... Args>
145 explicit map_array(Args&&... args)
146 : self_type(create_proxy(std::forward<Args>(args)...))
147 {
148 }
149
160
173
174 map_array(map_array&&) noexcept = default;
175 map_array& operator=(map_array&&) noexcept = default;
176
184 [[nodiscard]] SPARROW_API const array_wrapper* raw_keys_array() const;
185
194
202 [[nodiscard]] SPARROW_API const array_wrapper* raw_items_array() const;
203
212
230 template <std::ranges::range SIZES_RANGE>
231 [[nodiscard]] static auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
232
233 private:
234
242 [[nodiscard]] SPARROW_API value_iterator value_begin();
243
251 [[nodiscard]] SPARROW_API value_iterator value_end();
252
260 [[nodiscard]] SPARROW_API const_value_iterator value_cbegin() const;
261
269 [[nodiscard]] SPARROW_API const_value_iterator value_cend() const;
270
280 [[nodiscard]] SPARROW_API inner_reference value(size_type i);
281
291 [[nodiscard]] SPARROW_API inner_const_reference value(size_type i) const;
292
301 [[nodiscard]] SPARROW_API offset_type* make_list_offsets() const;
302
311 [[nodiscard]] SPARROW_API cloning_ptr<array_wrapper> make_entries_array() const;
312
320 [[nodiscard]] SPARROW_API bool get_keys_sorted() const;
321
337 [[nodiscard]] SPARROW_API static bool
338 check_keys_sorted(const array& flat_keys, const offset_buffer_type& offsets);
339
363 template <
365 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
366 [[nodiscard]] static arrow_proxy create_proxy(
367 array&& flat_keys,
368 array&& flat_items,
369 offset_buffer_type&& list_offsets,
370 VB&& validity_input,
371 std::optional<std::string_view> name = std::nullopt,
372 std::optional<METADATA_RANGE> metadata = std::nullopt
373 );
374
397 template <
399 std::ranges::input_range OFFSET_BUFFER_RANGE,
400 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
401 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
402 [[nodiscard]] static arrow_proxy create_proxy(
403 array&& flat_keys,
404 array&& flat_items,
405 OFFSET_BUFFER_RANGE&& list_offsets_range,
406 VB&& validity_input,
407 std::optional<std::string_view> name = std::nullopt,
408 std::optional<METADATA_RANGE> metadata = std::nullopt
409 )
410 {
411 offset_buffer_type list_offsets{std::move(list_offsets_range)};
412 return map_array::create_proxy(
413 std::move(flat_keys),
414 std::move(flat_items),
415 std::move(list_offsets),
416 std::forward<VB>(validity_input),
417 std::forward<std::optional<std::string_view>>(name),
418 std::forward<std::optional<METADATA_RANGE>>(metadata)
419 );
420 }
421
422 /**
423 * @brief Creates Arrow proxy from keys, values, offsets, and nullable flag.
424 *
425 * @tparam METADATA_RANGE Type of metadata container
426 * @param flat_keys Array containing all map keys
427 * @param flat_values Array containing all map values
428 * @param list_offsets Buffer of offsets indicating map boundaries
429 * @param nullable Whether the array should support null values
430 * @param name Optional name for the array
431 * @param metadata Optional metadata for the array
432 * @return Arrow proxy containing the map array data and schema
433 *
434 * @pre flat_keys must be a valid array
435 * @pre flat_values must be a valid array
436 * @pre flat_keys.size() must equal flat_values.size()
437 * @pre list_offsets.size() must be >= 1
438 * @pre Last offset must not exceed flat_keys.size()
439 * @post If nullable is true, array supports null values (though none initially set)
440 * @post If nullable is false, array does not support null values
441 * @post Keys sorted flag is determined from actual key ordering
442 */
443 template <
445 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
446 [[nodiscard]] static arrow_proxy create_proxy(
447 array&& flat_keys,
448 array&& flat_values,
449 offset_buffer_type&& list_offsets,
450 bool nullable = true,
451 std::optional<std::string_view> name = std::nullopt,
452 std::optional<METADATA_RANGE> metadata = std::nullopt
453 );
454
477 template <
479 std::ranges::input_range OFFSET_BUFFER_RANGE,
480 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
481 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
482 [[nodiscard]] static arrow_proxy create_proxy(
483 array&& flat_keys,
484 array&& flat_items,
485 OFFSET_BUFFER_RANGE&& list_offsets_range,
486 bool nullable = true,
487 std::optional<std::string_view> name = std::nullopt,
488 std::optional<METADATA_RANGE> metadata = std::nullopt
489 )
490 {
491 offset_buffer_type list_offsets{std::move(list_offsets_range)};
492 return map_array::create_proxy(
493 std::move(flat_keys),
494 std::move(flat_items),
495 std::move(list_offsets),
496 nullable,
497 std::forward<std::optional<std::string_view>>(name),
498 std::forward<std::optional<METADATA_RANGE>>(metadata)
499 );
500 }
501
502 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
503 offset_type* p_list_offsets;
504
505 cloning_ptr<array_wrapper> p_entries_array;
506 bool m_keys_sorted;
507
508 // friend classes
509 friend class array_crtp_base<map_array>;
511 };
512
513 template <std::ranges::range SIZES_RANGE>
515 {
517 std::forward<SIZES_RANGE>(sizes)
518 );
519 }
520
521 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
522 arrow_proxy map_array::create_proxy(
523 array&& flat_keys,
524 array&& flat_items,
525 offset_buffer_type&& list_offsets,
526 VB&& validity_input,
527 std::optional<std::string_view> name,
528 std::optional<METADATA_RANGE> metadata
529 )
530 {
531 const auto size = list_offsets.size() - 1;
532 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
533
534 std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
535 bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
536 if (keys_sorted)
537 {
538 flags.value().insert(ArrowFlag::MAP_KEYS_SORTED);
539 }
540
541 std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
542 struct_array entries(std::move(struct_children), false, std::string("entries"));
543
544 auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries));
545
546 const auto null_count = vbitmap.null_count();
547 const repeat_view<bool> children_ownership{true, 1};
548
549 ArrowSchema schema = make_arrow_schema(
550 std::string("+m"),
551 name, // name
552 metadata, // metadata
553 flags, // flags,
554 new ArrowSchema*[1]{new ArrowSchema(std::move(entries_schema))},
555 children_ownership, // children ownership
556 nullptr, // dictionary
557 true // dictionary ownership
558
559 );
560
561 std::vector<buffer<std::uint8_t>> arr_buffs = {
562 std::move(vbitmap).extract_storage(),
563 std::move(list_offsets).extract_storage()
564 };
565
566 ArrowArray arr = make_arrow_array(
567 static_cast<std::int64_t>(size), // length
568 static_cast<std::int64_t>(null_count),
569 0, // offset
570 std::move(arr_buffs),
571 new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))},
572 children_ownership, // children ownership
573 nullptr, // dictionary
574 true // dictionary ownership
575 );
576 return arrow_proxy{std::move(arr), std::move(schema)};
577 }
578
579 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
580 arrow_proxy map_array::create_proxy(
581 array&& flat_keys,
582 array&& flat_items,
583 offset_buffer_type&& list_offsets,
584 bool nullable,
585 std::optional<std::string_view> name,
586 std::optional<METADATA_RANGE> metadata
587 )
588 {
589 if (nullable)
590 {
591 return map_array::create_proxy(
592 std::move(flat_keys),
593 std::move(flat_items),
594 std::move(list_offsets),
596 name,
597 metadata
598 );
599 }
600 else
601 {
602 bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
603 auto flags = keys_sorted
604 ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::MAP_KEYS_SORTED}}
605 : std::nullopt;
606
607 const auto size = list_offsets.size() - 1;
608
609 std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
610 struct_array entries(std::move(struct_children), false, std::string("entries"));
611
612 auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries));
613 const repeat_view<bool> children_ownership{true, 1};
614
615 ArrowSchema schema = make_arrow_schema(
616 std::string_view("+m"),
617 name, // name
618 metadata, // metadata
619 flags, // flags,
620 new ArrowSchema*[1]{new ArrowSchema(std::move(entries_schema))},
621 children_ownership, // children ownership
622 nullptr, // dictionary
623 true // dictionary ownership
624
625 );
626
627 std::vector<buffer<std::uint8_t>> arr_buffs = {
628 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
629 std::move(list_offsets).extract_storage()
630 };
631
632 ArrowArray arr = make_arrow_array(
633 static_cast<std::int64_t>(size), // length
634 0,
635 0, // offset
636 std::move(arr_buffs),
637 new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))},
638 children_ownership, // children ownership
639 nullptr, // dictionary
640 true // dictionary ownership
641 );
642 return arrow_proxy{std::move(arr), std::move(schema)};
643 }
644 }
645}
void struct_array()
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:41
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
inner_types::inner_const_reference inner_const_reference
inner_types::const_value_iterator const_value_iterator
Definition map_array.hpp:95
typename base_type::iterator_tag iterator_tag
nullable< inner_value_type > value_type
SPARROW_API map_array & operator=(const self_type &rhs)
Copy assignment operator.
SPARROW_API map_array(const self_type &rhs)
Copy constructor.
inner_types::inner_value_type inner_value_type
map_array(Args &&... args)
Generic constructor for creating map array from various inputs.
map_array self_type
Definition map_array.hpp:91
typename base_type::const_bitmap_range const_bitmap_range
SPARROW_API const array_wrapper * raw_items_array() const
Gets read-only access to the values array.
inner_types::value_iterator value_iterator
Definition map_array.hpp:94
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from map sizes.
map_array(map_array &&) noexcept=default
SPARROW_API const array_wrapper * raw_keys_array() const
Gets read-only access to the keys array.
inner_types::inner_reference inner_reference
const std::int32_t offset_type
Definition map_array.hpp:97
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
Definition map_array.hpp:98
array_bitmap_base< self_type > base_type
Definition map_array.hpp:92
typename base_type::bitmap_type bitmap_type
SPARROW_API map_array(arrow_proxy proxy)
Constructs map array from Arrow proxy.
nullable< inner_const_reference, bitmap_const_reference > const_reference
array_inner_types< self_type > inner_types
Definition map_array.hpp:93
typename base_type::size_type size_type
Definition map_array.hpp:96
typename base_type::bitmap_const_reference bitmap_const_reference
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:304
Concept defining valid input types for validity bitmap creation.
#define SPARROW_API
Definition config.hpp:38
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient alias for arrays with immutable validity bitmaps.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:98
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_map_array_v
Definition map_array.hpp:44
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
std::pair< metadata_key, metadata_value > metadata_pair
Type alias for metadata key-value pairs.
Definition metadata.hpp:61
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
Definition map_array.hpp:37
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Definition map_array.hpp:38
std::random_access_iterator_tag iterator_tag
Definition map_array.hpp:40
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.