sparrow 2.0.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
decimal_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18
27#include "sparrow/u8_buffer.hpp"
33
34namespace sparrow
35{
41 template <decimal_type T>
42 class decimal_array;
43
52
53 namespace detail
54 {
55 template <>
57 {
63 [[nodiscard]] static constexpr sparrow::data_type get()
64 {
66 }
67 };
68
70 template <>
72 {
78 [[nodiscard]] static constexpr sparrow::data_type get()
79 {
81 }
82 };
83
85 template <>
87 {
93 [[nodiscard]] static constexpr sparrow::data_type get()
94 {
96 }
97 };
98
100 template <>
102 {
108 [[nodiscard]] static constexpr sparrow::data_type get()
109 {
111 }
112 };
113
114 }
115
116 template <decimal_type T>
134
135
141 template <class T>
143
152 template <decimal_type T>
153 class decimal_array final : public mutable_array_bitmap_base<decimal_array<T>>
154 {
155 public:
156
159
161 using inner_value_type = typename inner_types::inner_value_type;
162 using inner_reference = typename inner_types::inner_reference;
163 using inner_const_reference = typename inner_types::inner_const_reference;
164
165 // the integral value type used to store the bits
166 using storage_type = typename T::integer_type;
167 static_assert(
168 sizeof(storage_type) == 4 || sizeof(storage_type) == 8 || sizeof(storage_type) == 16
169 || sizeof(storage_type) == 32,
170 "The storage type must be an integral type of size 4, 8, 16 or 32 bytes"
171 );
172
177
180
184
185 using value_iterator = typename inner_types::value_iterator;
186 using const_value_iterator = typename inner_types::const_value_iterator;
187
193 explicit decimal_array(arrow_proxy proxy);
194
201 template <class... Args>
203 explicit decimal_array(Args&&... args)
204 : decimal_array(create_proxy(std::forward<Args>(args)...))
205 {
206 }
207
214 [[nodiscard]] constexpr inner_reference value(size_type i);
215
222 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
223
224 private:
225
240 template <
241 std::ranges::input_range VALUE_RANGE,
242 validity_bitmap_input VALIDITY_RANGE,
243 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
244 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
245 [[nodiscard]] static auto create_proxy(
246 VALUE_RANGE&& range,
247 VALIDITY_RANGE&& bitmaps,
248 std::size_t precision,
249 int scale,
250 std::optional<std::string_view> name = std::nullopt,
251 std::optional<METADATA_RANGE> metadata = std::nullopt
252 ) -> arrow_proxy;
253
266 template <
267 std::ranges::input_range NULLABLE_VALUE_RANGE,
268 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
269 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
270 [[nodiscard]] static auto create_proxy(
271 NULLABLE_VALUE_RANGE&& range,
272 std::size_t precision,
273 int scale,
274 std::optional<std::string_view> name = std::nullopt,
275 std::optional<METADATA_RANGE> metadata = std::nullopt
276 ) -> arrow_proxy;
277
291 template <std::ranges::input_range VALUE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
292 requires std::is_same_v<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
293 [[nodiscard]] static auto create_proxy(
294 VALUE_RANGE&& range,
295 std::size_t precision,
296 int scale,
297 bool nullable = true,
298 std::optional<std::string_view> name = std::nullopt,
299 std::optional<METADATA_RANGE> metadata = std::nullopt
300 ) -> arrow_proxy;
301
315 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
316 [[nodiscard]] static auto create_proxy(
317 u8_buffer<storage_type>&& data_buffer,
318 R&& bitmaps,
319 std::size_t precision,
320 int scale,
321 std::optional<std::string_view> name = std::nullopt,
322 std::optional<METADATA_RANGE> metadata = std::nullopt
323 ) -> arrow_proxy;
324
337 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
338 [[nodiscard]] static auto create_proxy(
339 u8_buffer<storage_type>&& data_buffer,
340 std::size_t precision,
341 int scale,
342 bool nullable = true,
343 std::optional<std::string_view> name = std::nullopt,
344 std::optional<METADATA_RANGE> metadata = std::nullopt
345 ) -> arrow_proxy;
346
359 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
360 [[nodiscard]] static auto create_proxy_impl(
361 u8_buffer<storage_type>&& data_buffer,
362 std::size_t precision,
363 int scale,
364 std::optional<validity_bitmap> bitmap,
365 std::optional<std::string_view> name = std::nullopt,
366 std::optional<METADATA_RANGE> metadata = std::nullopt
367 ) -> arrow_proxy;
368
376 static constexpr std::string generate_format(std::size_t precision, int scale);
377
383 [[nodiscard]] constexpr value_iterator value_begin();
384
390 [[nodiscard]] constexpr value_iterator value_end();
391
397 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
398
404 [[nodiscard]] constexpr const_value_iterator value_cend() const;
405
412 constexpr void assign(const T& rhs, size_type index);
413
414 // Modifiers
415
422 constexpr void resize_values(size_t new_length, const inner_value_type& value);
423
432 constexpr value_iterator insert_value(const_value_iterator pos, inner_value_type value, size_t count);
433
443 template <std::input_iterator InputIt>
444 requires std::convertible_to<
445 typename std::iterator_traits<InputIt>::value_type,
447 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
448 {
449 SPARROW_ASSERT_TRUE(value_cbegin() <= pos);
450 SPARROW_ASSERT_TRUE(pos <= value_cend());
451 const auto distance = std::distance(value_cbegin(), pos);
452 const auto offset = static_cast<difference_type>(this->get_arrow_proxy().offset());
453 auto data_buffer = get_data_buffer();
454 auto value_range = std::ranges::subrange(first, last);
455 auto storage_view = std::ranges::transform_view(
456 value_range,
457 [](const auto& v)
458 {
459 return v.storage();
460 }
461 );
462 const auto insertion_pos = data_buffer.cbegin() + distance + offset;
463 data_buffer.insert(insertion_pos, storage_view.begin(), storage_view.end());
464 return value_iterator(
466 static_cast<size_type>(distance)
467 );
468 }
469
477 constexpr value_iterator erase_values(const_value_iterator pos, size_t count);
478
484 [[nodiscard]] constexpr auto get_data_buffer()
485 {
486 auto& buffers = this->get_arrow_proxy().get_array_private_data()->buffers();
487 return make_buffer_adaptor<storage_type>(buffers[DATA_BUFFER_INDEX]);
488 }
489
491 static constexpr size_type DATA_BUFFER_INDEX = 1;
492 friend base_type;
497 friend class decimal_reference<self_type>;
498
500 std::size_t m_precision;
502 int m_scale;
503 };
504
505 /**********************************
506 * decimal_array implementation *
507 **********************************/
508
509 template <decimal_type T>
511 : base_type(std::move(proxy))
512 , m_precision(0)
513 , m_scale(0)
514 {
515 // parse the format string
516 const auto format = this->get_arrow_proxy().format();
517
518 // ensure that the format string starts with d:
519 if (format.size() < 2 || format[0] != 'd' || format[1] != ':')
520 {
521 throw std::runtime_error("Invalid format string for decimal array");
522 }
523
524 // substring starting after d:
525 const auto format_str = format.substr(2);
526
527 std::stringstream ss;
528 ss << format_str;
529 char c = 0;
530 ss >> m_precision >> c >> m_scale;
531
532 // check for failure
533 if (ss.fail())
534 {
535 throw std::runtime_error("Invalid format string for decimal array");
536 }
537 }
538
539 template <decimal_type T>
540 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
541 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
542 arrow_proxy decimal_array<T>::create_proxy(
543 VALUE_RANGE&& range,
544 VALIDITY_RANGE&& bitmaps,
545 std::size_t precision,
546 int scale,
547 std::optional<std::string_view> name,
548 std::optional<METADATA_RANGE> metadata
549 )
550 {
551 u8_buffer<storage_type> u8_data_buffer(std::forward<VALUE_RANGE>(range));
552 const auto size = u8_data_buffer.size();
553 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<VALIDITY_RANGE>(bitmaps));
554 return create_proxy_impl(
555 std::move(u8_data_buffer),
556 precision,
557 scale,
558 std::move(bitmap),
559 std::move(name),
560 std::move(metadata)
561 );
562 }
563
564 template <decimal_type T>
565 template <std::ranges::input_range NULLABLE_VALUE_RANGE, input_metadata_container METADATA_RANGE>
566 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
567 arrow_proxy decimal_array<T>::create_proxy(
568 NULLABLE_VALUE_RANGE&& range,
569 std::size_t precision,
570 int scale,
571 std::optional<std::string_view> name,
572 std::optional<METADATA_RANGE> metadata
573 )
574 {
575 auto values = range
576 | std::views::transform(
577 [](const auto& v)
578 {
579 return v.get();
580 }
581 );
582 auto is_non_null = range
583 | std::views::transform(
584 [](const auto& v)
585 {
586 return v.has_value();
587 }
588 );
589 return create_proxy(values, is_non_null, precision, scale, std::move(name), std::move(metadata));
590 }
591
592 template <decimal_type T>
593 template <input_metadata_container METADATA_RANGE>
594 auto decimal_array<T>::create_proxy(
595 u8_buffer<storage_type>&& data_buffer,
596 std::size_t precision,
597 int scale,
598 bool nullable,
599 std::optional<std::string_view> name,
600 std::optional<METADATA_RANGE> metadata
601 ) -> arrow_proxy
602 {
603 const size_t size = data_buffer.size();
604 return create_proxy_impl(
605 std::move(data_buffer),
606 precision,
607 scale,
608 nullable ? std::make_optional<validity_bitmap>(nullptr, size, validity_bitmap::default_allocator())
609 : std::nullopt,
610 name,
611 metadata
612 );
613 }
614
615 template <decimal_type T>
616 template <std::ranges::input_range VALUE_RANGE, input_metadata_container METADATA_RANGE>
617 requires std::is_same_v<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
618 arrow_proxy decimal_array<T>::create_proxy(
619 VALUE_RANGE&& range,
620 std::size_t precision,
621 int scale,
622 bool nullable,
623 std::optional<std::string_view> name,
624 std::optional<METADATA_RANGE> metadata
625 )
626 {
627 u8_buffer<storage_type> u8_data_buffer(std::forward<VALUE_RANGE>(range));
628 const auto size = u8_data_buffer.size();
629 return create_proxy_impl(
630 std::move(u8_data_buffer),
631 precision,
632 scale,
633 nullable ? std::make_optional<validity_bitmap>(nullptr, size, validity_bitmap::default_allocator())
634 : std::nullopt,
635 name,
636 metadata
637 );
638 }
639
640 template <decimal_type T>
641 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
642 auto decimal_array<T>::create_proxy(
643 u8_buffer<storage_type>&& data_buffer,
644 R&& bitmap_input,
645 std::size_t precision,
646 int scale,
647 std::optional<std::string_view> name,
648 std::optional<METADATA_RANGE> metadata
649 ) -> arrow_proxy
650 {
651 const auto size = data_buffer.size();
652 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
653 return create_proxy_impl(
654 std::move(data_buffer),
655 precision,
656 scale,
657 std::move(bitmap),
658 std::move(name),
659 std::move(metadata)
660 );
661 }
662
663 template <decimal_type T>
664 template <input_metadata_container METADATA_RANGE>
665 [[nodiscard]] auto decimal_array<T>::create_proxy_impl(
666 u8_buffer<storage_type>&& data_buffer,
667 std::size_t precision,
668 int scale,
669 std::optional<validity_bitmap> bitmap,
670 std::optional<std::string_view> name,
671 std::optional<METADATA_RANGE> metadata
672 ) -> arrow_proxy
673 {
674 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
675 flags = bitmap.has_value()
676 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
677 : std::nullopt;
678 static const repeat_view<bool> children_ownership{true, 0};
679 const auto size = data_buffer.size();
680 const size_t null_count = bitmap.has_value() ? bitmap->null_count() : 0;
681
682 // create arrow schema and array
683 ArrowSchema schema = make_arrow_schema(
684 generate_format(precision, scale),
685 name, // name
686 metadata, // metadata
687 flags, // flags
688 nullptr, // children
690 nullptr, // dictionary
691 true // dictionary ownership
692 );
693
694 std::vector<buffer<uint8_t>> buffers(2);
695 buffers[0] = bitmap.has_value() ? std::move(*bitmap).extract_storage()
697 buffers[1] = std::move(data_buffer).extract_storage();
698
699 // create arrow array
700 ArrowArray arr = make_arrow_array(
701 static_cast<std::int64_t>(size), // lengths
702 static_cast<int64_t>(null_count),
703 0, // offset
704 std::move(buffers),
705 nullptr, // children
706 repeat_view<bool>(true, 0), // children_ownership
707 nullptr, // dictionary
708 true
709 );
710 return arrow_proxy(std::move(arr), std::move(schema));
711 }
712
713 template <decimal_type T>
715 {
716 SPARROW_ASSERT_TRUE(i < this->size());
717 return inner_reference(this, i);
718 }
719
720 template <decimal_type T>
722 {
723 SPARROW_ASSERT_TRUE(i < this->size());
724 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<const storage_type>();
725 return inner_const_reference(ptr[i], m_scale);
726 }
727
728 template <decimal_type T>
729 constexpr auto decimal_array<T>::value_begin() -> value_iterator
730 {
731 return value_iterator(detail::layout_value_functor<self_type, inner_reference>(this), 0);
732 }
733
734 template <decimal_type T>
735 constexpr auto decimal_array<T>::value_end() -> value_iterator
736 {
737 return value_iterator(detail::layout_value_functor<self_type, inner_reference>(this), this->size());
738 }
739
740 template <decimal_type T>
741 constexpr auto decimal_array<T>::value_cbegin() const -> const_value_iterator
742 {
743 return const_value_iterator(detail::layout_value_functor<const self_type, inner_value_type>(this), 0);
744 }
745
746 template <decimal_type T>
747 constexpr auto decimal_array<T>::value_cend() const -> const_value_iterator
748 {
749 return const_value_iterator(
751 this->size()
752 );
753 }
754
755 template <decimal_type T>
756 constexpr void decimal_array<T>::assign(const T& rhs, size_type index)
757 {
758 SPARROW_ASSERT_TRUE(index < this->size());
759 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<storage_type>();
760 const auto storage = rhs.storage();
761 // Scale the storage value to match the scale of the decimal type
762 const auto scaled_storage = storage
763 * static_cast<storage_type>(
764 static_cast<size_t>(std::pow(10, m_scale - rhs.scale()))
765 );
766 ptr[index] = scaled_storage;
767 }
768
769 template <decimal_type T>
770 constexpr std::string decimal_array<T>::generate_format(std::size_t precision, int scale)
771 {
772 constexpr std::size_t sizeof_decimal = sizeof(storage_type);
773 std::string format_str = "d:" + std::to_string(precision) + "," + std::to_string(scale);
774 if constexpr (sizeof_decimal != 16) // We don't need to specify the size for 128-bit
775 // decimals
776 {
777 format_str += "," + std::to_string(sizeof_decimal * 8);
778 }
779 return format_str;
780 }
781
782 template <decimal_type T>
783 constexpr void decimal_array<T>::resize_values(size_t new_length, const inner_value_type& value)
784 {
785 const size_t offset = static_cast<size_t>(this->get_arrow_proxy().offset());
786 const size_t new_size = new_length + offset;
787 auto data_buffer = get_data_buffer();
788 data_buffer.resize(new_size, value.storage());
789 }
790
791 template <decimal_type T>
792 constexpr auto
793 decimal_array<T>::insert_value(const_value_iterator pos, inner_value_type value, size_t count)
794 -> value_iterator
795 {
796 SPARROW_ASSERT_TRUE(value_cbegin() <= pos);
797 SPARROW_ASSERT_TRUE(pos <= value_cend());
798 const auto distance = std::distance(value_cbegin(), pos);
799 const auto offset = static_cast<difference_type>(this->get_arrow_proxy().offset());
800 auto data_buffer = get_data_buffer();
801 const auto insertion_pos = data_buffer.cbegin() + distance + offset;
802 data_buffer.insert(insertion_pos, count, value.storage());
803 return value_iterator(
805 static_cast<size_type>(distance)
806 );
807 }
808
809 template <decimal_type T>
810 constexpr auto decimal_array<T>::erase_values(const_value_iterator pos, size_t count) -> value_iterator
811 {
812 SPARROW_ASSERT_TRUE(value_cbegin() <= pos);
813 SPARROW_ASSERT_TRUE(pos < value_cend());
814 const auto distance = std::distance(value_cbegin(), pos);
815 const auto offset = static_cast<difference_type>(this->get_arrow_proxy().offset());
816 auto data_buffer = get_data_buffer();
817 const auto erase_begin = data_buffer.cbegin() + distance + offset;
818 const auto erase_end = erase_begin + static_cast<difference_type>(count);
819 data_buffer.erase(erase_begin, erase_end);
820 return value_iterator(
822 static_cast<size_type>(distance)
823 );
824 }
825}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
constexpr size_type size() const noexcept(!SPARROW_CONTRACTS_THROW_ON_FAILURE)
Returns the number of elements that can be held in currently allocated storage.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:114
xsimd::aligned_allocator< T > default_allocator
Definition buffer.hpp:126
Array implementation for decimal types.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::difference_type difference_type
typename base_type::bitmap_const_reference bitmap_const_reference
array_inner_types< self_type > inner_types
decimal_array(arrow_proxy proxy)
Constructs a decimal array from an arrow proxy.
typename inner_types::const_value_iterator const_value_iterator
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::bitmap_type bitmap_type
typename base_type::const_bitmap_range const_bitmap_range
typename inner_types::inner_reference inner_reference
constexpr inner_const_reference value(size_type i) const
Gets a constant reference to the value at the specified index.
typename inner_types::inner_const_reference inner_const_reference
mutable_array_bitmap_base< self_type > base_type
typename inner_types::inner_value_type inner_value_type
typename base_type::iterator_tag iterator_tag
decimal_array(Args &&... args)
Constructs a decimal array with the given arguments.
constexpr inner_reference value(size_type i)
Gets a mutable reference to the value at the specified index.
typename inner_types::value_iterator value_iterator
storage_type extract_storage() noexcept
Extracts the underlying storage (move operation).
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
Definition mp_utils.hpp:102
constexpr bool is_decimal_array_v
Type trait to check if a type is a decimal array.
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
decimal_array< decimal< int128_t > > decimal_128_array
Type alias for 128-bit decimal array.
decimal_array< decimal< int32_t > > decimal_32_array
Type alias for 32-bit decimal array.
decimal_array< decimal< int64_t > > decimal_64_array
Type alias for 64-bit decimal array.
decimal_array< decimal< int256_t > > decimal_256_array
Type alias for 256-bit decimal array.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
auto make_buffer_adaptor(FromBufferRef &buf)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
decimal_reference< array_type > inner_reference
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
bitmap_type::const_reference bitmap_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
static constexpr sparrow::data_type get()
Gets the data type for 128-bit decimal.
static constexpr sparrow::data_type get()
Gets the data type for 256-bit decimal.
static constexpr sparrow::data_type get()
Gets the data type for 32-bit decimal.
static constexpr sparrow::data_type get()
Gets the data type for 64-bit decimal.
Metafunction for retrieving the data_type of a typed array.