sparrow 0.9.0
Loading...
Searching...
No Matches
decimal_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18
33
34namespace sparrow
35{
// Forward declaration of the decimal array class template, constrained by the
// decimal_type concept. The full definition appears further down in this listing.
41 template <decimal_type T>
42 class decimal_array;
43
52
53 namespace detail
54 {
55 template <>
57 {
63 [[nodiscard]] static constexpr sparrow::data_type get()
64 {
66 }
67 };
68
70 template <>
72 {
78 [[nodiscard]] static constexpr sparrow::data_type get()
79 {
81 }
82 };
83
85 template <>
87 {
93 [[nodiscard]] static constexpr sparrow::data_type get()
94 {
96 }
97 };
98
100 template <>
102 {
108 [[nodiscard]] static constexpr sparrow::data_type get()
109 {
111 }
112 };
113
114 }
115
116 template <decimal_type T>
134
135
141 template <class T>
143
// Array implementation for decimal types.
// NOTE(review): this is a documentation listing; the leading number on each line is the
// listing's own line counter and some lines (several type aliases, a requires-clause)
// are absent from it.
152 template <decimal_type T>
153 class decimal_array final : public mutable_array_bitmap_base<decimal_array<T>>
154 {
155 public:
156
159
// Value and reference types, taken from the inner_types traits of this array.
161 using inner_value_type = typename inner_types::inner_value_type;
162 using inner_reference = typename inner_types::inner_reference;
163 using inner_const_reference = typename inner_types::inner_const_reference;
164
165 // the integral value type used to store the bits
166 using storage_type = typename T::integer_type;
// Compile-time guard: only storage types of 4, 8, 16 or 32 bytes are accepted.
167 static_assert(
168 sizeof(storage_type) == 4 || sizeof(storage_type) == 8 || sizeof(storage_type) == 16
169 || sizeof(storage_type) == 32,
170 "The storage type must be an integral type of size 4, 8, 16 or 32 bytes"
171 );
172
177
180
184
// Iterator types over the stored values, taken from the inner_types traits.
185 using value_iterator = typename inner_types::value_iterator;
186 using const_value_iterator = typename inner_types::const_value_iterator;
187
// Constructs a decimal array from an arrow proxy. The out-of-class definition parses
// the precision and scale from the proxy's format string and throws std::runtime_error
// when that string is invalid.
193 explicit decimal_array(arrow_proxy proxy);
194
// Constructs a decimal array with the given arguments: they are forwarded to one of the
// private create_proxy overloads and the resulting proxy is passed to the constructor above.
201 template <class... Args>
203 explicit decimal_array(Args&&... args)
204 : decimal_array(create_proxy(std::forward<Args>(args)...))
205 {
206 }
207
// Gets a mutable reference to the value at index i.
214 [[nodiscard]] constexpr inner_reference value(size_type i);
215
// Gets a constant reference to the value at index i.
222 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
223
224 private:
225
// Creates an arrow proxy from a range of values convertible to T::integer_type and a
// validity bitmap input, with the given precision, scale, optional name and metadata.
240 template <
241 std::ranges::input_range VALUE_RANGE,
242 validity_bitmap_input VALIDITY_RANGE,
243 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
244 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
245 [[nodiscard]] static auto create_proxy(
246 VALUE_RANGE&& range,
247 VALIDITY_RANGE&& bitmaps,
248 std::size_t precision,
249 int scale,
250 std::optional<std::string_view> name = std::nullopt,
251 std::optional<METADATA_RANGE> metadata = std::nullopt
252 ) -> arrow_proxy;
253
// Creates an arrow proxy from a range of nullable<T::integer_type>; the definition derives
// both the values and the validity flags from the elements of the range.
266 template <
267 std::ranges::input_range NULLABLE_VALUE_RANGE,
268 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
269 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
270 [[nodiscard]] static auto create_proxy(
271 NULLABLE_VALUE_RANGE&& range,
272 std::size_t precision,
273 int scale,
274 std::optional<std::string_view> name = std::nullopt,
275 std::optional<METADATA_RANGE> metadata = std::nullopt
276 ) -> arrow_proxy;
277
// Creates an arrow proxy from a range whose value type is exactly T::integer_type.
// The nullable flag selects whether a validity bitmap is created at all.
291 template <std::ranges::input_range VALUE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
292 requires std::is_same_v<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
293 [[nodiscard]] static auto create_proxy(
294 VALUE_RANGE&& range,
295 std::size_t precision,
296 int scale,
297 bool nullable = true,
298 std::optional<std::string_view> name = std::nullopt,
299 std::optional<METADATA_RANGE> metadata = std::nullopt
300 ) -> arrow_proxy;
301
// Creates an arrow proxy from an already built data buffer and a validity bitmap input.
315 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
316 [[nodiscard]] static auto create_proxy(
317 u8_buffer<storage_type>&& data_buffer,
318 R&& bitmaps,
319 std::size_t precision,
320 int scale,
321 std::optional<std::string_view> name = std::nullopt,
322 std::optional<METADATA_RANGE> metadata = std::nullopt
323 ) -> arrow_proxy;
324
// Creates an arrow proxy from an already built data buffer.
// The nullable flag selects whether a validity bitmap is created at all.
337 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
338 [[nodiscard]] static auto create_proxy(
339 u8_buffer<storage_type>&& data_buffer,
340 std::size_t precision,
341 int scale,
342 bool nullable = true,
343 std::optional<std::string_view> name = std::nullopt,
344 std::optional<METADATA_RANGE> metadata = std::nullopt
345 ) -> arrow_proxy;
346
// Common implementation behind the create_proxy overloads: builds the ArrowSchema and
// ArrowArray and wraps them in an arrow_proxy. An empty bitmap optional yields an array
// without the NULLABLE flag and with a null count of 0.
359 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
360 [[nodiscard]] static auto create_proxy_impl(
361 u8_buffer<storage_type>&& data_buffer,
362 std::size_t precision,
363 int scale,
364 std::optional<validity_bitmap> bitmap,
365 std::optional<std::string_view> name = std::nullopt,
366 std::optional<METADATA_RANGE> metadata = std::nullopt
367 ) -> arrow_proxy;
368
// Builds the format string "d:<precision>,<scale>", followed by ",<bit width>" unless
// the storage type is 16 bytes wide.
376 static constexpr std::string generate_format(std::size_t precision, int scale);
377
// Iterator to the first value (mutable access).
383 [[nodiscard]] constexpr value_iterator value_begin();
384
// Iterator one past the last value (mutable access).
390 [[nodiscard]] constexpr value_iterator value_end();
391
// Iterator to the first value (read-only access).
397 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
398
// Iterator one past the last value (read-only access).
404 [[nodiscard]] constexpr const_value_iterator value_cend() const;
405
// Writes rhs into the data buffer at the given index after adjusting its storage
// to this array's scale (m_scale).
412 constexpr void assign(const T& rhs, size_type index);
413
414 // Modifiers
415
// Index of the values buffer in the arrow proxy's buffer list; create_proxy_impl puts
// the validity bitmap in buffer 0 and the values in buffer 1.
417 static constexpr size_type DATA_BUFFER_INDEX = 1;
418 friend base_type;
// decimal_reference needs access to the private members of the array.
// NOTE(review): presumably so that it can call assign() - confirm in decimal_reference.
423 friend class decimal_reference<self_type>;
424
// Precision parsed from the format string by the proxy constructor.
426 std::size_t m_precision;
// Scale parsed from the format string by the proxy constructor.
428 int m_scale;
429 };
430
431 /**********************************
432 * decimal_array implementation *
433 **********************************/
434
435 template <decimal_type T>
437 : base_type(std::move(proxy))
438 , m_precision(0)
439 , m_scale(0)
440 {
441 // parse the format string
442 const auto format = this->get_arrow_proxy().format();
443
444 // ensure that the format string starts with d:
445 if (format.size() < 2 || format[0] != 'd' || format[1] != ':')
446 {
447 throw std::runtime_error("Invalid format string for decimal array");
448 }
449
450 // substring staring aftet d:
451 const auto format_str = format.substr(2);
452
453 std::stringstream ss;
454 ss << format_str;
455 char c = 0;
456 ss >> m_precision >> c >> m_scale;
457
458 // check for failure
459 if (ss.fail())
460 {
461 throw std::runtime_error("Invalid format string for decimal array");
462 }
463 }
464
465 template <decimal_type T>
466 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
467 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
468 arrow_proxy decimal_array<T>::create_proxy(
469 VALUE_RANGE&& range,
470 VALIDITY_RANGE&& bitmaps,
471 std::size_t precision,
472 int scale,
473 std::optional<std::string_view> name,
474 std::optional<METADATA_RANGE> metadata
475 )
476 {
477 u8_buffer<storage_type> u8_data_buffer(std::forward<VALUE_RANGE>(range));
478 const auto size = u8_data_buffer.size();
479 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<VALIDITY_RANGE>(bitmaps));
480 return create_proxy_impl(
481 std::move(u8_data_buffer),
482 precision,
483 scale,
484 std::move(bitmap),
485 std::move(name),
486 std::move(metadata)
487 );
488 }
489
490 template <decimal_type T>
491 template <std::ranges::input_range NULLABLE_VALUE_RANGE, input_metadata_container METADATA_RANGE>
492 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
493 arrow_proxy decimal_array<T>::create_proxy(
494 NULLABLE_VALUE_RANGE&& range,
495 std::size_t precision,
496 int scale,
497 std::optional<std::string_view> name,
498 std::optional<METADATA_RANGE> metadata
499 )
500 {
501 auto values = range
502 | std::views::transform(
503 [](const auto& v)
504 {
505 return v.get();
506 }
507 );
508 auto is_non_null = range
509 | std::views::transform(
510 [](const auto& v)
511 {
512 return v.has_value();
513 }
514 );
515 return create_proxy(values, is_non_null, precision, scale, std::move(name), std::move(metadata));
516 }
517
518 template <decimal_type T>
519 template <input_metadata_container METADATA_RANGE>
520 auto decimal_array<T>::create_proxy(
521 u8_buffer<storage_type>&& data_buffer,
522 std::size_t precision,
523 int scale,
524 bool nullable,
525 std::optional<std::string_view> name,
526 std::optional<METADATA_RANGE> metadata
527 ) -> arrow_proxy
528 {
529 const size_t size = data_buffer.size();
530 return create_proxy_impl(
531 std::move(data_buffer),
532 precision,
533 scale,
534 nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
535 name,
536 metadata
537 );
538 }
539
540 template <decimal_type T>
541 template <std::ranges::input_range VALUE_RANGE, input_metadata_container METADATA_RANGE>
542 requires std::is_same_v<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
543 arrow_proxy decimal_array<T>::create_proxy(
544 VALUE_RANGE&& range,
545 std::size_t precision,
546 int scale,
547 bool nullable,
548 std::optional<std::string_view> name,
549 std::optional<METADATA_RANGE> metadata
550 )
551 {
552 u8_buffer<storage_type> u8_data_buffer(std::forward<VALUE_RANGE>(range));
553 const auto size = u8_data_buffer.size();
554 return create_proxy_impl(
555 std::move(u8_data_buffer),
556 precision,
557 scale,
558 nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
559 name,
560 metadata
561 );
562 }
563
564 template <decimal_type T>
565 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
566 auto decimal_array<T>::create_proxy(
567 u8_buffer<storage_type>&& data_buffer,
568 R&& bitmap_input,
569 std::size_t precision,
570 int scale,
571 std::optional<std::string_view> name,
572 std::optional<METADATA_RANGE> metadata
573 ) -> arrow_proxy
574 {
575 const auto size = data_buffer.size();
576 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
577 return create_proxy_impl(
578 std::move(data_buffer),
579 precision,
580 scale,
581 std::move(bitmap),
582 std::move(name),
583 std::move(metadata)
584 );
585 }
586
587 template <decimal_type T>
588 template <input_metadata_container METADATA_RANGE>
589 [[nodiscard]] auto decimal_array<T>::create_proxy_impl(
590 u8_buffer<storage_type>&& data_buffer,
591 std::size_t precision,
592 int scale,
593 std::optional<validity_bitmap> bitmap,
594 std::optional<std::string_view> name,
595 std::optional<METADATA_RANGE> metadata
596 ) -> arrow_proxy
597 {
598 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
599 flags = bitmap.has_value()
600 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
601 : std::nullopt;
602 static const repeat_view<bool> children_ownership{true, 0};
603 const auto size = data_buffer.size();
604 const size_t null_count = bitmap.has_value() ? bitmap->null_count() : 0;
605
606 // create arrow schema and array
607 ArrowSchema schema = make_arrow_schema(
608 generate_format(precision, scale),
609 name, // name
610 metadata, // metadata
611 flags, // flags
612 nullptr, // children
614 nullptr, // dictionary
615 true // dictionary ownership
616 );
617
618 std::vector<buffer<uint8_t>> buffers(2);
619 buffers[0] = bitmap.has_value() ? std::move(*bitmap).extract_storage() : buffer<uint8_t>{nullptr, 0};
620 buffers[1] = std::move(data_buffer).extract_storage();
621
622 // create arrow array
623 ArrowArray arr = make_arrow_array(
624 static_cast<std::int64_t>(size), // lengths
625 static_cast<int64_t>(null_count),
626 0, // offset
627 std::move(buffers),
628 nullptr, // children
629 repeat_view<bool>(true, 0), // children_ownership
630 nullptr, // dictionary
631 true
632 );
633 return arrow_proxy(std::move(arr), std::move(schema));
634 }
635
636 template <decimal_type T>
638 {
639 SPARROW_ASSERT_TRUE(i < this->size());
640 return inner_reference(this, i);
641 }
642
643 template <decimal_type T>
645 {
646 SPARROW_ASSERT_TRUE(i < this->size());
647 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<const storage_type>();
648 return inner_const_reference(ptr[i], m_scale);
649 }
650
651 template <decimal_type T>
652 constexpr auto decimal_array<T>::value_begin() -> value_iterator
653 {
654 return value_iterator(detail::layout_value_functor<self_type, inner_reference>(this), 0);
655 }
656
657 template <decimal_type T>
658 constexpr auto decimal_array<T>::value_end() -> value_iterator
659 {
660 return value_iterator(detail::layout_value_functor<self_type, inner_reference>(this), this->size());
661 }
662
663 template <decimal_type T>
664 constexpr auto decimal_array<T>::value_cbegin() const -> const_value_iterator
665 {
666 return const_value_iterator(detail::layout_value_functor<const self_type, inner_value_type>(this), 0);
667 }
668
669 template <decimal_type T>
670 constexpr auto decimal_array<T>::value_cend() const -> const_value_iterator
671 {
672 return const_value_iterator(
674 this->size()
675 );
676 }
677
678 template <decimal_type T>
679 constexpr void decimal_array<T>::assign(const T& rhs, size_type index)
680 {
681 SPARROW_ASSERT_TRUE(index < this->size());
682 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<storage_type>();
683 const auto storage = rhs.storage();
684 // Scale the storage value to match the scale of the decimal type
685 const auto scaled_storage = storage
686 * static_cast<storage_type>(
687 static_cast<size_t>(std::pow(10, m_scale - rhs.scale()))
688 );
689 ptr[index] = scaled_storage;
690 }
691
692 template <decimal_type T>
693 constexpr std::string decimal_array<T>::generate_format(std::size_t precision, int scale)
694 {
695 constexpr std::size_t sizeof_decimal = sizeof(storage_type);
696 std::string format_str = "d:" + std::to_string(precision) + "," + std::to_string(scale);
697 if constexpr (sizeof_decimal != 16) // We don't need to specify the size for 128-bit
698 // decimals
699 {
700 format_str += "," + std::to_string(sizeof_decimal * 8);
701 }
702 return format_str;
703 }
704}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
constexpr size_type size() const noexcept(!SPARROW_CONTRACTS_THROW_ON_FAILURE)
Returns the number of elements that can be held in currently allocated storage.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
Array implementation for decimal types.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::difference_type difference_type
typename base_type::bitmap_const_reference bitmap_const_reference
array_inner_types< self_type > inner_types
decimal_array(arrow_proxy proxy)
Constructs a decimal array from an arrow proxy.
typename inner_types::const_value_iterator const_value_iterator
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::bitmap_type bitmap_type
typename base_type::const_bitmap_range const_bitmap_range
typename inner_types::inner_reference inner_reference
constexpr inner_const_reference value(size_type i) const
Gets a constant reference to the value at the specified index.
typename inner_types::inner_const_reference inner_const_reference
mutable_array_bitmap_base< self_type > base_type
typename inner_types::inner_value_type inner_value_type
typename base_type::iterator_tag iterator_tag
decimal_array(Args &&... args)
Constructs a decimal array with the given arguments.
constexpr inner_reference value(size_type i)
Gets a mutable reference to the value at the specified index.
typename inner_types::value_iterator value_iterator
storage_type extract_storage() noexcept
Extracts the underlying storage (move operation).
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:291
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
Definition mp_utils.hpp:102
constexpr bool is_decimal_array_v
Type trait to check if a type is a decimal array.
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
decimal_array< decimal< int128_t > > decimal_128_array
Type alias for 128-bit decimal array.
decimal_array< decimal< int32_t > > decimal_32_array
Type alias for 32-bit decimal array.
decimal_array< decimal< int64_t > > decimal_64_array
Type alias for 64-bit decimal array.
decimal_array< decimal< int256_t > > decimal_256_array
Type alias for 256-bit decimal array.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
decimal_reference< array_type > inner_reference
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
bitmap_type::const_reference bitmap_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
static constexpr sparrow::data_type get()
Gets the data type for 128-bit decimal.
static constexpr sparrow::data_type get()
Gets the data type for 256-bit decimal.
static constexpr sparrow::data_type get()
Gets the data type for 32-bit decimal.
static constexpr sparrow::data_type get()
Gets the data type for 64-bit decimal.
Metafunction for retrieving the data_type of a typed array.