sparrow 0.9.0
Loading...
Searching...
No Matches
timestamp_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
27#include "sparrow/u8_buffer.hpp"
33
34// tss : timestamp<std::chrono::seconds>
35// tsm : timestamp<std::chrono::milliseconds>
36// tsu : timestamp<std::chrono::microseconds>
37// tsn : timestamp<std::chrono::nanoseconds>
38
39namespace sparrow
40{
41 template <timestamp_type T>
42 class timestamp_array;
43
44 template <timestamp_type T>
60
/// Primary template of the `is_timestamp_array` trait.
/// Any type not matched by a more specialised version derives from
/// `std::false_type`, i.e. `is_timestamp_array<X>::value` is `false`.
template <typename T>
struct is_timestamp_array : std::bool_constant<false> {};
70
76 template <typename T>
77 struct is_timestamp_array<timestamp_array<T>> : std::true_type
78 {
79 };
80
86 template <typename T>
88
96
104
105 namespace detail
106 {
107 template <>
109 {
110 [[nodiscard]] static constexpr sparrow::data_type get()
111 {
113 }
114 };
115
116 template <>
118 {
119 [[nodiscard]] static constexpr sparrow::data_type get()
120 {
122 }
123 };
124
125 template <>
127 {
128 [[nodiscard]] static constexpr sparrow::data_type get()
129 {
131 }
132 };
133
134 template <>
136 {
137 [[nodiscard]] static constexpr sparrow::data_type get()
138 {
140 }
141 };
142 }
143
196 template <timestamp_type T>
197 class timestamp_array final : public mutable_array_bitmap_base<timestamp_array<T>>
198 {
199 public:
200
203
205 using inner_value_type = typename inner_types::inner_value_type;
206 using inner_reference = typename inner_types::inner_reference;
207 using inner_const_reference = typename inner_types::inner_const_reference;
208
210 using bitmap_reference = typename base_type::bitmap_reference;
214 using bitmap_range = typename base_type::bitmap_range;
216
220
224
225 using value_iterator = typename base_type::value_iterator;
226 using const_value_iterator = typename base_type::const_value_iterator;
227
228 using iterator = typename base_type::iterator;
229 using const_iterator = typename base_type::const_iterator;
230
231 using functor_type = typename inner_types::functor_type;
232 using const_functor_type = typename inner_types::const_functor_type;
233
234 using inner_value_type_duration = inner_value_type::duration;
235 using buffer_inner_value_type = inner_value_type_duration::rep;
238
252
292 template <class... Args>
294 constexpr explicit timestamp_array(Args&&... args)
295 : base_type(create_proxy(std::forward<Args>(args)...))
296 , m_timezone(get_timezone(this->get_arrow_proxy()))
297 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
298 {
299 }
300
315 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
317 const date::time_zone* timezone,
318 std::initializer_list<inner_value_type> init,
319 std::optional<std::string_view> name = std::nullopt,
320 std::optional<METADATA_RANGE> metadata = std::nullopt
321 )
322 : base_type(create_proxy(timezone, init, std::move(name), std::move(metadata)))
323 , m_timezone(timezone)
324 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
325 {
326 }
327
338 constexpr timestamp_array(const timestamp_array& rhs);
339
352
363
375
376 private:
377
389 [[nodiscard]] constexpr inner_reference value(size_type i);
390
403 [[nodiscard]] constexpr inner_const_reference value(size_type i) const;
404
412 [[nodiscard]] constexpr value_iterator value_begin();
413
421 [[nodiscard]] constexpr value_iterator value_end();
422
430 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
431
439 [[nodiscard]] constexpr const_value_iterator value_cend() const;
440
441 template <input_metadata_container METADATA_RANGE>
442 [[nodiscard]] static arrow_proxy create_proxy(
443 const date::time_zone* timezone,
444 size_type n,
445 std::optional<std::string_view> name = std::nullopt,
446 std::optional<METADATA_RANGE> metadata = std::nullopt
447 );
448
449 template <
451 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
452 [[nodiscard]] static auto create_proxy(
453 const date::time_zone* timezone,
455 R&& bitmaps = validity_bitmap{},
456 std::optional<std::string_view> name = std::nullopt,
457 std::optional<METADATA_RANGE> metadata = std::nullopt
458 ) -> arrow_proxy;
459
460 // range of values (no missing values)
461 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
462 requires std::convertible_to<std::ranges::range_value_t<R>, T>
463 [[nodiscard]] static auto create_proxy(
464 const date::time_zone* timezone,
465 R&& range,
466 bool nullable = true,
467 std::optional<std::string_view> name = std::nullopt,
468 std::optional<METADATA_RANGE> metadata = std::nullopt
469 ) -> arrow_proxy;
470
471 template <typename U, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
472 requires std::convertible_to<U, T>
473 [[nodiscard]] static arrow_proxy create_proxy(
474 const date::time_zone* timezone,
475 size_type n,
476 const U& value = U{},
477 std::optional<std::string_view> name = std::nullopt,
478 std::optional<METADATA_RANGE> metadata = std::nullopt
479 );
480
481 // range of values, validity_bitmap_input
482 template <
483 std::ranges::input_range VALUE_RANGE,
484 validity_bitmap_input VALIDITY_RANGE,
485 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
486 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
487 [[nodiscard]] static arrow_proxy create_proxy(
488 const date::time_zone* timezone,
489 VALUE_RANGE&&,
490 VALIDITY_RANGE&&,
491 std::optional<std::string_view> name = std::nullopt,
492 std::optional<METADATA_RANGE> metadata = std::nullopt
493 );
494
495 // range of nullable values
496 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
497 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
498 [[nodiscard]] static arrow_proxy create_proxy(
499 const date::time_zone* timezone,
500 R&&,
501 std::optional<std::string_view> name = std::nullopt,
502 std::optional<METADATA_RANGE> metadata = std::nullopt
503 );
504
505 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
506 [[nodiscard]] static arrow_proxy create_proxy_impl(
507 const date::time_zone* timezone,
508 u8_buffer<buffer_inner_value_type>&& data_buffer,
509 std::optional<validity_bitmap>&& bitmap_input,
510 std::optional<std::string_view> name = std::nullopt,
511 std::optional<METADATA_RANGE> metadata = std::nullopt
512 );
513
514 // Modifiers
515
527 constexpr void resize_values(size_type new_length, inner_value_type value);
528
543 constexpr value_iterator
544 insert_value(const_value_iterator pos, inner_value_type value, size_type count);
545
563 template <mpl::iterator_of_type<typename timestamp_array<T>::inner_value_type> InputIt>
564 constexpr auto insert_values(const_value_iterator pos, InputIt first, InputIt last) -> value_iterator
565 {
566 const auto input_range = std::ranges::subrange(first, last);
567 const auto values = input_range
568 | std::views::transform(
569 [](const auto& v)
570 {
571 return v.get_sys_time().time_since_epoch();
572 }
573 );
574 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
575 m_data_access.insert_values(idx, values.begin(), values.end());
576 return sparrow::next(value_begin(), idx);
577 }
578
594 constexpr value_iterator erase_values(const_value_iterator pos, size_type count);
595
609 constexpr void assign(const T& rhs, size_type index);
610
624 constexpr void assign(T&& rhs, size_type index);
625
626
627 const date::time_zone* m_timezone;
628 details::primitive_data_access<inner_value_type_duration> m_data_access;
629
630 static constexpr size_type DATA_BUFFER_INDEX = 1;
631 friend class timestamp_reference<self_type>;
632 friend base_type;
635 friend functor_type;
636 friend const_functor_type;
637 };
638
639 template <timestamp_type T>
641 : base_type(rhs)
642 , m_timezone(rhs.m_timezone)
643 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
644 {
645 }
646
647 template <timestamp_type T>
649 {
651 m_timezone = rhs.m_timezone;
652 m_data_access.reset_proxy(this->get_arrow_proxy());
653 return *this;
654 }
655
656 template <timestamp_type T>
658 : base_type(std::move(rhs))
659 , m_timezone(rhs.m_timezone)
660 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
661 {
662 }
663
664 template <timestamp_type T>
666 {
667 base_type::operator=(std::move(rhs));
668 m_timezone = rhs.m_timezone;
669 m_data_access.reset_proxy(this->get_arrow_proxy());
670 return *this;
671 }
672
673 template <timestamp_type T>
675 : base_type(std::move(proxy))
676 , m_timezone(get_timezone(this->get_arrow_proxy()))
677 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
678 {
679 }
680
681 template <timestamp_type T>
682 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
683 auto timestamp_array<T>::create_proxy(
684 const date::time_zone* timezone,
686 R&& bitmap_input,
687 std::optional<std::string_view> name,
688 std::optional<METADATA_RANGE> metadata
689 ) -> arrow_proxy
690 {
691 const auto size = data_buffer.size();
692 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
693 return create_proxy_impl(
694 timezone,
695 std::forward<u8_buffer<buffer_inner_value_type>>(data_buffer),
696 std::move(bitmap),
697 std::move(name),
698 std::move(metadata)
699 );
700 }
701
702 template <timestamp_type T>
703 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
704 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
705 arrow_proxy timestamp_array<T>::create_proxy(
706 const date::time_zone* timezone,
707 VALUE_RANGE&& values,
708 VALIDITY_RANGE&& validity_input,
709 std::optional<std::string_view> name,
710 std::optional<METADATA_RANGE> metadata
711 )
712 {
713 const auto range = values
714 | std::views::transform(
715 [](const auto& v)
716 {
717 return v.get_sys_time().time_since_epoch().count();
718 }
719 );
720
721
722 u8_buffer<buffer_inner_value_type> data_buffer(range);
723 return create_proxy(
724 timezone,
725 std::move(data_buffer),
726 std::forward<VALIDITY_RANGE>(validity_input),
727 std::move(name),
728 std::move(metadata)
729 );
730 }
731
732 template <timestamp_type T>
733 template <typename U, input_metadata_container METADATA_RANGE>
734 requires std::convertible_to<U, T>
735 arrow_proxy timestamp_array<T>::create_proxy(
736 const date::time_zone* timezone,
737 size_type n,
738 const U& value,
739 std::optional<std::string_view> name,
740 std::optional<METADATA_RANGE> metadata
741 )
742 {
743 // create data_buffer
744 u8_buffer<buffer_inner_value_type> data_buffer(n, to_days_since_the_UNIX_epoch(value));
745 return create_proxy(timezone, std::move(data_buffer), std::move(name), std::move(metadata));
746 }
747
748 template <timestamp_type T>
749 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
750 requires std::convertible_to<std::ranges::range_value_t<R>, T>
751 arrow_proxy timestamp_array<T>::create_proxy(
752 const date::time_zone* timezone,
753 R&& range,
754 bool nullable,
755 std::optional<std::string_view> name,
756 std::optional<METADATA_RANGE> metadata
757 )
758 {
759 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0)
760 : std::nullopt;
761 const auto values = range
762 | std::views::transform(
763 [](const auto& v)
764 {
765 return v.get_sys_time().time_since_epoch().count();
766 }
767 );
768 u8_buffer<buffer_inner_value_type> data_buffer(values);
769 return self_type::create_proxy_impl(
770 timezone,
771 std::move(data_buffer),
772 std::move(bitmap),
773 std::move(name),
774 std::move(metadata)
775 );
776 }
777
778 // range of nullable values
779 template <timestamp_type T>
780 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
781 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
782 arrow_proxy timestamp_array<T>::create_proxy(
783 const date::time_zone* timezone,
784 R&& range,
785 std::optional<std::string_view> name,
786 std::optional<METADATA_RANGE> metadata
787 )
788 { // split into values and is_non_null ranges
789 auto values = range
790 | std::views::transform(
791 [](const auto& v)
792 {
793 return v.get();
794 }
795 );
796 auto is_non_null = range
797 | std::views::transform(
798 [](const auto& v)
799 {
800 return v.has_value();
801 }
802 );
803 return self_type::create_proxy(timezone, values, is_non_null, std::move(name), std::move(metadata));
804 }
805
806 template <timestamp_type T>
807 template <input_metadata_container METADATA_RANGE>
808 arrow_proxy timestamp_array<T>::create_proxy_impl(
809 const date::time_zone* timezone,
811 std::optional<validity_bitmap>&& bitmap,
812 std::optional<std::string_view> name,
813 std::optional<METADATA_RANGE> metadata
814 )
815 {
816 const auto size = data_buffer.size();
817 const auto null_count = bitmap.has_value() ? bitmap->null_count() : 0;
818
820 format += timezone->name();
821
823
824 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
825 flags = bitmap.has_value()
826 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
827 : std::nullopt;
828
829 // create arrow schema and array
830 ArrowSchema schema = make_arrow_schema(
831 std::move(format), // format
832 std::move(name), // name
833 std::move(metadata), // metadata
834 flags, // flags
835 nullptr, // children
836 children_ownership, // children ownership
837 nullptr, // dictionary,
838 true // dictionary ownership
839 );
840
841 std::vector<buffer<uint8_t>> buffers{
842 bitmap.has_value() ? std::move(bitmap.value()).extract_storage() : buffer<uint8_t>{nullptr, 0},
843 std::move(data_buffer).extract_storage()
844 };
845
846 // create arrow array
847 ArrowArray arr = make_arrow_array(
848 static_cast<std::int64_t>(size), // length
849 static_cast<int64_t>(null_count),
850 0, // offset
851 std::move(buffers),
852 nullptr, // children
853 children_ownership, // children ownership
854 nullptr, // dictionary
855 true // dicitonary ownership
856 );
857 return arrow_proxy(std::move(arr), std::move(schema));
858 }
859
860 template <timestamp_type T>
861 constexpr void timestamp_array<T>::assign(const T& rhs, size_type index)
862 {
863 SPARROW_ASSERT_TRUE(index < this->size());
864 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
865 }
866
867 template <timestamp_type T>
868 constexpr void timestamp_array<T>::assign(T&& rhs, size_type index)
869 {
870 SPARROW_ASSERT_TRUE(index < this->size());
871 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
872 }
873
874 template <timestamp_type T>
875 constexpr auto timestamp_array<T>::value(size_type i) -> inner_reference
876 {
877 SPARROW_ASSERT_TRUE(i < this->size());
878 return inner_reference(this, i);
879 }
880
881 template <timestamp_type T>
882 constexpr auto timestamp_array<T>::value(size_type i) const -> inner_const_reference
883 {
884 SPARROW_ASSERT_TRUE(i < this->size());
885 const auto& val = m_data_access.value(i);
886 using time_duration = typename T::duration;
887 const auto sys_time = std::chrono::sys_time<time_duration>{val};
888 return T{m_timezone, sys_time};
889 }
890
891 template <timestamp_type T>
892 constexpr auto timestamp_array<T>::value_begin() -> value_iterator
893 {
894 return value_iterator(functor_type(this), 0);
895 }
896
897 template <timestamp_type T>
898 constexpr auto timestamp_array<T>::value_end() -> value_iterator
899 {
900 return value_iterator(functor_type(this), this->size());
901 }
902
903 template <timestamp_type T>
904 constexpr auto timestamp_array<T>::value_cbegin() const -> const_value_iterator
905 {
906 return const_value_iterator(const_functor_type(this), 0);
907 }
908
909 template <timestamp_type T>
910 constexpr auto timestamp_array<T>::value_cend() const -> const_value_iterator
911 {
912 return const_value_iterator(const_functor_type(this), this->size());
913 }
914
915 template <timestamp_type T>
916 constexpr void timestamp_array<T>::resize_values(size_type new_length, inner_value_type value)
917 {
918 m_data_access.resize_values(new_length, value.get_sys_time().time_since_epoch());
919 }
920
921 template <timestamp_type T>
922 constexpr auto
923 timestamp_array<T>::insert_value(const_value_iterator pos, inner_value_type value, size_type count)
924 -> value_iterator
925 {
926 SPARROW_ASSERT_TRUE(pos <= value_cend());
927 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
928 m_data_access.insert_value(idx, value.get_sys_time().time_since_epoch(), count);
929 return value_iterator(functor_type(this), idx);
930 }
931
932 template <timestamp_type T>
933 constexpr auto timestamp_array<T>::erase_values(const_value_iterator pos, size_type count) -> value_iterator
934 {
935 SPARROW_ASSERT_TRUE(pos < value_cend());
936 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
937 m_data_access.erase_values(idx, count);
938 return value_iterator(functor_type(this), idx);
939 }
940}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
storage_type extract_storage() noexcept
Extracts the underlying storage (move operation).
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
A view that repeats a value a given number of times.
Array implementation for storing timestamp values with timezone information.
mutable_array_bitmap_base< self_type > base_type
typename inner_types::inner_reference inner_reference
constexpr timestamp_array(timestamp_array &&rhs)
Move constructor.
constexpr timestamp_array(const timestamp_array &rhs)
Copy constructor.
timestamp_array(arrow_proxy)
Constructs timestamp array from Arrow proxy.
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::const_value_iterator const_value_iterator
pointer_iterator< const buffer_inner_value_type * > buffer_inner_const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
constexpr timestamp_array & operator=(const timestamp_array &rhs)
Copy assignment operator.
pointer_iterator< buffer_inner_value_type * > buffer_inner_value_iterator
typename base_type::const_bitmap_range const_bitmap_range
constexpr timestamp_array(Args &&... args)
Generic constructor for creating timestamp arrays from various inputs.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::inner_const_reference inner_const_reference
nullable< inner_reference, bitmap_reference > reference
typename base_type::bitmap_reference bitmap_reference
constexpr timestamp_array(const date::time_zone *timezone, std::initializer_list< inner_value_type > init, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
Constructs timestamp array from initializer list.
constexpr timestamp_array & operator=(timestamp_array &&rhs)
Move assignment operator.
typename inner_types::const_functor_type const_functor_type
typename inner_types::inner_value_type inner_value_type
Implementation of reference to inner type used for layout L.
This buffer class is used as storage buffer for all sparrow arrays.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:304
Concept defining valid input types for validity bitmap creation.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
timestamp< std::chrono::microseconds > timestamp_microsecond
constexpr std::string_view data_type_to_format(data_type type)
timestamp< std::chrono::nanoseconds > timestamp_nanosecond
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
SPARROW_API const date::time_zone * get_timezone(const arrow_proxy &proxy)
timestamp_array< timestamp_second > timestamp_seconds_array
Type aliases for timestamp arrays with common durations.
date::zoned_time< Duration, TimeZonePtr > timestamp
constexpr bool is_timestamp_array_v
Variable template for convenient access to is_timestamp_array.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
timestamp_array< timestamp_nanosecond > timestamp_nanoseconds_array
timestamp< std::chrono::seconds > timestamp_second
Type aliases for common timestamp durations.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
timestamp< std::chrono::milliseconds > timestamp_millisecond
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
timestamp_array< timestamp_microsecond > timestamp_microseconds_array
timestamp_array< timestamp_millisecond > timestamp_milliseconds_array
functor_index_iterator< functor_type > value_iterator
functor_index_iterator< const_functor_type > const_value_iterator
detail::layout_value_functor< self_type, inner_reference > functor_type
detail::layout_value_functor< const self_type, inner_const_reference > const_functor_type
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Metafunction for retrieving the data_type of a typed array.
Type trait to check if a type is a timestamp_array.