sparrow 0.9.0
Loading...
Searching...
No Matches
timestamp_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
33
34// tss : timestamp<std::chrono::seconds>
35// tsm : timestamp<std::chrono::milliseconds>
36// tsu : timestamp<std::chrono::microseconds>
37// tsn : timestamp<std::chrono::nanoseconds>
38
39namespace sparrow
40{
41 template <timestamp_type T>
42 class timestamp_array;
43
44 template <timestamp_type T>
60
/**
 * Type trait telling whether a type is a timestamp_array.
 *
 * Primary template: derives from std::false_type, so `value` is false
 * for every type that does not match a more specialized version.
 */
template <typename T>
struct is_timestamp_array : std::false_type {};
65
66 template <typename T>
67 struct is_timestamp_array<timestamp_array<T>> : std::true_type
68 {
69 };
70
71 template <typename T>
73
78
83
106 template <timestamp_type T>
107 class timestamp_array final : public mutable_array_bitmap_base<timestamp_array<T>>
108 {
109 public:
110
113
115 using inner_value_type = typename inner_types::inner_value_type;
116 using inner_reference = typename inner_types::inner_reference;
117 using inner_const_reference = typename inner_types::inner_const_reference;
118
120 using bitmap_reference = typename base_type::bitmap_reference;
124 using bitmap_range = typename base_type::bitmap_range;
126
130
134
135 using value_iterator = typename base_type::value_iterator;
136 using const_value_iterator = typename base_type::const_value_iterator;
137
138 using iterator = typename base_type::iterator;
139 using const_iterator = typename base_type::const_iterator;
140
141 using functor_type = typename inner_types::functor_type;
142 using const_functor_type = typename inner_types::const_functor_type;
143
144 using inner_value_type_duration = inner_value_type::duration;
145 using buffer_inner_value_type = inner_value_type_duration::rep;
148
150
178 template <class... Args>
180 explicit timestamp_array(Args&&... args)
181 : base_type(create_proxy(std::forward<Args>(args)...))
182 , m_timezone(get_timezone(this->get_arrow_proxy()))
183 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
184 {
185 }
186
187 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
189 const date::time_zone* timezone,
190 std::initializer_list<inner_value_type> init,
191 std::optional<std::string_view> name = std::nullopt,
192 std::optional<METADATA_RANGE> metadata = std::nullopt
193 )
194 : base_type(create_proxy(timezone, init, std::move(name), std::move(metadata)))
195 , m_timezone(timezone)
196 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
197 {
198 }
199
202
205
206 private:
207
208 [[nodiscard]] inner_reference value(size_type i);
209 [[nodiscard]] inner_const_reference value(size_type i) const;
210
211 [[nodiscard]] value_iterator value_begin();
212 [[nodiscard]] value_iterator value_end();
213
214 [[nodiscard]] const_value_iterator value_cbegin() const;
215 [[nodiscard]] const_value_iterator value_cend() const;
216
217 template <input_metadata_container METADATA_RANGE>
218 [[nodiscard]] static arrow_proxy create_proxy(
219 const date::time_zone* timezone,
220 size_type n,
221 std::optional<std::string_view> name = std::nullopt,
222 std::optional<METADATA_RANGE> metadata = std::nullopt
223 );
224
225 template <
227 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
228 [[nodiscard]] static auto create_proxy(
229 const date::time_zone* timezone,
231 R&& bitmaps = validity_bitmap{},
232 std::optional<std::string_view> name = std::nullopt,
233 std::optional<METADATA_RANGE> metadata = std::nullopt
234 ) -> arrow_proxy;
235
236 // range of values (no missing values)
237 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
238 requires std::convertible_to<std::ranges::range_value_t<R>, T>
239 [[nodiscard]] static auto create_proxy(
240 const date::time_zone* timezone,
241 R&& range,
242 bool nullable = true,
243 std::optional<std::string_view> name = std::nullopt,
244 std::optional<METADATA_RANGE> metadata = std::nullopt
245 ) -> arrow_proxy;
246
247 template <typename U, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
248 requires std::convertible_to<U, T>
249 [[nodiscard]] static arrow_proxy create_proxy(
250 const date::time_zone* timezone,
251 size_type n,
252 const U& value = U{},
253 std::optional<std::string_view> name = std::nullopt,
254 std::optional<METADATA_RANGE> metadata = std::nullopt
255 );
256
257 // range of values, validity_bitmap_input
258 template <
259 std::ranges::input_range VALUE_RANGE,
260 validity_bitmap_input VALIDITY_RANGE,
261 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
262 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
263 [[nodiscard]] static arrow_proxy create_proxy(
264 const date::time_zone* timezone,
265 VALUE_RANGE&&,
266 VALIDITY_RANGE&&,
267 std::optional<std::string_view> name = std::nullopt,
268 std::optional<METADATA_RANGE> metadata = std::nullopt
269 );
270
271 // range of nullable values
272 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
273 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
274 [[nodiscard]] static arrow_proxy create_proxy(
275 const date::time_zone* timezone,
276 R&&,
277 std::optional<std::string_view> name = std::nullopt,
278 std::optional<METADATA_RANGE> metadata = std::nullopt
279 );
280
281 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
282 [[nodiscard]] static arrow_proxy create_proxy_impl(
283 const date::time_zone* timezone,
284 u8_buffer<buffer_inner_value_type>&& data_buffer,
285 std::optional<validity_bitmap>&& bitmap_input,
286 std::optional<std::string_view> name = std::nullopt,
287 std::optional<METADATA_RANGE> metadata = std::nullopt
288 );
289
290 // Modifiers
291
292 void resize_values(size_type new_length, inner_value_type value);
293
294 value_iterator insert_value(const_value_iterator pos, inner_value_type value, size_type count);
295
296 template <mpl::iterator_of_type<typename timestamp_array<T>::inner_value_type> InputIt>
297 auto insert_values(const_value_iterator pos, InputIt first, InputIt last) -> value_iterator
298 {
299 const auto input_range = std::ranges::subrange(first, last);
300 const auto values = input_range
301 | std::views::transform(
302 [](const auto& v)
303 {
304 return v.get_sys_time().time_since_epoch();
305 }
306 );
307 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
308 m_data_access.insert_values(idx, values.begin(), values.end());
309 return sparrow::next(value_begin(), idx);
310 }
311
312 value_iterator erase_values(const_value_iterator pos, size_type count);
313
314 void assign(const T& rhs, size_type index);
315 void assign(T&& rhs, size_type index);
316
317
318 const date::time_zone* m_timezone;
319 details::primitive_data_access<inner_value_type_duration> m_data_access;
320
321 static constexpr size_type DATA_BUFFER_INDEX = 1;
322 friend class timestamp_reference<self_type>;
323 friend base_type;
326 friend functor_type;
327 friend const_functor_type;
328 };
329
330 template <timestamp_type T>
332 : base_type(rhs)
333 , m_timezone(rhs.m_timezone)
334 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
335 {
336 }
337
338 template <timestamp_type T>
340 {
342 m_timezone = rhs.m_timezone;
343 m_data_access.reset_proxy(this->get_arrow_proxy());
344 return *this;
345 }
346
347 template <timestamp_type T>
349 : base_type(std::move(rhs))
350 , m_timezone(rhs.m_timezone)
351 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
352 {
353 }
354
355 template <timestamp_type T>
357 {
358 base_type::operator=(std::move(rhs));
359 m_timezone = rhs.m_timezone;
360 m_data_access.reset_proxy(this->get_arrow_proxy());
361 return *this;
362 }
363
364 template <timestamp_type T>
366 : base_type(std::move(proxy))
367 , m_timezone(get_timezone(this->get_arrow_proxy()))
368 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
369 {
370 }
371
// Builds an arrow_proxy from an already encoded data buffer and a validity input.
//
// The validity input is normalized with ensure_validity_bitmap(), using the
// number of elements in the data buffer as the expected size, and everything
// is then handed over to create_proxy_impl().
//
// NOTE(review): `data_buffer` is used below but its parameter line is not
// visible in this listing; presumably it is declared as
// `u8_buffer<buffer_inner_value_type>&& data_buffer`, like the matching
// parameter of create_proxy_impl - confirm against the real header.
372 template <timestamp_type T>
373 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
374 auto timestamp_array<T>::create_proxy(
375 const date::time_zone* timezone,
377 R&& bitmap_input,
378 std::optional<std::string_view> name,
379 std::optional<METADATA_RANGE> metadata
380 ) -> arrow_proxy
381 {
// Read the size before the buffer is passed on to create_proxy_impl.
382 const auto size = data_buffer.size();
383 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
384 return create_proxy_impl(
385 timezone,
// std::forward with a non-reference template argument yields an rvalue here.
386 std::forward<u8_buffer<buffer_inner_value_type>>(data_buffer),
387 std::move(bitmap),
388 std::move(name),
389 std::move(metadata)
390 );
391 }
392
393 template <timestamp_type T>
394 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
395 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
396 arrow_proxy timestamp_array<T>::create_proxy(
397 const date::time_zone* timezone,
398 VALUE_RANGE&& values,
399 VALIDITY_RANGE&& validity_input,
400 std::optional<std::string_view> name,
401 std::optional<METADATA_RANGE> metadata
402 )
403 {
404 const auto range = values
405 | std::views::transform(
406 [](const auto& v)
407 {
408 return v.get_sys_time().time_since_epoch().count();
409 }
410 );
411
412
413 u8_buffer<buffer_inner_value_type> data_buffer(range);
414 return create_proxy(
415 timezone,
416 std::move(data_buffer),
417 std::forward<VALIDITY_RANGE>(validity_input),
418 std::move(name),
419 std::move(metadata)
420 );
421 }
422
423 template <timestamp_type T>
424 template <typename U, input_metadata_container METADATA_RANGE>
425 requires std::convertible_to<U, T>
426 arrow_proxy timestamp_array<T>::create_proxy(
427 const date::time_zone* timezone,
428 size_type n,
429 const U& value,
430 std::optional<std::string_view> name,
431 std::optional<METADATA_RANGE> metadata
432 )
433 {
434 // create data_buffer
435 u8_buffer<buffer_inner_value_type> data_buffer(n, to_days_since_the_UNIX_epoch(value));
436 return create_proxy(timezone, std::move(data_buffer), std::move(name), std::move(metadata));
437 }
438
439 template <timestamp_type T>
440 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
441 requires std::convertible_to<std::ranges::range_value_t<R>, T>
442 arrow_proxy timestamp_array<T>::create_proxy(
443 const date::time_zone* timezone,
444 R&& range,
445 bool nullable,
446 std::optional<std::string_view> name,
447 std::optional<METADATA_RANGE> metadata
448 )
449 {
450 std::optional<validity_bitmap> bitmap = nullable ? std::make_optional<validity_bitmap>(nullptr, 0)
451 : std::nullopt;
452 const auto values = range
453 | std::views::transform(
454 [](const auto& v)
455 {
456 return v.get_sys_time().time_since_epoch().count();
457 }
458 );
459 u8_buffer<buffer_inner_value_type> data_buffer(values);
460 return self_type::create_proxy_impl(
461 timezone,
462 std::move(data_buffer),
463 std::move(bitmap),
464 std::move(name),
465 std::move(metadata)
466 );
467 }
468
469 // range of nullable values
470 template <timestamp_type T>
471 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
472 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
473 arrow_proxy timestamp_array<T>::create_proxy(
474 const date::time_zone* timezone,
475 R&& range,
476 std::optional<std::string_view> name,
477 std::optional<METADATA_RANGE> metadata
478 )
479 { // split into values and is_non_null ranges
480 auto values = range
481 | std::views::transform(
482 [](const auto& v)
483 {
484 return v.get();
485 }
486 );
487 auto is_non_null = range
488 | std::views::transform(
489 [](const auto& v)
490 {
491 return v.has_value();
492 }
493 );
494 return self_type::create_proxy(timezone, values, is_non_null, std::move(name), std::move(metadata));
495 }
496
// Assembles the ArrowSchema / ArrowArray pair for a timestamp array and wraps
// them in an arrow_proxy.
//
// Inputs: the time zone (its name is appended to the format string), the
// encoded data buffer, an optional validity bitmap, and optional name and
// metadata for the schema.
//
// NOTE(review): this listing omits a few source lines - the `data_buffer`
// parameter and the definitions of `format` and `children_ownership` are
// used below but not visible here. Confirm against the real header.
497 template <timestamp_type T>
498 template <input_metadata_container METADATA_RANGE>
499 arrow_proxy timestamp_array<T>::create_proxy_impl(
500 const date::time_zone* timezone,
502 std::optional<validity_bitmap>&& bitmap,
503 std::optional<std::string_view> name,
504 std::optional<METADATA_RANGE> metadata
505 )
506 {
// Both values are read up front, before the buffer and the bitmap are moved from.
507 const auto size = data_buffer.size();
// Without a bitmap the null count is reported as 0.
508 const auto null_count = bitmap.has_value() ? bitmap->null_count() : 0;
509
// NOTE(review): `timezone` is dereferenced without a null check - callers
// presumably guarantee a valid pointer; confirm.
511 format += timezone->name();
512
514
// The NULLABLE flag is set only when a validity bitmap was supplied.
515 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
516 flags = bitmap.has_value()
517 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
518 : std::nullopt;
519
520 // create arrow schema and array
521 ArrowSchema schema = make_arrow_schema(
522 std::move(format), // format
523 std::move(name), // name
524 std::move(metadata), // metadata
525 flags, // flags
526 nullptr, // children
527 children_ownership, // children ownership
528 nullptr, // dictionary,
529 true // dictionary ownership
530 );
531
// Two buffers: the validity storage first (an empty buffer when no bitmap
// was given), then the data storage.
532 std::vector<buffer<uint8_t>> buffers{
533 bitmap.has_value() ? std::move(bitmap.value()).extract_storage() : buffer<uint8_t>{nullptr, 0},
534 std::move(data_buffer).extract_storage()
535 };
536
537 // create arrow array
538 ArrowArray arr = make_arrow_array(
539 static_cast<std::int64_t>(size), // length
540 static_cast<int64_t>(null_count),
541 0, // offset
542 std::move(buffers),
543 nullptr, // children
544 children_ownership, // children ownership
545 nullptr, // dictionary
546 true // dictionary ownership
547 );
548 return arrow_proxy(std::move(arr), std::move(schema));
549 }
550
551 template <timestamp_type T>
552 void timestamp_array<T>::assign(const T& rhs, size_type index)
553 {
554 SPARROW_ASSERT_TRUE(index < this->size());
555 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
556 }
557
558 template <timestamp_type T>
559 void timestamp_array<T>::assign(T&& rhs, size_type index)
560 {
561 SPARROW_ASSERT_TRUE(index < this->size());
562 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
563 }
564
565 template <timestamp_type T>
566 auto timestamp_array<T>::value(size_type i) -> inner_reference
567 {
568 SPARROW_ASSERT_TRUE(i < this->size());
569 return inner_reference(this, i);
570 }
571
572 template <timestamp_type T>
573 auto timestamp_array<T>::value(size_type i) const -> inner_const_reference
574 {
575 SPARROW_ASSERT_TRUE(i < this->size());
576 const auto& val = m_data_access.value(i);
577 using time_duration = typename T::duration;
578 const auto sys_time = std::chrono::sys_time<time_duration>{val};
579 return T{m_timezone, sys_time};
580 }
581
582 template <timestamp_type T>
583 auto timestamp_array<T>::value_begin() -> value_iterator
584 {
585 return value_iterator(functor_type(this), 0);
586 }
587
588 template <timestamp_type T>
589 auto timestamp_array<T>::value_end() -> value_iterator
590 {
591 return value_iterator(functor_type(this), this->size());
592 }
593
594 template <timestamp_type T>
595 auto timestamp_array<T>::value_cbegin() const -> const_value_iterator
596 {
597 return const_value_iterator(const_functor_type(this), 0);
598 }
599
600 template <timestamp_type T>
601 auto timestamp_array<T>::value_cend() const -> const_value_iterator
602 {
603 return const_value_iterator(const_functor_type(this), this->size());
604 }
605
606 template <timestamp_type T>
607 void timestamp_array<T>::resize_values(size_type new_length, inner_value_type value)
608 {
609 m_data_access.resize_values(new_length, value.get_sys_time().time_since_epoch());
610 }
611
612 template <timestamp_type T>
613 auto timestamp_array<T>::insert_value(const_value_iterator pos, inner_value_type value, size_type count)
614 -> value_iterator
615 {
616 SPARROW_ASSERT_TRUE(pos <= value_cend());
617 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
618 m_data_access.insert_value(idx, value.get_sys_time().time_since_epoch(), count);
619 return value_iterator(functor_type(this), idx);
620 }
621
622 template <timestamp_type T>
623 auto timestamp_array<T>::erase_values(const_value_iterator pos, size_type count) -> value_iterator
624 {
625 SPARROW_ASSERT_TRUE(pos < value_cend());
626 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
627 m_data_access.erase_values(idx, count);
628 return value_iterator(functor_type(this), idx);
629 }
630}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
storage_type extract_storage() noexcept
constexpr size_type null_count() const noexcept
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:281
A view that repeats a value a given number of times.
timestamp_array(Args &&... args)
Construct a timestamp array with the passed range of values and an optional bitmap.
mutable_array_bitmap_base< self_type > base_type
timestamp_array(const timestamp_array &rhs)
typename inner_types::inner_reference inner_reference
timestamp_array & operator=(const timestamp_array &rhs)
timestamp_array(timestamp_array &&rhs)
timestamp_array & operator=(timestamp_array &&rhs)
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::const_value_iterator const_value_iterator
timestamp_array(const date::time_zone *timezone, std::initializer_list< inner_value_type > init, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
pointer_iterator< const buffer_inner_value_type * > buffer_inner_const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
pointer_iterator< buffer_inner_value_type * > buffer_inner_value_iterator
typename base_type::const_bitmap_range const_bitmap_range
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::inner_const_reference inner_const_reference
nullable< inner_reference, bitmap_reference > reference
typename base_type::bitmap_reference bitmap_reference
typename inner_types::const_functor_type const_functor_type
typename inner_types::inner_value_type inner_value_type
Implementation of reference to inner type used for layout L.
This buffer class is use as storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
timestamp< std::chrono::microseconds > timestamp_microsecond
constexpr std::string_view data_type_to_format(data_type type)
timestamp< std::chrono::nanoseconds > timestamp_nanosecond
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
SPARROW_API const date::time_zone * get_timezone(const arrow_proxy &proxy)
timestamp_array< timestamp_second > timestamp_seconds_array
date::zoned_time< Duration, TimeZonePtr > timestamp
constexpr bool is_timestamp_array_v
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
timestamp_array< timestamp_nanosecond > timestamp_nanoseconds_array
timestamp< std::chrono::seconds > timestamp_second
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
timestamp< std::chrono::milliseconds > timestamp_millisecond
timestamp_array< timestamp_microsecond > timestamp_microseconds_array
timestamp_array< timestamp_millisecond > timestamp_milliseconds_array
functor_index_iterator< functor_type > value_iterator
functor_index_iterator< const_functor_type > const_value_iterator
detail::layout_value_functor< self_type, inner_reference > functor_type
detail::layout_value_functor< const self_type, inner_const_reference > const_functor_type
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
Provides compile-time information about Arrow data types.