sparrow 0.6.0
Loading...
Searching...
No Matches
timestamp_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
31
32// tss : timestamp<std::chrono::seconds>
33// tsm : timestamp<std::chrono::milliseconds>
34// tsu : timestamp<std::chrono::microseconds>
35// tsn : timestamp<std::chrono::nanoseconds>
36
37namespace sparrow
38{
39 template <timestamp_type T>
40 class timestamp_array;
41
42 template <timestamp_type T>
58
/**
 * Type trait telling whether a type is a specialization of timestamp_array.
 *
 * Primary template: selected for every type that does not match a more
 * specialized version, and therefore evaluates to false.
 *
 * @tparam T The type to inspect.
 */
template <typename T>
struct is_timestamp_array : std::bool_constant<false>
{
};
63
64 template <typename T>
65 struct is_timestamp_array<timestamp_array<T>> : std::true_type
66 {
67 };
68
69 template <typename T>
71
76
81
104 template <timestamp_type T>
105 class timestamp_array final : public mutable_array_bitmap_base<timestamp_array<T>>
106 {
107 public:
108
111
113 using inner_value_type = typename inner_types::inner_value_type;
114 using inner_reference = typename inner_types::inner_reference;
115 using inner_const_reference = typename inner_types::inner_const_reference;
116
118 using bitmap_reference = typename base_type::bitmap_reference;
122 using bitmap_range = typename base_type::bitmap_range;
124
128
132
133 using value_iterator = typename base_type::value_iterator;
134 using const_value_iterator = typename base_type::const_value_iterator;
135
136 using iterator = typename base_type::iterator;
137 using const_iterator = typename base_type::const_iterator;
138
139 using functor_type = typename inner_types::functor_type;
140 using const_functor_type = typename inner_types::const_functor_type;
141
142 using inner_value_type_duration = inner_value_type::duration;
143 using buffer_inner_value_type = inner_value_type_duration::rep;
146
148
176 template <class... Args>
178 explicit timestamp_array(Args&&... args)
179 : base_type(create_proxy(std::forward<Args>(args)...))
180 , m_timezone(get_timezone(this->get_arrow_proxy()))
181 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
182 {
183 }
184
185 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
187 const date::time_zone* timezone,
188 std::initializer_list<inner_value_type> init,
189 std::optional<std::string_view> name = std::nullopt,
190 std::optional<METADATA_RANGE> metadata = std::nullopt
191 )
192 : base_type(create_proxy(timezone, init, std::move(name), std::move(metadata)))
193 , m_timezone(timezone)
194 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
195 {
196 }
197
200
203
204 private:
205
206 [[nodiscard]] inner_reference value(size_type i);
207 [[nodiscard]] inner_const_reference value(size_type i) const;
208
209 [[nodiscard]] value_iterator value_begin();
210 [[nodiscard]] value_iterator value_end();
211
212 [[nodiscard]] const_value_iterator value_cbegin() const;
213 [[nodiscard]] const_value_iterator value_cend() const;
214
215 template <input_metadata_container METADATA_RANGE>
216 [[nodiscard]] static arrow_proxy create_proxy(
217 const date::time_zone* timezone,
218 size_type n,
219 std::optional<std::string_view> name = std::nullopt,
220 std::optional<METADATA_RANGE> metadata = std::nullopt
221 );
222
223 template <
225 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
226 [[nodiscard]] static auto create_proxy(
227 const date::time_zone* timezone,
229 R&& bitmaps = validity_bitmap{},
230 std::optional<std::string_view> name = std::nullopt,
231 std::optional<METADATA_RANGE> metadata = std::nullopt
232 ) -> arrow_proxy;
233
234 // range of values (no missing values)
235 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
236 requires std::convertible_to<std::ranges::range_value_t<R>, T>
237 [[nodiscard]] static auto create_proxy(
238 const date::time_zone* timezone,
239 R&& range,
240 std::optional<std::string_view> name = std::nullopt,
241 std::optional<METADATA_RANGE> metadata = std::nullopt
242 ) -> arrow_proxy;
243
244 template <typename U, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
245 requires std::convertible_to<U, T>
246 [[nodiscard]] static arrow_proxy create_proxy(
247 const date::time_zone* timezone,
248 size_type n,
249 const U& value = U{},
250 std::optional<std::string_view> name = std::nullopt,
251 std::optional<METADATA_RANGE> metadata = std::nullopt
252 );
253
254 // range of values, validity_bitmap_input
255 template <
256 std::ranges::input_range VALUE_RANGE,
257 validity_bitmap_input VALIDITY_RANGE,
258 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
259 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
260 [[nodiscard]] static arrow_proxy create_proxy(
261 const date::time_zone* timezone,
262 VALUE_RANGE&&,
263 VALIDITY_RANGE&&,
264 std::optional<std::string_view> name = std::nullopt,
265 std::optional<METADATA_RANGE> metadata = std::nullopt
266 );
267
268 // range of nullable values
269 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
270 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
271 [[nodiscard]] static arrow_proxy create_proxy(
272 const date::time_zone* timezone,
273 R&&,
274 std::optional<std::string_view> name = std::nullopt,
275 std::optional<METADATA_RANGE> metadata = std::nullopt
276 );
277
278 // Modifiers
279
280 void resize_values(size_type new_length, inner_value_type value);
281
282 value_iterator insert_value(const_value_iterator pos, inner_value_type value, size_type count);
283
284 template <mpl::iterator_of_type<typename timestamp_array<T>::inner_value_type> InputIt>
285 auto insert_values(const_value_iterator pos, InputIt first, InputIt last) -> value_iterator
286 {
287 const auto input_range = std::ranges::subrange(first, last);
288 const auto values = input_range
289 | std::views::transform(
290 [](const auto& v)
291 {
292 return v.get_sys_time().time_since_epoch();
293 }
294 );
295 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
296 m_data_access.insert_values(idx, values.begin(), values.end());
297 return sparrow::next(value_begin(), idx);
298 }
299
300 value_iterator erase_values(const_value_iterator pos, size_type count);
301
302 void assign(const T& rhs, size_type index);
303 void assign(T&& rhs, size_type index);
304
305 [[nodiscard]] static const date::time_zone* get_timezone(const arrow_proxy& proxy);
306
307 const date::time_zone* m_timezone;
308 details::primitive_data_access<inner_value_type_duration> m_data_access;
309
310 static constexpr size_type DATA_BUFFER_INDEX = 1;
311 friend class timestamp_reference<self_type>;
312 friend base_type;
315 friend functor_type;
316 friend const_functor_type;
317 };
318
319 template <timestamp_type T>
321 : base_type(rhs)
322 , m_timezone(rhs.m_timezone)
323 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
324 {
325 }
326
327 template <timestamp_type T>
329 {
331 m_timezone = rhs.m_timezone;
332 m_data_access.reset_proxy(this->get_arrow_proxy());
333 return *this;
334 }
335
336 template <timestamp_type T>
338 : base_type(std::move(rhs))
339 , m_timezone(rhs.m_timezone)
340 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
341 {
342 }
343
344 template <timestamp_type T>
346 {
347 base_type::operator=(std::move(rhs));
348 m_timezone = rhs.m_timezone;
349 m_data_access.reset_proxy(this->get_arrow_proxy());
350 return *this;
351 }
352
353 template <timestamp_type T>
354 const date::time_zone* timestamp_array<T>::get_timezone(const arrow_proxy& proxy)
355 {
356 const std::string_view timezone_string = proxy.format().substr(4);
357 return date::locate_zone(timezone_string);
358 }
359
360 template <timestamp_type T>
362 : base_type(std::move(proxy))
363 , m_timezone(get_timezone(this->get_arrow_proxy()))
364 , m_data_access(this->get_arrow_proxy(), DATA_BUFFER_INDEX)
365 {
366 }
367
368 template <timestamp_type T>
369 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
370 auto timestamp_array<T>::create_proxy(
371 const date::time_zone* timezone,
373 R&& bitmap_input,
374 std::optional<std::string_view> name,
375 std::optional<METADATA_RANGE> metadata
376 ) -> arrow_proxy
377 {
378 const auto size = data_buffer.size();
379 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
380 const auto null_count = bitmap.null_count();
381
383 format += timezone->name();
384
386
387 // create arrow schema and array
389 std::move(format), // format
390 std::move(name), // name
391 std::move(metadata), // metadata
392 std::nullopt, // flags
393 nullptr, // children
394 children_ownership, // children ownership
395 nullptr, // dictionary,
396 true // dictionary ownership
397 );
398
399 std::vector<buffer<uint8_t>> buffers(2);
400 buffers[0] = std::move(bitmap).extract_storage();
401 buffers[1] = std::move(data_buffer).extract_storage();
402
403 // create arrow array
405 static_cast<std::int64_t>(size), // length
406 static_cast<int64_t>(null_count),
407 0, // offset
408 std::move(buffers),
409 nullptr, // children
410 children_ownership, // children ownership
411 nullptr, // dictionary
412 true // dictionary ownership
413 );
414 return arrow_proxy(std::move(arr), std::move(schema));
415 }
416
417 template <timestamp_type T>
418 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
419 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
420 arrow_proxy timestamp_array<T>::create_proxy(
421 const date::time_zone* timezone,
422 VALUE_RANGE&& values,
423 VALIDITY_RANGE&& validity_input,
424 std::optional<std::string_view> name,
425 std::optional<METADATA_RANGE> metadata
426 )
427 {
428 const auto range = values
429 | std::views::transform(
430 [](const auto& v)
431 {
432 return v.get_sys_time().time_since_epoch().count();
433 }
434 );
435
436
437 u8_buffer<buffer_inner_value_type> data_buffer(range);
438 return create_proxy(
439 timezone,
440 std::move(data_buffer),
441 std::forward<VALIDITY_RANGE>(validity_input),
442 std::move(name),
443 std::move(metadata)
444 );
445 }
446
447 template <timestamp_type T>
448 template <typename U, input_metadata_container METADATA_RANGE>
449 requires std::convertible_to<U, T>
450 arrow_proxy timestamp_array<T>::create_proxy(
451 const date::time_zone* timezone,
452 size_type n,
453 const U& value,
454 std::optional<std::string_view> name,
455 std::optional<METADATA_RANGE> metadata
456 )
457 {
458 // create data_buffer
459 u8_buffer<buffer_inner_value_type> data_buffer(n, to_days_since_the_UNIX_epoch(value));
460 return create_proxy(timezone, std::move(data_buffer), std::move(name), std::move(metadata));
461 }
462
463 template <timestamp_type T>
464 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
465 requires std::convertible_to<std::ranges::range_value_t<R>, T>
466 arrow_proxy timestamp_array<T>::create_proxy(
467 const date::time_zone* timezone,
468 R&& range,
469 std::optional<std::string_view> name,
470 std::optional<METADATA_RANGE> metadata
471 )
472 {
473 const std::size_t n = range_size(range);
474 const auto iota = std::ranges::iota_view{std::size_t(0), n};
475 std::ranges::transform_view iota_to_is_non_missing(
476 iota,
477 [](std::size_t)
478 {
479 return true;
480 }
481 );
482 return self_type::create_proxy(
483 timezone,
484 std::forward<R>(range),
485 std::move(iota_to_is_non_missing),
486 std::move(name),
487 std::move(metadata)
488 );
489 }
490
491 // range of nullable values
492 template <timestamp_type T>
493 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
494 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
495 arrow_proxy timestamp_array<T>::create_proxy(
496 const date::time_zone* timezone,
497 R&& range,
498 std::optional<std::string_view> name,
499 std::optional<METADATA_RANGE> metadata
500 )
501 { // split into values and is_non_null ranges
502 auto values = range
503 | std::views::transform(
504 [](const auto& v)
505 {
506 return v.get();
507 }
508 );
509 auto is_non_null = range
510 | std::views::transform(
511 [](const auto& v)
512 {
513 return v.has_value();
514 }
515 );
516 return self_type::create_proxy(timezone, values, is_non_null, std::move(name), std::move(metadata));
517 }
518
519 template <timestamp_type T>
520 void timestamp_array<T>::assign(const T& rhs, size_type index)
521 {
522 SPARROW_ASSERT_TRUE(index < this->size());
523 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
524 }
525
526 template <timestamp_type T>
527 void timestamp_array<T>::assign(T&& rhs, size_type index)
528 {
529 SPARROW_ASSERT_TRUE(index < this->size());
530 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
531 }
532
533 template <timestamp_type T>
534 auto timestamp_array<T>::value(size_type i) -> inner_reference
535 {
536 SPARROW_ASSERT_TRUE(i < this->size());
537 return inner_reference(this, i);
538 }
539
540 template <timestamp_type T>
541 auto timestamp_array<T>::value(size_type i) const -> inner_const_reference
542 {
543 SPARROW_ASSERT_TRUE(i < this->size());
544 const auto& val = m_data_access.value(i);
545 using time_duration = typename T::duration;
546 const auto sys_time = std::chrono::sys_time<time_duration>{val};
547 return T{m_timezone, sys_time};
548 }
549
550 template <timestamp_type T>
551 auto timestamp_array<T>::value_begin() -> value_iterator
552 {
553 return value_iterator(functor_type(this), 0);
554 }
555
556 template <timestamp_type T>
557 auto timestamp_array<T>::value_end() -> value_iterator
558 {
559 return value_iterator(functor_type(this), this->size());
560 }
561
562 template <timestamp_type T>
563 auto timestamp_array<T>::value_cbegin() const -> const_value_iterator
564 {
565 return const_value_iterator(const_functor_type(this), 0);
566 }
567
568 template <timestamp_type T>
569 auto timestamp_array<T>::value_cend() const -> const_value_iterator
570 {
571 return const_value_iterator(const_functor_type(this), this->size());
572 }
573
574 template <timestamp_type T>
575 void timestamp_array<T>::resize_values(size_type new_length, inner_value_type value)
576 {
577 m_data_access.resize_values(new_length, value.get_sys_time().time_since_epoch());
578 }
579
580 template <timestamp_type T>
581 auto timestamp_array<T>::insert_value(const_value_iterator pos, inner_value_type value, size_type count)
582 -> value_iterator
583 {
584 SPARROW_ASSERT_TRUE(pos <= value_cend());
585 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
586 m_data_access.insert_value(idx, value.get_sys_time().time_since_epoch(), count);
587 return value_iterator(functor_type(this), idx);
588 }
589
590 template <timestamp_type T>
591 auto timestamp_array<T>::erase_values(const_value_iterator pos, size_type count) -> value_iterator
592 {
593 SPARROW_ASSERT_TRUE(pos < value_cend());
594 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
595 m_data_access.erase_values(idx, count);
596 return value_iterator(functor_type(this), idx);
597 }
598}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
SPARROW_API const std::string_view format() const
constexpr size_type null_count() const noexcept
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:280
A view that repeats a value a given number of times.
timestamp_array(Args &&... args)
Construct a timestamp array with the passed range of values and an optional bitmap.
mutable_array_bitmap_base< self_type > base_type
timestamp_array(const timestamp_array &rhs)
typename inner_types::inner_reference inner_reference
timestamp_array & operator=(const timestamp_array &rhs)
timestamp_array(timestamp_array &&rhs)
timestamp_array & operator=(timestamp_array &&rhs)
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::const_value_iterator const_value_iterator
timestamp_array(const date::time_zone *timezone, std::initializer_list< inner_value_type > init, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
pointer_iterator< const buffer_inner_value_type * > buffer_inner_const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
pointer_iterator< buffer_inner_value_type * > buffer_inner_value_iterator
typename base_type::const_bitmap_range const_bitmap_range
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::inner_const_reference inner_const_reference
nullable< inner_reference, bitmap_reference > reference
typename base_type::bitmap_reference bitmap_reference
typename inner_types::const_functor_type const_functor_type
typename inner_types::inner_value_type inner_value_type
Implementation of reference to inner type used for layout L.
This buffer class is use as storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
timestamp< std::chrono::microseconds > timestamp_microsecond
constexpr std::string_view data_type_to_format(data_type type)
timestamp< std::chrono::nanoseconds > timestamp_nanosecond
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
timestamp_array< timestamp_second > timestamp_seconds_array
date::zoned_time< Duration, TimeZonePtr > timestamp
constexpr bool is_timestamp_array_v
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
timestamp_array< timestamp_nanosecond > timestamp_nanoseconds_array
timestamp< std::chrono::seconds > timestamp_second
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
std::size_t range_size(R &&r)
Definition ranges.hpp:33
timestamp< std::chrono::milliseconds > timestamp_millisecond
timestamp_array< timestamp_microsecond > timestamp_microseconds_array
timestamp_array< timestamp_millisecond > timestamp_milliseconds_array
functor_index_iterator< functor_type > value_iterator
functor_index_iterator< const_functor_type > const_value_iterator
detail::layout_value_functor< self_type, inner_reference > functor_type
detail::layout_value_functor< const self_type, inner_const_reference > const_functor_type
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
Provides compile-time information about Arrow data types.