sparrow 0.3.0
Loading...
Searching...
No Matches
timestamp_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
29
30// tss : timestamp<std::chrono::seconds>
31// tsm : timestamp<std::chrono::milliseconds>
32// tsu : timestamp<std::chrono::microseconds>
33// tsn : timestamp<std::chrono::nanoseconds>
34
35namespace sparrow
36{
37 template <timestamp_type T>
38 class timestamp_array;
39
40 template <timestamp_type T>
56
57 template <typename T>
58 struct is_timestamp_array : std::false_type
59 {
60 };
61
62 template <typename T>
63 struct is_timestamp_array<timestamp_array<T>> : std::true_type
64 {
65 };
66
67 template <typename T>
69
74
79
102 template <timestamp_type T>
103 class timestamp_array final : public mutable_array_bitmap_base<timestamp_array<T>>
104 {
105 public:
106
109
111 using inner_value_type = typename inner_types::inner_value_type;
112 using inner_reference = typename inner_types::inner_reference;
113 using inner_const_reference = typename inner_types::inner_const_reference;
114
116 using bitmap_reference = typename base_type::bitmap_reference;
120 using bitmap_range = typename base_type::bitmap_range;
122
126
130
131 using value_iterator = typename base_type::value_iterator;
132 using const_value_iterator = typename base_type::const_value_iterator;
133
134 using iterator = typename base_type::iterator;
135 using const_iterator = typename base_type::const_iterator;
136
137 using functor_type = typename inner_types::functor_type;
138 using const_functor_type = typename inner_types::const_functor_type;
139
140 using inner_value_type_duration = inner_value_type::duration;
141 using buffer_inner_value_type = inner_value_type_duration::rep;
144
146
174 template <class... Args>
176 explicit timestamp_array(Args&&... args)
177 : base_type(create_proxy(std::forward<Args>(args)...))
178 , m_timezone(get_timezone(this->get_arrow_proxy()))
179 , m_data_access(this, DATA_BUFFER_INDEX)
180 {
181 }
182
184 const date::time_zone* timezone,
185 std::initializer_list<inner_value_type> init,
186 std::optional<std::string_view> name = std::nullopt,
187 std::optional<std::string_view> metadata = std::nullopt
188 )
189 : base_type(create_proxy(timezone, init, std::move(name), std::move(metadata)))
190 , m_timezone(timezone)
191 , m_data_access(this, DATA_BUFFER_INDEX)
192 {
193 }
194
195 private:
196
197 [[nodiscard]] inner_reference value(size_type i);
198 [[nodiscard]] inner_const_reference value(size_type i) const;
199
200 [[nodiscard]] value_iterator value_begin();
201 [[nodiscard]] value_iterator value_end();
202
203 [[nodiscard]] const_value_iterator value_cbegin() const;
204 [[nodiscard]] const_value_iterator value_cend() const;
205
206 [[nodiscard]] static arrow_proxy create_proxy(
207 const date::time_zone* timezone,
208 size_type n,
209 std::optional<std::string_view> name = std::nullopt,
210 std::optional<std::string_view> metadata = std::nullopt
211 );
212
213 template <validity_bitmap_input R = validity_bitmap>
214 [[nodiscard]] static auto create_proxy(
215 const date::time_zone* timezone,
217 R&& bitmaps = validity_bitmap{},
218 std::optional<std::string_view> name = std::nullopt,
219 std::optional<std::string_view> metadata = std::nullopt
220 ) -> arrow_proxy;
221
222 // range of values (no missing values)
223 template <std::ranges::input_range R>
224 requires std::convertible_to<std::ranges::range_value_t<R>, T>
225 [[nodiscard]] static auto create_proxy(
226 const date::time_zone* timezone,
227 R&& range,
228 std::optional<std::string_view> name = std::nullopt,
229 std::optional<std::string_view> metadata = std::nullopt
230 ) -> arrow_proxy;
231
232 template <typename U>
233 requires std::convertible_to<U, T>
234 [[nodiscard]] static arrow_proxy create_proxy(
235 const date::time_zone* timezone,
236 size_type n,
237 const U& value = U{},
238 std::optional<std::string_view> name = std::nullopt,
239 std::optional<std::string_view> metadata = std::nullopt
240 );
241
242 // range of values, validity_bitmap_input
243 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE>
244 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
245 [[nodiscard]] static arrow_proxy create_proxy(
246 const date::time_zone* timezone,
247 VALUE_RANGE&&,
248 VALIDITY_RANGE&&,
249 std::optional<std::string_view> name = std::nullopt,
250 std::optional<std::string_view> metadata = std::nullopt
251 );
252
253 // range of nullable values
254 template <std::ranges::input_range R>
255 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
256 [[nodiscard]] static arrow_proxy create_proxy(
257 const date::time_zone* timezone,
258 R&&,
259 std::optional<std::string_view> name = std::nullopt,
260 std::optional<std::string_view> metadata = std::nullopt
261 );
262
263 // Modifiers
264
265 void resize_values(size_type new_length, inner_value_type value);
266
267 value_iterator insert_value(const_value_iterator pos, inner_value_type value, size_type count);
268
269 template <mpl::iterator_of_type<typename timestamp_array<T>::inner_value_type> InputIt>
270 auto insert_values(const_value_iterator pos, InputIt first, InputIt last) -> value_iterator
271 {
272 const auto input_range = std::ranges::subrange(first, last);
273 const auto values = input_range
274 | std::views::transform(
275 [](const auto& v)
276 {
277 return v.get_sys_time().time_since_epoch();
278 }
279 );
280 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
281 m_data_access.insert_values(idx, values.begin(), values.end());
282 return sparrow::next(value_begin(), idx);
283 }
284
285 value_iterator erase_values(const_value_iterator pos, size_type count);
286
287 void assign(const T& rhs, size_type index);
288 void assign(T&& rhs, size_type index);
289
290 [[nodiscard]] static const date::time_zone* get_timezone(const arrow_proxy& proxy);
291
292 const date::time_zone* m_timezone;
293 details::trivial_copyable_data_access<inner_value_type_duration, self_type> m_data_access;
294
295 static constexpr size_type DATA_BUFFER_INDEX = 1;
296 friend class timestamp_reference<self_type>;
297 friend base_type;
300 friend functor_type;
301 friend const_functor_type;
302 };
303
304 template <timestamp_type T>
305 const date::time_zone* timestamp_array<T>::get_timezone(const arrow_proxy& proxy)
306 {
307 const std::string_view timezone_string = proxy.format().substr(4);
308 return date::locate_zone(timezone_string);
309 }
310
311 template <timestamp_type T>
313 : base_type(std::move(proxy))
314 , m_timezone(get_timezone(this->get_arrow_proxy()))
315 , m_data_access(this, DATA_BUFFER_INDEX)
316 {
317 }
318
319 template <timestamp_type T>
320 template <validity_bitmap_input R>
321 auto timestamp_array<T>::create_proxy(
322 const date::time_zone* timezone,
324 R&& bitmap_input,
325 std::optional<std::string_view> name,
326 std::optional<std::string_view> metadata
327 ) -> arrow_proxy
328 {
329 const auto size = data_buffer.size();
330 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
331 const auto null_count = bitmap.null_count();
332
334 format += timezone->name();
335
336 // create arrow schema and array
338 std::move(format), // format
339 std::move(name), // name
340 std::move(metadata), // metadata
341 std::nullopt, // flags
342 0, // n_children
343 nullptr, // children
344 nullptr // dictionary
345 );
346
347 std::vector<buffer<uint8_t>> buffers(2);
348 buffers[0] = std::move(bitmap).extract_storage();
349 buffers[1] = std::move(data_buffer).extract_storage();
350
351 // create arrow array
353 static_cast<std::int64_t>(size), // length
354 static_cast<int64_t>(null_count),
355 0, // offset
356 std::move(buffers),
357 0, // n_children
358 nullptr, // children
359 nullptr // dictionary
360 );
361 return arrow_proxy(std::move(arr), std::move(schema));
362 }
363
364 template <timestamp_type T>
365 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE>
366 requires(std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, T>)
367 arrow_proxy timestamp_array<T>::create_proxy(
368 const date::time_zone* timezone,
369 VALUE_RANGE&& values,
370 VALIDITY_RANGE&& validity_input,
371 std::optional<std::string_view> name,
372 std::optional<std::string_view> metadata
373 )
374 {
375 const auto range = values
376 | std::views::transform(
377 [](const auto& v)
378 {
379 return v.get_sys_time().time_since_epoch().count();
380 }
381 );
382
383
384 u8_buffer<buffer_inner_value_type> data_buffer(range);
385 return create_proxy(
386 timezone,
387 std::move(data_buffer),
388 std::forward<VALIDITY_RANGE>(validity_input),
389 std::move(name),
390 std::move(metadata)
391 );
392 }
393
394 template <timestamp_type T>
395 template <typename U>
396 requires std::convertible_to<U, T>
397 arrow_proxy timestamp_array<T>::create_proxy(
398 const date::time_zone* timezone,
399 size_type n,
400 const U& value,
401 std::optional<std::string_view> name,
402 std::optional<std::string_view> metadata
403 )
404 {
405 // create data_buffer
406 u8_buffer<buffer_inner_value_type> data_buffer(n, to_days_since_the_UNIX_epoch(value));
407 return create_proxy(timezone, std::move(data_buffer), std::move(name), std::move(metadata));
408 }
409
410 template <timestamp_type T>
411 template <std::ranges::input_range R>
412 requires std::convertible_to<std::ranges::range_value_t<R>, T>
413 arrow_proxy timestamp_array<T>::create_proxy(
414 const date::time_zone* timezone,
415 R&& range,
416 std::optional<std::string_view> name,
417 std::optional<std::string_view> metadata
418 )
419 {
420 const std::size_t n = range_size(range);
421 const auto iota = std::ranges::iota_view{std::size_t(0), n};
422 std::ranges::transform_view iota_to_is_non_missing(
423 iota,
424 [](std::size_t)
425 {
426 return true;
427 }
428 );
429 return self_type::create_proxy(
430 timezone,
431 std::forward<R>(range),
432 std::move(iota_to_is_non_missing),
433 std::move(name),
434 std::move(metadata)
435 );
436 }
437
438 // range of nullable values
439 template <timestamp_type T>
440 template <std::ranges::input_range R>
441 requires std::is_same_v<std::ranges::range_value_t<R>, nullable<T>>
442 arrow_proxy timestamp_array<T>::create_proxy(
443 const date::time_zone* timezone,
444 R&& range,
445 std::optional<std::string_view> name,
446 std::optional<std::string_view> metadata
447 )
448 { // split into values and is_non_null ranges
449 auto values = range
450 | std::views::transform(
451 [](const auto& v)
452 {
453 return v.get();
454 }
455 );
456 auto is_non_null = range
457 | std::views::transform(
458 [](const auto& v)
459 {
460 return v.has_value();
461 }
462 );
463 return self_type::create_proxy(timezone, values, is_non_null, std::move(name), std::move(metadata));
464 }
465
466 template <timestamp_type T>
467 void timestamp_array<T>::assign(const T& rhs, size_type index)
468 {
469 SPARROW_ASSERT_TRUE(index < this->size());
470 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
471 }
472
473 template <timestamp_type T>
474 void timestamp_array<T>::assign(T&& rhs, size_type index)
475 {
476 SPARROW_ASSERT_TRUE(index < this->size());
477 m_data_access.value(index) = rhs.get_sys_time().time_since_epoch();
478 }
479
480 template <timestamp_type T>
481 auto timestamp_array<T>::value(size_type i) -> inner_reference
482 {
483 SPARROW_ASSERT_TRUE(i < this->size());
484 return inner_reference(this, i);
485 }
486
487 template <timestamp_type T>
488 auto timestamp_array<T>::value(size_type i) const -> inner_const_reference
489 {
490 SPARROW_ASSERT_TRUE(i < this->size());
491 const auto& val = m_data_access.value(i);
492 using time_duration = typename T::duration;
493 const auto sys_time = std::chrono::sys_time<time_duration>{val};
494 return T{m_timezone, sys_time};
495 }
496
497 template <timestamp_type T>
498 auto timestamp_array<T>::value_begin() -> value_iterator
499 {
500 return value_iterator(functor_type(this), 0);
501 }
502
503 template <timestamp_type T>
504 auto timestamp_array<T>::value_end() -> value_iterator
505 {
506 return value_iterator(functor_type(this), this->size());
507 }
508
509 template <timestamp_type T>
510 auto timestamp_array<T>::value_cbegin() const -> const_value_iterator
511 {
512 return const_value_iterator(const_functor_type(this), 0);
513 }
514
515 template <timestamp_type T>
516 auto timestamp_array<T>::value_cend() const -> const_value_iterator
517 {
518 return const_value_iterator(const_functor_type(this), this->size());
519 }
520
521 template <timestamp_type T>
522 void timestamp_array<T>::resize_values(size_type new_length, inner_value_type value)
523 {
524 m_data_access.resize_values(new_length, value.get_sys_time().time_since_epoch());
525 }
526
527 template <timestamp_type T>
528 auto timestamp_array<T>::insert_value(const_value_iterator pos, inner_value_type value, size_type count)
529 -> value_iterator
530 {
531 SPARROW_ASSERT_TRUE(pos <= value_cend());
532 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
533 m_data_access.insert_value(idx, value.get_sys_time().time_since_epoch(), count);
534 return value_iterator(functor_type(this), idx);
535 }
536
537 template <timestamp_type T>
538 auto timestamp_array<T>::erase_values(const_value_iterator pos, size_type count) -> value_iterator
539 {
540 SPARROW_ASSERT_TRUE(pos < value_cend());
541 const size_t idx = static_cast<size_t>(std::distance(value_cbegin(), pos));
542 m_data_access.erase_values(idx, count);
543 return value_iterator(functor_type(this), idx);
544 }
545}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
SPARROW_API const std::string_view format() const
constexpr size_type null_count() const noexcept
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:280
timestamp_array(Args &&... args)
Construct a timestamp array with the passed range of values and an optional bitmap.
mutable_array_bitmap_base< self_type > base_type
typename inner_types::inner_reference inner_reference
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::const_value_iterator const_value_iterator
pointer_iterator< const buffer_inner_value_type * > buffer_inner_const_value_iterator
typename base_type::bitmap_const_reference bitmap_const_reference
timestamp_array(const date::time_zone *timezone, std::initializer_list< inner_value_type > init, std::optional< std::string_view > name=std::nullopt, std::optional< std::string_view > metadata=std::nullopt)
pointer_iterator< buffer_inner_value_type * > buffer_inner_value_iterator
typename base_type::const_bitmap_range const_bitmap_range
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::inner_const_reference inner_const_reference
nullable< inner_reference, bitmap_reference > reference
typename base_type::bitmap_reference bitmap_reference
typename inner_types::const_functor_type const_functor_type
typename inner_types::inner_value_type inner_value_type
Implementation of reference to inner type used for layout L.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
timestamp< std::chrono::microseconds > timestamp_microsecond
constexpr std::string_view data_type_to_format(data_type type)
ArrowSchema make_arrow_schema(F format, N name, M metadata, std::optional< ArrowFlag > flags, int64_t n_children, ArrowSchema **children, ArrowSchema *dictionary)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
timestamp< std::chrono::nanoseconds > timestamp_nanosecond
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
timestamp_array< timestamp_second > timestamp_seconds_array
date::zoned_time< Duration, TimeZonePtr > timestamp
constexpr bool is_timestamp_array_v
dynamic_bitset< std::uint8_t > validity_bitmap
timestamp_array< timestamp_nanosecond > timestamp_nanoseconds_array
timestamp< std::chrono::seconds > timestamp_second
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
std::size_t range_size(R &&r)
Definition ranges.hpp:31
timestamp< std::chrono::milliseconds > timestamp_millisecond
timestamp_array< timestamp_microsecond > timestamp_microseconds_array
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Creates an ArrowArray.
timestamp_array< timestamp_millisecond > timestamp_milliseconds_array
functor_index_iterator< functor_type > value_iterator
functor_index_iterator< const_functor_type > const_value_iterator
detail::layout_value_functor< self_type, inner_reference > functor_type
detail::layout_value_functor< const self_type, inner_const_reference > const_functor_type
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.
Provides compile-time information about Arrow data types.