sparrow 0.9.0
Loading...
Searching...
No Matches
decimal_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <ranges>
18
33
34namespace sparrow
35{
36 template <decimal_type T>
37 class decimal_array;
38
43
// Trait mapping an array type to its runtime sparrow::data_type through a
// static constexpr get(). The primary template is only declared; four explicit
// specializations follow (presumably one per decimal_32/64/128/256_array alias —
// TODO confirm against the upstream header).
// NOTE(review): this listing has lost the `struct get_data_type_from_array<...>`
// header of every specialization (listing lines 50, 59, 68, 77) and the return
// statement of every get() (listing lines 54, 63, 72, 81); restore them from the
// upstream header before compiling.
44 namespace detail
45 {
46 template <class T>
47 struct get_data_type_from_array;
48
49 template <>
51 {
52 [[nodiscard]] static constexpr sparrow::data_type get()
53 {
55 }
56 };
57
58 template <>
60 {
61 [[nodiscard]] static constexpr sparrow::data_type get()
62 {
64 }
65 };
66
67 template <>
69 {
70 [[nodiscard]] static constexpr sparrow::data_type get()
71 {
73 }
74 };
75
76 template <>
78 {
79 [[nodiscard]] static constexpr sparrow::data_type get()
80 {
82 }
83 };
84
85 }
86
87 template <decimal_type T>
105
106
107 template <class T>
109
// Array of decimal values of type T, built on mutable_array_bitmap_base (CRTP).
// Each element is stored as a T::integer_type; the precision and scale are kept
// once per array in m_precision / m_scale.
// NOTE(review): several member declarations are missing from this listing (gaps in
// the embedded line numbers, e.g. 115-118, 131-141, 146, 149, 155, 241-245);
// comments below only describe what is visible.
110 template <decimal_type T>
111 class decimal_array final : public mutable_array_bitmap_base<decimal_array<T>>
112 {
113 public:
114
117
119 using inner_value_type = typename inner_types::inner_value_type;
120 using inner_reference = typename inner_types::inner_reference;
121 using inner_const_reference = typename inner_types::inner_const_reference;
122
123 // the integral value type used to store the bits
124 using storage_type = typename T::integer_type;
// Only 32-, 64-, 128- and 256-bit storage is accepted.
125 static_assert(
126 sizeof(storage_type) == 4 || sizeof(storage_type) == 8 || sizeof(storage_type) == 16
127 || sizeof(storage_type) == 32,
128 "The storage type must be an integral type of size 4, 8, 16 or 32 bytes"
129 );
130
135
138
142
143 using value_iterator = typename inner_types::value_iterator;
144 using const_value_iterator = typename inner_types::const_value_iterator;
145
147
// Generic constructor: forwards its arguments to one of the create_proxy overloads
// and delegates to the constructor taking the resulting arrow_proxy.
148 template <class... Args>
150 explicit decimal_array(Args&&... args)
151 : decimal_array(create_proxy(std::forward<Args>(args)...))
152 {
153 }
154
// Read-only access to the i-th stored value.
156 [[nodiscard]] inner_const_reference value(size_type i) const;
157
158
159 private:
160
// Builds a proxy from a range of values convertible to the storage integer type
// plus a validity input.
161 template <
162 std::ranges::input_range VALUE_RANGE,
163 validity_bitmap_input VALIDITY_RANGE,
164 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
165 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
166 [[nodiscard]] static auto create_proxy(
167 VALUE_RANGE&& range,
168 VALIDITY_RANGE&& bitmaps,
169 std::size_t precision,
170 int scale,
171 std::optional<std::string_view> name = std::nullopt,
172 std::optional<METADATA_RANGE> metadata = std::nullopt
173 ) -> arrow_proxy;
174
// Builds a proxy from a range of nullable storage integers; values and validity
// are both derived from the range elements.
175 template <
176 std::ranges::input_range NULLABLE_VALUE_RANGE,
177 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
178 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
179 [[nodiscard]] static auto create_proxy(
180 NULLABLE_VALUE_RANGE&& range,
181 std::size_t precision,
182 int scale,
183 std::optional<std::string_view> name = std::nullopt,
184 std::optional<METADATA_RANGE> metadata = std::nullopt
185 ) -> arrow_proxy;
186
// Builds a proxy from a range of storage integers; `nullable` selects whether a
// validity bitmap is created.
187 template <std::ranges::input_range VALUE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
188 requires std::is_same_v<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
189 [[nodiscard]] static auto create_proxy(
190 VALUE_RANGE&& range,
191 std::size_t precision,
192 int scale,
193 bool nullable = true,
194 std::optional<std::string_view> name = std::nullopt,
195 std::optional<METADATA_RANGE> metadata = std::nullopt
196 ) -> arrow_proxy;
197
// Builds a proxy from an already materialized data buffer plus a validity input.
198 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
199 [[nodiscard]] static auto create_proxy(
200 u8_buffer<storage_type>&& data_buffer,
201 R&& bitmaps,
202 std::size_t precision,
203 int scale,
204 std::optional<std::string_view> name = std::nullopt,
205 std::optional<METADATA_RANGE> metadata = std::nullopt
206 ) -> arrow_proxy;
207
// Builds a proxy from an already materialized data buffer; `nullable` selects
// whether a validity bitmap is created.
208 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
209 [[nodiscard]] static auto create_proxy(
210 u8_buffer<storage_type>&& data_buffer,
211 std::size_t precision,
212 int scale,
213 bool nullable = true,
214 std::optional<std::string_view> name = std::nullopt,
215 std::optional<METADATA_RANGE> metadata = std::nullopt
216 ) -> arrow_proxy;
217
// Common back end of the create_proxy overloads: assembles the ArrowSchema and
// ArrowArray. An empty optional bitmap produces a schema without the NULLABLE flag.
218 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
219 [[nodiscard]] static auto create_proxy_impl(
220 u8_buffer<storage_type>&& data_buffer,
221 std::size_t precision,
222 int scale,
223 std::optional<validity_bitmap>,
224 std::optional<std::string_view> name = std::nullopt,
225 std::optional<METADATA_RANGE> metadata = std::nullopt
226 ) -> arrow_proxy;
227
// Produces the format string "d:<precision>,<scale>", followed by ",<bits>" when
// the storage type is not 128 bits wide.
228 static std::string generate_format(std::size_t precision, int scale);
229
230 [[nodiscard]] value_iterator value_begin();
231 [[nodiscard]] value_iterator value_end();
232
233 [[nodiscard]] const_value_iterator value_cbegin() const;
234 [[nodiscard]] const_value_iterator value_cend() const;
235
// Writes rhs into slot `index`, rescaled to this array's scale.
236 void assign(const T& rhs, size_type index);
237
238 // Modifiers
239
// Index of the values buffer inside the proxy's buffer list (the validity bitmap
// is written to index 0 by create_proxy_impl).
240 static constexpr size_type DATA_BUFFER_INDEX = 1;
241 friend base_type;
246 friend class decimal_reference<self_type>;
247
248 std::size_t m_precision; // The precision of the decimal value
249 int m_scale; // The scale of the decimal value (can be negative)
250 };
251
252 /**********************************
253 * decimal_array implementation *
254 **********************************/
255
256 template <decimal_type T>
258 : base_type(std::move(proxy))
259 , m_precision(0)
260 , m_scale(0)
261 {
262 // parse the format string
263 const auto format = this->get_arrow_proxy().format();
264
265 // ensure that the format string starts with d:
266 if (format.size() < 2 || format[0] != 'd' || format[1] != ':')
267 {
268 throw std::runtime_error("Invalid format string for decimal array");
269 }
270
271 // substring staring aftet d:
272 const auto format_str = format.substr(2);
273
274 std::stringstream ss;
275 ss << format_str;
276 char c = 0;
277 ss >> m_precision >> c >> m_scale;
278
279 // check for failure
280 if (ss.fail())
281 {
282 throw std::runtime_error("Invalid format string for decimal array");
283 }
284 }
285
286 template <decimal_type T>
287 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
288 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
289 arrow_proxy decimal_array<T>::create_proxy(
290 VALUE_RANGE&& range,
291 VALIDITY_RANGE&& bitmaps,
292 std::size_t precision,
293 int scale,
294 std::optional<std::string_view> name,
295 std::optional<METADATA_RANGE> metadata
296 )
297 {
298 u8_buffer<storage_type> u8_data_buffer(std::forward<VALUE_RANGE>(range));
299 const auto size = u8_data_buffer.size();
300 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<VALIDITY_RANGE>(bitmaps));
301 return create_proxy_impl(
302 std::move(u8_data_buffer),
303 precision,
304 scale,
305 std::move(bitmap),
306 std::move(name),
307 std::move(metadata)
308 );
309 }
310
311 template <decimal_type T>
312 template <std::ranges::input_range NULLABLE_VALUE_RANGE, input_metadata_container METADATA_RANGE>
313 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
314 arrow_proxy decimal_array<T>::create_proxy(
315 NULLABLE_VALUE_RANGE&& range,
316 std::size_t precision,
317 int scale,
318 std::optional<std::string_view> name,
319 std::optional<METADATA_RANGE> metadata
320 )
321 {
322 auto values = range
323 | std::views::transform(
324 [](const auto& v)
325 {
326 return v.get();
327 }
328 );
329 auto is_non_null = range
330 | std::views::transform(
331 [](const auto& v)
332 {
333 return v.has_value();
334 }
335 );
336 return create_proxy(values, is_non_null, precision, scale, std::move(name), std::move(metadata));
337 }
338
339 template <decimal_type T>
340 template <input_metadata_container METADATA_RANGE>
341 auto decimal_array<T>::create_proxy(
342 u8_buffer<storage_type>&& data_buffer,
343 std::size_t precision,
344 int scale,
345 bool nullable,
346 std::optional<std::string_view> name,
347 std::optional<METADATA_RANGE> metadata
348 ) -> arrow_proxy
349 {
350 const size_t size = data_buffer.size();
351 return create_proxy_impl(
352 std::move(data_buffer),
353 precision,
354 scale,
355 nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
356 name,
357 metadata
358 );
359 }
360
361 template <decimal_type T>
362 template <std::ranges::input_range VALUE_RANGE, input_metadata_container METADATA_RANGE>
363 requires std::is_same_v<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
364 arrow_proxy decimal_array<T>::create_proxy(
365 VALUE_RANGE&& range,
366 std::size_t precision,
367 int scale,
368 bool nullable,
369 std::optional<std::string_view> name,
370 std::optional<METADATA_RANGE> metadata
371 )
372 {
373 u8_buffer<storage_type> u8_data_buffer(std::forward<VALUE_RANGE>(range));
374 const auto size = u8_data_buffer.size();
375 return create_proxy_impl(
376 std::move(u8_data_buffer),
377 precision,
378 scale,
379 nullable ? std::make_optional<validity_bitmap>(nullptr, size) : std::nullopt,
380 name,
381 metadata
382 );
383 }
384
385 template <decimal_type T>
386 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
387 auto decimal_array<T>::create_proxy(
388 u8_buffer<storage_type>&& data_buffer,
389 R&& bitmap_input,
390 std::size_t precision,
391 int scale,
392 std::optional<std::string_view> name,
393 std::optional<METADATA_RANGE> metadata
394 ) -> arrow_proxy
395 {
396 const auto size = data_buffer.size();
397 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
398 return create_proxy_impl(
399 std::move(data_buffer),
400 precision,
401 scale,
402 std::move(bitmap),
403 std::move(name),
404 std::move(metadata)
405 );
406 }
407
408 template <decimal_type T>
409 template <input_metadata_container METADATA_RANGE>
410 [[nodiscard]] auto decimal_array<T>::create_proxy_impl(
411 u8_buffer<storage_type>&& data_buffer,
412 std::size_t precision,
413 int scale,
414 std::optional<validity_bitmap> bitmap,
415 std::optional<std::string_view> name,
416 std::optional<METADATA_RANGE> metadata
417 ) -> arrow_proxy
418 {
419 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
420 flags = bitmap.has_value()
421 ? std::make_optional<std::unordered_set<sparrow::ArrowFlag>>({ArrowFlag::NULLABLE})
422 : std::nullopt;
423 static const repeat_view<bool> children_ownership{true, 0};
424 const auto size = data_buffer.size();
425 const size_t null_count = bitmap.has_value() ? bitmap->null_count() : 0;
426
427 // create arrow schema and array
428 ArrowSchema schema = make_arrow_schema(
429 generate_format(precision, scale),
430 name, // name
431 metadata, // metadata
432 flags, // flags
433 nullptr, // children
435 nullptr, // dictionary
436 true // dictionary ownership
437 );
438
439 std::vector<buffer<uint8_t>> buffers(2);
440 buffers[0] = bitmap.has_value() ? std::move(*bitmap).extract_storage() : buffer<uint8_t>{nullptr, 0};
441 buffers[1] = std::move(data_buffer).extract_storage();
442
443 // create arrow array
444 ArrowArray arr = make_arrow_array(
445 static_cast<std::int64_t>(size), // lengths
446 static_cast<int64_t>(null_count),
447 0, // offset
448 std::move(buffers),
449 nullptr, // children
450 repeat_view<bool>(true, 0), // children_ownership
451 nullptr, // dictionary
452 true
453 );
454 return arrow_proxy(std::move(arr), std::move(schema));
455 }
456
457 template <decimal_type T>
459 {
460 SPARROW_ASSERT_TRUE(i < this->size());
461 return inner_reference(this, i);
462 }
463
464 template <decimal_type T>
466 {
467 SPARROW_ASSERT_TRUE(i < this->size());
468 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<const storage_type>();
469 return inner_const_reference(ptr[i], m_scale);
470 }
471
472 template <decimal_type T>
473 auto decimal_array<T>::value_begin() -> value_iterator
474 {
475 return value_iterator(detail::layout_value_functor<self_type, inner_reference>(this), 0);
476 }
477
478 template <decimal_type T>
479 auto decimal_array<T>::value_end() -> value_iterator
480 {
481 return value_iterator(detail::layout_value_functor<self_type, inner_reference>(this), this->size());
482 }
483
484 template <decimal_type T>
485 auto decimal_array<T>::value_cbegin() const -> const_value_iterator
486 {
487 return const_value_iterator(detail::layout_value_functor<const self_type, inner_value_type>(this), 0);
488 }
489
490 template <decimal_type T>
491 auto decimal_array<T>::value_cend() const -> const_value_iterator
492 {
493 return const_value_iterator(
495 this->size()
496 );
497 }
498
499 template <decimal_type T>
500 void decimal_array<T>::assign(const T& rhs, size_type index)
501 {
502 SPARROW_ASSERT_TRUE(index < this->size());
503 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<storage_type>();
504 const auto storage = rhs.storage();
505 // Scale the storage value to match the scale of the decimal type
506 const auto scaled_storage = storage
507 * static_cast<storage_type>(
508 static_cast<size_t>(std::pow(10, m_scale - rhs.scale()))
509 );
510 ptr[index] = scaled_storage;
511 }
512
513 template <decimal_type T>
514 std::string decimal_array<T>::generate_format(std::size_t precision, int scale)
515 {
516 constexpr std::size_t sizeof_decimal = sizeof(storage_type);
517 std::stringstream format_str;
518 format_str << "d:" << precision << "," << scale;
519 if (sizeof_decimal != 16) // We don't need to specify the size for 128-bit decimals
520 {
521 format_str << "," << sizeof_decimal * 8;
522 }
523 return format_str.str();
524 }
525
526
527}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
constexpr size_type size() const noexcept
Object that owns a piece of contiguous memory.
Definition buffer.hpp:112
inner_const_reference value(size_type i) const
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::difference_type difference_type
typename base_type::bitmap_const_reference bitmap_const_reference
array_inner_types< self_type > inner_types
typename inner_types::const_value_iterator const_value_iterator
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::bitmap_type bitmap_type
typename base_type::const_bitmap_range const_bitmap_range
typename inner_types::inner_reference inner_reference
typename inner_types::inner_const_reference inner_const_reference
mutable_array_bitmap_base< self_type > base_type
typename inner_types::inner_value_type inner_value_type
typename base_type::iterator_tag iterator_tag
decimal_array(Args &&... args)
inner_reference value(size_type i)
typename inner_types::value_iterator value_iterator
Implementation of reference to inner type used for layout L.
storage_type extract_storage() noexcept
constexpr size_type null_count() const noexcept
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:281
A view that repeats a value a given number of times.
This buffer class is use as storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
constexpr bool is_type_instance_of_v
true if T is a concrete type template instanciation of U which is a type template.
Definition mp_utils.hpp:50
constexpr bool is_decimal_array_v
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using a mutable validity buffer.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
decimal_array< decimal< int128_t > > decimal_128_array
decimal_array< decimal< int32_t > > decimal_32_array
decimal_array< decimal< int64_t > > decimal_64_array
decimal_array< decimal< int256_t > > decimal_256_array
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
decimal_reference< array_type > inner_reference
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
bitmap_type::const_reference bitmap_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.