sparrow 0.6.0
Loading...
Searching...
No Matches
decimal_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstddef>
18#include <ranges>
19#include <sstream>
20
33
34namespace sparrow
35{
36 template <decimal_type T>
37 class decimal_array;
38
43
44 namespace detail
45 {
46 template <class T>
47 struct get_data_type_from_array;
48
49 template <>
51 {
52 [[nodiscard]] static constexpr sparrow::data_type get()
53 {
55 }
56 };
57
58 template <>
60 {
61 [[nodiscard]] static constexpr sparrow::data_type get()
62 {
64 }
65 };
66
67 template <>
69 {
70 [[nodiscard]] static constexpr sparrow::data_type get()
71 {
73 }
74 };
75
76 template <>
78 {
79 [[nodiscard]] static constexpr sparrow::data_type get()
80 {
82 }
83 };
84
85 }
86
87 template <decimal_type T>
105
106
107 template <class T>
109
110 template <decimal_type T>
111 class decimal_array final : public array_bitmap_base<decimal_array<T>>
112 {
113 public:
114
117
119 using inner_value_type = typename inner_types::inner_value_type;
120 using inner_reference = typename inner_types::inner_reference;
121 using inner_const_reference = typename inner_types::inner_const_reference;
122
123 // the integral value type used to store the bits
124 using storage_type = typename T::integer_type;
125 static_assert(
126 sizeof(storage_type) == 4 || sizeof(storage_type) == 8 || sizeof(storage_type) == 16
127 || sizeof(storage_type) == 32,
128 "The storage type must be an integral type of size 4, 8, 16 or 32 bytes"
129 );
130
135
138
142
143 using value_iterator = typename inner_types::value_iterator;
144 using const_value_iterator = typename inner_types::const_value_iterator;
145
147
148 template <class... Args>
150 explicit decimal_array(Args&&... args)
151 : decimal_array(create_proxy(std::forward<Args>(args)...))
152 {
153 }
154
155
156 private:
157
158 template <
159 std::ranges::input_range VALUE_RANGE,
160 validity_bitmap_input VALIDITY_RANGE,
161 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
162 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
163 [[nodiscard]] static auto create_proxy(
164 VALUE_RANGE&& range,
165 VALIDITY_RANGE&& bitmaps,
166 std::size_t precision,
167 int scale,
168 std::optional<std::string_view> name = std::nullopt,
169 std::optional<METADATA_RANGE> metadata = std::nullopt
170 ) -> arrow_proxy;
171
172 template <
173 std::ranges::input_range NULLABLE_VALUE_RANGE,
174 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
175 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
176 [[nodiscard]] static auto create_proxy(
177 NULLABLE_VALUE_RANGE&& range,
178 std::size_t precision,
179 int scale,
180 std::optional<std::string_view> name = std::nullopt,
181 std::optional<METADATA_RANGE> metadata = std::nullopt
182 ) -> arrow_proxy;
183
184 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
185 [[nodiscard]] static auto create_proxy(
186 u8_buffer<storage_type>&& data_buffer,
187 R&& bitmaps,
188 std::size_t precision,
189 int scale,
190 std::optional<std::string_view> name = std::nullopt,
191 std::optional<METADATA_RANGE> metadata = std::nullopt
192 ) -> arrow_proxy;
193
194 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
195 [[nodiscard]] static auto create_proxy(
196 u8_buffer<storage_type>&& data_buffer,
197 std::size_t precision,
198 int scale,
199 std::optional<std::string_view> name = std::nullopt,
200 std::optional<METADATA_RANGE> metadata = std::nullopt
201 ) -> arrow_proxy;
202
203
204 [[nodiscard]] inner_reference value(size_type i);
205 [[nodiscard]] inner_const_reference value(size_type i) const;
206
207 [[nodiscard]] value_iterator value_begin();
208 [[nodiscard]] value_iterator value_end();
209
210 [[nodiscard]] const_value_iterator value_cbegin() const;
211 [[nodiscard]] const_value_iterator value_cend() const;
212
213 // Modifiers
214
215 static constexpr size_type DATA_BUFFER_INDEX = 1;
216 friend base_type;
220
221 std::size_t m_precision; // The precision of the decimal value
222 int m_scale; // The scale of the decimal value (can be negative)
223 };
224
225 /**********************************
226 * decimal_array implementation *
227 **********************************/
228
229 template <decimal_type T>
231 : base_type(std::move(proxy))
232 , m_precision(0)
233 , m_scale(0)
234 {
235 // parse the format string
236 const auto format = this->get_arrow_proxy().format();
237
238 // ensure that the format string starts with d:
239 if (format.size() < 2 || format[0] != 'd' || format[1] != ':')
240 {
241 throw std::runtime_error("Invalid format string for decimal array");
242 }
243
244 // substring starting after d:
245 const auto format_str = format.substr(2);
246
247 std::stringstream ss;
248 ss << format_str;
249 char c;
250 ss >> m_precision >> c >> m_scale;
251
252 // check for failure
253 if (ss.fail())
254 {
255 throw std::runtime_error("Invalid format string for decimal array");
256 }
257 }
258
259 template <decimal_type T>
260 template <std::ranges::input_range VALUE_RANGE, validity_bitmap_input VALIDITY_RANGE, input_metadata_container METADATA_RANGE>
261 requires std::convertible_to<std::ranges::range_value_t<VALUE_RANGE>, typename T::integer_type>
262 arrow_proxy decimal_array<T>::create_proxy(
263 VALUE_RANGE&& range,
264 VALIDITY_RANGE&& bitmaps,
265 std::size_t precision,
266 int scale,
267 std::optional<std::string_view> name,
268 std::optional<METADATA_RANGE> metadata
269 )
270 {
271 u8_buffer<storage_type> u8_data_buffer(std::forward<VALUE_RANGE>(range));
272 return create_proxy(
273 std::move(u8_data_buffer),
274 std::forward<VALIDITY_RANGE>(bitmaps),
275 precision,
276 scale,
277 std::move(name),
278 std::move(metadata)
279 );
280 }
281
282 template <decimal_type T>
283 template <std::ranges::input_range NULLABLE_VALUE_RANGE, input_metadata_container METADATA_RANGE>
284 requires std::is_same_v<std::ranges::range_value_t<NULLABLE_VALUE_RANGE>, nullable<typename T::integer_type>>
285 arrow_proxy decimal_array<T>::create_proxy(
286 NULLABLE_VALUE_RANGE&& range,
287 std::size_t precision,
288 int scale,
289 std::optional<std::string_view> name,
290 std::optional<METADATA_RANGE> metadata
291 )
292 {
293 auto values = range
294 | std::views::transform(
295 [](const auto& v)
296 {
297 return v.get();
298 }
299 );
300 auto is_non_null = range
301 | std::views::transform(
302 [](const auto& v)
303 {
304 return v.has_value();
305 }
306 );
307 return create_proxy(values, is_non_null, precision, scale, std::move(name), std::move(metadata));
308 }
309
310 template <decimal_type T>
311 template <input_metadata_container METADATA_RANGE>
312 auto decimal_array<T>::create_proxy(
313 u8_buffer<storage_type>&& data_buffer,
314 std::size_t precision,
315 int scale,
316 std::optional<std::string_view> name,
317 std::optional<METADATA_RANGE> metadata
318 ) -> arrow_proxy
319 {
320 return decimal_array<T>::create_proxy(
321 std::move(data_buffer),
323 precision,
324 scale,
325 name,
326 metadata
327 );
328 }
329
330 template <decimal_type T>
331 template <validity_bitmap_input R, input_metadata_container METADATA_RANGE>
332 auto decimal_array<T>::create_proxy(
333 u8_buffer<storage_type>&& data_buffer,
334 R&& bitmap_input,
335 std::size_t precision,
336 int scale,
337 std::optional<std::string_view> name,
338 std::optional<METADATA_RANGE> metadata
339 ) -> arrow_proxy
340 {
341 const auto size = data_buffer.size();
342 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
343 const auto null_count = bitmap.null_count();
344
345 constexpr std::size_t sizeof_decimal = sizeof(storage_type);
346 std::stringstream format_str;
347 format_str << "d:" << precision << "," << scale << "," << sizeof_decimal * 8;
348
349 // create arrow schema and array
350 ArrowSchema schema = make_arrow_schema(
351 format_str.str(),
352 name, // name
353 metadata, // metadata
354 std::nullopt, // flags
355 nullptr, // children
356 repeat_view<bool>(true, 0),
357 nullptr, // dictionary
358 true // dictionary ownership
359 );
360
361 std::vector<buffer<uint8_t>> buffers{
362 std::move(bitmap).extract_storage(),
363 std::move(data_buffer).extract_storage()
364 };
365
366 // create arrow array
367 ArrowArray arr = make_arrow_array(
368 static_cast<std::int64_t>(size), // length
369 static_cast<int64_t>(null_count),
370 0, // offset
371 std::move(buffers),
372 nullptr, // children
373 repeat_view<bool>(true, 0), // children_ownership
374 nullptr, // dictionary
375 true
376 );
377 return arrow_proxy(std::move(arr), std::move(schema));
378 }
379
380 template <decimal_type T>
381 auto decimal_array<T>::value(size_type i) -> inner_reference
382 {
383 SPARROW_ASSERT_TRUE(i < this->size());
384 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<const storage_type>();
385 return inner_reference(ptr[i], m_scale);
386 }
387
388 template <decimal_type T>
389 auto decimal_array<T>::value(size_type i) const -> inner_const_reference
390 {
391 SPARROW_ASSERT_TRUE(i < this->size());
392 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<const storage_type>();
393 return inner_const_reference(ptr[i], m_scale);
394 }
395
396 template <decimal_type T>
397 auto decimal_array<T>::value_begin() -> value_iterator
398 {
399 return value_iterator(detail::layout_value_functor<self_type, inner_value_type>(this), 0);
400 }
401
402 template <decimal_type T>
403 auto decimal_array<T>::value_end() -> value_iterator
404 {
405 return value_iterator(detail::layout_value_functor<self_type, inner_value_type>(this), this->size());
406 }
407
408 template <decimal_type T>
409 auto decimal_array<T>::value_cbegin() const -> const_value_iterator
410 {
411 return const_value_iterator(detail::layout_value_functor<const self_type, inner_value_type>(this), 0);
412 }
413
414 template <decimal_type T>
415 auto decimal_array<T>::value_cend() const -> const_value_iterator
416 {
417 return const_value_iterator(
419 this->size()
420 );
421 }
422}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::difference_type difference_type
typename base_type::bitmap_const_reference bitmap_const_reference
array_inner_types< self_type > inner_types
typename inner_types::const_value_iterator const_value_iterator
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::bitmap_type bitmap_type
typename base_type::const_bitmap_range const_bitmap_range
typename inner_types::inner_reference inner_reference
typename inner_types::inner_const_reference inner_const_reference
typename inner_types::inner_value_type inner_value_type
typename base_type::iterator_tag iterator_tag
array_bitmap_base< self_type > base_type
decimal_array(Args &&... args)
typename inner_types::value_iterator value_iterator
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:280
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
constexpr bool is_type_instance_of_v
true if T is a concrete type template instantiation of U which is a type template.
Definition mp_utils.hpp:50
constexpr bool is_decimal_array_v
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
decimal_array< decimal< int128_t > > decimal_128_array
decimal_array< decimal< int32_t > > decimal_32_array
decimal_array< decimal< int64_t > > decimal_64_array
decimal_array< decimal< int256_t > > decimal_256_array
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
bitmap_type::const_reference bitmap_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.