sparrow 0.3.0
Loading...
Searching...
No Matches
decimal_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstddef>
18#include <sstream>
19
31
32namespace sparrow
33{
// Forward declaration of the decimal_array class template; its definition
// appears further down in this file.
34 template <class T>
35 class decimal_array;
36
41
// Internal traits for the decimal array layout.
42 namespace detail
43 {
// Primary template: declared here without a definition. Each specialization
// below exposes a static constexpr get() returning a sparrow::data_type.
44 template <class T>
45 struct get_data_type_from_array;
46
// NOTE(review): for every specialization below, the line naming the
// specialized array type and the return statement of get() are not visible
// in this listing — confirm the array-type to data_type mapping against the
// original header.
47 template <>
49 {
50 [[nodiscard]] static constexpr sparrow::data_type get()
51 {
53 }
54 };
55
56 template <>
58 {
59 [[nodiscard]] static constexpr sparrow::data_type get()
60 {
62 }
63 };
64
65 template <>
67 {
68 [[nodiscard]] static constexpr sparrow::data_type get()
69 {
71 }
72 };
73
74 template <>
76 {
77 [[nodiscard]] static constexpr sparrow::data_type get()
78 {
80 }
81 };
82
83 }
84
85 template <class T>
103
104
105 template <class T>
107
// Array layout for decimal values. Each element is stored as an integer of
// type T::integer_type; the precision and the scale are held once per array
// and the scale is applied to every element returned by value().
// NOTE(review): several declarations (for example self_type, base_type,
// inner_types, size_type and the constructor that accepts the result of
// create_proxy) are not visible in this listing.
108 template <class T>
109 class decimal_array final : public array_bitmap_base<decimal_array<T>>
110 {
111 public:
112
115
117 using inner_value_type = typename inner_types::inner_value_type;
118 using inner_reference = typename inner_types::inner_reference;
119 using inner_const_reference = typename inner_types::inner_const_reference;
120
121 // the integral value type used to store the bits
122 using storage_type = typename T::integer_type;
// Compile-time guard: only storage types of 4, 8, 16 or 32 bytes are accepted.
123 static_assert(
124 sizeof(storage_type) == 4 || sizeof(storage_type) == 8 || sizeof(storage_type) == 16
125 || sizeof(storage_type) == 32,
126 "The storage type must be an integral type of size 4, 8, 16 or 32 bytes"
127 );
128
133
136
140
141 using value_iterator = typename inner_types::value_iterator;
142 using const_value_iterator = typename inner_types::const_value_iterator;
143
145
// Forwards its arguments to create_proxy and delegates to the constructor
// that accepts the value create_proxy returns.
146 template <class... Args>
148 explicit decimal_array(Args&&... args)
149 : decimal_array(create_proxy(std::forward<Args>(args)...))
150 {
151 }
152
153
154 private:
155
// Builds an arrow_proxy from a data buffer, a validity input, the precision,
// the scale, and an optional name and metadata.
156 template <validity_bitmap_input R>
157 [[nodiscard]] static auto create_proxy(
158 u8_buffer<storage_type>&& data_buffer,
159 R&& bitmaps,
160 std::size_t precision,
161 int scale,
162 std::optional<std::string_view> name = std::nullopt,
163 std::optional<std::string_view> metadata = std::nullopt
164 ) -> arrow_proxy;
165
// Overload taking no validity input; it forwards to the overload above.
166 [[nodiscard]] static auto create_proxy(
167 u8_buffer<storage_type>&& data_buffer,
168 std::size_t precision,
169 int scale,
170 std::optional<std::string_view> name = std::nullopt,
171 std::optional<std::string_view> metadata = std::nullopt
172 ) -> arrow_proxy;
173
174
// Element access: value(i) asserts i < size() and builds its result from the
// i-th stored integer and m_scale.
175 [[nodiscard]] inner_reference value(size_type i);
176 [[nodiscard]] inner_const_reference value(size_type i) const;
177
// Iterators over the values, covering indices 0 up to size().
178 [[nodiscard]] value_iterator value_begin();
179 [[nodiscard]] value_iterator value_end();
180
181 [[nodiscard]] const_value_iterator value_cbegin() const;
182 [[nodiscard]] const_value_iterator value_cend() const;
183
184 // Modifiers
185
// Position of the data buffer among the arrow proxy's buffers; create_proxy
// places the validity bitmap storage at index 0 and the data at index 1.
186 static constexpr size_type DATA_BUFFER_INDEX = 1;
187 friend base_type;
191
192 std::size_t m_precision; // The precision of the decimal value
193 int m_scale; // The scale of the decimal value (can be negative)
194 };
195
196 /**********************************
197 * decimal_array implementation *
198 **********************************/
199
// Constructor taking ownership of an arrow proxy: hands the proxy to the base
// class, then reads the precision and the scale from the proxy's format
// string, which must start with "d:". Throws std::runtime_error when the
// prefix is absent or when the numeric extraction fails.
// NOTE(review): the line carrying the constructor signature is not visible in
// this listing.
200 template <class T>
202 : base_type(std::move(proxy))
203 , m_precision(0)
204 , m_scale(0)
205 {
206 // parse the format string
207 const auto format = this->get_arrow_proxy().format();
208
209 // ensure that the format string starts with d:
210 if (format.size() < 2 || format[0] != 'd' || format[1] != ':')
211 {
212 throw std::runtime_error("Invalid format string for decimal array");
213 }
214
215 // substring starting after d:
216 const auto format_str = format.substr(2);
217
218 std::stringstream ss;
219 ss << format_str;
// Expected layout after the prefix: <precision><separator><scale>. Anything
// following the scale (such as a trailing bit width) is left unread.
// NOTE(review): the separator read into c is never compared against ',', so
// any single non-whitespace character is accepted in that position.
220 char c;
221 ss >> m_precision >> c >> m_scale;
222
223 // check for failure
224 if (ss.fail())
225 {
226 throw std::runtime_error("Invalid format string for decimal array");
227 }
228 }
229
// Overload of create_proxy that takes no validity input. It forwards the data
// buffer, precision, scale, name and metadata to the overload that also
// accepts a validity input.
// NOTE(review): the second argument of the forwarded call (the validity
// input) is not visible in this listing — confirm what is passed there.
230 template <class T>
231 auto decimal_array<T>::create_proxy(
232 u8_buffer<storage_type>&& data_buffer,
233 std::size_t precision,
234 int scale,
235 std::optional<std::string_view> name,
236 std::optional<std::string_view> metadata
237 ) -> arrow_proxy
238 {
239 return decimal_array<T>::create_proxy(
240 std::move(data_buffer),
242 precision,
243 scale,
244 name,
245 metadata
246 );
247 }
248
249 template <class T>
250 template <validity_bitmap_input R>
251 auto decimal_array<T>::create_proxy(
252 u8_buffer<storage_type>&& data_buffer,
253 R&& bitmap_input,
254 std::size_t precision,
255 int scale,
256 std::optional<std::string_view> name,
257 std::optional<std::string_view> metadata
258 ) -> arrow_proxy
259 {
260 const auto size = data_buffer.size();
261 validity_bitmap bitmap = ensure_validity_bitmap(size, std::forward<R>(bitmap_input));
262 const auto null_count = bitmap.null_count();
263
264 constexpr std::size_t sizeof_decimal = sizeof(storage_type);
265 std::stringstream format_str;
266 format_str << "d:" << precision << "," << scale << "," << sizeof_decimal * 8;
267
268 // create arrow schema and array
269 ArrowSchema schema = make_arrow_schema(
270 format_str.str(),
271 name, // name
272 metadata, // metadata
273 std::nullopt, // flags
274 0, // n_children
275 nullptr, // children
276 nullptr // dictionary
277 );
278
279 std::vector<buffer<uint8_t>> buffers{
280 std::move(bitmap).extract_storage(),
281 std::move(data_buffer).extract_storage()
282 };
283
284 // create arrow array
285 ArrowArray arr = make_arrow_array(
286 static_cast<std::int64_t>(size), // length
287 static_cast<int64_t>(null_count),
288 0, // offset
289 std::move(buffers),
290 0, // n_children
291 nullptr, // children
292 nullptr // dictionary
293 );
294 return arrow_proxy(std::move(arr), std::move(schema));
295 }
296
297 template <class T>
298 auto decimal_array<T>::value(size_type i) -> inner_reference
299 {
300 SPARROW_ASSERT_TRUE(i < this->size());
301 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<const storage_type>();
302 return inner_reference(ptr[i], m_scale);
303 }
304
305 template <class T>
306 auto decimal_array<T>::value(size_type i) const -> inner_const_reference
307 {
308 SPARROW_ASSERT_TRUE(i < this->size());
309 const auto ptr = this->get_arrow_proxy().buffers()[DATA_BUFFER_INDEX].template data<const storage_type>();
310 return inner_const_reference(ptr[i], m_scale);
311 }
312
313 template <class T>
314 auto decimal_array<T>::value_begin() -> value_iterator
315 {
316 return value_iterator(detail::layout_value_functor<self_type, inner_value_type>(this), 0);
317 }
318
319 template <class T>
320 auto decimal_array<T>::value_end() -> value_iterator
321 {
322 return value_iterator(detail::layout_value_functor<self_type, inner_value_type>(this), this->size());
323 }
324
325 template <class T>
326 auto decimal_array<T>::value_cbegin() const -> const_value_iterator
327 {
328 return const_value_iterator(detail::layout_value_functor<const self_type, inner_value_type>(this), 0);
329 }
330
// Returns the const iterator marking the end of the values, positioned at
// index this->size().
// NOTE(review): the first constructor argument is not visible in this listing;
// presumably it is the same functor that value_cbegin builds — confirm.
331 template <class T>
332 auto decimal_array<T>::value_cend() const -> const_value_iterator
333 {
334 return const_value_iterator(
336 this->size()
337 );
338 }
339}
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
Proxy class over ArrowArray and ArrowSchema.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::difference_type difference_type
typename base_type::bitmap_const_reference bitmap_const_reference
array_inner_types< self_type > inner_types
typename inner_types::const_value_iterator const_value_iterator
typename base_type::const_bitmap_iterator const_bitmap_iterator
typename base_type::bitmap_type bitmap_type
typename base_type::const_bitmap_range const_bitmap_range
typename decimal< int32_t >::integer_type storage_type
decimal_array< decimal< int32_t > > self_type
typename inner_types::inner_reference inner_reference
typename inner_types::inner_const_reference inner_const_reference
typename inner_types::inner_value_type inner_value_type
typename base_type::iterator_tag iterator_tag
array_bitmap_base< self_type > base_type
decimal_array(Args &&... args)
typename inner_types::value_iterator value_iterator
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:280
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
constexpr bool is_type_instance_of_v
true if T is a concrete type template instanciation of U which is a type template.
Definition mp_utils.hpp:50
constexpr bool is_decimal_array_v
decimal_array< decimal< int128_t > > decimal_128_array
ArrowSchema make_arrow_schema(F format, N name, M metadata, std::optional< ArrowFlag > flags, int64_t n_children, ArrowSchema **children, ArrowSchema *dictionary)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
decimal_array< decimal< int32_t > > decimal_32_array
decimal_array< decimal< int64_t > > decimal_64_array
decimal_array< decimal< int256_t > > decimal_256_array
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
dynamic_bitset< std::uint8_t > validity_bitmap
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, size_t n_children, ArrowArray **children, ArrowArray *dictionary)
Creates an ArrowArray.
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
bitmap_type::const_reference bitmap_const_reference
nullable< inner_const_reference, bitmap_const_reference > const_reference
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.