sparrow 2.2.1
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
primitive_data_access.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <type_traits>
18
23#include "sparrow/u8_buffer.hpp"
24
25namespace sparrow
26{
27 template <typename T>
28 concept trivial_copyable_type = std::is_trivially_copyable_v<T> && std::is_standard_layout_v<T>;
29
30 namespace details
31 {
63 template <trivial_copyable_type T, trivial_copyable_type T2 = T>
65 {
66 public:
67
68 using inner_value_type = T2;
69 using inner_reference = T2&;
70 using inner_const_reference = std::conditional_t<std::is_same_v<T2, bool>, T2, const T2&>;
73
76
82 primitive_data_access(arrow_proxy& proxy, size_t data_buffer_index);
83
84 // This class is meant to be used as a private member of array classes,
85 // and holds a pointer to the arrow_proxy of the array. Therefore we
86 // forbid the copy and the move semantics to:
87 // - force the array constructors to call the primitive_data_access
88 // constructor taking an arrow_proxy
89 // - force the array assignment operators to call the reset_proxy
90 // method.
95
96 [[nodiscard]] constexpr inner_pointer data();
97 [[nodiscard]] constexpr inner_const_pointer data() const;
98
99 [[nodiscard]] constexpr inner_reference value(size_t i);
100 [[nodiscard]] constexpr inner_const_reference value(size_t i) const;
101
102 [[nodiscard]] constexpr value_iterator value_begin();
103 [[nodiscard]] constexpr value_iterator value_end();
104
105 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
106 [[nodiscard]] constexpr const_value_iterator value_cend() const;
107
108 constexpr void resize_values(size_t new_length, const T2& value);
109
110 constexpr value_iterator insert_value(const_value_iterator pos, T2 value, size_t count);
111 constexpr value_iterator insert_value(size_t idx, T2 value, size_t count);
112
113 // Template parameter InputIt must be an iterator type that iterates over elements of type T2
114 template <mpl::iterator_of_type<T2> InputIt>
115 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
116
117 template <mpl::iterator_of_type<T2> InputIt>
118 constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last);
119
121 constexpr value_iterator erase_values(size_t idx, size_t count);
122
123 constexpr void reset_proxy(arrow_proxy& proxy);
124
125 template <std::ranges::input_range RANGE>
126 [[nodiscard]] static constexpr u8_buffer<T2> make_data_buffer(RANGE&& r);
127
128 [[nodiscard]] static constexpr u8_buffer<T2> make_data_buffer(size_t n, const T2& value);
129
130 private:
131
132 [[nodiscard]] constexpr buffer_adaptor<T2, buffer<uint8_t>&> get_data_buffer();
133
134 [[nodiscard]] arrow_proxy& get_proxy();
135 [[nodiscard]] const arrow_proxy& get_proxy() const;
136
137 arrow_proxy* p_proxy;
138 size_t m_data_buffer_index;
139 };
140
141 template <>
143 {
144 public:
145
152
155
156 primitive_data_access(arrow_proxy& proxy, size_t data_buffer_index);
157
162
163 [[nodiscard]] inner_reference value(size_t i);
164 [[nodiscard]] inner_const_reference value(size_t i) const;
165
166 [[nodiscard]] value_iterator value_begin();
167 [[nodiscard]] value_iterator value_end();
168
169 [[nodiscard]] const_value_iterator value_cbegin() const;
170 [[nodiscard]] const_value_iterator value_cend() const;
171
172 void resize_values(size_t new_length, bool value);
173
174 value_iterator insert_value(const_value_iterator pos, bool value, size_t count);
175 value_iterator insert_value(size_t idx, bool value, size_t count);
176
177 // Template parameter InputIt must be an iterator type that iterates over elements of type bool
178 template <mpl::iterator_of_type<bool> InputIt>
179 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
180
181 template <mpl::iterator_of_type<bool> InputIt>
182 constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last);
183
185 value_iterator erase_values(size_t idx, size_t count);
186
187 void reset_proxy(arrow_proxy& proxy);
188
189 template <std::ranges::input_range RANGE>
190 [[nodiscard]] static u8_buffer<bool> make_data_buffer(RANGE&& r);
191
192 [[nodiscard]] static u8_buffer<bool> make_data_buffer(size_t size, bool value);
193
194 private:
195
197 using difference_type = typename bitset_adaptor::difference_type;
198 using adaptor_iterator = typename bitset_adaptor::iterator;
199 using const_adaptor_iterator = typename bitset_adaptor::const_iterator;
200
201 template <class F>
202 [[nodiscard]] static u8_buffer<bool> make_data_buffer(size_t size, F init_func);
203
204 [[nodiscard]] size_t get_offset(size_t i) const;
205
206 [[nodiscard]] adaptor_iterator adaptor_begin();
207 [[nodiscard]] adaptor_iterator adaptor_end();
208
209 [[nodiscard]] const_adaptor_iterator adaptor_cbegin() const;
210 [[nodiscard]] const_adaptor_iterator adaptor_cend() const;
211
212 [[nodiscard]] arrow_proxy& get_proxy();
213 [[nodiscard]] const arrow_proxy& get_proxy() const;
214
215 [[nodiscard]] bitset_view get_data_view();
216 [[nodiscard]] bitset_adaptor get_data_adaptor();
217
218 void update_data_view();
219
220 arrow_proxy* p_proxy;
221 size_t m_data_buffer_index;
222 bitset_view m_view;
223 buffer<std::uint8_t> m_dummy_buffer;
224 bitset_adaptor m_adaptor;
225 };
226
227 /****************************************
228 * primitive_data_access implementation *
229 ****************************************/
230
231 template <trivial_copyable_type T, trivial_copyable_type T2>
233 : p_proxy(&proxy)
234 , m_data_buffer_index(data_buffer_index)
235 {
236 }
237
238 template <trivial_copyable_type T, trivial_copyable_type T2>
239 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::data() -> inner_pointer
240 {
241 return get_proxy().buffers()[m_data_buffer_index].template data<T2>()
242 + static_cast<size_t>(get_proxy().offset());
243 }
244
245 template <trivial_copyable_type T, trivial_copyable_type T2>
246 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::data() const -> inner_const_pointer
247 {
248 return get_proxy().buffers()[m_data_buffer_index].template data<T2>()
249 + static_cast<size_t>(get_proxy().offset());
250 }
251
252 template <trivial_copyable_type T, trivial_copyable_type T2>
253 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::value(size_t i) -> inner_reference
254 {
255 SPARROW_ASSERT_TRUE(i < get_proxy().length());
256 return data()[i];
257 }
258
259 template <trivial_copyable_type T, trivial_copyable_type T2>
260 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::value(size_t i) const
262 {
263 SPARROW_ASSERT_TRUE(i < get_proxy().length());
264 return data()[i];
265 }
266
267 template <trivial_copyable_type T, trivial_copyable_type T2>
269 {
270 return value_iterator{data()};
271 }
272
273 template <trivial_copyable_type T, trivial_copyable_type T2>
275 {
276 return sparrow::next(value_begin(), get_proxy().length());
277 }
278
279 template <trivial_copyable_type T, trivial_copyable_type T2>
281 {
282 return const_value_iterator{data()};
283 }
284
285 template <trivial_copyable_type T, trivial_copyable_type T2>
287 {
288 return sparrow::next(value_cbegin(), get_proxy().length());
289 }
290
291 template <trivial_copyable_type T, trivial_copyable_type T2>
292 constexpr void primitive_data_access<T, T2>::resize_values(size_t new_length, const T2& value)
293 {
294 const size_t new_size = new_length + static_cast<size_t>(get_proxy().offset());
295 get_data_buffer().resize(new_size, value);
296 }
297
298 template <trivial_copyable_type T, trivial_copyable_type T2>
299 constexpr auto
302 {
304 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
307 const auto distance = std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()));
308 get_data_buffer().insert(pos, count, value);
310 return sparrow::next(value_begin, distance);
311 }
312
313 template <trivial_copyable_type T, trivial_copyable_type T2>
314 constexpr auto primitive_data_access<T, T2>::insert_value(size_t idx, T2 value, size_t count)
316 {
317 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
318 const const_value_iterator begin{data()};
319 const const_value_iterator it = sparrow::next(begin, idx);
320 return insert_value(it, value, count);
321 }
322
323 // Template parameter InputIt must be an iterator type that iterates over elements of type T2
324 template <trivial_copyable_type T, trivial_copyable_type T2>
325 template <mpl::iterator_of_type<T2> InputIt>
326 constexpr auto
329 {
331 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
334 const auto distance = std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()));
335 get_data_buffer().insert(pos, first, last);
337 return sparrow::next(value_begin, distance);
338 }
339
340 template <trivial_copyable_type T, trivial_copyable_type T2>
341 template <mpl::iterator_of_type<T2> InputIt>
342 constexpr auto primitive_data_access<T, T2>::insert_values(size_t idx, InputIt first, InputIt last)
344 {
345 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
346 const const_value_iterator begin{data()};
347 const const_value_iterator it = sparrow::next(begin, idx);
348 return insert_values(it, first, last);
349 }
350
351 template <trivial_copyable_type T, trivial_copyable_type T2>
354 {
356 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
359 const auto distance = static_cast<size_t>(
360 std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()))
361 );
362 auto data_buffer = get_data_buffer();
363 const auto first = sparrow::next(data_buffer.cbegin(), distance);
364 const auto last = sparrow::next(first, count);
365 data_buffer.erase(first, last);
367 return sparrow::next(value_begin, distance);
368 }
369
370 template <trivial_copyable_type T, trivial_copyable_type T2>
371 constexpr auto primitive_data_access<T, T2>::erase_values(size_t idx, size_t count) -> value_iterator
372 {
373 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
374 const const_value_iterator cbegin{data()};
375 const const_value_iterator it = sparrow::next(cbegin, idx);
376 erase_values(it, count);
377 return sparrow::next(value_iterator{data()}, idx);
378 }
379
380 template <trivial_copyable_type T, trivial_copyable_type T2>
382 {
383 p_proxy = &proxy;
384 }
385
386 template <trivial_copyable_type T, trivial_copyable_type T2>
387 template <std::ranges::input_range RANGE>
389 {
390 return u8_buffer<T2>(std::forward<RANGE>(r));
391 }
392
393 template <trivial_copyable_type T, trivial_copyable_type T2>
394 [[nodiscard]] constexpr u8_buffer<T2>
396 {
397 return u8_buffer<T2>(size, value);
398 }
399
400 template <trivial_copyable_type T, trivial_copyable_type T2>
401 [[nodiscard]] constexpr buffer_adaptor<T2, buffer<uint8_t>&>
402 primitive_data_access<T, T2>::get_data_buffer()
403 {
404 auto& buffers = get_proxy().get_array_private_data()->buffers();
405 return make_buffer_adaptor<T2>(buffers[m_data_buffer_index]);
406 }
407
408 template <trivial_copyable_type T, trivial_copyable_type T2>
409 [[nodiscard]] arrow_proxy& primitive_data_access<T, T2>::get_proxy()
410 {
411 return *p_proxy;
412 }
413
414 template <trivial_copyable_type T, trivial_copyable_type T2>
415 [[nodiscard]] const arrow_proxy& primitive_data_access<T, T2>::get_proxy() const
416 {
417 return *p_proxy;
418 }
419
420 /**********************************************
421 * primitive_data_access<bool> implementation *
422 **********************************************/
423
425 : p_proxy(&proxy)
426 , m_data_buffer_index(data_buffer_index)
427 , m_view(get_data_view())
428 , m_dummy_buffer()
429 , m_adaptor(get_data_adaptor())
430 {
431 }
432
433 [[nodiscard]] inline auto primitive_data_access<bool>::value(size_t i) -> inner_reference
434 {
435 return m_view[get_offset(i)];
436 }
437
438 [[nodiscard]] inline auto primitive_data_access<bool>::value(size_t i) const -> inner_const_reference
439 {
440 return m_view[get_offset(i)];
441 }
442
444 {
445 return sparrow::next(m_view.begin(), get_offset(0u));
446 }
447
449 {
450 return m_view.end();
451 }
452
454 {
455 return sparrow::next(m_view.cbegin(), get_offset(0u));
456 }
457
459 {
460 return m_view.cend();
461 }
462
463 inline void primitive_data_access<bool>::resize_values(size_t new_length, bool value)
464 {
465 m_adaptor.resize(get_offset(new_length), value);
466 update_data_view();
467 }
468
469 inline auto
472 {
473 auto ins_iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
474 auto res = m_adaptor.insert(ins_iter, count, value);
475 update_data_view();
476 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
477 }
478
479 inline auto primitive_data_access<bool>::insert_value(size_t idx, bool value, size_t count)
481 {
482 auto iter = sparrow::next(adaptor_cbegin(), static_cast<difference_type>(idx));
483 auto res = m_adaptor.insert(iter, count, value);
484 update_data_view();
485 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
486 }
487
488 // Template parameter InputIt must be an iterator type that iterates over elements of type bool
489 template <mpl::iterator_of_type<bool> InputIt>
490 constexpr auto
493 {
494 auto ins_iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
495 auto res = m_adaptor.insert(ins_iter, first, last);
496 update_data_view();
497 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
498 }
499
500 template <mpl::iterator_of_type<bool> InputIt>
501 constexpr auto primitive_data_access<bool>::insert_values(size_t idx, InputIt first, InputIt last)
503 {
504 auto iter = sparrow::next(adaptor_cbegin(), static_cast<difference_type>(idx));
505 auto res = m_adaptor.insert(iter, first, last);
506 update_data_view();
507 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
508 }
509
512 {
513 auto iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
514 auto iter_end = sparrow::next(iter, count);
515 auto res = m_adaptor.erase(iter, iter_end);
516 update_data_view();
517 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
518 }
519
520 inline auto primitive_data_access<bool>::erase_values(size_t idx, size_t count) -> value_iterator
521 {
522 auto iter = sparrow::next(adaptor_cbegin(), idx);
523 auto iter_end = sparrow::next(iter, count);
524 auto res = m_adaptor.erase(iter, iter_end);
525 update_data_view();
526 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
527 }
528
530 {
531 p_proxy = &proxy;
532 m_view = get_data_view();
533 m_adaptor = get_data_adaptor();
534 }
535
536 template <std::ranges::input_range RANGE>
538 {
539 auto size = static_cast<size_t>(std::ranges::distance(r));
540 auto init_func = [&r](bitset_view& v)
541 {
542 std::copy(r.begin(), r.end(), v.begin());
543 };
544 return make_data_buffer(size, init_func);
545 }
546
547 [[nodiscard]] inline u8_buffer<bool>
549 {
550 auto init_func = [&value](bitset_view& v)
551 {
552 std::fill(v.begin(), v.end(), value);
553 };
554 return make_data_buffer(size, init_func);
555 }
556
557 template <class F>
558 [[nodiscard]] inline u8_buffer<bool>
559 primitive_data_access<bool>::make_data_buffer(size_t size, F init_func)
560 {
561 std::size_t block_nb = size / 8;
562 if (block_nb * 8 < size)
563 {
564 ++block_nb;
565 }
566 u8_buffer<bool> res(block_nb);
567 std::uint8_t* buffer = reinterpret_cast<std::uint8_t*>(res.data());
568 bitset_view v(buffer, size);
569 init_func(v);
570 return res;
571 }
572
573 [[nodiscard]] inline size_t primitive_data_access<bool>::get_offset(size_t i) const
574 {
575 return i + get_proxy().offset();
576 }
577
578 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_begin() -> adaptor_iterator
579 {
580 return sparrow::next(m_adaptor.begin(), get_offset(0u));
581 }
582
583 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_end() -> adaptor_iterator
584 {
585 return m_adaptor.end();
586 }
587
588 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_cbegin() const -> const_adaptor_iterator
589 {
590 return sparrow::next(m_adaptor.cbegin(), get_offset(0u));
591 }
592
593 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_cend() const -> const_adaptor_iterator
594 {
595 return m_adaptor.cend();
596 }
597
598 [[nodiscard]] inline arrow_proxy& primitive_data_access<bool>::get_proxy()
599 {
600 return *p_proxy;
601 }
602
603 [[nodiscard]] inline const arrow_proxy& primitive_data_access<bool>::get_proxy() const
604 {
605 return *p_proxy;
606 }
607
608 [[nodiscard]] inline auto primitive_data_access<bool>::get_data_view() -> bitset_view
609 {
610 auto& proxy = get_proxy();
611 const size_t size = proxy.length() + proxy.offset();
612 return {proxy.buffers()[m_data_buffer_index].data(), size};
613 }
614
615 [[nodiscard]] inline auto primitive_data_access<bool>::get_data_adaptor() -> bitset_adaptor
616 {
617 auto& proxy = get_proxy();
618 if (proxy.is_created_with_sparrow())
619 {
620 size_t size = proxy.length() + proxy.offset();
621 return bitset_adaptor(&(proxy.get_array_private_data()->buffers()[m_data_buffer_index]), size);
622 }
623 else
624 {
625 return bitset_adaptor(&m_dummy_buffer, 0u);
626 }
627 }
628
630 {
631 m_view = bitset_view(get_proxy().buffers()[m_data_buffer_index].data(), m_adaptor.size());
632 }
633 }
634}
Class which has internally a reference to a contiguous container of a certain type and provides an AP...
Object that owns a piece of contiguous memory.
Definition buffer.hpp:114
static u8_buffer< bool > make_data_buffer(RANGE &&r)
primitive_data_access(arrow_proxy &proxy, size_t data_buffer_index)
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
primitive_data_access & operator=(primitive_data_access &&)=delete
typename bitset_view::const_reference inner_const_reference
dynamic_bitset_view< std::uint8_t, non_tracking_null_count<> > bitset_view
typename bitset_view::const_iterator const_value_iterator
primitive_data_access(const primitive_data_access &)=delete
primitive_data_access(primitive_data_access &&)=delete
primitive_data_access & operator=(const primitive_data_access &)=delete
constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last)
Provides access to primitive data stored in Arrow format buffers.
primitive_data_access(arrow_proxy &proxy, size_t data_buffer_index)
Constructor for primitive_data_access.
constexpr inner_reference value(size_t i)
pointer_iterator< inner_const_pointer > const_value_iterator
std::conditional_t< std::is_same_v< T2, bool >, T2, const T2 & > inner_const_reference
primitive_data_access(primitive_data_access &&)=delete
constexpr const_value_iterator value_cbegin() const
constexpr value_iterator insert_value(const_value_iterator pos, T2 value, size_t count)
static constexpr u8_buffer< T2 > make_data_buffer(RANGE &&r)
constexpr inner_const_pointer data() const
constexpr const_value_iterator value_cend() const
constexpr value_iterator insert_value(size_t idx, T2 value, size_t count)
constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last)
primitive_data_access & operator=(const primitive_data_access &)=delete
constexpr void reset_proxy(arrow_proxy &proxy)
primitive_data_access(const primitive_data_access &)=delete
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
constexpr void resize_values(size_t new_length, const T2 &value)
static constexpr u8_buffer< T2 > make_data_buffer(size_t n, const T2 &value)
constexpr inner_const_reference value(size_t i) const
constexpr value_iterator erase_values(size_t idx, size_t count)
primitive_data_access & operator=(primitive_data_access &&)=delete
constexpr value_iterator erase_values(const_value_iterator pos, size_t count)
typename storage_type_without_cvrefpointer::difference_type difference_type
A non-owning view to a dynamic size sequence of bits stored in external memory.
This buffer class is used as storage buffer for all sparrow arrays.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
auto make_buffer_adaptor(FromBufferRef &buf)