sparrow 1.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
primitive_data_access.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <type_traits>
18
22#include "sparrow/u8_buffer.hpp"
23
24namespace sparrow
25{
/// Concept satisfied by types that are both trivially copyable and standard-layout.
/// It constrains the T and T2 template parameters of primitive_data_access.
/// NOTE(review): presumably required so raw buffer bytes can be viewed as T2 — confirm.
26 template <typename T>
27 concept trivial_copyable_type = std::is_trivially_copyable_v<T> && std::is_standard_layout_v<T>;
28
29 namespace details
30 {
62 template <trivial_copyable_type T, trivial_copyable_type T2 = T>
64 {
65 public:
66
67 using inner_value_type = T2;
68 using inner_reference = T2&;
69 using inner_const_reference = std::conditional_t<std::is_same_v<T2, bool>, T2, const T2&>;
72
75
81 primitive_data_access(arrow_proxy& proxy, size_t data_buffer_index);
82
83 // This class is meant to be used as a private member of array classes,
84 // and holds a pointer to the arrow_proxy of the array. Therefore we
85 // forbid the copy and the move semantics to:
86 // - force the array constructors to call the primitive_data_access
87 // constructor taking an arrow_proxy
88 // - force the array assignment operators to call the reset_proxy
89 // method.
94
95 [[nodiscard]] constexpr inner_pointer data();
96 [[nodiscard]] constexpr inner_const_pointer data() const;
97
98 [[nodiscard]] constexpr inner_reference value(size_t i);
99 [[nodiscard]] constexpr inner_const_reference value(size_t i) const;
100
101 [[nodiscard]] constexpr value_iterator value_begin();
102 [[nodiscard]] constexpr value_iterator value_end();
103
104 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
105 [[nodiscard]] constexpr const_value_iterator value_cend() const;
106
107 constexpr void resize_values(size_t new_length, const T2& value);
108
109 constexpr value_iterator insert_value(const_value_iterator pos, T2 value, size_t count);
110 constexpr value_iterator insert_value(size_t idx, T2 value, size_t count);
111
112 // Template parameter InputIt must be an iterator type that iterates over elements of type T2
113 template <mpl::iterator_of_type<T2> InputIt>
114 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
115
116 template <mpl::iterator_of_type<T2> InputIt>
117 constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last);
118
120 constexpr value_iterator erase_values(size_t idx, size_t count);
121
122 constexpr void reset_proxy(arrow_proxy& proxy);
123
124 template <std::ranges::input_range RANGE>
125 [[nodiscard]] static constexpr u8_buffer<T2> make_data_buffer(RANGE&& r);
126
127 [[nodiscard]] static constexpr u8_buffer<T2> make_data_buffer(size_t n, const T2& value);
128
129 private:
130
131 [[nodiscard]] constexpr buffer_adaptor<T2, buffer<uint8_t>&> get_data_buffer();
132
133 [[nodiscard]] arrow_proxy& get_proxy();
134 [[nodiscard]] const arrow_proxy& get_proxy() const;
135
136 arrow_proxy* p_proxy;
137 size_t m_data_buffer_index;
138 };
139
140 template <>
142 {
143 public:
144
151
154
155 primitive_data_access(arrow_proxy& proxy, size_t data_buffer_index);
156
161
162 [[nodiscard]] inner_reference value(size_t i);
163 [[nodiscard]] inner_const_reference value(size_t i) const;
164
165 [[nodiscard]] value_iterator value_begin();
166 [[nodiscard]] value_iterator value_end();
167
168 [[nodiscard]] const_value_iterator value_cbegin() const;
169 [[nodiscard]] const_value_iterator value_cend() const;
170
171 void resize_values(size_t new_length, bool value);
172
173 value_iterator insert_value(const_value_iterator pos, bool value, size_t count);
174 value_iterator insert_value(size_t idx, bool value, size_t count);
175
176 // Template parameter InputIt must be an iterator type that iterates over elements of type bool
177 template <mpl::iterator_of_type<bool> InputIt>
178 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
179
180 template <mpl::iterator_of_type<bool> InputIt>
181 constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last);
182
184 value_iterator erase_values(size_t idx, size_t count);
185
186 void reset_proxy(arrow_proxy& proxy);
187
188 template <std::ranges::input_range RANGE>
189 [[nodiscard]] static u8_buffer<bool> make_data_buffer(RANGE&& r);
190
191 [[nodiscard]] static u8_buffer<bool> make_data_buffer(size_t size, bool value);
192
193 private:
194
195 using bitset_adaptor = non_owning_dynamic_bitset<std::uint8_t>;
196 using difference_type = typename bitset_adaptor::difference_type;
197 using adaptor_iterator = typename bitset_adaptor::iterator;
198 using const_adaptor_iterator = typename bitset_adaptor::const_iterator;
199
200 template <class F>
201 [[nodiscard]] static u8_buffer<bool> make_data_buffer(size_t size, F init_func);
202
203 [[nodiscard]] size_t get_offset(size_t i) const;
204
205 [[nodiscard]] adaptor_iterator adaptor_begin();
206 [[nodiscard]] adaptor_iterator adaptor_end();
207
208 [[nodiscard]] const_adaptor_iterator adaptor_cbegin() const;
209 [[nodiscard]] const_adaptor_iterator adaptor_cend() const;
210
211 [[nodiscard]] arrow_proxy& get_proxy();
212 [[nodiscard]] const arrow_proxy& get_proxy() const;
213
214 [[nodiscard]] bitset_view get_data_view();
215 [[nodiscard]] bitset_adaptor get_data_adaptor();
216
217 void update_data_view();
218
219 arrow_proxy* p_proxy;
220 size_t m_data_buffer_index;
221 bitset_view m_view;
222 buffer<std::uint8_t> m_dummy_buffer;
223 bitset_adaptor m_adaptor;
224 };
225
226 /****************************************
227 * primitive_data_access implementation *
228 ****************************************/
229
230 template <trivial_copyable_type T, trivial_copyable_type T2>
232 : p_proxy(&proxy)
233 , m_data_buffer_index(data_buffer_index)
234 {
235 }
236
237 template <trivial_copyable_type T, trivial_copyable_type T2>
238 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::data() -> inner_pointer
239 {
240 return get_proxy().buffers()[m_data_buffer_index].template data<T2>()
241 + static_cast<size_t>(get_proxy().offset());
242 }
243
244 template <trivial_copyable_type T, trivial_copyable_type T2>
245 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::data() const -> inner_const_pointer
246 {
247 return get_proxy().buffers()[m_data_buffer_index].template data<T2>()
248 + static_cast<size_t>(get_proxy().offset());
249 }
250
251 template <trivial_copyable_type T, trivial_copyable_type T2>
252 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::value(size_t i) -> inner_reference
253 {
254 SPARROW_ASSERT_TRUE(i < get_proxy().length());
255 return data()[i];
256 }
257
258 template <trivial_copyable_type T, trivial_copyable_type T2>
259 [[nodiscard]] constexpr auto primitive_data_access<T, T2>::value(size_t i) const
261 {
262 SPARROW_ASSERT_TRUE(i < get_proxy().length());
263 return data()[i];
264 }
265
266 template <trivial_copyable_type T, trivial_copyable_type T2>
268 {
269 return value_iterator{data()};
270 }
271
272 template <trivial_copyable_type T, trivial_copyable_type T2>
274 {
275 return sparrow::next(value_begin(), get_proxy().length());
276 }
277
278 template <trivial_copyable_type T, trivial_copyable_type T2>
280 {
281 return const_value_iterator{data()};
282 }
283
284 template <trivial_copyable_type T, trivial_copyable_type T2>
286 {
287 return sparrow::next(value_cbegin(), get_proxy().length());
288 }
289
290 template <trivial_copyable_type T, trivial_copyable_type T2>
291 constexpr void primitive_data_access<T, T2>::resize_values(size_t new_length, const T2& value)
292 {
293 const size_t new_size = new_length + static_cast<size_t>(get_proxy().offset());
294 get_data_buffer().resize(new_size, value);
295 }
296
297 template <trivial_copyable_type T, trivial_copyable_type T2>
298 constexpr auto
301 {
303 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
306 const auto distance = std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()));
307 get_data_buffer().insert(pos, count, value);
309 return sparrow::next(value_begin, distance);
310 }
311
312 template <trivial_copyable_type T, trivial_copyable_type T2>
313 constexpr auto primitive_data_access<T, T2>::insert_value(size_t idx, T2 value, size_t count)
315 {
316 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
317 const const_value_iterator begin{data()};
318 const const_value_iterator it = sparrow::next(begin, idx);
319 return insert_value(it, value, count);
320 }
321
322 // Template parameter InputIt must be an iterator type that iterates over elements of type T2
323 template <trivial_copyable_type T, trivial_copyable_type T2>
324 template <mpl::iterator_of_type<T2> InputIt>
325 constexpr auto
328 {
330 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
333 const auto distance = std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()));
334 get_data_buffer().insert(pos, first, last);
336 return sparrow::next(value_begin, distance);
337 }
338
339 template <trivial_copyable_type T, trivial_copyable_type T2>
340 template <mpl::iterator_of_type<T2> InputIt>
341 constexpr auto primitive_data_access<T, T2>::insert_values(size_t idx, InputIt first, InputIt last)
343 {
344 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
345 const const_value_iterator begin{data()};
346 const const_value_iterator it = sparrow::next(begin, idx);
347 return insert_values(it, first, last);
348 }
349
350 template <trivial_copyable_type T, trivial_copyable_type T2>
353 {
355 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
358 const auto distance = static_cast<size_t>(
359 std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()))
360 );
361 auto data_buffer = get_data_buffer();
362 const auto first = sparrow::next(data_buffer.cbegin(), distance);
363 const auto last = sparrow::next(first, count);
364 data_buffer.erase(first, last);
366 return sparrow::next(value_begin, distance);
367 }
368
369 template <trivial_copyable_type T, trivial_copyable_type T2>
370 constexpr auto primitive_data_access<T, T2>::erase_values(size_t idx, size_t count) -> value_iterator
371 {
372 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
373 const const_value_iterator cbegin{data()};
374 const const_value_iterator it = sparrow::next(cbegin, idx);
375 erase_values(it, count);
376 return sparrow::next(value_iterator{data()}, idx);
377 }
378
379 template <trivial_copyable_type T, trivial_copyable_type T2>
381 {
382 p_proxy = &proxy;
383 }
384
385 template <trivial_copyable_type T, trivial_copyable_type T2>
386 template <std::ranges::input_range RANGE>
388 {
389 return u8_buffer<T2>(std::forward<RANGE>(r));
390 }
391
392 template <trivial_copyable_type T, trivial_copyable_type T2>
393 [[nodiscard]] constexpr u8_buffer<T2>
395 {
396 return u8_buffer<T2>(size, value);
397 }
398
399 template <trivial_copyable_type T, trivial_copyable_type T2>
400 [[nodiscard]] constexpr buffer_adaptor<T2, buffer<uint8_t>&>
401 primitive_data_access<T, T2>::get_data_buffer()
402 {
403 auto& buffers = get_proxy().get_array_private_data()->buffers();
404 return make_buffer_adaptor<T2>(buffers[m_data_buffer_index]);
405 }
406
407 template <trivial_copyable_type T, trivial_copyable_type T2>
408 [[nodiscard]] arrow_proxy& primitive_data_access<T, T2>::get_proxy()
409 {
410 return *p_proxy;
411 }
412
413 template <trivial_copyable_type T, trivial_copyable_type T2>
414 [[nodiscard]] const arrow_proxy& primitive_data_access<T, T2>::get_proxy() const
415 {
416 return *p_proxy;
417 }
418
419 /**********************************************
420 * primitive_data_access<bool> implementation *
421 **********************************************/
422
424 : p_proxy(&proxy)
425 , m_data_buffer_index(data_buffer_index)
426 , m_view(get_data_view())
427 , m_dummy_buffer()
428 , m_adaptor(get_data_adaptor())
429 {
430 }
431
432 [[nodiscard]] inline auto primitive_data_access<bool>::value(size_t i) -> inner_reference
433 {
434 return m_view[get_offset(i)];
435 }
436
437 [[nodiscard]] inline auto primitive_data_access<bool>::value(size_t i) const -> inner_const_reference
438 {
439 return m_view[get_offset(i)];
440 }
441
443 {
444 return sparrow::next(m_view.begin(), get_offset(0u));
445 }
446
448 {
449 return m_view.end();
450 }
451
453 {
454 return sparrow::next(m_view.cbegin(), get_offset(0u));
455 }
456
458 {
459 return m_view.cend();
460 }
461
462 inline void primitive_data_access<bool>::resize_values(size_t new_length, bool value)
463 {
464 m_adaptor.resize(get_offset(new_length), value);
465 update_data_view();
466 }
467
468 inline auto
471 {
472 auto ins_iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
473 auto res = m_adaptor.insert(ins_iter, count, value);
474 update_data_view();
475 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
476 }
477
478 inline auto primitive_data_access<bool>::insert_value(size_t idx, bool value, size_t count)
480 {
481 auto iter = sparrow::next(adaptor_cbegin(), static_cast<difference_type>(idx));
482 auto res = m_adaptor.insert(iter, count, value);
483 update_data_view();
484 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
485 }
486
487 // Template parameter InputIt must be an iterator type that iterates over elements of type bool
488 template <mpl::iterator_of_type<bool> InputIt>
489 constexpr auto
492 {
493 auto ins_iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
494 auto res = m_adaptor.insert(ins_iter, first, last);
495 update_data_view();
496 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
497 }
498
499 template <mpl::iterator_of_type<bool> InputIt>
500 constexpr auto primitive_data_access<bool>::insert_values(size_t idx, InputIt first, InputIt last)
502 {
503 auto iter = sparrow::next(adaptor_cbegin(), static_cast<difference_type>(idx));
504 auto res = m_adaptor.insert(iter, first, last);
505 update_data_view();
506 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
507 }
508
511 {
512 auto iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
513 auto iter_end = sparrow::next(iter, count);
514 auto res = m_adaptor.erase(iter, iter_end);
515 update_data_view();
516 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
517 }
518
519 inline auto primitive_data_access<bool>::erase_values(size_t idx, size_t count) -> value_iterator
520 {
521 auto iter = sparrow::next(adaptor_cbegin(), idx);
522 auto iter_end = sparrow::next(iter, count);
523 auto res = m_adaptor.erase(iter, iter_end);
524 update_data_view();
525 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
526 }
527
529 {
530 p_proxy = &proxy;
531 m_view = get_data_view();
532 m_adaptor = get_data_adaptor();
533 }
534
535 template <std::ranges::input_range RANGE>
537 {
538 auto size = static_cast<size_t>(std::ranges::distance(r));
539 auto init_func = [&r](bitset_view& v)
540 {
541 std::copy(r.begin(), r.end(), v.begin());
542 };
543 return make_data_buffer(size, init_func);
544 }
545
546 [[nodiscard]] inline u8_buffer<bool>
548 {
549 auto init_func = [&value](bitset_view& v)
550 {
551 std::fill(v.begin(), v.end(), value);
552 };
553 return make_data_buffer(size, init_func);
554 }
555
556 template <class F>
557 [[nodiscard]] inline u8_buffer<bool>
558 primitive_data_access<bool>::make_data_buffer(size_t size, F init_func)
559 {
560 std::size_t block_nb = size / 8;
561 if (block_nb * 8 < size)
562 {
563 ++block_nb;
564 }
565 u8_buffer<bool> res(block_nb);
566 std::uint8_t* buffer = reinterpret_cast<std::uint8_t*>(res.data());
567 bitset_view v(buffer, size);
568 init_func(v);
569 return res;
570 }
571
572 [[nodiscard]] inline size_t primitive_data_access<bool>::get_offset(size_t i) const
573 {
574 return i + get_proxy().offset();
575 }
576
577 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_begin() -> adaptor_iterator
578 {
579 return sparrow::next(m_adaptor.begin(), get_offset(0u));
580 }
581
582 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_end() -> adaptor_iterator
583 {
584 return m_adaptor.end();
585 }
586
587 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_cbegin() const -> const_adaptor_iterator
588 {
589 return sparrow::next(m_adaptor.cbegin(), get_offset(0u));
590 }
591
592 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_cend() const -> const_adaptor_iterator
593 {
594 return m_adaptor.cend();
595 }
596
597 [[nodiscard]] inline arrow_proxy& primitive_data_access<bool>::get_proxy()
598 {
599 return *p_proxy;
600 }
601
602 [[nodiscard]] inline const arrow_proxy& primitive_data_access<bool>::get_proxy() const
603 {
604 return *p_proxy;
605 }
606
607 [[nodiscard]] inline auto primitive_data_access<bool>::get_data_view() -> bitset_view
608 {
609 auto& proxy = get_proxy();
610 size_t size = proxy.length() + proxy.offset();
611 return bitset_view(proxy.buffers()[m_data_buffer_index].data(), size);
612 }
613
614 [[nodiscard]] inline auto primitive_data_access<bool>::get_data_adaptor() -> bitset_adaptor
615 {
616 auto& proxy = get_proxy();
617 if (proxy.is_created_with_sparrow())
618 {
619 size_t size = proxy.length() + proxy.offset();
620 return bitset_adaptor(&(proxy.get_array_private_data()->buffers()[m_data_buffer_index]), size);
621 }
622 else
623 {
624 return bitset_adaptor(&m_dummy_buffer, 0u);
625 }
626 }
627
629 {
630 m_view = bitset_view(get_proxy().buffers()[m_data_buffer_index].data(), m_adaptor.size());
631 }
632 }
633}
Class which has internally a reference to a contiguous container of a certain type and provides an AP...
Object that owns a piece of contiguous memory.
Definition buffer.hpp:113
static u8_buffer< bool > make_data_buffer(RANGE &&r)
primitive_data_access(arrow_proxy &proxy, size_t data_buffer_index)
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
primitive_data_access & operator=(primitive_data_access &&)=delete
typename bitset_view::const_reference inner_const_reference
typename bitset_view::const_iterator const_value_iterator
primitive_data_access(const primitive_data_access &)=delete
primitive_data_access(primitive_data_access &&)=delete
primitive_data_access & operator=(const primitive_data_access &)=delete
constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last)
Provides access to primitive data stored in Arrow format buffers.
primitive_data_access(arrow_proxy &proxy, size_t data_buffer_index)
Constructor for primitive_data_access.
constexpr inner_reference value(size_t i)
pointer_iterator< inner_const_pointer > const_value_iterator
std::conditional_t< std::is_same_v< T2, bool >, T2, const T2 & > inner_const_reference
primitive_data_access(primitive_data_access &&)=delete
constexpr const_value_iterator value_cbegin() const
constexpr value_iterator insert_value(const_value_iterator pos, T2 value, size_t count)
static constexpr u8_buffer< T2 > make_data_buffer(RANGE &&r)
constexpr inner_const_pointer data() const
constexpr const_value_iterator value_cend() const
constexpr value_iterator insert_value(size_t idx, T2 value, size_t count)
constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last)
primitive_data_access & operator=(const primitive_data_access &)=delete
constexpr void reset_proxy(arrow_proxy &proxy)
primitive_data_access(const primitive_data_access &)=delete
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
constexpr void resize_values(size_t new_length, const T2 &value)
static constexpr u8_buffer< T2 > make_data_buffer(size_t n, const T2 &value)
constexpr inner_const_reference value(size_t i) const
constexpr value_iterator erase_values(size_t idx, size_t count)
primitive_data_access & operator=(primitive_data_access &&)=delete
constexpr value_iterator erase_values(const_value_iterator pos, size_t count)
bitset_iterator< self_type, true > const_iterator
typename storage_type_without_cvrefpointer::difference_type difference_type
A non-owning view to a dynamic size sequence of bits stored in external memory.
This buffer class is used as storage buffer for all sparrow arrays.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
auto make_buffer_adaptor(FromBufferRef &buf)