sparrow 0.6.0
Loading...
Searching...
No Matches
primitive_data_access.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
20
21namespace sparrow
22{
23 template <typename T>
24 concept trivial_copyable_type = std::is_trivially_copyable_v<T> && std::is_standard_layout_v<T>;
25
26 namespace details
27 {
33 template <trivial_copyable_type T>
35 {
36 public:
37
39 using inner_reference = T&;
40 using inner_const_reference = const T&;
43
46
52 primitive_data_access(arrow_proxy& proxy, size_t data_buffer_index);
53
54 // This class is meant to be used as a private member of array classes,
55 // and holds a pointer to the arrow_proxy of the array. Therefore we
56 // forbid the copy and the move semantics to:
57 // - force the array constructors to call the primitive_data_access
58 // constructor taking an arrow_proxy
59 // - force the array assignment operators to call the reset_proxy
60 // method.
65
66 [[nodiscard]] constexpr inner_pointer data();
67 [[nodiscard]] constexpr inner_const_pointer data() const;
68
69 [[nodiscard]] constexpr inner_reference value(size_t i);
70 [[nodiscard]] constexpr inner_const_reference value(size_t i) const;
71
72 [[nodiscard]] constexpr value_iterator value_begin();
73 [[nodiscard]] constexpr value_iterator value_end();
74
75 [[nodiscard]] constexpr const_value_iterator value_cbegin() const;
76 [[nodiscard]] constexpr const_value_iterator value_cend() const;
77
78 constexpr void resize_values(size_t new_length, const T& value);
79
80 constexpr value_iterator insert_value(const_value_iterator pos, T value, size_t count);
81 constexpr value_iterator insert_value(size_t idx, T value, size_t count);
82
83 // Template parameter InputIt must be an iterator type that iterates over elements of type T
84 template <mpl::iterator_of_type<T> InputIt>
85 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
86
87 template <mpl::iterator_of_type<T> InputIt>
88 constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last);
89
90 constexpr value_iterator erase_values(const_value_iterator pos, size_t count);
91 constexpr value_iterator erase_values(size_t idx, size_t count);
92
94
95 template <std::ranges::input_range RANGE>
96 [[nodiscard]] static u8_buffer<T> make_data_buffer(RANGE&& r);
97
98 [[nodiscard]] static u8_buffer<T> make_data_buffer(size_t n, const T& value);
99
100 private:
101
102 [[nodiscard]] constexpr buffer_adaptor<T, buffer<uint8_t>&> get_data_buffer();
103
104 [[nodiscard]] arrow_proxy& get_proxy();
105 [[nodiscard]] const arrow_proxy& get_proxy() const;
106
107 arrow_proxy* p_proxy;
108 size_t m_data_buffer_index;
109 };
110
111 template <>
113 {
114 public:
115
122
125
126 primitive_data_access(arrow_proxy& proxy, size_t data_buffer_index);
127
132
133 [[nodiscard]] inner_reference value(size_t i);
134 [[nodiscard]] inner_const_reference value(size_t i) const;
135
136 [[nodiscard]] value_iterator value_begin();
137 [[nodiscard]] value_iterator value_end();
138
139 [[nodiscard]] const_value_iterator value_cbegin() const;
140 [[nodiscard]] const_value_iterator value_cend() const;
141
142 void resize_values(size_t new_length, bool value);
143
144 value_iterator insert_value(const_value_iterator pos, bool value, size_t count);
145 value_iterator insert_value(size_t idx, bool value, size_t count);
146
147 // Template parameter InputIt must be an iterator type that iterates over elements of type bool
148 template <mpl::iterator_of_type<bool> InputIt>
149 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
150
151 template <mpl::iterator_of_type<bool> InputIt>
152 constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last);
153
155 value_iterator erase_values(size_t idx, size_t count);
156
157 void reset_proxy(arrow_proxy& proxy);
158
159 template <std::ranges::input_range RANGE>
160 [[nodiscard]] static u8_buffer<bool> make_data_buffer(RANGE&& r);
161
162 [[nodiscard]] static u8_buffer<bool> make_data_buffer(size_t size, bool value);
163
164 private:
165
166 using bitset_adaptor = non_owning_dynamic_bitset<std::uint8_t>;
167 using difference_type = typename bitset_adaptor::difference_type;
168 using adaptor_iterator = typename bitset_adaptor::iterator;
169 using const_adaptor_iterator = typename bitset_adaptor::const_iterator;
170
171 template <class F>
172 [[nodiscard]] static u8_buffer<bool> make_data_buffer(size_t size, F init_func);
173
174 [[nodiscard]] size_t get_offset(size_t i) const;
175
176 [[nodiscard]] adaptor_iterator adaptor_begin();
177 [[nodiscard]] adaptor_iterator adaptor_end();
178
179 [[nodiscard]] const_adaptor_iterator adaptor_cbegin() const;
180 [[nodiscard]] const_adaptor_iterator adaptor_cend() const;
181
182 [[nodiscard]] arrow_proxy& get_proxy();
183 [[nodiscard]] const arrow_proxy& get_proxy() const;
184
185 [[nodiscard]] bitset_view get_data_view();
186 [[nodiscard]] bitset_adaptor get_data_adaptor();
187
188 void update_data_view();
189
190 arrow_proxy* p_proxy;
191 size_t m_data_buffer_index;
192 bitset_view m_view;
193 buffer<std::uint8_t> m_dummy_buffer;
194 bitset_adaptor m_adaptor;
195 };
196
197 /****************************************
198 * primitive_data_access implementation *
199 ****************************************/
200
201 template <trivial_copyable_type T>
203 : p_proxy(&proxy)
204 , m_data_buffer_index(data_buffer_index)
205 {
206 }
207
208 template <trivial_copyable_type T>
209 [[nodiscard]] constexpr auto primitive_data_access<T>::data() -> inner_pointer
210 {
211 return get_proxy().buffers()[m_data_buffer_index].template data<T>()
212 + static_cast<size_t>(get_proxy().offset());
213 }
214
215 template <trivial_copyable_type T>
216 [[nodiscard]] constexpr auto primitive_data_access<T>::data() const -> inner_const_pointer
217 {
218 return get_proxy().buffers()[m_data_buffer_index].template data<T>()
219 + static_cast<size_t>(get_proxy().offset());
220 }
221
222 template <trivial_copyable_type T>
223 [[nodiscard]] constexpr auto primitive_data_access<T>::value(size_t i) -> inner_reference
224 {
225 SPARROW_ASSERT_TRUE(i < get_proxy().length());
226 return data()[i];
227 }
228
229 template <trivial_copyable_type T>
230 [[nodiscard]] constexpr auto primitive_data_access<T>::value(size_t i) const -> inner_const_reference
231 {
232 SPARROW_ASSERT_TRUE(i < get_proxy().length());
233 return data()[i];
234 }
235
236 template <trivial_copyable_type T>
238 {
239 return value_iterator{data()};
240 }
241
242 template <trivial_copyable_type T>
243 [[nodiscard]] constexpr auto primitive_data_access<T>::value_end() -> value_iterator
244 {
245 return sparrow::next(value_begin(), get_proxy().length());
246 }
247
248 template <trivial_copyable_type T>
249 [[nodiscard]] constexpr auto primitive_data_access<T>::value_cbegin() const -> const_value_iterator
250 {
251 return const_value_iterator{data()};
252 }
253
254 template <trivial_copyable_type T>
255 [[nodiscard]] constexpr auto primitive_data_access<T>::value_cend() const -> const_value_iterator
256 {
257 return sparrow::next(value_cbegin(), get_proxy().length());
258 }
259
260 template <trivial_copyable_type T>
261 constexpr void primitive_data_access<T>::resize_values(size_t new_length, const T& value)
262 {
263 const size_t new_size = new_length + static_cast<size_t>(get_proxy().offset());
264 get_data_buffer().resize(new_size, value);
265 }
266
267 template <trivial_copyable_type T>
270 {
272 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
275 const auto distance = std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()));
276 get_data_buffer().insert(pos, count, value);
278 return sparrow::next(value_begin, distance);
279 }
280
281 template <trivial_copyable_type T>
282 constexpr auto primitive_data_access<T>::insert_value(size_t idx, T value, size_t count)
284 {
285 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
286 const const_value_iterator begin{data()};
287 const const_value_iterator it = sparrow::next(begin, idx);
288 return insert_value(it, value, count);
289 }
290
291 // Template parameter InputIt must be an iterator type that iterates over elements of type T
292 template <trivial_copyable_type T>
293 template <mpl::iterator_of_type<T> InputIt>
294 constexpr auto
297 {
299 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
302 const auto distance = std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()));
303 get_data_buffer().insert(pos, first, last);
305 return sparrow::next(value_begin, distance);
306 }
307
308 template <trivial_copyable_type T>
309 template <mpl::iterator_of_type<T> InputIt>
310 constexpr auto primitive_data_access<T>::insert_values(size_t idx, InputIt first, InputIt last)
312 {
313 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
314 const const_value_iterator begin{data()};
315 const const_value_iterator it = sparrow::next(begin, idx);
316 return insert_values(it, first, last);
317 }
318
319 template <trivial_copyable_type T>
322 {
324 const const_value_iterator value_cend{sparrow::next(value_cbegin, get_proxy().length())};
327 const auto distance = static_cast<size_t>(
328 std::distance(value_cbegin, sparrow::next(pos, get_proxy().offset()))
329 );
330 auto data_buffer = get_data_buffer();
331 const auto first = sparrow::next(data_buffer.cbegin(), distance);
332 const auto last = sparrow::next(first, count);
333 data_buffer.erase(first, last);
335 return sparrow::next(value_begin, distance);
336 }
337
338 template <trivial_copyable_type T>
339 constexpr auto primitive_data_access<T>::erase_values(size_t idx, size_t count) -> value_iterator
340 {
341 SPARROW_ASSERT_TRUE(idx <= get_proxy().length());
342 const const_value_iterator cbegin{data()};
343 const const_value_iterator it = sparrow::next(cbegin, idx);
344 erase_values(it, count);
345 return sparrow::next(value_iterator{data()}, idx);
346 }
347
348 template <trivial_copyable_type T>
350 {
351 p_proxy = &proxy;
352 }
353
354 template <trivial_copyable_type T>
355 template <std::ranges::input_range RANGE>
357 {
358 return u8_buffer<T>(std::forward<RANGE>(r));
359 }
360
361 template <trivial_copyable_type T>
363 {
364 return u8_buffer<T>(size, value);
365 }
366
367 template <trivial_copyable_type T>
368 [[nodiscard]] constexpr buffer_adaptor<T, buffer<uint8_t>&> primitive_data_access<T>::get_data_buffer()
369 {
370 auto& buffers = get_proxy().get_array_private_data()->buffers();
371 return make_buffer_adaptor<T>(buffers[m_data_buffer_index]);
372 }
373
374 template <trivial_copyable_type T>
375 [[nodiscard]] arrow_proxy& primitive_data_access<T>::get_proxy()
376 {
377 return *p_proxy;
378 }
379
380 template <trivial_copyable_type T>
381 [[nodiscard]] const arrow_proxy& primitive_data_access<T>::get_proxy() const
382 {
383 return *p_proxy;
384 }
385
386 /**********************************************
387 * primitive_data_access<bool> implementation *
388 **********************************************/
389
391 : p_proxy(&proxy)
392 , m_data_buffer_index(data_buffer_index)
393 , m_view(get_data_view())
394 , m_dummy_buffer()
395 , m_adaptor(get_data_adaptor())
396 {
397 }
398
399 [[nodiscard]] inline auto primitive_data_access<bool>::value(size_t i) -> inner_reference
400 {
401 return m_view[get_offset(i)];
402 }
403
404 [[nodiscard]] inline auto primitive_data_access<bool>::value(size_t i) const -> inner_const_reference
405 {
406 return m_view[get_offset(i)];
407 }
408
410 {
411 return sparrow::next(m_view.begin(), get_offset(0u));
412 }
413
415 {
416 return m_view.end();
417 }
418
420 {
421 return sparrow::next(m_view.cbegin(), get_offset(0u));
422 }
423
425 {
426 return m_view.cend();
427 }
428
429 inline void primitive_data_access<bool>::resize_values(size_t new_length, bool value)
430 {
431 m_adaptor.resize(get_offset(new_length), value);
432 update_data_view();
433 }
434
435 inline auto
438 {
439 auto ins_iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
440 auto res = m_adaptor.insert(ins_iter, count, value);
441 update_data_view();
442 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
443 }
444
445 inline auto primitive_data_access<bool>::insert_value(size_t idx, bool value, size_t count)
447 {
448 auto iter = sparrow::next(adaptor_cbegin(), static_cast<difference_type>(idx));
449 auto res = m_adaptor.insert(iter, count, value);
450 update_data_view();
451 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
452 }
453
454 // Template parameter InputIt must be an iterator type that iterates over elements of type bool
455 template <mpl::iterator_of_type<bool> InputIt>
456 constexpr auto
459 {
460 auto ins_iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
461 auto res = m_adaptor.insert(ins_iter, first, last);
462 update_data_view();
463 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
464 }
465
466 template <mpl::iterator_of_type<bool> InputIt>
467 constexpr auto primitive_data_access<bool>::insert_values(size_t idx, InputIt first, InputIt last)
469 {
470 auto iter = sparrow::next(adaptor_cbegin(), static_cast<difference_type>(idx));
471 auto res = m_adaptor.insert(iter, first, last);
472 update_data_view();
473 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
474 }
475
478 {
479 auto iter = sparrow::next(adaptor_cbegin(), std::distance(value_cbegin(), pos));
480 auto iter_end = sparrow::next(iter, count);
481 auto res = m_adaptor.erase(iter, iter_end);
482 update_data_view();
483 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
484 }
485
486 inline auto primitive_data_access<bool>::erase_values(size_t idx, size_t count) -> value_iterator
487 {
488 auto iter = sparrow::next(adaptor_cbegin(), idx);
489 auto iter_end = sparrow::next(iter, count);
490 auto res = m_adaptor.erase(iter, iter_end);
491 update_data_view();
492 return sparrow::next(value_begin(), std::distance(adaptor_begin(), res));
493 }
494
496 {
497 p_proxy = &proxy;
498 m_view = get_data_view();
499 m_adaptor = get_data_adaptor();
500 }
501
502 template <std::ranges::input_range RANGE>
504 {
505 auto size = static_cast<size_t>(std::ranges::distance(r));
506 auto init_func = [&r](bitset_view& v)
507 {
508 std::copy(r.begin(), r.end(), v.begin());
509 };
510 return make_data_buffer(size, init_func);
511 }
512
513 [[nodiscard]] inline u8_buffer<bool>
515 {
516 auto init_func = [&value](bitset_view& v)
517 {
518 std::fill(v.begin(), v.end(), value);
519 };
520 return make_data_buffer(size, init_func);
521 }
522
523 template <class F>
524 [[nodiscard]] inline u8_buffer<bool>
525 primitive_data_access<bool>::make_data_buffer(size_t size, F init_func)
526 {
527 std::size_t block_nb = size / 8;
528 if (block_nb * 8 < size)
529 {
530 ++block_nb;
531 }
532 u8_buffer<bool> res(block_nb);
533 std::uint8_t* buffer = reinterpret_cast<std::uint8_t*>(res.data());
534 bitset_view v(buffer, size);
535 init_func(v);
536 return res;
537 }
538
539 [[nodiscard]] inline size_t primitive_data_access<bool>::get_offset(size_t i) const
540 {
541 return i + get_proxy().offset();
542 }
543
544 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_begin() -> adaptor_iterator
545 {
546 return sparrow::next(m_adaptor.begin(), get_offset(0u));
547 }
548
549 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_end() -> adaptor_iterator
550 {
551 return m_adaptor.end();
552 }
553
554 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_cbegin() const -> const_adaptor_iterator
555 {
556 return sparrow::next(m_adaptor.cbegin(), get_offset(0u));
557 }
558
559 [[nodiscard]] inline auto primitive_data_access<bool>::adaptor_cend() const -> const_adaptor_iterator
560 {
561 return m_adaptor.cend();
562 }
563
564 [[nodiscard]] inline arrow_proxy& primitive_data_access<bool>::get_proxy()
565 {
566 return *p_proxy;
567 }
568
569 [[nodiscard]] inline const arrow_proxy& primitive_data_access<bool>::get_proxy() const
570 {
571 return *p_proxy;
572 }
573
574 [[nodiscard]] inline auto primitive_data_access<bool>::get_data_view() -> bitset_view
575 {
576 auto& proxy = get_proxy();
577 size_t size = proxy.length() + proxy.offset();
578 return bitset_view(proxy.buffers()[m_data_buffer_index].data(), size);
579 }
580
581 [[nodiscard]] inline auto primitive_data_access<bool>::get_data_adaptor() -> bitset_adaptor
582 {
583 auto& proxy = get_proxy();
584 if (proxy.is_created_with_sparrow())
585 {
586 size_t size = proxy.length() + proxy.offset();
587 return bitset_adaptor(&(proxy.get_array_private_data()->buffers()[m_data_buffer_index]), size);
588 }
589 else
590 {
591 return bitset_adaptor(&m_dummy_buffer, 0u);
592 }
593 }
594
596 {
597 m_view = bitset_view(get_proxy().buffers()[m_data_buffer_index].data(), m_adaptor.size());
598 }
599 }
600}
Proxy class over ArrowArray and ArrowSchema.
Class which have internally a reference to a contiguous container of a certain type and provides an A...
Object that owns a piece of contiguous memory.
Definition buffer.hpp:109
static u8_buffer< bool > make_data_buffer(RANGE &&r)
primitive_data_access(arrow_proxy &proxy, size_t data_buffer_index)
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
primitive_data_access & operator=(primitive_data_access &&)=delete
typename bitset_view::const_reference inner_const_reference
typename bitset_view::const_iterator const_value_iterator
primitive_data_access(const primitive_data_access &)=delete
primitive_data_access(primitive_data_access &&)=delete
primitive_data_access & operator=(const primitive_data_access &)=delete
constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last)
Data access class for trivial copyable types.
constexpr void resize_values(size_t new_length, const T &value)
constexpr value_iterator insert_values(size_t idx, InputIt first, InputIt last)
primitive_data_access & operator=(const primitive_data_access &)=delete
constexpr value_iterator erase_values(const_value_iterator pos, size_t count)
constexpr value_iterator erase_values(size_t idx, size_t count)
static u8_buffer< T > make_data_buffer(RANGE &&r)
primitive_data_access(primitive_data_access &&)=delete
constexpr inner_const_reference value(size_t i) const
constexpr inner_reference value(size_t i)
constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last)
static u8_buffer< T > make_data_buffer(size_t n, const T &value)
constexpr const_value_iterator value_cend() const
constexpr value_iterator insert_value(size_t idx, T value, size_t count)
pointer_iterator< inner_const_pointer > const_value_iterator
primitive_data_access(arrow_proxy &proxy, size_t data_buffer_index)
Constructor for primitive_data_access.
primitive_data_access & operator=(primitive_data_access &&)=delete
constexpr value_iterator insert_value(const_value_iterator pos, T value, size_t count)
constexpr inner_const_pointer data() const
primitive_data_access(const primitive_data_access &)=delete
pointer_iterator< inner_pointer > value_iterator
constexpr const_value_iterator value_cbegin() const
bitset_iterator< self_type, true > const_iterator
typename storage_type_without_cvrefpointer::difference_type difference_type
This class represents a view to a dynamic size sequence of bits.
This buffer class is use as storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:107
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
auto make_buffer_adaptor(FromBufferRef &buf)