sparrow 0.9.0
Loading...
Searching...
No Matches
union_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <array>
18
19#include "sparrow/array_api.hpp"
33
34namespace sparrow
35{
38
39 namespace detail
40 {
41 template <class T>
43
44 template <>
46 {
47 [[nodiscard]] static constexpr sparrow::data_type get()
48 {
50 }
51 };
52
53 template <>
55 {
56 [[nodiscard]] static constexpr sparrow::data_type get()
57 {
59 }
60 };
61 }
62
66 template <class T>
67 constexpr bool is_dense_union_array_v = std::same_as<T, dense_union_array>;
68
72 template <class T>
73 constexpr bool is_sparse_union_array_v = std::same_as<T, sparse_union_array>;
74
75 // helper CRTP base that lets the sparse and dense union arrays share most of their code
76 template <class DERIVED>
77 class union_array_crtp_base : public crtp_base<DERIVED>
78 {
79 public:
80
82 using derived_type = DERIVED;
89 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
90 using size_type = std::size_t;
91
93
94 [[nodiscard]] std::optional<std::string_view> name() const;
95 [[nodiscard]] std::optional<key_value_view> metadata() const;
96
97 [[nodiscard]] value_type at(size_type i) const;
98 [[nodiscard]] value_type operator[](size_type i) const;
100 [[nodiscard]] value_type front() const;
101 [[nodiscard]] value_type back() const;
102
103 [[nodiscard]] bool empty() const;
104 [[nodiscard]] size_type size() const;
105
106 [[nodiscard]] iterator begin();
107 [[nodiscard]] iterator end();
108 [[nodiscard]] const_iterator begin() const;
109 [[nodiscard]] const_iterator end() const;
110 [[nodiscard]] const_iterator cbegin() const;
111 [[nodiscard]] const_iterator cend() const;
112
113 [[nodiscard]] const_reverse_iterator rbegin() const;
114 [[nodiscard]] const_reverse_iterator rend() const;
115
116 [[nodiscard]] const_reverse_iterator crbegin() const;
117 [[nodiscard]] const_reverse_iterator crend() const;
118
127 {
128 sparrow::zero_null_values(*this, value);
129 }
130
131 protected:
132
133 using type_id_map = std::array<std::uint8_t, 256>;
134 static type_id_map parse_type_id_map(std::string_view format_string);
135
136 template <std::ranges::input_range R>
137 static type_id_map type_id_map_from_child_to_type_id(R&& child_index_to_type_id);
138
139 template <std::ranges::input_range R>
140 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
141 static std::string make_format_string(bool dense, std::size_t n, R&& child_index_to_type_id);
142
143 using children_type = std::vector<cloning_ptr<array_wrapper>>;
145
147
150
152 self_type& operator=(self_type&& rhs) = default;
153
154 [[nodiscard]] arrow_proxy& get_arrow_proxy();
155 [[nodiscard]] const arrow_proxy& get_arrow_proxy() const;
156
158 const std::uint8_t* p_type_ids;
160
161 // map from type-id to child-index
162 std::array<std::uint8_t, 256> m_type_id_map;
163
165
166#if defined(__cpp_lib_format)
167 friend struct std::formatter<DERIVED>;
168#endif
169 };
170
171 template <class D>
173
174 class dense_union_array : public union_array_crtp_base<dense_union_array>
175 {
176 public:
177
181
182 template <class... Args>
184 explicit dense_union_array(Args&&... args)
185 : dense_union_array(create_proxy(std::forward<Args>(args)...))
186 {
187 }
188
190
193
196
197 private:
198
199 using type_id_map = typename base_type::type_id_map;
200
201 template <
202 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
203 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
204 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
205 [[nodiscard]] static auto create_proxy(
206 std::vector<array>&& children,
207 type_id_buffer_type&& element_type,
208 offset_buffer_type&& offsets,
209 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{},
210 std::optional<std::string_view> name = std::nullopt,
211 std::optional<METADATA_RANGE> metadata = std::nullopt
212 ) -> arrow_proxy;
213
214 template <
215 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
216 std::ranges::input_range OFFSET_BUFFER_RANGE,
217 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
218 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
219 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
220 [[nodiscard]] static arrow_proxy create_proxy(
221 std::vector<array>&& children,
222 TYPE_ID_BUFFER_RANGE&& element_type,
223 OFFSET_BUFFER_RANGE&& offsets,
224 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{},
225 std::optional<std::string_view> name = std::nullopt,
226 std::optional<METADATA_RANGE> metadata = std::nullopt
227 )
228 {
229 type_id_buffer_type element_type_buffer{std::move(element_type)};
230 offset_buffer_type offsets_buffer{std::move(offsets)};
231 return dense_union_array::create_proxy(
232 std::forward<std::vector<array>>(children),
233 std::move(element_type_buffer),
234 std::move(offsets_buffer),
235 std::forward<TYPE_MAPPING>(type_mapping),
236 std::forward<std::optional<std::string_view>>(name),
237 std::forward<std::optional<METADATA_RANGE>>(metadata)
238 );
239 }
240
241 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
242 [[nodiscard]] static arrow_proxy create_proxy_impl(
243 std::vector<array>&& children,
244 type_id_buffer_type&& element_type,
245 offset_buffer_type&& offsets,
246 std::string&& format,
247 type_id_map&& tim,
248 std::optional<std::string_view> name = std::nullopt,
249 std::optional<METADATA_RANGE> metadata = std::nullopt
250 );
251
252 template <
253 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
254 std::ranges::input_range OFFSET_BUFFER_RANGE,
255 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
256 [[nodiscard]] static arrow_proxy create_proxy_impl(
257 std::vector<array>&& children,
258 TYPE_ID_BUFFER_RANGE&& element_type,
259 OFFSET_BUFFER_RANGE&& offsets,
260 std::string&& format,
261 type_id_map&& tim,
262 std::optional<std::string_view> name = std::nullopt,
263 std::optional<METADATA_RANGE> metadata = std::nullopt
264 )
265 {
266 type_id_buffer_type element_type_buffer{std::move(element_type)};
267 offset_buffer_type offsets_buffer{std::move(offsets)};
268 return dense_union_array::create_proxy_impl(
269 std::forward<std::vector<array>>(children),
270 std::move(element_type_buffer),
271 std::move(offsets_buffer),
272 std::forward<std::string>(format),
273 std::forward<type_id_map>(tim),
274 std::forward<std::optional<std::string_view>>(name),
275 std::forward<std::optional<METADATA_RANGE>>(metadata)
276 );
277 }
278
279 SPARROW_API std::size_t element_offset(std::size_t i) const;
280
281 const std::int32_t* p_offsets;
283 };
284
285 class sparse_union_array : public union_array_crtp_base<sparse_union_array>
286 {
287 public:
288
291
292 template <class... Args>
294 explicit sparse_union_array(Args&&... args)
295 : sparse_union_array(create_proxy(std::forward<Args>(args)...))
296 {
297 }
298
300
301 private:
302
303 using type_id_map = typename base_type::type_id_map;
304
305 template <
306 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
307 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
308 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
309 static auto create_proxy(
310 std::vector<array>&& children,
311 type_id_buffer_type&& element_type,
312 TYPE_MAPPING&& type_mapping = TYPE_MAPPING{},
313 std::optional<std::string_view> name = std::nullopt,
314 std::optional<METADATA_RANGE> metadata = std::nullopt
315 ) -> arrow_proxy;
316
317 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
318 static auto create_proxy_impl(
319 std::vector<array>&& children,
320 type_id_buffer_type&& element_type,
321 std::string&& format,
322 type_id_map&& tim,
323 std::optional<std::string_view> name = std::nullopt,
324 std::optional<METADATA_RANGE> metadata = std::nullopt
325 ) -> arrow_proxy;
326
327 SPARROW_API std::size_t element_offset(std::size_t i) const;
329 };
330
331 /****************************************
332 * union_array_crtp_base implementation *
333 ****************************************/
334
335 template <class DERIVED>
337 {
338 type_id_map ret;
339 // remove the 4-character "+ud:" / "+us:" prefix
340 format_string.remove_prefix(4);
341
342 constexpr std::string_view delim{","};
343 std::size_t child_index = 0;
344 std::ranges::for_each(
345 format_string | std::views::split(delim),
346 [&](const auto& s)
347 {
348 const std::string str(
349 std::string_view{&*std::ranges::begin(s), static_cast<size_t>(std::ranges::distance(s))}
350 );
351 const auto as_int = std::atoi(str.c_str());
352 ret[static_cast<std::size_t>(as_int)] = static_cast<std::uint8_t>(child_index);
353 ++child_index;
354 }
355 );
356 return ret;
357 }
358
359 template <class DERIVED>
360 template <std::ranges::input_range R>
362 -> type_id_map
363 {
364 const std::size_t n = std::ranges::size(child_index_to_type_id);
365 std::array<std::uint8_t, 256> ret;
366 if (n == 0)
367 {
368 for (std::size_t i = 0; i < 256; ++i)
369 {
370 ret[i] = static_cast<std::uint8_t>(i);
371 }
372 }
373 else
374 {
375 for (std::size_t i = 0; i < n; ++i)
376 {
377 ret[child_index_to_type_id[i]] = static_cast<std::uint8_t>(i);
378 }
379 }
380 return ret;
381 }
382
383 template <class DERIVED>
384 template <std::ranges::input_range R>
385 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
386 std::string union_array_crtp_base<DERIVED>::make_format_string(bool dense, const std::size_t n, R&& range)
387 {
388 const auto range_size = std::ranges::size(range);
389 if (range_size == n || range_size == 0)
390 {
391 std::string ret = dense ? "+ud:" : "+us:";
392 if (range_size == 0)
393 {
394 for (std::size_t i = 0; i < n; ++i)
395 {
396 ret += std::to_string(i) + ",";
397 }
398 }
399 else
400 {
401 for (const auto& v : range)
402 {
403 ret += std::to_string(v) + ",";
404 }
405 }
406 ret.pop_back();
407 return ret;
408 }
409 else
410 {
411 throw std::invalid_argument("Invalid type-id map");
412 }
413 }
414
415 template <class DERIVED>
416 std::optional<std::string_view> union_array_crtp_base<DERIVED>::name() const
417 {
418 return m_proxy.name();
419 }
420
421 template <class DERIVED>
422 std::optional<key_value_view> union_array_crtp_base<DERIVED>::metadata() const
423 {
424 return m_proxy.metadata();
425 }
426
427 template <class DERIVED>
432
433 template <class DERIVED>
438
439 template <class DERIVED>
441 : m_proxy(std::move(proxy))
442 , p_type_ids(reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0 /*index of type-ids*/].data()))
445 {
446 }
447
448 template <class DERIVED>
453
454 template <class DERIVED>
456 {
457 if (this != &rhs)
458 {
459 m_proxy = rhs.m_proxy;
460 p_type_ids = reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0 /*index of type-ids*/].data());
463 }
464 return *this;
465 }
466
467 template <class DERIVED>
469 {
470 const auto type_id = static_cast<std::size_t>(p_type_ids[i]);
471 const auto child_index = m_type_id_map[type_id];
472 const auto offset = this->derived_cast().element_offset(i);
473 return array_element(*m_children[child_index], static_cast<std::size_t>(offset));
474 }
475
476 template <class DERIVED>
478 {
479 return static_cast<const derived_type&>(*this)[i];
480 }
481
482 template <class DERIVED>
484 {
485 return m_proxy.length();
486 }
487
488 template <class DERIVED>
490 {
491 return size() == 0;
492 }
493
494 template <class DERIVED>
496 {
497 return iterator(functor_type{&(this->derived_cast())}, 0);
498 }
499
500 template <class DERIVED>
502 {
503 return iterator(functor_type{&(this->derived_cast())}, this->size());
504 }
505
506 template <class DERIVED>
508 {
509 return cbegin();
510 }
511
512 template <class DERIVED>
514 {
515 return cend();
516 }
517
518 template <class DERIVED>
523
524 template <class DERIVED>
526 {
527 return const_iterator(const_functor_type{&(this->derived_cast())}, this->size());
528 }
529
530 template <class DERIVED>
535
536 template <class DERIVED>
541
542 template <class DERIVED>
547
548 template <class DERIVED>
550 {
551 return rend();
552 }
553
554 template <class DERIVED>
556 {
557 return (*this)[0];
558 }
559
560 template <class DERIVED>
562 {
563 return (*this)[this->size() - 1];
564 }
565
566 template <class DERIVED>
568 {
569 children_type children(proxy.children().size(), nullptr);
570 for (std::size_t i = 0; i < children.size(); ++i)
571 {
572 children[i] = array_factory(proxy.children()[i].view());
573 }
574 return children;
575 }
576
577 template <class D>
579 {
580 return std::ranges::equal(lhs, rhs);
581 }
582
583 /************************************
584 * dense_union_array implementation *
585 ************************************/
586
587 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
588 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
589 auto dense_union_array::create_proxy(
590 std::vector<array>&& children,
591 type_id_buffer_type&& element_type,
592 offset_buffer_type&& offsets,
593 TYPE_MAPPING&& child_index_to_type_id,
594 std::optional<std::string_view> name,
595 std::optional<METADATA_RANGE> metadata
596 ) -> arrow_proxy
597 {
598 const auto n_children = children.size();
599
600 // inverse type mapping (type_id -> child_index)
601 auto type_id_to_child_index = type_id_map_from_child_to_type_id(child_index_to_type_id);
602
603 std::string format = make_format_string(
604 true /*dense union*/,
605 n_children,
606 std::forward<TYPE_MAPPING>(child_index_to_type_id)
607 );
608
609 return create_proxy_impl(
610 std::move(children),
611 std::move(element_type),
612 std::move(offsets),
613 std::move(format),
614 std::move(type_id_to_child_index),
615 std::move(name),
616 std::move(metadata)
617 );
618 }
619
620 template <input_metadata_container METADATA_RANGE>
621 auto dense_union_array::create_proxy_impl(
622 std::vector<array>&& children,
623 type_id_buffer_type&& element_type,
624 offset_buffer_type&& offsets,
625 std::string&& format,
626 type_id_map&& tim,
627 std::optional<std::string_view> name,
628 std::optional<METADATA_RANGE> metadata
629 ) -> arrow_proxy
630 {
631 const auto n_children = children.size();
632 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
633 ArrowArray** child_arrays = new ArrowArray*[n_children];
634 const auto size = element_type.size();
635
636 // count nulls (expensive!)
637 int64_t null_count = 0;
638 for (std::size_t i = 0; i < size; ++i)
639 {
640 // child_id from type_id
641 const auto type_id = static_cast<std::uint8_t>(element_type[i]);
642 const auto child_index = tim[type_id];
643 const auto offset = static_cast<std::size_t>(offsets[i]);
644 // check if child is null
645 if (!children[child_index][offset].has_value())
646 {
647 ++null_count;
648 }
649 }
650
651 for (std::size_t i = 0; i < n_children; ++i)
652 {
653 auto& child = children[i];
654 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
655 child_arrays[i] = new ArrowArray(std::move(flat_arr));
656 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
657 }
658
659 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
660
662 std::move(format),
663 std::move(name), // name
664 std::move(metadata), // metadata
665 flags, // flags,
666 child_schemas, // children
667 repeat_view<bool>(true, n_children), // children_ownership
668 nullptr, // dictionary,
669 true // dictionary ownership
670 );
671
672 std::vector<buffer<std::uint8_t>> arr_buffs = {
673 std::move(element_type).extract_storage(),
674 std::move(offsets).extract_storage()
675 };
676
678 static_cast<std::int64_t>(size), // length
679 static_cast<std::int64_t>(null_count),
680 0, // offset
681 std::move(arr_buffs),
682 child_arrays, // children
683 repeat_view<bool>(true, n_children), // children_ownership
684 nullptr, // dictionary,
685 true
686 );
687 return arrow_proxy{std::move(arr), std::move(schema)};
688 }
689
690 /*************************************
691 * sparse_union_array implementation *
692 *************************************/
693
694 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
695 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
696 auto sparse_union_array::create_proxy(
697 std::vector<array>&& children,
698 type_id_buffer_type&& element_type,
699 TYPE_MAPPING&& child_index_to_type_id,
700 std::optional<std::string_view> name,
701 std::optional<METADATA_RANGE> metadata
702 ) -> arrow_proxy
703 {
704 const auto n_children = children.size();
705
706 // inverse type mapping (type_id -> child_index)
707 auto type_id_to_child_index = type_id_map_from_child_to_type_id(child_index_to_type_id);
708
709 std::string format = make_format_string(
710 false /*is dense union*/,
711 n_children,
712 std::forward<TYPE_MAPPING>(child_index_to_type_id)
713 );
714
715 return create_proxy_impl(
716 std::move(children),
717 std::move(element_type),
718 std::move(format),
719 std::move(type_id_to_child_index),
720 name,
721 metadata
722 );
723 }
724
725 template <input_metadata_container METADATA_RANGE>
726 auto sparse_union_array::create_proxy_impl(
727 std::vector<array>&& children,
728 type_id_buffer_type&& element_type,
729 std::string&& format,
730 type_id_map&& tim,
731 std::optional<std::string_view> name,
732 std::optional<METADATA_RANGE> metadata
733 ) -> arrow_proxy
734 {
735 const auto n_children = children.size();
736 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
737 ArrowArray** child_arrays = new ArrowArray*[n_children];
738 const auto size = element_type.size();
739
740 // count nulls (expensive!)
741 int64_t null_count = 0;
742 for (std::size_t i = 0; i < size; ++i)
743 {
744 // child_id from type_id
745 const auto type_id = static_cast<std::uint8_t>(element_type[i]);
746 const auto child_index = tim[type_id];
747 // check if child is null
748 if (!children[child_index][i].has_value())
749 {
750 ++null_count;
751 }
752 }
753
754 for (std::size_t i = 0; i < n_children; ++i)
755 {
756 auto& child = children[i];
757 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
758 child_arrays[i] = new ArrowArray(std::move(flat_arr));
759 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
760 }
761
762 ArrowSchema schema = make_arrow_schema(
763 std::move(format),
764 name, // name
765 metadata, // metadata
766 std::nullopt, // flags,
767 child_schemas, // children
768 repeat_view<bool>(true, n_children), // children_ownership
769 nullptr, // dictionary,
770 true // dictionary ownership
771 );
772
773 std::vector<buffer<std::uint8_t>> arr_buffs(1);
774 arr_buffs[0] = std::move(element_type).extract_storage();
775
776 ArrowArray arr = make_arrow_array(
777 static_cast<std::int64_t>(size), // length
778 static_cast<std::int64_t>(null_count),
779 0, // offset
780 std::move(arr_buffs),
781 child_arrays, // children
782 repeat_view<bool>(true, n_children), // children_ownership
783 nullptr, // dictionary
784 true
785 );
786 return arrow_proxy{std::move(arr), std::move(schema)};
787 }
788}
789
790#if defined(__cpp_lib_format)
791
/**
 * std::formatter for the union array types deriving from union_array_crtp_base.
 *
 * Output shape: "DenseUnion [name=<name> | size=<n>] <v0, v1, ...>"
 * ("SparseUnion" for sparse_union_array). An empty array is written with
 * nothing between the angle brackets.
 */
template <typename U>
    requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
struct std::formatter<U>
{
    /// No format specification is supported; parsing consumes nothing.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /// Writes the textual representation of ar and returns the end of the output.
    auto format(const U& ar, std::format_context& ctx) const
    {
        // Thread the iterator returned by each format_to call instead of
        // re-reading ctx.out(), which is never advanced inside this function.
        auto out = ctx.out();
        if constexpr (std::is_same_v<U, sparrow::dense_union_array>)
        {
            out = std::format_to(out, "DenseUnion");
        }
        else if constexpr (std::is_same_v<U, sparrow::sparse_union_array>)
        {
            out = std::format_to(out, "SparseUnion");
        }
        else
        {
            static_assert(sparrow::mpl::dependent_false<U>::value, "Unknown union array type");
        }
        const auto& proxy = ar.get_arrow_proxy();
        out = std::format_to(out, " [name={} | size={}] <", proxy.name().value_or("nullptr"), proxy.length());

        // Separator-first join: unlike std::prev(cend()) / back(), this is
        // well defined when the array is empty.
        bool is_first = true;
        for (const auto& value : ar)
        {
            if (!is_first)
            {
                out = std::format_to(out, ", ");
            }
            is_first = false;
            out = std::format_to(out, "{}", value);
        }

        return std::format_to(out, ">");
    }
};
831
namespace sparrow
{
    /// Streams a union array using its std::formatter representation.
    template <typename U>
        requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
    std::ostream& operator<<(std::ostream& os, const U& value)
    {
        return os << std::format("{}", value);
    }
}
842
843#endif
void sparse_union_array()
Proxy class over ArrowArray and ArrowSchema.
Base class for CRTP base classes.
Definition crtp_base.hpp:29
derived_type & derived_cast()
Definition crtp_base.hpp:39
SPARROW_API dense_union_array & operator=(const dense_union_array &rhs)
u8_buffer< std::uint32_t > offset_buffer_type
union_array_crtp_base< dense_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
dense_union_array(Args &&... args)
SPARROW_API dense_union_array(arrow_proxy proxy)
SPARROW_API dense_union_array(const dense_union_array &rhs)
dense_union_array(dense_union_array &&rhs)=default
dense_union_array & operator=(dense_union_array &&rhs)=default
A view that repeats a value a given number of times.
sparse_union_array(Args &&... args)
union_array_crtp_base< sparse_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
SPARROW_API sparse_union_array(arrow_proxy proxy)
This buffer class is used as the storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
array_traits::inner_value_type inner_value_type
std::reverse_iterator< const_iterator > const_reverse_iterator
std::array< std::uint8_t, 256 > type_id_map
const_iterator begin() const
self_type & operator=(self_type &&rhs)=default
union_array_crtp_base(const self_type &rhs)
static std::string make_format_string(bool dense, std::size_t n, R &&child_index_to_type_id)
detail::layout_bracket_functor< const derived_type, value_type > const_functor_type
void zero_null_values(const inner_value_type &value)
Sets all null values to the specified value.
union_array_crtp_base(self_type &&rhs)=default
array_traits::const_reference value_type
u8_buffer< std::uint8_t > type_id_buffer_type
const_reverse_iterator rend() const
self_type & operator=(const self_type &rhs)
const_iterator cbegin() const
union_array_crtp_base< DERIVED > self_type
const_iterator cend() const
union_array_crtp_base(arrow_proxy proxy)
value_type operator[](size_type i)
detail::layout_bracket_functor< derived_type, value_type > functor_type
const_reverse_iterator crbegin() const
const_reverse_iterator crend() const
static type_id_map parse_type_id_map(std::string_view format_string)
functor_index_iterator< const_functor_type > const_iterator
value_type operator[](size_type i) const
value_type at(size_type i) const
functor_index_iterator< functor_type > iterator
const_reverse_iterator rbegin() const
const arrow_proxy & get_arrow_proxy() const
std::optional< key_value_view > metadata() const
std::optional< std::string_view > name() const
static type_id_map type_id_map_from_child_to_type_id(R &&child_index_to_type_id)
children_type make_children(arrow_proxy &proxy)
const_iterator end() const
std::vector< cloning_ptr< array_wrapper > > children_type
#define SPARROW_API
Definition config.hpp:38
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
void unreachable()
Invokes undefined behavior.
Definition mp_utils.hpp:425
constexpr bool is_dense_union_array_v
Checks whether T is a dense_union_array type.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr void zero_null_values(R &range, const T &default_value=T{})
Definition nullable.hpp:779
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
constexpr bool is_sparse_union_array_v
Checks whether T is a sparse_union_array type.
std::size_t range_size(R &&r)
Definition ranges.hpp:31
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
Definition nullable.hpp:933
mpl::rename< mpl::transform< detail::array_const_reference_t, all_base_types_t >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type