sparrow 2.3.1
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
union_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <array>
18#include <optional>
19
20#include "sparrow/array_api.hpp"
36
37namespace sparrow
38{
41
42 namespace detail
43 {
44 template <>
46 {
47 [[nodiscard]] static constexpr sparrow::data_type get()
48 {
50 }
51 };
52
53 template <>
55 {
56 [[nodiscard]] static constexpr sparrow::data_type get()
57 {
59 }
60 };
61 }
62
68 template <class T>
69 constexpr bool is_dense_union_array_v = std::same_as<T, dense_union_array>;
70
76 template <class T>
77 constexpr bool is_sparse_union_array_v = std::same_as<T, sparse_union_array>;
78
123 template <class DERIVED>
124 class union_array_crtp_base : public crtp_base<DERIVED>
125 {
126 public:
127
129 using derived_type = DERIVED;
137 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
138 using size_type = std::size_t;
139
141
150 [[nodiscard]] constexpr std::optional<std::string_view> name() const;
151
160 [[nodiscard]] SPARROW_CONSTEXPR_CLANG std::optional<key_value_view> metadata() const;
161
175
187
199
210
221
229 [[nodiscard]] constexpr bool empty() const;
230
239 [[nodiscard]] constexpr size_type size() const;
240
249 [[nodiscard]] constexpr iterator begin();
250
259 [[nodiscard]] constexpr iterator end();
260
269 [[nodiscard]] constexpr const_iterator begin() const;
270
279 [[nodiscard]] constexpr const_iterator end() const;
280
289 [[nodiscard]] constexpr const_iterator cbegin() const;
290
299 [[nodiscard]] constexpr const_iterator cend() const;
300
309 [[nodiscard]] constexpr const_reverse_iterator rbegin() const;
310
319 [[nodiscard]] constexpr const_reverse_iterator rend() const;
320
329 [[nodiscard]] constexpr const_reverse_iterator crbegin() const;
330
339 [[nodiscard]] constexpr const_reverse_iterator crend() const;
340
354 constexpr void zero_null_values(const inner_value_type& value)
355 {
356 sparrow::zero_null_values(*this, value);
357 }
358
359 protected:
360
361 static constexpr size_t TYPE_ID_MAP_SIZE = 256;
362
363 using type_id_map = std::array<std::uint8_t, TYPE_ID_MAP_SIZE>;
364
375 static constexpr type_id_map parse_type_id_map(std::string_view format_string);
376
387 template <std::ranges::input_range R>
388 static constexpr type_id_map
389 type_id_map_from_child_to_type_id(const std::optional<R>& child_index_to_type_id);
390
406 template <std::ranges::input_range R>
407 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
408 static constexpr std::string
409 make_format_string(bool dense, std::size_t n, const std::optional<R>& child_index_to_type_id);
410
411 using children_type = std::vector<cloning_ptr<array_wrapper>>;
412
424
437
447 constexpr union_array_crtp_base(const self_type& rhs);
448
460 constexpr self_type& operator=(const self_type& rhs);
461
462 constexpr union_array_crtp_base(self_type&& rhs) = default;
463 constexpr self_type& operator=(self_type&& rhs) = default;
464
472 [[nodiscard]] constexpr arrow_proxy& get_arrow_proxy();
473
481 [[nodiscard]] constexpr const arrow_proxy& get_arrow_proxy() const;
482
484 const std::uint8_t* p_type_ids;
486 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> m_type_id_map;
487
489
490#if defined(__cpp_lib_format)
491 friend struct std::formatter<DERIVED>;
492#endif
493 };
494
508 template <class D>
509 constexpr bool operator==(const union_array_crtp_base<D>& lhs, const union_array_crtp_base<D>& rhs);
510
542 class dense_union_array : public union_array_crtp_base<dense_union_array>
543 {
544 public:
545
549
564 template <class... Args>
566 explicit dense_union_array(Args&&... args)
567 : dense_union_array(create_proxy(std::forward<Args>(args)...))
568 {
569 }
570
583
594
607
610
611 private:
612
613 using type_id_map = typename base_type::type_id_map;
614
631 template <
632 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
633 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
634 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
635 [[nodiscard]] static auto create_proxy(
636 std::vector<array>&& children,
637 type_id_buffer_type&& element_type,
638 offset_buffer_type&& offsets,
639 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
640 std::optional<std::string_view> name = std::nullopt,
641 std::optional<METADATA_RANGE> metadata = std::nullopt
642 ) -> arrow_proxy;
643
662 template <
663 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
664 std::ranges::input_range OFFSET_BUFFER_RANGE,
665 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
666 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
667 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
668 [[nodiscard]] static arrow_proxy create_proxy(
669 std::vector<array>&& children,
670 TYPE_ID_BUFFER_RANGE&& element_type,
671 OFFSET_BUFFER_RANGE&& offsets,
672 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
673 std::optional<std::string_view> name = std::nullopt,
674 std::optional<METADATA_RANGE> metadata = std::nullopt
675 )
676 {
677 SPARROW_ASSERT_TRUE(element_type.size() == offsets.size());
678 type_id_buffer_type element_type_buffer{std::move(element_type)};
679 offset_buffer_type offsets_buffer{std::move(offsets)};
680 return dense_union_array::create_proxy(
681 std::forward<std::vector<array>>(children),
682 std::move(element_type_buffer),
683 std::move(offsets_buffer),
684 std::move(type_mapping),
685 std::forward<std::optional<std::string_view>>(name),
686 std::forward<std::optional<METADATA_RANGE>>(metadata)
687 );
688 }
689
706 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
707 [[nodiscard]] static arrow_proxy create_proxy_impl(
708 std::vector<array>&& children,
709 type_id_buffer_type&& element_type,
710 offset_buffer_type&& offsets,
711 std::string&& format,
712 std::optional<std::string_view> name = std::nullopt,
713 std::optional<METADATA_RANGE> metadata = std::nullopt
714 );
715
732 template <
733 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
734 std::ranges::input_range OFFSET_BUFFER_RANGE,
735 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
736 [[nodiscard]] static arrow_proxy create_proxy_impl(
737 std::vector<array>&& children,
738 TYPE_ID_BUFFER_RANGE&& element_type,
739 OFFSET_BUFFER_RANGE&& offsets,
740 std::string&& format,
741 std::optional<std::string_view> name = std::nullopt,
742 std::optional<METADATA_RANGE> metadata = std::nullopt
743 )
744 {
745 SPARROW_ASSERT_TRUE(std::ranges::distance(element_type) == std::ranges::distance(offsets));
746 SPARROW_ASSERT_TRUE(std::ranges::distance(element_type) == children.size());
747 type_id_buffer_type element_type_buffer{std::move(element_type)};
748 offset_buffer_type offsets_buffer{std::move(offsets)};
749 return dense_union_array::create_proxy_impl(
750 std::forward<std::vector<array>>(children),
751 std::move(element_type_buffer),
752 std::move(offsets_buffer),
753 std::forward<std::string>(format),
754 std::forward<std::optional<std::string_view>>(name),
755 std::forward<std::optional<METADATA_RANGE>>(metadata)
756 );
757 }
758
769 SPARROW_API std::size_t element_offset(std::size_t i) const;
770
771 const std::int32_t* p_offsets;
773 };
774
803 class sparse_union_array : public union_array_crtp_base<sparse_union_array>
804 {
805 public:
806
809
824 template <class... Args>
826 explicit sparse_union_array(Args&&... args)
827 : sparse_union_array(create_proxy(std::forward<Args>(args)...))
828 {
829 }
830
843
846
847 private:
848
849 using type_id_map = typename base_type::type_id_map;
850
866 template <
867 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
868 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
869 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
870 static auto create_proxy(
871 std::vector<array>&& children,
872 type_id_buffer_type&& element_type,
873 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
874 std::optional<std::string_view> name = std::nullopt,
875 std::optional<METADATA_RANGE> metadata = std::nullopt
876 ) -> arrow_proxy;
877
893 template <input_metadata_container METADATA_RANGE>
894 static auto create_proxy_impl(
895 std::vector<array>&& children,
896 type_id_buffer_type&& element_type,
897 std::string&& format,
898 std::optional<std::string_view> name = std::nullopt,
899 std::optional<METADATA_RANGE> metadata = std::nullopt
900 ) -> arrow_proxy;
901
915 [[nodiscard]] SPARROW_API std::size_t element_offset(std::size_t i) const;
917 };
918
919 /****************************************
920 * union_array_crtp_base implementation *
921 ****************************************/
922
923 template <class DERIVED>
924 constexpr auto union_array_crtp_base<DERIVED>::parse_type_id_map(std::string_view format_string)
925 -> type_id_map
926 {
927 type_id_map ret;
928 // remove +du: / +su: prefix
929 format_string.remove_prefix(4);
930
931 constexpr std::string_view delim{","};
932 std::size_t child_index = 0;
933 std::ranges::for_each(
934 format_string | std::views::split(delim),
935 [&](const auto& s)
936 {
937 const std::string str(
938 std::string_view{&*std::ranges::begin(s), static_cast<size_t>(std::ranges::distance(s))}
939 );
940 const auto as_int = std::atoi(str.c_str());
941 ret[static_cast<std::size_t>(as_int)] = static_cast<std::uint8_t>(child_index);
942 ++child_index;
943 }
944 );
945 return ret;
946 }
947
948 template <class DERIVED>
949 template <std::ranges::input_range R>
950 constexpr auto
951 union_array_crtp_base<DERIVED>::type_id_map_from_child_to_type_id(const std::optional<R>& child_index_to_type_id)
952 -> type_id_map
953 {
954 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> ret;
955 if (!child_index_to_type_id.has_value())
956 {
957 constexpr std::array<std::uint8_t, TYPE_ID_MAP_SIZE> default_mapping = []
958 {
959 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> arr{};
960 std::iota(arr.begin(), arr.end(), 0);
961 return arr;
962 }();
963 return default_mapping;
964 }
965 else
966 {
967 const std::size_t n = std::ranges::size(*child_index_to_type_id);
968 for (std::size_t i = 0; i < n; ++i)
969 {
970 ret[(*child_index_to_type_id)[static_cast<std::uint8_t>(i)]] = static_cast<std::uint8_t>(i);
971 }
972 }
973 return ret;
974 }
975
976 template <class DERIVED>
977 template <std::ranges::input_range R>
978 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
979 constexpr std::string
980 union_array_crtp_base<DERIVED>::make_format_string(bool dense, const std::size_t n, const std::optional<R>& range)
981 {
982 const auto range_size = range.has_value() ? std::ranges::size(*range) : 0;
983 if (range_size == n || range_size == 0)
984 {
985 std::string ret = dense ? "+ud:" : "+us:";
986 if (range_size == 0)
987 {
988 for (std::size_t i = 0; i < n; ++i)
989 {
990 ret += std::to_string(i) + ",";
991 }
992 }
993 else
994 {
995 for (const auto& v : *range)
996 {
997 ret += std::to_string(v) + ",";
998 }
999 }
1000 ret.pop_back();
1001 return ret;
1002 }
1003 else
1004 {
1005 throw std::invalid_argument("Invalid type-id map");
1006 }
1007 }
1008
1009 template <class DERIVED>
1010 constexpr std::optional<std::string_view> union_array_crtp_base<DERIVED>::name() const
1011 {
1012 return m_proxy.name();
1013 }
1014
1015 template <class DERIVED>
1017 {
1018 return m_proxy.metadata();
1019 }
1020
1021 template <class DERIVED>
1026
1027 template <class DERIVED>
1029 {
1030 return m_proxy;
1031 }
1032
1033 template <class DERIVED>
1035 : m_proxy(std::move(proxy))
1036 , p_type_ids(reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0 /*index of type-ids*/].data()))
1039 {
1040 }
1041
1042 template <class DERIVED>
1047
1048 template <class DERIVED>
1050 {
1051 if (this != &rhs)
1052 {
1053 m_proxy = rhs.m_proxy;
1054 p_type_ids = reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0 /*index of type-ids*/].data());
1057 }
1058 return *this;
1059 }
1060
1061 template <class DERIVED>
1063 {
1064 const auto type_id = static_cast<std::size_t>(p_type_ids[i]);
1065 const auto child_index = m_type_id_map[type_id];
1066 const auto offset = this->derived_cast().element_offset(i);
1067 return array_element(*m_children[child_index], static_cast<std::size_t>(offset));
1068 }
1069
1070 template <class DERIVED>
1072 {
1073 return static_cast<const derived_type&>(*this)[i];
1074 }
1075
1076 template <class DERIVED>
1077 constexpr std::size_t union_array_crtp_base<DERIVED>::size() const
1078 {
1079 return m_proxy.length();
1080 }
1081
1082 template <class DERIVED>
1084 {
1085 return size() == 0;
1086 }
1087
1088 template <class DERIVED>
1090 {
1091 return iterator(functor_type{&(this->derived_cast())}, 0);
1092 }
1093
1094 template <class DERIVED>
1096 {
1097 return iterator(functor_type{&(this->derived_cast())}, this->size());
1098 }
1099
1100 template <class DERIVED>
1102 {
1103 return cbegin();
1104 }
1105
1106 template <class DERIVED>
1108 {
1109 return cend();
1110 }
1111
1112 template <class DERIVED>
1114 {
1115 return const_iterator(const_functor_type{&(this->derived_cast())}, 0);
1116 }
1117
1118 template <class DERIVED>
1120 {
1121 return const_iterator(const_functor_type{&(this->derived_cast())}, this->size());
1122 }
1123
1124 template <class DERIVED>
1126 {
1127 return const_reverse_iterator{cend()};
1128 }
1129
1130 template <class DERIVED>
1132 {
1134 }
1135
1136 template <class DERIVED>
1138 {
1139 return rbegin();
1140 }
1141
1142 template <class DERIVED>
1144 {
1145 return rend();
1146 }
1147
1148 template <class DERIVED>
1150 {
1151 return (*this)[0];
1152 }
1153
1154 template <class DERIVED>
1156 {
1157 return (*this)[this->size() - 1];
1158 }
1159
1160 template <class DERIVED>
1162 {
1163 children_type children(proxy.children().size(), nullptr);
1164 for (std::size_t i = 0; i < children.size(); ++i)
1165 {
1166 children[i] = array_factory(proxy.children()[i].view());
1167 }
1168 return children;
1169 }
1170
1171 template <class D>
1172 constexpr bool operator==(const union_array_crtp_base<D>& lhs, const union_array_crtp_base<D>& rhs)
1173 {
1174 return std::ranges::equal(lhs, rhs);
1175 }
1176
1177 /************************************
1178 * Union array shared implementation *
1179 ************************************/
1180
1181 namespace detail
1182 {
1183 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
1185 std::vector<array>&& children,
1186 std::vector<buffer<std::uint8_t>>&& buffers,
1187 std::size_t size,
1188 std::string&& format,
1189 std::optional<std::string_view> name,
1190 std::optional<METADATA_RANGE> metadata
1191 )
1192 {
1193 const auto n_children = children.size();
1194 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
1195 ArrowArray** child_arrays = new ArrowArray*[n_children];
1196
1197 for (std::size_t i = 0; i < n_children; ++i)
1198 {
1199 auto& child = children[i];
1200 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
1201 child_arrays[i] = new ArrowArray(std::move(flat_arr));
1202 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
1203 }
1204
1205 const bool is_nullable = std::all_of(
1206 child_schemas,
1207 child_schemas + n_children,
1208 [](const ArrowSchema* schema)
1209 {
1210 return to_set_of_ArrowFlags(schema->flags).contains(ArrowFlag::NULLABLE);
1211 }
1212 );
1213
1214 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
1215 flags = is_nullable
1216 ? std::make_optional(std::unordered_set<sparrow::ArrowFlag>{ArrowFlag::NULLABLE})
1217 : std::nullopt;
1218
1220 std::move(format),
1221 std::move(name), // name
1222 std::move(metadata), // metadata
1223 flags, // flags,
1224 child_schemas, // children
1225 repeat_view<bool>(true, n_children), // children_ownership
1226 nullptr, // dictionary,
1227 true // dictionary ownership
1228 );
1229
1231 static_cast<std::int64_t>(size), // length
1232 0, // null_count: always 0 as the nullability is in children
1233 0, // offset
1234 std::move(buffers),
1235 child_arrays, // children
1236 repeat_view<bool>(true, n_children), // children_ownership
1237 nullptr, // dictionary
1238 true
1239 );
1240
1241 return arrow_proxy{std::move(arr), std::move(schema)};
1242 }
1243 }
1244
1245 /************************************
1246 * dense_union_array implementation *
1247 ************************************/
1248
1249 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1250 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1251 auto dense_union_array::create_proxy(
1252 std::vector<array>&& children,
1253 type_id_buffer_type&& element_type,
1254 offset_buffer_type&& offsets,
1255 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1256 std::optional<std::string_view> name,
1257 std::optional<METADATA_RANGE> metadata
1258 ) -> arrow_proxy
1259 {
1260 SPARROW_ASSERT_TRUE(element_type.size() == offsets.size());
1261 const auto n_children = children.size();
1262
1263 std::string format = make_format_string(true /*dense union*/, n_children, child_index_to_type_id);
1264
1265 return create_proxy_impl(
1266 std::move(children),
1267 std::move(element_type),
1268 std::move(offsets),
1269 std::move(format),
1270 std::move(name),
1271 std::move(metadata)
1272 );
1273 }
1274
1275 template <input_metadata_container METADATA_RANGE>
1276 auto dense_union_array::create_proxy_impl(
1277 std::vector<array>&& children,
1278 type_id_buffer_type&& element_type,
1279 offset_buffer_type&& offsets,
1280 std::string&& format,
1281 std::optional<std::string_view> name,
1282 std::optional<METADATA_RANGE> metadata
1283 ) -> arrow_proxy
1284 {
1285 SPARROW_ASSERT_TRUE(element_type.size() == offsets.size());
1286 const auto size = element_type.size();
1287
1288 std::vector<buffer<std::uint8_t>> arr_buffs;
1289 arr_buffs.reserve(2);
1290 arr_buffs.emplace_back(std::move(element_type).extract_storage());
1291 arr_buffs.emplace_back(std::move(offsets).extract_storage());
1292
1294 std::move(children),
1295 std::move(arr_buffs),
1296 size,
1297 std::move(format),
1298 std::move(name),
1299 std::move(metadata)
1300 );
1301 }
1302
1303 /*************************************
1304 * sparse_union_array implementation *
1305 *************************************/
1306
1307 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1308 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1309 auto sparse_union_array::create_proxy(
1310 std::vector<array>&& children,
1311 type_id_buffer_type&& element_type,
1312 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1313 std::optional<std::string_view> name,
1314 std::optional<METADATA_RANGE> metadata
1315 ) -> arrow_proxy
1316 {
1317 const auto n_children = children.size();
1318 if (child_index_to_type_id.has_value())
1319 {
1320 SPARROW_ASSERT_TRUE((*child_index_to_type_id).size() == n_children);
1321 }
1322
1323 std::string format = make_format_string(false /*is dense union*/, n_children, child_index_to_type_id);
1324
1325 return create_proxy_impl(
1326 std::move(children),
1327 std::move(element_type),
1328 std::move(format),
1329 std::move(name),
1330 std::move(metadata)
1331 );
1332 }
1333
1334 template <input_metadata_container METADATA_RANGE>
1335 auto sparse_union_array::create_proxy_impl(
1336 std::vector<array>&& children,
1337 type_id_buffer_type&& element_type,
1338 std::string&& format,
1339 std::optional<std::string_view> name,
1340 std::optional<METADATA_RANGE> metadata
1341 ) -> arrow_proxy
1342 {
1343 for (const auto& child : children)
1344 {
1345 SPARROW_ASSERT_TRUE(child.size() == element_type.size());
1346 }
1347 const auto size = element_type.size();
1348
1349 std::vector<buffer<std::uint8_t>> arr_buffs;
1350 arr_buffs.reserve(1);
1351 arr_buffs.emplace_back(std::move(element_type).extract_storage());
1352
1354 std::move(children),
1355 std::move(arr_buffs),
1356 size,
1357 std::move(format),
1358 std::move(name),
1359 std::move(metadata)
1360 );
1361 }
1362}
1363
1364#if defined(__cpp_lib_format)
1365
1374template <typename U>
1375 requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
1376struct std::formatter<U>
1377{
/**
 * Parses the format specification. No option is supported, so the parse
 * position is returned exactly as it was received.
 *
 * @param ctx Parse context.
 * @return `ctx.begin()`.
 */
constexpr auto parse(std::format_parse_context& ctx)
{
    auto spec_begin = ctx.begin();
    return spec_begin;
}
1388
/**
 * Writes a union array as
 * `DenseUnion [name=<name> | size=<n>] <v0, v1, ..., vn-1>` (or `SparseUnion`
 * for a sparse union). A missing name is printed as `nullptr`; an empty
 * array is printed with an empty element list, i.e. `<>`.
 *
 * @param ar  Array to format.
 * @param ctx Format context receiving the output.
 * @return Output iterator past the last character written.
 */
auto format(const U& ar, std::format_context& ctx) const
{
    if constexpr (std::is_same_v<U, sparrow::dense_union_array>)
    {
        std::format_to(ctx.out(), "DenseUnion");
    }
    else if constexpr (std::is_same_v<U, sparrow::sparse_union_array>)
    {
        std::format_to(ctx.out(), "SparseUnion");
    }
    else
    {
        static_assert(sparrow::mpl::dependent_false<U>::value, "Unknown union array type");
    }
    const auto& proxy = ar.get_arrow_proxy();
    std::format_to(ctx.out(), " [name={} | size={}] <", proxy.name().value_or("nullptr"), proxy.length());

    // std::prev(cend()) and back() are undefined on an empty array: close the
    // element list right away in that case.
    if (ar.empty())
    {
        return std::format_to(ctx.out(), ">");
    }

    // Every element but the last is followed by a separator.
    std::for_each(
        ar.cbegin(),
        std::prev(ar.cend()),
        [&ctx](const auto& value)
        {
            std::format_to(ctx.out(), "{}, ", value);
        }
    );

    return std::format_to(ctx.out(), "{}>", ar.back());
}
1428};
1429
1430namespace sparrow
1431{
/**
 * Streams a union array using its `std::formatter` specialization.
 *
 * @param os    Output stream.
 * @param value Union array to print.
 * @return `os`.
 */
template <typename U>
    requires std::derived_from<U, union_array_crtp_base<U>>
std::ostream& operator<<(std::ostream& os, const U& value)
{
    return os << std::format("{}", value);
}
1449}
1450
1451#endif
void sparse_union_array()
Object that owns a piece of contiguous memory.
Definition buffer.hpp:131
Base class for CRTP base classes.
Definition crtp_base.hpp:29
constexpr derived_type & derived_cast()
Definition crtp_base.hpp:39
Dense union array implementation with offset buffer.
SPARROW_API dense_union_array & operator=(const dense_union_array &rhs)
Copy assignment operator.
u8_buffer< std::uint32_t > offset_buffer_type
union_array_crtp_base< dense_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
dense_union_array(Args &&... args)
Generic constructor for creating dense union arrays.
SPARROW_API dense_union_array(arrow_proxy proxy)
Constructs dense union array from Arrow proxy.
SPARROW_API dense_union_array(const dense_union_array &rhs)
Copy constructor.
dense_union_array(dense_union_array &&rhs)=default
dense_union_array & operator=(dense_union_array &&rhs)=default
A view that repeats a value a given number of times.
Sparse union array implementation without offset buffer.
SPARROW_API sparse_union_array & operator=(const sparse_union_array &)
sparse_union_array(Args &&... args)
Generic constructor for creating sparse union arrays.
union_array_crtp_base< sparse_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
SPARROW_API sparse_union_array(const sparse_union_array &)
SPARROW_API sparse_union_array(arrow_proxy proxy)
Constructs sparse union array from Arrow proxy.
This buffer class is used as storage buffer for all sparrow arrays.
CRTP base class providing shared functionality for union array implementations.
array_traits::inner_value_type inner_value_type
std::reverse_iterator< const_iterator > const_reverse_iterator
constexpr iterator begin()
Gets iterator to the beginning of the array.
detail::layout_bracket_functor< const derived_type, value_type > const_functor_type
constexpr std::optional< std::string_view > name() const
Gets the optional name of the union array.
array_traits::const_reference value_type
constexpr union_array_crtp_base(const self_type &rhs)
Copy constructor.
u8_buffer< std::uint8_t > type_id_buffer_type
static constexpr std::string make_format_string(bool dense, std::size_t n, const std::optional< R > &child_index_to_type_id)
Creates Arrow format string for union arrays.
SPARROW_CONSTEXPR_CLANG value_type front() const
Gets reference to the first element.
constexpr const_reverse_iterator crbegin() const
Gets const reverse iterator to the beginning of reversed array.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > m_type_id_map
constexpr arrow_proxy & get_arrow_proxy()
Gets mutable reference to the Arrow proxy.
union_array_crtp_base< DERIVED > self_type
constexpr self_type & operator=(self_type &&rhs)=default
union_array_crtp_base(arrow_proxy proxy)
Protected constructor from Arrow proxy.
array_traits::const_reference const_reference
detail::layout_bracket_functor< derived_type, value_type > functor_type
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i) const
Gets element at specified position without bounds checking.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > type_id_map
functor_index_iterator< const_functor_type > const_iterator
static constexpr type_id_map type_id_map_from_child_to_type_id(const std::optional< R > &child_index_to_type_id)
Creates type ID mapping from child index to type ID mapping.
constexpr const_reverse_iterator rbegin() const
Gets reverse iterator to the beginning of reversed array.
constexpr const_iterator begin() const
Gets const iterator to the beginning of the array.
SPARROW_CONSTEXPR_CLANG std::optional< key_value_view > metadata() const
Gets the metadata associated with the union array.
functor_index_iterator< functor_type > iterator
constexpr self_type & operator=(const self_type &rhs)
Copy assignment operator.
constexpr bool empty() const
Checks if the union array is empty.
SPARROW_CONSTEXPR_CLANG value_type at(size_type i) const
Gets element at specified position with bounds checking.
constexpr const_iterator cbegin() const
Gets const iterator to the beginning of the array.
constexpr const_reverse_iterator crend() const
Gets const reverse iterator to the end of reversed array.
constexpr const_iterator end() const
Gets const iterator to the end of the array.
static constexpr type_id_map parse_type_id_map(std::string_view format_string)
Parses type ID mapping from Arrow format string.
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i)
Gets mutable element at specified position.
constexpr const_iterator cend() const
Gets const iterator to the end of the array.
SPARROW_CONSTEXPR_CLANG value_type back() const
Gets reference to the last element.
constexpr union_array_crtp_base(self_type &&rhs)=default
constexpr const arrow_proxy & get_arrow_proxy() const
Gets const reference to the Arrow proxy.
constexpr iterator end()
Gets iterator to the end of the array.
constexpr size_type size() const
Gets the number of elements in the union array.
constexpr const_reverse_iterator rend() const
Gets reverse iterator to the end of reversed array.
constexpr void zero_null_values(const inner_value_type &value)
Sets all null values to the specified value.
std::vector< cloning_ptr< array_wrapper > > children_type
constexpr children_type make_children(arrow_proxy &proxy)
Creates child array wrappers from Arrow proxy.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:332
#define SPARROW_CONSTEXPR_CLANG
Definition config.hpp:64
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
arrow_proxy create_union_proxy_impl(std::vector< array > &&children, std::vector< buffer< std::uint8_t > > &&buffers, std::size_t size, std::string &&format, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
void unreachable()
Invokes undefined behavior for optimization purposes.
Definition mp_utils.hpp:882
constexpr bool is_dense_union_array_v
Type trait to check if a type is a dense_union_array.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr void zero_null_values(R &range, const T &default_value=T{})
Sets null values in a range to a default value.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr std::size_t range_size(R &&r)
Definition ranges.hpp:35
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:110
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
constexpr bool is_sparse_union_array_v
Type trait to check if a type is a sparse_union_array.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::unordered_set< ArrowFlag > to_set_of_ArrowFlags(int64_t flag_values)
Converts a bitfield of ArrowFlag values to a set of ArrowFlag values.
Extensions to the C++ standard library.
int64_t flags
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
Metafunction for retrieving the data_type of a typed array.