sparrow ..
Loading...
Searching...
No Matches
union_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <array>
18#include <optional>
19
20#include "sparrow/array_api.hpp"
36
37namespace sparrow
38{
41
42 namespace detail
43 {
44 template <>
46 {
47 [[nodiscard]] static constexpr sparrow::data_type get()
48 {
50 }
51 };
52
53 template <>
55 {
56 [[nodiscard]] static constexpr sparrow::data_type get()
57 {
59 }
60 };
61 }
62
/// Type trait to check if a type is a dense_union_array.
/// True only when T is exactly dense_union_array (std::same_as), so cv-qualified
/// or reference types do not match.
68 template <class T>
69 constexpr bool is_dense_union_array_v = std::same_as<T, dense_union_array>;
70
/// Type trait to check if a type is a sparse_union_array.
/// True only when T is exactly sparse_union_array (std::same_as), so cv-qualified
/// or reference types do not match.
76 template <class T>
77 constexpr bool is_sparse_union_array_v = std::same_as<T, sparse_union_array>;
78
123 template <class DERIVED>
124 class union_array_crtp_base : public crtp_base<DERIVED>
125 {
126 public:
127
129 using derived_type = DERIVED;
137 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
138 using size_type = std::size_t;
139
141
150 [[nodiscard]] constexpr std::optional<std::string_view> name() const;
151
160 [[nodiscard]] SPARROW_CONSTEXPR_CLANG std::optional<key_value_view> metadata() const;
161
175
187
199
210
221
229 [[nodiscard]] constexpr bool empty() const;
230
239 [[nodiscard]] constexpr size_type size() const;
240
249 [[nodiscard]] constexpr iterator begin();
250
259 [[nodiscard]] constexpr iterator end();
260
269 [[nodiscard]] constexpr const_iterator begin() const;
270
279 [[nodiscard]] constexpr const_iterator end() const;
280
289 [[nodiscard]] constexpr const_iterator cbegin() const;
290
299 [[nodiscard]] constexpr const_iterator cend() const;
300
309 [[nodiscard]] constexpr const_reverse_iterator rbegin() const;
310
319 [[nodiscard]] constexpr const_reverse_iterator rend() const;
320
329 [[nodiscard]] constexpr const_reverse_iterator crbegin() const;
330
339 [[nodiscard]] constexpr const_reverse_iterator crend() const;
340
/// Sets all null values to the specified value.
/// Forwards to the free function sparrow::zero_null_values, passing this array
/// as the range and `value` as the replacement value.
354 constexpr void zero_null_values(const inner_value_type& value)
355 {
356 sparrow::zero_null_values(*this, value);
357 }
358
359 protected:
360
361 static constexpr size_t TYPE_ID_MAP_SIZE = 256;
362
363 using type_id_map = std::array<std::uint8_t, TYPE_ID_MAP_SIZE>;
364
375 static constexpr type_id_map parse_type_id_map(std::string_view format_string);
376
387 template <std::ranges::input_range R>
388 static constexpr type_id_map
389 type_id_map_from_child_to_type_id(const std::optional<R>& child_index_to_type_id);
390
406 template <std::ranges::input_range R>
407 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
408 static constexpr std::string
409 make_format_string(bool dense, std::size_t n, const std::optional<R>& child_index_to_type_id);
410
411 using children_type = std::vector<cloning_ptr<array_wrapper>>;
412
424
437
447 constexpr union_array_crtp_base(const self_type& rhs);
448
460 constexpr self_type& operator=(const self_type& rhs);
461
462 constexpr union_array_crtp_base(self_type&& rhs) = default;
463 constexpr self_type& operator=(self_type&& rhs) = default;
464
472 [[nodiscard]] constexpr arrow_proxy& get_arrow_proxy();
473
481 [[nodiscard]] constexpr const arrow_proxy& get_arrow_proxy() const;
482
484 const std::uint8_t* p_type_ids;
486 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> m_type_id_map;
487
489
490#if defined(__cpp_lib_format)
491 friend struct std::formatter<DERIVED>;
492#endif
493 };
494
508 template <class D>
509 constexpr bool operator==(const union_array_crtp_base<D>& lhs, const union_array_crtp_base<D>& rhs);
510
542 class dense_union_array : public union_array_crtp_base<dense_union_array>
543 {
544 public:
545
549
564 template <class... Args>
566 explicit dense_union_array(Args&&... args)
567 : dense_union_array(create_proxy(std::forward<Args>(args)...))
568 {
569 }
570
583
594
607
610
611 private:
612
613 using type_id_map = typename base_type::type_id_map;
614
631 template <
632 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
633 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
634 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
635 [[nodiscard]] static auto create_proxy(
636 std::vector<array>&& children,
637 type_id_buffer_type&& element_type,
638 offset_buffer_type&& offsets,
639 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
640 std::optional<std::string_view> name = std::nullopt,
641 std::optional<METADATA_RANGE> metadata = std::nullopt
642 ) -> arrow_proxy;
643
662 template <
663 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
664 std::ranges::input_range OFFSET_BUFFER_RANGE,
665 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
666 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
667 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
668 [[nodiscard]] static arrow_proxy create_proxy(
669 std::vector<array>&& children,
670 TYPE_ID_BUFFER_RANGE&& element_type,
671 OFFSET_BUFFER_RANGE&& offsets,
672 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
673 std::optional<std::string_view> name = std::nullopt,
674 std::optional<METADATA_RANGE> metadata = std::nullopt
675 )
676 {
677 SPARROW_ASSERT_TRUE(element_type.size() == offsets.size());
678 type_id_buffer_type element_type_buffer{std::move(element_type)};
679 offset_buffer_type offsets_buffer{std::move(offsets)};
680 return dense_union_array::create_proxy(
681 std::forward<std::vector<array>>(children),
682 std::move(element_type_buffer),
683 std::move(offsets_buffer),
684 std::move(type_mapping),
685 std::forward<std::optional<std::string_view>>(name),
686 std::forward<std::optional<METADATA_RANGE>>(metadata)
687 );
688 }
689
706 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
707 [[nodiscard]] static arrow_proxy create_proxy_impl(
708 std::vector<array>&& children,
709 type_id_buffer_type&& element_type,
710 offset_buffer_type&& offsets,
711 std::string&& format,
712 std::optional<std::string_view> name = std::nullopt,
713 std::optional<METADATA_RANGE> metadata = std::nullopt
714 );
715
732 template <
733 std::ranges::input_range TYPE_ID_BUFFER_RANGE,
734 std::ranges::input_range OFFSET_BUFFER_RANGE,
735 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
736 [[nodiscard]] static arrow_proxy create_proxy_impl(
737 std::vector<array>&& children,
738 TYPE_ID_BUFFER_RANGE&& element_type,
739 OFFSET_BUFFER_RANGE&& offsets,
740 std::string&& format,
741 std::optional<std::string_view> name = std::nullopt,
742 std::optional<METADATA_RANGE> metadata = std::nullopt
743 )
744 {
745 SPARROW_ASSERT_TRUE(std::ranges::distance(element_type) == std::ranges::distance(offsets));
746 SPARROW_ASSERT_TRUE(std::ranges::distance(element_type) == children.size());
747 type_id_buffer_type element_type_buffer{std::move(element_type)};
748 offset_buffer_type offsets_buffer{std::move(offsets)};
749 return dense_union_array::create_proxy_impl(
750 std::forward<std::vector<array>>(children),
751 std::move(element_type_buffer),
752 std::move(offsets_buffer),
753 std::forward<std::string>(format),
754 std::forward<std::optional<std::string_view>>(name),
755 std::forward<std::optional<METADATA_RANGE>>(metadata)
756 );
757 }
758
769 SPARROW_API std::size_t element_offset(std::size_t i) const;
770
771 const std::int32_t* p_offsets;
773 };
774
803 class sparse_union_array : public union_array_crtp_base<sparse_union_array>
804 {
805 public:
806
809
824 template <class... Args>
826 explicit sparse_union_array(Args&&... args)
827 : sparse_union_array(create_proxy(std::forward<Args>(args)...))
828 {
829 }
830
843
844 private:
845
846 using type_id_map = typename base_type::type_id_map;
847
863 template <
864 std::ranges::input_range TYPE_MAPPING = std::vector<std::uint8_t>,
865 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
866 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
867 static auto create_proxy(
868 std::vector<array>&& children,
869 type_id_buffer_type&& element_type,
870 std::optional<TYPE_MAPPING>&& type_mapping = std::nullopt,
871 std::optional<std::string_view> name = std::nullopt,
872 std::optional<METADATA_RANGE> metadata = std::nullopt
873 ) -> arrow_proxy;
874
890 template <input_metadata_container METADATA_RANGE>
891 static auto create_proxy_impl(
892 std::vector<array>&& children,
893 type_id_buffer_type&& element_type,
894 std::string&& format,
895 std::optional<std::string_view> name = std::nullopt,
896 std::optional<METADATA_RANGE> metadata = std::nullopt
897 ) -> arrow_proxy;
898
912 [[nodiscard]] SPARROW_API std::size_t element_offset(std::size_t i) const;
914 };
915
916 /****************************************
917 * union_array_crtp_base implementation *
918 ****************************************/
919
920 template <class DERIVED>
921 constexpr auto union_array_crtp_base<DERIVED>::parse_type_id_map(std::string_view format_string)
922 -> type_id_map
923 {
924 type_id_map ret;
925 // remove +du: / +su: prefix
926 format_string.remove_prefix(4);
927
928 constexpr std::string_view delim{","};
929 std::size_t child_index = 0;
930 std::ranges::for_each(
931 format_string | std::views::split(delim),
932 [&](const auto& s)
933 {
934 const std::string str(
935 std::string_view{&*std::ranges::begin(s), static_cast<size_t>(std::ranges::distance(s))}
936 );
937 const auto as_int = std::atoi(str.c_str());
938 ret[static_cast<std::size_t>(as_int)] = static_cast<std::uint8_t>(child_index);
939 ++child_index;
940 }
941 );
942 return ret;
943 }
944
945 template <class DERIVED>
946 template <std::ranges::input_range R>
947 constexpr auto
948 union_array_crtp_base<DERIVED>::type_id_map_from_child_to_type_id(const std::optional<R>& child_index_to_type_id)
949 -> type_id_map
950 {
951 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> ret;
952 if (!child_index_to_type_id.has_value())
953 {
954 constexpr std::array<std::uint8_t, TYPE_ID_MAP_SIZE> default_mapping = []
955 {
956 std::array<std::uint8_t, TYPE_ID_MAP_SIZE> arr{};
957 std::iota(arr.begin(), arr.end(), 0);
958 return arr;
959 }();
960 return default_mapping;
961 }
962 else
963 {
964 const std::size_t n = std::ranges::size(*child_index_to_type_id);
965 for (std::size_t i = 0; i < n; ++i)
966 {
967 ret[(*child_index_to_type_id)[static_cast<std::uint8_t>(i)]] = static_cast<std::uint8_t>(i);
968 }
969 }
970 return ret;
971 }
972
973 template <class DERIVED>
974 template <std::ranges::input_range R>
975 requires(std::convertible_to<std::ranges::range_value_t<R>, std::uint8_t>)
976 constexpr std::string
977 union_array_crtp_base<DERIVED>::make_format_string(bool dense, const std::size_t n, const std::optional<R>& range)
978 {
979 const auto range_size = range.has_value() ? std::ranges::size(*range) : 0;
980 if (range_size == n || range_size == 0)
981 {
982 std::string ret = dense ? "+ud:" : "+us:";
983 if (range_size == 0)
984 {
985 for (std::size_t i = 0; i < n; ++i)
986 {
987 ret += std::to_string(i) + ",";
988 }
989 }
990 else
991 {
992 for (const auto& v : *range)
993 {
994 ret += std::to_string(v) + ",";
995 }
996 }
997 ret.pop_back();
998 return ret;
999 }
1000 else
1001 {
1002 throw std::invalid_argument("Invalid type-id map");
1003 }
1004 }
1005
1006 template <class DERIVED>
1007 constexpr std::optional<std::string_view> union_array_crtp_base<DERIVED>::name() const
1008 {
1009 return m_proxy.name();
1010 }
1011
1012 template <class DERIVED>
1014 {
1015 return m_proxy.metadata();
1016 }
1017
1018 template <class DERIVED>
1023
1024 template <class DERIVED>
1026 {
1027 return m_proxy;
1028 }
1029
1030 template <class DERIVED>
1032 : m_proxy(std::move(proxy))
1033 , p_type_ids(reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0 /*index of type-ids*/].data()))
1036 {
1037 }
1038
1039 template <class DERIVED>
1044
1045 template <class DERIVED>
1047 {
1048 if (this != &rhs)
1049 {
1050 m_proxy = rhs.m_proxy;
1051 p_type_ids = reinterpret_cast<std::uint8_t*>(m_proxy.buffers()[0 /*index of type-ids*/].data());
1054 }
1055 return *this;
1056 }
1057
1058 template <class DERIVED>
1060 {
1061 const auto type_id = static_cast<std::size_t>(p_type_ids[i]);
1062 const auto child_index = m_type_id_map[type_id];
1063 const auto offset = this->derived_cast().element_offset(i);
1064 return array_element(*m_children[child_index], static_cast<std::size_t>(offset));
1065 }
1066
1067 template <class DERIVED>
1069 {
1070 return static_cast<const derived_type&>(*this)[i];
1071 }
1072
1073 template <class DERIVED>
1074 constexpr std::size_t union_array_crtp_base<DERIVED>::size() const
1075 {
1076 return m_proxy.length();
1077 }
1078
1079 template <class DERIVED>
1081 {
1082 return size() == 0;
1083 }
1084
1085 template <class DERIVED>
1087 {
1088 return iterator(functor_type{&(this->derived_cast())}, 0);
1089 }
1090
1091 template <class DERIVED>
1093 {
1094 return iterator(functor_type{&(this->derived_cast())}, this->size());
1095 }
1096
1097 template <class DERIVED>
1099 {
1100 return cbegin();
1101 }
1102
1103 template <class DERIVED>
1105 {
1106 return cend();
1107 }
1108
1109 template <class DERIVED>
1111 {
1112 return const_iterator(const_functor_type{&(this->derived_cast())}, 0);
1113 }
1114
1115 template <class DERIVED>
1117 {
1118 return const_iterator(const_functor_type{&(this->derived_cast())}, this->size());
1119 }
1120
1121 template <class DERIVED>
1123 {
1124 return const_reverse_iterator{cend()};
1125 }
1126
1127 template <class DERIVED>
1129 {
1131 }
1132
1133 template <class DERIVED>
1135 {
1136 return rbegin();
1137 }
1138
1139 template <class DERIVED>
1141 {
1142 return rend();
1143 }
1144
1145 template <class DERIVED>
1147 {
1148 return (*this)[0];
1149 }
1150
1151 template <class DERIVED>
1153 {
1154 return (*this)[this->size() - 1];
1155 }
1156
1157 template <class DERIVED>
1159 {
1160 children_type children(proxy.children().size(), nullptr);
1161 for (std::size_t i = 0; i < children.size(); ++i)
1162 {
1163 children[i] = array_factory(proxy.children()[i].view());
1164 }
1165 return children;
1166 }
1167
1168 template <class D>
1169 constexpr bool operator==(const union_array_crtp_base<D>& lhs, const union_array_crtp_base<D>& rhs)
1170 {
1171 return std::ranges::equal(lhs, rhs);
1172 }
1173
1174 /************************************
1175 * dense_union_array implementation *
1176 ************************************/
1177
1178 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1179 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1180 auto dense_union_array::create_proxy(
1181 std::vector<array>&& children,
1182 type_id_buffer_type&& element_type,
1183 offset_buffer_type&& offsets,
1184 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1185 std::optional<std::string_view> name,
1186 std::optional<METADATA_RANGE> metadata
1187 ) -> arrow_proxy
1188 {
1189 SPARROW_ASSERT_TRUE(element_type.size() == offsets.size());
1190 const auto n_children = children.size();
1191
1192 std::string format = make_format_string(true /*dense union*/, n_children, child_index_to_type_id);
1193
1194 return create_proxy_impl(
1195 std::move(children),
1196 std::move(element_type),
1197 std::move(offsets),
1198 std::move(format),
1199 std::move(name),
1200 std::move(metadata)
1201 );
1202 }
1203
/**
 * Builds the Arrow proxy of a dense union from its buffers and format string.
 *
 * The Arrow structures of every child are extracted and handed over to the created
 * schema/array, which are flagged as owning all of their children. The array carries
 * two buffers: the type ids first, then the offsets.
 *
 * @param children Child arrays; each one is moved from.
 * @param element_type Buffer of type ids; its size is used as the array length.
 * @param offsets Buffer of offsets; must have the same size as `element_type`.
 * @param format Arrow format string of the union.
 * @param name Optional name of the array.
 * @param metadata Optional metadata of the array.
 * @return The Arrow proxy built from the created array and schema.
 */
1204 template <input_metadata_container METADATA_RANGE>
1205 auto dense_union_array::create_proxy_impl(
1206 std::vector<array>&& children,
1207 type_id_buffer_type&& element_type,
1208 offset_buffer_type&& offsets,
1209 std::string&& format,
1210 std::optional<std::string_view> name,
1211 std::optional<METADATA_RANGE> metadata
1212 ) -> arrow_proxy
1213 {
1214 SPARROW_ASSERT_TRUE(element_type.size() == offsets.size());
1215 const auto n_children = children.size();
// NOTE(review): raw owning allocations; nothing in this function releases them if a
// later statement throws — confirm whether that can happen in practice.
1216 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
1217 ArrowArray** child_arrays = new ArrowArray*[n_children];
// Read the length now: element_type is moved from further down.
1218 const auto size = element_type.size();
1219
// Move the Arrow structures out of each child into heap-allocated copies.
1220 for (std::size_t i = 0; i < n_children; ++i)
1221 {
1222 auto& child = children[i];
1223 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
1224 child_arrays[i] = new ArrowArray(std::move(flat_arr));
1225 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
1226 }
1227
// The union is flagged NULLABLE only when every child schema carries that flag.
1228 const bool is_nullable = std::all_of(
1229 child_schemas,
1230 child_schemas + n_children,
1231 [](const ArrowSchema* schema)
1232 {
1233 return to_set_of_ArrowFlags(schema->flags).contains(ArrowFlag::NULLABLE);
1234 }
1235 );
1236
1237 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
1238 flags = is_nullable
1239 ? std::make_optional(std::unordered_set<sparrow::ArrowFlag>{ArrowFlag::NULLABLE})
1240 : std::nullopt;
1241
// NOTE(review): the line opening this call is missing from this listing; presumably
// `ArrowSchema schema = make_arrow_schema(`, as in sparse_union_array::create_proxy_impl.
1243 std::move(format),
1244 std::move(name), // name
1245 std::move(metadata), // metadata
1246 flags, // flags,
1247 child_schemas, // children
1248 repeat_view<bool>(true, n_children), // children_ownership
1249 nullptr, // dictionary,
1250 true // dictionary ownership
1251 );
1252
// Buffer order: type ids first, offsets second.
1253 std::vector<buffer<std::uint8_t>> arr_buffs = {
1254 std::move(element_type).extract_storage(),
1255 std::move(offsets).extract_storage()
1256 };
1257
// NOTE(review): the line opening this call is missing from this listing; presumably
// `ArrowArray arr = make_arrow_array(`, as in sparse_union_array::create_proxy_impl.
1259 static_cast<std::int64_t>(size), // length
1260 0, // null_count: always 0 as the nullability is in children
1261 0, // offset
1262 std::move(arr_buffs),
1263 child_arrays, // children
1264 repeat_view<bool>(true, n_children), // children_ownership
1265 nullptr, // dictionary,
1266 true
1267 );
1268 return arrow_proxy{std::move(arr), std::move(schema)};
1269 }
1270
1271 /*************************************
1272 * sparse_union_array implementation *
1273 *************************************/
1274
1275 template <std::ranges::input_range TYPE_MAPPING, input_metadata_container METADATA_RANGE>
1276 requires(std::convertible_to<std::ranges::range_value_t<TYPE_MAPPING>, std::uint8_t>)
1277 auto sparse_union_array::create_proxy(
1278 std::vector<array>&& children,
1279 type_id_buffer_type&& element_type,
1280 std::optional<TYPE_MAPPING>&& child_index_to_type_id,
1281 std::optional<std::string_view> name,
1282 std::optional<METADATA_RANGE> metadata
1283 ) -> arrow_proxy
1284 {
1285 const auto n_children = children.size();
1286 if (child_index_to_type_id.has_value())
1287 {
1288 SPARROW_ASSERT_TRUE((*child_index_to_type_id).size() == n_children);
1289 }
1290
1291 std::string format = make_format_string(false /*is dense union*/, n_children, child_index_to_type_id);
1292
1293 return create_proxy_impl(
1294 std::move(children),
1295 std::move(element_type),
1296 std::move(format),
1297 std::move(name),
1298 std::move(metadata)
1299 );
1300 }
1301
1302 template <input_metadata_container METADATA_RANGE>
1303 auto sparse_union_array::create_proxy_impl(
1304 std::vector<array>&& children,
1305 type_id_buffer_type&& element_type,
1306 std::string&& format,
1307 std::optional<std::string_view> name,
1308 std::optional<METADATA_RANGE> metadata
1309 ) -> arrow_proxy
1310 {
1311 for (const auto& child : children)
1312 {
1313 SPARROW_ASSERT_TRUE(child.size() == element_type.size());
1314 }
1315 const auto n_children = children.size();
1316 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
1317 ArrowArray** child_arrays = new ArrowArray*[n_children];
1318 const auto size = element_type.size();
1319
1320 for (std::size_t i = 0; i < n_children; ++i)
1321 {
1322 auto& child = children[i];
1323 auto [flat_arr, flat_schema] = extract_arrow_structures(std::move(child));
1324 child_arrays[i] = new ArrowArray(std::move(flat_arr));
1325 child_schemas[i] = new ArrowSchema(std::move(flat_schema));
1326 }
1327
1328 const bool is_nullable = std::all_of(
1329 child_schemas,
1330 child_schemas + n_children,
1331 [](const ArrowSchema* schema)
1332 {
1333 return to_set_of_ArrowFlags(schema->flags).contains(ArrowFlag::NULLABLE);
1334 }
1335 );
1336
1337 const std::optional<std::unordered_set<sparrow::ArrowFlag>>
1338 flags = is_nullable
1339 ? std::make_optional(std::unordered_set<sparrow::ArrowFlag>{ArrowFlag::NULLABLE})
1340 : std::nullopt;
1341
1342 ArrowSchema schema = make_arrow_schema(
1343 std::move(format),
1344 std::move(name), // name
1345 std::move(metadata), // metadata
1346 flags, // flags,
1347 child_schemas, // children
1348 repeat_view<bool>(true, n_children), // children_ownership
1349 nullptr, // dictionary,
1350 true // dictionary ownership
1351 );
1352
1353 std::vector<buffer<std::uint8_t>> arr_buffs = {std::move(element_type).extract_storage()};
1354
1355 ArrowArray arr = make_arrow_array(
1356 static_cast<std::int64_t>(size), // length
1357 0, // null_count: always 0 as the nullability is in children
1358 0, // offset
1359 std::move(arr_buffs),
1360 child_arrays, // children
1361 repeat_view<bool>(true, n_children), // children_ownership
1362 nullptr, // dictionary
1363 true
1364 );
1365 return arrow_proxy{std::move(arr), std::move(schema)};
1366 }
1367}
1368
1369#if defined(__cpp_lib_format)
1370
/**
 * std::formatter specialization for the union array types.
 *
 * Output shape: `DenseUnion [name=<name> | size=<n>] <v0, v1, ...>` (or `SparseUnion`),
 * where the name is printed as `nullptr` when the array has none. An empty array is
 * printed with an empty element list: `<>`.
 */
template <typename U>
    requires std::derived_from<U, sparrow::union_array_crtp_base<U>>
struct std::formatter<U>
{
    /// Accepts no format specification: parsing stops at the start of the context.
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * Writes the textual representation of `ar` to the context output.
     *
     * @param ar Union array to format.
     * @param ctx Format context receiving the output.
     * @return Iterator past the last written character.
     */
    auto format(const U& ar, std::format_context& ctx) const
    {
        // Thread the output iterator through every call instead of re-reading
        // ctx.out(), which is not advanced by std::format_to.
        auto out = ctx.out();
        if constexpr (std::is_same_v<U, sparrow::dense_union_array>)
        {
            out = std::format_to(out, "DenseUnion");
        }
        else if constexpr (std::is_same_v<U, sparrow::sparse_union_array>)
        {
            out = std::format_to(out, "SparseUnion");
        }
        else
        {
            static_assert(sparrow::mpl::dependent_false<U>::value, "Unknown union array type");
        }

        const auto& proxy = ar.get_arrow_proxy();
        out = std::format_to(out, " [name={} | size={}] <", proxy.name().value_or("nullptr"), proxy.length());

        // Separator-before-element loop: it is well defined for an empty array,
        // unlike stepping back from cend() or calling back().
        bool is_first = true;
        for (const auto& value : ar)
        {
            if (!is_first)
            {
                out = std::format_to(out, ", ");
            }
            out = std::format_to(out, "{}", value);
            is_first = false;
        }

        return std::format_to(out, ">");
    }
};
1434
namespace sparrow
{
    /**
     * Streams a union array using its std::formatter representation.
     *
     * @param os Output stream to write to.
     * @param value Union array to print.
     * @return `os`, to allow chaining.
     */
    template <typename U>
        requires std::derived_from<U, union_array_crtp_base<U>>
    std::ostream& operator<<(std::ostream& os, const U& value)
    {
        return os << std::format("{}", value);
    }
}
1455
1456#endif
void sparse_union_array()
Base class for CRTP base classes.
Definition crtp_base.hpp:29
constexpr derived_type & derived_cast()
Definition crtp_base.hpp:39
Dense union array implementation with offset buffer.
SPARROW_API dense_union_array & operator=(const dense_union_array &rhs)
Copy assignment operator.
u8_buffer< std::uint32_t > offset_buffer_type
union_array_crtp_base< dense_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
dense_union_array(Args &&... args)
Generic constructor for creating dense union arrays.
SPARROW_API dense_union_array(arrow_proxy proxy)
Constructs dense union array from Arrow proxy.
SPARROW_API dense_union_array(const dense_union_array &rhs)
Copy constructor.
dense_union_array(dense_union_array &&rhs)=default
dense_union_array & operator=(dense_union_array &&rhs)=default
A view that repeats a value a given number of times.
Sparse union array implementation without offset buffer.
sparse_union_array(Args &&... args)
Generic constructor for creating sparse union arrays.
union_array_crtp_base< sparse_union_array > base_type
typename base_type::type_id_buffer_type type_id_buffer_type
SPARROW_API sparse_union_array(arrow_proxy proxy)
Constructs sparse union array from Arrow proxy.
This buffer class is used as storage buffer for all sparrow arrays.
CRTP base class providing shared functionality for union array implementations.
array_traits::inner_value_type inner_value_type
std::reverse_iterator< const_iterator > const_reverse_iterator
constexpr iterator begin()
Gets iterator to the beginning of the array.
detail::layout_bracket_functor< const derived_type, value_type > const_functor_type
constexpr std::optional< std::string_view > name() const
Gets the optional name of the union array.
array_traits::const_reference value_type
constexpr union_array_crtp_base(const self_type &rhs)
Copy constructor.
u8_buffer< std::uint8_t > type_id_buffer_type
static constexpr std::string make_format_string(bool dense, std::size_t n, const std::optional< R > &child_index_to_type_id)
Creates Arrow format string for union arrays.
SPARROW_CONSTEXPR_CLANG value_type front() const
Gets reference to the first element.
constexpr const_reverse_iterator crbegin() const
Gets const reverse iterator to the beginning of reversed array.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > m_type_id_map
constexpr arrow_proxy & get_arrow_proxy()
Gets mutable reference to the Arrow proxy.
union_array_crtp_base< DERIVED > self_type
constexpr self_type & operator=(self_type &&rhs)=default
union_array_crtp_base(arrow_proxy proxy)
Protected constructor from Arrow proxy.
array_traits::const_reference const_reference
detail::layout_bracket_functor< derived_type, value_type > functor_type
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i) const
Gets element at specified position without bounds checking.
std::array< std::uint8_t, TYPE_ID_MAP_SIZE > type_id_map
functor_index_iterator< const_functor_type > const_iterator
static constexpr type_id_map type_id_map_from_child_to_type_id(const std::optional< R > &child_index_to_type_id)
Creates type ID mapping from child index to type ID mapping.
constexpr const_reverse_iterator rbegin() const
Gets reverse iterator to the beginning of reversed array.
constexpr const_iterator begin() const
Gets const iterator to the beginning of the array.
SPARROW_CONSTEXPR_CLANG std::optional< key_value_view > metadata() const
Gets the metadata associated with the union array.
functor_index_iterator< functor_type > iterator
constexpr self_type & operator=(const self_type &rhs)
Copy assignment operator.
constexpr bool empty() const
Checks if the union array is empty.
SPARROW_CONSTEXPR_CLANG value_type at(size_type i) const
Gets element at specified position with bounds checking.
constexpr const_iterator cbegin() const
Gets const iterator to the beginning of the array.
constexpr const_reverse_iterator crend() const
Gets const reverse iterator to the end of reversed array.
constexpr const_iterator end() const
Gets const iterator to the end of the array.
static constexpr type_id_map parse_type_id_map(std::string_view format_string)
Parses type ID mapping from Arrow format string.
SPARROW_CONSTEXPR_CLANG value_type operator[](size_type i)
Gets mutable element at specified position.
constexpr const_iterator cend() const
Gets const iterator to the end of the array.
SPARROW_CONSTEXPR_CLANG value_type back() const
Gets reference to the last element.
constexpr union_array_crtp_base(self_type &&rhs)=default
constexpr const arrow_proxy & get_arrow_proxy() const
Gets const reference to the Arrow proxy.
constexpr iterator end()
Gets iterator to the end of the array.
constexpr size_type size() const
Gets the number of elements in the union array.
constexpr const_reverse_iterator rend() const
Gets reverse iterator to the end of reversed array.
constexpr void zero_null_values(const inner_value_type &value)
Sets all null values to the specified value.
std::vector< cloning_ptr< array_wrapper > > children_type
constexpr children_type make_children(arrow_proxy &proxy)
Creates child array wrappers from Arrow proxy.
Concept for input containers that can provide metadata pairs.
Definition metadata.hpp:304
#define SPARROW_CONSTEXPR_CLANG
Definition config.hpp:64
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
Definition mp_utils.hpp:216
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
void unreachable()
Invokes undefined behavior for optimization purposes.
Definition mp_utils.hpp:882
constexpr bool is_dense_union_array_v
Type trait to check if a type is a dense_union_array.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr void zero_null_values(R &range, const T &default_value=T{})
Sets null values in a range to a default value.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr std::size_t range_size(R &&r)
Definition ranges.hpp:32
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:98
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API cloning_ptr< array_wrapper > array_factory(arrow_proxy proxy)
SPARROW_API array_traits::const_reference array_element(const array_wrapper &ar, std::size_t index)
constexpr bool is_sparse_union_array_v
Type trait to check if a type is a sparse_union_array.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::unordered_set< ArrowFlag > to_set_of_ArrowFlags(int64_t flag_values)
Converts a bitfield of ArrowFlag values to a set of ArrowFlag values.
int64_t flags
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
Metafunction for retrieving the data_type of a typed array.