sparrow 0.9.0
Loading...
Searching...
No Matches
map_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include "sparrow/array_api.hpp"
25
26namespace sparrow
27{
// Forward declaration: lets the declarations below name map_array before the
// class itself is defined.
28 class map_array;
29
// NOTE(review): listing lines 31-41 are absent from this rendering, so the explicit
// specialization introduced by `template <>` is not visible here (presumably the
// array_inner_types specialization for map_array — confirm against the original header).
30 template <>
42
// Type predicate: true exactly when T is the same type as map_array (std::same_as).
43 template <class T>
44 constexpr bool is_map_array_v = std::same_as<T, map_array>;
45
// map_array: final array class deriving from array_bitmap_base<map_array>.
// Per-instance state (data members at the end of the class): a pointer to the list
// offsets, a cloning_ptr to the entries array wrapper, and a keys-sorted flag.
// NOTE(review): this is a rendered listing; gaps in the embedded line numbers mean that
// some declarations used below (e.g. self_type, inner_types, size_type,
// offset_buffer_type) are not shown inside this block.
46 class SPARROW_API map_array final : public array_bitmap_base<map_array>
47 {
48 public:
49
// Iterator aliases re-exported from inner_types.
53 using value_iterator = inner_types::value_iterator;
54 using const_value_iterator = inner_types::const_value_iterator;
// Offsets are const-qualified 32-bit signed integers.
56 using offset_type = const std::int32_t;
58
61
63
// Value and reference aliases re-exported from inner_types.
64 using inner_value_type = inner_types::inner_value_type;
65 using inner_reference = inner_types::inner_reference;
66 using inner_const_reference = inner_types::inner_const_reference;
67
71
// Constructs the array from an arrow_proxy; only declared here.
72 explicit map_array(arrow_proxy proxy);
73
// Variadic constructor: perfect-forwards its arguments to create_proxy and delegates
// to another constructor of this class with the result.
// NOTE(review): listing line 75 is not shown (presumably a constraint that keeps this
// template from matching copy/move construction — confirm).
74 template <class... Args>
76 explicit map_array(Args&&... args)
77 : self_type(create_proxy(std::forward<Args>(args)...))
78 {
79 }
80
83
// Move construction and move assignment are defaulted and declared noexcept.
84 map_array(map_array&&) noexcept = default;
85 map_array& operator=(map_array&&) noexcept = default;
86
// Keys accessors returning an array_wrapper pointer (const and non-const overloads).
87 [[nodiscard]] const array_wrapper* raw_keys_array() const;
88 [[nodiscard]] array_wrapper* raw_keys_array();
89
// Items accessor, const overload (listing line 91 is not shown).
90 [[nodiscard]] const array_wrapper* raw_items_array() const;
92
// Static helper taking a range of sizes and returning an offset_buffer_type;
// only declared here.
93 template <std::ranges::range SIZES_RANGE>
94 [[nodiscard]] static auto offset_from_sizes(SIZES_RANGE&& sizes) -> offset_buffer_type;
95
96 private:
97
// Value iteration entry points; private, with array_crtp_base<map_array> befriended
// at the end of the class.
98 [[nodiscard]] value_iterator value_begin();
99 [[nodiscard]] value_iterator value_end();
100 [[nodiscard]] const_value_iterator value_cbegin() const;
101 [[nodiscard]] const_value_iterator value_cend() const;
102
// Indexed element access, mutable and const.
103 [[nodiscard]] inner_reference value(size_type i);
104 [[nodiscard]] inner_const_reference value(size_type i) const;
105
// Const helpers whose return types match the three data members declared below
// (presumably used to initialise them — definitions are not shown here).
106 [[nodiscard]] offset_type* make_list_offsets() const;
107 [[nodiscard]] cloning_ptr<array_wrapper> make_entries_array() const;
108 [[nodiscard]] bool get_keys_sorted() const;
109
// Static check over the flat keys and the offsets. The create_proxy definitions use
// its result to decide whether ArrowFlag::MAP_KEYS_SORTED is set.
110 [[nodiscard]] static bool check_keys_sorted(const array& flat_keys, const offset_buffer_type& offsets);
111
// create_proxy, overload 1: ready-made offset buffer plus a validity input (VB).
// Declared here; name and metadata default to std::nullopt.
// NOTE(review): listing line 113 is not shown; it presumably declares the VB template
// parameter used in the parameter list — confirm.
112 template <
114 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
115 [[nodiscard]] static arrow_proxy create_proxy(
116 array&& flat_keys,
117 array&& flat_items,
118 offset_buffer_type&& list_offsets,
119 VB&& validity_input,
120 std::optional<std::string_view> name = std::nullopt,
121 std::optional<METADATA_RANGE> metadata = std::nullopt
122 );
123
// create_proxy, overload 2: accepts any input range whose values convert to
// offset_type, builds an offset_buffer_type from it and delegates to create_proxy
// with that buffer and the forwarded validity input.
// NOTE(review): listing line 125 is not shown (presumably the VB template parameter).
124 template <
126 std::ranges::input_range OFFSET_BUFFER_RANGE,
127 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
128 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
129 [[nodiscard]] static arrow_proxy create_proxy(
130 array&& flat_keys,
131 array&& flat_items,
132 OFFSET_BUFFER_RANGE&& list_offsets_range,
133 VB&& validity_input,
134 std::optional<std::string_view> name = std::nullopt,
135 std::optional<METADATA_RANGE> metadata = std::nullopt
136 )
137 {
// NOTE(review): list_offsets_range is a forwarding reference but is cast with
// std::move, so an lvalue argument is handed over as an rvalue. Whether the caller's
// range is actually emptied depends on offset_buffer_type's constructors (not shown);
// std::forward<OFFSET_BUFFER_RANGE> would preserve the caller's value category.
138 offset_buffer_type list_offsets{std::move(list_offsets_range)};
139 return map_array::create_proxy(
140 std::move(flat_keys),
141 std::move(flat_items),
142 std::move(list_offsets),
143 std::forward<VB>(validity_input),
// name and metadata are by-value parameters; std::forward with a non-reference type
// argument yields an rvalue, so these two calls behave like std::move.
144 std::forward<std::optional<std::string_view>>(name),
145 std::forward<std::optional<METADATA_RANGE>>(metadata)
146 );
147 }
148
// create_proxy, overload 3: ready-made offset buffer plus a `nullable` switch
// (default true) instead of a validity input. Declared here.
// NOTE(review): the second parameter is spelled flat_values in this declaration but
// flat_items in the out-of-class definition and in the sibling overloads.
// NOTE(review): listing line 150 is not shown.
149 template <
151 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
152 [[nodiscard]] static arrow_proxy create_proxy(
153 array&& flat_keys,
154 array&& flat_values,
155 offset_buffer_type&& list_offsets,
156 bool nullable = true,
157 std::optional<std::string_view> name = std::nullopt,
158 std::optional<METADATA_RANGE> metadata = std::nullopt
159 );
160
// create_proxy, overload 4: range-of-offsets counterpart of overload 3; builds an
// offset_buffer_type from the range and delegates with the `nullable` switch.
// NOTE(review): listing line 162 is not shown.
161 template <
163 std::ranges::input_range OFFSET_BUFFER_RANGE,
164 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
165 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
166 [[nodiscard]] static arrow_proxy create_proxy(
167 array&& flat_keys,
168 array&& flat_items,
169 OFFSET_BUFFER_RANGE&& list_offsets_range,
170 bool nullable = true,
171 std::optional<std::string_view> name = std::nullopt,
172 std::optional<METADATA_RANGE> metadata = std::nullopt
173 )
174 {
// NOTE(review): same std::move-on-forwarding-reference pattern as in the
// validity-input range overload; an lvalue range is treated as an rvalue here.
175 offset_buffer_type list_offsets{std::move(list_offsets_range)};
176 return map_array::create_proxy(
177 std::move(flat_keys),
178 std::move(flat_items),
179 std::move(list_offsets),
180 nullable,
// By-value parameters: these std::forward calls act like std::move.
181 std::forward<std::optional<std::string_view>>(name),
182 std::forward<std::optional<METADATA_RANGE>>(metadata)
183 );
184 }
185
// Position of the offsets buffer. Consistent with the create_proxy definitions, which
// place the validity storage first and the offsets storage second in the buffer vector.
186 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
// Pointer to the list offsets (elements are const std::int32_t, per offset_type).
187 offset_type* p_list_offsets;
188
// Entries array wrapper, held through a cloning_ptr.
189 cloning_ptr<array_wrapper> p_entries_array;
// Keys-sorted flag (same type as the result of get_keys_sorted()).
190 bool m_keys_sorted;
191
192 // friend classes
// Grants array_crtp_base<map_array> access to the private members above.
// NOTE(review): listing line 194 is not shown.
193 friend class array_crtp_base<map_array>;
195 };
196
// Out-of-class definition of a function template over SIZES_RANGE.
// NOTE(review): listing lines 198 and 200 (the function signature and the start of the
// return expression) are not shown in this rendering. This is presumably the definition
// of map_array::offset_from_sizes — verify against the original header.
// The visible part perfect-forwards `sizes` as the argument of a call.
197 template <std::ranges::range SIZES_RANGE>
199 {
201 std::forward<SIZES_RANGE>(sizes)
202 );
203 }
204
// Builds the arrow_proxy for a map array that carries a validity bitmap.
//
// Parameters:
//   flat_keys, flat_items - child arrays; both are moved into an "entries" struct_array.
//   list_offsets          - offset buffer; its storage becomes the second array buffer.
//   validity_input        - forwarded to ensure_validity_bitmap to obtain the bitmap.
//   name, metadata        - passed through to make_arrow_schema.
// Returns: an arrow_proxy constructed from the ArrowArray / ArrowSchema built below.
//
// Statement order matters: size, the keys-sorted check and null_count are all read
// before the objects they depend on are moved from.
205 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
206 arrow_proxy map_array::create_proxy(
207 array&& flat_keys,
208 array&& flat_items,
209 offset_buffer_type&& list_offsets,
210 VB&& validity_input,
211 std::optional<std::string_view> name,
212 std::optional<METADATA_RANGE> metadata
213 )
214 {
// Array length is the number of offsets minus one.
// NOTE(review): assumes list_offsets holds at least one element; if size() is unsigned
// and the buffer is empty, this subtraction wraps around — confirm callers guarantee it.
215 const auto size = list_offsets.size() - 1;
216 validity_bitmap vbitmap = ensure_validity_bitmap(size, std::forward<VB>(validity_input));
217
// NULLABLE is always set on this path; MAP_KEYS_SORTED is added only when the check
// over the keys and offsets succeeds.
218 std::optional<std::unordered_set<ArrowFlag>> flags{{ArrowFlag::NULLABLE}};
219 bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
220 if (keys_sorted)
221 {
222 flags.value().insert(ArrowFlag::MAP_KEYS_SORTED);
223 }
224
// Keys and items become the two children of a struct_array named "entries"
// (the `false` argument is presumably the struct's nullable switch — confirm).
225 std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
226 struct_array entries(std::move(struct_children), false, std::string("entries"));
227
// Take the Arrow array/schema pair out of the entries struct so they can be attached
// as the single child below.
228 auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries));
229
// Read the null count now: vbitmap's storage is extracted (moved) further down.
230 const auto null_count = vbitmap.null_count();
// Ownership flags for the children: the value `true` repeated once (one child).
231 const repeat_view<bool> children_ownership{true, 1};
232
// Schema with format "+m" and one child (the entries schema), allocated with new and
// handed to make_arrow_schema together with the ownership flags.
233 ArrowSchema schema = make_arrow_schema(
234 std::string("+m"),
235 name, // name
236 metadata, // metadata
237 flags, // flags,
238 new ArrowSchema*[1]{new ArrowSchema(std::move(entries_schema))},
239 children_ownership, // children ownership
240 nullptr, // dictionary
241 true // dictionary ownership
242
243 );
244
// Buffer order: validity storage first, offsets storage second.
245 std::vector<buffer<std::uint8_t>> arr_buffs = {
246 std::move(vbitmap).extract_storage(),
247 std::move(list_offsets).extract_storage()
248 };
249
// Array with `size` elements, the null count read above, offset 0, the two buffers
// and one child (the entries array), allocated with new.
250 ArrowArray arr = make_arrow_array(
251 static_cast<std::int64_t>(size), // length
252 static_cast<std::int64_t>(null_count),
253 0, // offset
254 std::move(arr_buffs),
255 new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))},
256 children_ownership, // children ownership
257 nullptr, // dictionary
258 true // dictionary ownership
259 );
260 return arrow_proxy{std::move(arr), std::move(schema)};
261 }
262
// Builds the arrow_proxy for a map array, selecting the layout with a `nullable` switch.
//
// Parameters:
//   flat_keys, flat_items - child arrays; moved into the result on either path.
//   list_offsets          - offset buffer; moved into the result on either path.
//   nullable              - true: delegate to another create_proxy overload;
//                           false: build an array without a validity bitmap.
//   name, metadata        - passed through to the schema (or to the delegate).
// Returns: the arrow_proxy produced by the selected path.
//
// NOTE(review): VB is a template parameter here but does not appear in the visible
// parameter list.
263 template <validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
264 arrow_proxy map_array::create_proxy(
265 array&& flat_keys,
266 array&& flat_items,
267 offset_buffer_type&& list_offsets,
268 bool nullable,
269 std::optional<std::string_view> name,
270 std::optional<METADATA_RANGE> metadata
271 )
272 {
273 if (nullable)
274 {
// Nullable path: hand everything to another create_proxy overload.
// NOTE(review): listing line 279 (one argument between list_offsets and name) is not
// shown in this rendering; presumably it supplies the validity input for the
// bitmap-taking overload — verify against the original header.
275 return map_array::create_proxy(
276 std::move(flat_keys),
277 std::move(flat_items),
278 std::move(list_offsets),
280 name,
281 metadata
282 );
283 }
284 else
285 {
// Non-nullable path: the only flag that can be set is MAP_KEYS_SORTED; when the keys
// are not sorted no flag set is passed at all (std::nullopt).
286 bool keys_sorted = check_keys_sorted(flat_keys, list_offsets);
287 auto flags = keys_sorted
288 ? std::optional<std::unordered_set<ArrowFlag>>{{ArrowFlag::MAP_KEYS_SORTED}}
289 : std::nullopt;
290
// Array length is the number of offsets minus one.
// NOTE(review): assumes list_offsets holds at least one element; if size() is unsigned
// and the buffer is empty, this subtraction wraps around — confirm.
291 const auto size = list_offsets.size() - 1;
292
// Keys and items become the two children of a struct_array named "entries".
293 std::array<sparrow::array, 2> struct_children = {std::move(flat_keys), std::move(flat_items)};
294 struct_array entries(std::move(struct_children), false, std::string("entries"));
295
// Take the Arrow array/schema pair out of the entries struct; one child, flagged
// `true` in the ownership view.
296 auto [entries_arr, entries_schema] = extract_arrow_structures(std::move(entries));
297 const repeat_view<bool> children_ownership{true, 1};
298
// Schema with format "+m" and the entries schema as its single child.
// NOTE(review): the format is passed as std::string_view here, whereas the
// validity-input overload passes std::string("+m") — harmless if make_arrow_schema
// accepts both, but the two paths are inconsistent.
299 ArrowSchema schema = make_arrow_schema(
300 std::string_view("+m"),
301 name, // name
302 metadata, // metadata
303 flags, // flags,
304 new ArrowSchema*[1]{new ArrowSchema(std::move(entries_schema))},
305 children_ownership, // children ownership
306 nullptr, // dictionary
307 true // dictionary ownership
308
309 );
310
// Buffer order is kept (validity slot first, offsets second), with an empty buffer
// standing in for the validity bitmap.
311 std::vector<buffer<std::uint8_t>> arr_buffs = {
312 buffer<std::uint8_t>{nullptr, 0}, // no validity bitmap
313 std::move(list_offsets).extract_storage()
314 };
315
// Array with `size` elements; the second argument (null count) and the third (offset)
// are both 0 on this path.
316 ArrowArray arr = make_arrow_array(
317 static_cast<std::int64_t>(size), // length
318 0,
319 0, // offset
320 std::move(arr_buffs),
321 new ArrowArray*[1]{new ArrowArray(std::move(entries_arr))},
322 children_ownership, // children ownership
323 nullptr, // dictionary
324 true // dictionary ownership
325 );
326 return arrow_proxy{std::move(arr), std::move(schema)};
327 }
328 }
329}
void struct_array()
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Base class defining common immutable interface for arrays with a bitmap.
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Proxy class over ArrowArray and ArrowSchema.
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
constexpr size_type null_count() const noexcept
inner_types::inner_const_reference inner_const_reference
Definition map_array.hpp:66
inner_types::const_value_iterator const_value_iterator
Definition map_array.hpp:54
typename base_type::iterator_tag iterator_tag
Definition map_array.hpp:70
nullable< inner_value_type > value_type
Definition map_array.hpp:68
map_array(arrow_proxy proxy)
inner_types::inner_value_type inner_value_type
Definition map_array.hpp:64
map_array & operator=(const self_type &)
map_array(Args &&... args)
Definition map_array.hpp:76
map_array self_type
Definition map_array.hpp:50
const array_wrapper * raw_keys_array() const
map_array(const self_type &)
typename base_type::const_bitmap_range const_bitmap_range
Definition map_array.hpp:62
inner_types::value_iterator value_iterator
Definition map_array.hpp:53
static auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
map_array(map_array &&) noexcept=default
inner_types::inner_reference inner_reference
Definition map_array.hpp:65
const std::int32_t offset_type
Definition map_array.hpp:56
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
Definition map_array.hpp:57
array_bitmap_base< self_type > base_type
Definition map_array.hpp:51
typename base_type::bitmap_type bitmap_type
Definition map_array.hpp:59
const array_wrapper * raw_items_array() const
nullable< inner_const_reference, bitmap_const_reference > const_reference
Definition map_array.hpp:69
array_inner_types< self_type > inner_types
Definition map_array.hpp:52
typename base_type::size_type size_type
Definition map_array.hpp:55
typename base_type::bitmap_const_reference bitmap_const_reference
Definition map_array.hpp:60
The nullable class models a value or a reference that can be "null", or missing, like values traditio...
Definition nullable.hpp:278
This buffer class is used as the storage buffer for all sparrow arrays.
Definition u8_buffer.hpp:75
#define SPARROW_API
Definition config.hpp:38
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Definition mp_utils.hpp:106
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:574
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
array_bitmap_base_impl< D, false > array_bitmap_base
Convenient typedef to be used as a crtp base class for arrays using an immutable validity buffer.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArrays and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
dynamic_bitset< std::uint8_t > validity_bitmap
constexpr bool is_map_array_v
Definition map_array.hpp:44
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
std::pair< metadata_key, metadata_value > metadata_pair
Definition metadata.hpp:39
functor_index_iterator< detail::layout_value_functor< array_type, inner_value_type > > value_iterator
Definition map_array.hpp:37
functor_index_iterator< detail::layout_value_functor< const array_type, inner_value_type > > const_value_iterator
Definition map_array.hpp:38
std::random_access_iterator_tag iterator_tag
Definition map_array.hpp:40
Base class for array_inner_types specialization.
Traits class that must be specialized by array classes inheriting from array_crtp_base.