sparrow 1.0.0
Loading...
Searching...
No Matches
run_end_encoded_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include "sparrow/array_api.hpp"
23
24namespace sparrow
25{
27
28 namespace detail
29 {
30 template <>
32 {
33 [[nodiscard]] static constexpr sparrow::data_type get()
34 {
36 }
37 };
38 }
39
// Compile-time check: true exactly when T is the same type as run_end_encoded_array.
// The comparison uses std::same_as, so cv- or reference-qualified variants of the
// type yield false.
43 template <class T>
44 constexpr bool is_run_end_encoded_array_v = std::same_as<T, run_end_encoded_array>;
45
58 {
59 public:
60
62 using size_type = std::size_t;
68 using reverse_iterator = std::reverse_iterator<iterator>;
69 using const_reverse_iterator = std::reverse_iterator<const_iterator>;
70
82
// Generic constructor for creating a run-end encoded array.
// Perfect-forwards `args` to create_proxy and delegates to the constructor
// that takes the resulting arrow_proxy.
// NOTE(review): original line 96 is absent from this listing - presumably a
// constraint on Args (cf. excludes_copy_and_move_ctor_v); confirm against the header.
95 template <class... Args>
97 explicit run_end_encoded_array(Args&&... args)
98 : run_end_encoded_array(create_proxy(std::forward<Args>(args)...))
99 {
100 }
101
112
125
128
// Constant access operator for getting the element at index `i`.
135 [[nodiscard]] SPARROW_API array_traits::const_reference operator[](std::uint64_t i) const;
136
// Gets an iterator to the beginning of the array.
142 [[nodiscard]] SPARROW_API iterator begin();
143
144
// Gets an iterator to the end of the array.
150 [[nodiscard]] SPARROW_API iterator end();
151
// Gets a constant iterator to the beginning of the array.
157 [[nodiscard]] SPARROW_API const_iterator begin() const;
158
// Gets a constant iterator to the end of the array.
164 [[nodiscard]] SPARROW_API const_iterator end() const;
165
// Gets a constant iterator to the beginning of the array.
171 [[nodiscard]] SPARROW_API const_iterator cbegin() const;
172
// Gets a constant iterator to the end of the array.
178 [[nodiscard]] SPARROW_API const_iterator cend() const;
179
// NOTE(review): the listing drops the declarations that originally sat on the
// following lines - presumably rbegin/rend/crbegin/crend/front/back, which the
// member index of this page mentions; confirm against the real header.
186
193
200
207
214
221
228
235
// Checks if the array is empty.
241 [[nodiscard]] SPARROW_API bool empty() const;
242
// Gets the number of elements in the array.
248 [[nodiscard]] SPARROW_API size_type size() const;
249
// Gets the name of the array; the optional return type allows for no name.
255 [[nodiscard]] std::optional<std::string_view> name() const;
256
// Gets the metadata of the array; the optional return type allows for no metadata.
262 [[nodiscard]] std::optional<key_value_view> metadata() const;
263
264 private:
265
// Builds the arrow_proxy backing a run-end encoded array from two child arrays.
// Both arrays are taken by rvalue reference: `acc_lengths` becomes child 0 and
// `encoded_values` becomes child 1 in the out-of-class definition.
// `name` and `metadata` are optional and default to std::nullopt.
// METADATA_RANGE must satisfy input_metadata_container; it defaults to
// std::vector<metadata_pair>.
276 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
277 [[nodiscard]] static auto create_proxy(
278 array&& acc_lengths,
279 array&& encoded_values,
280 std::optional<std::string_view> name = std::nullopt,
281 std::optional<METADATA_RANGE> metadata = std::nullopt
282 ) -> arrow_proxy;
283
// Variant over const pointers to 16-, 32- or 64-bit signed integers; used as the
// type of m_acc_lengths and as the return type of get_acc_lengths_ptr.
284 using acc_length_ptr_variant_type = std::variant<const std::int16_t*, const std::int32_t*, const std::int64_t*>;
285
// Computes a pair of 64-bit values from the two child arrays.
// NOTE(review): create_proxy binds the result as (null_count, length) although the
// function name reads "length and null count" - confirm the order of the pair.
294 [[nodiscard]] SPARROW_API static std::pair<std::int64_t, std::int64_t>
295 extract_length_and_null_count(const array& acc_lengths_arr, const array& encoded_values_arr);
296
// Returns an acc_length_ptr_variant_type for the given array_wrapper.
// Presumably the active alternative matches the integer width of the
// accumulated-lengths array - TODO confirm in the implementation file.
303 [[nodiscard]] SPARROW_API static acc_length_ptr_variant_type
304 get_acc_lengths_ptr(const array_wrapper& ar);
305
// Presumably returns the accumulated length stored for run `run_index`, widened to
// std::uint64_t - the definition is not visible here; confirm.
312 [[nodiscard]] SPARROW_API std::uint64_t get_acc_length(std::uint64_t run_index) const;
313
// Mutable access to an arrow_proxy (presumably m_proxy - confirm).
319 [[nodiscard]] SPARROW_API arrow_proxy& get_arrow_proxy();
320
// Read-only access to an arrow_proxy (presumably m_proxy - confirm).
326 [[nodiscard]] SPARROW_API const arrow_proxy& get_arrow_proxy() const;
327
// Data members. Their exact roles are set in constructors that are not visible in
// this listing; the notes below are inferred from names and types only - confirm.
329 arrow_proxy m_proxy;
331 std::uint64_t m_encoded_length;
332
// cloning_ptr is a smart pointer behaving like a copiable std::unique_ptr;
// array_wrapper is the base class for array type erasure.
334 cloning_ptr<array_wrapper> p_acc_lengths_array;
336 cloning_ptr<array_wrapper> p_encoded_values_array;
338 acc_length_ptr_variant_type m_acc_lengths;
339
// The mutable (<false>, aliased as iterator) and const (<true>, aliased as
// const_iterator) iterator classes are granted access to private members.
340 // friend classes
341 friend class run_encoded_array_iterator<false>;
342 friend class run_encoded_array_iterator<true>;
344 };
345
348
// Out-of-class definition of create_proxy.
// Consumes the two child arrays, wraps their Arrow structures as children 0 and 1
// of a new parent schema/array pair, and returns an arrow_proxy built from that pair.
// acc_lengths and encoded_values are moved from; name and metadata are forwarded
// to the schema construction.
349 template <input_metadata_container METADATA_RANGE>
350 auto run_end_encoded_array::create_proxy(
351 array&& acc_lengths,
352 array&& encoded_values,
353 std::optional<std::string_view> name,
354 std::optional<METADATA_RANGE> metadata
355 ) -> arrow_proxy
356 {
// Read everything needed from the inputs before they are moved from below.
357 const auto flags = detail::array_access::get_arrow_proxy(encoded_values).flags();
// NOTE(review): the pair is bound as (null_count, length) while the helper is named
// extract_length_and_null_count - confirm the element order in its definition.
358 auto [null_count, length] = extract_length_and_null_count(acc_lengths, encoded_values);
359
// Take the ArrowArray/ArrowSchema pair out of each input array.
360 auto [acc_length_array, acc_length_schema] = extract_arrow_structures(std::move(acc_lengths));
361 auto [encoded_values_array, encoded_values_schema] = extract_arrow_structures(std::move(encoded_values));
362
// Two children: index 0 holds the accumulated lengths, index 1 the encoded values.
// NOTE(review): the pointer tables and their elements are allocated with raw `new`;
// nothing visible here frees them if a later allocation throws. Presumably the
// schema/array built below release them (ownership flags are all true) - confirm.
363 constexpr auto n_children = 2;
364 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
365 ArrowArray** child_arrays = new ArrowArray*[n_children];
366
367 child_schemas[0] = new ArrowSchema(std::move(acc_length_schema));
368 child_schemas[1] = new ArrowSchema(std::move(encoded_values_schema));
369
370 child_arrays[0] = new ArrowArray(std::move(acc_length_array));
371 child_arrays[1] = new ArrowArray(std::move(encoded_values_array));
372
// Ownership flag `true` repeated once per child; passed to both the schema and the
// array construction below.
373 const repeat_view<bool> children_ownserhip{true, n_children};
374
// NOTE(review): original line 375 is absent from this listing. The arguments below
// presumably belong to a make_arrow_schema(...) call whose result is the `schema`
// moved into the returned proxy - confirm against the real header.
// "+r" is passed as the format string.
376 std::string("+r"),
377 std::move(name), // name
378 std::move(metadata), // metadata
379 flags, // flags,
380 child_schemas, // children
381 children_ownserhip, // children ownership
382 nullptr, // dictionary
383 true // dictionary ownership
384 );
385
// The parent array is created with an empty buffer list.
386 std::vector<buffer<std::uint8_t>> arr_buffs = {};
387
388 ArrowArray arr = make_arrow_array(
389 static_cast<std::int64_t>(length), // length
390 static_cast<int64_t>(null_count), // null count
391 0, // offset
392 std::move(arr_buffs),
393 child_arrays, // children
394 children_ownserhip, // children ownership
395 nullptr, // dictionary
396 true // dictionary ownership
397 );
398
399 return arrow_proxy{std::move(arr), std::move(schema)};
400 }
401} // namespace sparrow
402
403
404#if defined(__cpp_lib_format)
405
// std::formatter specialization for sparrow::run_end_encoded_array.
// It is only declared when __cpp_lib_format is defined (see the enclosing #if).
406template <>
407struct std::formatter<sparrow::run_end_encoded_array>
408{
// Does not inspect the format specification: returns the beginning of the parse
// context unchanged.
409 constexpr auto parse(std::format_parse_context& ctx)
410 {
411 return ctx.begin(); // Simple implementation
412 }
413
// Formats `ar` into the output of `ctx`; the return type is that of ctx.out().
// Declared here only - the definition is not part of this header.
414 SPARROW_API auto format(const sparrow::run_end_encoded_array& ar, std::format_context& ctx) const
415 -> decltype(ctx.out());
416};
417
418namespace sparrow
419{
// Stream insertion operator for run_end_encoded_array; returns a std::ostream reference.
// Declared here only. This declaration sits inside the __cpp_lib_format guard, so it
// is unavailable when that macro is not defined.
420 SPARROW_API std::ostream& operator<<(std::ostream& os, const sparrow::run_end_encoded_array& value);
421}
422
423#endif
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:40
SPARROW_API std::unordered_set< ArrowFlag > flags() const
Gets the Arrow flags set for this array.
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
static const sparrow::arrow_proxy & get_arrow_proxy(const ARRAY &array)
A view that repeats a value a given number of times.
SPARROW_API const_iterator end() const
Gets a constant iterator to the end of the array.
SPARROW_API const_reverse_iterator rend() const
Gets a constant reverse iterator to the end of the reversed array.
std::optional< key_value_view > metadata() const
Gets the metadata of the array.
array_traits::const_reference const_reference
std::reverse_iterator< iterator > reverse_iterator
run_encoded_array_iterator< true > const_iterator
array_traits::inner_value_type inner_value_type
SPARROW_API size_type size() const
Gets the number of elements in the array.
SPARROW_API array_traits::const_reference back() const
Gets a constant reference to the last element.
SPARROW_API bool empty() const
Checks if the array is empty.
SPARROW_API const_reverse_iterator rbegin() const
Gets a constant reverse iterator to the beginning of the reversed array.
SPARROW_API iterator end()
Gets an iterator to the end of the array.
SPARROW_API iterator begin()
Gets an iterator to the beginning of the array.
SPARROW_API const_iterator begin() const
Gets a constant iterator to the beginning of the array.
self_type & operator=(self_type &&)=default
SPARROW_API array_traits::const_reference operator[](std::uint64_t i) const
Constant access operator for getting element at index.
SPARROW_API run_end_encoded_array(const self_type &)
Copy constructor.
run_end_encoded_array(self_type &&)=default
SPARROW_API run_end_encoded_array(arrow_proxy proxy)
Constructs run-end encoded array from Arrow proxy.
std::optional< std::string_view > name() const
Gets the name of the array.
std::reverse_iterator< const_iterator > const_reverse_iterator
SPARROW_API const_reverse_iterator crbegin() const
Gets a constant reverse iterator to the beginning of the reversed array.
SPARROW_API reverse_iterator rend()
Gets a reverse iterator to the end of the reversed array.
SPARROW_API array_traits::const_reference front() const
Gets a constant reference to the first element.
SPARROW_API const_iterator cend() const
Gets a constant iterator to the end of the array.
SPARROW_API self_type & operator=(const self_type &)
Copy assignment operator.
run_end_encoded_array(Args &&... args)
Generic constructor for creating run-end encoded array.
SPARROW_API reverse_iterator rbegin()
Gets a reverse iterator to the beginning of the reversed array.
SPARROW_API const_reverse_iterator crend() const
Gets a constant reverse iterator to the end of the reversed array.
SPARROW_API const_iterator cbegin() const
Gets a constant iterator to the beginning of the array.
run_encoded_array_iterator< false > iterator
#define SPARROW_API
Definition config.hpp:38
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_run_end_encoded_array_v
Checks whether T is a run_end_encoded_array type.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Metafunction for retrieving the data_type of a typed array.