sparrow 0.6.0
Loading...
Searching...
No Matches
run_end_encoded_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include "sparrow/array_api.hpp"
23
24namespace sparrow
25{
27
31 template <class T>
32 constexpr bool is_run_end_encoded_array_v = std::same_as<T, run_end_encoded_array>;
33
// Helper trait kept in the nested `detail` namespace.
34 namespace detail
35 {
// Primary template: only declared here, no definition is visible in this listing.
36 template <class T>
37 struct get_data_type_from_array;
38
// Full specialization exposing a static constexpr get() that returns a sparrow::data_type.
// NOTE(review): the line naming the specialized type (between `template <>` and the
// opening brace) and the body of get() are absent from this listing, so the type it
// applies to and the value it returns cannot be read from here - confirm against the
// original header.
39 template <>
41 {
42 [[nodiscard]] static constexpr sparrow::data_type get()
43 {
45 }
46 };
47 }
48
// Body of the run_end_encoded_array class.
// NOTE(review): the class head and several member declarations (type aliases such as
// iterator / const_iterator / array_traits, and further constructors) are not visible
// in this listing - the gaps in the line numbering show where lines were dropped.
50 {
51 public:
52
// Unsigned type returned by size().
54 using size_type = std::size_t;
58
60
// Variadic constructor: perfectly forwards its arguments to create_proxy() and
// delegates to another run_end_encoded_array constructor with the returned value.
// NOTE(review): one line between the template header and the signature is missing
// from this listing (possibly a constraint on Args) - confirm before relying on it.
61 template <class... Args>
63 explicit run_end_encoded_array(Args&&... args)
64 : run_end_encoded_array(create_proxy(std::forward<Args>(args)...))
65 {
66 }
67
70
73
// Const element access: returns the array_traits::const_reference for index i.
75 [[nodiscard]] SPARROW_API array_traits::const_reference operator[](std::uint64_t i) const;
76
// Iterators usable on a non-const array.
77 [[nodiscard]] SPARROW_API iterator begin();
78 [[nodiscard]] SPARROW_API iterator end();
79
// Iterators usable on a const array.
80 [[nodiscard]] SPARROW_API const_iterator begin() const;
81 [[nodiscard]] SPARROW_API const_iterator end() const;
82
// Explicitly const iterators.
83 [[nodiscard]] SPARROW_API const_iterator cbegin() const;
84 [[nodiscard]] SPARROW_API const_iterator cend() const;
85
88
// Emptiness and size queries.
89 [[nodiscard]] SPARROW_API bool empty() const;
90 [[nodiscard]] SPARROW_API size_type size() const;
91
// Optional name and optional metadata view of the array.
// NOTE(review): unlike the neighbouring members these two are not marked
// SPARROW_API - confirm this is intentional.
92 [[nodiscard]] std::optional<std::string_view> name() const;
93 [[nodiscard]] std::optional<key_value_view> metadata() const;
94
95 private:
96
// Static factory used by the variadic constructor: takes the two child arrays by
// rvalue reference plus an optional name and optional metadata (both defaulting to
// std::nullopt) and returns an arrow_proxy. The metadata container type defaults to
// std::vector<metadata_pair>.
97 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
98 [[nodiscard]] static auto create_proxy(
99 array&& acc_lengths,
100 array&& encoded_values,
101 std::optional<std::string_view> name = std::nullopt,
102 std::optional<METADATA_RANGE> metadata = std::nullopt
103 ) -> arrow_proxy;
104
// Holds a const pointer to 16-, 32- or 64-bit unsigned integers.
// presumably it points at the accumulated-lengths data - TODO confirm.
105 using acc_length_ptr_variant_type = std::variant<const std::uint16_t*, const std::uint32_t*, const std::uint64_t*>;
106
// Returns a pair of 64-bit signed integers computed from two arrays; create_proxy()
// binds the result as [null_count, length].
107 [[nodiscard]] SPARROW_API static std::pair<std::int64_t, std::int64_t>
108 extract_length_and_null_count(const array&, const array&);
// Produces the pointer variant for the given array wrapper.
109 [[nodiscard]] SPARROW_API static acc_length_ptr_variant_type
110 get_acc_lengths_ptr(const array_wrapper& ar);
// Returns a 64-bit unsigned value for the run at run_index.
111 [[nodiscard]] SPARROW_API std::uint64_t get_run_length(std::uint64_t run_index) const;
112
// Mutable and const access to the underlying arrow_proxy.
113 [[nodiscard]] SPARROW_API arrow_proxy& get_arrow_proxy();
114 [[nodiscard]] SPARROW_API const arrow_proxy& get_arrow_proxy() const;
115
// Arrow proxy held by value, plus a 64-bit length.
// NOTE(review): m_encoded_length presumably counts the encoded runs - confirm.
116 arrow_proxy m_proxy;
117 std::uint64_t m_encoded_length;
118
// Copyable smart pointers to the two array wrappers, and the typed pointer variant
// declared above.
119 cloning_ptr<array_wrapper> p_acc_lengths_array;
120 cloning_ptr<array_wrapper> p_encoded_values_array;
121 acc_length_ptr_variant_type m_acc_lengths;
122
// Both iterator instantiations (template argument false and true) are granted
// access to the private members.
123 // friend classes
124 friend class run_encoded_array_iterator<false>;
125 friend class run_encoded_array_iterator<true>;
127 };
128
131
132 template <input_metadata_container METADATA_RANGE>
133 auto run_end_encoded_array::create_proxy(
134 array&& acc_lengths,
135 array&& encoded_values,
136 std::optional<std::string_view> name,
137 std::optional<METADATA_RANGE> metadata
138 ) -> arrow_proxy
139 {
140 auto [null_count, length] = extract_length_and_null_count(acc_lengths, encoded_values);
141
142 auto [acc_length_array, acc_length_schema] = extract_arrow_structures(std::move(acc_lengths));
143 auto [encoded_values_array, encoded_values_schema] = extract_arrow_structures(std::move(encoded_values));
144
145 constexpr auto n_children = 2;
146 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
147 ArrowArray** child_arrays = new ArrowArray*[n_children];
148
149 child_schemas[0] = new ArrowSchema(std::move(acc_length_schema));
150 child_schemas[1] = new ArrowSchema(std::move(encoded_values_schema));
151
152 child_arrays[0] = new ArrowArray(std::move(acc_length_array));
153 child_arrays[1] = new ArrowArray(std::move(encoded_values_array));
154
155 const repeat_view<bool> children_ownserhip{true, n_children};
156
158 std::string("+r"),
159 std::move(name), // name
160 std::move(metadata), // metadata
161 std::nullopt, // flags,
162 child_schemas, // children
163 children_ownserhip, // children ownership
164 nullptr, // dictionary
165 true // dictionary ownership
166 );
167
168 std::vector<buffer<std::uint8_t>> arr_buffs = {};
169
170 ArrowArray arr = make_arrow_array(
171 static_cast<std::int64_t>(length), // length
172 static_cast<int64_t>(null_count),
173 0, // offset
174 std::move(arr_buffs),
175 child_arrays, // children
176 children_ownserhip, // children ownership
177 nullptr, // dictionary
178 true // dictionary ownership
179 );
180
181 return arrow_proxy{std::move(arr), std::move(schema)};
182 }
183} // namespace sparrow
184
185
186#if defined(__cpp_lib_format)
187
// std::format support for sparrow::run_end_encoded_array.
template <>
struct std::formatter<sparrow::run_end_encoded_array>
{
    // No format specification is interpreted: the iterator to the beginning of
    // the parse context is returned unchanged.
    constexpr auto parse(std::format_parse_context& parse_context) -> decltype(parse_context.begin())
    {
        return parse_context.begin();
    }

    // Writes `ar` to the output of `ctx` and returns the resulting output
    // iterator. Only declared here; no definition is visible in this header.
    SPARROW_API auto format(const sparrow::run_end_encoded_array& ar, std::format_context& ctx) const
        -> decltype(ctx.out());
};
199
200namespace sparrow
201{
202 SPARROW_API std::ostream& operator<<(std::ostream& os, const sparrow::run_end_encoded_array& value);
203}
204
205#endif
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
Proxy class over ArrowArray and ArrowSchema.
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:127
A view that repeats a value a given number of times.
SPARROW_API const_iterator end() const
SPARROW_API array_traits::const_reference operator[](std::uint64_t i)
std::optional< key_value_view > metadata() const
run_encoded_array_iterator< true > const_iterator
array_traits::inner_value_type inner_value_type
SPARROW_API size_type size() const
SPARROW_API array_traits::const_reference back() const
SPARROW_API bool empty() const
SPARROW_API iterator end()
SPARROW_API iterator begin()
SPARROW_API const_iterator begin() const
self_type & operator=(self_type &&)=default
SPARROW_API array_traits::const_reference operator[](std::uint64_t i) const
SPARROW_API run_end_encoded_array(const self_type &)
run_end_encoded_array(self_type &&)=default
SPARROW_API run_end_encoded_array(arrow_proxy proxy)
std::optional< std::string_view > name() const
SPARROW_API array_traits::const_reference front() const
SPARROW_API const_iterator cend() const
SPARROW_API self_type & operator=(const self_type &)
SPARROW_API const_iterator cbegin() const
run_encoded_array_iterator< false > iterator
#define SPARROW_API
Definition config.hpp:38
constexpr bool excludes_copy_and_move_ctor_v
Definition mp_utils.hpp:507
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_run_end_encoded_array_v
Checks whether T is a run_end_encoded_array type.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
std::ostream & operator<<(std::ostream &stream, T n)
Definition large_int.hpp:93
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
mpl::rename< mpl::transform< detail::array_const_reference_t, all_base_types_t >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type