sparrow 0.9.0
Loading...
Searching...
No Matches
run_end_encoded_array.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include "sparrow/array_api.hpp"
23
24namespace sparrow
25{
27
// Implementation details: holds an explicit (full) template specialization whose
// static constexpr get() returns a sparrow::data_type.
28 namespace detail
29 {
// NOTE(review): listing line 31 (the name of the struct being specialized) is
// missing from this extract; presumably it is the data_type metafunction
// specialized for run_end_encoded_array. Confirm against the real header.
30 template <>
32 {
33 [[nodiscard]] static constexpr sparrow::data_type get()
34 {
// NOTE(review): listing line 35 (the return statement) is missing from this extract.
36 }
37 };
38 }
39
// Checks whether T is a run_end_encoded_array type.
// Uses std::same_as, so the match is exact: cv-qualified or reference
// variants of run_end_encoded_array yield false.
43 template <class T>
44 constexpr bool is_run_end_encoded_array_v = std::same_as<T, run_end_encoded_array>;
45
// Body of the run_end_encoded_array class.
// NOTE(review): the class-head line (listing line 57) and several members
// (gaps in the listing numbering, e.g. 61, 63-67, 69, 72, 78-84, 96-97, 136)
// are absent from this extract; consult the real header for the full interface.
58 {
59 public:
60
// Unsigned type used for sizes (see size()).
62 using size_type = std::size_t;
68
70
// Variadic constructor: forwards its arguments to create_proxy() and delegates
// to another constructor of this class with the resulting arrow_proxy.
// NOTE(review): listing line 72 is missing here; possibly a constraint on Args.
71 template <class... Args>
73 explicit run_end_encoded_array(Args&&... args)
74 : run_end_encoded_array(create_proxy(std::forward<Args>(args)...))
75 {
76 }
77
80
83
// Access by index; declared here, defined out of line.
85 [[nodiscard]] SPARROW_API array_traits::const_reference operator[](std::uint64_t i) const;
86
// Mutable-iterator range.
87 [[nodiscard]] SPARROW_API iterator begin();
88 [[nodiscard]] SPARROW_API iterator end();
89
// Const-iterator range (const overloads).
90 [[nodiscard]] SPARROW_API const_iterator begin() const;
91 [[nodiscard]] SPARROW_API const_iterator end() const;
92
// Explicitly-const iterator range.
93 [[nodiscard]] SPARROW_API const_iterator cbegin() const;
94 [[nodiscard]] SPARROW_API const_iterator cend() const;
95
98
// Emptiness / size queries; declared here, defined out of line.
99 [[nodiscard]] SPARROW_API bool empty() const;
100 [[nodiscard]] SPARROW_API size_type size() const;
101
// Optional name and metadata accessors. Unlike the neighbouring accessors,
// these two declarations are not marked SPARROW_API.
102 [[nodiscard]] std::optional<std::string_view> name() const;
103 [[nodiscard]] std::optional<key_value_view> metadata() const;
104
105 private:
106
// Builds an arrow_proxy from two child arrays (taken by rvalue reference) plus
// an optional name and optional metadata; both optionals default to nullopt.
// The template definition follows the class in this file.
107 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
108 [[nodiscard]] static auto create_proxy(
109 array&& acc_lengths,
110 array&& encoded_values,
111 std::optional<std::string_view> name = std::nullopt,
112 std::optional<METADATA_RANGE> metadata = std::nullopt
113 ) -> arrow_proxy;
114
// Pointer to const 16-, 32- or 64-bit signed integers, held as a variant.
115 using acc_length_ptr_variant_type = std::variant<const std::int16_t*, const std::int32_t*, const std::int64_t*>;
116
// Returns a pair of 64-bit integers computed from the two arrays.
// NOTE(review): the name suggests (length, null_count), but create_proxy binds
// the result as [null_count, length]; confirm the order against the definition.
117 [[nodiscard]] SPARROW_API static std::pair<std::int64_t, std::int64_t>
118 extract_length_and_null_count(const array&, const array&);
// Produces the acc_length_ptr_variant_type for the given array_wrapper.
119 [[nodiscard]] SPARROW_API static acc_length_ptr_variant_type
120 get_acc_lengths_ptr(const array_wrapper& ar);
// Length of the run at run_index; defined out of line.
121 [[nodiscard]] SPARROW_API std::uint64_t get_run_length(std::uint64_t run_index) const;
122
// Access to the underlying arrow_proxy (mutable and const overloads).
123 [[nodiscard]] SPARROW_API arrow_proxy& get_arrow_proxy();
124 [[nodiscard]] SPARROW_API const arrow_proxy& get_arrow_proxy() const;
125
// Data members: the owned proxy and an encoded length.
126 arrow_proxy m_proxy;
127 std::uint64_t m_encoded_length;
128
// Type-erased wrappers for the two arrays (cloning_ptr, so they are copyable),
// plus the typed pointer variant for the accumulated lengths.
129 cloning_ptr<array_wrapper> p_acc_lengths_array;
130 cloning_ptr<array_wrapper> p_encoded_values_array;
131 acc_length_ptr_variant_type m_acc_lengths;
132
133 // friend classes
134 friend class run_encoded_array_iterator<false>;
135 friend class run_encoded_array_iterator<true>;
137 };
138
141
142 template <input_metadata_container METADATA_RANGE>
143 auto run_end_encoded_array::create_proxy(
144 array&& acc_lengths,
145 array&& encoded_values,
146 std::optional<std::string_view> name,
147 std::optional<METADATA_RANGE> metadata
148 ) -> arrow_proxy
149 {
150 const auto flags = detail::array_access::get_arrow_proxy(encoded_values).flags();
151 auto [null_count, length] = extract_length_and_null_count(acc_lengths, encoded_values);
152
153 auto [acc_length_array, acc_length_schema] = extract_arrow_structures(std::move(acc_lengths));
154 auto [encoded_values_array, encoded_values_schema] = extract_arrow_structures(std::move(encoded_values));
155
156 constexpr auto n_children = 2;
157 ArrowSchema** child_schemas = new ArrowSchema*[n_children];
158 ArrowArray** child_arrays = new ArrowArray*[n_children];
159
160 child_schemas[0] = new ArrowSchema(std::move(acc_length_schema));
161 child_schemas[1] = new ArrowSchema(std::move(encoded_values_schema));
162
163 child_arrays[0] = new ArrowArray(std::move(acc_length_array));
164 child_arrays[1] = new ArrowArray(std::move(encoded_values_array));
165
166 const repeat_view<bool> children_ownserhip{true, n_children};
167
169 std::string("+r"),
170 std::move(name), // name
171 std::move(metadata), // metadata
172 flags, // flags,
173 child_schemas, // children
174 children_ownserhip, // children ownership
175 nullptr, // dictionary
176 true // dictionary ownership
177 );
178
179 std::vector<buffer<std::uint8_t>> arr_buffs = {};
180
181 ArrowArray arr = make_arrow_array(
182 static_cast<std::int64_t>(length), // length
183 static_cast<int64_t>(null_count),
184 0, // offset
185 std::move(arr_buffs),
186 child_arrays, // children
187 children_ownserhip, // children ownership
188 nullptr, // dictionary
189 true // dictionary ownership
190 );
191
192 return arrow_proxy{std::move(arr), std::move(schema)};
193 }
194} // namespace sparrow
195
196
197#if defined(__cpp_lib_format)
198
/// std::formatter specialization enabling std::format on sparrow::run_end_encoded_array.
template <>
struct std::formatter<sparrow::run_end_encoded_array>
{
    /// Minimal parser: performs no parsing and returns the beginning of the
    /// format-specification range unchanged.
    constexpr auto parse(std::format_parse_context& ctx) -> std::format_parse_context::iterator
    {
        return ctx.begin();
    }

    /// Writes @p ar to the output of @p ctx; declared here, defined out of line.
    SPARROW_API auto format(const sparrow::run_end_encoded_array& ar, std::format_context& ctx) const
        -> decltype(ctx.out());
};
210
211namespace sparrow
212{
213 SPARROW_API std::ostream& operator<<(std::ostream& os, const sparrow::run_end_encoded_array& value);
214}
215
216#endif
Base class for array type erasure.
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:39
SPARROW_API std::unordered_set< ArrowFlag > flags() const
Gets the Arrow flags set for this array.
Smart pointer behaving like a copiable std::unique_ptr.
Definition memory.hpp:126
static const sparrow::arrow_proxy & get_arrow_proxy(const ARRAY &array)
A view that repeats a value a given number of times.
SPARROW_API const_iterator end() const
SPARROW_API array_traits::const_reference operator[](std::uint64_t i)
std::optional< key_value_view > metadata() const
array_traits::const_reference const_reference
run_encoded_array_iterator< true > const_iterator
array_traits::inner_value_type inner_value_type
SPARROW_API size_type size() const
SPARROW_API array_traits::const_reference back() const
SPARROW_API bool empty() const
SPARROW_API iterator end()
SPARROW_API iterator begin()
SPARROW_API const_iterator begin() const
self_type & operator=(self_type &&)=default
SPARROW_API array_traits::const_reference operator[](std::uint64_t i) const
SPARROW_API run_end_encoded_array(const self_type &)
run_end_encoded_array(self_type &&)=default
SPARROW_API run_end_encoded_array(arrow_proxy proxy)
std::optional< std::string_view > name() const
SPARROW_API array_traits::const_reference front() const
SPARROW_API const_iterator cend() const
SPARROW_API self_type & operator=(const self_type &)
SPARROW_API const_iterator cbegin() const
run_encoded_array_iterator< false > iterator
#define SPARROW_API
Definition config.hpp:38
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
constexpr bool is_run_end_encoded_array_v
Checks whether T is a run_end_encoded_array type.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
Definition array.hpp:91
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
data_type
Runtime identifier of Arrow data types, usually stored alongside the raw bytes that hold the associated value.
std::ostream & operator<<(std::ostream &os, const sparrow::nullval_t &)
mpl::rename< mpl::unique< mpl::transform< detail::array_const_reference_t, all_base_types_t > >, nullable_variant > const_reference
mpl::rename< all_base_types_t, std::variant > inner_value_type
mpl::rename< mpl::transform< detail::array_value_type_t, all_base_types_t >, nullable_variant > value_type
Metafunction for retrieving the data_type of a typed array.