sparrow 2.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
arrow_array_schema_proxy.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15#pragma once
16
17#include <cstdint>
18#include <iterator>
19#include <optional>
20#include <ranges>
21#include <string>
22#include <string_view>
23#include <unordered_set>
24
36
37namespace sparrow
38{
/**
 * Exception type thrown by arrow_proxy operations.
 *
 * Derives from std::runtime_error, so the message supplied at construction
 * is retrievable through what().
 */
class arrow_proxy_exception : public std::runtime_error
{
public:

    /**
     * Builds the exception from a descriptive message.
     *
     * @param message Text forwarded unchanged to std::runtime_error.
     */
    explicit arrow_proxy_exception(const std::string& message)
        : std::runtime_error{message}
    {
    }
};
59
65
71
111 {
112 public:
113
116
130
145
160
176
192
205
218
230
243
252
261 [[nodiscard]] SPARROW_API const std::string_view format() const;
262
275 SPARROW_API void set_format(const std::string_view format);
276
284 [[nodiscard]] SPARROW_API enum data_type data_type() const;
285
299
308 [[nodiscard]] SPARROW_API std::optional<std::string_view> name() const;
309
321 SPARROW_API void set_name(std::optional<std::string_view> name);
322
331 [[nodiscard]] SPARROW_API std::optional<key_value_view> metadata() const;
332
346 template <input_metadata_container R>
347 void set_metadata(std::optional<R> metadata)
348 {
349 if (!schema_created_with_sparrow())
350 {
351 throw arrow_proxy_exception("Cannot set metadata on non-sparrow created ArrowArray");
352 }
354 if (!metadata.has_value())
355 {
356 private_data->metadata() = std::nullopt;
357 }
358 else
359 {
361 }
362 schema().metadata = private_data->metadata_ptr();
363 }
364
373 [[nodiscard]] SPARROW_API std::unordered_set<ArrowFlag> flags() const;
374
387 SPARROW_API void set_flags(const std::unordered_set<ArrowFlag>& flags);
388
397 [[nodiscard]] SPARROW_API size_t length() const;
398
415
424 [[nodiscard]] SPARROW_API int64_t null_count() const;
425
442
451 [[nodiscard]] SPARROW_API size_t offset() const;
452
467
476 [[nodiscard]] SPARROW_API size_t n_buffers() const;
477
492
501 [[nodiscard]] SPARROW_API size_t n_children() const;
502
512 [[nodiscard]] SPARROW_API const std::vector<sparrow::buffer_view<uint8_t>>& buffers() const;
513
523 [[nodiscard]] SPARROW_API std::vector<sparrow::buffer_view<uint8_t>>& buffers();
524
542
561
578 SPARROW_API void resize_bitmap(size_t new_size, bool value = true);
579
600 SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count = 1);
601
624 template <std::ranges::input_range R>
625 size_t insert_bitmap(size_t index, const R& range);
626
647 SPARROW_API size_t erase_bitmap(size_t index, size_t count = 1);
648
663
677
684 template <std::ranges::input_range R>
685 requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema_pointers>
687
694 template <std::ranges::input_range R>
695 requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema>
696 void add_children(R&& arrow_array_and_schemas);
697
706
715
724
730 SPARROW_API void pop_children(size_t n);
731
742
752 SPARROW_API void set_child(size_t index, const ArrowArray* array, const ArrowSchema* schema);
753
765
766
777 [[nodiscard]] SPARROW_API const std::vector<arrow_proxy>& children() const;
778
789 [[nodiscard]] SPARROW_API std::vector<arrow_proxy>& children();
790
801 [[nodiscard]] SPARROW_API const std::unique_ptr<arrow_proxy>& dictionary() const;
802
813 [[nodiscard]] SPARROW_API std::unique_ptr<arrow_proxy>& dictionary();
814
824
834
844 SPARROW_API void set_dictionary(ArrowArray&& array_dictionary, ArrowSchema&& schema_dictionary);
845
849 [[nodiscard]] SPARROW_API bool is_created_with_sparrow() const;
850
851 [[nodiscard]] SPARROW_API void* private_data() const;
852
856 [[nodiscard]] SPARROW_API arrow_proxy view() const;
857
861 [[nodiscard]] SPARROW_API bool is_view() const noexcept;
862
866 [[nodiscard]] SPARROW_API bool owns_array() const;
867
877
885 [[nodiscard]] SPARROW_API ArrowArray& array();
886
894 [[nodiscard]] SPARROW_API const ArrowArray& array() const;
895
899 [[nodiscard]] SPARROW_API bool owns_schema() const;
900
910
919
927 [[nodiscard]] SPARROW_API const ArrowSchema& schema() const;
928
931
946 [[nodiscard]] SPARROW_API arrow_proxy slice(size_t start, size_t end) const;
947
964 [[nodiscard]] SPARROW_API arrow_proxy slice_view(size_t start, size_t end) const;
965
979 SPARROW_API void slice_inplace(size_t start, size_t end);
980
985
989 [[nodiscard]] SPARROW_API bool is_array_const() const;
990
994 [[nodiscard]] SPARROW_API bool is_schema_const() const;
995
996 [[nodiscard]] SPARROW_API std::optional<bitmap_type>& bitmap();
997
998 [[nodiscard]] SPARROW_API const std::optional<const_bitmap_type>& const_bitmap() const;
999
1000 private:
1001
1002 std::variant<ArrowArray*, ArrowArray> m_array;
1003 std::variant<ArrowSchema*, ArrowSchema> m_schema;
1004 std::vector<sparrow::buffer_view<uint8_t>> m_buffers;
1005 std::vector<arrow_proxy> m_children;
1006 std::unique_ptr<arrow_proxy> m_dictionary;
1007 bool m_array_is_immutable = false;
1008 bool m_schema_is_immutable = false;
1009 bool m_is_dictionary_immutable = false;
1010 std::vector<bool> m_children_array_immutable;
1011 std::vector<bool> m_children_schema_immutable;
1012 std::optional<bitmap_type> m_null_bitmap;
1013 std::optional<const_bitmap_type> m_const_bitmap;
1014
1015 struct impl_tag
1016 {
1017 };
1018
1019 // Build an empty proxy. Convenient for resizing vector of children
1020 arrow_proxy();
1021
1022 template <typename AA, typename AS>
1023 requires std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AA>>>, ArrowArray>
1024 && std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AS>>>, ArrowSchema>
1025 arrow_proxy(AA&& array, AS&& schema, impl_tag);
1026
1027 [[nodiscard]] bool empty() const;
1028 SPARROW_API void resize_children(size_t children_count);
1029
1030 void update_children();
1031 void update_dictionary();
1032 void update_null_count();
1033 void reset();
1034 void remove_dictionary();
1035 void remove_child(size_t index);
1036 void create_bitmap_view(std::optional<size_t> null_count = std::nullopt);
1037
1038 [[nodiscard]] bool array_created_with_sparrow() const;
1039 [[nodiscard]] SPARROW_API bool schema_created_with_sparrow() const;
1040
1041 void validate_array_and_schema() const;
1042
1043 [[nodiscard]] bool is_arrow_array_valid() const;
1044 [[nodiscard]] bool is_arrow_schema_valid() const;
1045 [[nodiscard]] bool is_proxy_valid() const;
1046
1047 [[nodiscard]] size_t get_null_count() const;
1048
1049 [[nodiscard]] ArrowArray& array_without_sanitize();
1050 [[nodiscard]] const ArrowArray& array_without_sanitize() const;
1051
1052 [[nodiscard]] ArrowSchema& schema_without_sanitize();
1053 [[nodiscard]] const ArrowSchema& schema_without_sanitize() const;
1054
1059 void sanitize_schema();
1060
1061 void swap(arrow_proxy& other) noexcept;
1062
1063 template <const char* function_name, bool check_array_is_mutable, bool check_schema_is_mutable>
1064 void throw_if_immutable() const;
1065 };
1066
1067 template <std::ranges::input_range R>
1068 requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema_pointers>
1070 {
1071 static constexpr const char function_name[] = "add_children";
1072 throw_if_immutable<function_name, true, true>();
1073 const size_t add_children_count = std::ranges::size(arrow_array_and_schema_pointers);
1074 const size_t original_children_count = n_children();
1075 const size_t new_children_count = original_children_count + add_children_count;
1076
1077 resize_children(new_children_count);
1078 for (size_t i = 0; i < add_children_count; ++i)
1079 {
1080 set_child(
1081 i + original_children_count,
1084 );
1085 }
1086 }
1087
1088 template <std::ranges::input_range R>
1089 requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema>
1090 void arrow_proxy::add_children(R&& arrow_arrays_and_schemas)
1091 {
1093 {
1094 throw arrow_proxy_exception("Cannot set n_buffers on non-sparrow created ArrowArray or ArrowSchema");
1095 }
1096
1097 const size_t add_children_count = std::ranges::size(arrow_arrays_and_schemas);
1098 const size_t original_children_count = n_children();
1099 const size_t new_children_count = original_children_count + add_children_count;
1100
1101 resize_children(new_children_count);
1102 for (size_t i = 0; i < add_children_count; ++i)
1103 {
1104 set_child(
1105 i + original_children_count,
1106 std::move(arrow_arrays_and_schemas[i].array),
1107 std::move(arrow_arrays_and_schemas[i].schema)
1108 );
1109 }
1110 }
1111
1112 template <std::ranges::input_range R>
1113 inline size_t arrow_proxy::insert_bitmap(size_t index, const R& range)
1114 {
1115 static constexpr const char function_name[] = "insert_bitmap";
1116 throw_if_immutable<function_name, true, false>();
1117 SPARROW_ASSERT_TRUE(m_null_bitmap.has_value())
1118 const auto it = m_null_bitmap->insert(
1119 sparrow::next(m_null_bitmap->cbegin(), index),
1120 range.begin(),
1121 range.end()
1122 );
1123 set_null_count(static_cast<int64_t>(m_null_bitmap->null_count()));
1124 m_const_bitmap = const_bitmap_type(
1125 m_null_bitmap->data(),
1126 m_null_bitmap->size(),
1127 static_cast<size_t>(m_null_bitmap->offset()),
1128 static_cast<size_t>(m_null_bitmap->null_count())
1129 );
1131 return static_cast<size_t>(std::distance(m_null_bitmap->begin(), it));
1132 }
1133
1134 template <typename AA, typename AS>
1135 requires std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AA>>>, ArrowArray>
1136 && std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AS>>>, ArrowSchema>
1137 arrow_proxy::arrow_proxy(AA&& array, AS&& schema, impl_tag)
1138 {
1139 if constexpr (std::is_const_v<std::remove_pointer_t<std::remove_reference_t<AA>>>)
1140 {
1141 m_array_is_immutable = true;
1142 m_array = const_cast<ArrowArray*>(array);
1143 }
1144 else
1145 {
1146 m_array = std::forward<AA>(array);
1147 }
1148
1149 if constexpr (std::is_const_v<std::remove_pointer_t<std::remove_reference_t<AS>>>)
1150 {
1151 m_schema_is_immutable = true;
1152 m_schema = const_cast<ArrowSchema*>(schema);
1153 }
1154 else
1155 {
1156 m_schema = std::forward<AS>(schema);
1157 }
1158
1159 if constexpr (std::is_rvalue_reference_v<AA&&>)
1160 {
1161 array = {};
1162 }
1163 else if constexpr (std::is_pointer_v<std::remove_cvref_t<AA>>)
1164 {
1165 SPARROW_ASSERT_TRUE(array != nullptr);
1166 }
1167
1168 if constexpr (std::is_rvalue_reference_v<AS&&>)
1169 {
1170 schema = {};
1171 }
1172 else if constexpr (std::is_pointer_v<std::remove_cvref_t<AS>>)
1173 {
1174 SPARROW_ASSERT_TRUE(schema != nullptr);
1175 }
1176
1177 m_children_array_immutable = std::vector<bool>(n_children(), m_array_is_immutable);
1178 m_children_schema_immutable = std::vector<bool>(n_children(), m_schema_is_immutable);
1179 validate_array_and_schema();
1181 update_children();
1182 update_dictionary();
1183 create_bitmap_view();
1184 }
1185
1186 template <const char* function_name, bool check_array_is_mutable, bool check_schema_is_mutable>
1187 void arrow_proxy::throw_if_immutable() const
1188 {
1189 static const std::string cannot_call = "Cannot call ";
1191 {
1192 auto error_message = cannot_call + std::string(function_name)
1193 + " on non-sparrow created ArrowArray or ArrowSchema";
1194 throw arrow_proxy_exception(error_message);
1195 }
1196 if constexpr (check_array_is_mutable || check_schema_is_mutable)
1197 {
1198 if (m_array_is_immutable || m_schema_is_immutable)
1199 {
1200 {
1201 std::string error_message = cannot_call + std::string(function_name);
1202 if constexpr (check_array_is_mutable && !check_schema_is_mutable)
1203 {
1204 if (m_array_is_immutable)
1205 {
1206 error_message += " on an immutable ArrowArray. You may have passed a const ArrowArray* at the creation.";
1207 }
1208 }
1209 else if constexpr (check_schema_is_mutable && !check_array_is_mutable)
1210 {
1211 if (m_schema_is_immutable)
1212 {
1213 error_message += " on an immutable ArrowSchema. You may have passed a const ArrowSchema* at the creation.";
1214 }
1215 }
1216 else if constexpr (check_array_is_mutable && check_schema_is_mutable)
1217 {
1218 if (m_array_is_immutable && m_schema_is_immutable)
1219 {
1220 error_message += " on an immutable ArrowArray and ArrowSchema. You may have passed const ArrowArray* and const ArrowSchema* at the creation.";
1221 }
1222 }
1223 throw arrow_proxy_exception(error_message);
1224 }
1225 }
1226 }
1227 }
1228}
1229
1230#if defined(__cpp_lib_format)
1231
/**
 * std::formatter specialization for sparrow::buffer_view<uint8_t>.
 *
 * The elements are written between "[" and "]", separated by a single
 * delimiter character. The delimiter defaults to a space and may be
 * replaced by the (at most one) character found in the format
 * specification.
 */
template <>
struct std::formatter<sparrow::buffer_view<uint8_t>>
{
private:

    char delimiter = ' ';
    static constexpr std::string_view opening = "[";
    static constexpr std::string_view closing = "]";

public:

    /**
     * Reads the optional one-character delimiter from the format specification.
     *
     * @param ctx Parse context positioned at the start of the specification.
     * @return Iterator to the end of the parsed specification.
     * @throws std::format_error if more than one character precedes the closing '}'.
     */
    constexpr auto parse(std::format_parse_context& ctx)
    {
        auto cursor = ctx.begin();
        const auto spec_end = ctx.end();
        const auto at_end_of_spec = [&cursor, &spec_end]
        {
            return cursor == spec_end || *cursor == '}';
        };

        // At most one character is accepted: the custom delimiter.
        if (!at_end_of_spec())
        {
            delimiter = *cursor;
            ++cursor;
        }

        // Anything left before the closing brace is not supported.
        if (!at_end_of_spec())
        {
            throw std::format_error("Invalid format specifier for range");
        }

        return cursor;
    }

    /**
     * Writes the bracketed, delimiter-separated elements of @p range.
     *
     * @param range Buffer view whose elements are formatted with "{}".
     * @param ctx Format context providing the output iterator.
     * @return Output iterator past the last character written.
     */
    auto format(const sparrow::buffer_view<uint8_t>& range, std::format_context& ctx) const
    {
        auto sink = sparrow::ranges::copy(opening, ctx.out()).out;

        // The delimiter goes between elements only, never before the first one.
        bool needs_delimiter = false;
        for (const auto& value : range)
        {
            if (needs_delimiter)
            {
                *sink++ = delimiter;
            }
            sink = std::format_to(sink, "{}", value);
            needs_delimiter = true;
        }

        return sparrow::ranges::copy(closing, sink).out;
    }
};
1287
namespace sparrow
{
    /**
     * Streams a buffer_view<uint8_t> using its std::formatter specialization
     * with the default "{}" format.
     *
     * @param os Destination stream.
     * @param value Buffer view to print.
     * @return The stream @p os.
     */
    inline std::ostream& operator<<(std::ostream& os, const buffer_view<uint8_t>& value)
    {
        return os << std::format("{}", value);
    }
}
1296
/**
 * std::formatter specialization producing a multi-line description of a
 * sparrow::arrow_proxy: format, name, metadata, data type, null count,
 * length, offset, buffers, children and dictionary.
 */
template <>
struct std::formatter<sparrow::arrow_proxy>
{
    /**
     * No format specification is interpreted; the context is returned as-is.
     *
     * @param ctx Parse context.
     * @return Iterator to the beginning of the (unparsed) specification.
     */
    constexpr auto parse(std::format_parse_context& ctx)
    {
        return ctx.begin();  // Simple implementation
    }

    /**
     * Writes the textual description of @p obj.
     *
     * @param obj Proxy to describe.
     * @param ctx Format context providing the output iterator.
     * @return Output iterator past the last character written.
     */
    auto format(const sparrow::arrow_proxy& obj, std::format_context& ctx) const
    {
        // Query the buffers once instead of on every loop iteration.
        const auto& buffers = obj.buffers();
        const size_t buffer_count = obj.n_buffers();

        std::string buffers_description_str;
        for (size_t i = 0; i < buffer_count; ++i)
        {
            std::format_to(
                std::back_inserter(buffers_description_str),
                "<{}[{} b]{}",
                "uint8_t",
                buffers[i].size() * sizeof(uint8_t),
                buffers[i]
            );
        }

        // Each child is described recursively, one per line.
        std::string children_str;
        for (const auto& child : obj.children())
        {
            std::format_to(std::back_inserter(children_str), "{}\n", child);
        }

        // Absent dictionary / metadata are rendered as the literal "nullptr".
        const std::string dictionary_str = obj.dictionary() ? std::format("{}", *obj.dictionary()) : "nullptr";
        const std::string metadata_str = obj.metadata() ? std::format("{}", *obj.metadata()) : "nullptr";

        // Labels use a uniform "- label: value" shape (previously "name;",
        // "null_count:{}" and "-children:" broke that pattern).
        return std::format_to(
            ctx.out(),
            "arrow_proxy\n- format: {}\n- name: {}\n- metadata: {}\n- data_type: {}\n- null_count: {}\n- length: {}\n- offset: {}\n- n_buffers: {}\n- buffers:\n{}\n- n_children: {}\n- children: {}\n- dictionary: {}",
            obj.format(),
            obj.name().value_or(""),
            metadata_str,
            obj.data_type(),
            obj.null_count(),
            obj.length(),
            obj.offset(),
            obj.n_buffers(),
            buffers_description_str,
            obj.n_children(),
            children_str,
            dictionary_str
        );
    }
};
1346
namespace sparrow
{
    /**
     * Streams an arrow_proxy using its std::formatter specialization with
     * the default "{}" format.
     *
     * @param os Destination stream.
     * @param value Proxy to print.
     * @return The stream @p os.
     */
    inline std::ostream& operator<<(std::ostream& os, const arrow_proxy& value)
    {
        return os << std::format("{}", value);
    }
}
1355
1356#endif
Dynamically typed array encapsulating an Arrow layout.
Definition array_api.hpp:50
Private data for ArrowArray.
Exception thrown by arrow_proxy operations.
arrow_proxy_exception(const std::string &message)
Constructs an arrow_proxy_exception with a descriptive message.
SPARROW_API void push_back_bitmap(bool value)
Appends a validity bit at the end of the bitmap.
SPARROW_API ArrowArray extract_array()
Extract the ArrowArray from the proxy, and transfers the responsibility to release it after usage to ...
SPARROW_API void add_child(const ArrowArray *array, const ArrowSchema *schema)
Add a child without taking its ownership.
SPARROW_API void add_child(ArrowArray *array, ArrowSchema *schema)
Add a child without taking its ownership.
SPARROW_API arrow_proxy(ArrowArray &&array, ArrowSchema *schema)
Constructs an arrow_proxy taking ownership of ArrowArray, referencing ArrowSchema.
SPARROW_API void set_buffer(size_t index, buffer< uint8_t > &&buffer)
Sets a specific buffer by moving it at the given index.
SPARROW_API ArrowSchema & schema()
Get a reference to the ArrowSchema of the proxy.
SPARROW_API const std::unique_ptr< arrow_proxy > & dictionary() const
Returns a constant reference to the dictionary arrow proxy.
SPARROW_API std::unordered_set< ArrowFlag > flags() const
Gets the Arrow flags set for this array.
void SPARROW_API set_data_type(enum data_type data_type)
Sets the data type (updates format string accordingly).
SPARROW_API bool owns_schema() const
Check whether the proxy has ownership of its internal ArrowSchema.
SPARROW_API std::vector< arrow_proxy > & children()
Returns a mutable reference to the vector of child arrow proxies.
SPARROW_API void add_child(ArrowArray &&array, ArrowSchema &&schema)
Add a child and take ownership of it.
SPARROW_API arrow_proxy & operator=(const arrow_proxy &other)
Copy assignment operator.
SPARROW_API bool is_created_with_sparrow() const
Check if the ArrowArray and ArrowSchema were created with sparrow.
SPARROW_API size_t offset() const
Gets the starting offset within the buffers.
SPARROW_API arrow_proxy(ArrowArray &&array, ArrowSchema &&schema)
Constructs an arrow_proxy taking ownership of both ArrowArray and ArrowSchema.
SPARROW_API bool is_view() const noexcept
Check whether the proxy is a view.
dynamic_bitset_view< const uint8_t > const_bitmap_type
SPARROW_API void set_name(std::optional< std::string_view > name)
Sets the name of the array/field.
SPARROW_API const std::string_view format() const
Gets the Arrow format string describing the data type.
SPARROW_API size_t length() const
Gets the number of elements in the array.
SPARROW_API void set_n_buffers(size_t n_buffers)
Sets the number of buffers and resizes the buffer vector.
SPARROW_API void set_buffer(size_t index, const buffer_view< uint8_t > &buffer)
Sets a specific buffer at the given index.
SPARROW_API void set_child(size_t index, ArrowArray *array, ArrowSchema *schema)
Set the child at the given index.
SPARROW_API arrow_proxy & operator=(arrow_proxy &&other) noexcept
Move assignment operator.
SPARROW_API void set_dictionary(const ArrowArray *array, const ArrowSchema *schema)
Set the dictionary.
SPARROW_API void slice_inplace(size_t start, size_t end)
Restricts *this to the half-open range [start, end) in place.
SPARROW_API void pop_back_bitmap()
Removes the last validity bit from the bitmap.
SPARROW_API void set_null_count(int64_t null_count)
Sets the number of null values in the array.
SPARROW_API const std::vector< arrow_proxy > & children() const
Returns a constant reference to the vector of child arrow proxies.
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > & buffers()
Gets mutable reference to the buffer views.
non_owning_dynamic_bitset< uint8_t > bitmap_type
SPARROW_API enum data_type data_type() const
Gets the data type enum corresponding to the format.
void set_metadata(std::optional< R > metadata)
Sets the metadata key-value pairs.
SPARROW_API bool is_schema_const() const
Check if the schema is const.
SPARROW_API void set_child(size_t index, ArrowArray &&array, ArrowSchema &&schema)
Set the child at the given index.
SPARROW_API bool is_array_const() const
Check if the array is const.
SPARROW_API size_t n_children() const
Gets the number of child arrays.
SPARROW_API ArrowArray & array()
Get a reference to the ArrowArray of the proxy.
SPARROW_API arrow_proxy(ArrowArray &&array, const ArrowSchema *schema)
Constructs an arrow_proxy taking ownership of ArrowArray, referencing const ArrowSchema.
SPARROW_API ~arrow_proxy()
Destructor releasing owned Arrow structures.
SPARROW_API std::optional< std::string_view > name() const
Gets the optional name of the array/field.
SPARROW_API arrow_proxy(const ArrowArray *array, const ArrowSchema *schema)
Constructs an arrow_proxy referencing external const ArrowArray and const ArrowSchema.
SPARROW_API std::optional< key_value_view > metadata() const
Gets the metadata key-value pairs.
SPARROW_API arrow_array_private_data * get_array_private_data()
SPARROW_API bool owns_array() const
Check whether the proxy has ownership of its internal ArrowArray.
SPARROW_API std::optional< bitmap_type > & bitmap()
SPARROW_API void set_length(size_t length)
Sets the number of elements in the array.
SPARROW_API void set_dictionary(ArrowArray &&array_dictionary, ArrowSchema &&schema_dictionary)
Set the dictionary.
SPARROW_API void set_offset(size_t offset)
Sets the starting offset within the buffers.
SPARROW_API void * private_data() const
SPARROW_API void set_format(const std::string_view format)
Sets the Arrow format string.
SPARROW_API arrow_proxy view() const
Get a non-owning view of the arrow_proxy.
SPARROW_API arrow_proxy slice_view(size_t start, size_t end) const
Returns a zero-copy view of this proxy restricted to the half-open range [start, end).
SPARROW_API void pop_children(size_t n)
Pop n children.
SPARROW_API const std::optional< const_bitmap_type > & const_bitmap() const
SPARROW_API std::unique_ptr< arrow_proxy > & dictionary()
Returns a mutable reference to the dictionary arrow proxy.
SPARROW_API void resize_bitmap(size_t new_size, bool value=true)
Resizes the validity bitmap buffer.
SPARROW_API size_t n_buffers() const
Gets the number of buffers in the array.
SPARROW_API size_t erase_bitmap(size_t index, size_t count=1)
Erases validity bits starting at specified position.
SPARROW_API int64_t null_count() const
Gets the number of null values in the array.
SPARROW_API arrow_schema_private_data * get_schema_private_data()
SPARROW_API arrow_proxy(arrow_proxy &&other) noexcept
Move constructor transferring ownership.
SPARROW_API arrow_proxy slice(size_t start, size_t end) const
Returns a deep-copy of this proxy restricted to the half-open range [start, end).
SPARROW_API arrow_proxy(const arrow_proxy &other)
Copy constructor creating independent copy.
void add_children(const R &arrow_array_and_schema_pointers)
Add children without taking their ownership.
SPARROW_API void set_dictionary(ArrowArray *array, ArrowSchema *schema)
Set the dictionary.
SPARROW_API const std::vector< sparrow::buffer_view< uint8_t > > & buffers() const
Gets const reference to the buffer views.
SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count=1)
Inserts validity bits with the same value at specified position.
SPARROW_API ArrowSchema extract_schema()
Extract the ArrowSchema from the proxy, and transfers the responsibility to release it after usage to...
SPARROW_API void set_flags(const std::unordered_set< ArrowFlag > &flags)
Sets the Arrow flags for this array.
SPARROW_API void update_buffers()
Refresh the buffers views.
SPARROW_API arrow_proxy(ArrowArray *array, ArrowSchema *schema)
Constructs an arrow_proxy referencing external ArrowArray and ArrowSchema.
SPARROW_API void set_child(size_t index, const ArrowArray *array, const ArrowSchema *schema)
Set the child at the given index.
Private data for ArrowSchema.
Object that owns a piece of contiguous memory.
Definition buffer.hpp:131
A non-owning view to a dynamic size sequence of bits stored in external memory.
#define SPARROW_API
Definition config.hpp:38
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
Definition ranges.hpp:132
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:605
std::string get_metadata_from_key_values(const T &metadata)
Converts a container of key-value pairs to binary metadata format.
Definition metadata.hpp:367
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs) noexcept
Swaps the contents of the two ArrowArray objects.
Extensions to the C++ standard library.
const char * metadata