23#include <unordered_set>
55 :
std::runtime_error(message)
346 template <input_metadata_container R>
349 if (!schema_created_with_sparrow())
624 template <std::ranges::input_range R>
684 template <std::ranges::input_range R>
694 template <std::ranges::input_range R>
980 bool m_array_is_immutable = false;
981 bool m_schema_is_immutable = false;
982 bool m_is_dictionary_immutable = false;
983 std::vector<
bool> m_children_array_immutable;
984 std::vector<
bool> m_children_schema_immutable;
995 template <
typename AA,
typename AS>
996 requires std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AA>>>,
ArrowArray>
997 && std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AS>>>,
ArrowSchema>
1000 [[nodiscard]]
bool empty()
const;
1001 SPARROW_API void resize_children(
size_t children_count);
1003 void update_children();
1004 void update_dictionary();
1005 void update_null_count();
1007 void remove_dictionary();
1008 void remove_child(
size_t index);
1009 void create_bitmap_view(std::optional<size_t>
null_count = std::nullopt);
1011 [[nodiscard]]
bool array_created_with_sparrow()
const;
1012 [[nodiscard]]
SPARROW_API bool schema_created_with_sparrow()
const;
1014 void validate_array_and_schema()
const;
1016 [[nodiscard]]
bool is_arrow_array_valid()
const;
1017 [[nodiscard]]
bool is_arrow_schema_valid()
const;
1018 [[nodiscard]]
bool is_proxy_valid()
const;
1020 [[nodiscard]]
size_t get_null_count()
const;
1022 [[nodiscard]]
ArrowArray& array_without_sanitize();
1023 [[nodiscard]]
const ArrowArray& array_without_sanitize()
const;
1025 [[nodiscard]]
ArrowSchema& schema_without_sanitize();
1026 [[nodiscard]]
const ArrowSchema& schema_without_sanitize()
const;
1032 void sanitize_schema();
1036 template <const
char* function_name,
bool check_array_is_mutable,
bool check_schema_is_mutable>
1037 void throw_if_immutable()
const;
1040 template <std::ranges::input_range R>
1041 requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema_pointers>
1044 static constexpr const char function_name[] =
"add_children";
1045 throw_if_immutable<function_name, true, true>();
1047 const size_t original_children_count =
n_children();
1048 const size_t new_children_count = original_children_count + add_children_count;
1050 resize_children(new_children_count);
1051 for (
size_t i = 0; i < add_children_count; ++i)
1054 i + original_children_count,
1061 template <std::ranges::input_range R>
1067 throw arrow_proxy_exception(
"Cannot set n_buffers on non-sparrow created ArrowArray or ArrowSchema");
1070 const size_t add_children_count = std::ranges::size(arrow_arrays_and_schemas);
1071 const size_t original_children_count =
n_children();
1072 const size_t new_children_count = original_children_count + add_children_count;
1074 resize_children(new_children_count);
1075 for (
size_t i = 0; i < add_children_count; ++i)
1078 i + original_children_count,
1079 std::move(arrow_arrays_and_schemas[i].
array),
1080 std::move(arrow_arrays_and_schemas[i].
schema)
1085 template <std::ranges::input_range R>
1088 static constexpr const char function_name[] =
"insert_bitmap";
1089 throw_if_immutable<function_name, true, false>();
1091 const auto it = m_null_bitmap->insert(
1096 set_null_count(
static_cast<int64_t
>(m_null_bitmap->null_count()));
1098 m_null_bitmap->data(),
1100 static_cast<size_t>(m_null_bitmap->offset()),
1101 static_cast<size_t>(m_null_bitmap->null_count())
1104 return static_cast<size_t>(std::distance(m_null_bitmap->begin(), it));
1107 template <
typename AA,
typename AS>
1108 requires std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AA>>>,
ArrowArray>
1109 && std::same_as<std::remove_const_t<std::remove_pointer_t<std::remove_cvref_t<AS>>>,
ArrowSchema>
1112 if constexpr (std::is_const_v<std::remove_pointer_t<std::remove_reference_t<AA>>>)
1114 m_array_is_immutable =
true;
1119 m_array = std::forward<AA>(
array);
1122 if constexpr (std::is_const_v<std::remove_pointer_t<std::remove_reference_t<AS>>>)
1124 m_schema_is_immutable =
true;
1125 m_schema =
const_cast<ArrowSchema*
>(
schema);
1129 m_schema = std::forward<AS>(
schema);
1132 if constexpr (std::is_rvalue_reference_v<AA&&>)
1136 else if constexpr (std::is_pointer_v<std::remove_cvref_t<AA>>)
1141 if constexpr (std::is_rvalue_reference_v<AS&&>)
1145 else if constexpr (std::is_pointer_v<std::remove_cvref_t<AS>>)
1150 m_children_array_immutable = std::vector<bool>(
n_children(), m_array_is_immutable);
1151 m_children_schema_immutable = std::vector<bool>(
n_children(), m_schema_is_immutable);
1152 validate_array_and_schema();
1155 update_dictionary();
1156 create_bitmap_view();
1159 template <const
char* function_name,
bool check_array_is_mutable,
bool check_schema_is_mutable>
1160 void arrow_proxy::throw_if_immutable()
const
1162 static const std::string cannot_call =
"Cannot call ";
1165 auto error_message = cannot_call + std::string(function_name)
1166 +
" on non-sparrow created ArrowArray or ArrowSchema";
1167 throw arrow_proxy_exception(error_message);
1169 if constexpr (check_array_is_mutable || check_schema_is_mutable)
1171 if (m_array_is_immutable || m_schema_is_immutable)
1174 std::string error_message = cannot_call + std::string(function_name);
1175 if constexpr (check_array_is_mutable && !check_schema_is_mutable)
1177 if (m_array_is_immutable)
1179 error_message +=
" on an immutable ArrowArray. You may have passed a const ArrowArray* at the creation.";
1182 else if constexpr (check_schema_is_mutable && !check_array_is_mutable)
1184 if (m_schema_is_immutable)
1186 error_message +=
" on an immutable ArrowSchema. You may have passed a const ArrowSchema* at the creation.";
1189 else if constexpr (check_array_is_mutable && check_schema_is_mutable)
1191 if (m_array_is_immutable && m_schema_is_immutable)
1193 error_message +=
" on an immutable ArrowArray and ArrowSchema. You may have passed const ArrowArray* and const ArrowSchema* at the creation.";
1196 throw arrow_proxy_exception(error_message);
1203#if defined(__cpp_lib_format)
1206struct std::formatter<
sparrow::buffer_view<uint8_t>>
1210 char delimiter =
' ';
1211 static constexpr std::string_view opening =
"[";
1212 static constexpr std::string_view closing =
"]";
1216 constexpr auto parse(std::format_parse_context& ctx)
1218 auto it = ctx.begin();
1219 auto end = ctx.end();
1222 if (it != end && *it !=
'}')
1227 if (it != end && *it !=
'}')
1229 throw std::format_error(
"Invalid format specifier for range");
1235 auto format(
const sparrow::buffer_view<uint8_t>& range, std::format_context& ctx)
const
1237 auto out = ctx.out();
1244 for (
const auto& elem : range)
1250 out = std::format_to(out,
"{}", elem);
1265 os << std::format(
"{}", value);
1271struct std::formatter<
sparrow::arrow_proxy>
1273 constexpr auto parse(std::format_parse_context& ctx)
1278 auto format(
const sparrow::arrow_proxy& obj, std::format_context& ctx)
const
1280 std::string buffers_description_str;
1281 for (
size_t i = 0; i < obj.
n_buffers(); ++i)
1284 std::back_inserter(buffers_description_str),
1287 obj.
buffers()[i].size() *
sizeof(uint8_t),
1292 std::string children_str;
1293 for (
const auto& child : obj.
children())
1295 std::format_to(std::back_inserter(children_str),
"{}\n", child);
1298 const std::string dictionary_str = obj.
dictionary() ? std::format(
"{}", *obj.
dictionary()) :
"nullptr";
1300 return std::format_to(
1302 "arrow_proxy\n- format: {}\n- name; {}\n- metadata: {}\n- data_type: {}\n- null_count:{}\n- length: {}\n- offset: {}\n- n_buffers: {}\n- buffers:\n{}\n- n_children: {}\n-children: {}\n- dictionary: {}",
1304 obj.
name().value_or(
""),
1311 buffers_description_str,
1323 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Private data for ArrowArray.
Exception thrown by arrow_proxy operations.
arrow_proxy_exception(const std::string &message)
Constructs an arrow_proxy_exception with a descriptive message.
SPARROW_API void push_back_bitmap(bool value)
Appends a validity bit at the end of the bitmap.
SPARROW_API ArrowArray extract_array()
Extracts the ArrowArray from the proxy and transfers the responsibility to release it after usage to ...
SPARROW_API void add_child(const ArrowArray *array, const ArrowSchema *schema)
Add a child without taking its ownership.
SPARROW_API void add_child(ArrowArray *array, ArrowSchema *schema)
Add a child without taking its ownership.
SPARROW_API arrow_proxy(ArrowArray &&array, ArrowSchema *schema)
Constructs an arrow_proxy taking ownership of ArrowArray, referencing ArrowSchema.
SPARROW_API void set_buffer(size_t index, buffer< uint8_t > &&buffer)
Sets a specific buffer by moving it at the given index.
SPARROW_API ArrowSchema & schema()
Get a reference to the ArrowSchema of the proxy.
SPARROW_API const std::unique_ptr< arrow_proxy > & dictionary() const
Returns a constant reference to the dictionary arrow proxy.
SPARROW_API std::unordered_set< ArrowFlag > flags() const
Gets the Arrow flags set for this array.
void SPARROW_API set_data_type(enum data_type data_type)
Sets the data type (updates format string accordingly).
SPARROW_API bool owns_schema() const
Check whether the proxy has ownership of its internal ArrowSchema.
SPARROW_API std::vector< arrow_proxy > & children()
Returns a mutable reference to the vector of child arrow proxies.
SPARROW_API void add_child(ArrowArray &&array, ArrowSchema &&schema)
Adds a child and takes ownership of it.
SPARROW_API arrow_proxy & operator=(const arrow_proxy &other)
Copy assignment operator.
SPARROW_API bool is_created_with_sparrow() const
Check if the ArrowArray and ArrowSchema were created with sparrow.
SPARROW_API size_t offset() const
Gets the starting offset within the buffers.
SPARROW_API arrow_proxy(ArrowArray &&array, ArrowSchema &&schema)
Constructs an arrow_proxy taking ownership of both ArrowArray and ArrowSchema.
SPARROW_API bool is_view() const noexcept
Check whether the proxy is a view.
dynamic_bitset_view< const uint8_t > const_bitmap_type
SPARROW_API void set_name(std::optional< std::string_view > name)
Sets the name of the array/field.
SPARROW_API const std::string_view format() const
Gets the Arrow format string describing the data type.
SPARROW_API size_t length() const
Gets the number of elements in the array.
SPARROW_API void set_n_buffers(size_t n_buffers)
Sets the number of buffers and resizes the buffer vector.
SPARROW_API void set_buffer(size_t index, const buffer_view< uint8_t > &buffer)
Sets a specific buffer at the given index.
SPARROW_API void set_child(size_t index, ArrowArray *array, ArrowSchema *schema)
Set the child at the given index.
SPARROW_API arrow_proxy & operator=(arrow_proxy &&other) noexcept
Move assignment operator.
SPARROW_API void set_dictionary(const ArrowArray *array, const ArrowSchema *schema)
Set the dictionary.
SPARROW_API void pop_back_bitmap()
Removes the last validity bit from the bitmap.
SPARROW_API void set_null_count(int64_t null_count)
Sets the number of null values in the array.
SPARROW_API const std::vector< arrow_proxy > & children() const
Returns a constant reference to the vector of child arrow proxies.
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > & buffers()
Gets mutable reference to the buffer views.
non_owning_dynamic_bitset< uint8_t > bitmap_type
SPARROW_API enum data_type data_type() const
Gets the data type enum corresponding to the format.
void set_metadata(std::optional< R > metadata)
Sets the metadata key-value pairs.
SPARROW_API bool is_schema_const() const
Check if the schema is const.
SPARROW_API void set_child(size_t index, ArrowArray &&array, ArrowSchema &&schema)
Set the child at the given index.
SPARROW_API bool is_array_const() const
Check if the array is const.
SPARROW_API size_t n_children() const
Gets the number of child arrays.
SPARROW_API ArrowArray & array()
Get a reference to the ArrowArray of the proxy.
SPARROW_API arrow_proxy(ArrowArray &&array, const ArrowSchema *schema)
Constructs an arrow_proxy taking ownership of ArrowArray, referencing const ArrowSchema.
SPARROW_API ~arrow_proxy()
Destructor releasing owned Arrow structures.
SPARROW_API std::optional< std::string_view > name() const
Gets the optional name of the array/field.
SPARROW_API arrow_proxy(const ArrowArray *array, const ArrowSchema *schema)
Constructs an arrow_proxy referencing external const ArrowArray and const ArrowSchema.
SPARROW_API std::optional< key_value_view > metadata() const
Gets the metadata key-value pairs.
SPARROW_API arrow_array_private_data * get_array_private_data()
SPARROW_API bool owns_array() const
Check whether the proxy has ownership of its internal ArrowArray.
SPARROW_API std::optional< bitmap_type > & bitmap()
SPARROW_API void set_length(size_t length)
Sets the number of elements in the array.
SPARROW_API void set_dictionary(ArrowArray &&array_dictionary, ArrowSchema &&schema_dictionary)
Set the dictionary.
SPARROW_API void set_offset(size_t offset)
Sets the starting offset within the buffers.
SPARROW_API void * private_data() const
SPARROW_API void set_format(const std::string_view format)
Sets the Arrow format string.
SPARROW_API arrow_proxy view() const
Get a non-owning view of the arrow_proxy.
SPARROW_API arrow_proxy slice_view(size_t start, size_t end) const
Slices the array to keep only the elements between the given start and end.
SPARROW_API void pop_children(size_t n)
Pop n children.
SPARROW_API const std::optional< const_bitmap_type > & const_bitmap() const
SPARROW_API std::unique_ptr< arrow_proxy > & dictionary()
Returns a mutable reference to the dictionary arrow proxy.
SPARROW_API void resize_bitmap(size_t new_size, bool value=true)
Resizes the validity bitmap buffer.
SPARROW_API size_t n_buffers() const
Gets the number of buffers in the array.
SPARROW_API size_t erase_bitmap(size_t index, size_t count=1)
Erases validity bits starting at specified position.
SPARROW_API int64_t null_count() const
Gets the number of null values in the array.
SPARROW_API arrow_schema_private_data * get_schema_private_data()
SPARROW_API arrow_proxy(arrow_proxy &&other) noexcept
Move constructor transferring ownership.
SPARROW_API arrow_proxy slice(size_t start, size_t end) const
Slices the array to keep only the elements between the given start and end.
SPARROW_API arrow_proxy(const arrow_proxy &other)
Copy constructor creating independent copy.
void add_children(const R &arrow_array_and_schema_pointers)
Add children without taking their ownership.
SPARROW_API void set_dictionary(ArrowArray *array, ArrowSchema *schema)
Set the dictionary.
SPARROW_API const std::vector< sparrow::buffer_view< uint8_t > > & buffers() const
Gets const reference to the buffer views.
SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count=1)
Inserts validity bits with the same value at specified position.
SPARROW_API ArrowSchema extract_schema()
Extracts the ArrowSchema from the proxy and transfers the responsibility to release it after usage to...
SPARROW_API void set_flags(const std::unordered_set< ArrowFlag > &flags)
Sets the Arrow flags for this array.
SPARROW_API void update_buffers()
Refresh the buffers views.
SPARROW_API arrow_proxy(ArrowArray *array, ArrowSchema *schema)
Constructs an arrow_proxy referencing external ArrowArray and ArrowSchema.
SPARROW_API void set_child(size_t index, const ArrowArray *array, const ArrowSchema *schema)
Set the child at the given index.
Private data for ArrowSchema.
Object that owns a piece of contiguous memory.
A non-owning view to a dynamic size sequence of bits stored in external memory.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
constexpr InputIt next(InputIt it, Distance n)
std::string get_metadata_from_key_values(const T &metadata)
Converts a container of key-value pairs to binary metadata format.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs) noexcept
Swaps the contents of the two ArrowArray objects.
Extensions to the C++ standard library.