sparrow 0.9.0
|
#include <arrow_array_schema_proxy.hpp>
Public Member Functions | |
SPARROW_API | arrow_proxy (ArrowArray &&array, ArrowSchema &&schema) |
Constructs an arrow_proxy taking ownership of both ArrowArray and ArrowSchema. | |
SPARROW_API | arrow_proxy (ArrowArray &&array, ArrowSchema *schema) |
Constructs an arrow_proxy taking ownership of ArrowArray, referencing ArrowSchema. | |
SPARROW_API | arrow_proxy (ArrowArray *array, ArrowSchema *schema) |
Constructs an arrow_proxy referencing external ArrowArray and ArrowSchema. | |
SPARROW_API | arrow_proxy (const arrow_proxy &other) |
Copy constructor creating independent copy. | |
SPARROW_API arrow_proxy & | operator= (const arrow_proxy &other) |
Copy assignment operator. | |
SPARROW_API | arrow_proxy (arrow_proxy &&other) noexcept |
Move constructor transferring ownership. | |
SPARROW_API arrow_proxy & | operator= (arrow_proxy &&other) noexcept |
Move assignment operator. | |
SPARROW_API | ~arrow_proxy () |
Destructor releasing owned Arrow structures. | |
SPARROW_API const std::string_view | format () const |
Gets the Arrow format string describing the data type. | |
SPARROW_API void | set_format (const std::string_view format) |
Sets the Arrow format string. | |
SPARROW_API enum data_type | data_type () const |
Gets the data type enum corresponding to the format. | |
void SPARROW_API | set_data_type (enum data_type data_type) |
Sets the data type (updates format string accordingly). | |
SPARROW_API std::optional< std::string_view > | name () const |
Gets the optional name of the array/field. | |
SPARROW_API void | set_name (std::optional< std::string_view > name) |
Sets the name of the array/field. | |
SPARROW_API std::optional< key_value_view > | metadata () const |
Gets the metadata key-value pairs. | |
template<input_metadata_container R> | |
void | set_metadata (std::optional< R > metadata) |
Sets the metadata key-value pairs. | |
SPARROW_API std::unordered_set< ArrowFlag > | flags () const |
Gets the Arrow flags set for this array. | |
SPARROW_API void | set_flags (const std::unordered_set< ArrowFlag > &flags) |
Sets the Arrow flags for this array. | |
SPARROW_API size_t | length () const |
Gets the number of elements in the array. | |
SPARROW_API void | set_length (size_t length) |
Sets the number of elements in the array. | |
SPARROW_API int64_t | null_count () const |
Gets the number of null values in the array. | |
SPARROW_API void | set_null_count (int64_t null_count) |
Sets the number of null values in the array. | |
SPARROW_API size_t | offset () const |
Gets the starting offset within the buffers. | |
SPARROW_API void | set_offset (size_t offset) |
Sets the starting offset within the buffers. | |
SPARROW_API size_t | n_buffers () const |
Gets the number of buffers in the array. | |
SPARROW_API void | set_n_buffers (size_t n_buffers) |
Sets the number of buffers and resizes the buffer vector. | |
SPARROW_API size_t | n_children () const |
Gets the number of child arrays. | |
SPARROW_API const std::vector< sparrow::buffer_view< uint8_t > > & | buffers () const |
Gets const reference to the buffer views. | |
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > & | buffers () |
Gets mutable reference to the buffer views. | |
SPARROW_API void | set_buffer (size_t index, const buffer_view< uint8_t > &buffer) |
Sets a specific buffer at the given index. | |
SPARROW_API void | set_buffer (size_t index, buffer< uint8_t > &&buffer) |
Sets a specific buffer by moving it at the given index. | |
SPARROW_API void | resize_bitmap (size_t new_size, bool value=true) |
Resizes the validity bitmap buffer. | |
SPARROW_API size_t | insert_bitmap (size_t index, bool value, size_t count=1) |
Inserts validity bits with the same value at specified position. | |
template<std::ranges::input_range R> | |
size_t | insert_bitmap (size_t index, const R &range) |
Inserts a range of validity bits at specified position. | |
SPARROW_API size_t | erase_bitmap (size_t index, size_t count=1) |
Erases validity bits starting at specified position. | |
SPARROW_API void | push_back_bitmap (bool value) |
Appends a validity bit at the end of the bitmap. | |
SPARROW_API void | pop_back_bitmap () |
Removes the last validity bit from the bitmap. | |
template<std::ranges::input_range R> requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema_pointers> | |
void | add_children (const R &arrow_array_and_schema_pointers) |
Add children without taking their ownership. | |
template<std::ranges::input_range R> requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema> | |
void | add_children (R &&arrow_array_and_schemas) |
Add children and take their ownership. | |
SPARROW_API void | add_child (ArrowArray *array, ArrowSchema *schema) |
Add a child without taking its ownership. | |
SPARROW_API void | add_child (ArrowArray &&array, ArrowSchema &&schema) |
Add a child and take its ownership. | |
SPARROW_API void | pop_children (size_t n) |
Pop n children. | |
SPARROW_API void | set_child (size_t index, ArrowArray *array, ArrowSchema *schema) |
Set the child at the given index. | |
SPARROW_API void | set_child (size_t index, ArrowArray &&array, ArrowSchema &&schema) |
Set the child at the given index. | |
SPARROW_API const std::vector< arrow_proxy > & | children () const |
Returns a constant reference to the vector of child arrow proxies. | |
SPARROW_API std::vector< arrow_proxy > & | children () |
Returns a mutable reference to the vector of child arrow proxies. | |
SPARROW_API const std::unique_ptr< arrow_proxy > & | dictionary () const |
Returns a constant reference to the dictionary arrow proxy. | |
SPARROW_API std::unique_ptr< arrow_proxy > & | dictionary () |
Returns a mutable reference to the dictionary arrow proxy. | |
SPARROW_API void | set_dictionary (ArrowArray *array, ArrowSchema *schema) |
Set the dictionary. It does not take ownership of the ArrowArray and ArrowSchema passed by pointers. | |
SPARROW_API void | set_dictionary (ArrowArray &&array_dictionary, ArrowSchema &&schema_dictionary) |
Set the dictionary. | |
SPARROW_API bool | is_created_with_sparrow () const |
Check if the ArrowArray and ArrowSchema were created with sparrow. | |
SPARROW_API void * | private_data () const |
SPARROW_API arrow_proxy | view () const |
Get a non-owning view of the arrow_proxy. | |
SPARROW_API bool | owns_array () const |
Check whether the proxy has ownership of its internal ArrowArray. | |
SPARROW_API ArrowArray | extract_array () |
Extract the ArrowArray from the proxy, and transfers the responsibility to release it after usage to the caller. | |
SPARROW_API ArrowArray & | array () |
Get a reference to the ArrowArray of the proxy. | |
SPARROW_API const ArrowArray & | array () const |
Get a const reference to the ArrowArray of the proxy. | |
SPARROW_API bool | owns_schema () const |
Check whether the proxy has ownership of its internal ArrowSchema. | |
SPARROW_API ArrowSchema | extract_schema () |
Extract the ArrowSchema from the proxy, and transfers the responsibility to release it after usage to the caller. | |
SPARROW_API ArrowSchema & | schema () |
Get a reference to the ArrowSchema of the proxy. | |
SPARROW_API const ArrowSchema & | schema () const |
Get a const reference to the ArrowSchema of the proxy. | |
SPARROW_API arrow_schema_private_data * | get_schema_private_data () |
SPARROW_API arrow_array_private_data * | get_array_private_data () |
SPARROW_API arrow_proxy | slice (size_t start, size_t end) const |
Slices the array to keep only the elements between the given start and end . | |
SPARROW_API arrow_proxy | slice_view (size_t start, size_t end) const |
Slices the array to keep only the elements between the given start and end . | |
SPARROW_API void | update_buffers () |
Refresh the buffers views. | |
Definition at line 109 of file arrow_array_schema_proxy.hpp.
|
explicit |
Constructs an arrow_proxy taking ownership of both ArrowArray and ArrowSchema.
array | ArrowArray to take ownership of |
schema | ArrowSchema to take ownership of |
|
explicit |
Constructs an arrow_proxy taking ownership of ArrowArray, referencing ArrowSchema.
array | ArrowArray to take ownership of |
schema | Pointer to ArrowSchema (not owned) |
|
explicit |
Constructs an arrow_proxy referencing external ArrowArray and ArrowSchema.
array | Pointer to ArrowArray (not owned) |
schema | Pointer to ArrowSchema (not owned) |
SPARROW_API sparrow::arrow_proxy::arrow_proxy | ( | const arrow_proxy & | other | ) |
Copy constructor creating independent copy.
other | Source arrow_proxy to copy from |
|
noexcept |
Move constructor transferring ownership.
other | Source arrow_proxy to move from |
SPARROW_API sparrow::arrow_proxy::~arrow_proxy | ( | ) |
Destructor releasing owned Arrow structures.
SPARROW_API void sparrow::arrow_proxy::add_child | ( | ArrowArray && | array, |
ArrowSchema && | schema ) |
Add a child and take its ownership.
`arrow_proxy_exception` | If the `ArrowArray` or the `ArrowSchema` wrapped in this proxy were not created with sparrow. |
array | The ArrowArray to set as child. |
schema | The ArrowSchema to set as child. |
SPARROW_API void sparrow::arrow_proxy::add_child | ( | ArrowArray * | array, |
ArrowSchema * | schema ) |
Add a child without taking its ownership.
`arrow_proxy_exception` | If the `ArrowArray` or the `ArrowSchema` wrapped in this proxy were not created with sparrow. |
array | The ArrowArray to set as child. |
schema | The ArrowSchema to set as child. |
void sparrow::arrow_proxy::add_children | ( | const R & | arrow_array_and_schema_pointers | ) |
Add children without taking their ownership.
`arrow_proxy_exception` | If the `ArrowArray` or the `ArrowSchema` wrapped in this proxy were not created with sparrow. |
arrow_array_and_schema_pointers | The children to add. |
Definition at line 947 of file arrow_array_schema_proxy.hpp.
void sparrow::arrow_proxy::add_children | ( | R && | arrow_array_and_schemas | ) |
Add children and take their ownership.
`arrow_proxy_exception` | If the `ArrowArray` or the `ArrowSchema` wrapped in this proxy were not created with sparrow. |
arrow_array_and_schemas | The children to add. |
Definition at line 971 of file arrow_array_schema_proxy.hpp.
|
nodiscard |
Get a reference to the ArrowArray
of the proxy.
The proxy is still responsible for releasing it, and the reference returned from this method should not outlive the proxy.
The schema flags can be updated by adding sparrow::ArrowFlag::NULLABLE, if null_count is greater than 0.
ArrowArray
.
|
nodiscard |
Get a const reference to the ArrowArray
of the proxy.
The proxy is still responsible for releasing it, and the reference returned from this method should not outlive the proxy.
The schema flags can be updated by adding sparrow::ArrowFlag::NULLABLE, if null_count is greater than 0.
ArrowArray
const reference.
|
nodiscard |
Gets mutable reference to the buffer views.
|
nodiscard |
Gets const reference to the buffer views.
|
nodiscard |
Returns a mutable reference to the vector of child arrow proxies.
This method provides read-write access to the collection of child arrow proxies associated with this arrow array schema proxy. The children represent nested or structured data elements within the schema.
|
nodiscard |
Returns a constant reference to the vector of child arrow proxies.
This method provides read-only access to the collection of child arrow proxies associated with this arrow array schema proxy. The children represent nested or structured data elements within the schema.
|
nodiscard |
Gets the data type enum corresponding to the format.
|
nodiscard |
Returns a mutable reference to the dictionary arrow proxy.
This method provides read-write access to the dictionary arrow proxy associated with this arrow array schema proxy. The dictionary is used for encoding categorical data types.
|
nodiscard |
Returns a constant reference to the dictionary arrow proxy.
This method provides read-only access to the dictionary arrow proxy associated with this arrow array schema proxy. The dictionary is used for encoding categorical data types.
SPARROW_API size_t sparrow::arrow_proxy::erase_bitmap | ( | size_t | index, |
size_t | count = 1 ) |
Erases validity bits starting at specified position.
index | Position where to start erasing bits |
count | Number of bits to erase |
arrow_proxy_exception | if array is not owned by sparrow |
arrow_proxy_exception | if data type doesn't support validity bitmap |
std::out_of_range | if index >= bitmap length |
|
nodiscard |
Extract the ArrowArray
from the proxy, and transfers the responsibility to release it after usage to the caller.
`arrow_proxy_exception` | If the `ArrowArray` was not created with sparrow. |
The schema flags can be updated by adding sparrow::ArrowFlag::NULLABLE, if null_count is greater than 0.
|
nodiscard |
Extract the ArrowSchema
from the proxy, and transfers the responsibility to release it after usage to the caller.
`arrow_proxy_exception` | If the `ArrowSchema` was not created with sparrow. |
The schema flags can be updated by adding sparrow::ArrowFlag::NULLABLE, if null_count is greater than 0.
ArrowSchema
.
|
nodiscard |
Gets the Arrow flags set for this array.
|
nodiscard |
Gets the Arrow format string describing the data type.
|
nodiscard |
|
nodiscard |
SPARROW_API size_t sparrow::arrow_proxy::insert_bitmap | ( | size_t | index, |
bool | value, | ||
size_t | count = 1 ) |
Inserts validity bits with the same value at specified position.
index | Position where to insert bits |
value | Validity value to insert (true = valid, false = null) |
count | Number of bits to insert |
arrow_proxy_exception | if array is not owned by sparrow |
arrow_proxy_exception | if data type doesn't support validity bitmap |
std::out_of_range | if index > bitmap length |
|
inline |
Inserts a range of validity bits at specified position.
R | Range type containing boolean values |
index | Position where to insert bits |
range | Range of boolean values to insert |
arrow_proxy_exception | if array is not owned by sparrow |
arrow_proxy_exception | if data type doesn't support validity bitmap |
std::out_of_range | if index > bitmap length |
Definition at line 994 of file arrow_array_schema_proxy.hpp.
|
nodiscard |
Check if the ArrowArray
and ArrowSchema
were created with sparrow.
|
nodiscard |
Gets the number of elements in the array.
|
nodiscard |
Gets the metadata key-value pairs.
|
nodiscard |
Gets the number of buffers in the array.
|
nodiscard |
Gets the number of child arrays.
|
nodiscard |
Gets the optional name of the array/field.
|
nodiscard |
Gets the number of null values in the array.
|
nodiscard |
Gets the starting offset within the buffers.
|
noexcept |
Move assignment operator.
other | Source arrow_proxy to move from |
SPARROW_API arrow_proxy & sparrow::arrow_proxy::operator= | ( | const arrow_proxy & | other | ) |
Copy assignment operator.
other | Source arrow_proxy to copy from |
|
nodiscard |
Check whether the proxy has ownership of its internal ArrowArray.
|
nodiscard |
Check whether the proxy has ownership of its internal ArrowSchema.
SPARROW_API void sparrow::arrow_proxy::pop_back_bitmap | ( | ) |
Removes the last validity bit from the bitmap.
arrow_proxy_exception | if array is not owned by sparrow |
arrow_proxy_exception | if data type doesn't support validity bitmap |
SPARROW_API void sparrow::arrow_proxy::pop_children | ( | size_t | n | ) |
Pop n children.
If the children were created by sparrow or are owned by the proxy, it will delete them.
n | The number of children to pop. |
|
nodiscard |
SPARROW_API void sparrow::arrow_proxy::push_back_bitmap | ( | bool | value | ) |
Appends a validity bit at the end of the bitmap.
value | Validity value to append (true = valid, false = null) |
arrow_proxy_exception | if array is not owned by sparrow |
arrow_proxy_exception | if data type doesn't support validity bitmap |
SPARROW_API void sparrow::arrow_proxy::resize_bitmap | ( | size_t | new_size, |
bool | value = true ) |
Resizes the validity bitmap buffer.
new_size | New size for the bitmap buffer |
value | Default value for new bits (true = valid, false = null) |
arrow_proxy_exception | if array is not owned by sparrow |
arrow_proxy_exception | if data type doesn't support validity bitmap |
|
nodiscard |
Get a reference to the ArrowSchema
of the proxy.
The proxy is still responsible for releasing it, and the reference returned from this method should not outlive the proxy.
The schema flags can be updated by adding sparrow::ArrowFlag::NULLABLE, if null_count is greater than 0.
ArrowSchema
reference.
|
nodiscard |
Get a const reference to the ArrowSchema
of the proxy.
The proxy is still responsible for releasing it, and the reference returned from this method should not outlive the proxy.
The schema flags can be updated by adding sparrow::ArrowFlag::NULLABLE, if null_count is greater than 0.
ArrowSchema
const reference.
SPARROW_API void sparrow::arrow_proxy::set_buffer | ( | size_t | index, |
buffer< uint8_t > && | buffer ) |
Sets a specific buffer by moving it at the given index.
index | Index of the buffer to set |
buffer | Buffer to move and set |
arrow_proxy_exception | if array is not owned by sparrow |
std::out_of_range | if index >= n_buffers() |
SPARROW_API void sparrow::arrow_proxy::set_buffer | ( | size_t | index, |
const buffer_view< uint8_t > & | buffer ) |
Sets a specific buffer at the given index.
index | Index of the buffer to set |
buffer | Buffer view to set |
arrow_proxy_exception | if array is not owned by sparrow |
std::out_of_range | if index >= n_buffers() |
SPARROW_API void sparrow::arrow_proxy::set_child | ( | size_t | index, |
ArrowArray && | array, | ||
ArrowSchema && | schema ) |
Set the child at the given index.
It takes ownership of the ArrowArray
and ArrowSchema
passed by rvalue references.
`arrow_proxy_exception` | If the `ArrowArray` or `ArrowSchema` wrapped in this proxy were not created with sparrow. |
index | The index of the child to set. |
array | The ArrowArray to set as child. |
schema | The ArrowSchema to set as child. |
SPARROW_API void sparrow::arrow_proxy::set_child | ( | size_t | index, |
ArrowArray * | array, | ||
ArrowSchema * | schema ) |
Set the child at the given index.
It does not take the ownership on the ArrowArray
and ArrowSchema
passed by pointers.
`arrow_proxy_exception` | If the `ArrowArray` or the `ArrowSchema` wrapped in this proxy were not created with sparrow. |
index | The index of the child to set. |
array | The ArrowArray to set as child. |
schema | The ArrowSchema to set as child. |
void SPARROW_API sparrow::arrow_proxy::set_data_type | ( | enum data_type | data_type | ) |
Sets the data type (updates format string accordingly).
data_type | New data type to set |
arrow_proxy_exception | if schema is not owned by sparrow |
SPARROW_API void sparrow::arrow_proxy::set_dictionary | ( | ArrowArray && | array_dictionary, |
ArrowSchema && | schema_dictionary ) |
Set the dictionary.
It takes ownership of the ArrowArray
and ArrowSchema
passed by rvalue references.
`arrow_proxy_exception` | If the `ArrowArray` or `ArrowSchema` wrapped in this proxy were not created with sparrow. |
array_dictionary | The ArrowArray to set as dictionary. |
schema_dictionary | The ArrowSchema to set as dictionary. |
SPARROW_API void sparrow::arrow_proxy::set_dictionary | ( | ArrowArray * | array, |
ArrowSchema * | schema ) |
Set the dictionary. It does not take ownership of the ArrowArray
and ArrowSchema
passed by pointers.
`arrow_proxy_exception` | If the `ArrowArray` or `ArrowSchema` were not created with sparrow. |
array | The ArrowArray to set as dictionary. |
schema | The ArrowSchema to set as dictionary. |
SPARROW_API void sparrow::arrow_proxy::set_flags | ( | const std::unordered_set< ArrowFlag > & | flags | ) |
Sets the Arrow flags for this array.
flags | Set of Arrow flags to apply |
arrow_proxy_exception | if schema is not owned by sparrow |
SPARROW_API void sparrow::arrow_proxy::set_format | ( | const std::string_view | format | ) |
Sets the Arrow format string.
format | New format string to set |
arrow_proxy_exception | if schema is not owned by sparrow |
SPARROW_API void sparrow::arrow_proxy::set_length | ( | size_t | length | ) |
Sets the number of elements in the array.
This method updates the length field but does not resize buffers. Buffers should be resized separately to match the new length.
length | New number of elements |
arrow_proxy_exception | if array is not owned by sparrow |
|
inline |
Sets the metadata key-value pairs.
R | Container type for metadata pairs |
metadata | Optional metadata container (nullopt to clear) |
arrow_proxy_exception | if schema is not owned by sparrow |
Definition at line 312 of file arrow_array_schema_proxy.hpp.
SPARROW_API void sparrow::arrow_proxy::set_n_buffers | ( | size_t | n_buffers | ) |
Sets the number of buffers and resizes the buffer vector.
n_buffers | New number of buffers |
arrow_proxy_exception | if array is not owned by sparrow |
SPARROW_API void sparrow::arrow_proxy::set_name | ( | std::optional< std::string_view > | name | ) |
Sets the name of the array/field.
name | Optional name to set (nullopt to clear) |
arrow_proxy_exception | if schema is not owned by sparrow |
SPARROW_API void sparrow::arrow_proxy::set_null_count | ( | int64_t | null_count | ) |
Sets the number of null values in the array.
This method updates the null count field but does not modify the bitmap. The bitmap should be updated separately to reflect the actual null count.
null_count | New null count (-1 for unknown) |
arrow_proxy_exception | if array is not owned by sparrow |
SPARROW_API void sparrow::arrow_proxy::set_offset | ( | size_t | offset | ) |
Sets the starting offset within the buffers.
offset | New starting offset |
arrow_proxy_exception | if array is not owned by sparrow |
|
nodiscard |
Slices the array to keep only the elements between the given start
and end
.
A copy of the Array is modified. The data is not modified, only the ArrowArray.offset and ArrowArray.length are updated. If end
is greater than the size of the buffers, the following elements will be invalid.
start | The index of the first element to keep. Must be less than end . |
end | The index of the first element to discard. Must be less than the size of the buffers. |
|
nodiscard |
Slices the array to keep only the elements between the given start
and end
.
A view of the Array is returned. The data is not modified, only the ArrowArray.offset and ArrowArray.length are updated. If end
is greater than the size of the buffers, the following elements will be invalid.
start | The index of the first element to keep. Must be less than end . |
end | The index of the first element to discard. Must be less than the size of the buffers. |
SPARROW_API void sparrow::arrow_proxy::update_buffers | ( | ) |
Refresh the buffers views.
This method should be called after modifying the buffers of the array.
|
nodiscard |
Get a non-owning view of the arrow_proxy.