43 :
std::runtime_error(message)
221 template <std::ranges::input_range R>
256 template <std::ranges::input_range R>
266 template <std::ranges::input_range R>
385 std::variant<ArrowArray*, ArrowArray> m_array;
386 std::variant<ArrowSchema*, ArrowSchema> m_schema;
387 std::vector<sparrow::buffer_view<uint8_t>> m_buffers;
388 std::vector<arrow_proxy> m_children;
389 std::unique_ptr<arrow_proxy> m_dictionary;
398 template <
typename AA,
typename AS>
399 requires std::same_as<std::remove_pointer_t<std::remove_cvref_t<AA>>,
ArrowArray>
400 && std::same_as<std::remove_pointer_t<std::remove_cvref_t<AS>>,
ArrowSchema>
403 [[nodiscard]]
bool empty()
const;
404 SPARROW_API void resize_children(
size_t children_count);
408 void update_children();
409 void update_dictionary();
410 void update_null_count();
413 [[nodiscard]]
bool array_created_with_sparrow()
const;
414 [[nodiscard]]
bool schema_created_with_sparrow()
const;
416 void validate_array_and_schema()
const;
418 [[nodiscard]]
bool is_arrow_array_valid()
const;
419 [[nodiscard]]
bool is_arrow_schema_valid()
const;
420 [[nodiscard]]
bool is_proxy_valid()
const;
422 [[nodiscard]]
size_t get_null_count()
const;
427 template <std::ranges::input_range R>
428 requires std::same_as<std::ranges::range_value_t<R>, arrow_array_and_schema_pointers>
433 throw arrow_proxy_exception(
"Cannot set n_buffers on non-sparrow created ArrowArray or ArrowSchema");
437 const size_t original_children_count =
n_children();
438 const size_t new_children_count = original_children_count + add_children_count;
440 resize_children(new_children_count);
441 for (
size_t i = 0; i < add_children_count; ++i)
444 i + original_children_count,
451 template <std::ranges::input_range R>
457 throw arrow_proxy_exception(
"Cannot set n_buffers on non-sparrow created ArrowArray or ArrowSchema");
460 const size_t add_children_count = std::ranges::size(arrow_arrays_and_schemas);
461 const size_t original_children_count =
n_children();
462 const size_t new_children_count = original_children_count + add_children_count;
464 resize_children(new_children_count);
465 for (
size_t i = 0; i < add_children_count; ++i)
468 i + original_children_count,
469 std::move(arrow_arrays_and_schemas[i].
array),
470 std::move(arrow_arrays_and_schemas[i].
schema)
475 template <std::ranges::input_range R>
483 auto bitmap = get_non_owning_dynamic_bitset();
484 const auto it = bitmap.insert(
sparrow::next(bitmap.cbegin(), index), range.begin(), range.end());
485 return static_cast<size_t>(std::distance(bitmap.begin(), it));
489#if defined(__cpp_lib_format)
492struct std::formatter<
sparrow::buffer_view<uint8_t>>
496 char delimiter =
' ';
497 static constexpr std::string_view opening =
"[";
498 static constexpr std::string_view closing =
"]";
502 constexpr auto parse(std::format_parse_context& ctx)
504 auto it = ctx.begin();
505 auto end = ctx.end();
508 if (it != end && *it !=
'}')
513 if (it != end && *it !=
'}')
515 throw std::format_error(
"Invalid format specifier for range");
521 auto format(
const sparrow::buffer_view<uint8_t>& range, std::format_context& ctx)
const
523 auto out = ctx.out();
526 out = std::ranges::copy(opening, out).out;
530 for (
const auto& elem : range)
536 out = std::format_to(out,
"{}", elem);
541 out = std::ranges::copy(closing, out).out;
549 os << std::format(
"{}", value);
554struct std::formatter<
sparrow::arrow_proxy>
556 constexpr auto parse(std::format_parse_context& ctx)
561 auto format(
const sparrow::arrow_proxy& obj, std::format_context& ctx)
const
563 std::string buffers_description_str;
564 for (
size_t i = 0; i < obj.
n_buffers(); ++i)
567 std::back_inserter(buffers_description_str),
570 obj.
buffers()[i].size() *
sizeof(uint8_t),
575 std::string children_str;
576 for (
const auto& child : obj.
children())
578 std::format_to(std::back_inserter(children_str),
"{}\n", child);
581 const std::string dictionary_str = obj.
dictionary() ? std::format(
"{}", *obj.
dictionary()) :
"nullptr";
583 return std::format_to(
585 "arrow_proxy\n- format: {}\n- name; {}\n- metadata: {}\n- data_type: {}\n- null_count:{}\n- length: {}\n- offset: {}\n- n_buffers: {}\n- buffers:\n{}\n- n_children: {}\n-children: {}\n- dictionary: {}",
587 obj.
name().value_or(
""),
594 buffers_description_str,
604 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
Private data for ArrowArray.
Exception thrown by the arrow_proxy class.
arrow_proxy_exception(const std::string &message)
Proxy class over ArrowArray and ArrowSchema.
SPARROW_API void push_back_bitmap(bool value)
Push a value at the end of the bitmap buffer.
SPARROW_API ArrowArray extract_array()
SPARROW_API const ArrowSchema & schema() const
SPARROW_API void add_child(ArrowArray *array, ArrowSchema *schema)
Add a child without taking its ownership.
SPARROW_API arrow_proxy & operator=(const arrow_proxy &)
SPARROW_API arrow_proxy(ArrowArray &&array, ArrowSchema *schema)
Constructs an arrow_proxy which takes the ownership of the ArrowArray and uses the provided ArrowSchema.
SPARROW_API void set_metadata(std::optional< std::string_view > metadata)
Set the metadata of the ArrowSchema.
SPARROW_API void set_buffer(size_t index, buffer< uint8_t > &&buffer)
Set the buffer at the given index.
SPARROW_API ArrowSchema & schema()
SPARROW_API const std::unique_ptr< arrow_proxy > & dictionary() const
void SPARROW_API set_data_type(enum data_type data_type)
Set the data type.
SPARROW_API bool owns_schema() const
SPARROW_API std::vector< arrow_proxy > & children()
SPARROW_API void add_child(ArrowArray &&array, ArrowSchema &&schema)
Add a child and takes its ownership.
SPARROW_API bool is_created_with_sparrow() const
SPARROW_API size_t offset() const
SPARROW_API std::vector< ArrowFlag > flags() const
SPARROW_API arrow_proxy & operator=(arrow_proxy &&)
SPARROW_API arrow_proxy(ArrowArray &&array, ArrowSchema &&schema)
Constructs an arrow_proxy which takes the ownership of the ArrowArray and ArrowSchema.
SPARROW_API void set_name(std::optional< std::string_view > name)
Set the name of the ArrowSchema.
SPARROW_API const std::string_view format() const
SPARROW_API void set_flags(const std::vector< ArrowFlag > &flags)
Set the flags of the ArrowSchema.
SPARROW_API size_t length() const
SPARROW_API void set_n_buffers(size_t n_buffers)
Set the number of buffers of the ArrowArray.
SPARROW_API void set_buffer(size_t index, const buffer_view< uint8_t > &buffer)
Set the buffer at the given index.
SPARROW_API void set_child(size_t index, ArrowArray *array, ArrowSchema *schema)
Set the child at the given index.
SPARROW_API void pop_back_bitmap()
Pop a value at the end of the bitmap buffer.
SPARROW_API void set_null_count(int64_t null_count)
Set the null count of the ArrowArray.
SPARROW_API const std::vector< arrow_proxy > & children() const
SPARROW_API std::vector< sparrow::buffer_view< uint8_t > > & buffers()
SPARROW_API enum data_type data_type() const
SPARROW_API arrow_proxy(const arrow_proxy &)
SPARROW_API arrow_proxy(arrow_proxy &&)
SPARROW_API const ArrowArray & array() const
SPARROW_API void set_child(size_t index, ArrowArray &&array, ArrowSchema &&schema)
Set the child at the given index.
SPARROW_API std::optional< std::string_view > metadata() const
SPARROW_API size_t n_children() const
SPARROW_API ArrowArray & array()
SPARROW_API ~arrow_proxy()
SPARROW_API std::optional< std::string_view > name() const
SPARROW_API arrow_array_private_data * get_array_private_data()
SPARROW_API bool owns_array() const
SPARROW_API void set_length(size_t length)
Set the length of the ArrowArray.
SPARROW_API void set_offset(size_t offset)
Set the offset of the ArrowArray.
SPARROW_API void * private_data() const
SPARROW_API void set_format(const std::string_view format)
Set the format according to the Arrow format specification: https://arrow.apache.org/docs/dev/format/CDataInterface.html#data-type-description-format-strings
SPARROW_API arrow_proxy view() const
Get a non-owning view of the arrow_proxy.
SPARROW_API arrow_proxy slice_view(size_t start, size_t end) const
Slices the array to keep only the elements between the given start and end.
SPARROW_API void pop_children(size_t n)
Pop n children.
SPARROW_API std::unique_ptr< arrow_proxy > & dictionary()
SPARROW_API void resize_bitmap(size_t new_size, bool value=true)
Resize the bitmap buffer of the ArrowArray.
SPARROW_API size_t n_buffers() const
SPARROW_API size_t erase_bitmap(size_t index, size_t count=1)
Erase several elements in the bitmap buffer at the given index.
SPARROW_API int64_t null_count() const
SPARROW_API arrow_schema_private_data * get_schema_private_data()
SPARROW_API arrow_proxy slice(size_t start, size_t end) const
Slices the array to keep only the elements between the given start and end.
void add_children(const R &arrow_array_and_schema_pointers)
Add children without taking their ownership.
SPARROW_API void set_dictionary(ArrowArray *array, ArrowSchema *schema)
Set the dictionary.
SPARROW_API const std::vector< sparrow::buffer_view< uint8_t > > & buffers() const
SPARROW_API size_t insert_bitmap(size_t index, bool value, size_t count=1)
Insert elements of the same value in the bitmap buffer at the given index.
SPARROW_API ArrowSchema extract_schema()
SPARROW_API void update_buffers()
Refresh the buffers views.
SPARROW_API arrow_proxy(ArrowArray *array, ArrowSchema *schema)
Constructs an arrow_proxy which uses the provided ArrowArray and ArrowSchema.
Private data for ArrowSchema.
Object that owns a piece of contiguous memory.
#define SPARROW_ASSERT_TRUE(expr__)
SPARROW_API void swap(ArrowArray &lhs, ArrowArray &rhs)
Swaps the contents of the two ArrowArray objects.
constexpr bool has_bitmap(data_type dt)
constexpr InputIt next(InputIt it, Distance n)
std::ostream & operator<<(std::ostream &stream, T n)