18#include <initializer_list>
22#include <unordered_map>
29#if defined(__cpp_lib_format)
86 using name_range = std::ranges::ref_view<const std::vector<name_type>>;
121 std::ranges::input_range NR,
122 std::ranges::input_range CR,
125 std::convertible_to<std::ranges::range_value_t<NR>, std::string>
126 and std::same_as<std::ranges::range_value_t<CR>,
array>
131 std::optional<std::string_view>
name = std::nullopt,
132 std::optional<METADATA_RANGE> metadata = std::nullopt
156 template <std::ranges::input_range CR, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
157 requires std::same_as<std::ranges::range_value_t<CR>,
array>
160 std::optional<std::string_view>
name = std::nullopt,
161 std::optional<METADATA_RANGE> metadata = std::nullopt
410 | std::views::transform(
519 template <
class AA,
class AS>
520 void init(AA* arr, AS* sch);
535 template <
class U,
class R>
536 [[nodiscard]] std::vector<U> to_vector(R&& range)
const;
564 using metadata_type = std::vector<metadata_pair>;
565 using array_storage_type = std::variant<array, std::reference_wrapper<array>>;
567 std::optional<name_type> m_name = std::nullopt;
568 std::optional<metadata_type> m_metadata = std::nullopt;
569 std::vector<name_type> m_name_list;
570 std::vector<array_storage_type> m_array_list;
572 mutable std::unordered_map<name_type, array*> m_array_map;
574 mutable bool m_dirty_map =
true;
580 SPARROW_API static const array* get_array_ptr(
const array_storage_type& storage);
608 template <std::ranges::input_range NR, std::ranges::input_range CR, input_metadata_container METADATA_RANGE>
609 requires(std::convertible_to<std::ranges::range_value_t<NR>, std::string>
610 and std::same_as<std::ranges::range_value_t<CR>,
array>)
614 std::optional<std::string_view>
name,
615 std::optional<METADATA_RANGE> metadata
618 , m_metadata(std::move(metadata))
619 , m_name_list(to_vector<name_type>(std::forward<NR>(
names)))
621 m_array_list.reserve(std::ranges::size(
columns));
624 if constexpr (std::is_lvalue_reference_v<CR>)
626 m_array_list.emplace_back(col);
630 m_array_list.emplace_back(std::move(col));
633 update_array_map_cache();
638 inline std::vector<record_batch::name_type>
get_names(
const std::vector<array>& array_list)
640 const auto names = array_list
641 | std::views::transform(
644 return ar.
name().value();
647 return {names.begin(), names.end()};
651 template <std::ranges::input_range CR, input_metadata_container METADATA_RANGE>
652 requires std::same_as<std::ranges::range_value_t<CR>, array>
655 , m_metadata(
std::move(metadata))
658 m_array_list.reserve(std::ranges::size(
columns));
661 if constexpr (std::is_lvalue_reference_v<CR>)
663 m_array_list.emplace_back(col);
667 m_array_list.emplace_back(std::move(col));
670 update_array_map_cache();
674 void record_batch::init(
ArrowArray&& arr, AS* sch)
676 partial_init_from_schema(*sch);
677 std::size_t column_size = m_name_list.capacity();
678 for (std::size_t i = 0; i < column_size; ++i)
680 m_name_list.emplace_back(sch->children[i]->name);
681 m_array_list.emplace_back(
array(std::move(*(arr.children[i])), sch->children[i]));
685 update_array_map_cache();
688 template <
class AA,
class AS>
689 void record_batch::init(AA* arr, AS* sch)
691 partial_init_from_schema(*sch);
692 std::size_t column_size = m_name_list.capacity();
693 for (std::size_t i = 0; i < column_size; ++i)
695 m_name_list.emplace_back(sch->children[i]->name);
696 m_array_list.emplace_back(array(arr->children[i], sch->children[i]));
698 update_array_map_cache();
701 template <
class U,
class R>
702 std::vector<U> record_batch::to_vector(R&& range)
const
705 if constexpr (std::ranges::sized_range<
decltype(range)>)
707 v.reserve(std::ranges::size(range));
709 if constexpr (std::is_lvalue_reference_v<R>)
711 std::ranges::copy(range, std::back_inserter(v));
715 std::ranges::move(range, std::back_inserter(v));
721#if defined(__cpp_lib_format)
723struct std::formatter<
sparrow::record_batch>
725 constexpr auto parse(std::format_parse_context& ctx)
730 auto format(
const sparrow::record_batch& rb, std::format_context& ctx)
const
732 auto columns_view = rb.
columns();
733 std::vector<std::vector<sparrow::array_traits::const_reference>> values_by_columns;
736 for (
const auto& ar : columns_view)
738 std::vector<sparrow::array_traits::const_reference> column_values;
739 column_values.reserve(rb.
nb_rows());
740 for (std::size_t i = 0; i < rb.
nb_rows(); ++i)
742 column_values.push_back(ar[i]);
744 values_by_columns.push_back(std::move(column_values));
756 os << std::format(
"{}", value);
Dynamically typed array encapsulating an Arrow layout.
SPARROW_API std::optional< std::string_view > name() const
SPARROW_API record_batch(const record_batch &other)
Copy constructor.
SPARROW_API record_batch(ArrowArray &&array, ArrowSchema &&schema)
Constructs a record_batch from the given Arrow C structures, whose ownership is transferred to the record_batch.
SPARROW_API const std::optional< name_type > & name() const
Gets the name of the record batch.
SPARROW_API void add_column_reference(name_type name, array &column)
Adds a column by reference with the specified name.
std::initializer_list< std::pair< name_type, array > > initializer_type
SPARROW_API void add_column(array column)
Adds a new column using the array's internal name.
record_batch()=default
Default constructor creating an empty record batch.
SPARROW_API const array & get_column(const name_type &key) const
Gets the column with the specified name.
SPARROW_API record_batch(struct_array &&ar)
Constructs a record_batch from a struct_array.
record_batch(record_batch &&) noexcept=default
SPARROW_API name_range names() const
Gets a range view of the column names.
SPARROW_API bool contains_column(const name_type &key) const
Checks if the record batch contains a column with the specified name.
SPARROW_API struct_array extract_struct_array()
Moves the internal columns into a struct_array and empties the record batch.
SPARROW_API record_batch(ArrowArray &&array, ArrowSchema *schema)
Constructs a record_batch from the given Arrow C structures.
SPARROW_API const name_type & get_column_name(size_type index) const
Gets the name of the column at the specified index.
SPARROW_API record_batch(ArrowArray *array, ArrowSchema *schema)
Constructs a record_batch from the given Arrow C structures.
SPARROW_API record_batch(ArrowArray &&array, const ArrowSchema *schema)
Constructs a record_batch from the given Arrow C structures.
SPARROW_API void add_column(name_type name, array column)
Adds a new column to the record batch with the specified name.
SPARROW_API size_type nb_rows() const
Gets the number of rows in the record batch.
SPARROW_API record_batch & operator=(const record_batch &other)
Copy assignment operator.
SPARROW_API record_batch(const ArrowArray *array, const ArrowSchema *schema)
Constructs a record_batch from the given Arrow C structures.
SPARROW_API size_type nb_columns() const
Gets the number of columns in the record batch.
SPARROW_API void add_column_reference(array &column)
Adds a column by reference using the array's internal name.
SPARROW_API record_batch(initializer_type init)
Constructs a record_batch from initializer list of name-array pairs.
auto columns() const
Gets a range view of the columns.
std::ranges::ref_view< const std::vector< name_type > > name_range
std::vector< record_batch::name_type > get_names(const std::vector< array > &array_list)
ArrowArray make_empty_arrow_array()
SPARROW_API bool operator==(const array &lhs, const array &rhs)
Compares the content of two arrays.
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
std::ostream & operator<<(std::ostream &os, const nullval_t &)
constexpr void to_table_with_columns(OutputIt out, const Headers &headers, const Columns &columns)