sparrow ..
Loading...
Searching...
No Matches
sparrow::record_batch Class Reference

#include <record_batch.hpp>

Public Types

using name_type = std::string
 
using size_type = std::size_t
 
using initializer_type = std::initializer_list<std::pair<name_type, array>>
 
using name_range = std::ranges::ref_view<const std::vector<name_type>>
 
using column_range = std::ranges::ref_view<const std::vector<array>>
 

Public Member Functions

template<std::ranges::input_range NR, std::ranges::input_range CR, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
requires ( std::convertible_to<std::ranges::range_value_t<NR>, std::string> and std::same_as<std::ranges::range_value_t<CR>, array> )
constexpr record_batch (NR &&names, CR &&columns, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
 Constructs a record_batch from separate name and array ranges.
 
template<std::ranges::input_range CR, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
requires std::same_as<std::ranges::range_value_t<CR>, array>
 record_batch (CR &&columns, std::optional< std::string_view > name=std::nullopt, std::optional< METADATA_RANGE > metadata=std::nullopt)
 Constructs a record_batch from arrays with existing names.
 
SPARROW_API record_batch (initializer_type init)
 Constructs a record_batch from initializer list of name-array pairs.
 
SPARROW_API record_batch (ArrowArray &&array, ArrowSchema &&schema)
 Constructs a record_batch from the given Arrow C structures, whose ownership is transferred to the record_batch.
 
SPARROW_API record_batch (ArrowArray &&array, ArrowSchema *schema)
 Constructs a record_batch from the given Arrow C structures.
 
SPARROW_API record_batch (ArrowArray &&array, const ArrowSchema *schema)
 Constructs a record_batch from the given Arrow C structures.
 
SPARROW_API record_batch (ArrowArray *array, ArrowSchema *schema)
 Constructs a record_batch from the given Arrow C structures.
 
SPARROW_API record_batch (const ArrowArray *array, const ArrowSchema *schema)
 Constructs a record_batch from the given Arrow C structures.
 
SPARROW_API record_batch (struct_array &&ar)
 Constructs a record_batch from a struct_array.
 
SPARROW_API record_batch (const record_batch &other)
 Copy constructor.
 
SPARROW_API record_batch & operator= (const record_batch &other)
 Copy assignment operator.
 
 record_batch (record_batch &&)=default
 
record_batch & operator= (record_batch &&)=default
 
SPARROW_API size_type nb_columns () const
 Gets the number of columns in the record batch.
 
SPARROW_API size_type nb_rows () const
 Gets the number of rows in the record batch.
 
SPARROW_API bool contains_column (const name_type &key) const
 Checks if the record batch contains a column with the specified name.
 
SPARROW_API const name_type & get_column_name (size_type index) const
 Gets the name of the column at the specified index.
 
SPARROW_API const array & get_column (const name_type &key) const
 Gets the column with the specified name.
 
SPARROW_API array & get_column (const name_type &key)
 Gets the column with the specified name.
 
SPARROW_API const array & get_column (size_type index) const
 Gets the column at the specified index.
 
SPARROW_API array & get_column (size_type index)
 Gets the column at the specified index.
 
SPARROW_API const std::optional< name_type > & name () const
 Gets the name of the record batch.
 
SPARROW_API name_range names () const
 Gets a range view of the column names.
 
SPARROW_API column_range columns () const
 Gets a range view of the columns.
 
SPARROW_API struct_array extract_struct_array ()
 Moves the internal columns into a struct_array and empties the record batch.
 
SPARROW_API void add_column (name_type name, array column)
 Adds a new column to the record batch with the specified name.
 
SPARROW_API void add_column (array column)
 Adds a new column using the array's internal name.
 

Detailed Description

Member Typedef Documentation

◆ column_range

using sparrow::record_batch::column_range = std::ranges::ref_view<const std::vector<array>>

◆ initializer_type

◆ name_range

using sparrow::record_batch::name_range = std::ranges::ref_view<const std::vector<name_type>>

◆ name_type

◆ size_type

using sparrow::record_batch::size_type = std::size_t

Definition at line 74 of file record_batch.hpp.

Constructor & Destructor Documentation

◆ record_batch() [1/11]

template<std::ranges::input_range NR, std::ranges::input_range CR, input_metadata_container METADATA_RANGE>
requires ( std::convertible_to<std::ranges::range_value_t<NR>, std::string> and std::same_as<std::ranges::range_value_t<CR>, array> )
sparrow::record_batch::record_batch ( NR && names,
CR && columns,
std::optional< std::string_view > name = std::nullopt,
std::optional< METADATA_RANGE > metadata = std::nullopt )
constexpr

Constructs a record_batch from separate name and array ranges.

Each array is mapped to the name at the corresponding position in the names range. The ranges must have the same size, and all arrays must have equal length.

Template Parameters
NR: Input range type for names (convertible to std::string)
CR: Input range type for arrays
METADATA_RANGE: Type of metadata container (default: std::vector<metadata_pair>)
Parameters
names: Input range of column names (must be unique)
columns: Input range of arrays (must have equal lengths)
name: Optional name for the record batch itself
metadata: Optional metadata for the record batch
Precondition
std::ranges::size(names) == std::ranges::size(columns)
All names in the range must be unique
All arrays must have the same length
Names must be convertible to std::string
Postcondition
Record batch contains mapping from names to arrays
Internal consistency is maintained
Array map cache is properly initialized
Exceptions
std::invalid_argument if preconditions are violated
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.

Definition at line 531 of file record_batch.hpp.

Here is the call graph for this function:
Here is the caller graph for this function:

◆ record_batch() [2/11]

template<std::ranges::input_range CR, input_metadata_container METADATA_RANGE>
requires std::same_as<std::ranges::range_value_t<CR>, array>
sparrow::record_batch::record_batch ( CR && columns,
std::optional< std::string_view > name = std::nullopt,
std::optional< METADATA_RANGE > metadata = std::nullopt )

Constructs a record_batch from arrays with existing names.

Each array must have a non-empty name. The array names are extracted and used as column names in the record batch.

Template Parameters
CR: Input range type for arrays
METADATA_RANGE: Type of metadata container (default: std::vector<metadata_pair>)
Parameters
columns: Input range of named arrays
name: Optional name for the record batch itself
metadata: Optional metadata for the record batch
Precondition
All arrays must have non-empty names (arr.name().has_value() && !arr.name()->empty())
All array names must be unique within the range
All arrays must have the same length
Postcondition
Record batch contains arrays mapped to their internal names
Internal consistency is maintained
Exceptions
std::invalid_argument if any array lacks a name or names are not unique
std::invalid_argument if arrays have different lengths

Definition at line 562 of file record_batch.hpp.

Here is the call graph for this function:

◆ record_batch() [3/11]

SPARROW_API sparrow::record_batch::record_batch ( initializer_type init)

Constructs a record_batch from initializer list of name-array pairs.

Parameters
init: Initializer list of std::pair<name_type, array>
Precondition
All names in the initializer list must be unique
All arrays must have the same length
Postcondition
Record batch contains the specified name-array mappings
Internal consistency is maintained
Exceptions
std::invalid_argument if names are not unique or arrays have different lengths

◆ record_batch() [4/11]

SPARROW_API sparrow::record_batch::record_batch ( ArrowArray && array,
ArrowSchema && schema )

Constructs a record_batch from the given Arrow C structures, whose ownership is transferred to the record_batch.

The user should use neither array nor schema after calling this constructor.

Parameters
array: The ArrowArray structure to transfer into the record_batch.
schema: The ArrowSchema structure to transfer into the record_batch.

◆ record_batch() [5/11]

SPARROW_API sparrow::record_batch::record_batch ( ArrowArray && array,
ArrowSchema * schema )

Constructs a record_batch from the given Arrow C structures.

The record_batch takes ownership of the ArrowArray only. The user should not use array after calling this constructor. schema can still be used normally.

Parameters
array: The ArrowArray structure to transfer into the record_batch.
schema: The ArrowSchema to reference in the record_batch.

◆ record_batch() [6/11]

SPARROW_API sparrow::record_batch::record_batch ( ArrowArray && array,
const ArrowSchema * schema )

Constructs a record_batch from the given Arrow C structures.

The record_batch takes ownership of the ArrowArray only. The user should not use array after calling this constructor. schema can still be used normally.

Parameters
array: The ArrowArray structure to transfer into the record_batch.
schema: The const ArrowSchema to reference in the record_batch.

◆ record_batch() [7/11]

SPARROW_API sparrow::record_batch::record_batch ( ArrowArray * array,
ArrowSchema * schema )

Constructs a record_batch from the given Arrow C structures.

Both structures are referenced from the record_batch and can still be used normally after calling this constructor.

Parameters
array: The ArrowArray structure to reference in the record_batch.
schema: The ArrowSchema to reference in the record_batch.

◆ record_batch() [8/11]

SPARROW_API sparrow::record_batch::record_batch ( const ArrowArray * array,
const ArrowSchema * schema )

Constructs a record_batch from the given Arrow C structures.

Both structures are referenced from the record_batch and can still be used normally after calling this constructor.

Parameters
array: The const ArrowArray structure to reference in the record_batch.
schema: The const ArrowSchema to reference in the record_batch.

◆ record_batch() [9/11]

SPARROW_API sparrow::record_batch::record_batch ( struct_array && ar)

Constructs a record_batch from a struct_array.

The struct array's fields become the columns of the record batch, with field names becoming column names.

Parameters
ar: Struct array to convert (must own its internal Arrow structures)
Precondition
ar must be a valid struct array with owned Arrow structures
ar must have at least one field
All field names must be unique (guaranteed by struct_array invariants)
Postcondition
Record batch contains columns corresponding to struct fields
Struct array is moved and becomes invalid
Internal consistency is maintained

◆ record_batch() [10/11]

SPARROW_API sparrow::record_batch::record_batch ( const record_batch & other)

Copy constructor.

Parameters
other: The record_batch to copy from
Precondition
other must be in a valid state
Postcondition
This record batch is an independent copy of other
All arrays are deep-copied
Internal consistency is maintained
Here is the call graph for this function:

◆ record_batch() [11/11]

sparrow::record_batch::record_batch ( record_batch && )
default
Here is the call graph for this function:

Member Function Documentation

◆ add_column() [1/2]

SPARROW_API void sparrow::record_batch::add_column ( array column)

Adds a new column using the array's internal name.

Parameters
column: The array to add (must have a non-empty name)
Precondition
column must have a non-empty name (column.name().has_value() && !column.name()->empty())
column.name() must not already exist in the record batch
If record batch is not empty, column.size() must equal nb_rows()
Postcondition
Record batch contains the new column mapped to column.name()
nb_columns() increases by 1
If this was the first column, nb_rows() equals column.size()
Internal consistency is maintained
Exceptions
std::invalid_argument if column lacks name, name exists, or size is incompatible

◆ add_column() [2/2]

SPARROW_API void sparrow::record_batch::add_column ( name_type name,
array column )

Adds a new column to the record batch with the specified name.

Parameters
name: The name for the new column (must be unique)
column: The array to add as a column
Precondition
name must not already exist in the record batch
If record batch is not empty, column.size() must equal nb_rows()
name must not be empty
Postcondition
Record batch contains the new column mapped to name
nb_columns() increases by 1
If this was the first column, nb_rows() equals column.size()
Internal consistency is maintained
Array map cache is updated
Exceptions
std::invalid_argument if name already exists or column size is incompatible
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.
Here is the call graph for this function:

◆ columns()

SPARROW_API column_range sparrow::record_batch::columns ( ) const

Gets a range view of the columns.

Returns
Range view over the arrays in insertion order
Postcondition
Range size equals nb_columns()
Range elements correspond to names() in the same order
Range remains valid while record batch exists and is not modified
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.
Here is the caller graph for this function:

◆ contains_column()

SPARROW_API bool sparrow::record_batch::contains_column ( const name_type & key) const

Checks if the record batch contains a column with the specified name.

Parameters
key: The name of the column to search for
Returns
true if the column exists, false otherwise
Postcondition
Return value is consistent with get_column(key) success/failure
O(1) average time complexity due to internal hash map
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.

◆ extract_struct_array()

SPARROW_API struct_array sparrow::record_batch::extract_struct_array ( )

Moves the internal columns into a struct_array and empties the record batch.

After this operation, the record batch becomes empty and should not be used until new data is added.

Returns
struct_array containing the moved columns
Precondition
Record batch must not be empty (nb_columns() > 0)
Postcondition
Record batch becomes empty (nb_columns() == 0, nb_rows() == 0)
Returned struct_array contains all previous columns as fields
Column data is moved, not copied
Internal state is reset to empty but valid state
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.

◆ get_column() [1/4]

SPARROW_API array & sparrow::record_batch::get_column ( const name_type & key)

Gets the column with the specified name.

Parameters
key: The name of the column to retrieve
Returns
Reference to the array
Precondition
Column with the specified name must exist
Postcondition
Returns valid reference to the column array
Returned reference remains valid while record batch exists
Exceptions
std::out_of_range if column with key does not exist

◆ get_column() [2/4]

SPARROW_API const array & sparrow::record_batch::get_column ( const name_type & key) const

Gets the column with the specified name.

Parameters
key: The name of the column to retrieve
Returns
Const reference to the array
Precondition
Column with the specified name must exist
Postcondition
Returns valid const reference to the column array
Returned reference remains valid while record batch exists
Exceptions
std::out_of_range if column with key does not exist
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.

◆ get_column() [3/4]

SPARROW_API array & sparrow::record_batch::get_column ( size_type index)

Gets the column at the specified index.

Parameters
index: The index of the column (0-based)
Returns
Reference to the array
Precondition
index must be < nb_columns()
Postcondition
Returns valid reference to the column array
Returned reference remains valid while record batch exists
Exceptions
std::out_of_range if index >= nb_columns()

◆ get_column() [4/4]

SPARROW_API const array & sparrow::record_batch::get_column ( size_type index) const

Gets the column at the specified index.

Parameters
index: The index of the column (0-based)
Returns
Const reference to the array
Precondition
index must be < nb_columns()
Postcondition
Returns valid const reference to the column array
Returned reference remains valid while record batch exists
Exceptions
std::out_of_range if index >= nb_columns()

◆ get_column_name()

SPARROW_API const name_type & sparrow::record_batch::get_column_name ( size_type index) const

Gets the name of the column at the specified index.

Parameters
index: The index of the column (0-based)
Returns
Const reference to the column name
Precondition
index must be < nb_columns()
Postcondition
Returns valid reference to the column name
Returned reference remains valid while record batch exists
Exceptions
std::out_of_range if index >= nb_columns()
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.

◆ name()

SPARROW_API const std::optional< name_type > & sparrow::record_batch::name ( ) const

Gets the name of the record batch.

Returns
Optional name of the record batch
Postcondition
Returns the name specified during construction (if any)
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.
Here is the caller graph for this function:

◆ names()

SPARROW_API name_range sparrow::record_batch::names ( ) const

Gets a range view of the column names.

Returns
Range view over the column names in insertion order
Postcondition
Range size equals nb_columns()
Range elements are in the same order as columns
Range remains valid while record batch exists and is not modified
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.
Here is the caller graph for this function:

◆ nb_columns()

SPARROW_API size_type sparrow::record_batch::nb_columns ( ) const

Gets the number of columns in the record batch.

Returns
Number of columns (arrays) in the record batch
Postcondition
Returns non-negative value
Return value equals the number of unique column names
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.

◆ nb_rows()

SPARROW_API size_type sparrow::record_batch::nb_rows ( ) const

Gets the number of rows in the record batch.

Returns
Number of rows (length of each array) in the record batch
Postcondition
Returns non-negative value
If nb_columns() > 0, all arrays have this length
If nb_columns() == 0, returns 0
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.

◆ operator=() [1/2]

SPARROW_API record_batch & sparrow::record_batch::operator= ( const record_batch & other)

Copy assignment operator.

Parameters
other: The record_batch to copy from
Returns
Reference to this record_batch
Precondition
other must be in a valid state
Postcondition
This record batch is an independent copy of other
Previous data is properly released
All arrays are deep-copied
Internal consistency is maintained
Examples
/home/runner/work/sparrow/sparrow/include/sparrow/record_batch.hpp.
Here is the call graph for this function:

◆ operator=() [2/2]

record_batch & sparrow::record_batch::operator= ( record_batch && )
default
Here is the call graph for this function:

The documentation for this class was generated from the following file: