41 template <
class DERIVED>
53 requires std::same_as<T, list_array_impl<false>> || std::same_as<T, list_array_impl<true>>
60 requires std::same_as<T, list_view_array_impl<false>> || std::same_as<T, list_view_array_impl<true>>
63 return "list_view_array";
112 return "fixed_sized_list_array";
177 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
181 std::optional<std::string_view> name,
182 std::optional<METADATA_RANGE> metadata,
187 std::optional<std::unordered_set<ArrowFlag>>
207 std::int64_t null_count,
218 std::move(arr_buffs),
286 template <
class DERIVED>
401 using list_size_type =
inner_types::list_size_type;
405 template <
std::input_iterator InputIt>
412 [[nodiscard]]
array make_flat_array();
439 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
495 template <class... ARGS>
519 template <std::ranges::range SIZES_RANGE>
550 std::optional<std::string_view> name = std::nullopt,
551 std::optional<METADATA_RANGE> metadata = std::nullopt
574 std::ranges::input_range OFFSET_BUFFER_RANGE,
576 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>,
offset_type>
579 OFFSET_BUFFER_RANGE&& list_offsets_range,
581 std::optional<std::string_view> name = std::nullopt,
582 std::optional<METADATA_RANGE> metadata = std::nullopt
585 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
586 return list_array_impl<BIG>::create_proxy(
587 std::move(flat_values),
588 std::move(list_offsets),
589 std::forward<VB>(validity_input),
590 std::forward<std::optional<std::string_view>>(name),
591 std::forward<std::optional<METADATA_RANGE>>(metadata)
597 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
598 [[nodiscard]]
static arrow_proxy create_proxy(
600 offset_buffer_type&& list_offsets,
601 bool nullable =
true,
602 std::optional<std::string_view> name = std::nullopt,
603 std::optional<METADATA_RANGE> metadata = std::nullopt
608 std::ranges::input_range OFFSET_BUFFER_RANGE,
609 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
610 requires std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
611 [[nodiscard]]
static arrow_proxy create_proxy(
613 OFFSET_BUFFER_RANGE&& list_offsets_range,
614 bool nullable =
true,
615 std::optional<std::string_view> name = std::nullopt,
616 std::optional<METADATA_RANGE> metadata = std::nullopt
619 offset_buffer_type list_offsets{std::forward<OFFSET_BUFFER_RANGE>(list_offsets_range)};
620 return list_array_impl<BIG>::create_proxy(
621 std::move(flat_values),
622 std::move(list_offsets),
624 std::forward<std::optional<std::string_view>>(name),
625 std::forward<std::optional<METADATA_RANGE>>(metadata)
629 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
630 [[nodiscard]]
constexpr std::pair<offset_type, offset_type> offset_range(
size_type i)
const;
632 [[nodiscard]]
constexpr offset_type* make_list_offsets();
634 void replace_value(
size_type index,
const list_value& value);
663 using offset_type = std::conditional_t<BIG, const std::int64_t, const std::int32_t>;
721 template <
class... ARGS>
724 :
self_type(create_proxy(std::forward<ARGS>(args)...))
754 std::ranges::input_range OFFSET_BUFFER_RANGE,
755 std::ranges::input_range SIZE_RANGE,
759 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
760 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
764 OFFSET_BUFFER_RANGE&& list_offsets,
765 SIZE_RANGE&& list_sizes,
767 std::optional<std::string_view> name = std::nullopt,
768 std::optional<METADATA_RANGE> metadata = std::nullopt
771 return list_view_array_impl<BIG>::create_proxy(
772 std::move(flat_values),
773 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
774 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
775 std::forward<VB>(validity_input),
782 validity_bitmap_input VB = validity_bitmap,
783 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
784 [[nodiscard]]
static arrow_proxy create_proxy(
786 offset_buffer_type&& list_offsets,
787 size_buffer_type&& list_sizes,
789 std::optional<std::string_view> name = std::nullopt,
790 std::optional<METADATA_RANGE> metadata = std::nullopt
794 std::ranges::input_range OFFSET_BUFFER_RANGE,
795 std::ranges::input_range SIZE_RANGE,
796 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
798 std::convertible_to<std::ranges::range_value_t<OFFSET_BUFFER_RANGE>, offset_type>
799 && std::convertible_to<std::ranges::range_value_t<SIZE_RANGE>, list_size_type>
801 [[nodiscard]]
static arrow_proxy create_proxy(
803 OFFSET_BUFFER_RANGE&& list_offsets,
804 SIZE_RANGE&& list_sizes,
805 bool nullable =
true,
806 std::optional<std::string_view> name = std::nullopt,
807 std::optional<METADATA_RANGE> metadata = std::nullopt
810 return list_view_array_impl<BIG>::create_proxy(
811 std::move(flat_values),
812 offset_buffer_type(std::forward<OFFSET_BUFFER_RANGE>(list_offsets)),
813 size_buffer_type(std::forward<SIZE_RANGE>(list_sizes)),
820 template <input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
821 [[nodiscard]]
static arrow_proxy create_proxy(
823 offset_buffer_type&& list_offsets,
824 size_buffer_type&& list_sizes,
825 bool nullable =
true,
826 std::optional<std::string_view> name = std::nullopt,
827 std::optional<METADATA_RANGE> metadata = std::nullopt
830 static constexpr std::size_t OFFSET_BUFFER_INDEX = 1;
831 static constexpr std::size_t SIZES_BUFFER_INDEX = 2;
832 [[nodiscard]]
constexpr std::pair<offset_type, offset_type> offset_range(size_type i)
const;
834 [[nodiscard]]
constexpr offset_type* make_list_offsets()
const;
835 [[nodiscard]]
constexpr const list_size_type* make_list_sizes()
const;
837 void replace_value(size_type index,
const list_value& value);
839 constexpr value_iterator
840 insert_value(const_value_iterator pos,
const list_value& value, size_type count);
842 template <std::input_iterator InputIt>
843 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type, list_value>
844 constexpr value_iterator insert_values(const_value_iterator pos, InputIt first, InputIt last);
846 constexpr value_iterator erase_values(const_value_iterator pos, size_type count);
848 offset_type* p_list_offsets;
902 template <
class... ARGS>
905 :
self_type(create_proxy(std::forward<ARGS>(args)...))
933 std::uint64_t list_size,
936 std::optional<std::string_view> name = std::nullopt,
937 std::optional<METADATA_RANGE> metadata = std::nullopt
962 std::uint64_t list_size,
965 std::optional<std::string_view> name = std::nullopt,
966 std::optional<METADATA_RANGE> metadata = std::nullopt
981 [[nodiscard]]
static uint64_t list_size_from_format(
const std::string_view format);
993 [[nodiscard]]
constexpr std::pair<offset_type, offset_type> offset_range(size_type i)
const;
995 [[nodiscard]]
constexpr size_type flat_element_count(size_type list_count)
const;
997 void replace_value(size_type index,
const list_value& value);
999 value_iterator insert_value(const_value_iterator pos,
const list_value& value, size_type count);
1001 value_iterator erase_values(const_value_iterator pos, size_type count);
1003 uint64_t m_list_size;
1017 template <
class DERIVED>
1020 , p_flat_array(make_flat_array())
1024 template <
class DERIVED>
1027 , p_flat_array(make_flat_array())
1031 template <
class DERIVED>
1035 p_flat_array = make_flat_array();
1039 template <
class DERIVED>
1042 return &p_flat_array;
1045 template <
class DERIVED>
1048 return &p_flat_array;
1051 template <
class DERIVED>
1054 if (this->get_arrow_proxy().offset() != 0)
1056 throw std::logic_error(std::string(operation) +
" does not support sliced arrays (non-zero offset)");
1060 template <
class DERIVED>
1066 template <
class DERIVED>
1075 template <
class DERIVED>
1084 template <
class DERIVED>
1093 template <
class DERIVED>
1094 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i) -> inner_reference
1096 return inner_reference(&this->derived_cast(), i);
1099 template <
class DERIVED>
1100 constexpr auto list_array_crtp_base<DERIVED>::value(size_type i)
const -> inner_const_reference
1102 const auto r = this->derived_cast().offset_range(i);
1104 return list_value{&p_flat_array,
static_cast<st
>(r.first),
static_cast<st
>(r.second)};
1107 template <
class DERIVED>
1108 array list_array_crtp_base<DERIVED>::make_flat_array()
1110 auto& child_proxy = this->get_arrow_proxy().
children()[0];
1111 return array{&child_proxy.array(), &child_proxy.schema()};
1114 template <
class DERIVED>
1118 if (value.size() == 0)
1125 flat_array.
cbegin() +
static_cast<std::ptrdiff_t
>(flat_pos),
1126 value.flat_array()->cbegin() +
static_cast<std::ptrdiff_t
>(value.begin_index()),
1127 value.flat_array()->cbegin() +
static_cast<std::ptrdiff_t
>(value.end_index()),
1132 template <
class DERIVED>
1136 if (flat_count == 0)
1142 flat_array.
cbegin() +
static_cast<std::ptrdiff_t
>(flat_begin),
1143 flat_array.
cbegin() +
static_cast<std::ptrdiff_t
>(flat_begin + flat_count)
1147 template <
class DERIVED>
1148 constexpr void list_array_crtp_base<DERIVED>::resize_values(size_type new_length,
const list_value& value)
1150 DERIVED& derived =
static_cast<DERIVED&
>(*this);
1151 const size_type n = this->size();
1154 derived.erase_values(
1155 sparrow::next(this->value_cbegin(),
static_cast<std::ptrdiff_t
>(new_length)),
1159 else if (new_length > n)
1161 derived.insert_value(this->value_cend(), value, new_length - n);
1165 template <
class DERIVED>
1166 template <std::input_iterator InputIt>
1167 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type,
list_value>
1172 DERIVED& derived =
static_cast<DERIVED&
>(*this);
1175 for (
auto it = first; it != last; ++it, ++count)
1178 derived.insert_value(cur_pos, *it, 1);
1180 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1188# pragma GCC diagnostic push
1189# pragma GCC diagnostic ignored "-Wcast-align"
1195 , p_list_offsets(make_list_offsets())
1200 template <std::ranges::range SIZES_RANGE>
1204 std::forward<SIZES_RANGE>(sizes)
1209 template <val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1211 array&& flat_values,
1212 offset_buffer_type&& list_offsets,
1213 VB&& validity_input,
1214 std::optional<std::string_view> name,
1215 std::optional<METADATA_RANGE> metadata
1218 const auto size = list_offsets.size() - 1;
1220 const auto null_count = vbitmap.
null_count();
1225 BIG ? std::string(
"+L") : std::string(
"+l"),
1226 std::move(flat_schema),
1232 std::vector<buffer<std::uint8_t>> arr_buffs;
1233 arr_buffs.reserve(2);
1234 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1235 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1238 static_cast<std::int64_t
>(size),
1239 static_cast<std::int64_t
>(null_count),
1240 std::move(arr_buffs),
1244 return arrow_proxy{std::move(arr), std::move(schema)};
1248 template <val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1250 array&& flat_values,
1251 offset_buffer_type&& list_offsets,
1253 std::optional<std::string_view> name,
1254 std::optional<METADATA_RANGE> metadata
1259 return list_array_impl<BIG>::create_proxy(
1260 std::move(flat_values),
1261 std::move(list_offsets),
1268 const auto size = list_offsets.size() - 1;
1272 BIG ? std::string(
"+L") : std::string(
"+l"),
1273 std::move(flat_schema),
1279 std::vector<buffer<std::uint8_t>> arr_buffs;
1280 arr_buffs.reserve(2);
1282 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1285 static_cast<std::int64_t
>(size),
1287 std::move(arr_buffs),
1291 return arrow_proxy{std::move(arr), std::move(schema)};
1297 , p_list_offsets(make_list_offsets())
1309 p_list_offsets = make_list_offsets();
1317 base_type::slice_inplace(start, end);
1318 p_list_offsets = make_list_offsets();
1322 constexpr auto list_array_impl<BIG>::offset_range(size_type i)
const -> std::pair<offset_type, offset_type>
1324 return std::make_pair(p_list_offsets[i], p_list_offsets[i + 1]);
1328 constexpr auto list_array_impl<BIG>::make_list_offsets() -> offset_type*
1330 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<offset_type>()
1331 +
static_cast<size_type
>(this->get_arrow_proxy().offset());
1336 list_array_impl<BIG>::insert_value(const_value_iterator pos,
const list_value& value, size_type count)
1339 using mutable_offset_type = std::remove_const_t<offset_type>;
1340 const auto idx =
static_cast<size_type
>(std::distance(this->value_cbegin(), pos));
1341 auto& proxy = this->get_arrow_proxy();
1343 this->throw_if_sliced_for_mutation(
"list_array_impl::insert_value");
1345 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1347 const size_type n = offset_adaptor.size() - 1;
1350 const auto flat_insert_pos =
static_cast<size_type
>(p_list_offsets[idx]);
1351 this->insert_flat_elements(flat_insert_pos, value, count);
1354 const auto value_size = value.
size();
1355 const auto count_size =
count;
1358 const auto val_sz =
static_cast<mutable_offset_type
>(value_size);
1359 const auto count_mt =
static_cast<mutable_offset_type
>(count_size);
1360 const auto max_offset = std::numeric_limits<mutable_offset_type>::max();
1362 if (val_sz != mutable_offset_type{})
1366 const mutable_offset_type total_delta = count_mt * val_sz;
1368 const mutable_offset_type existing_max = offset_adaptor[n];
1371 offset_adaptor.resize(n + 1 + count, mutable_offset_type{});
1374 for (size_type i = n; i > idx; --i)
1376 offset_adaptor[i +
count] = offset_adaptor[i] + total_delta;
1379 const auto flat_insert_offset =
static_cast<mutable_offset_type
>(flat_insert_pos);
1381 for (size_type k = 1; k <=
count; ++k)
1383 offset_adaptor[idx + k] = flat_insert_offset +
static_cast<mutable_offset_type
>(k) * val_sz;
1386 proxy.update_buffers();
1387 p_list_offsets = make_list_offsets();
1388 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1392 constexpr auto list_array_impl<BIG>::erase_values(const_value_iterator pos, size_type count)
1395 using mutable_offset_type = std::remove_const_t<offset_type>;
1396 const size_type idx =
static_cast<size_type
>(std::distance(this->value_cbegin(), pos));
1397 const size_type n = this->
size();
1398 auto& proxy = this->get_arrow_proxy();
1402 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1405 this->throw_if_sliced_for_mutation(
"list_array_impl::erase_values");
1408 const mutable_offset_type flat_erase_begin = p_list_offsets[idx];
1409 const mutable_offset_type flat_erase_end = p_list_offsets[idx +
count];
1410 const mutable_offset_type flat_erase_count_val = flat_erase_end - flat_erase_begin;
1412 this->erase_flat_elements(
1413 static_cast<size_type
>(flat_erase_begin),
1414 static_cast<size_type
>(flat_erase_count_val)
1418 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1421 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx + count + 1),
1422 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(n + 1),
1423 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx + 1)
1425 auto delta_begin = offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx + 1);
1426 auto delta_end = offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(n + 1 -
count);
1431 [flat_erase_count_val](mutable_offset_type v)
1433 return v - flat_erase_count_val;
1436 offset_adaptor.resize(n + 1 - count);
1438 proxy.update_buffers();
1439 p_list_offsets = make_list_offsets();
1440 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1444 void list_array_impl<BIG>::replace_value(size_type index,
const list_value& value)
1446 using mutable_offset_type = std::remove_const_t<offset_type>;
1448 this->throw_if_sliced_for_mutation(
"list_array_impl::replace_value");
1449 const size_type new_size = value.size();
1452 const size_type n = this->
size();
1453 const auto [flat_begin, flat_end] = offset_range(index);
1454 const auto old_size =
static_cast<size_type
>(flat_end - flat_begin);
1455 const auto old_size_mt =
static_cast<mutable_offset_type
>(old_size);
1456 const auto new_size_mt =
static_cast<mutable_offset_type
>(new_size);
1459 const array* source = value.flat_array();
1460 if (source !=
nullptr && this->raw_flat_array() == source)
1462 source_owner = *source;
1463 source = &source_owner;
1466 if (old_size == new_size)
1474 const auto flat_begin_index =
static_cast<size_type
>(flat_begin);
1475 this->erase_flat_elements(flat_begin_index, old_size);
1476 this->insert_flat_elements(
1478 list_value{source, value.begin_index(), value.end_index()},
1481 auto& proxy_eq = this->get_arrow_proxy();
1482 proxy_eq.update_buffers();
1483 p_list_offsets = make_list_offsets();
1487 this->erase_flat_elements(
static_cast<size_type
>(flat_begin), old_size);
1491 this->insert_flat_elements(
1492 static_cast<size_type
>(flat_begin),
1493 list_value{source, value.begin_index(), value.end_index()},
1498 auto& proxy = this->get_arrow_proxy();
1499 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1503 auto tail_begin = offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(index + 1);
1504 auto tail_end = offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(n + 1);
1505 if (new_size_mt > old_size_mt)
1507 const mutable_offset_type delta = new_size_mt - old_size_mt;
1508 const auto max_offset = std::numeric_limits<mutable_offset_type>::max();
1514 [delta](mutable_offset_type v)
1522 const mutable_offset_type delta = old_size_mt - new_size_mt;
1527 [delta](mutable_offset_type v)
1534 proxy.update_buffers();
1535 p_list_offsets = make_list_offsets();
1545 , p_list_offsets(make_list_offsets())
1546 , p_list_sizes(make_list_sizes())
1551 template <val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
1552 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1553 array&& flat_values,
1554 offset_buffer_type&& list_offsets,
1555 size_buffer_type&& list_sizes,
1556 VB&& validity_input,
1557 std::optional<std::string_view> name,
1558 std::optional<METADATA_RANGE> metadata
1561 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(),
"sizes and offset must have the same size");
1562 const auto size = list_sizes.size();
1564 const auto null_count = vbitmap.
null_count();
1569 BIG ? std::string(
"+vL") : std::string(
"+vl"),
1570 std::move(flat_schema),
1576 std::vector<buffer<std::uint8_t>> arr_buffs;
1577 arr_buffs.reserve(3);
1578 arr_buffs.emplace_back(std::move(vbitmap).extract_storage());
1579 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1580 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1583 static_cast<std::int64_t
>(size),
1584 static_cast<std::int64_t
>(null_count),
1585 std::move(arr_buffs),
1589 return arrow_proxy{std::move(arr), std::move(schema)};
1593 template <input_metadata_container METADATA_RANGE>
1594 arrow_proxy list_view_array_impl<BIG>::create_proxy(
1595 array&& flat_values,
1596 offset_buffer_type&& list_offsets,
1597 size_buffer_type&& list_sizes,
1599 std::optional<std::string_view> name,
1600 std::optional<METADATA_RANGE> metadata
1605 return list_view_array_impl<BIG>::create_proxy(
1606 std::move(flat_values),
1607 std::move(list_offsets),
1608 std::move(list_sizes),
1615 SPARROW_ASSERT(list_offsets.size() == list_sizes.size(),
"sizes and offset must have the same size");
1616 const auto size = list_sizes.size();
1620 BIG ? std::string(
"+vL") : std::string(
"+vl"),
1621 std::move(flat_schema),
1627 std::vector<buffer<std::uint8_t>> arr_buffs;
1628 arr_buffs.reserve(3);
1630 arr_buffs.emplace_back(std::move(list_offsets).extract_storage());
1631 arr_buffs.emplace_back(std::move(list_sizes).extract_storage());
1634 static_cast<std::int64_t
>(size),
1636 std::move(arr_buffs),
1640 return arrow_proxy{std::move(arr), std::move(schema)};
1646 , p_list_offsets(make_list_offsets())
1647 , p_list_sizes(make_list_sizes())
1659 p_list_offsets = make_list_offsets();
1660 p_list_sizes = make_list_sizes();
1668 base_type::slice_inplace(start, end);
1669 p_list_offsets = make_list_offsets();
1670 p_list_sizes = make_list_sizes();
1674 inline constexpr auto list_view_array_impl<BIG>::offset_range(size_type i)
const
1675 -> std::pair<offset_type, offset_type>
1677 const auto offset = p_list_offsets[i];
1679 return std::make_pair(offset, offset +
static_cast<offset_type
>(p_list_sizes[i]));
1683 constexpr auto list_view_array_impl<BIG>::make_list_offsets() const -> offset_type*
1685 return this->get_arrow_proxy().buffers()[OFFSET_BUFFER_INDEX].template data<offset_type>()
1686 +
static_cast<size_type
>(this->get_arrow_proxy().offset());
1690 constexpr auto list_view_array_impl<BIG>::make_list_sizes() const -> const list_size_type*
1692 return this->get_arrow_proxy().buffers()[SIZES_BUFFER_INDEX].template data<list_size_type>()
1693 +
static_cast<size_type
>(this->get_arrow_proxy().offset());
1698 list_view_array_impl<BIG>::insert_value(const_value_iterator pos,
const list_value& value, size_type count)
1701 using mutable_offset_type = std::remove_const_t<offset_type>;
1702 using mutable_size_type = std::remove_const_t<list_size_type>;
1703 const size_type idx =
static_cast<size_type
>(std::distance(this->value_cbegin(), pos));
1705 this->throw_if_sliced_for_mutation(
"list_view_array_impl::insert_value");
1710 const mutable_offset_type val_sz =
static_cast<mutable_offset_type
>(value.size());
1711 const size_type flat_append_pos = this->raw_flat_array()->size();
1713 this->insert_flat_elements(flat_append_pos, value, count);
1715 auto& proxy = this->get_arrow_proxy();
1718 const size_type n = this->
size();
1720 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1722 offset_adaptor.resize(n + count);
1724 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1726 sizes_adaptor.resize(n + count);
1731 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx),
1732 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(n),
1733 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(n + count)
1736 sizes_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx),
1737 sizes_adaptor.begin() +
static_cast<std::ptrdiff_t
>(n),
1738 sizes_adaptor.begin() +
static_cast<std::ptrdiff_t
>(n + count)
1743 if (count > 0 && val_sz > 0)
1745 const auto base_offset_ull =
static_cast<unsigned long long>(flat_append_pos);
1746 const auto step_ull =
static_cast<unsigned long long>(val_sz);
1747 const auto max_k_ull =
static_cast<unsigned long long>(
count - 1);
1748 const auto max_offset_ull =
static_cast<unsigned long long>(
1749 std::numeric_limits<mutable_offset_type>::max()
1751 const auto last_offset_ull = base_offset_ull + max_k_ull * step_ull;
1755 for (size_type k = 0; k <
count; ++k)
1757 offset_adaptor[idx + k] =
static_cast<mutable_offset_type
>(flat_append_pos)
1758 +
static_cast<mutable_offset_type
>(k) * val_sz;
1759 sizes_adaptor[idx + k] =
static_cast<mutable_size_type
>(val_sz);
1762 proxy.update_buffers();
1763 p_list_offsets = make_list_offsets();
1764 p_list_sizes = make_list_sizes();
1765 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1769 template <std::input_iterator InputIt>
1770 requires std::convertible_to<typename std::iterator_traits<InputIt>::value_type,
list_value>
1776 auto& proxy = this->get_arrow_proxy();
1777 const size_type original_size = this->size();
1778 if constexpr (std::forward_iterator<InputIt>)
1780 const auto count =
static_cast<size_type>(std::distance(first, last));
1783 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1787 proxy.set_length(original_size + count);
1788 for (
auto it = first; it != last; ++it)
1792 static_cast<std::ptrdiff_t
>(idx + inserted_count)
1794 insert_value(cur_pos, *it, 1);
1797 proxy.set_length(original_size);
1802 for (
auto it = first; it != last; ++it)
1806 static_cast<std::ptrdiff_t
>(idx + inserted_count)
1808 insert_value(cur_pos, *it, 1);
1810 proxy.set_length(original_size + inserted_count);
1812 proxy.set_length(original_size);
1814 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1818 constexpr auto list_view_array_impl<BIG>::erase_values(const_value_iterator pos, size_type count)
1821 using mutable_offset_type = std::remove_const_t<offset_type>;
1822 using mutable_size_type = std::remove_const_t<list_size_type>;
1823 const size_type idx =
static_cast<size_type
>(std::distance(this->value_cbegin(), pos));
1824 const size_type n = this->size();
1828 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1831 this->throw_if_sliced_for_mutation(
"list_view_array_impl::erase_values");
1833 auto& proxy = this->get_arrow_proxy();
1834 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1837 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1842 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx + count),
1843 offset_adaptor.end(),
1844 offset_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx)
1847 sizes_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx + count),
1848 sizes_adaptor.end(),
1849 sizes_adaptor.begin() +
static_cast<std::ptrdiff_t
>(idx)
1851 offset_adaptor.resize(n - count);
1852 sizes_adaptor.resize(n - count);
1853 proxy.update_buffers();
1854 p_list_offsets = make_list_offsets();
1855 p_list_sizes = make_list_sizes();
1856 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1860 void list_view_array_impl<BIG>::replace_value(size_type index,
const list_value& value)
1862 using mutable_offset_type = std::remove_const_t<offset_type>;
1863 using mutable_size_type = std::remove_const_t<list_size_type>;
1865 this->throw_if_sliced_for_mutation(
"list_view_array_impl::replace_value");
1870 const auto value_size =
static_cast<mutable_offset_type
>(value.size());
1871 const size_type flat_append_pos = this->raw_flat_array()->size();
1874 this->insert_flat_elements(flat_append_pos, value, 1);
1876 auto& proxy = this->get_arrow_proxy();
1877 auto& offset_buffer = proxy.get_array_private_data()->buffers()[OFFSET_BUFFER_INDEX];
1879 auto& sizes_buffer = proxy.get_array_private_data()->buffers()[SIZES_BUFFER_INDEX];
1881 offset_adaptor[index] =
static_cast<mutable_offset_type
>(flat_append_pos);
1882 sizes_adaptor[index] =
static_cast<mutable_size_type
>(value_size);
1883 proxy.update_buffers();
1884 p_list_offsets = make_list_offsets();
1885 p_list_sizes = make_list_sizes();
1889# pragma GCC diagnostic pop
1896 inline auto fixed_sized_list_array::list_size_from_format(
const std::string_view format) -> uint64_t
1899 const auto n_digits = format.size() - 3;
1900 const auto list_size_str = format.substr(3, n_digits);
1901 return std::stoull(std::string(list_size_str));
1912 , m_list_size(rhs.m_list_size)
1923 m_list_size = rhs.m_list_size;
1928 constexpr auto fixed_sized_list_array::offset_range(size_type i)
const
1929 -> std::pair<offset_type, offset_type>
1931 const auto offset = (i + this->offset()) * m_list_size;
1932 return std::make_pair(offset, offset + m_list_size);
1935 constexpr auto fixed_sized_list_array::flat_element_count(size_type list_count)
const -> size_type
1937 const offset_type max_flat_count =
static_cast<offset_type
>(std::numeric_limits<size_type>::max());
1939 m_list_size == 0 ||
static_cast<offset_type
>(list_count) <= max_flat_count / m_list_size
1941 return static_cast<size_type
>(
static_cast<offset_type
>(list_count) * m_list_size);
1945 fixed_sized_list_array::insert_value(const_value_iterator pos,
const list_value& value, size_type count)
1949 const auto idx =
static_cast<size_type
>(std::distance(this->value_cbegin(), pos));
1951 this->throw_if_sliced_for_mutation(
"fixed_sized_list_array::insert_value");
1953 const size_type flat_insert_pos = flat_element_count(idx);
1955 this->insert_flat_elements(flat_insert_pos, value, count);
1957 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1960 inline auto fixed_sized_list_array::erase_values(const_value_iterator pos, size_type count) -> value_iterator
1962 const auto idx =
static_cast<size_type
>(std::distance(this->value_cbegin(), pos));
1965 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1968 this->throw_if_sliced_for_mutation(
"fixed_sized_list_array::erase_values");
1970 this->erase_flat_elements(flat_element_count(idx), flat_element_count(count));
1971 return sparrow::next(this->value_begin(),
static_cast<std::ptrdiff_t
>(idx));
1974 inline void fixed_sized_list_array::replace_value(size_type index,
const list_value& value)
1980 if (m_list_size == 0)
1986 const array* source = value.flat_array();
1989 source_owner = *source;
1990 source = &source_owner;
1994 const size_type flat_index = flat_element_count(index);
1996 this->
insert_flat_elements(flat_index, list_value{source, value.begin_index(), value.end_index()}, 1);
1999 template <val
idity_bitmap_input R, input_metadata_container METADATA_RANGE>
2000 inline arrow_proxy fixed_sized_list_array::create_proxy(
2001 std::uint64_t list_size,
2002 array&& flat_values,
2004 std::optional<std::string_view> name,
2005 std::optional<METADATA_RANGE> metadata
2008 const auto size = flat_values.size() /
static_cast<std::size_t
>(list_size);
2010 const auto null_count = vbitmap.null_count();
2014 std::string format =
"+w:" + std::to_string(list_size);
2017 std::move(flat_schema),
2023 std::vector<buffer<std::uint8_t>> arr_buffs;
2024 arr_buffs.reserve(1);
2025 arr_buffs.emplace_back(vbitmap.extract_storage());
2028 static_cast<std::int64_t
>(size),
2029 static_cast<std::int64_t
>(null_count),
2030 std::move(arr_buffs),
2034 return arrow_proxy{std::move(arr), std::move(schema)};
2037 template <val
idity_bitmap_input R, input_metadata_container METADATA_RANGE>
2038 inline arrow_proxy fixed_sized_list_array::create_proxy(
2039 std::uint64_t list_size,
2040 array&& flat_values,
2042 std::optional<std::string_view> name,
2043 std::optional<METADATA_RANGE> metadata
2048 return fixed_sized_list_array::create_proxy(
2050 std::move(flat_values),
2057 const auto size = flat_values.size() /
static_cast<std::size_t
>(list_size);
2060 std::string format =
"+w:" + std::to_string(list_size);
2063 std::move(flat_schema),
2069 std::vector<buffer<std::uint8_t>> arr_buffs;
2070 arr_buffs.reserve(1);
2074 static_cast<std::int64_t
>(size),
2076 std::move(arr_buffs),
2080 return arrow_proxy{std::move(arr), std::move(schema)};
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::iterator_tag iterator_tag
constexpr array_bitmap_base_impl & operator=(const array_bitmap_base_impl &)
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
Dynamically typed array encapsulating an Arrow layout.
SPARROW_API iterator erase(const_iterator pos)
Inserts a copy of value before pos in the array, repeating the insertion count times.
SPARROW_API iterator insert(const_iterator pos, const_iterator first, const_iterator last)
Inserts elements from the range [first, last) before the element at the specified position.
SPARROW_API const_iterator cbegin() const
Returns a constant iterator to the beginning of the array.
Object that owns a piece of contiguous memory.
xsimd::aligned_allocator< T > default_allocator
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
friend class list_reference
typename base_type::value_iterator value_iterator
inner_types::list_size_type list_size_type
array_inner_types< self_type > inner_types
fixed_sized_list_array(arrow_proxy proxy)
Constructs fixed size list array from Arrow proxy.
fixed_sized_list_array(ARGS &&... args)
Generic constructor for creating fixed size list array.
list_array_crtp_base< self_type > base_type
fixed_sized_list_array self_type
fixed_sized_list_array & operator=(self_type &&)=default
typename base_type::const_value_iterator const_value_iterator
fixed_sized_list_array(self_type &&)=default
fixed_sized_list_array & operator=(const self_type &)
typename base_type::size_type size_type
std::uint64_t offset_type
CRTP base class for all list array implementations.
constexpr void throw_if_sliced_for_mutation(const char *operation) const
typename base_type::const_bitmap_range const_bitmap_range
constexpr list_array_crtp_base & operator=(const self_type &)
Copy assignment operator.
constexpr array * raw_flat_array()
Gets mutable access to the underlying flat array.
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename inner_types::const_value_iterator const_value_iterator
list_value inner_value_type
typename base_type::bitmap_const_reference bitmap_const_reference
constexpr void insert_flat_elements(size_type flat_pos, const list_value &value, size_type count)
typename inner_types::inner_const_reference inner_const_reference
typename base_type::iterator_tag iterator_tag
mutable_array_bitmap_base< DERIVED > base_type
typename base_type::bitmap_reference bitmap_reference
list_array_crtp_base(arrow_proxy proxy)
Constructs list array base from Arrow proxy.
constexpr list_array_crtp_base(const self_type &)
Copy constructor.
constexpr const_value_iterator value_cbegin() const
constexpr void erase_flat_elements(size_type flat_begin, size_type flat_count)
typename inner_types::value_iterator value_iterator
constexpr const_value_iterator value_cend() const
typename base_type::bitmap_type bitmap_type
list_array_crtp_base< DERIVED > self_type
typename base_type::size_type size_type
array_inner_types< DERIVED > inner_types
nullable< inner_reference, bitmap_reference > reference
nullable< inner_value_type > value_type
typename inner_types::inner_reference inner_reference
constexpr value_iterator value_begin()
constexpr const array * raw_flat_array() const
Gets read-only access to the underlying flat array.
constexpr value_iterator value_end()
constexpr list_array_crtp_base(self_type &&) noexcept=default
list_array_impl< BIG > self_type
constexpr void slice_inplace(size_type start, size_type end)
constexpr list_array_impl(const self_type &)
Copy constructor.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
friend class list_reference
typename base_type::size_type size_type
constexpr list_array_impl & operator=(const self_type &)
Copy assignment operator.
typename base_type::value_iterator value_iterator
array_inner_types< self_type > inner_types
constexpr list_array_impl(self_type &&) noexcept=default
static constexpr auto offset_from_sizes(SIZES_RANGE &&sizes) -> offset_buffer_type
Creates offset buffer from list sizes.
typename base_type::const_value_iterator const_value_iterator
inner_types::list_size_type list_size_type
list_array_crtp_base< list_array_impl< BIG > > base_type
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_array_impl(arrow_proxy proxy)
Constructs list array from Arrow proxy.
size_type size() const
Gets the number of elements in the list.
constexpr list_view_array_impl & operator=(self_type &&)=default
typename base_type::size_type size_type
constexpr void slice_inplace(size_type start, size_type end)
constexpr list_view_array_impl(self_type &&)=default
friend class list_reference
u8_buffer< std::remove_const_t< offset_type > > offset_buffer_type
list_view_array_impl(arrow_proxy proxy)
Constructs list view array from Arrow proxy.
std::conditional_t< BIG, const std::int64_t, const std::int32_t > offset_type
array_inner_types< self_type > inner_types
list_array_crtp_base< list_view_array_impl< BIG > > base_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::value_iterator value_iterator
list_view_array_impl(ARGS &&... args)
Generic constructor for creating list view array from various inputs.
list_view_array_impl< BIG > self_type
constexpr list_view_array_impl(const self_type &)
Copy constructor.
inner_types::list_size_type list_size_type
u8_buffer< std::remove_const_t< list_size_type > > size_buffer_type
constexpr list_view_array_impl & operator=(const self_type &)
Copy assignment operator.
Base class defining common interface for arrays with a bitmap.
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
#define SPARROW_ASSERT_TRUE(expr__)
#define SPARROW_ASSERT(expr__, message__)
SPARROW_API void increase(const std::string &key)
std::string key< fixed_sized_list_array >()
SPARROW_API int count(const std::string &key, int disabled_value=0)
ArrowArray make_list_arrow_array(std::int64_t size, std::int64_t null_count, std::vector< buffer< std::uint8_t > > &&arr_buffs, ArrowArray &&flat_arr)
ArrowSchema make_list_arrow_schema(std::string format, ArrowSchema &&flat_schema, std::optional< std::string_view > name, std::optional< METADATA_RANGE > metadata, bool nullable)
constexpr sparrow::u8_buffer< OFFSET_TYPE > offset_buffer_from_sizes(SIZES_RANGE &&sizes)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_list_view_array_v
Checks whether T is a list_view_array type.
list_array_impl< false > list_array
A list array implementation.
constexpr InputIt next(InputIt it, Distance n)
constexpr bool is_fixed_sized_list_array_v
Checks whether T is a fixed_sized_list_array type.
list_view_array_impl< true > big_list_view_array
std::pair< ArrowArray, ArrowSchema > extract_arrow_structures(A &&a)
Extracts the internal ArrowArray and ArrowSchema structures from the given array or typed layout.
constexpr bool is_big_list_array_v
Checks whether T is a big_list_array type.
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
list_view_array_impl< false > list_view_array
A list view array implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
constexpr bool is_list_array_v
Checks whether T is a list_array type.
list_array_impl< true > big_list_array
A big list array implementation.
auto make_buffer_adaptor(FromBufferRef &buf)
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
constexpr bool is_big_list_view_array_v
Checks whether T is a big_list_view_array type.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
list_value inner_value_type
std::random_access_iterator_tag iterator_tag
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
fixed_sized_list_array array_type
list_value inner_const_reference
std::uint64_t list_size_type
list_reference< array_type > inner_reference
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
std::random_access_iterator_tag iterator_tag
list_array_impl< BIG > array_type
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
list_value inner_value_type
list_reference< array_type > inner_reference
list_value inner_const_reference
list_value inner_value_type
list_value inner_const_reference
list_reference< array_type > inner_reference
std::random_access_iterator_tag iterator_tag
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
std::conditional_t< BIG, std::uint64_t, std::uint32_t > list_size_type
list_view_array_impl< BIG > array_type
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.