20#include <unordered_map>
42 template <std::ranges::sized_range T,
class CR,
typename Ext = empty_extension>
88 template <std::ranges::sized_range T,
class CR,
typename Ext>
107 template <std::ranges::sized_range T,
class CR,
typename Ext>
158 template <std::ranges::sized_range T,
class CR,
typename Ext>
222 template <
class... Args>
239 struct buffers_collection
251 [[nodiscard]]
static constexpr std::string_view get_arrow_format()
253 return std::is_same_v<T, arrow_traits<std::string>::value_type> ? std::string_view(
"vu")
254 :
std::string_view(
"vz");
266 template <input_metadata_container METADATA_RANGE>
267 [[nodiscard]]
static ArrowSchema create_arrow_schema(
268 std::optional<std::string_view> name,
269 std::optional<METADATA_RANGE> metadata,
270 std::optional<std::unordered_set<sparrow::ArrowFlag>> flags
273 constexpr repeat_view<bool> children_ownership(
true, 0);
304 template <std::ranges::input_range R>
305 requires std::convertible_to<std::ranges::range_value_t<R>, T>
306 static buffers_collection create_buffers(R&& range);
328 std::ranges::input_range R,
330 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
331 requires std::convertible_to<std::ranges::range_value_t<R>, T>
332 [[nodiscard]]
static arrow_proxy create_proxy(
335 std::optional<std::string_view> name = std::nullopt,
336 std::optional<METADATA_RANGE> metadata = std::nullopt
356 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
357 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T>>
358 [[nodiscard]]
static arrow_proxy create_proxy(
359 NULLABLE_RANGE&& nullable_range,
360 std::optional<std::string_view> name = std::nullopt,
361 std::optional<METADATA_RANGE> metadata = std::nullopt
380 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
381 requires std::convertible_to<std::ranges::range_value_t<R>, T>
382 [[nodiscard]]
static arrow_proxy create_proxy(
385 std::optional<std::string_view> name = std::nullopt,
386 std::optional<METADATA_RANGE> metadata = std::nullopt
409 std::ranges::input_range VALUE_BUFFERS_RANGE,
410 validity_bitmap_input VB,
411 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
412 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>, u8_buffer<uint8_t>>
413 [[nodiscard]]
static arrow_proxy create_proxy(
414 size_t element_count,
415 u8_buffer<uint8_t>&& buffer_view,
416 VALUE_BUFFERS_RANGE&& value_buffers,
418 std::optional<std::string_view> name = std::nullopt,
419 std::optional<METADATA_RANGE> metadata = std::nullopt
470 template <std::ranges::sized_range U>
471 requires mpl::convertible_ranges<U, T>
472 constexpr void assign(U&& rhs,
size_type index);
492 template <std::ranges::sized_range U>
493 requires mpl::convertible_ranges<U, T>
494 void resize_values(
size_type new_length, U value);
514 template <std::ranges::sized_range U>
515 requires mpl::convertible_ranges<U, T>
537 template <mpl::iterator_of_type<T> InputIt>
594 static constexpr size_type LENGTH_BUFFER_INDEX = 1;
595 static constexpr std::size_t DATA_BUFFER_SIZE = 16;
596 static constexpr std::size_t SHORT_STRING_SIZE = 12;
597 static constexpr std::size_t PREFIX_SIZE = 4;
598 static constexpr std::ptrdiff_t PREFIX_OFFSET = 4;
599 static constexpr std::ptrdiff_t SHORT_STRING_OFFSET = 4;
600 static constexpr std::ptrdiff_t BUFFER_INDEX_OFFSET = 8;
601 static constexpr std::ptrdiff_t BUFFER_OFFSET_OFFSET = 12;
602 static constexpr std::size_t FIRST_VAR_DATA_BUFFER_INDEX = 2;
611 template <std::ranges::sized_range T,
class CR,
typename Ext>
620 inline std::int32_t read_int32_unaligned(
const std::uint8_t* ptr)
623 std::memcpy(&value, ptr,
sizeof(std::int32_t));
627 inline void write_int32_unaligned(std::uint8_t* ptr, std::int32_t value)
629 std::memcpy(ptr, &value,
sizeof(std::int32_t));
633 template <
typename To,
typename From>
634 inline constexpr To transform_to(
const From& v)
636 return static_cast<To
>(v);
640 template <
typename SizeType>
641 inline void update_buffer_offsets_after(
642 std::uint8_t* view_data,
644 std::size_t data_buffer_size,
645 std::size_t short_string_size,
646 std::ptrdiff_t buffer_offset_offset,
647 std::size_t threshold_offset,
648 std::ptrdiff_t offset_adjustment,
649 SizeType skip_index =
static_cast<SizeType
>(-1)
659 auto* view_ptr = view_data + (i * data_buffer_size);
661 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
663 if (
static_cast<std::size_t
>(length) > short_string_size)
665 std::int32_t current_offset;
666 std::memcpy(¤t_offset, view_ptr + buffer_offset_offset,
sizeof(std::int32_t));
668 if (
static_cast<std::size_t
>(current_offset) > threshold_offset)
670 current_offset +=
static_cast<std::int32_t
>(offset_adjustment);
671 std::memcpy(view_ptr + buffer_offset_offset, ¤t_offset,
sizeof(std::int32_t));
678 template <
typename Buffer>
679 inline void update_buffer_sizes_metadata(Buffer& buffer_sizes_buffer, std::int64_t new_size)
681 auto buffer_sizes_ptr = buffer_sizes_buffer.template data<std::int64_t>();
682 *buffer_sizes_ptr = new_size;
686 template <std::ranges::sized_range T,
class CR,
typename Ext>
687 template <std::ranges::input_range R>
688 requires std::convertible_to<std::ranges::range_value_t<R>, T>
689 auto variable_size_binary_view_array_impl<T, CR, Ext>::create_buffers(R&& range) -> buffers_collection
692# pragma GCC diagnostic push
693# pragma GCC diagnostic ignored "-Wcast-align"
699 std::size_t long_string_storage_size = 0;
701 for (
auto&& val : range)
703 auto val_casted = val
704 | std::ranges::views::transform(transform_to<std::uint8_t, typename T::value_type>);
706 const auto length = val.size();
707 auto length_ptr = length_buffer.
data() + (i * DATA_BUFFER_SIZE);
710 write_int32_unaligned(length_ptr,
static_cast<std::int32_t
>(length));
712 if (length <= SHORT_STRING_SIZE)
717 length_ptr + SHORT_STRING_OFFSET + length,
718 length_ptr + DATA_BUFFER_SIZE,
725 auto prefix_sub_range = val_casted | std::ranges::views::take(PREFIX_SIZE);
729 write_int32_unaligned(length_ptr + BUFFER_INDEX_OFFSET, 0);
732 write_int32_unaligned(
733 length_ptr + BUFFER_OFFSET_OFFSET,
734 static_cast<std::int32_t
>(long_string_storage_size)
738 long_string_storage_size += length;
745 std::size_t long_string_storage_offset = 0;
746 for (
auto&& val : range)
748 const auto length = val.size();
749 if (length > SHORT_STRING_SIZE)
751 auto val_casted = val
752 | std::ranges::views::transform(
753 transform_to<std::uint8_t, typename T::value_type>
756 long_string_storage_offset += length;
765 static_cast<std::size_t
>(1),
766 static_cast<int64_t
>(long_string_storage_size)
769 return {std::move(length_buffer), std::move(long_string_storage), std::move(buffer_sizes)};
772# pragma GCC diagnostic pop
776 template <std::ranges::sized_range T,
class CR,
typename Ext>
777 template <std::ranges::input_range R, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
778 requires std::convertible_to<std::ranges::range_value_t<R>, T>
779 arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
782 std::optional<std::string_view> name,
783 std::optional<METADATA_RANGE> metadata
790 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
793 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
796 auto buffers_parts = create_buffers(std::forward<R>(range));
798 std::vector<buffer<uint8_t>> buffers{
799 std::move(vbitmap).extract_storage(),
800 std::move(buffers_parts.length_buffer),
801 std::move(buffers_parts.long_string_storage),
802 std::move(buffers_parts.buffer_sizes).extract_storage()
809 static_cast<std::int64_t
>(size),
810 static_cast<int64_t
>(null_count),
819 arrow_proxy proxy{std::move(arr), std::move(schema)};
824 template <std::ranges::sized_range T,
class CR,
typename Ext>
825 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
826 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>,
nullable<T>>
827 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
828 NULLABLE_RANGE&& nullable_range,
829 std::optional<std::string_view> name,
830 std::optional<METADATA_RANGE> metadata
833 auto values = nullable_range
834 | std::views::transform(
837 return static_cast<T
>(v.value());
841 auto is_non_null = nullable_range
842 | std::views::transform(
845 return v.has_value();
850 std::forward<
decltype(values)>(values),
851 std::forward<
decltype(is_non_null)>(is_non_null),
857 template <std::ranges::sized_range T,
class CR,
typename Ext>
858 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
859 requires std::convertible_to<std::ranges::range_value_t<R>, T>
860 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
863 std::optional<std::string_view> name,
864 std::optional<METADATA_RANGE> metadata
869 return create_proxy(std::forward<R>(range),
validity_bitmap{}, std::move(name), std::move(metadata));
873 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), std::nullopt);
876 auto buffers_parts = create_buffers(std::forward<R>(range));
878 std::vector<buffer<uint8_t>> buffers{
880 std::move(buffers_parts.length_buffer),
881 std::move(buffers_parts.long_string_storage),
882 std::move(buffers_parts.buffer_sizes).extract_storage()
890 static_cast<std::int64_t
>(size),
891 static_cast<int64_t
>(0),
900 arrow_proxy proxy{std::move(arr), std::move(schema)};
905 template <std::ranges::sized_range T,
class CR,
typename Ext>
906 template <std::ranges::input_range VALUE_BUFFERS_RANGE, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
907 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>,
u8_buffer<uint8_t>>
908 arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
909 size_t element_count,
911 VALUE_BUFFERS_RANGE&& value_buffers,
913 std::optional<std::string_view> name,
914 std::optional<METADATA_RANGE> metadata
920 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
922 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
925 std::vector<buffer<uint8_t>> buffers{std::move(bitmap).extract_storage(), std::move(
buffer_view)};
926 for (
auto&& buf : value_buffers)
928 buffers.push_back(std::forward<
decltype(buf)>(buf));
933 for (std::size_t i = 0; i < value_buffers.size(); ++i)
935 buffer_sizes[i] =
static_cast<int64_t
>(value_buffers[i].size());
937 buffers.push_back(std::move(buffer_sizes).extract_storage());
942 static_cast<std::int64_t
>(size),
943 static_cast<std::int64_t
>(bitmap.null_count()),
952 arrow_proxy proxy{std::move(arr), std::move(schema)};
957 template <std::ranges::sized_range T,
class CR,
typename Ext>
958 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value(size_type i) -> inner_reference
960 return static_cast<const self_type*
>(
this)->value(i);
963 template <std::ranges::sized_range T,
class CR,
typename Ext>
964 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value(size_type i)
const
965 -> inner_const_reference
968# pragma GCC diagnostic push
969# pragma GCC diagnostic ignored "-Wcast-align"
973 using char_or_byte =
typename inner_const_reference::value_type;
975 auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data<uint8_t>()
976 + (i * DATA_BUFFER_SIZE);
977 const auto length =
static_cast<std::size_t
>(read_int32_unaligned(data_ptr));
979 if (length <= SHORT_STRING_SIZE)
981 const auto ptr =
reinterpret_cast<const char_or_byte*
>(data_ptr);
982 const auto ret = inner_const_reference(ptr + SHORT_STRING_OFFSET, length);
987 const auto buffer_index =
static_cast<std::size_t
>(
988 read_int32_unaligned(data_ptr + BUFFER_INDEX_OFFSET)
990 const auto buffer_offset =
static_cast<std::size_t
>(
991 read_int32_unaligned(data_ptr + BUFFER_OFFSET_OFFSET)
993 const auto buffer = this->get_arrow_proxy()
994 .buffers()[buffer_index + FIRST_VAR_DATA_BUFFER_INDEX]
995 .template data<const char_or_byte>();
996 return inner_const_reference(
buffer + buffer_offset, length);
1000# pragma GCC diagnostic pop
1004 template <std::ranges::sized_range T,
class CR,
typename Ext>
1005 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_begin() -> value_iterator
1010 template <std::ranges::sized_range T,
class CR,
typename Ext>
1011 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_end() -> value_iterator
1016 template <std::ranges::sized_range T,
class CR,
typename Ext>
1017 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_cbegin() const
1018 -> const_value_iterator
1023 template <std::ranges::sized_range T,
class CR,
typename Ext>
1024 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_cend() const -> const_value_iterator
1026 return const_value_iterator(
1032 template <std::ranges::sized_range T,
class CR,
typename Ext>
1033 template <std::ranges::sized_range U>
1035 constexpr void variable_size_binary_view_array_impl<T, CR, Ext>::assign(U&& rhs, size_type index)
1038 const auto new_length =
static_cast<std::size_t
>(std::ranges::size(rhs));
1040 auto& length_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[LENGTH_BUFFER_INDEX];
1041 auto view_ptr = length_buffer.data() + (index * DATA_BUFFER_SIZE);
1042 const auto current_length =
static_cast<std::size_t
>(read_int32_unaligned(view_ptr));
1045 write_int32_unaligned(view_ptr,
static_cast<std::int32_t
>(new_length));
1047 if (new_length <= SHORT_STRING_SIZE)
1049 auto data_ptr = view_ptr + SHORT_STRING_OFFSET;
1050 std::ranges::copy(rhs,
reinterpret_cast<typename T::value_type*
>(data_ptr));
1053 if (new_length < SHORT_STRING_SIZE)
1056 reinterpret_cast<typename T::value_type*
>(data_ptr) + new_length,
1057 SHORT_STRING_SIZE - new_length,
1058 typename T::value_type{}
1067 auto& buffers = this->get_arrow_proxy().get_array_private_data()->buffers();
1068 auto& var_data_buffer = buffers[FIRST_VAR_DATA_BUFFER_INDEX];
1069 auto& buffer_sizes_buffer = buffers[buffers.size() - 1];
1071 const bool was_long_string = current_length > SHORT_STRING_SIZE;
1072 std::size_t current_buffer_offset = 0;
1074 if (was_long_string)
1076 current_buffer_offset =
static_cast<std::size_t
>(
1077 read_int32_unaligned(view_ptr + BUFFER_OFFSET_OFFSET)
1081 auto transformed_data = rhs
1082 | std::ranges::views::transform(
1083 transform_to<typename T::value_type, typename T::value_type>
1087 bool can_reuse_memory =
false;
1088 if (was_long_string && new_length == current_length)
1090 const auto* existing_data = var_data_buffer.data() + current_buffer_offset;
1091 can_reuse_memory = std::ranges::equal(
1093 std::span<const typename T::value_type>(
1094 reinterpret_cast<const typename T::value_type*
>(existing_data),
1100 if (can_reuse_memory)
1103 auto prefix_range = rhs | std::ranges::views::take(PREFIX_SIZE);
1104 auto prefix_transformed = prefix_range
1105 | std::ranges::views::transform(
1106 transform_to<std::uint8_t, typename T::value_type>
1108 std::ranges::copy(prefix_transformed, view_ptr + PREFIX_OFFSET);
1113 const auto length_diff =
static_cast<std::ptrdiff_t
>(new_length)
1114 -
static_cast<std::ptrdiff_t
>(current_length);
1115 const bool can_fit_in_place = was_long_string && length_diff <= 0;
1117 std::size_t final_offset = 0;
1119 if (can_fit_in_place)
1122 final_offset = current_buffer_offset;
1125 if (length_diff < 0)
1127 const auto bytes_to_compact =
static_cast<std::size_t
>(-length_diff);
1128 const auto move_start = current_buffer_offset + current_length;
1129 const auto move_end = var_data_buffer.size();
1130 const auto bytes_to_move = move_end - move_start;
1132 if (bytes_to_move > 0)
1135 var_data_buffer.data() + move_start,
1136 var_data_buffer.data() + move_end,
1137 var_data_buffer.data() + move_start - bytes_to_compact
1141 var_data_buffer.resize(var_data_buffer.size() - bytes_to_compact);
1144 update_buffer_offsets_after(
1145 length_buffer.data(),
1149 BUFFER_OFFSET_OFFSET,
1150 current_buffer_offset + current_length,
1151 -
static_cast<std::ptrdiff_t
>(bytes_to_compact),
1156 update_buffer_sizes_metadata(
1157 buffer_sizes_buffer,
1158 static_cast<std::int64_t
>(var_data_buffer.size())
1165 const auto expansion_needed = was_long_string ? length_diff
1166 :
static_cast<std::ptrdiff_t
>(new_length);
1167 const auto new_var_buffer_size = var_data_buffer.size() + expansion_needed;
1169 if (was_long_string && length_diff > 0)
1172 final_offset = current_buffer_offset;
1173 const auto expansion_bytes =
static_cast<std::size_t
>(length_diff);
1174 const auto move_start = current_buffer_offset + current_length;
1175 const auto bytes_to_move = var_data_buffer.size() - move_start;
1178 var_data_buffer.resize(new_var_buffer_size);
1180 if (bytes_to_move > 0)
1184 var_data_buffer.data() + move_start,
1185 var_data_buffer.data() + move_start + bytes_to_move,
1186 var_data_buffer.data() + move_start + bytes_to_move + expansion_bytes
1191 update_buffer_offsets_after(
1192 length_buffer.data(),
1196 BUFFER_OFFSET_OFFSET,
1198 static_cast<std::ptrdiff_t
>(expansion_bytes),
1205 final_offset = var_data_buffer.size();
1206 var_data_buffer.resize(new_var_buffer_size);
1210 update_buffer_sizes_metadata(buffer_sizes_buffer,
static_cast<std::int64_t
>(new_var_buffer_size));
1213 std::ranges::copy(transformed_data, var_data_buffer.data() + final_offset);
1217 auto prefix_range = rhs | std::ranges::views::take(PREFIX_SIZE);
1218 auto prefix_transformed = prefix_range
1219 | std::ranges::views::transform(
1220 transform_to<std::uint8_t, typename T::value_type>
1222 std::ranges::copy(prefix_transformed, view_ptr + PREFIX_OFFSET);
1224 write_int32_unaligned(
1225 view_ptr + BUFFER_INDEX_OFFSET,
1226 static_cast<std::int32_t
>(FIRST_VAR_DATA_BUFFER_INDEX)
1229 write_int32_unaligned(view_ptr + BUFFER_OFFSET_OFFSET,
static_cast<std::int32_t
>(final_offset));
1233 template <std::ranges::sized_range T,
class CR,
typename Ext>
1234 template <std::ranges::sized_range U>
1236 void variable_size_binary_view_array_impl<T, CR, Ext>::resize_values(size_type new_length, U value)
1238 const size_t current_size = this->
size();
1240 if (new_length == current_size)
1245 if (new_length < current_size)
1247 erase_values(
sparrow::next(value_cbegin(), new_length), current_size - new_length);
1251 insert_value(value_cend(), value, new_length - current_size);
1255 template <std::ranges::sized_range T,
class CR,
typename Ext>
1256 template <std::ranges::sized_range U>
1266 template <std::ranges::sized_range T,
class CR,
typename Ext>
1267 template <mpl::iterator_of_type<T> InputIt>
1268 auto variable_size_binary_view_array_impl<T, CR, Ext>::insert_values(
1278 const auto insert_index = std::distance(value_cbegin(), pos);
1279 return value_begin() + insert_index;
1282 const auto insert_index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
1283 const auto current_size = this->size();
1284 const auto new_size = current_size + count;
1287 std::size_t additional_var_storage = 0;
1288 std::vector<std::size_t> value_lengths;
1289 value_lengths.reserve(count);
1291 for (
auto it = first; it != last; ++it)
1293 const auto length =
static_cast<std::size_t
>(std::ranges::size(*it));
1294 value_lengths.push_back(length);
1295 if (length > SHORT_STRING_SIZE)
1297 additional_var_storage += length;
1301 auto& proxy = this->get_arrow_proxy();
1302 auto* private_data = proxy.get_array_private_data();
1303 auto& buffers = private_data->buffers();
1305 const auto new_view_buffer_size = new_size * DATA_BUFFER_SIZE;
1306 buffers[LENGTH_BUFFER_INDEX].resize(new_view_buffer_size);
1308 if (additional_var_storage > 0)
1310 const auto current_var_size = buffers[FIRST_VAR_DATA_BUFFER_INDEX].size();
1311 buffers[FIRST_VAR_DATA_BUFFER_INDEX].resize(current_var_size + additional_var_storage);
1314 auto& buffer_sizes = buffers[buffers.size() - 1];
1315 update_buffer_sizes_metadata(
1317 static_cast<std::int64_t
>(buffers[FIRST_VAR_DATA_BUFFER_INDEX].size())
1321 auto* view_data = buffers[LENGTH_BUFFER_INDEX].data();
1322 if (insert_index < current_size)
1324 const auto bytes_to_move = (current_size - insert_index) * DATA_BUFFER_SIZE;
1325 const auto src_offset = insert_index * DATA_BUFFER_SIZE;
1326 const auto dst_offset = (insert_index + count) * DATA_BUFFER_SIZE;
1328 std::memmove(view_data + dst_offset, view_data + src_offset, bytes_to_move);
1331 if (additional_var_storage > 0)
1333 for (
size_type i = insert_index + count; i < new_size; ++i)
1335 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1336 std::int32_t length;
1337 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1339 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1341 std::int32_t current_offset;
1342 std::memcpy(¤t_offset, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1343 current_offset +=
static_cast<std::int32_t
>(additional_var_storage);
1344 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, ¤t_offset,
sizeof(std::int32_t));
1351 std::size_t var_offset = buffers[FIRST_VAR_DATA_BUFFER_INDEX].size() - additional_var_storage;
1354 for (
auto it = first; it != last; ++it, ++value_idx)
1356 const auto view_index = insert_index + value_idx;
1357 auto* view_ptr = view_data + (view_index * DATA_BUFFER_SIZE);
1358 const auto value_length = value_lengths[value_idx];
1360 const auto& current_value = *it;
1363 const std::int32_t value_length_int32 =
static_cast<std::int32_t
>(value_length);
1364 std::memcpy(view_ptr, &value_length_int32,
sizeof(std::int32_t));
1366 if (value_length <= SHORT_STRING_SIZE)
1369 std::ranges::transform(
1371 view_ptr + SHORT_STRING_OFFSET,
1372 transform_to<std::uint8_t, typename T::value_type>
1376 view_ptr + SHORT_STRING_OFFSET + value_length,
1377 view_ptr + DATA_BUFFER_SIZE,
1384 std::ranges::transform(
1385 current_value | std::views::take(PREFIX_SIZE),
1386 view_ptr + PREFIX_OFFSET,
1387 transform_to<std::uint8_t, typename T::value_type>
1391 const std::int32_t buffer_index_zero = 0;
1392 std::memcpy(view_ptr + BUFFER_INDEX_OFFSET, &buffer_index_zero,
sizeof(std::int32_t));
1395 const std::int32_t var_offset_int32 =
static_cast<std::int32_t
>(var_offset);
1396 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, &var_offset_int32,
sizeof(std::int32_t));
1399 std::ranges::transform(
1401 buffers[FIRST_VAR_DATA_BUFFER_INDEX].data() + var_offset,
1402 transform_to<std::uint8_t, typename T::value_type>
1405 var_offset += value_length;
1410 proxy.update_buffers();
1415 template <std::ranges::sized_range T,
class CR,
typename Ext>
1417 variable_size_binary_view_array_impl<T, CR, Ext>::erase_values(const_value_iterator pos, size_type count)
1420 const size_t erase_index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
1421 const size_t current_size = this->size();
1424 if (erase_index + count > current_size)
1426 count = current_size - erase_index;
1431 return value_begin() +
static_cast<difference_type
>(erase_index);
1434 const auto new_size = current_size - count;
1437 std::size_t freed_var_storage = 0;
1438 auto& proxy = this->get_arrow_proxy();
1439 auto* private_data = proxy.get_array_private_data();
1440 auto& buffers = private_data->buffers();
1441 auto* view_data = buffers[LENGTH_BUFFER_INDEX].data();
1444 for (size_type i = erase_index; i < erase_index + count; ++i)
1446 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1447 std::int32_t length;
1448 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1449 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1451 freed_var_storage +=
static_cast<std::size_t
>(length);
1459 if (buffers[0].
size() > 0)
1463 buffers[LENGTH_BUFFER_INDEX].clear();
1464 buffers[FIRST_VAR_DATA_BUFFER_INDEX].clear();
1466 auto& buffer_sizes = buffers[buffers.size() - 1];
1467 update_buffer_sizes_metadata(buffer_sizes, 0);
1469 proxy.update_buffers();
1470 return value_begin();
1474 if (freed_var_storage > 0)
1476 auto& var_buffer = buffers[FIRST_VAR_DATA_BUFFER_INDEX];
1477 std::size_t write_offset = 0;
1480 std::unordered_map<std::size_t, std::size_t> offset_mapping;
1481 offset_mapping.reserve(current_size - count);
1483 for (size_type i = 0; i < current_size; ++i)
1485 if (i >= erase_index && i < erase_index + count)
1491 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1492 std::int32_t length;
1493 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1494 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1496 std::int32_t old_offset_int32;
1497 std::memcpy(&old_offset_int32, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1498 const auto old_offset =
static_cast<std::size_t
>(old_offset_int32);
1501 offset_mapping[old_offset] = write_offset;
1504 if (write_offset != old_offset)
1507 var_buffer.data() + write_offset,
1508 var_buffer.data() + old_offset,
1509 static_cast<std::size_t
>(length)
1513 write_offset +=
static_cast<std::size_t
>(length);
1518 var_buffer.resize(var_buffer.size() - freed_var_storage);
1521 auto& buffer_sizes = buffers[buffers.size() - 1];
1522 update_buffer_sizes_metadata(buffer_sizes,
static_cast<std::int64_t
>(var_buffer.size()));
1525 for (size_type i = 0; i < current_size; ++i)
1527 if (i >= erase_index && i < erase_index + count)
1532 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1533 std::int32_t length;
1534 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1535 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1537 std::int32_t old_offset_int32;
1538 std::memcpy(&old_offset_int32, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1539 const auto old_offset =
static_cast<std::size_t
>(old_offset_int32);
1540 auto it = offset_mapping.find(old_offset);
1541 if (it != offset_mapping.end())
1543 const std::int32_t new_offset =
static_cast<std::int32_t
>(it->second);
1544 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, &new_offset,
sizeof(std::int32_t));
1551 if (erase_index + count < current_size)
1553 const auto src_offset = (erase_index + count) * DATA_BUFFER_SIZE;
1554 const auto dst_offset = erase_index * DATA_BUFFER_SIZE;
1555 const auto bytes_to_move = (current_size - erase_index - count) * DATA_BUFFER_SIZE;
1557 std::memmove(view_data + dst_offset, view_data + src_offset, bytes_to_move);
1561 buffers[LENGTH_BUFFER_INDEX].resize(new_size * DATA_BUFFER_SIZE);
1564 proxy.update_buffers();
1567 return erase_index < new_size ?
sparrow::next(value_begin(), erase_index) : value_end();
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
constexpr size_type size() const noexcept
Object that owns a piece of contiguous memory.
constexpr U * data() noexcept
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
nullable< inner_reference, bitmap_reference > reference
typename inner_types::inner_const_reference inner_const_reference
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::const_iterator const_iterator
typename base_type::bitmap_type bitmap_type
typename base_type::bitmap_range bitmap_range
variable_size_binary_view_array_impl< T, arrow_traits< std::string >::const_reference, json_extension > self_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::value_iterator value_iterator
nullable< inner_value_type > value_type
typename inner_types::inner_value_type inner_value_type
variable_size_binary_view_array_impl(Args &&... args)
Generic constructor for creating variable-size binary view array.
mutable_array_bitmap_base< self_type > base_type
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_reference bitmap_reference
typename inner_types::inner_reference inner_reference
typename base_type::const_bitmap_iterator const_bitmap_iterator
variable_size_binary_view_array_impl(arrow_proxy)
Constructs variable-size binary view array from Arrow proxy.
array_inner_types< self_type > inner_types
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::iterator_tag iterator_tag
typename base_type::size_type size_type
typename base_type::difference_type difference_type
typename base_type::iterator iterator
Concept for convertible range types.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_variable_size_binary_view_array
Checks whether T is a variable_size_binary_view_array_impl type.
SPARROW_API std::size_t array_size(const array_wrapper &ar)
constexpr InputIt next(InputIt it, Distance n)
constexpr std::size_t range_size(R &&r)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
variable_size_binary_view_array_impl< arrow_traits< std::string >::value_type, arrow_traits< std::string >::const_reference > string_view_array
A variable-size string view layout implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
variable_size_binary_view_array_impl< arrow_traits< std::vector< byte_t > >::value_type, arrow_traits< std::vector< byte_t > >::const_reference > binary_view_array
A variable-size binary view layout implementation.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
variable_size_binary_view_array_impl< T, CR, Ext > array_type
std::random_access_iterator_tag iterator_tag
inner_reference inner_const_reference
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.