20#include <unordered_map>
43 template <std::ranges::sized_range T,
class CR,
typename Ext = empty_extension>
52 return "variable_size_binary_view_array";
99 template <std::ranges::sized_range T,
class CR,
typename Ext>
118 template <std::ranges::sized_range T,
class CR,
typename Ext>
169 template <std::ranges::sized_range T,
class CR,
typename Ext>
236 template <
class... Args>
253 struct buffers_collection
265 [[nodiscard]]
static constexpr std::string_view get_arrow_format()
267 return std::is_same_v<T, arrow_traits<std::string>::value_type> ? std::string_view(
"vu")
268 :
std::string_view(
"vz");
280 template <input_metadata_container METADATA_RANGE>
281 [[nodiscard]]
static ArrowSchema create_arrow_schema(
282 std::optional<std::string_view> name,
283 std::optional<METADATA_RANGE> metadata,
284 std::optional<std::unordered_set<sparrow::ArrowFlag>> flags
287 constexpr repeat_view<bool> children_ownership(
true, 0);
318 template <std::ranges::input_range R>
319 requires std::convertible_to<std::ranges::range_value_t<R>, T>
320 static buffers_collection create_buffers(R&& range);
342 std::ranges::input_range R,
344 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
345 requires std::convertible_to<std::ranges::range_value_t<R>, T>
346 [[nodiscard]]
static arrow_proxy create_proxy(
349 std::optional<std::string_view> name = std::nullopt,
350 std::optional<METADATA_RANGE> metadata = std::nullopt
370 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
371 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T>>
372 [[nodiscard]]
static arrow_proxy create_proxy(
373 NULLABLE_RANGE&& nullable_range,
374 std::optional<std::string_view> name = std::nullopt,
375 std::optional<METADATA_RANGE> metadata = std::nullopt
394 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
395 requires std::convertible_to<std::ranges::range_value_t<R>, T>
396 [[nodiscard]]
static arrow_proxy create_proxy(
399 std::optional<std::string_view> name = std::nullopt,
400 std::optional<METADATA_RANGE> metadata = std::nullopt
423 std::ranges::input_range VALUE_BUFFERS_RANGE,
424 validity_bitmap_input VB,
425 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
426 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>, u8_buffer<uint8_t>>
427 [[nodiscard]]
static arrow_proxy create_proxy(
428 size_t element_count,
429 u8_buffer<uint8_t>&& buffer_view,
430 VALUE_BUFFERS_RANGE&& value_buffers,
432 std::optional<std::string_view> name = std::nullopt,
433 std::optional<METADATA_RANGE> metadata = std::nullopt
484 template <std::ranges::sized_range U>
485 requires mpl::convertible_ranges<U, T>
486 constexpr void assign(U&& rhs,
size_type index);
506 template <std::ranges::sized_range U>
507 requires mpl::convertible_ranges<U, T>
508 void resize_values(
size_type new_length, U value);
528 template <std::ranges::sized_range U>
529 requires mpl::convertible_ranges<U, T>
551 template <mpl::iterator_of_type<T> InputIt>
608 static constexpr size_type LENGTH_BUFFER_INDEX = 1;
609 static constexpr std::size_t DATA_BUFFER_SIZE = 16;
610 static constexpr std::size_t SHORT_STRING_SIZE = 12;
611 static constexpr std::size_t PREFIX_SIZE = 4;
612 static constexpr std::ptrdiff_t PREFIX_OFFSET = 4;
613 static constexpr std::ptrdiff_t SHORT_STRING_OFFSET = 4;
614 static constexpr std::ptrdiff_t BUFFER_INDEX_OFFSET = 8;
615 static constexpr std::ptrdiff_t BUFFER_OFFSET_OFFSET = 12;
616 static constexpr std::size_t FIRST_VAR_DATA_BUFFER_INDEX = 2;
625 template <std::ranges::sized_range T,
class CR,
typename Ext>
631 template <std::ranges::sized_range T,
class CR,
typename Ext>
641 inline std::int32_t read_int32_unaligned(
const std::uint8_t* ptr)
644 std::memcpy(&value, ptr,
sizeof(std::int32_t));
648 inline void write_int32_unaligned(std::uint8_t* ptr, std::int32_t value)
650 std::memcpy(ptr, &value,
sizeof(std::int32_t));
654 template <
typename To,
typename From>
655 inline constexpr To transform_to(
const From& v)
657 return static_cast<To
>(v);
661 template <
typename SizeType>
662 inline void update_buffer_offsets_after(
663 std::uint8_t* view_data,
665 std::size_t data_buffer_size,
666 std::size_t short_string_size,
667 std::ptrdiff_t buffer_offset_offset,
668 std::size_t threshold_offset,
669 std::ptrdiff_t offset_adjustment,
670 SizeType skip_index =
static_cast<SizeType
>(-1)
680 auto* view_ptr = view_data + (i * data_buffer_size);
682 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
684 if (
static_cast<std::size_t
>(length) > short_string_size)
686 std::int32_t current_offset;
687 std::memcpy(&current_offset, view_ptr + buffer_offset_offset,
sizeof(std::int32_t));
689 if (
static_cast<std::size_t
>(current_offset) > threshold_offset)
691 current_offset +=
static_cast<std::int32_t
>(offset_adjustment);
692 std::memcpy(view_ptr + buffer_offset_offset, &current_offset,
sizeof(std::int32_t));
699 template <
typename Buffer>
700 inline void update_buffer_sizes_metadata(Buffer& buffer_sizes_buffer, std::int64_t new_size)
702 auto buffer_sizes_ptr = buffer_sizes_buffer.template data<std::int64_t>();
703 *buffer_sizes_ptr = new_size;
707 template <std::ranges::sized_range T,
class CR,
typename Ext>
708 template <std::ranges::input_range R>
709 requires std::convertible_to<std::ranges::range_value_t<R>, T>
710 auto variable_size_binary_view_array_impl<T, CR, Ext>::create_buffers(R&& range) -> buffers_collection
713# pragma GCC diagnostic push
714# pragma GCC diagnostic ignored "-Wcast-align"
720 std::size_t long_string_storage_size = 0;
722 for (
auto&& val : range)
724 auto val_casted = val
725 | std::ranges::views::transform(transform_to<std::uint8_t, typename T::value_type>);
727 const auto length = val.size();
728 auto length_ptr = length_buffer.
data() + (i * DATA_BUFFER_SIZE);
731 write_int32_unaligned(length_ptr,
static_cast<std::int32_t
>(length));
733 if (length <= SHORT_STRING_SIZE)
738 length_ptr + SHORT_STRING_OFFSET + length,
739 length_ptr + DATA_BUFFER_SIZE,
746 auto prefix_sub_range = val_casted | std::ranges::views::take(PREFIX_SIZE);
750 write_int32_unaligned(length_ptr + BUFFER_INDEX_OFFSET, 0);
753 write_int32_unaligned(
754 length_ptr + BUFFER_OFFSET_OFFSET,
755 static_cast<std::int32_t
>(long_string_storage_size)
759 long_string_storage_size += length;
766 std::size_t long_string_storage_offset = 0;
767 for (
auto&& val : range)
769 const auto length = val.size();
770 if (length > SHORT_STRING_SIZE)
772 auto val_casted = val
773 | std::ranges::views::transform(
774 transform_to<std::uint8_t, typename T::value_type>
777 long_string_storage_offset += length;
786 static_cast<std::size_t
>(1),
787 static_cast<int64_t
>(long_string_storage_size)
790 return {std::move(length_buffer), std::move(long_string_storage), std::move(buffer_sizes)};
793# pragma GCC diagnostic pop
797 template <std::ranges::sized_range T,
class CR,
typename Ext>
798 template <std::ranges::input_range R,
validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
799 requires std::convertible_to<std::ranges::range_value_t<R>, T>
800 arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
803 std::optional<std::string_view> name,
804 std::optional<METADATA_RANGE> metadata
811 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
814 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
817 auto buffers_parts = create_buffers(std::forward<R>(range));
819 std::vector<buffer<uint8_t>> buffers;
821 buffers.emplace_back(std::move(vbitmap).extract_storage());
822 buffers.emplace_back(std::move(buffers_parts.length_buffer));
823 buffers.emplace_back(std::move(buffers_parts.long_string_storage));
824 buffers.emplace_back(std::move(buffers_parts.buffer_sizes).extract_storage());
830 static_cast<std::int64_t
>(size),
831 static_cast<int64_t
>(null_count),
840 arrow_proxy proxy{std::move(arr), std::move(schema)};
845 template <std::ranges::sized_range T,
class CR,
typename Ext>
846 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
847 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>,
nullable<T>>
848 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
849 NULLABLE_RANGE&& nullable_range,
850 std::optional<std::string_view> name,
851 std::optional<METADATA_RANGE> metadata
854 auto values = nullable_range
855 | std::views::transform(
858 return static_cast<T
>(v.value());
862 auto is_non_null = nullable_range
863 | std::views::transform(
866 return v.has_value();
871 std::forward<
decltype(values)>(values),
872 std::forward<
decltype(is_non_null)>(is_non_null),
878 template <std::ranges::sized_range T,
class CR,
typename Ext>
879 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
880 requires std::convertible_to<std::ranges::range_value_t<R>, T>
881 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
884 std::optional<std::string_view> name,
885 std::optional<METADATA_RANGE> metadata
891 std::forward<R>(range),
899 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), std::nullopt);
902 auto buffers_parts = create_buffers(std::forward<R>(range));
904 std::vector<buffer<uint8_t>> buffers;
907 buffers.emplace_back(std::move(buffers_parts.length_buffer));
908 buffers.emplace_back(std::move(buffers_parts.long_string_storage));
909 buffers.emplace_back(std::move(buffers_parts.buffer_sizes).extract_storage());
916 static_cast<std::int64_t
>(size),
917 static_cast<int64_t
>(0),
926 arrow_proxy proxy{std::move(arr), std::move(schema)};
931 template <std::ranges::sized_range T,
class CR,
typename Ext>
932 template <std::ranges::input_range VALUE_BUFFERS_RANGE,
validity_bitmap_input VB, input_metadata_container METADATA_RANGE>
933 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>,
u8_buffer<uint8_t>>
934 arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
935 size_t element_count,
937 VALUE_BUFFERS_RANGE&& value_buffers,
939 std::optional<std::string_view> name,
940 std::optional<METADATA_RANGE> metadata
946 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
948 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
951 std::vector<buffer<uint8_t>> buffers;
952 buffers.reserve(2 + std::ranges::size(value_buffers));
953 buffers.emplace_back(std::move(bitmap).extract_storage());
954 buffers.emplace_back(std::move(
buffer_view).extract_storage());
960 for (
auto&& buf : value_buffers)
962 buffer_sizes[i] =
static_cast<int64_t
>(buf.size());
963 buffers.emplace_back(std::move(buf).extract_storage());
966 buffers.push_back(std::move(buffer_sizes).extract_storage());
972 static_cast<std::int64_t
>(size),
973 static_cast<std::int64_t
>(bitmap.null_count()),
982 arrow_proxy proxy{std::move(arr), std::move(schema)};
987 template <std::ranges::sized_range T,
class CR,
typename Ext>
988 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value(size_type i) -> inner_reference
990 return static_cast<const self_type*
>(
this)->value(i);
993 template <std::ranges::sized_range T,
class CR,
typename Ext>
994 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value(size_type i)
const
995 -> inner_const_reference
998# pragma GCC diagnostic push
999# pragma GCC diagnostic ignored "-Wcast-align"
1003 using char_or_byte =
typename inner_const_reference::value_type;
1005 auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data<uint8_t>()
1006 + (i * DATA_BUFFER_SIZE);
1007 const auto length =
static_cast<std::size_t
>(read_int32_unaligned(data_ptr));
1009 if (length <= SHORT_STRING_SIZE)
1011 const auto ptr =
reinterpret_cast<const char_or_byte*
>(data_ptr);
1012 const auto ret = inner_const_reference(ptr + SHORT_STRING_OFFSET, length);
1017 const auto buffer_index =
static_cast<std::size_t
>(
1018 read_int32_unaligned(data_ptr + BUFFER_INDEX_OFFSET)
1020 const auto buffer_offset =
static_cast<std::size_t
>(
1021 read_int32_unaligned(data_ptr + BUFFER_OFFSET_OFFSET)
1023 const auto buffer = this->get_arrow_proxy()
1024 .buffers()[buffer_index + FIRST_VAR_DATA_BUFFER_INDEX]
1025 .template data<const char_or_byte>();
1026 return inner_const_reference(
buffer + buffer_offset, length);
1030# pragma GCC diagnostic pop
1034 template <std::ranges::sized_range T,
class CR,
typename Ext>
1035 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_begin() -> value_iterator
1040 template <std::ranges::sized_range T,
class CR,
typename Ext>
1041 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_end() -> value_iterator
1046 template <std::ranges::sized_range T,
class CR,
typename Ext>
1047 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_cbegin() const
1048 -> const_value_iterator
1053 template <std::ranges::sized_range T,
class CR,
typename Ext>
1054 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_cend() const -> const_value_iterator
1056 return const_value_iterator(
1062 template <std::ranges::sized_range T,
class CR,
typename Ext>
1063 template <std::ranges::sized_range U>
1065 constexpr void variable_size_binary_view_array_impl<T, CR, Ext>::assign(U&& rhs, size_type index)
1068 const auto new_length =
static_cast<std::size_t
>(std::ranges::size(rhs));
1070 auto& length_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[LENGTH_BUFFER_INDEX];
1071 auto view_ptr = length_buffer.data() + (index * DATA_BUFFER_SIZE);
1072 const auto current_length =
static_cast<std::size_t
>(read_int32_unaligned(view_ptr));
1075 write_int32_unaligned(view_ptr,
static_cast<std::int32_t
>(new_length));
1077 if (new_length <= SHORT_STRING_SIZE)
1079 auto data_ptr = view_ptr + SHORT_STRING_OFFSET;
1080 std::ranges::copy(rhs,
reinterpret_cast<typename T::value_type*
>(data_ptr));
1083 if (new_length < SHORT_STRING_SIZE)
1086 reinterpret_cast<typename T::value_type*
>(data_ptr) + new_length,
1087 SHORT_STRING_SIZE - new_length,
1088 typename T::value_type{}
1097 auto& buffers = this->get_arrow_proxy().get_array_private_data()->buffers();
1098 auto& var_data_buffer = buffers[FIRST_VAR_DATA_BUFFER_INDEX];
1099 auto& buffer_sizes_buffer = buffers[buffers.size() - 1];
1101 const bool was_long_string = current_length > SHORT_STRING_SIZE;
1102 std::size_t current_buffer_offset = 0;
1104 if (was_long_string)
1106 current_buffer_offset =
static_cast<std::size_t
>(
1107 read_int32_unaligned(view_ptr + BUFFER_OFFSET_OFFSET)
1111 auto transformed_data = rhs
1112 | std::ranges::views::transform(
1113 transform_to<typename T::value_type, typename T::value_type>
1117 bool can_reuse_memory =
false;
1118 if (was_long_string && new_length == current_length)
1120 const auto* existing_data = var_data_buffer.data() + current_buffer_offset;
1121 can_reuse_memory = std::ranges::equal(
1123 std::span<const typename T::value_type>(
1124 reinterpret_cast<const typename T::value_type*
>(existing_data),
1130 if (can_reuse_memory)
1133 auto prefix_range = rhs | std::ranges::views::take(PREFIX_SIZE);
1134 auto prefix_transformed = prefix_range
1135 | std::ranges::views::transform(
1136 transform_to<std::uint8_t, typename T::value_type>
1138 std::ranges::copy(prefix_transformed, view_ptr + PREFIX_OFFSET);
1143 const auto length_diff =
static_cast<std::ptrdiff_t
>(new_length)
1144 -
static_cast<std::ptrdiff_t
>(current_length);
1145 const bool can_fit_in_place = was_long_string && length_diff <= 0;
1147 std::size_t final_offset = 0;
1149 if (can_fit_in_place)
1152 final_offset = current_buffer_offset;
1155 if (length_diff < 0)
1157 const auto bytes_to_compact =
static_cast<std::size_t
>(-length_diff);
1158 const auto move_start = current_buffer_offset + current_length;
1159 const auto move_end = var_data_buffer.size();
1160 const auto bytes_to_move = move_end - move_start;
1162 if (bytes_to_move > 0)
1165 var_data_buffer.data() + move_start,
1166 var_data_buffer.data() + move_end,
1167 var_data_buffer.data() + move_start - bytes_to_compact
1171 var_data_buffer.resize(var_data_buffer.size() - bytes_to_compact);
1174 update_buffer_offsets_after(
1175 length_buffer.data(),
1179 BUFFER_OFFSET_OFFSET,
1180 current_buffer_offset + current_length,
1181 -
static_cast<std::ptrdiff_t
>(bytes_to_compact),
1186 update_buffer_sizes_metadata(
1187 buffer_sizes_buffer,
1188 static_cast<std::int64_t
>(var_data_buffer.size())
1195 const auto expansion_needed = was_long_string ? length_diff
1196 :
static_cast<std::ptrdiff_t
>(new_length);
1197 const auto new_var_buffer_size = var_data_buffer.size() + expansion_needed;
1199 if (was_long_string && length_diff > 0)
1202 final_offset = current_buffer_offset;
1203 const auto expansion_bytes =
static_cast<std::size_t
>(length_diff);
1204 const auto move_start = current_buffer_offset + current_length;
1205 const auto bytes_to_move = var_data_buffer.size() - move_start;
1208 var_data_buffer.resize(new_var_buffer_size);
1210 if (bytes_to_move > 0)
1214 var_data_buffer.data() + move_start,
1215 var_data_buffer.data() + move_start + bytes_to_move,
1216 var_data_buffer.data() + move_start + bytes_to_move + expansion_bytes
1221 update_buffer_offsets_after(
1222 length_buffer.data(),
1226 BUFFER_OFFSET_OFFSET,
1228 static_cast<std::ptrdiff_t
>(expansion_bytes),
1235 final_offset = var_data_buffer.size();
1236 var_data_buffer.resize(new_var_buffer_size);
1240 update_buffer_sizes_metadata(buffer_sizes_buffer,
static_cast<std::int64_t
>(new_var_buffer_size));
1243 std::ranges::copy(transformed_data, var_data_buffer.data() + final_offset);
1247 auto prefix_range = rhs | std::ranges::views::take(PREFIX_SIZE);
1248 auto prefix_transformed = prefix_range
1249 | std::ranges::views::transform(
1250 transform_to<std::uint8_t, typename T::value_type>
1252 std::ranges::copy(prefix_transformed, view_ptr + PREFIX_OFFSET);
1254 write_int32_unaligned(
1255 view_ptr + BUFFER_INDEX_OFFSET,
1256 static_cast<std::int32_t
>(FIRST_VAR_DATA_BUFFER_INDEX)
1259 write_int32_unaligned(view_ptr + BUFFER_OFFSET_OFFSET,
static_cast<std::int32_t
>(final_offset));
1263 template <std::ranges::sized_range T,
class CR,
typename Ext>
1264 template <std::ranges::sized_range U>
1266 void variable_size_binary_view_array_impl<T, CR, Ext>::resize_values(size_type new_length, U value)
1268 const size_t current_size = this->
size();
1270 if (new_length == current_size)
1275 if (new_length < current_size)
1277 erase_values(
sparrow::next(value_cbegin(), new_length), current_size - new_length);
1281 insert_value(value_cend(), value, new_length - current_size);
1285 template <std::ranges::sized_range T,
class CR,
typename Ext>
1286 template <std::ranges::sized_range U>
1296 template <std::ranges::sized_range T,
class CR,
typename Ext>
1297 template <mpl::iterator_of_type<T> InputIt>
1298 auto variable_size_binary_view_array_impl<T, CR, Ext>::insert_values(
1308 const auto insert_index = std::distance(value_cbegin(), pos);
1309 return value_begin() + insert_index;
1312 const auto insert_index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
1313 const auto current_size = this->size();
1314 const auto new_size = current_size + count;
1317 std::size_t additional_var_storage = 0;
1318 std::vector<std::size_t> value_lengths;
1319 value_lengths.reserve(count);
1321 for (
auto it = first; it != last; ++it)
1323 const auto length =
static_cast<std::size_t
>(std::ranges::size(*it));
1324 value_lengths.push_back(length);
1325 if (length > SHORT_STRING_SIZE)
1327 additional_var_storage += length;
1331 auto& proxy = this->get_arrow_proxy();
1332 auto* private_data = proxy.get_array_private_data();
1333 auto& buffers = private_data->buffers();
1335 const auto new_view_buffer_size = new_size * DATA_BUFFER_SIZE;
1336 buffers[LENGTH_BUFFER_INDEX].resize(new_view_buffer_size);
1338 if (additional_var_storage > 0)
1340 const auto current_var_size = buffers[FIRST_VAR_DATA_BUFFER_INDEX].size();
1341 buffers[FIRST_VAR_DATA_BUFFER_INDEX].resize(current_var_size + additional_var_storage);
1344 auto& buffer_sizes = buffers[buffers.size() - 1];
1345 update_buffer_sizes_metadata(
1347 static_cast<std::int64_t
>(buffers[FIRST_VAR_DATA_BUFFER_INDEX].size())
1351 auto* view_data = buffers[LENGTH_BUFFER_INDEX].data();
1352 if (insert_index < current_size)
1354 const auto bytes_to_move = (current_size - insert_index) * DATA_BUFFER_SIZE;
1355 const auto src_offset = insert_index * DATA_BUFFER_SIZE;
1356 const auto dst_offset = (insert_index + count) * DATA_BUFFER_SIZE;
1358 std::memmove(view_data + dst_offset, view_data + src_offset, bytes_to_move);
1361 if (additional_var_storage > 0)
1363 for (
size_type i = insert_index + count; i < new_size; ++i)
1365 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1366 std::int32_t length;
1367 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1369 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1371 std::int32_t current_offset;
1372 std::memcpy(&current_offset, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1373 current_offset +=
static_cast<std::int32_t
>(additional_var_storage);
1374 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, &current_offset,
sizeof(std::int32_t));
1381 std::size_t var_offset = buffers[FIRST_VAR_DATA_BUFFER_INDEX].size() - additional_var_storage;
1384 for (
auto it = first; it != last; ++it, ++value_idx)
1386 const auto view_index = insert_index + value_idx;
1387 auto* view_ptr = view_data + (view_index * DATA_BUFFER_SIZE);
1388 const auto value_length = value_lengths[value_idx];
1390 const auto& current_value = *it;
1393 const std::int32_t value_length_int32 =
static_cast<std::int32_t
>(value_length);
1394 std::memcpy(view_ptr, &value_length_int32,
sizeof(std::int32_t));
1396 if (value_length <= SHORT_STRING_SIZE)
1399 std::ranges::transform(
1401 view_ptr + SHORT_STRING_OFFSET,
1402 transform_to<std::uint8_t, typename T::value_type>
1406 view_ptr + SHORT_STRING_OFFSET + value_length,
1407 view_ptr + DATA_BUFFER_SIZE,
1414 std::ranges::transform(
1415 current_value | std::views::take(PREFIX_SIZE),
1416 view_ptr + PREFIX_OFFSET,
1417 transform_to<std::uint8_t, typename T::value_type>
1421 const std::int32_t buffer_index_zero = 0;
1422 std::memcpy(view_ptr + BUFFER_INDEX_OFFSET, &buffer_index_zero,
sizeof(std::int32_t));
1425 const std::int32_t var_offset_int32 =
static_cast<std::int32_t
>(var_offset);
1426 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, &var_offset_int32,
sizeof(std::int32_t));
1429 std::ranges::transform(
1431 buffers[FIRST_VAR_DATA_BUFFER_INDEX].data() + var_offset,
1432 transform_to<std::uint8_t, typename T::value_type>
1435 var_offset += value_length;
1440 proxy.update_buffers();
1445 template <std::ranges::sized_range T,
class CR,
typename Ext>
1447 variable_size_binary_view_array_impl<T, CR, Ext>::erase_values(const_value_iterator pos, size_type count)
1450 const size_t erase_index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
1451 const size_t current_size = this->size();
1454 if (erase_index + count > current_size)
1456 count = current_size - erase_index;
1461 return value_begin() +
static_cast<difference_type
>(erase_index);
1464 const auto new_size = current_size -
count;
1467 std::size_t freed_var_storage = 0;
1468 auto& proxy = this->get_arrow_proxy();
1469 auto* private_data = proxy.get_array_private_data();
1470 auto& buffers = private_data->buffers();
1471 auto* view_data = buffers[LENGTH_BUFFER_INDEX].data();
1474 for (size_type i = erase_index; i < erase_index +
count; ++i)
1476 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1477 std::int32_t length;
1478 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1479 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1481 freed_var_storage +=
static_cast<std::size_t
>(length);
1489 if (buffers[0].
size() > 0)
1493 buffers[LENGTH_BUFFER_INDEX].clear();
1494 buffers[FIRST_VAR_DATA_BUFFER_INDEX].clear();
1496 auto& buffer_sizes = buffers[buffers.size() - 1];
1497 update_buffer_sizes_metadata(buffer_sizes, 0);
1499 proxy.update_buffers();
1500 return value_begin();
1504 if (freed_var_storage > 0)
1506 auto& var_buffer = buffers[FIRST_VAR_DATA_BUFFER_INDEX];
1507 std::size_t write_offset = 0;
1510 std::unordered_map<std::size_t, std::size_t> offset_mapping;
1511 offset_mapping.reserve(current_size - count);
1513 for (size_type i = 0; i < current_size; ++i)
1515 if (i >= erase_index && i < erase_index + count)
1521 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1522 std::int32_t length;
1523 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1524 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1526 std::int32_t old_offset_int32;
1527 std::memcpy(&old_offset_int32, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1528 const auto old_offset =
static_cast<std::size_t
>(old_offset_int32);
1531 offset_mapping[old_offset] = write_offset;
1534 if (write_offset != old_offset)
1537 var_buffer.data() + write_offset,
1538 var_buffer.data() + old_offset,
1539 static_cast<std::size_t
>(length)
1543 write_offset +=
static_cast<std::size_t
>(length);
1548 var_buffer.resize(var_buffer.size() - freed_var_storage);
1551 auto& buffer_sizes = buffers[buffers.size() - 1];
1552 update_buffer_sizes_metadata(buffer_sizes,
static_cast<std::int64_t
>(var_buffer.size()));
1555 for (size_type i = 0; i < current_size; ++i)
1557 if (i >= erase_index && i < erase_index + count)
1562 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1563 std::int32_t length;
1564 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1565 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1567 std::int32_t old_offset_int32;
1568 std::memcpy(&old_offset_int32, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1569 const auto old_offset =
static_cast<std::size_t
>(old_offset_int32);
1570 auto it = offset_mapping.find(old_offset);
1571 if (it != offset_mapping.end())
1573 const std::int32_t new_offset =
static_cast<std::int32_t
>(it->second);
1574 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, &new_offset,
sizeof(std::int32_t));
1581 if (erase_index + count < current_size)
1583 const auto src_offset = (erase_index +
count) * DATA_BUFFER_SIZE;
1584 const auto dst_offset = erase_index * DATA_BUFFER_SIZE;
1585 const auto bytes_to_move = (current_size - erase_index -
count) * DATA_BUFFER_SIZE;
1587 std::memmove(view_data + dst_offset, view_data + src_offset, bytes_to_move);
1591 buffers[LENGTH_BUFFER_INDEX].resize(new_size * DATA_BUFFER_SIZE);
1594 proxy.update_buffers();
1597 return erase_index < new_size ?
sparrow::next(value_begin(), erase_index) : value_end();
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
constexpr size_type size() const noexcept
Object that owns a piece of contiguous memory.
xsimd::aligned_allocator< T > default_allocator
constexpr U * data() noexcept
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
nullable< inner_reference, bitmap_reference > reference
typename inner_types::inner_const_reference inner_const_reference
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::const_iterator const_iterator
variable_size_binary_view_array_impl(const self_type &)
typename base_type::bitmap_type bitmap_type
typename base_type::bitmap_range bitmap_range
variable_size_binary_view_array_impl< T, arrow_traits< std::string >::const_reference, Ext > self_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::value_iterator value_iterator
nullable< inner_value_type > value_type
typename inner_types::inner_value_type inner_value_type
variable_size_binary_view_array_impl(Args &&... args)
Generic constructor for creating variable-size binary view array.
mutable_array_bitmap_base< self_type > base_type
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_reference bitmap_reference
typename inner_types::inner_reference inner_reference
typename base_type::const_bitmap_iterator const_bitmap_iterator
self_type & operator=(const self_type &)=default
variable_size_binary_view_array_impl(arrow_proxy)
Constructs variable-size binary view array from Arrow proxy.
array_inner_types< self_type > inner_types
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::iterator_tag iterator_tag
typename base_type::size_type size_type
typename base_type::difference_type difference_type
typename base_type::iterator iterator
Concept for convertible range types.
#define SPARROW_ASSERT_TRUE(expr__)
SPARROW_API void increase(const std::string &key)
SPARROW_API int count(const std::string &key, int disabled_value=0)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr bool is_type_instance_of_v
Variable template for convenient access to is_type_instance_of.
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_variable_size_binary_view_array
Checks whether T is a variable_size_binary_view_array_impl type.
SPARROW_API std::size_t array_size(const array_wrapper &ar)
constexpr InputIt next(InputIt it, Distance n)
constexpr std::size_t range_size(R &&r)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
variable_size_binary_view_array_impl< arrow_traits< std::string >::value_type, arrow_traits< std::string >::const_reference > string_view_array
A variable-size string view layout implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
variable_size_binary_view_array_impl< arrow_traits< std::vector< byte_t > >::value_type, arrow_traits< std::vector< byte_t > >::const_reference > binary_view_array
A variable-size binary view layout implementation.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
variable_size_binary_view_array_impl< T, CR, Ext > array_type
std::random_access_iterator_tag iterator_tag
inner_reference inner_const_reference
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.