20#include <unordered_map>
42 template <std::ranges::sized_range T,
class CR,
typename Ext = empty_extension>
88 template <std::ranges::sized_range T,
class CR,
typename Ext>
107 template <std::ranges::sized_range T,
class CR,
typename Ext>
158 template <std::ranges::sized_range T,
class CR,
typename Ext>
222 template <
class... Args>
239 struct buffers_collection
251 [[nodiscard]]
static constexpr std::string_view get_arrow_format()
253 return std::is_same_v<T, arrow_traits<std::string>::value_type> ? std::string_view(
"vu")
254 :
std::string_view(
"vz");
266 template <input_metadata_container METADATA_RANGE>
267 [[nodiscard]]
static ArrowSchema create_arrow_schema(
268 std::optional<std::string_view> name,
269 std::optional<METADATA_RANGE> metadata,
270 std::optional<std::unordered_set<sparrow::ArrowFlag>> flags
273 constexpr repeat_view<bool> children_ownership(
true, 0);
304 template <std::ranges::input_range R>
305 requires std::convertible_to<std::ranges::range_value_t<R>, T>
306 static buffers_collection create_buffers(R&& range);
328 std::ranges::input_range R,
330 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
331 requires std::convertible_to<std::ranges::range_value_t<R>, T>
332 [[nodiscard]]
static arrow_proxy create_proxy(
335 std::optional<std::string_view> name = std::nullopt,
336 std::optional<METADATA_RANGE> metadata = std::nullopt
356 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
357 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>, nullable<T>>
358 [[nodiscard]]
static arrow_proxy create_proxy(
359 NULLABLE_RANGE&& nullable_range,
360 std::optional<std::string_view> name = std::nullopt,
361 std::optional<METADATA_RANGE> metadata = std::nullopt
380 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
381 requires std::convertible_to<std::ranges::range_value_t<R>, T>
382 [[nodiscard]]
static arrow_proxy create_proxy(
385 std::optional<std::string_view> name = std::nullopt,
386 std::optional<METADATA_RANGE> metadata = std::nullopt
409 std::ranges::input_range VALUE_BUFFERS_RANGE,
410 validity_bitmap_input VB,
411 input_metadata_container METADATA_RANGE = std::vector<metadata_pair>>
412 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>, u8_buffer<uint8_t>>
413 [[nodiscard]]
static arrow_proxy create_proxy(
414 size_t element_count,
415 u8_buffer<uint8_t>&& buffer_view,
416 VALUE_BUFFERS_RANGE&& value_buffers,
418 std::optional<std::string_view> name = std::nullopt,
419 std::optional<METADATA_RANGE> metadata = std::nullopt
470 template <std::ranges::sized_range U>
471 requires mpl::convertible_ranges<U, T>
472 constexpr void assign(U&& rhs,
size_type index);
492 template <std::ranges::sized_range U>
493 requires mpl::convertible_ranges<U, T>
494 void resize_values(
size_type new_length, U value);
514 template <std::ranges::sized_range U>
515 requires mpl::convertible_ranges<U, T>
537 template <mpl::iterator_of_type<T> InputIt>
// Buffer indices and byte layout of one view entry, as used throughout this implementation.
// Index, in the array's buffer list, of the buffer that stores the view entries.
594 static constexpr size_type LENGTH_BUFFER_INDEX = 1;
// Size in bytes of one view entry; entry i begins at byte i * DATA_BUFFER_SIZE.
595 static constexpr std::size_t DATA_BUFFER_SIZE = 16;
// Largest value length (in bytes) whose payload is kept inside the view entry itself.
596 static constexpr std::size_t SHORT_STRING_SIZE = 12;
// Number of leading bytes of a long value that are duplicated into its view entry.
597 static constexpr std::size_t PREFIX_SIZE = 4;
// Byte offset of that prefix inside a view entry.
598 static constexpr std::ptrdiff_t PREFIX_OFFSET = 4;
// Byte offset of the inline payload of a short value inside a view entry.
599 static constexpr std::ptrdiff_t SHORT_STRING_OFFSET = 4;
// Byte offset of the 32-bit data-buffer index stored for a long value.
600 static constexpr std::ptrdiff_t BUFFER_INDEX_OFFSET = 8;
// Byte offset of the 32-bit offset (into the data buffer) stored for a long value.
601 static constexpr std::ptrdiff_t BUFFER_OFFSET_OFFSET = 12;
// Index, in the array's buffer list, of the first data buffer holding long values.
602 static constexpr std::size_t FIRST_VAR_DATA_BUFFER_INDEX = 2;
611 template <std::ranges::sized_range T,
class CR,
typename Ext>
/**
 * Reads a 32-bit signed integer from a byte address that may be unaligned.
 *
 * The bytes are copied with std::memcpy instead of dereferencing a casted
 * pointer, so no alignment requirement is placed on ptr.
 *
 * @param ptr Address of the first of sizeof(std::int32_t) readable bytes.
 * @return The integer stored at ptr, in the host byte order.
 */
inline std::int32_t read_int32_unaligned(const std::uint8_t* ptr)
{
    std::int32_t value = 0;
    std::memcpy(&value, ptr, sizeof(std::int32_t));
    return value;
}
/**
 * Writes a 32-bit signed integer to a byte address that may be unaligned.
 *
 * The bytes are copied with std::memcpy, so no alignment requirement is
 * placed on ptr.
 *
 * @param ptr   Address of the first of sizeof(std::int32_t) writable bytes.
 * @param value Integer to store, written in the host byte order.
 */
inline void write_int32_unaligned(std::uint8_t* ptr, std::int32_t value)
{
    std::memcpy(ptr, &value, sizeof(std::int32_t));
}
/**
 * Converts a value to another type with static_cast.
 *
 * Having this as a named function template lets it be passed to range
 * adaptors and algorithms as an element-wise conversion.
 *
 * @tparam To   Destination type.
 * @tparam From Source type.
 * @param v Value to convert.
 * @return static_cast<To>(v).
 */
template <typename To, typename From>
inline constexpr To transform_to(const From& v)
{
    return static_cast<To>(v);
}
/**
 * Adjusts the stored data-buffer offset of every long-value view whose offset
 * lies strictly beyond a threshold.
 *
 * Each view entry occupies data_buffer_size bytes. Its first four bytes hold
 * the value length; when that length exceeds short_string_size, the four
 * bytes at buffer_offset_offset hold the value's offset into the data buffer.
 * Entries whose length is at most short_string_size are left untouched.
 *
 * @tparam SizeType Index type used to walk the view entries.
 * @param view_data            Start of the contiguous view entries.
 * @param array_size           Number of view entries to scan.
 * @param data_buffer_size     Size in bytes of one view entry.
 * @param short_string_size    Longest length that is not treated as a long value.
 * @param buffer_offset_offset Byte offset of the offset field inside an entry.
 * @param threshold_offset     Only offsets strictly greater than this are adjusted.
 * @param offset_adjustment    Signed amount added to each selected offset.
 * @param skip_index           Entry that must not be modified; the default
 *                             (SizeType(-1)) is not expected to match any entry.
 *
 * NOTE(review): the comparison against threshold_offset is strict, so a view
 * whose offset equals the threshold is not adjusted — confirm that callers
 * choose the threshold accordingly.
 */
template <typename SizeType>
inline void update_buffer_offsets_after(
    std::uint8_t* view_data,
    SizeType array_size,
    std::size_t data_buffer_size,
    std::size_t short_string_size,
    std::ptrdiff_t buffer_offset_offset,
    std::size_t threshold_offset,
    std::ptrdiff_t offset_adjustment,
    SizeType skip_index = static_cast<SizeType>(-1)
)
{
    for (SizeType i = 0; i < array_size; ++i)
    {
        if (i == skip_index)
        {
            continue;
        }

        auto* view_ptr = view_data + (i * data_buffer_size);
        // memcpy keeps the accesses valid even when the entry is not 4-byte aligned.
        std::int32_t length = 0;
        std::memcpy(&length, view_ptr, sizeof(std::int32_t));

        if (static_cast<std::size_t>(length) > short_string_size)
        {
            std::int32_t current_offset = 0;
            std::memcpy(&current_offset, view_ptr + buffer_offset_offset, sizeof(std::int32_t));

            if (static_cast<std::size_t>(current_offset) > threshold_offset)
            {
                current_offset += static_cast<std::int32_t>(offset_adjustment);
                std::memcpy(view_ptr + buffer_offset_offset, &current_offset, sizeof(std::int32_t));
            }
        }
    }
}
/**
 * Stores a new size in the first 64-bit slot of the buffer-sizes buffer.
 *
 * @tparam Buffer Type exposing a `data<U>()` member template that returns a
 *                pointer to its storage viewed as U.
 * @param buffer_sizes_buffer Buffer whose first std::int64_t element is overwritten.
 * @param new_size            Value to store.
 */
template <typename Buffer>
inline void update_buffer_sizes_metadata(Buffer& buffer_sizes_buffer, std::int64_t new_size)
{
    auto buffer_sizes_ptr = buffer_sizes_buffer.template data<std::int64_t>();
    *buffer_sizes_ptr = new_size;
}
// Builds the buffers backing a view array from a range of values: the view entries,
// the storage for values longer than SHORT_STRING_SIZE, and the buffer-sizes buffer.
// NOTE(review): `range` is traversed twice below while R is only constrained as an
// input_range — confirm that single-pass ranges are never passed here.
686 template <std::ranges::sized_range T,
class CR,
typename Ext>
687 template <std::ranges::input_range R>
688 requires std::convertible_to<std::ranges::range_value_t<R>, T>
689 auto variable_size_binary_view_array_impl<T, CR, Ext>::create_buffers(R&& range) -> buffers_collection
692# pragma GCC diagnostic push
693# pragma GCC diagnostic ignored "-Wcast-align"
// Running total of bytes needed for long values; also the offset given to the next long value.
699 std::size_t long_string_storage_size = 0;
// First pass: fill one view entry per value.
701 for (
auto&& val : range)
703 auto val_casted = val
704 | std::ranges::views::transform(transform_to<std::uint8_t, typename T::value_type>);
706 const auto length = val.size();
707 auto length_ptr = length_buffer.
data() + (i * DATA_BUFFER_SIZE);
// The entry starts with the value length as a 32-bit integer.
710 write_int32_unaligned(length_ptr,
static_cast<std::int32_t
>(length));
712 if (length <= SHORT_STRING_SIZE)
// Short value: the range [payload end, entry end) is the unused tail of the entry.
717 length_ptr + SHORT_STRING_OFFSET + length,
718 length_ptr + DATA_BUFFER_SIZE,
// Long value: only the first PREFIX_SIZE bytes are taken for the entry.
725 auto prefix_sub_range = val_casted | std::ranges::views::take(PREFIX_SIZE);
// Every long value produced here refers to data buffer index 0.
729 write_int32_unaligned(length_ptr + BUFFER_INDEX_OFFSET, 0);
// Its offset is the number of long-value bytes accounted for so far.
732 write_int32_unaligned(
733 length_ptr + BUFFER_OFFSET_OFFSET,
734 static_cast<std::int32_t
>(long_string_storage_size)
738 long_string_storage_size += length;
// Second pass: handle the values longer than SHORT_STRING_SIZE, advancing the storage offset.
745 std::size_t long_string_storage_offset = 0;
746 for (
auto&& val : range)
748 const auto length = val.size();
749 if (length > SHORT_STRING_SIZE)
751 auto val_casted = val
752 | std::ranges::views::transform(
753 transform_to<std::uint8_t, typename T::value_type>
756 long_string_storage_offset += length;
// A single element is requested, initialised with the total long-value storage size.
765 static_cast<std::size_t
>(1),
766 static_cast<int64_t
>(long_string_storage_size)
769 return {std::move(length_buffer), std::move(long_string_storage), std::move(buffer_sizes)};
772# pragma GCC diagnostic pop
776 template <std::ranges::sized_range T,
class CR,
typename Ext>
777 template <std::ranges::input_range R, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
778 requires std::convertible_to<std::ranges::range_value_t<R>, T>
779 arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
782 std::optional<std::string_view> name,
783 std::optional<METADATA_RANGE> metadata
790 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
793 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
796 auto buffers_parts = create_buffers(std::forward<R>(range));
798 std::vector<buffer<uint8_t>> buffers{
799 std::move(vbitmap).extract_storage(),
800 std::move(buffers_parts.length_buffer),
801 std::move(buffers_parts.long_string_storage),
802 std::move(buffers_parts.buffer_sizes).extract_storage()
809 static_cast<std::int64_t
>(size),
810 static_cast<int64_t
>(null_count),
819 arrow_proxy proxy{std::move(arr), std::move(schema)};
824 template <std::ranges::sized_range T,
class CR,
typename Ext>
825 template <std::ranges::input_range NULLABLE_RANGE, input_metadata_container METADATA_RANGE>
826 requires std::convertible_to<std::ranges::range_value_t<NULLABLE_RANGE>,
nullable<T>>
827 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
828 NULLABLE_RANGE&& nullable_range,
829 std::optional<std::string_view> name,
830 std::optional<METADATA_RANGE> metadata
833 auto values = nullable_range
834 | std::views::transform(
837 return static_cast<T
>(v.value());
841 auto is_non_null = nullable_range
842 | std::views::transform(
845 return v.has_value();
850 std::forward<
decltype(values)>(values),
851 std::forward<
decltype(is_non_null)>(is_non_null),
857 template <std::ranges::sized_range T,
class CR,
typename Ext>
858 template <std::ranges::input_range R, input_metadata_container METADATA_RANGE>
859 requires std::convertible_to<std::ranges::range_value_t<R>, T>
860 [[nodiscard]]
arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
863 std::optional<std::string_view> name,
864 std::optional<METADATA_RANGE> metadata
870 std::forward<R>(range),
878 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), std::nullopt);
881 auto buffers_parts = create_buffers(std::forward<R>(range));
883 std::vector<buffer<uint8_t>> buffers{
885 std::move(buffers_parts.length_buffer),
886 std::move(buffers_parts.long_string_storage),
887 std::move(buffers_parts.buffer_sizes).extract_storage()
895 static_cast<std::int64_t
>(size),
896 static_cast<int64_t
>(0),
905 arrow_proxy proxy{std::move(arr), std::move(schema)};
// Builds an arrow_proxy from caller-provided value buffers and a validity bitmap.
// The schema is created with the NULLABLE flag.
// NOTE(review): the identifier `validity_bitmap_input` appears split across two lines
// in the template parameter list below.
910 template <std::ranges::sized_range T,
class CR,
typename Ext>
911 template <std::ranges::input_range VALUE_BUFFERS_RANGE, val
idity_bitmap_input VB, input_metadata_container METADATA_RANGE>
912 requires std::convertible_to<std::ranges::range_value_t<VALUE_BUFFERS_RANGE>,
u8_buffer<uint8_t>>
913 arrow_proxy variable_size_binary_view_array_impl<T, CR, Ext>::create_proxy(
914 size_t element_count,
916 VALUE_BUFFERS_RANGE&& value_buffers,
918 std::optional<std::string_view> name,
919 std::optional<METADATA_RANGE> metadata
925 static const std::optional<std::unordered_set<sparrow::ArrowFlag>> flags{{
ArrowFlag::NULLABLE}};
927 ArrowSchema schema = create_arrow_schema(std::move(name), std::move(metadata), flags);
// The validity bitmap's storage becomes the first buffer.
930 std::vector<buffer<uint8_t>> buffers{
931 std::move(bitmap).extract_storage(),
934 for (
auto&& buf : value_buffers)
// One 64-bit size is recorded per value buffer.
// NOTE(review): value_buffers is indexed and asked for size() here although it is only
// constrained as an input_range, and it has already been iterated above — confirm the
// elements are still valid at this point.
941 for (std::size_t i = 0; i < value_buffers.size(); ++i)
943 buffer_sizes[i] =
static_cast<int64_t
>(value_buffers[i].size());
// The buffer-sizes buffer is appended last.
945 buffers.push_back(std::move(buffer_sizes).extract_storage());
950 static_cast<std::int64_t
>(size),
// NOTE(review): bitmap went through std::move(bitmap).extract_storage() above; confirm
// null_count() is not being read from a moved-from bitmap.
951 static_cast<std::int64_t
>(bitmap.null_count()),
960 arrow_proxy proxy{std::move(arr), std::move(schema)};
965 template <std::ranges::sized_range T,
class CR,
typename Ext>
966 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value(size_type i) -> inner_reference
968 return static_cast<const self_type*
>(
this)->value(i);
// Returns a read-only view of the i-th value.
// Values of at most SHORT_STRING_SIZE bytes are read from the view entry itself;
// longer values are read from the data buffer designated by the entry.
971 template <std::ranges::sized_range T,
class CR,
typename Ext>
972 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value(size_type i)
const
973 -> inner_const_reference
// -Wcast-align is silenced for the pointer reinterpretation performed below.
976# pragma GCC diagnostic push
977# pragma GCC diagnostic ignored "-Wcast-align"
981 using char_or_byte =
typename inner_const_reference::value_type;
// Locate the view entry of element i inside the view buffer.
983 auto data_ptr = this->get_arrow_proxy().buffers()[LENGTH_BUFFER_INDEX].template data<uint8_t>()
984 + (i * DATA_BUFFER_SIZE);
// The first four bytes of the entry hold the value length.
985 const auto length =
static_cast<std::size_t
>(read_int32_unaligned(data_ptr));
987 if (length <= SHORT_STRING_SIZE)
// Short value: the payload starts SHORT_STRING_OFFSET elements into the entry.
989 const auto ptr =
reinterpret_cast<const char_or_byte*
>(data_ptr);
990 const auto ret = inner_const_reference(ptr + SHORT_STRING_OFFSET, length);
// Long value: the entry stores a data-buffer index and an offset into that buffer.
995 const auto buffer_index =
static_cast<std::size_t
>(
996 read_int32_unaligned(data_ptr + BUFFER_INDEX_OFFSET)
998 const auto buffer_offset =
static_cast<std::size_t
>(
999 read_int32_unaligned(data_ptr + BUFFER_OFFSET_OFFSET)
// The stored index is relative to FIRST_VAR_DATA_BUFFER_INDEX in the array's buffer list.
1001 const auto buffer = this->get_arrow_proxy()
1002 .buffers()[buffer_index + FIRST_VAR_DATA_BUFFER_INDEX]
1003 .template data<const char_or_byte>();
1004 return inner_const_reference(
buffer + buffer_offset, length);
1008# pragma GCC diagnostic pop
1012 template <std::ranges::sized_range T,
class CR,
typename Ext>
1013 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_begin() -> value_iterator
1018 template <std::ranges::sized_range T,
class CR,
typename Ext>
1019 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_end() -> value_iterator
1024 template <std::ranges::sized_range T,
class CR,
typename Ext>
1025 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_cbegin() const
1026 -> const_value_iterator
1031 template <std::ranges::sized_range T,
class CR,
typename Ext>
1032 constexpr auto variable_size_binary_view_array_impl<T, CR, Ext>::value_cend() const -> const_value_iterator
1034 return const_value_iterator(
// Replaces the value stored at `index` with the contents of `rhs`.
// The view entry is rewritten, and when the new value is longer than SHORT_STRING_SIZE
// the first data buffer is compacted, expanded or appended to as required.
1040 template <std::ranges::sized_range T,
class CR,
typename Ext>
1041 template <std::ranges::sized_range U>
1043 constexpr void variable_size_binary_view_array_impl<T, CR, Ext>::assign(U&& rhs, size_type index)
1046 const auto new_length =
static_cast<std::size_t
>(std::ranges::size(rhs));
1048 auto& length_buffer = this->get_arrow_proxy().get_array_private_data()->buffers()[LENGTH_BUFFER_INDEX];
1049 auto view_ptr = length_buffer.data() + (index * DATA_BUFFER_SIZE);
// Remember the previous length before it is overwritten just below.
1050 const auto current_length =
static_cast<std::size_t
>(read_int32_unaligned(view_ptr));
1053 write_int32_unaligned(view_ptr,
static_cast<std::int32_t
>(new_length));
// Short value: the payload is written inside the view entry.
1055 if (new_length <= SHORT_STRING_SIZE)
1057 auto data_ptr = view_ptr + SHORT_STRING_OFFSET;
1058 std::ranges::copy(rhs,
reinterpret_cast<typename T::value_type*
>(data_ptr));
// The part of the inline area that the payload does not cover receives value-initialised elements.
1061 if (new_length < SHORT_STRING_SIZE)
1064 reinterpret_cast<typename T::value_type*
>(data_ptr) + new_length,
1065 SHORT_STRING_SIZE - new_length,
1066 typename T::value_type{}
// Long value: operate on the first data buffer and on the buffer-sizes buffer (last buffer).
1075 auto& buffers = this->get_arrow_proxy().get_array_private_data()->buffers();
1076 auto& var_data_buffer = buffers[FIRST_VAR_DATA_BUFFER_INDEX];
1077 auto& buffer_sizes_buffer = buffers[buffers.size() - 1];
1079 const bool was_long_string = current_length > SHORT_STRING_SIZE;
1080 std::size_t current_buffer_offset = 0;
// The previous offset is only meaningful when the previous value was long.
1082 if (was_long_string)
1084 current_buffer_offset =
static_cast<std::size_t
>(
1085 read_int32_unaligned(view_ptr + BUFFER_OFFSET_OFFSET)
1089 auto transformed_data = rhs
1090 | std::ranges::views::transform(
1091 transform_to<typename T::value_type, typename T::value_type>
// Same length as before: compare the stored bytes with rhs to see whether they already match.
1095 bool can_reuse_memory =
false;
1096 if (was_long_string && new_length == current_length)
1098 const auto* existing_data = var_data_buffer.data() + current_buffer_offset;
1099 can_reuse_memory = std::ranges::equal(
1101 std::span<const typename T::value_type>(
1102 reinterpret_cast<const typename T::value_type*
>(existing_data),
// Stored bytes already match: the prefix kept in the view entry is rewritten.
1108 if (can_reuse_memory)
1111 auto prefix_range = rhs | std::ranges::views::take(PREFIX_SIZE);
1112 auto prefix_transformed = prefix_range
1113 | std::ranges::views::transform(
1114 transform_to<std::uint8_t, typename T::value_type>
1116 std::ranges::copy(prefix_transformed, view_ptr + PREFIX_OFFSET);
// Signed difference between the new and the previous length.
1121 const auto length_diff =
static_cast<std::ptrdiff_t
>(new_length)
1122 -
static_cast<std::ptrdiff_t
>(current_length);
1123 const bool can_fit_in_place = was_long_string && length_diff <= 0;
1125 std::size_t final_offset = 0;
// The new value is not longer than the old long value: keep its current offset.
1127 if (can_fit_in_place)
1130 final_offset = current_buffer_offset;
// Shrinking: the bytes that follow the old value are shifted down by the freed amount.
1133 if (length_diff < 0)
1135 const auto bytes_to_compact =
static_cast<std::size_t
>(-length_diff);
1136 const auto move_start = current_buffer_offset + current_length;
1137 const auto move_end = var_data_buffer.size();
1138 const auto bytes_to_move = move_end - move_start;
1140 if (bytes_to_move > 0)
1143 var_data_buffer.data() + move_start,
1144 var_data_buffer.data() + move_end,
1145 var_data_buffer.data() + move_start - bytes_to_compact
1149 var_data_buffer.resize(var_data_buffer.size() - bytes_to_compact);
// NOTE(review): the threshold passed here is the first byte after the old value, and
// update_buffer_offsets_after compares with a strict '>' — confirm that a view starting
// exactly at that byte still gets its offset adjusted.
1152 update_buffer_offsets_after(
1153 length_buffer.data(),
1157 BUFFER_OFFSET_OFFSET,
1158 current_buffer_offset + current_length,
1159 -
static_cast<std::ptrdiff_t
>(bytes_to_compact),
1164 update_buffer_sizes_metadata(
1165 buffer_sizes_buffer,
1166 static_cast<std::int64_t
>(var_data_buffer.size())
// Growth needed: the extra bytes when replacing a long value, the full length otherwise.
1173 const auto expansion_needed = was_long_string ? length_diff
1174 :
static_cast<std::ptrdiff_t
>(new_length);
1175 const auto new_var_buffer_size = var_data_buffer.size() + expansion_needed;
// A long value grows: keep its offset and make room by shifting the following bytes up.
1177 if (was_long_string && length_diff > 0)
1180 final_offset = current_buffer_offset;
1181 const auto expansion_bytes =
static_cast<std::size_t
>(length_diff);
1182 const auto move_start = current_buffer_offset + current_length;
1183 const auto bytes_to_move = var_data_buffer.size() - move_start;
1186 var_data_buffer.resize(new_var_buffer_size);
1188 if (bytes_to_move > 0)
1192 var_data_buffer.data() + move_start,
1193 var_data_buffer.data() + move_start + bytes_to_move,
1194 var_data_buffer.data() + move_start + bytes_to_move + expansion_bytes
1199 update_buffer_offsets_after(
1200 length_buffer.data(),
1204 BUFFER_OFFSET_OFFSET,
1206 static_cast<std::ptrdiff_t
>(expansion_bytes),
// Otherwise the new value is placed at the current end of the data buffer.
1213 final_offset = var_data_buffer.size();
1214 var_data_buffer.resize(new_var_buffer_size);
1218 update_buffer_sizes_metadata(buffer_sizes_buffer,
static_cast<std::int64_t
>(new_var_buffer_size));
// Write the payload at its final position in the data buffer.
1221 std::ranges::copy(transformed_data, var_data_buffer.data() + final_offset);
// Refresh the prefix stored in the view entry.
1225 auto prefix_range = rhs | std::ranges::views::take(PREFIX_SIZE);
1226 auto prefix_transformed = prefix_range
1227 | std::ranges::views::transform(
1228 transform_to<std::uint8_t, typename T::value_type>
1230 std::ranges::copy(prefix_transformed, view_ptr + PREFIX_OFFSET);
// NOTE(review): the buffer-index field is set to FIRST_VAR_DATA_BUFFER_INDEX here, whereas
// create_buffers and insert_values store 0 and value() adds FIRST_VAR_DATA_BUFFER_INDEX to
// the stored index when reading — confirm which convention is intended.
1232 write_int32_unaligned(
1233 view_ptr + BUFFER_INDEX_OFFSET,
1234 static_cast<std::int32_t
>(FIRST_VAR_DATA_BUFFER_INDEX)
1237 write_int32_unaligned(view_ptr + BUFFER_OFFSET_OFFSET,
static_cast<std::int32_t
>(final_offset));
1241 template <std::ranges::sized_range T,
class CR,
typename Ext>
1242 template <std::ranges::sized_range U>
1244 void variable_size_binary_view_array_impl<T, CR, Ext>::resize_values(size_type new_length, U value)
1246 const size_t current_size = this->
size();
1248 if (new_length == current_size)
1253 if (new_length < current_size)
1255 erase_values(
sparrow::next(value_cbegin(), new_length), current_size - new_length);
1259 insert_value(value_cend(), value, new_length - current_size);
1263 template <std::ranges::sized_range T,
class CR,
typename Ext>
1264 template <std::ranges::sized_range U>
// Inserts the values of [first, last) before `pos`.
// The view buffer is enlarged and the trailing view entries are shifted to open a gap;
// values longer than SHORT_STRING_SIZE are stored in the first data buffer.
1274 template <std::ranges::sized_range T,
class CR,
typename Ext>
1275 template <mpl::iterator_of_type<T> InputIt>
1276 auto variable_size_binary_view_array_impl<T, CR, Ext>::insert_values(
// Early-exit path (its guarding condition is not visible here): an iterator at pos's index is returned.
1286 const auto insert_index = std::distance(value_cbegin(), pos);
1287 return value_begin() + insert_index;
1290 const auto insert_index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
1291 const auto current_size = this->size();
1292 const auto new_size = current_size + count;
// Measure every incoming value once; long values add to the data-buffer growth.
1295 std::size_t additional_var_storage = 0;
1296 std::vector<std::size_t> value_lengths;
1297 value_lengths.reserve(count);
1299 for (
auto it = first; it != last; ++it)
1301 const auto length =
static_cast<std::size_t
>(std::ranges::size(*it));
1302 value_lengths.push_back(length);
1303 if (length > SHORT_STRING_SIZE)
1305 additional_var_storage += length;
1309 auto& proxy = this->get_arrow_proxy();
1310 auto* private_data = proxy.get_array_private_data();
1311 auto& buffers = private_data->buffers();
// Make room for the new view entries.
1313 const auto new_view_buffer_size = new_size * DATA_BUFFER_SIZE;
1314 buffers[LENGTH_BUFFER_INDEX].resize(new_view_buffer_size);
// Grow the data buffer by the bytes needed for the new long values and record its new size.
1316 if (additional_var_storage > 0)
1318 const auto current_var_size = buffers[FIRST_VAR_DATA_BUFFER_INDEX].size();
1319 buffers[FIRST_VAR_DATA_BUFFER_INDEX].resize(current_var_size + additional_var_storage);
1322 auto& buffer_sizes = buffers[buffers.size() - 1];
1323 update_buffer_sizes_metadata(
1325 static_cast<std::int64_t
>(buffers[FIRST_VAR_DATA_BUFFER_INDEX].size())
// Shift the view entries located at or after the insertion point to open the gap.
1329 auto* view_data = buffers[LENGTH_BUFFER_INDEX].data();
1330 if (insert_index < current_size)
1332 const auto bytes_to_move = (current_size - insert_index) * DATA_BUFFER_SIZE;
1333 const auto src_offset = insert_index * DATA_BUFFER_SIZE;
1334 const auto dst_offset = (insert_index + count) * DATA_BUFFER_SIZE;
1336 std::memmove(view_data + dst_offset, view_data + src_offset, bytes_to_move);
// NOTE(review): the offsets of the shifted long views are increased by additional_var_storage,
// yet the new payloads are written starting at (data buffer size - additional_var_storage),
// i.e. after the pre-existing bytes — confirm that the existing payloads really move.
1339 if (additional_var_storage > 0)
1341 for (
size_type i = insert_index + count; i < new_size; ++i)
1343 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1344 std::int32_t length;
1345 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1347 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1349 std::int32_t current_offset;
1350 std::memcpy(¤t_offset, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1351 current_offset +=
static_cast<std::int32_t
>(additional_var_storage);
1352 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, ¤t_offset,
sizeof(std::int32_t));
// New long payloads are written from the start of the freshly added region of the data buffer.
1359 std::size_t var_offset = buffers[FIRST_VAR_DATA_BUFFER_INDEX].size() - additional_var_storage;
// Fill the gap: one view entry per inserted value.
1362 for (
auto it = first; it != last; ++it, ++value_idx)
1364 const auto view_index = insert_index + value_idx;
1365 auto* view_ptr = view_data + (view_index * DATA_BUFFER_SIZE);
1366 const auto value_length = value_lengths[value_idx];
1368 const auto& current_value = *it;
// The entry starts with the value length as a 32-bit integer.
1371 const std::int32_t value_length_int32 =
static_cast<std::int32_t
>(value_length);
1372 std::memcpy(view_ptr, &value_length_int32,
sizeof(std::int32_t));
// Short value: the payload is written inside the entry; [payload end, entry end) is the unused tail.
1374 if (value_length <= SHORT_STRING_SIZE)
1377 std::ranges::transform(
1379 view_ptr + SHORT_STRING_OFFSET,
1380 transform_to<std::uint8_t, typename T::value_type>
1384 view_ptr + SHORT_STRING_OFFSET + value_length,
1385 view_ptr + DATA_BUFFER_SIZE,
// Long value: store the prefix, the data-buffer index (0) and the offset, then the payload.
1392 std::ranges::transform(
1393 current_value | std::views::take(PREFIX_SIZE),
1394 view_ptr + PREFIX_OFFSET,
1395 transform_to<std::uint8_t, typename T::value_type>
1399 const std::int32_t buffer_index_zero = 0;
1400 std::memcpy(view_ptr + BUFFER_INDEX_OFFSET, &buffer_index_zero,
sizeof(std::int32_t));
1403 const std::int32_t var_offset_int32 =
static_cast<std::int32_t
>(var_offset);
1404 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, &var_offset_int32,
sizeof(std::int32_t));
1407 std::ranges::transform(
1409 buffers[FIRST_VAR_DATA_BUFFER_INDEX].data() + var_offset,
1410 transform_to<std::uint8_t, typename T::value_type>
1413 var_offset += value_length;
// Called once all buffers have been modified.
1418 proxy.update_buffers();
// Erases `count` values starting at `pos`.
// Long payloads of the surviving values are compacted in the first data buffer, the
// trailing view entries are shifted down, and the view buffer is shrunk.
1423 template <std::ranges::sized_range T,
class CR,
typename Ext>
1425 variable_size_binary_view_array_impl<T, CR, Ext>::erase_values(const_value_iterator pos, size_type count)
1428 const size_t erase_index =
static_cast<size_t>(std::distance(value_cbegin(), pos));
1429 const size_t current_size = this->size();
// Clamp the request so that it never reaches past the last element.
1432 if (erase_index + count > current_size)
1434 count = current_size - erase_index;
// Early-exit path (its guarding condition is not visible here).
1439 return value_begin() +
static_cast<difference_type
>(erase_index);
1442 const auto new_size = current_size - count;
1445 std::size_t freed_var_storage = 0;
1446 auto& proxy = this->get_arrow_proxy();
1447 auto* private_data = proxy.get_array_private_data();
1448 auto& buffers = private_data->buffers();
1449 auto* view_data = buffers[LENGTH_BUFFER_INDEX].data();
// Sum the payload bytes of the erased long values.
1452 for (size_type i = erase_index; i < erase_index + count; ++i)
1454 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1455 std::int32_t length;
1456 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1457 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1459 freed_var_storage +=
static_cast<std::size_t
>(length);
// Path that clears the view and data buffers and resets the recorded size to 0
// (presumably taken when no element remains — its guarding condition is not visible here).
1467 if (buffers[0].
size() > 0)
1471 buffers[LENGTH_BUFFER_INDEX].clear();
1472 buffers[FIRST_VAR_DATA_BUFFER_INDEX].clear();
1474 auto& buffer_sizes = buffers[buffers.size() - 1];
1475 update_buffer_sizes_metadata(buffer_sizes, 0);
1477 proxy.update_buffers();
1478 return value_begin();
// Compact the data buffer when at least one erased value was long.
1482 if (freed_var_storage > 0)
1484 auto& var_buffer = buffers[FIRST_VAR_DATA_BUFFER_INDEX];
1485 std::size_t write_offset = 0;
// Maps each surviving long value's old offset to its offset after compaction.
1488 std::unordered_map<std::size_t, std::size_t> offset_mapping;
1489 offset_mapping.reserve(current_size - count);
// First pass over all views: relocate surviving payloads.
// NOTE(review): payloads are packed in view-index order with an increasing write_offset —
// confirm that the payload order in the data buffer always matches the view order, since
// otherwise a payload could be overwritten before it is relocated.
1491 for (size_type i = 0; i < current_size; ++i)
1493 if (i >= erase_index && i < erase_index + count)
1499 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1500 std::int32_t length;
1501 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1502 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1504 std::int32_t old_offset_int32;
1505 std::memcpy(&old_offset_int32, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1506 const auto old_offset =
static_cast<std::size_t
>(old_offset_int32);
1509 offset_mapping[old_offset] = write_offset;
// Only payloads whose position changes need to be relocated.
1512 if (write_offset != old_offset)
1515 var_buffer.data() + write_offset,
1516 var_buffer.data() + old_offset,
1517 static_cast<std::size_t
>(length)
1521 write_offset +=
static_cast<std::size_t
>(length);
// Drop the freed bytes and record the new data-buffer size.
1526 var_buffer.resize(var_buffer.size() - freed_var_storage);
1529 auto& buffer_sizes = buffers[buffers.size() - 1];
1530 update_buffer_sizes_metadata(buffer_sizes,
static_cast<std::int64_t
>(var_buffer.size()));
// Second pass over all views: rewrite the offsets of the surviving long values.
1533 for (size_type i = 0; i < current_size; ++i)
1535 if (i >= erase_index && i < erase_index + count)
1540 auto* view_ptr = view_data + (i * DATA_BUFFER_SIZE);
1541 std::int32_t length;
1542 std::memcpy(&length, view_ptr,
sizeof(std::int32_t));
1543 if (
static_cast<std::size_t
>(length) > SHORT_STRING_SIZE)
1545 std::int32_t old_offset_int32;
1546 std::memcpy(&old_offset_int32, view_ptr + BUFFER_OFFSET_OFFSET,
sizeof(std::int32_t));
1547 const auto old_offset =
static_cast<std::size_t
>(old_offset_int32);
1548 auto it = offset_mapping.find(old_offset);
1549 if (it != offset_mapping.end())
1551 const std::int32_t new_offset =
static_cast<std::int32_t
>(it->second);
1552 std::memcpy(view_ptr + BUFFER_OFFSET_OFFSET, &new_offset,
sizeof(std::int32_t));
// Close the gap in the view buffer by moving the trailing entries down.
1559 if (erase_index + count < current_size)
1561 const auto src_offset = (erase_index + count) * DATA_BUFFER_SIZE;
1562 const auto dst_offset = erase_index * DATA_BUFFER_SIZE;
1563 const auto bytes_to_move = (current_size - erase_index - count) * DATA_BUFFER_SIZE;
1565 std::memmove(view_data + dst_offset, view_data + src_offset, bytes_to_move);
1569 buffers[LENGTH_BUFFER_INDEX].resize(new_size * DATA_BUFFER_SIZE);
1572 proxy.update_buffers();
// Iterator at the erase position, or the end iterator when the erased range reached the end.
1575 return erase_index < new_size ?
sparrow::next(value_begin(), erase_index) : value_end();
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::iterator_tag iterator_tag
typename base_type::const_bitmap_iterator const_bitmap_iterator
std::conditional_t< is_mutable, mutable_array_base< D >, array_crtp_base< D > > base_type
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::bitmap_type bitmap_type
typename base_type::difference_type difference_type
constexpr size_type size() const noexcept
Object that owns a piece of contiguous memory.
xsimd::aligned_allocator< T > default_allocator
constexpr U * data() noexcept
constexpr size_type null_count() const noexcept
Returns the number of bits set to false (null/invalid).
typename storage_type::default_allocator default_allocator
A view that repeats a value a given number of times.
This buffer class is used as storage buffer for all sparrow arrays.
nullable< inner_reference, bitmap_reference > reference
typename inner_types::inner_const_reference inner_const_reference
typename base_type::bitmap_const_reference bitmap_const_reference
typename base_type::const_iterator const_iterator
typename base_type::bitmap_type bitmap_type
typename base_type::bitmap_range bitmap_range
variable_size_binary_view_array_impl< T, arrow_traits< std::string >::const_reference, Ext > self_type
typename base_type::const_value_iterator const_value_iterator
typename base_type::bitmap_iterator bitmap_iterator
typename base_type::value_iterator value_iterator
nullable< inner_value_type > value_type
typename inner_types::inner_value_type inner_value_type
variable_size_binary_view_array_impl(Args &&... args)
Generic constructor for creating variable-size binary view array.
mutable_array_bitmap_base< self_type > base_type
typename base_type::const_bitmap_range const_bitmap_range
typename base_type::bitmap_reference bitmap_reference
typename inner_types::inner_reference inner_reference
typename base_type::const_bitmap_iterator const_bitmap_iterator
variable_size_binary_view_array_impl(arrow_proxy)
Constructs variable-size binary view array from Arrow proxy.
array_inner_types< self_type > inner_types
nullable< inner_const_reference, bitmap_const_reference > const_reference
typename base_type::iterator_tag iterator_tag
typename base_type::size_type size_type
typename base_type::difference_type difference_type
typename base_type::iterator iterator
Concept for convertible range types.
#define SPARROW_ASSERT_TRUE(expr__)
constexpr std::size_t size(typelist< T... >={})
Gets the count of types contained in a typelist.
constexpr bool excludes_copy_and_move_ctor_v
Convenience variable template for excludes_copy_and_move_ctor.
constexpr std::ranges::copy_result< std::ranges::borrowed_iterator_t< R >, O > copy(R &&r, O result)
array_bitmap_base_impl< D, true > mutable_array_bitmap_base
Convenient alias for arrays with mutable validity bitmaps.
ArrowSchema make_arrow_schema(F format, N name, std::optional< M > metadata, std::optional< std::unordered_set< ArrowFlag > > flags, ArrowSchema **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowSchema *dictionary, bool dictionary_ownership)
Creates an ArrowSchema owned by a unique_ptr and holding the provided data.
constexpr bool is_variable_size_binary_view_array
Checks whether T is a variable_size_binary_view_array_impl type.
SPARROW_API std::size_t array_size(const array_wrapper &ar)
constexpr InputIt next(InputIt it, Distance n)
constexpr std::size_t range_size(R &&r)
ArrowArray make_arrow_array(int64_t length, int64_t null_count, int64_t offset, B buffers, ArrowArray **children, const CHILDREN_OWNERSHIP &children_ownership, ArrowArray *dictionary, bool dictionary_ownership)
Creates an ArrowArray.
variable_size_binary_view_array_impl< arrow_traits< std::string >::value_type, arrow_traits< std::string >::const_reference > string_view_array
A variable-size string view layout implementation.
dynamic_bitset< std::uint8_t > validity_bitmap
Type alias for a validity bitmap using 8-bit storage blocks.
validity_bitmap ensure_validity_bitmap(std::size_t size, R &&validity_input)
Ensures a validity bitmap of the specified size from various input types.
variable_size_binary_view_array_impl< arrow_traits< std::vector< byte_t > >::value_type, arrow_traits< std::vector< byte_t > >::const_reference > binary_view_array
A variable-size binary view layout implementation.
data_type
Runtime identifier of arrow data types, usually associated with raw bytes with the associated value.
Extensions to the C++ standard library.
variable_size_binary_view_array_impl< T, CR, Ext > array_type
std::random_access_iterator_tag iterator_tag
inner_reference inner_const_reference
functor_index_iterator< detail::layout_value_functor< const array_type, inner_const_reference > > const_value_iterator
functor_index_iterator< detail::layout_value_functor< array_type, inner_reference > > value_iterator
Base class for array_inner_types specializations.
Traits class that must be specialized by array implementations.
Provides compile-time information about Arrow data types.
static constexpr sparrow::data_type get()
static constexpr sparrow::data_type get()
Metafunction for retrieving the data_type of a typed array.