sparrow 1.4.0
C++20 idiomatic APIs for the Apache Arrow Columnar Format
Loading...
Searching...
No Matches
format.hpp
Go to the documentation of this file.
1// Copyright 2024 Man Group Operations Limited
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15
16#pragma once
17
18#include <algorithm>
19#include <cstddef>
20#include <cstdint>
21#include <format>
22#include <numeric>
23#include <ranges>
24#include <string>
25#include <type_traits>
26#include <variant>
27#include <vector>
28
30
31namespace sparrow::detail
32{
34 {
35 char fill = ' ';
36 char align = '>'; // '<', '>', '^'
37 std::size_t width = 0;
38
39 // Parse: [[fill]align] [width]
40 // Grammar subset: (fill? align?) width?
41 template <class It>
42 constexpr It parse(It it, It end)
43 {
44 if (it == end || *it == '}')
45 {
46 return it;
47 }
48
49 // Detect [fill][align] or [align]
50 auto next = it;
51 if (next != end)
52 {
53 ++next;
54 if (next != end && (*next == '<' || *next == '>' || *next == '^') && *it != '<' && *it != '>'
55 && *it != '^')
56 {
57 fill = *it;
58 align = *next;
59 it = ++next;
60 }
61 else if (*it == '<' || *it == '>' || *it == '^')
62 {
63 align = *it;
64 ++it;
65 }
66 }
67
68 // Parse width
69 std::size_t w = 0;
70 bool has_w = false;
71 while (it != end && *it >= '0' && *it <= '9')
72 {
73 has_w = true;
74 w = w * 10 + static_cast<unsigned>(*it - '0');
75 ++it;
76 }
77 if (has_w)
78 {
79 width = w;
80 }
81
82 // Ignore (silently) everything until '}' (keeps constexpr friendliness)
83 while (it != end && *it != '}')
84 {
85 ++it;
86 }
87
88 return it;
89 }
90
91 std::string apply_alignment(std::string inner) const
92 {
93 if (width <= inner.size())
94 {
95 return inner;
96 }
97
98 const std::size_t pad = width - inner.size();
99 switch (align)
100 {
101 case '<':
102 return inner + std::string(pad, fill);
103 case '^':
104 {
105 std::size_t left = pad / 2;
106 std::size_t right = pad - left;
107 return std::string(left, fill) + inner + std::string(right, fill);
108 }
109 case '>':
110 default:
111 return std::string(pad, fill) + inner;
112 }
113 }
114
/// Renders a sequence as `<e0, e1, ...>`.
///
/// Every element is formatted with the default `{}` specification and elements are
/// separated by ", ". An empty sequence yields `<>`.
///
/// @param seq Sequence to render.
/// @return The rendered text, without any padding applied.
template <class Seq>
std::string build_core(const Seq& seq) const
{
    std::string core(1, '<');
    const char* delimiter = "";  // becomes ", " once the first element has been written
    for (auto&& elem : seq)
    {
        core.append(delimiter);
        std::format_to(std::back_inserter(core), "{}", elem);
        delimiter = ", ";
    }
    core += '>';
    return core;
}
133 };
134} // namespace sparrow::detail
135
136namespace std
137{
/// Formatter for `std::variant`: formats the currently held alternative, forwarding the
/// format specification it was given to that alternative's own formatter.
template <class... T>
struct formatter<std::variant<T...>>
{
    /// Copies the specification (everything up to, but excluding, the first '}') behind
    /// the "{:" prefix already stored in `m_format_string`, then closes it with '}'.
    ///
    /// @return Iterator to the first '}' or to the end of the parse context.
    constexpr auto parse(format_parse_context& ctx)
    {
        auto cursor = ctx.begin();
        for (; cursor != ctx.end() && *cursor != '}'; ++cursor)
        {
            m_format_string += *cursor;
        }
        m_format_string += '}';
        return cursor;
    }

    /// Visits the variant and formats the active alternative with the stored format string.
    ///
    /// @return The output iterator returned by `std::vformat_to`.
    auto format(const std::variant<T...>& v, std::format_context& ctx) const
    {
        const auto format_alternative = [this, &ctx](const auto& alternative)
        {
            return std::vformat_to(ctx.out(), m_format_string, std::make_format_args(alternative));
        };
        return std::visit(format_alternative, v);
    }

    // Replacement field rebuilt by parse(); starts out as an opening "{:".
    std::string m_format_string = "{:";
};
166
/// Formatter for `std::byte`: prints the value as a zero-padded hexadecimal number with a
/// `0x` prefix (format `{:#04x}`), e.g. `0x0f`.
template <>
struct formatter<std::byte>
{
    /// Delegates parsing of the specification to the formatter of `unsigned`.
    constexpr auto parse(format_parse_context& ctx)
    {
        return m_underlying_formatter.parse(ctx);
    }

    /// Writes the byte using the fixed `{:#04x}` format.
    /// NOTE(review): the specification consumed by parse() is not used here.
    auto format(std::byte b, std::format_context& ctx) const
    {
        const unsigned numeric_value = std::to_integer<unsigned>(b);
        return std::format_to(ctx.out(), "{:#04x}", numeric_value);
    }

private:

    // Only used by parse() to consume the format specification.
    std::formatter<unsigned> m_underlying_formatter;
};
185}
186
187namespace sparrow
188{
/// A range whose value type is itself a range.
template <typename Range>
concept RangeOfRanges = std::ranges::range<Range>
                        && std::ranges::range<std::ranges::range_value_t<Range>>;

/// Satisfied when `std::format(candidate, 1)` is a valid expression for a
/// `const Candidate&`.
template <typename Candidate>
concept Format = requires(const Candidate& candidate) { std::format(candidate, 1); };

/// A range whose value type satisfies `Format`.
template <typename Range>
concept RangeOfFormats = std::ranges::range<Range> && Format<std::ranges::range_value_t<Range>>;
197
/// Counts the bytes of `str` that are not UTF-8 continuation bytes (`10xxxxxx`).
///
/// For well-formed UTF-8 this is the number of code points. The input is not validated.
///
/// @param str Text to measure.
/// @return Number of non-continuation bytes in `str`.
constexpr size_t size_of_utf8(const std::string_view str)
{
    size_t code_points = 0;
    for (size_t index = 0; index < str.size(); ++index)
    {
        const auto byte = static_cast<unsigned char>(str[index]);
        const bool is_continuation = (byte & 0xC0u) == 0x80u;
        if (!is_continuation)
        {
            ++code_points;
        }
    }
    return code_points;
}
210
/// Returns the width of the widest element of `data`.
///
/// Elements whose decayed type is `std::string`, `std::string_view`, `const char*` or
/// `char*` are measured with `size_of_utf8`. Every other element is measured as the size
/// of its default `{}` formatting. An empty range yields 0.
///
/// @param data Range of values to measure.
/// @return The largest width found.
constexpr size_t max_width(const std::ranges::input_range auto& data)
{
    size_t widest = 0;
    for (const auto& value : data)
    {
        using value_type = std::remove_cvref_t<std::decay_t<decltype(value)>>;
        constexpr bool is_text = std::is_same_v<value_type, std::string>
                                 || std::is_same_v<value_type, std::string_view>
                                 || std::is_same_v<value_type, const char*>
                                 || std::is_same_v<value_type, char*>;

        size_t current = 0;
        if constexpr (is_text)
        {
            current = size_of_utf8(value);
        }
        else
        {
            current = std::format("{}", value).size();
        }
        widest = std::max(widest, current);
    }
    return widest;
}
230
231 template <RangeOfRanges Columns>
232 constexpr std::vector<size_t> columns_widths(const Columns& columns)
233 {
234 std::vector<size_t> widths;
235 widths.reserve(std::ranges::size(columns));
236 for (const auto& col : columns)
237 {
238 widths.push_back(max_width(col));
239 }
240 return widths;
241 }
242
/// Writes one table row to `out`.
///
/// Each value is preceded by `separator` and right-aligned in a field of the matching
/// width; a final `separator` closes the row. Nothing is written for an empty row.
///
/// The separator is written as data and is never spliced into the format string, so a
/// separator containing '{' or '}' is printed verbatim instead of corrupting the format.
///
/// The advanced iterator is not returned, so `out` is expected to be an append-style
/// iterator (e.g. `std::back_inserter` or a format context iterator).
///
/// @param out       Output iterator receiving the characters.
/// @param widths    Field width of each cell; must have as many elements as `values`.
/// @param values    Values of the row; each must be formattable with a `{:>N}` specification.
/// @param separator Text written before every cell and after the last one.
template <typename OutputIt, std::ranges::input_range Widths, std::ranges::input_range Values>
    requires(std::same_as<std::ranges::range_value_t<Widths>, size_t>)
constexpr void
to_row(OutputIt out, const Widths& widths, const Values& values, std::string_view separator = "|")
{
    SPARROW_ASSERT_TRUE(std::ranges::size(widths) == std::ranges::size(values));
    const auto cell_count = std::ranges::size(values);
    if (cell_count == 0)
    {
        return;
    }
    auto value_it = values.begin();
    auto width_it = widths.begin();
    for (size_t i = 0; i < cell_count; ++i)
    {
        out = std::format_to(std::move(out), "{}", separator);

        // The width is baked into a runtime format string (instead of a nested "{}" width)
        // so that formatters which do not handle nested replacement fields keep working.
        const std::string cell_format = std::format("{{:>{}}}", *width_it);
        // make_format_args needs an lvalue; the reference also extends a prvalue's lifetime.
        const auto& value = *value_it;
        out = std::vformat_to(std::move(out), cell_format, std::make_format_args(value));

        ++value_it;
        ++width_it;
    }
    std::format_to(std::move(out), "{}", separator);
}
265
/// Writes a horizontal rule as wide as a table row built from `widths`.
///
/// The rule is made of `widths.size() + 1 + sum(widths)` copies of the first character of
/// `separator`: one per cell separator of a row plus one per column of cell content.
/// Nothing is written when `widths` or `separator` is empty.
///
/// @param out       Output iterator receiving the characters.
/// @param widths    Field width of each column.
/// @param separator Only its first character is used.
template <typename OutputIt>
constexpr void
horizontal_separator(OutputIt out, const std::vector<size_t>& widths, std::string_view separator = "-")
{
    // Reading the first character of an empty separator would be undefined behaviour.
    if (widths.empty() || separator.empty())
    {
        return;
    }
    const size_t count = widths.size() + 1 + std::reduce(widths.begin(), widths.end());
    std::fill_n(out, count, separator.front());
}
277
278#if defined(__clang__)
279# pragma clang diagnostic push
280# pragma clang diagnostic ignored "-Wsign-conversion"
281#endif
282#if defined(__GNUC__)
283# pragma GCC diagnostic push
284# pragma GCC diagnostic ignored "-Wsign-conversion"
285#endif
/// Writes a text table to `out`: a header row, a horizontal rule, one row per record and
/// a closing horizontal rule.
///
/// Each column is as wide as its widest cell or its header, whichever is larger, and
/// every cell is right-aligned. Nothing is written when there are no columns.
///
/// `headers` and `columns` are accessed with `operator[]`, and `columns.begin() + 1` is
/// used, so both must support indexing and `columns` needs random-access iterators.
/// `out` is expected to be an append-style iterator (e.g. `std::back_inserter`), since
/// the helper functions do not return the advanced iterator.
///
/// @param out     Output iterator receiving the characters.
/// @param headers Column names; must have as many elements as `columns`.
/// @param columns Columns of the table; all must have the same length.
template <std::ranges::input_range Headers, RangeOfRanges Columns, typename OutputIt>
    requires(std::convertible_to<std::ranges::range_value_t<Headers>, std::string>)
constexpr void to_table_with_columns(OutputIt out, const Headers& headers, const Columns& columns)
{
    const size_t column_count = std::ranges::size(columns);
    SPARROW_ASSERT_TRUE(std::ranges::size(headers) == column_count);
    if (column_count == 0)
    {
        return;
    }

    // All columns must have the same length as the first one.
    const auto row_count = std::ranges::size(columns[0]);
    for (auto it = columns.begin() + 1; it != columns.end(); ++it)
    {
        SPARROW_ASSERT_TRUE(row_count == std::ranges::size(*it));
    }

    std::vector<size_t> widths = columns_widths(columns);

    // A column must also be wide enough for its header.
    for (size_t i = 0; i < std::ranges::size(headers); ++i)
    {
        widths[i] = std::max(widths[i], std::ranges::size(headers[i]));
    }
    to_row(out, widths, headers);
    std::format_to(out, "{}", '\n');
    horizontal_separator(out, widths);
    std::format_to(out, "{}", '\n');

    // Print the data, one row at a time.
    for (size_t i = 0; i < row_count; ++i)
    {
        // decltype(auto) forwards whatever operator[] yields, so cells returned by
        // reference are not copied just to be printed.
        const auto row_range = std::views::transform(
            columns,
            [i](const auto& column) -> decltype(auto)
            {
                return column[i];
            }
        );
        to_row(out, widths, row_range);
        std::format_to(out, "{}", '\n');
    }

    horizontal_separator(out, widths);
}
334#if defined(__GNUC__)
335# pragma GCC diagnostic pop
336#endif
337#if defined(__clang__)
338# pragma clang diagnostic pop
339#endif
340}
#define SPARROW_ASSERT_TRUE(expr__)
constexpr void horizontal_separator(OutputIt out, const std::vector< size_t > &widths, std::string_view separator="-")
Definition format.hpp:268
constexpr InputIt next(InputIt it, Distance n)
Definition iterator.hpp:503
constexpr void to_row(OutputIt out, const Widths &widths, const Values &values, std::string_view separator="|")
Definition format.hpp:246
constexpr void to_table_with_columns(OutputIt out, const Headers &headers, const Columns &columns)
Definition format.hpp:288
constexpr size_t max_width(const std::ranges::input_range auto &data)
Definition format.hpp:211
constexpr std::vector< size_t > columns_widths(const Columns &columns)
Definition format.hpp:232
constexpr size_t size_of_utf8(const std::string_view str)
Definition format.hpp:198
Extensions to the C++ standard library.
constexpr It parse(It it, It end)
Definition format.hpp:42
std::string apply_alignment(std::string inner) const
Definition format.hpp:91
std::string build_core(const Seq &seq) const
Definition format.hpp:116
constexpr auto parse(format_parse_context &ctx)
Definition format.hpp:170
auto format(std::byte b, std::format_context &ctx) const
Definition format.hpp:175
constexpr auto parse(format_parse_context &ctx)
Definition format.hpp:141
auto format(const std::variant< T... > &v, std::format_context &ctx) const
Definition format.hpp:153