SeqAn3 3.3.0
The Modern C++ library for sequence analysis.
Loading...
Searching...
No Matches
charconv
Go to the documentation of this file.
1// -*- C++ -*-
2// -----------------------------------------------------------------------------------------------------
3// Copyright (c) 2006-2023, Knut Reinert & Freie Universität Berlin
4// Copyright (c) 2016-2023, Knut Reinert & MPI für molekulare Genetik
5// This file may be used, modified and/or redistributed under the terms of the 3-clause BSD-License
6// shipped with this file and also available at: https://github.com/seqan/seqan3/blob/master/LICENSE.md
7// -----------------------------------------------------------------------------------------------------
8
9/*!\file
10 * \brief The [<charconv> header](https://en.cppreference.com/w/cpp/header/charconv) from C++17's standard library.
11 * \author Svenja Mehringer <svenja.mehringer AT fu-berlin.de>
12 */
13
14// File might be included from multiple libraries.
15#ifndef SEQAN_STD_CHARCONV_SHIM
16#define SEQAN_STD_CHARCONV_SHIM
17
18#include <charconv>
19#include <utility> // __cpp_lib_to_chars may be defined here as currently documented.
20#include <version> // From C++20 onwards, all feature macros should be defined here.
21
22/*!\defgroup std_charconv charconv
23 * \ingroup std
24 * \brief The [<charconv> header](https://en.cppreference.com/w/cpp/header/charconv) from C++17's standard library.
25 * \details
26 *
27 * The following table describes what implementation of std::to_chars and std::from_chars will be used
28 *
29 * | stdlib version | __cpp_lib_to_chars | chars_format | to_chars_result | from_chars_result | to_chars (int) | from_chars (int) | to_chars (float) | from_chars (float) |
30 * | -------------- | ------------------------------------------------ | -------------- | --------------- | ----------------- | -------------- | ---------------- | -------------------- | -------------------- |
31 * | gcc 11 | undefined (or 201611) and `<charconv>` header | stdlib | stdlib | stdlib | stdlib | stdlib | stdlib | stdlib |
32 *
33 * Note: gcc 11 implements float too, but does not define __cpp_lib_to_chars
34 */
35
36// =========================================================================
37// If float implementation is missing, add our own shim-implementation
38// =========================================================================
39
40#if __cpp_lib_to_chars < 201611
# include <algorithm>
# include <cassert>
# include <cerrno>
# include <concepts>
# include <cstddef>
# include <cstdlib>
# include <locale>
# include <sstream>
# include <string>
# include <system_error>
44
45namespace seqan3::contrib::charconv_float
46{
47using ::std::chars_format;
48using ::std::from_chars_result;
49using ::std::to_chars_result;
50
51/*!\brief std::to_chars implementation for floating point via a std::stringstream for default base = 10.
52 * \ingroup std_charconv
53 */
54template <std::floating_point value_type>
55inline to_chars_result to_chars_floating_point(char * first, char * last, value_type value) noexcept
56{
57 assert(first != nullptr);
58 assert(last != nullptr);
59
60 std::ostringstream ss;
61 ss << value;
62 auto str = ss.str();
63
64 if (last - first < static_cast<std::ptrdiff_t>(str.size()))
65 return {last, std::errc::value_too_large};
66
67 std::copy(str.begin(), str.end(), first);
68
69 return {first + str.size(), std::errc{}};
70}
71
72/*!\brief Delegates to functions strto[d/f/ld] for floating point value extraction.
73 * \ingroup std_charconv
74 */
75template <std::floating_point value_type>
76inline from_chars_result from_chars_floating_point(char const * first,
77 char const * last,
78 value_type & value,
79 chars_format fmt = chars_format::general) noexcept
80{
81 // The locale issue:
82 // std::from_chars is documented to be locale independent. The accepted patterns
83 // are identical to the one used by strtod in the defailt ("C") locale.
84 //
85 // The functions strto[d/f/ld] used here are locale dependent but
86 // setting the locale manually by std::setlocale is not thread safe.
87 // So for the time being this workaround is locale dependent.
88 if (*first == '+') // + is permitted in function strto[d/f/ld] but not in from_chars
89 return {last, std::errc::invalid_argument};
90
91 float tmp{};
92 constexpr ptrdiff_t buffer_size = 100;
93 char buffer[buffer_size];
94
95 if (fmt != chars_format::general)
96 {
97 bool exponent_is_present{false};
98 for (auto it = first; it != last; ++it)
99 {
100 if (*it == 'e' || *it == 'E')
101 {
102 exponent_is_present = true;
103 break;
104 }
105 }
106
107 if (fmt == chars_format::scientific && !exponent_is_present)
108 return {last, std::errc::invalid_argument};
109
110 if (fmt == chars_format::fixed && exponent_is_present)
111 return {last, std::errc::invalid_argument};
112 }
113
114 // In contrast to std::from_chars, std::strto[f/d/ld] does not treat the second
115 // parameter (str_end) as "end of the sequence to parse" but merely as an out
116 // parameter to indicate where the parsing ended. Therefore, if [last] does
117 // not point to the end of a null-terminated string, a buffer is needed to
118 // represent the truncated sequence and ensure correct from_chars functionality.
119 char * start;
120
121 if ((*last != '\0') || fmt == chars_format::hex)
122 {
123 // If hex format is explicitly expected, the 0x prefix is not allowed in the
124 // the original sequence according to the std::from_chars cppreference
125 // documentation.
126 // In order to use strto[f/d/ld], the prefix must be prepended to achieve
127 // correct parsing. This will also automatically lead to an error if the
128 // original sequence did contain a 0x prefix and thus reflect the correct
129 // requirements of std::from_chars.
130 ptrdiff_t offset{0};
131 if (fmt == chars_format::hex)
132 {
133 buffer[0] = '0';
134 buffer[1] = 'x';
135 offset = 2;
136 }
137
138 std::copy(first, last, &buffer[offset]);
139 buffer[std::min<ptrdiff_t>(buffer_size - offset, last - first)] = '\0';
140
141 start = &buffer[0];
142 }
143 else
144 {
145 start = const_cast<char *>(first);
146 }
147
148 char * end;
149
150 if constexpr (std::same_as<std::remove_reference_t<value_type>, float>)
151 {
152 tmp = strtof(start, &end);
153 }
154 if constexpr (std::same_as<std::remove_reference_t<value_type>, double>)
155 {
156 tmp = strtod(start, &end);
157 }
158 if constexpr (std::same_as<std::remove_reference_t<value_type>, long double>)
159 {
160 tmp = strtold(start, &end);
161 }
162
163 last = first + (end - start);
164
165 if (errno == ERANGE)
166 {
167 return {last, std::errc::result_out_of_range};
168 }
169 else if (tmp == 0 && end == start)
170 {
171 return {last, std::errc::invalid_argument};
172 }
173
174 // Success.
175 value = tmp;
176 return {last, {}};
177}
178
179} // namespace seqan3::contrib::charconv_float
180
181namespace seqan3::contrib::charconv_float
182{
183// -----------------------------------------------------------------------------
184// to_chars for floating point types
185// -----------------------------------------------------------------------------
186
187/*!\brief std::to_chars overload for floating point via a std::stringstream for default base = 10.
188 * \ingroup std_charconv
189 */
190template <std::floating_point floating_point_type>
191inline to_chars_result to_chars(char * first, char * last, floating_point_type value) noexcept
192{
193 return to_chars_floating_point(first, last, value);
194}
195
196// -----------------------------------------------------------------------------
197// from_chars for floating point types
198// -----------------------------------------------------------------------------
199
200/*!\brief Parse a char sequence into a floating point value.
201 * \ingroup std_charconv
202 * \tparam floating_point_type The type to parse the string into; Must model std::floating_point.
203 * \param[in] first The start of the string to parse.
204 * \param[in] last The end of the string to parse.
205 * \param[in, out] value The value to store the parsed result in.
206 * \param[in] fmt The std::chars_format that alters the behaviour of parsing.
207 * \returns A std::from_chars_result. See detail section return value for more information.
208 *
209 * \details
210 *
211 * Analyzes the character sequence [first,last) for a pattern described below.
212 * If no characters match the pattern or if the value obtained by parsing the
213 * matched characters is not representable in the type of value, value is
214 * unmodified, otherwise the characters matching the pattern are interpreted as
215 * a text representation of an arithmetic value, which is stored in value.
216 *
217 * Floating-point parsers: Expects the pattern identical to the one used by
218 * std::strtod in the default ("C") locale, except that:
219 *
220 * - the plus sign is not recognized outside of the exponent (only the minus
221 * sign is permitted at the beginning)
222 * - if fmt has std::chars_format::scientific set but not std::chars_format::fixed,
223 * the exponent part is required (otherwise it is optional)
224 * - if fmt has std::chars_format::fixed set but not std::chars_format::scientific,
225 * the optional exponent is not permitted
226 * - if fmt is std::chars_format::hex, the prefix "0x" or "0X" is not permitted
227 * (the string "0x123" parses as the value "0" with unparsed remainder "x123").
228 *
229 * \attention This implementation is a workaround until the function is supported
230 * by the compiler. It falls back to use the functions strto[d/f/ld]
231 * before checking the above limitations
232 *
233 * ### Return value
234 * This function is a workaround until the function is supported
235 * by the compiler. It falls back to use the functions strto[d/f/ld] so the
236 * return value is NOT as documented here https://en.cppreference.com/w/cpp/utility/from_chars
237 * but:
238 *
239 * On success, std::from_chars_result::ec is value-initialized. On error,
240 * std::from_chars_result::ec is either an
241 * std::errc::invalid_argument if an illegal character or format has been
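242 * encountered, or std::errc::result_out_of_range if parsing the value would cause an
243 * overflow. The std::from_chars_result::ptr value is set to last if the input is rejected before parsing; otherwise it points behind the characters that strto[d/f/ld] consumed.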
244 *
245 * ### The locale issue
246 * std::from_chars is documented to be locale independent. The accepted patterns
247 * are identical to the one used by strtod in the default ("C") locale.
248 *
249 * The functions strto[d/f/ld] used here are locale dependent but
250 * setting the locale manually by std::setlocale is not thread safe.
251 * So for the time being this workaround is locale dependent.
252 *
253 * \sa https://en.cppreference.com/w/cpp/utility/from_chars
254 */
255template <std::floating_point floating_point_type>
256inline from_chars_result from_chars(char const * first,
257 char const * last,
258 floating_point_type & value,
259 chars_format fmt = chars_format::general) noexcept
260{
261 return from_chars_floating_point(first, last, value, fmt);
262}
263} // namespace seqan3::contrib::charconv_float
264
265namespace std
266{
267// gcc-11 also defines float versions, but they don't clash with ours, because they use explicit overloads for each
268// float type. That means the stdlib has a higher priority in overload resolution then our shim implementation.
269using ::seqan3::contrib::charconv_float::from_chars; // import our shim-float version
270using ::seqan3::contrib::charconv_float::to_chars; // import our shim-float version
271} // namespace std
272
273#endif // __cpp_lib_to_chars < 201611
274
275#endif // SEQAN_STD_CHARCONV_SHIM