Botan 3.12.0
Crypto and TLS for C++
parsing.cpp
Go to the documentation of this file.
1/*
2* Various string utils and parsing functions
3* (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4* (C) 2015 Simon Warta (Kullo GmbH)
5* (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#include <botan/internal/parsing.h>
11
12#include <botan/exceptn.h>
13#include <botan/internal/fmt.h>
14#include <botan/internal/loadstor.h>
15#include <algorithm>
16#include <limits>
17#include <sstream>
18
19namespace Botan {
20
21uint16_t to_uint16(std::string_view str) {
22 const uint32_t x = to_u32bit(str);
23
24 if(x != static_cast<uint16_t>(x)) {
25 throw Invalid_Argument("Integer value exceeds 16 bit range");
26 }
27
28 return static_cast<uint16_t>(x);
29}
30
31uint32_t to_u32bit(std::string_view str_view) {
32 const std::string str(str_view);
33
34 // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
35 for(const char chr : str) {
36 if(chr < '0' || chr > '9') {
37 throw Invalid_Argument("to_u32bit invalid decimal string '" + str + "'");
38 }
39 }
40
41 const unsigned long int x = std::stoul(str);
42
43 if constexpr(sizeof(unsigned long int) > 4) {
44 // x might be uint64
45 if(x > std::numeric_limits<uint32_t>::max()) {
46 throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
47 }
48 }
49
50 return static_cast<uint32_t>(x);
51}
52
53/*
54* Parse a SCAN-style algorithm name
55*/
56std::vector<std::string> parse_algorithm_name(std::string_view scan_name) {
57 if(scan_name.find('(') == std::string::npos && scan_name.find(')') == std::string::npos) {
58 return {std::string(scan_name)};
59 }
60
61 std::string name(scan_name);
62 std::string substring;
63 std::vector<std::string> elems;
64 size_t level = 0;
65
66 elems.push_back(name.substr(0, name.find('(')));
67 name = name.substr(name.find('('));
68
69 for(auto i = name.begin(); i != name.end(); ++i) {
70 const char c = *i;
71
72 if(c == '(') {
73 ++level;
74 }
75 if(c == ')') {
76 if(level == 1 && i == name.end() - 1) {
77 if(elems.size() == 1) {
78 elems.push_back(substring.substr(1));
79 } else {
80 elems.push_back(substring);
81 }
82 return elems;
83 }
84
85 if(level == 0 || (level == 1 && i != name.end() - 1)) {
86 throw Invalid_Algorithm_Name(scan_name);
87 }
88 --level;
89 }
90
91 if(c == ',' && level == 1) {
92 if(elems.size() == 1) {
93 elems.push_back(substring.substr(1));
94 } else {
95 elems.push_back(substring);
96 }
97 substring.clear();
98 } else {
99 substring += c;
100 }
101 }
102
103 if(!substring.empty()) {
104 throw Invalid_Algorithm_Name(scan_name);
105 }
106
107 return elems;
108}
109
110std::vector<std::string> split_on(std::string_view str, char delim) {
111 std::vector<std::string> elems;
112 if(str.empty()) {
113 return elems;
114 }
115
116 std::string substr;
117 for(const char c : str) {
118 if(c == delim) {
119 if(!substr.empty()) {
120 elems.push_back(substr);
121 }
122 substr.clear();
123 } else {
124 substr += c;
125 }
126 }
127
128 if(substr.empty()) {
129 throw Invalid_Argument(fmt("Unable to split string '{}", str));
130 }
131 elems.push_back(substr);
132
133 return elems;
134}
135
/*
* Join a string
*
* Concatenates the elements of strs in order, inserting delim between each
* pair of neighbours. An empty vector produces an empty string.
*/
std::string string_join(const std::vector<std::string>& strs, char delim) {
   std::string joined;
   bool first = true;

   for(const std::string& piece : strs) {
      // The delimiter goes before every element except the first
      if(!first) {
         joined.push_back(delim);
      }
      joined.append(piece);
      first = false;
   }

   return joined;
}
151
152/*
153* Convert a decimal-dotted string to binary IP
154*/
155std::optional<uint32_t> string_to_ipv4(std::string_view str) {
156 // At least 3 dots + 4 1-digit integers
157 // At most 3 dots + 4 3-digit integers
158 if(str.size() < 3 + 4 * 1 || str.size() > 3 + 4 * 3) {
159 return {};
160 }
161
162 // the final result
163 uint32_t ip = 0;
164 // the number of '.' seen so far
165 size_t dots = 0;
166 // accumulates one quad (range 0-255)
167 uint32_t accum = 0;
168 // # of digits pushed to accum since last dot
169 size_t cur_digits = 0;
170
171 for(const char c : str) {
172 if(c == '.') {
173 // . without preceding digit is invalid
174 if(cur_digits == 0) {
175 return {};
176 }
177 dots += 1;
178 // too many dots
179 if(dots > 3) {
180 return {};
181 }
182
183 cur_digits = 0;
184 ip = (ip << 8) | accum;
185 accum = 0;
186 } else if(c >= '0' && c <= '9') {
187 const auto d = static_cast<uint8_t>(c - '0');
188
189 // prohibit leading zero in quad (used for octal)
190 if(cur_digits > 0 && accum == 0) {
191 return {};
192 }
193 accum = (accum * 10) + d;
194
195 if(accum > 255) {
196 return {};
197 }
198
199 cur_digits++;
200 BOTAN_ASSERT_NOMSG(cur_digits <= 3);
201 } else {
202 return {};
203 }
204 }
205
206 // no trailing digits?
207 if(cur_digits == 0) {
208 return {};
209 }
210
211 // insufficient # of dots
212 if(dots != 3) {
213 return {};
214 }
215
216 ip = (ip << 8) | accum;
217
218 return ip;
219}
220
221std::optional<std::array<uint8_t, 16>> string_to_ipv6(std::string_view str) {
222 if(str.empty()) {
223 return {};
224 }
225
226 // Parsed hex groups, split by whether they appeared before or after a "::".
227 // If no "::" appears, only `pre` is populated and must reach exactly 8 groups.
228 std::array<uint16_t, 8> pre{};
229 std::array<uint16_t, 8> post{};
230 size_t pre_count = 0;
231 size_t post_count = 0;
232 bool seen_double_colon = false;
233
234 auto hex_value = [](char c) -> std::optional<uint8_t> {
235 if(c >= '0' && c <= '9') {
236 return c - '0';
237 } else if(c >= 'a' && c <= 'f') {
238 return 10 + (c - 'a');
239 } else if(c >= 'A' && c <= 'F') {
240 return 10 + (c - 'A');
241 } else {
242 return {};
243 }
244 };
245
246 size_t idx = 0;
247 bool expect_group = true; // set after any separator, cleared after a group
248
249 while(idx < str.size()) {
250 if(str[idx] == ':') {
251 if(idx + 1 < str.size() && str[idx + 1] == ':') {
252 if(seen_double_colon) {
253 return {}; // at most one "::"
254 }
255 seen_double_colon = true;
256 idx += 2;
257 expect_group = (idx < str.size());
258 continue;
259 }
260 // single ':' separator between groups — only valid after a group
261 if(expect_group) {
262 return {};
263 }
264 expect_group = true;
265 idx += 1;
266 continue;
267 }
268
269 // Parse a hex group of 1..4 digits
270 uint32_t group = 0;
271 size_t hex_chars = 0;
272 while(idx < str.size() && hex_chars < 4) {
273 const auto digit = hex_value(str[idx]);
274 if(digit.has_value() == false) {
275 break;
276 }
277 group = (group << 4) | static_cast<uint32_t>(digit.value());
278 idx += 1;
279 hex_chars += 1;
280 }
281 if(hex_chars == 0) {
282 return {};
283 }
284 // If a 5th hex digit follows, the group is oversized.
285 if(hex_chars == 4 && idx < str.size() && hex_value(str[idx]).has_value()) {
286 return {};
287 }
288
289 if(seen_double_colon) {
290 if(post_count >= 8) {
291 return {};
292 }
293 post[post_count++] = static_cast<uint16_t>(group);
294 } else {
295 if(pre_count >= 8) {
296 return {};
297 }
298 pre[pre_count++] = static_cast<uint16_t>(group);
299 }
300 expect_group = false;
301 }
302
303 // Trailing single ':' is invalid
304 if(expect_group) {
305 return {};
306 }
307
308 const size_t total_groups = pre_count + post_count;
309 if(seen_double_colon) {
310 // "::" has to cover at least one zero group
311 if(total_groups > 7) {
312 return {};
313 }
314 } else {
315 if(total_groups != 8) {
316 return {};
317 }
318 }
319
320 std::array<uint8_t, 16> out{};
321 for(size_t i = 0; i != pre_count; ++i) {
322 out[2 * i] = get_byte<0>(pre[i]);
323 out[2 * i + 1] = get_byte<1>(pre[i]);
324 }
325 const size_t gap = 8 - total_groups;
326 for(size_t i = 0; i != post_count; ++i) {
327 const size_t target = pre_count + gap + i;
328 out[2 * target] = get_byte<0>(post[i]);
329 out[2 * target + 1] = get_byte<1>(post[i]);
330 }
331 return out;
332}
333
334std::string ipv6_to_string(std::span<const uint8_t, 16> a) {
335 static const char* hex = "0123456789abcdef";
336
337 std::string out;
338 out.reserve(39);
339
340 for(size_t i = 0; i != 16; i += 2) {
341 if(i != 0) {
342 out.push_back(':');
343 }
344 const uint16_t group = make_uint16(a[i], a[i + 1]);
345 bool started = false;
346 // Write each nibble omitting leading 0s
347 for(int s = 12; s >= 0; s -= 4) {
348 const auto nibble = (group >> s) & 0xF;
349 if(nibble != 0 || started || s == 0) {
350 out.push_back(hex[nibble]);
351 started = true;
352 }
353 }
354 }
355 return out;
356}
357
358/*
359* Convert an IP address to decimal-dotted string
360*/
361std::string ipv4_to_string(uint32_t ip) {
362 uint8_t bits[4];
363 store_be(ip, bits);
364
365 std::string str;
366
367 for(size_t i = 0; i != 4; ++i) {
368 if(i > 0) {
369 str += ".";
370 }
371 str += std::to_string(bits[i]);
372 }
373
374 return str;
375}
376
/*
* Return a copy of str with ASCII 'A'-'Z' folded to 'a'-'z'
*
* The fold is deliberately locale-independent: every other byte is copied
* through untouched. The original author notes the only callers (DNS name
* canonicalization for SAN/name-constraints) work on ASCII strings per
* RFC 1035.
*/
std::string tolower_string(std::string_view str) {
   std::string lower;
   lower.reserve(str.size());

   for(const char c : str) {
      const bool is_upper = (c >= 'A' && c <= 'Z');
      lower.push_back(is_upper ? static_cast<char>(c + ('a' - 'A')) : c);
   }

   return lower;
}
388
/*
* Check whether a certificate name matches a hostname
*
* issued is the name taken from the certificate and may contain a single
* '*' wildcard; host is the hostname being validated. Comparison is
* case-insensitive for ASCII letters only.
*
* A wildcard is honoured only if it is the sole '*', it lies in the leftmost
* label, it does not absorb any '.', and the issued name has at least three
* labels. Hosts that are empty, longer than 253 characters, contain '*',
* or contain an empty label (leading dot, trailing dot, or "..") never match.
*
* Returns true on a match and false otherwise; never throws.
*/
bool host_wildcard_match(std::string_view issued, std::string_view host) {
   if(host.empty() || issued.empty()) {
      return false;
   }

   // Maximum valid DNS name
   if(host.size() > 253) {
      return false;
   }

   /*
   The wildcard, if present, absorbs (host.size() - issued.size() + 1) chars,
   which must be non-negative. So issued cannot exceed host.size() + 1.
   */
   if(issued.size() > host.size() + 1) {
      return false;
   }

   // Embedded nulls in the issued name are never legitimate
   if(issued.find('\0') != std::string_view::npos) {
      return false;
   }

   // '*' is not a valid character in DNS names so should not appear on the host side
   if(host.find('*') != std::string_view::npos) {
      return false;
   }

   /*
   A host cannot have an empty label, whether leading, trailing or interior.
   Rejecting the leading dot matters for wildcards: otherwise "*.example.com"
   would match ".example.com" with the wildcard absorbing nothing.
   */
   if(host.front() == '.' || host.back() == '.') {
      return false;
   }

   if(host.find("..") != std::string_view::npos) {
      return false;
   }

   // ASCII-only case-insensitive char equality, avoids locale overhead from tolower
   auto dns_char_eq = [](char a, char b) -> bool {
      if(a == b) {
         return true;
      }
      // Setting bit 0x20 lowercases ASCII letters; the range check below
      // stops unrelated characters that differ only in that bit from matching
      const auto la = static_cast<unsigned char>(a | 0x20);
      const auto lb = static_cast<unsigned char>(b | 0x20);
      return la == lb && la >= 'a' && la <= 'z';
   };

   auto dns_char_eq_range = [&](std::string_view a, std::string_view b) -> bool {
      if(a.size() != b.size()) {
         return false;
      }
      for(size_t i = 0; i != a.size(); ++i) {
         if(!dns_char_eq(a[i], b[i])) {
            return false;
         }
      }
      return true;
   };

   // Exact match: accept
   if(dns_char_eq_range(issued, host)) {
      return true;
   }

   // Without a wildcard only an exact match could have succeeded
   const size_t star = issued.find('*');
   if(star == std::string_view::npos) {
      return false;
   }

   // At most one wildcard is allowed
   if(issued.find('*', star + 1) != std::string_view::npos) {
      return false;
   }

   const std::string_view issued_prefix = issued.substr(0, star);
   const std::string_view issued_suffix = issued.substr(star + 1);

   // The wildcard can only appear in the leftmost label
   if(issued_prefix.find('.') != std::string_view::npos) {
      return false;
   }

   // A wildcard name needs at least 3 labels. The prefix has no dots, so
   // every dot of the issued name is in the suffix.
   if(std::count(issued_suffix.begin(), issued_suffix.end(), '.') < 2) {
      return false;
   }

   /*
   With exactly one '*', the text before and after it must line up with the
   start and end of the host, and the wildcard covers whatever lies between.
   The size check near the top guarantees this subtraction cannot wrap and
   that all three views stay within host.
   */
   const size_t absorbed = host.size() - issued.size() + 1;

   const std::string_view host_prefix = host.substr(0, star);
   const std::string_view host_absorbed = host.substr(star, absorbed);
   const std::string_view host_suffix = host.substr(star + absorbed);

   // The wildcard must not span a label boundary
   if(host_absorbed.find('.') != std::string_view::npos) {
      return false;
   }

   return dns_char_eq_range(issued_prefix, host_prefix) && dns_char_eq_range(issued_suffix, host_suffix);
}
530
531std::string check_and_canonicalize_dns_name(std::string_view name) {
532 if(name.size() > 255) {
533 throw Decoding_Error("DNS name exceeds maximum allowed length");
534 }
535
536 if(name.empty()) {
537 throw Decoding_Error("DNS name cannot be empty");
538 }
539
540 if(name.starts_with(".") || name.ends_with(".")) {
541 throw Decoding_Error("DNS name cannot start or end with a dot");
542 }
543
544 /*
545 * Table mapping uppercase to lowercase and only including values for valid DNS names
546 * namely A-Z, a-z, 0-9, hyphen, and dot, plus '*' for wildcarding. (RFC 1035)
547 */
548 // clang-format off
549 constexpr uint8_t DNS_CHAR_MAPPING[128] = {
550 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
551 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
552 '\0', '\0', '\0', '\0', '*', '\0', '\0', '-', '.', '\0', '0', '1', '2', '3', '4', '5', '6', '7', '8',
553 '9', '\0', '\0', '\0', '\0', '\0', '\0', '\0', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
554 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\0', '\0', '\0', '\0',
555 '\0', '\0', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
556 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\0', '\0', '\0', '\0', '\0',
557 };
558 // clang-format on
559
560 std::string canon;
561 canon.reserve(name.size());
562
563 // RFC 1035: DNS labels must not exceed 63 characters
564 size_t current_label_length = 0;
565
566 for(size_t i = 0; i != name.size(); ++i) {
567 const char c = name[i];
568
569 if(c == '.') {
570 if(i > 0 && name[i - 1] == '.') {
571 throw Decoding_Error("DNS name contains sequential period chars");
572 }
573
574 if(current_label_length == 0) {
575 throw Decoding_Error("DNS name contains empty label");
576 }
577 current_label_length = 0; // Reset for next label
578 } else {
579 current_label_length++;
580
581 if(current_label_length > 63) { // RFC 1035 Maximum DNS label length
582 throw Decoding_Error("DNS name label exceeds maximum length of 63 characters");
583 }
584 }
585
586 const uint8_t cu = static_cast<uint8_t>(c);
587 if(cu >= 128) {
588 throw Decoding_Error("DNS name must not contain any extended ASCII code points");
589 }
590 const uint8_t mapped = DNS_CHAR_MAPPING[cu];
591 if(mapped == 0) {
592 throw Decoding_Error("DNS name includes invalid character");
593 }
594
595 if(mapped == '-') {
596 if(i == 0 || (i > 0 && name[i - 1] == '.')) {
597 throw Decoding_Error("DNS name has label with leading hyphen");
598 } else if(i == name.size() - 1 || (i < name.size() - 1 && name[i + 1] == '.')) {
599 throw Decoding_Error("DNS name has label with trailing hyphen");
600 }
601 }
602 canon.push_back(static_cast<char>(mapped));
603 }
604
605 if(current_label_length == 0) {
606 throw Decoding_Error("DNS name contains empty label");
607 }
608 return canon;
609}
610
611} // namespace Botan
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:75
uint32_t to_u32bit(std::string_view str_view)
Definition parsing.cpp:31
constexpr uint8_t get_byte(T input)
Definition loadstor.h:79
uint16_t to_uint16(std::string_view str)
Definition parsing.cpp:21
std::string fmt(std::string_view format, const T &... args)
Definition fmt.h:53
std::string tolower_string(std::string_view str)
Definition parsing.cpp:377
std::vector< std::string > split_on(std::string_view str, char delim)
Definition parsing.cpp:110
std::vector< std::string > parse_algorithm_name(std::string_view scan_name)
Definition parsing.cpp:56
std::string check_and_canonicalize_dns_name(std::string_view name)
Definition parsing.cpp:531
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition parsing.cpp:139
std::optional< std::array< uint8_t, 16 > > string_to_ipv6(std::string_view str)
Definition parsing.cpp:221
std::optional< uint32_t > string_to_ipv4(std::string_view str)
Definition parsing.cpp:155
std::string ipv4_to_string(uint32_t ip)
Definition parsing.cpp:361
bool host_wildcard_match(std::string_view issued, std::string_view host)
Definition parsing.cpp:389
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:745
std::string ipv6_to_string(std::span< const uint8_t, 16 > a)
Definition parsing.cpp:334
constexpr uint16_t make_uint16(uint8_t i0, uint8_t i1)
Definition loadstor.h:92