Botan 3.9.0
Crypto and TLS for C++
parsing.cpp
Go to the documentation of this file.
1/*
2* Various string utils and parsing functions
3* (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4* (C) 2015 Simon Warta (Kullo GmbH)
5* (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#include <botan/internal/parsing.h>
11
12#include <botan/exceptn.h>
13#include <botan/internal/fmt.h>
14#include <botan/internal/loadstor.h>
15#include <algorithm>
16#include <cctype>
17#include <limits>
18#include <sstream>
19
20namespace Botan {
21
22uint16_t to_uint16(std::string_view str) {
23 const uint32_t x = to_u32bit(str);
24
25 if(x != static_cast<uint16_t>(x)) {
26 throw Invalid_Argument("Integer value exceeds 16 bit range");
27 }
28
29 return static_cast<uint16_t>(x);
30}
31
32uint32_t to_u32bit(std::string_view str_view) {
33 const std::string str(str_view);
34
35 // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
36 for(const char chr : str) {
37 if(chr < '0' || chr > '9') {
38 throw Invalid_Argument("to_u32bit invalid decimal string '" + str + "'");
39 }
40 }
41
42 const unsigned long int x = std::stoul(str);
43
44 if constexpr(sizeof(unsigned long int) > 4) {
45 // x might be uint64
46 if(x > std::numeric_limits<uint32_t>::max()) {
47 throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
48 }
49 }
50
51 return static_cast<uint32_t>(x);
52}
53
54/*
55* Parse a SCAN-style algorithm name
56*/
57std::vector<std::string> parse_algorithm_name(std::string_view namex) {
58 if(namex.find('(') == std::string::npos && namex.find(')') == std::string::npos) {
59 return {std::string(namex)};
60 }
61
62 std::string name(namex);
63 std::string substring;
64 std::vector<std::string> elems;
65 size_t level = 0;
66
67 elems.push_back(name.substr(0, name.find('(')));
68 name = name.substr(name.find('('));
69
70 for(auto i = name.begin(); i != name.end(); ++i) {
71 char c = *i;
72
73 if(c == '(') {
74 ++level;
75 }
76 if(c == ')') {
77 if(level == 1 && i == name.end() - 1) {
78 if(elems.size() == 1) {
79 elems.push_back(substring.substr(1));
80 } else {
81 elems.push_back(substring);
82 }
83 return elems;
84 }
85
86 if(level == 0 || (level == 1 && i != name.end() - 1)) {
87 throw Invalid_Algorithm_Name(namex);
88 }
89 --level;
90 }
91
92 if(c == ',' && level == 1) {
93 if(elems.size() == 1) {
94 elems.push_back(substring.substr(1));
95 } else {
96 elems.push_back(substring);
97 }
98 substring.clear();
99 } else {
100 substring += c;
101 }
102 }
103
104 if(!substring.empty()) {
105 throw Invalid_Algorithm_Name(namex);
106 }
107
108 return elems;
109}
110
111std::vector<std::string> split_on(std::string_view str, char delim) {
112 std::vector<std::string> elems;
113 if(str.empty()) {
114 return elems;
115 }
116
117 std::string substr;
118 for(char c : str) {
119 if(c == delim) {
120 if(!substr.empty()) {
121 elems.push_back(substr);
122 }
123 substr.clear();
124 } else {
125 substr += c;
126 }
127 }
128
129 if(substr.empty()) {
130 throw Invalid_Argument(fmt("Unable to split string '{}", str));
131 }
132 elems.push_back(substr);
133
134 return elems;
135}
136
/*
* Join a list of strings
*
* @param strs the strings to concatenate
* @param delim the character placed between consecutive strings
* @return all elements of strs in order, separated by delim; an empty
*         string if strs is empty
*/
std::string string_join(const std::vector<std::string>& strs, char delim) {
   std::string joined;
   bool is_first = true;

   for(const std::string& piece : strs) {
      // The delimiter goes between elements, so not before the first one
      if(!is_first) {
         joined.push_back(delim);
      }
      joined.append(piece);
      is_first = false;
   }

   return joined;
}
152
153/*
154* Convert a decimal-dotted string to binary IP
155*/
156std::optional<uint32_t> string_to_ipv4(std::string_view str) {
157 // At least 3 dots + 4 1-digit integers
158 // At most 3 dots + 4 3-digit integers
159 if(str.size() < 3 + 4 * 1 || str.size() > 3 + 4 * 3) {
160 return {};
161 }
162
163 // the final result
164 uint32_t ip = 0;
165 // the number of '.' seen so far
166 size_t dots = 0;
167 // accumulates one quad (range 0-255)
168 uint32_t accum = 0;
169 // # of digits pushed to accum since last dot
170 size_t cur_digits = 0;
171
172 for(char c : str) {
173 if(c == '.') {
174 // . without preceding digit is invalid
175 if(cur_digits == 0) {
176 return {};
177 }
178 dots += 1;
179 // too many dots
180 if(dots > 3) {
181 return {};
182 }
183
184 cur_digits = 0;
185 ip = (ip << 8) | accum;
186 accum = 0;
187 } else if(c >= '0' && c <= '9') {
188 const auto d = static_cast<uint8_t>(c - '0');
189
190 // prohibit leading zero in quad (used for octal)
191 if(cur_digits > 0 && accum == 0) {
192 return {};
193 }
194 accum = (accum * 10) + d;
195
196 if(accum > 255) {
197 return {};
198 }
199
200 cur_digits++;
201 BOTAN_ASSERT_NOMSG(cur_digits <= 3);
202 } else {
203 return {};
204 }
205 }
206
207 // no trailing digits?
208 if(cur_digits == 0) {
209 return {};
210 }
211
212 // insufficient # of dots
213 if(dots != 3) {
214 return {};
215 }
216
217 ip = (ip << 8) | accum;
218
219 return ip;
220}
221
222/*
223* Convert an IP address to decimal-dotted string
224*/
225std::string ipv4_to_string(uint32_t ip) {
226 uint8_t bits[4];
227 store_be(ip, bits);
228
229 std::string str;
230
231 for(size_t i = 0; i != 4; ++i) {
232 if(i > 0) {
233 str += ".";
234 }
235 str += std::to_string(bits[i]);
236 }
237
238 return str;
239}
240
/*
* Return a lowercased copy of a string
*
* Each character classified as alphabetic by std::isalpha is replaced
* by the result of std::tolower; every other character is copied as is.
*
* @param in the string to convert
* @return the converted copy, of the same length as in
*/
std::string tolower_string(std::string_view in) {
   std::string lowered(in.size(), '\0');

   std::transform(in.begin(), in.end(), lowered.begin(), [](char ch) -> char {
      // <cctype> functions require a value representable as unsigned char
      const int code = static_cast<unsigned char>(ch);
      return (std::isalpha(code) != 0) ? static_cast<char>(std::tolower(code)) : ch;
   });

   return lowered;
}
251
252bool host_wildcard_match(std::string_view issued_, std::string_view host_) {
253 const std::string issued = tolower_string(issued_);
254 const std::string host = tolower_string(host_);
255
256 if(host.empty() || issued.empty()) {
257 return false;
258 }
259
260 /*
261 If there are embedded nulls in your issued name
262 Well I feel bad for you son
263 */
264 if(std::count(issued.begin(), issued.end(), char(0)) > 0) {
265 return false;
266 }
267
268 // If more than one wildcard, then issued name is invalid
269 const size_t stars = std::count(issued.begin(), issued.end(), '*');
270 if(stars > 1) {
271 return false;
272 }
273
274 // '*' is not a valid character in DNS names so should not appear on the host side
275 if(std::count(host.begin(), host.end(), '*') != 0) {
276 return false;
277 }
278
279 // Similarly a DNS name can't end in .
280 if(host[host.size() - 1] == '.') {
281 return false;
282 }
283
284 // And a host can't have an empty name component, so reject that
285 if(host.find("..") != std::string::npos) {
286 return false;
287 }
288
289 // Exact match: accept
290 if(issued == host) {
291 return true;
292 }
293
294 /*
295 Otherwise it might be a wildcard
296
297 If the issued size is strictly longer than the hostname size it
298 couldn't possibly be a match, even if the issued value is a
299 wildcard. The only exception is when the wildcard ends up empty
300 (eg www.example.com matches www*.example.com)
301 */
302 if(issued.size() > host.size() + 1) {
303 return false;
304 }
305
306 // If no * at all then not a wildcard, and so not a match
307 if(stars != 1) {
308 return false;
309 }
310
311 /*
312 Now walk through the issued string, making sure every character
313 matches. When we come to the (singular) '*', jump forward in the
314 hostname by the corresponding amount. We know exactly how much
315 space the wildcard takes because it must be exactly `len(host) -
316 len(issued) + 1 chars`.
317
318 We also verify that the '*' comes in the leftmost component, and
319 doesn't skip over any '.' in the hostname.
320 */
321 size_t dots_seen = 0;
322 size_t host_idx = 0;
323
324 for(size_t i = 0; i != issued.size(); ++i) {
325 if(issued[i] == '.') {
326 dots_seen += 1;
327 }
328
329 if(issued[i] == '*') {
330 // Fail: wildcard can only come in leftmost component
331 if(dots_seen > 0) {
332 return false;
333 }
334
335 /*
336 Since there is only one * we know the tail of the issued and
337 hostname must be an exact match. In this case advance host_idx
338 to match.
339 */
340 const size_t advance = (host.size() - issued.size() + 1);
341
342 if(host_idx + advance > host.size()) { // shouldn't happen
343 return false;
344 }
345
346 // Can't be any intervening .s that we would have skipped
347 if(std::count(host.begin() + host_idx, host.begin() + host_idx + advance, '.') != 0) {
348 return false;
349 }
350
351 host_idx += advance;
352 } else {
353 if(issued[i] != host[host_idx]) {
354 return false;
355 }
356
357 host_idx += 1;
358 }
359 }
360
361 // Wildcard issued name must have at least 3 components
362 if(dots_seen < 2) {
363 return false;
364 }
365
366 return true;
367}
368
369std::string check_and_canonicalize_dns_name(std::string_view name) {
370 if(name.size() > 255) {
371 throw Decoding_Error("DNS name exceeds maximum allowed length");
372 }
373
374 if(name.empty()) {
375 throw Decoding_Error("DNS name cannot be empty");
376 }
377
378 if(name.starts_with(".") || name.ends_with(".")) {
379 throw Decoding_Error("DNS name cannot start or end with a dot");
380 }
381
382 /*
383 * Table mapping uppercase to lowercase and only including values for valid DNS names
384 * namely A-Z, a-z, 0-9, hyphen, and dot, plus '*' for wildcarding. (RFC 1035)
385 */
386 // clang-format off
387 constexpr uint8_t DNS_CHAR_MAPPING[128] = {
388 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
389 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
390 '\0', '\0', '\0', '\0', '*', '\0', '\0', '-', '.', '\0', '0', '1', '2', '3', '4', '5', '6', '7', '8',
391 '9', '\0', '\0', '\0', '\0', '\0', '\0', '\0', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
392 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\0', '\0', '\0', '\0',
393 '\0', '\0', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
394 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\0', '\0', '\0', '\0', '\0',
395 };
396 // clang-format on
397
398 std::string canon;
399 canon.reserve(name.size());
400
401 // RFC 1035: DNS labels must not exceed 63 characters
402 size_t current_label_length = 0;
403
404 for(size_t i = 0; i != name.size(); ++i) {
405 char c = name[i];
406
407 if(c == '.') {
408 if(i > 0 && name[i - 1] == '.') {
409 throw Decoding_Error("DNS name contains sequential period chars");
410 }
411
412 if(current_label_length == 0) {
413 throw Decoding_Error("DNS name contains empty label");
414 }
415 current_label_length = 0; // Reset for next label
416 } else {
417 current_label_length++;
418
419 if(current_label_length > 63) { // RFC 1035 Maximum DNS label length
420 throw Decoding_Error("DNS name label exceeds maximum length of 63 characters");
421 }
422 }
423
424 const uint8_t cu = static_cast<uint8_t>(c);
425 if(cu >= 128) {
426 throw Decoding_Error("DNS name must not contain any extended ASCII code points");
427 }
428 const uint8_t mapped = DNS_CHAR_MAPPING[cu];
429 if(mapped == 0) {
430 throw Decoding_Error("DNS name includes invalid character");
431 }
432
433 if(mapped == '-') {
434 if(i == 0 || (i > 0 && name[i - 1] == '.')) {
435 throw Decoding_Error("DNS name has label with leading hyphen");
436 } else if(i == name.size() - 1 || (i < name.size() - 1 && name[i + 1] == '.')) {
437 throw Decoding_Error("DNS name has label with trailing hyphen");
438 }
439 }
440 canon.push_back(static_cast<char>(mapped));
441 }
442
443 if(current_label_length == 0) {
444 throw Decoding_Error("DNS name contains empty label");
445 }
446 return canon;
447}
448
449} // namespace Botan
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:75
uint32_t to_u32bit(std::string_view str_view)
Definition parsing.cpp:32
uint16_t to_uint16(std::string_view str)
Definition parsing.cpp:22
std::string fmt(std::string_view format, const T &... args)
Definition fmt.h:53
std::vector< std::string > split_on(std::string_view str, char delim)
Definition parsing.cpp:111
std::string tolower_string(std::string_view in)
Definition parsing.cpp:241
std::string check_and_canonicalize_dns_name(std::string_view name)
Definition parsing.cpp:369
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition parsing.cpp:140
bool host_wildcard_match(std::string_view issued_, std::string_view host_)
Definition parsing.cpp:252
std::vector< std::string > parse_algorithm_name(std::string_view namex)
Definition parsing.cpp:57
std::optional< uint32_t > string_to_ipv4(std::string_view str)
Definition parsing.cpp:156
std::string ipv4_to_string(uint32_t ip)
Definition parsing.cpp:225
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:745