Botan 3.7.1
Crypto and TLS for C++
parsing.cpp
Go to the documentation of this file.
1/*
2* Various string utils and parsing functions
3* (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4* (C) 2015 Simon Warta (Kullo GmbH)
5* (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#include <botan/internal/parsing.h>
11
12#include <botan/exceptn.h>
13#include <botan/internal/fmt.h>
14#include <botan/internal/loadstor.h>
15#include <algorithm>
16#include <cctype>
17#include <limits>
18#include <sstream>
19
20namespace Botan {
21
22uint16_t to_uint16(std::string_view str) {
23 const uint32_t x = to_u32bit(str);
24
25 if(x >> 16) {
26 throw Invalid_Argument("Integer value exceeds 16 bit range");
27 }
28
29 return static_cast<uint16_t>(x);
30}
31
32uint32_t to_u32bit(std::string_view str_view) {
33 const std::string str(str_view);
34
35 // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
36 for(const char chr : str) {
37 if(chr < '0' || chr > '9') {
38 throw Invalid_Argument("to_u32bit invalid decimal string '" + str + "'");
39 }
40 }
41
42 const unsigned long int x = std::stoul(str);
43
44 if constexpr(sizeof(unsigned long int) > 4) {
45 // x might be uint64
46 if(x > std::numeric_limits<uint32_t>::max()) {
47 throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
48 }
49 }
50
51 return static_cast<uint32_t>(x);
52}
53
54/*
55* Parse a SCAN-style algorithm name
56*/
57std::vector<std::string> parse_algorithm_name(std::string_view namex) {
58 if(namex.find('(') == std::string::npos && namex.find(')') == std::string::npos) {
59 return {std::string(namex)};
60 }
61
62 std::string name(namex);
63 std::string substring;
64 std::vector<std::string> elems;
65 size_t level = 0;
66
67 elems.push_back(name.substr(0, name.find('(')));
68 name = name.substr(name.find('('));
69
70 for(auto i = name.begin(); i != name.end(); ++i) {
71 char c = *i;
72
73 if(c == '(') {
74 ++level;
75 }
76 if(c == ')') {
77 if(level == 1 && i == name.end() - 1) {
78 if(elems.size() == 1) {
79 elems.push_back(substring.substr(1));
80 } else {
81 elems.push_back(substring);
82 }
83 return elems;
84 }
85
86 if(level == 0 || (level == 1 && i != name.end() - 1)) {
87 throw Invalid_Algorithm_Name(namex);
88 }
89 --level;
90 }
91
92 if(c == ',' && level == 1) {
93 if(elems.size() == 1) {
94 elems.push_back(substring.substr(1));
95 } else {
96 elems.push_back(substring);
97 }
98 substring.clear();
99 } else {
100 substring += c;
101 }
102 }
103
104 if(!substring.empty()) {
105 throw Invalid_Algorithm_Name(namex);
106 }
107
108 return elems;
109}
110
111std::vector<std::string> split_on(std::string_view str, char delim) {
112 std::vector<std::string> elems;
113 if(str.empty()) {
114 return elems;
115 }
116
117 std::string substr;
118 for(auto i = str.begin(); i != str.end(); ++i) {
119 if(*i == delim) {
120 if(!substr.empty()) {
121 elems.push_back(substr);
122 }
123 substr.clear();
124 } else {
125 substr += *i;
126 }
127 }
128
129 if(substr.empty()) {
130 throw Invalid_Argument(fmt("Unable to split string '{}", str));
131 }
132 elems.push_back(substr);
133
134 return elems;
135}
136
/*
* Join a sequence of strings with a delimiter character
*
* @param strs the strings to join
* @param delim the character placed between consecutive elements
* @return the concatenation of all elements of strs with delim between
*         each adjacent pair; the empty string if strs is empty
*/
std::string string_join(const std::vector<std::string>& strs, char delim) {
   std::string joined;
   if(strs.empty()) {
      return joined;
   }

   // One delimiter between each adjacent pair, plus every element's length
   size_t total_len = strs.size() - 1;
   for(const auto& s : strs) {
      total_len += s.size();
   }
   joined.reserve(total_len);

   for(size_t i = 0; i != strs.size(); ++i) {
      if(i != 0) {
         joined += delim;
      }
      joined += strs[i];
   }

   return joined;
}
152
153/*
154* Convert a decimal-dotted string to binary IP
155*/
156std::optional<uint32_t> string_to_ipv4(std::string_view str) {
157 // At least 3 dots + 4 1-digit integers
158 // At most 3 dots + 4 3-digit integers
159 if(str.size() < 3 + 4 * 1 || str.size() > 3 + 4 * 3) {
160 return {};
161 }
162
163 // the final result
164 uint32_t ip = 0;
165 // the number of '.' seen so far
166 size_t dots = 0;
167 // accumulates one quad (range 0-255)
168 uint32_t accum = 0;
169 // # of digits pushed to accum since last dot
170 size_t cur_digits = 0;
171
172 for(char c : str) {
173 if(c == '.') {
174 // . without preceding digit is invalid
175 if(cur_digits == 0) {
176 return {};
177 }
178 dots += 1;
179 // too many dots
180 if(dots > 3) {
181 return {};
182 }
183
184 cur_digits = 0;
185 ip = (ip << 8) | accum;
186 accum = 0;
187 } else if(c >= '0' && c <= '9') {
188 const auto d = static_cast<uint8_t>(c - '0');
189
190 // prohibit leading zero in quad (used for octal)
191 if(cur_digits > 0 && accum == 0) {
192 return {};
193 }
194 accum = (accum * 10) + d;
195
196 if(accum > 255) {
197 return {};
198 }
199
200 cur_digits++;
201 BOTAN_ASSERT_NOMSG(cur_digits <= 3);
202 } else {
203 return {};
204 }
205 }
206
207 // no trailing digits?
208 if(cur_digits == 0) {
209 return {};
210 }
211
212 // insufficient # of dots
213 if(dots != 3) {
214 return {};
215 }
216
217 ip = (ip << 8) | accum;
218
219 return ip;
220}
221
222/*
223* Convert an IP address to decimal-dotted string
224*/
225std::string ipv4_to_string(uint32_t ip) {
226 uint8_t bits[4];
227 store_be(ip, bits);
228
229 std::string str;
230
231 for(size_t i = 0; i != 4; ++i) {
232 if(i > 0) {
233 str += ".";
234 }
235 str += std::to_string(bits[i]);
236 }
237
238 return str;
239}
240
/*
* Return a copy of a string with alphabetic characters lowercased
*
* Classification and conversion use std::isalpha and std::tolower; all
* characters that std::isalpha rejects are copied unchanged.
*
* @param in the string to convert
* @return the lowercased copy
*/
std::string tolower_string(std::string_view in) {
   std::string lowered(in);

   std::transform(lowered.begin(), lowered.end(), lowered.begin(), [](char c) -> char {
      // Go via unsigned char: the <cctype> functions must not be passed a
      // negative value
      const int cu = static_cast<unsigned char>(c);
      return std::isalpha(cu) ? static_cast<char>(std::tolower(cu)) : c;
   });

   return lowered;
}
251
252bool host_wildcard_match(std::string_view issued_, std::string_view host_) {
253 const std::string issued = tolower_string(issued_);
254 const std::string host = tolower_string(host_);
255
256 if(host.empty() || issued.empty()) {
257 return false;
258 }
259
260 /*
261 If there are embedded nulls in your issued name
262 Well I feel bad for you son
263 */
264 if(std::count(issued.begin(), issued.end(), char(0)) > 0) {
265 return false;
266 }
267
268 // If more than one wildcard, then issued name is invalid
269 const size_t stars = std::count(issued.begin(), issued.end(), '*');
270 if(stars > 1) {
271 return false;
272 }
273
274 // '*' is not a valid character in DNS names so should not appear on the host side
275 if(std::count(host.begin(), host.end(), '*') != 0) {
276 return false;
277 }
278
279 // Similarly a DNS name can't end in .
280 if(host[host.size() - 1] == '.') {
281 return false;
282 }
283
284 // And a host can't have an empty name component, so reject that
285 if(host.find("..") != std::string::npos) {
286 return false;
287 }
288
289 // Exact match: accept
290 if(issued == host) {
291 return true;
292 }
293
294 /*
295 Otherwise it might be a wildcard
296
297 If the issued size is strictly longer than the hostname size it
298 couldn't possibly be a match, even if the issued value is a
299 wildcard. The only exception is when the wildcard ends up empty
300 (eg www.example.com matches www*.example.com)
301 */
302 if(issued.size() > host.size() + 1) {
303 return false;
304 }
305
306 // If no * at all then not a wildcard, and so not a match
307 if(stars != 1) {
308 return false;
309 }
310
311 /*
312 Now walk through the issued string, making sure every character
313 matches. When we come to the (singular) '*', jump forward in the
314 hostname by the corresponding amount. We know exactly how much
315 space the wildcard takes because it must be exactly `len(host) -
316 len(issued) + 1 chars`.
317
318 We also verify that the '*' comes in the leftmost component, and
319 doesn't skip over any '.' in the hostname.
320 */
321 size_t dots_seen = 0;
322 size_t host_idx = 0;
323
324 for(size_t i = 0; i != issued.size(); ++i) {
325 dots_seen += (issued[i] == '.');
326
327 if(issued[i] == '*') {
328 // Fail: wildcard can only come in leftmost component
329 if(dots_seen > 0) {
330 return false;
331 }
332
333 /*
334 Since there is only one * we know the tail of the issued and
335 hostname must be an exact match. In this case advance host_idx
336 to match.
337 */
338 const size_t advance = (host.size() - issued.size() + 1);
339
340 if(host_idx + advance > host.size()) { // shouldn't happen
341 return false;
342 }
343
344 // Can't be any intervening .s that we would have skipped
345 if(std::count(host.begin() + host_idx, host.begin() + host_idx + advance, '.') != 0) {
346 return false;
347 }
348
349 host_idx += advance;
350 } else {
351 if(issued[i] != host[host_idx]) {
352 return false;
353 }
354
355 host_idx += 1;
356 }
357 }
358
359 // Wildcard issued name must have at least 3 components
360 if(dots_seen < 2) {
361 return false;
362 }
363
364 return true;
365}
366
367std::string check_and_canonicalize_dns_name(std::string_view name) {
368 if(name.size() > 255) {
369 throw Decoding_Error("DNS name exceeds maximum allowed length");
370 }
371
372 if(name.empty()) {
373 throw Decoding_Error("DNS name cannot be empty");
374 }
375
376 if(name.starts_with(".")) {
377 throw Decoding_Error("DNS name cannot start with a dot");
378 }
379
380 /*
381 * Table mapping uppercase to lowercase and only including values for valid DNS names
382 * namely A-Z, a-z, 0-9, hypen, and dot, plus '*' for wildcarding.
383 */
384 // clang-format off
385 constexpr uint8_t DNS_CHAR_MAPPING[128] = {
386 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
387 '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\0',
388 '\0', '\0', '\0', '\0', '*', '\0', '\0', '-', '.', '\0', '0', '1', '2', '3', '4', '5', '6', '7', '8',
389 '9', '\0', '\0', '\0', '\0', '\0', '\0', '\0', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k',
390 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\0', '\0', '\0', '\0',
391 '\0', '\0', 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q',
392 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', '\0', '\0', '\0', '\0', '\0',
393 };
394 // clang-format on
395
396 std::string canon;
397 canon.reserve(name.size());
398
399 for(size_t i = 0; i != name.size(); ++i) {
400 char c = name[i];
401
402 if(c == '.') {
403 if(name[i - 1] == '.') {
404 throw Decoding_Error("DNS name contains sequential period chars");
405 }
406 if(i == name.size() - 1) {
407 throw Decoding_Error("DNS name cannot end in a period");
408 }
409 }
410
411 const uint8_t cu = static_cast<uint8_t>(c);
412 if(cu >= 128) {
413 throw Decoding_Error("DNS name must not contain any extended ASCII code points");
414 }
415 const uint8_t mapped = DNS_CHAR_MAPPING[cu];
416 if(mapped == 0) {
417 throw Decoding_Error("DNS name includes invalid character");
418 }
419 // TODO check label lengths
420 canon.push_back(static_cast<char>(mapped));
421 }
422
423 return canon;
424}
425
426} // namespace Botan
#define BOTAN_ASSERT_NOMSG(expr)
Definition assert.h:59
std::string name
uint32_t to_u32bit(std::string_view str_view)
Definition parsing.cpp:32
uint16_t to_uint16(std::string_view str)
Definition parsing.cpp:22
std::string fmt(std::string_view format, const T &... args)
Definition fmt.h:53
std::vector< std::string > split_on(std::string_view str, char delim)
Definition parsing.cpp:111
std::string tolower_string(std::string_view in)
Definition parsing.cpp:241
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition parsing.cpp:140
bool host_wildcard_match(std::string_view issued_, std::string_view host_)
Definition parsing.cpp:252
std::vector< std::string > parse_algorithm_name(std::string_view namex)
Definition parsing.cpp:57
std::optional< uint32_t > string_to_ipv4(std::string_view str)
Definition parsing.cpp:156
std::string check_and_canonicalize_dns_name(std::string_view name)
Definition parsing.cpp:367
std::string ipv4_to_string(uint32_t ip)
Definition parsing.cpp:225
constexpr auto store_be(ParamTs &&... params)
Definition loadstor.h:773