Botan 2.19.1
Crypto and TLS for C++
parsing.cpp
Go to the documentation of this file.
1/*
2* Various string utils and parsing functions
3* (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4* (C) 2015 Simon Warta (Kullo GmbH)
5* (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6*
7* Botan is released under the Simplified BSD License (see license.txt)
8*/
9
10#include <botan/parsing.h>
11#include <botan/exceptn.h>
12#include <botan/charset.h>
13#include <botan/loadstor.h>
14#include <algorithm>
15#include <cctype>
16#include <limits>
17#include <set>
18
19#if defined(BOTAN_HAS_ASN1)
20 #include <botan/asn1_obj.h>
21#endif
22
23namespace Botan {
24
25uint16_t to_uint16(const std::string& str)
26 {
27 const uint32_t x = to_u32bit(str);
28
29 if(x >> 16)
30 throw Invalid_Argument("Integer value exceeds 16 bit range");
31
32 return static_cast<uint16_t>(x);
33 }
34
35uint32_t to_u32bit(const std::string& str)
36 {
37 // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
38 for(const char chr : str)
39 {
40 if(chr < '0' || chr > '9')
41 {
42 std::string chrAsString(1, chr);
43 throw Invalid_Argument("String contains non-digit char: " + chrAsString);
44 }
45 }
46
47 const unsigned long int x = std::stoul(str);
48
49 if(sizeof(unsigned long int) > 4)
50 {
51 // x might be uint64
52 if (x > std::numeric_limits<uint32_t>::max())
53 {
54 throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
55 }
56 }
57
58 return static_cast<uint32_t>(x);
59 }
60
61/*
62* Convert a string into a time duration
63*/
64uint32_t timespec_to_u32bit(const std::string& timespec)
65 {
66 if(timespec.empty())
67 return 0;
68
69 const char suffix = timespec[timespec.size()-1];
70 std::string value = timespec.substr(0, timespec.size()-1);
71
72 uint32_t scale = 1;
73
74 if(Charset::is_digit(suffix))
75 value += suffix;
76 else if(suffix == 's')
77 scale = 1;
78 else if(suffix == 'm')
79 scale = 60;
80 else if(suffix == 'h')
81 scale = 60 * 60;
82 else if(suffix == 'd')
83 scale = 24 * 60 * 60;
84 else if(suffix == 'y')
85 scale = 365 * 24 * 60 * 60;
86 else
87 throw Decoding_Error("timespec_to_u32bit: Bad input " + timespec);
88
89 return scale * to_u32bit(value);
90 }
91
92/*
93* Parse a SCAN-style algorithm name
94*/
95std::vector<std::string> parse_algorithm_name(const std::string& namex)
96 {
97 if(namex.find('(') == std::string::npos &&
98 namex.find(')') == std::string::npos)
99 return std::vector<std::string>(1, namex);
100
101 std::string name = namex, substring;
102 std::vector<std::string> elems;
103 size_t level = 0;
104
105 elems.push_back(name.substr(0, name.find('(')));
106 name = name.substr(name.find('('));
107
108 for(auto i = name.begin(); i != name.end(); ++i)
109 {
110 char c = *i;
111
112 if(c == '(')
113 ++level;
114 if(c == ')')
115 {
116 if(level == 1 && i == name.end() - 1)
117 {
118 if(elems.size() == 1)
119 elems.push_back(substring.substr(1));
120 else
121 elems.push_back(substring);
122 return elems;
123 }
124
125 if(level == 0 || (level == 1 && i != name.end() - 1))
126 throw Invalid_Algorithm_Name(namex);
127 --level;
128 }
129
130 if(c == ',' && level == 1)
131 {
132 if(elems.size() == 1)
133 elems.push_back(substring.substr(1));
134 else
135 elems.push_back(substring);
136 substring.clear();
137 }
138 else
139 substring += c;
140 }
141
142 if(!substring.empty())
143 throw Invalid_Algorithm_Name(namex);
144
145 return elems;
146 }
147
148std::vector<std::string> split_on(const std::string& str, char delim)
149 {
150 return split_on_pred(str, [delim](char c) { return c == delim; });
151 }
152
153std::vector<std::string> split_on_pred(const std::string& str,
154 std::function<bool (char)> pred)
155 {
156 std::vector<std::string> elems;
157 if(str.empty()) return elems;
158
159 std::string substr;
160 for(auto i = str.begin(); i != str.end(); ++i)
161 {
162 if(pred(*i))
163 {
164 if(!substr.empty())
165 elems.push_back(substr);
166 substr.clear();
167 }
168 else
169 substr += *i;
170 }
171
172 if(substr.empty())
173 throw Invalid_Argument("Unable to split string: " + str);
174 elems.push_back(substr);
175
176 return elems;
177 }
178
/*
* Join strings with a delimiter
*
* Returns the elements of strs concatenated in order, with delim
* inserted between each adjacent pair. An empty vector produces an
* empty string.
*/
std::string string_join(const std::vector<std::string>& strs, char delim)
   {
   std::string joined;
   bool first = true;

   for(const std::string& piece : strs)
      {
      // The delimiter goes before every element except the first
      if(!first)
         joined += delim;

      joined += piece;
      first = false;
      }

   return joined;
   }
195
196/*
197* Parse an ASN.1 OID string
198*/
199std::vector<uint32_t> parse_asn1_oid(const std::string& oid)
200 {
201#if defined(BOTAN_HAS_ASN1)
202 return OID(oid).get_components();
203#else
204 BOTAN_UNUSED(oid);
205 throw Not_Implemented("ASN1 support not available");
206#endif
207 }
208
209/*
210* X.500 String Comparison
211*/
212bool x500_name_cmp(const std::string& name1, const std::string& name2)
213 {
214 auto p1 = name1.begin();
215 auto p2 = name2.begin();
216
217 while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
218 while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
219
220 while(p1 != name1.end() && p2 != name2.end())
221 {
222 if(Charset::is_space(*p1))
223 {
224 if(!Charset::is_space(*p2))
225 return false;
226
227 while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
228 while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
229
230 if(p1 == name1.end() && p2 == name2.end())
231 return true;
232 if(p1 == name1.end() || p2 == name2.end())
233 return false;
234 }
235
236 if(!Charset::caseless_cmp(*p1, *p2))
237 return false;
238 ++p1;
239 ++p2;
240 }
241
242 while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
243 while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
244
245 if((p1 != name1.end()) || (p2 != name2.end()))
246 return false;
247 return true;
248 }
249
250/*
251* Convert a decimal-dotted string to binary IP
252*/
253uint32_t string_to_ipv4(const std::string& str)
254 {
255 std::vector<std::string> parts = split_on(str, '.');
256
257 if(parts.size() != 4)
258 throw Decoding_Error("Invalid IP string " + str);
259
260 uint32_t ip = 0;
261
262 for(auto part = parts.begin(); part != parts.end(); ++part)
263 {
264 uint32_t octet = to_u32bit(*part);
265
266 if(octet > 255)
267 throw Decoding_Error("Invalid IP string " + str);
268
269 ip = (ip << 8) | (octet & 0xFF);
270 }
271
272 return ip;
273 }
274
275/*
276* Convert an IP address to decimal-dotted string
277*/
278std::string ipv4_to_string(uint32_t ip)
279 {
280 std::string str;
281
282 for(size_t i = 0; i != sizeof(ip); ++i)
283 {
284 if(i)
285 str += ".";
286 str += std::to_string(get_byte(i, ip));
287 }
288
289 return str;
290 }
291
/*
* Remove a set of characters from a string
*
* Returns a copy of str with every character that is a member of
* chars omitted; the relative order of the remaining characters is
* unchanged.
*/
std::string erase_chars(const std::string& str, const std::set<char>& chars)
   {
   std::string kept;

   for(const char c : str)
      {
      if(chars.find(c) != chars.end())
         continue; // listed for removal

      kept += c;
      }

   return kept;
   }
302
/*
* Replace a set of characters with a single character
*
* Returns a copy of str in which every character that is a member of
* chars has been replaced by to_char.
*/
std::string replace_chars(const std::string& str,
                          const std::set<char>& chars,
                          char to_char)
   {
   std::string result(str);

   for(char& c : result)
      {
      if(chars.find(c) != chars.end())
         c = to_char;
      }

   return result;
   }
315
/*
* Replace one character with another
*
* Returns a copy of str in which every occurrence of from_char has
* been replaced by to_char.
*/
std::string replace_char(const std::string& str, char from_char, char to_char)
   {
   std::string result(str);
   std::replace(result.begin(), result.end(), from_char, to_char);
   return result;
   }
326
/*
* Return a lowercased copy of a string
*
* Each character that std::isalpha accepts is mapped through
* std::tolower; all other characters are copied unchanged.
*/
std::string tolower_string(const std::string& in)
   {
   std::string lowered(in);

   for(char& ch : lowered)
      {
      // The <cctype> functions require a value representable as unsigned char
      const int code = static_cast<unsigned char>(ch);

      if(std::isalpha(code))
         ch = static_cast<char>(std::tolower(code));
      }

   return lowered;
   }
338
339bool host_wildcard_match(const std::string& issued_, const std::string& host_)
340 {
341 const std::string issued = tolower_string(issued_);
342 const std::string host = tolower_string(host_);
343
344 if(host.empty() || issued.empty())
345 return false;
346
347 /*
348 If there are embedded nulls in your issued name
349 Well I feel bad for you son
350 */
351 if(std::count(issued.begin(), issued.end(), char(0)) > 0)
352 return false;
353
354 // If more than one wildcard, then issued name is invalid
355 const size_t stars = std::count(issued.begin(), issued.end(), '*');
356 if(stars > 1)
357 return false;
358
359 // '*' is not a valid character in DNS names so should not appear on the host side
360 if(std::count(host.begin(), host.end(), '*') != 0)
361 return false;
362
363 // Similarly a DNS name can't end in .
364 if(host[host.size() - 1] == '.')
365 return false;
366
367 // And a host can't have an empty name component, so reject that
368 if(host.find("..") != std::string::npos)
369 return false;
370
371 // Exact match: accept
372 if(issued == host)
373 {
374 return true;
375 }
376
377 /*
378 Otherwise it might be a wildcard
379
380 If the issued size is strictly longer than the hostname size it
381 couldn't possibly be a match, even if the issued value is a
382 wildcard. The only exception is when the wildcard ends up empty
383 (eg www.example.com matches www*.example.com)
384 */
385 if(issued.size() > host.size() + 1)
386 {
387 return false;
388 }
389
390 // If no * at all then not a wildcard, and so not a match
391 if(stars != 1)
392 {
393 return false;
394 }
395
396 /*
397 Now walk through the issued string, making sure every character
398 matches. When we come to the (singular) '*', jump forward in the
399 hostname by the corresponding amount. We know exactly how much
400 space the wildcard takes because it must be exactly `len(host) -
401 len(issued) + 1 chars`.
402
403 We also verify that the '*' comes in the leftmost component, and
404 doesn't skip over any '.' in the hostname.
405 */
406 size_t dots_seen = 0;
407 size_t host_idx = 0;
408
409 for(size_t i = 0; i != issued.size(); ++i)
410 {
411 dots_seen += (issued[i] == '.');
412
413 if(issued[i] == '*')
414 {
415 // Fail: wildcard can only come in leftmost component
416 if(dots_seen > 0)
417 {
418 return false;
419 }
420
421 /*
422 Since there is only one * we know the tail of the issued and
423 hostname must be an exact match. In this case advance host_idx
424 to match.
425 */
426 const size_t advance = (host.size() - issued.size() + 1);
427
428 if(host_idx + advance > host.size()) // shouldn't happen
429 return false;
430
431 // Can't be any intervening .s that we would have skipped
432 if(std::count(host.begin() + host_idx,
433 host.begin() + host_idx + advance, '.') != 0)
434 return false;
435
436 host_idx += advance;
437 }
438 else
439 {
440 if(issued[i] != host[host_idx])
441 {
442 return false;
443 }
444
445 host_idx += 1;
446 }
447 }
448
449 // Wildcard issued name must have at least 3 components
450 if(dots_seen < 2)
451 {
452 return false;
453 }
454
455 return true;
456 }
457
458}
#define BOTAN_UNUSED(...)
Definition: assert.h:142
const std::vector< uint32_t > & get_components() const
Definition: asn1_obj.h:244
std::string name
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:213
bool is_digit(char c)
Definition: charset.cpp:210
bool is_space(char c)
Definition: charset.cpp:221
bool caseless_cmp(char a, char b)
Definition: charset.cpp:275
Definition: alg_id.cpp:13
std::vector< std::string > split_on(const std::string &str, char delim)
Definition: parsing.cpp:148
std::string erase_chars(const std::string &str, const std::set< char > &chars)
Definition: parsing.cpp:292
std::string replace_char(const std::string &str, char from_char, char to_char)
Definition: parsing.cpp:316
uint32_t string_to_ipv4(const std::string &str)
Definition: parsing.cpp:253
uint16_t to_uint16(const std::string &str)
Definition: parsing.cpp:25
std::string replace_chars(const std::string &str, const std::set< char > &chars, char to_char)
Definition: parsing.cpp:303
uint32_t timespec_to_u32bit(const std::string &timespec)
Definition: parsing.cpp:64
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition: parsing.cpp:182
std::string tolower_string(const std::string &in)
Definition: parsing.cpp:327
std::vector< uint32_t > parse_asn1_oid(const std::string &oid)
Definition: parsing.cpp:199
bool x500_name_cmp(const std::string &name1, const std::string &name2)
Definition: parsing.cpp:212
std::vector< std::string > split_on_pred(const std::string &str, std::function< bool(char)> pred)
Definition: parsing.cpp:153
std::vector< std::string > parse_algorithm_name(const std::string &namex)
Definition: parsing.cpp:95
std::string ipv4_to_string(uint32_t ip)
Definition: parsing.cpp:278
constexpr uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:41
uint32_t to_u32bit(const std::string &str)
Definition: parsing.cpp:35
bool host_wildcard_match(const std::string &issued_, const std::string &host_)
Definition: parsing.cpp:339