Botan  2.18.0
Crypto and TLS for C++11
parsing.cpp
Go to the documentation of this file.
1 /*
2 * Various string utils and parsing functions
3 * (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4 * (C) 2015 Simon Warta (Kullo GmbH)
5 * (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6 *
7 * Botan is released under the Simplified BSD License (see license.txt)
8 */
9 
10 #include <botan/parsing.h>
11 #include <botan/exceptn.h>
12 #include <botan/charset.h>
13 #include <botan/loadstor.h>
14 #include <algorithm>
15 #include <cctype>
16 #include <limits>
17 #include <set>
18 
19 #if defined(BOTAN_HAS_ASN1)
20  #include <botan/asn1_obj.h>
21 #endif
22 
23 namespace Botan {
24 
25 uint16_t to_uint16(const std::string& str)
26  {
27  const uint32_t x = to_u32bit(str);
28 
29  if(x >> 16)
30  throw Invalid_Argument("Integer value exceeds 16 bit range");
31 
32  return static_cast<uint16_t>(x);
33  }
34 
35 uint32_t to_u32bit(const std::string& str)
36  {
37  // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
38  for(const char chr : str)
39  {
40  if(chr < '0' || chr > '9')
41  {
42  std::string chrAsString(1, chr);
43  throw Invalid_Argument("String contains non-digit char: " + chrAsString);
44  }
45  }
46 
47  const unsigned long int x = std::stoul(str);
48 
49  if(sizeof(unsigned long int) > 4)
50  {
51  // x might be uint64
52  if (x > std::numeric_limits<uint32_t>::max())
53  {
54  throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
55  }
56  }
57 
58  return static_cast<uint32_t>(x);
59  }
60 
61 /*
62 * Convert a string into a time duration
63 */
64 uint32_t timespec_to_u32bit(const std::string& timespec)
65  {
66  if(timespec.empty())
67  return 0;
68 
69  const char suffix = timespec[timespec.size()-1];
70  std::string value = timespec.substr(0, timespec.size()-1);
71 
72  uint32_t scale = 1;
73 
74  if(Charset::is_digit(suffix))
75  value += suffix;
76  else if(suffix == 's')
77  scale = 1;
78  else if(suffix == 'm')
79  scale = 60;
80  else if(suffix == 'h')
81  scale = 60 * 60;
82  else if(suffix == 'd')
83  scale = 24 * 60 * 60;
84  else if(suffix == 'y')
85  scale = 365 * 24 * 60 * 60;
86  else
87  throw Decoding_Error("timespec_to_u32bit: Bad input " + timespec);
88 
89  return scale * to_u32bit(value);
90  }
91 
92 /*
93 * Parse a SCAN-style algorithm name
94 */
95 std::vector<std::string> parse_algorithm_name(const std::string& namex)
96  {
97  if(namex.find('(') == std::string::npos &&
98  namex.find(')') == std::string::npos)
99  return std::vector<std::string>(1, namex);
100 
101  std::string name = namex, substring;
102  std::vector<std::string> elems;
103  size_t level = 0;
104 
105  elems.push_back(name.substr(0, name.find('(')));
106  name = name.substr(name.find('('));
107 
108  for(auto i = name.begin(); i != name.end(); ++i)
109  {
110  char c = *i;
111 
112  if(c == '(')
113  ++level;
114  if(c == ')')
115  {
116  if(level == 1 && i == name.end() - 1)
117  {
118  if(elems.size() == 1)
119  elems.push_back(substring.substr(1));
120  else
121  elems.push_back(substring);
122  return elems;
123  }
124 
125  if(level == 0 || (level == 1 && i != name.end() - 1))
126  throw Invalid_Algorithm_Name(namex);
127  --level;
128  }
129 
130  if(c == ',' && level == 1)
131  {
132  if(elems.size() == 1)
133  elems.push_back(substring.substr(1));
134  else
135  elems.push_back(substring);
136  substring.clear();
137  }
138  else
139  substring += c;
140  }
141 
142  if(!substring.empty())
143  throw Invalid_Algorithm_Name(namex);
144 
145  return elems;
146  }
147 
148 std::vector<std::string> split_on(const std::string& str, char delim)
149  {
150  return split_on_pred(str, [delim](char c) { return c == delim; });
151  }
152 
153 std::vector<std::string> split_on_pred(const std::string& str,
154  std::function<bool (char)> pred)
155  {
156  std::vector<std::string> elems;
157  if(str.empty()) return elems;
158 
159  std::string substr;
160  for(auto i = str.begin(); i != str.end(); ++i)
161  {
162  if(pred(*i))
163  {
164  if(!substr.empty())
165  elems.push_back(substr);
166  substr.clear();
167  }
168  else
169  substr += *i;
170  }
171 
172  if(substr.empty())
173  throw Invalid_Argument("Unable to split string: " + str);
174  elems.push_back(substr);
175 
176  return elems;
177  }
178 
/*
* Join a string
*
* Concatenates the elements of strs in order, placing delim between each
* adjacent pair. An empty vector gives an empty string.
*/
std::string string_join(const std::vector<std::string>& strs, char delim)
   {
   std::string joined;
   bool first = true;

   for(const std::string& piece : strs)
      {
      // The delimiter goes in front of every element except the first
      if(!first)
         joined.push_back(delim);

      joined.append(piece);
      first = false;
      }

   return joined;
   }
195 
196 /*
197 * Parse an ASN.1 OID string
198 */
199 std::vector<uint32_t> parse_asn1_oid(const std::string& oid)
200  {
201 #if defined(BOTAN_HAS_ASN1)
202  return OID(oid).get_components();
203 #else
204  BOTAN_UNUSED(oid);
205  throw Not_Implemented("ASN1 support not available");
206 #endif
207  }
208 
209 /*
210 * X.500 String Comparison
211 */
212 bool x500_name_cmp(const std::string& name1, const std::string& name2)
213  {
214  auto p1 = name1.begin();
215  auto p2 = name2.begin();
216 
217  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
218  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
219 
220  while(p1 != name1.end() && p2 != name2.end())
221  {
222  if(Charset::is_space(*p1))
223  {
224  if(!Charset::is_space(*p2))
225  return false;
226 
227  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
228  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
229 
230  if(p1 == name1.end() && p2 == name2.end())
231  return true;
232  if(p1 == name1.end() || p2 == name2.end())
233  return false;
234  }
235 
236  if(!Charset::caseless_cmp(*p1, *p2))
237  return false;
238  ++p1;
239  ++p2;
240  }
241 
242  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
243  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
244 
245  if((p1 != name1.end()) || (p2 != name2.end()))
246  return false;
247  return true;
248  }
249 
250 /*
251 * Convert a decimal-dotted string to binary IP
252 */
253 uint32_t string_to_ipv4(const std::string& str)
254  {
255  std::vector<std::string> parts = split_on(str, '.');
256 
257  if(parts.size() != 4)
258  throw Decoding_Error("Invalid IP string " + str);
259 
260  uint32_t ip = 0;
261 
262  for(auto part = parts.begin(); part != parts.end(); ++part)
263  {
264  uint32_t octet = to_u32bit(*part);
265 
266  if(octet > 255)
267  throw Decoding_Error("Invalid IP string " + str);
268 
269  ip = (ip << 8) | (octet & 0xFF);
270  }
271 
272  return ip;
273  }
274 
275 /*
276 * Convert an IP address to decimal-dotted string
277 */
278 std::string ipv4_to_string(uint32_t ip)
279  {
280  std::string str;
281 
282  for(size_t i = 0; i != sizeof(ip); ++i)
283  {
284  if(i)
285  str += ".";
286  str += std::to_string(get_byte(i, ip));
287  }
288 
289  return str;
290  }
291 
/*
* Return a copy of str with every character contained in chars removed
*/
std::string erase_chars(const std::string& str, const std::set<char>& chars)
   {
   std::string kept;

   for(const char ch : str)
      {
      if(chars.find(ch) == chars.end())
         kept.push_back(ch);
      }

   return kept;
   }
302 
/*
* Return a copy of str in which every character contained in chars has
* been replaced by to_char
*/
std::string replace_chars(const std::string& str,
                          const std::set<char>& chars,
                          char to_char)
   {
   std::string result(str);

   std::replace_if(result.begin(), result.end(),
                   [&chars](char c) { return chars.count(c) != 0; },
                   to_char);

   return result;
   }
315 
/*
* Return a copy of str in which every occurrence of from_char has been
* replaced by to_char
*/
std::string replace_char(const std::string& str, char from_char, char to_char)
   {
   std::string result(str);
   std::replace(result.begin(), result.end(), from_char, to_char);
   return result;
   }
326 
namespace {

/*
* Return a copy of the input with every character that std::isalpha
* accepts replaced by its std::tolower equivalent
*/
std::string tolower_string(const std::string& in)
   {
   std::string lowered;
   lowered.reserve(in.size());

   for(const char ch : in)
      {
      // Go through unsigned char: the <cctype> functions are undefined
      // for negative values other than EOF
      const int code = static_cast<unsigned char>(ch);
      lowered.push_back(std::isalpha(code) ? static_cast<char>(std::tolower(code)) : ch);
      }

   return lowered;
   }

}

/*
* Check whether a hostname is matched by an issued name that may contain a
* single '*' wildcard
*
* Both inputs are lowercased before comparison. The result is true when
* the two names are identical, or when the issued name contains exactly one
* '*' that
*   - sits before the first '.' of the issued name,
*   - stands for zero or more hostname characters, none of them '.', and
*   - belongs to an issued name containing at least two '.' characters,
* with everything before and after the '*' matching the hostname exactly.
*
* Always false if either name is empty, the issued name contains a NUL or
* more than one '*', or the hostname contains '*', ends in '.' or contains
* "..".
*/
bool host_wildcard_match(const std::string& issued_, const std::string& host_)
   {
   const std::string issued = tolower_string(issued_);
   const std::string host = tolower_string(host_);

   if(issued.empty() || host.empty())
      return false;

   // An embedded NUL in the issued name is never acceptable
   if(issued.find('\0') != std::string::npos)
      return false;

   // At most one wildcard may appear in the issued name
   const size_t star_pos = issued.find('*');
   const bool has_star = (star_pos != std::string::npos);

   if(has_star && issued.find('*', star_pos + 1) != std::string::npos)
      return false;

   // Sanity checks on the hostname: no '*', no trailing '.', and no empty
   // name component
   if(host.find('*') != std::string::npos)
      return false;
   if(host.back() == '.')
      return false;
   if(host.find("..") != std::string::npos)
      return false;

   // Identical names match outright
   if(issued == host)
      return true;

   /*
   The issued name may exceed the hostname by at most one character, which
   is the case where the wildcard stands for nothing at all
   (www*.example.com against www.example.com)
   */
   if(issued.size() > host.size() + 1)
      return false;

   // Not identical and no wildcard: no match
   if(!has_star)
      return false;

   // The wildcard must be in the leftmost component, so no '.' before it
   if(issued.find('.') < star_pos)
      return false;

   /*
   With a single '*' the issued name splits into a literal prefix and a
   literal suffix. The wildcard has to cover exactly the hostname
   characters that remain once both have been accounted for.
   */
   const size_t prefix_len = star_pos;
   const size_t suffix_len = issued.size() - star_pos - 1;
   const size_t covered_len = host.size() + 1 - issued.size();

   if(prefix_len + covered_len > host.size()) // shouldn't happen
      return false;

   // Literal part in front of the wildcard
   if(host.compare(0, prefix_len, issued, 0, prefix_len) != 0)
      return false;

   // The wildcard may not swallow a '.'
   const auto covered_begin = host.begin() + prefix_len;
   const auto covered_end = covered_begin + covered_len;

   if(std::find(covered_begin, covered_end, '.') != covered_end)
      return false;

   // Literal part following the wildcard
   if(host.compare(prefix_len + covered_len, suffix_len,
                   issued, star_pos + 1, suffix_len) != 0)
      return false;

   // Wildcard issued name must have at least 3 components
   return std::count(issued.begin(), issued.end(), '.') >= 2;
   }
461 
462 }
uint16_t to_uint16(const std::string &str)
Definition: parsing.cpp:25
const std::vector< uint32_t > & get_components() const
Definition: asn1_obj.h:244
std::string erase_chars(const std::string &str, const std::set< char > &chars)
Definition: parsing.cpp:292
std::vector< std::string > split_on(const std::string &str, char delim)
Definition: parsing.cpp:148
constexpr uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:41
std::vector< std::string > split_on_pred(const std::string &str, std::function< bool(char)> pred)
Definition: parsing.cpp:153
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:213
uint32_t to_u32bit(const std::string &str)
Definition: parsing.cpp:35
uint32_t timespec_to_u32bit(const std::string &timespec)
Definition: parsing.cpp:64
bool caseless_cmp(char a, char b)
Definition: charset.cpp:275
bool host_wildcard_match(const std::string &issued_, const std::string &host_)
Definition: parsing.cpp:343
std::string name
std::string ipv4_to_string(uint32_t ip)
Definition: parsing.cpp:278
std::string replace_char(const std::string &str, char from_char, char to_char)
Definition: parsing.cpp:316
bool x500_name_cmp(const std::string &name1, const std::string &name2)
Definition: parsing.cpp:212
bool is_space(char c)
Definition: charset.cpp:221
std::vector< std::string > parse_algorithm_name(const std::string &namex)
Definition: parsing.cpp:95
Definition: alg_id.cpp:13
uint32_t string_to_ipv4(const std::string &str)
Definition: parsing.cpp:253
#define BOTAN_UNUSED(...)
Definition: assert.h:142
std::string replace_chars(const std::string &str, const std::set< char > &chars, char to_char)
Definition: parsing.cpp:303
bool is_digit(char c)
Definition: charset.cpp:210
std::vector< uint32_t > parse_asn1_oid(const std::string &oid)
Definition: parsing.cpp:199
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition: parsing.cpp:182