Botan  2.7.0
Crypto and TLS for C++11
parsing.cpp
Go to the documentation of this file.
1 /*
2 * Various string utils and parsing functions
3 * (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4 * (C) 2015 Simon Warta (Kullo GmbH)
5 * (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6 *
7 * Botan is released under the Simplified BSD License (see license.txt)
8 */
9 
10 #include <botan/parsing.h>
11 #include <botan/exceptn.h>
12 #include <botan/charset.h>
13 #include <botan/loadstor.h>
14 #include <algorithm>
15 #include <cctype>
16 #include <limits>
17 #include <set>
18 
19 namespace Botan {
20 
21 uint16_t to_uint16(const std::string& str)
22  {
23  const uint32_t x = to_u32bit(str);
24 
25  if(x >> 16)
26  throw Invalid_Argument("Integer value exceeds 16 bit range");
27 
28  return static_cast<uint16_t>(x);
29  }
30 
31 uint32_t to_u32bit(const std::string& str)
32  {
33  // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
34  for(const char chr : str)
35  {
36  if(chr < '0' || chr > '9')
37  {
38  std::string chrAsString(1, chr);
39  throw Invalid_Argument("String contains non-digit char: " + chrAsString);
40  }
41  }
42 
43  const unsigned long int x = std::stoul(str);
44 
45  if(sizeof(unsigned long int) > 4)
46  {
47  // x might be uint64
48  if (x > std::numeric_limits<uint32_t>::max())
49  {
50  throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
51  }
52  }
53 
54  return static_cast<uint32_t>(x);
55  }
56 
57 /*
58 * Convert a string into a time duration
59 */
60 uint32_t timespec_to_u32bit(const std::string& timespec)
61  {
62  if(timespec.empty())
63  return 0;
64 
65  const char suffix = timespec[timespec.size()-1];
66  std::string value = timespec.substr(0, timespec.size()-1);
67 
68  uint32_t scale = 1;
69 
70  if(Charset::is_digit(suffix))
71  value += suffix;
72  else if(suffix == 's')
73  scale = 1;
74  else if(suffix == 'm')
75  scale = 60;
76  else if(suffix == 'h')
77  scale = 60 * 60;
78  else if(suffix == 'd')
79  scale = 24 * 60 * 60;
80  else if(suffix == 'y')
81  scale = 365 * 24 * 60 * 60;
82  else
83  throw Decoding_Error("timespec_to_u32bit: Bad input " + timespec);
84 
85  return scale * to_u32bit(value);
86  }
87 
88 /*
89 * Parse a SCAN-style algorithm name
90 */
91 std::vector<std::string> parse_algorithm_name(const std::string& namex)
92  {
93  if(namex.find('(') == std::string::npos &&
94  namex.find(')') == std::string::npos)
95  return std::vector<std::string>(1, namex);
96 
97  std::string name = namex, substring;
98  std::vector<std::string> elems;
99  size_t level = 0;
100 
101  elems.push_back(name.substr(0, name.find('(')));
102  name = name.substr(name.find('('));
103 
104  for(auto i = name.begin(); i != name.end(); ++i)
105  {
106  char c = *i;
107 
108  if(c == '(')
109  ++level;
110  if(c == ')')
111  {
112  if(level == 1 && i == name.end() - 1)
113  {
114  if(elems.size() == 1)
115  elems.push_back(substring.substr(1));
116  else
117  elems.push_back(substring);
118  return elems;
119  }
120 
121  if(level == 0 || (level == 1 && i != name.end() - 1))
122  throw Invalid_Algorithm_Name(namex);
123  --level;
124  }
125 
126  if(c == ',' && level == 1)
127  {
128  if(elems.size() == 1)
129  elems.push_back(substring.substr(1));
130  else
131  elems.push_back(substring);
132  substring.clear();
133  }
134  else
135  substring += c;
136  }
137 
138  if(!substring.empty())
139  throw Invalid_Algorithm_Name(namex);
140 
141  return elems;
142  }
143 
144 std::vector<std::string> split_on(const std::string& str, char delim)
145  {
146  return split_on_pred(str, [delim](char c) { return c == delim; });
147  }
148 
149 std::vector<std::string> split_on_pred(const std::string& str,
150  std::function<bool (char)> pred)
151  {
152  std::vector<std::string> elems;
153  if(str.empty()) return elems;
154 
155  std::string substr;
156  for(auto i = str.begin(); i != str.end(); ++i)
157  {
158  if(pred(*i))
159  {
160  if(!substr.empty())
161  elems.push_back(substr);
162  substr.clear();
163  }
164  else
165  substr += *i;
166  }
167 
168  if(substr.empty())
169  throw Invalid_Argument("Unable to split string: " + str);
170  elems.push_back(substr);
171 
172  return elems;
173  }
174 
/*
* Join a string
*
* Concatenates the elements of strs in order, placing delim between each
* pair of neighbours. An empty vector gives an empty string.
*/
std::string string_join(const std::vector<std::string>& strs, char delim)
   {
   std::string joined;
   bool first = true;

   for(const std::string& piece : strs)
      {
      if(!first)
         joined.push_back(delim);

      joined.append(piece);
      first = false;
      }

   return joined;
   }
191 
192 /*
193 * Parse an ASN.1 OID string
194 */
195 std::vector<uint32_t> parse_asn1_oid(const std::string& oid)
196  {
197  std::string substring;
198  std::vector<uint32_t> oid_elems;
199 
200  for(auto i = oid.begin(); i != oid.end(); ++i)
201  {
202  char c = *i;
203 
204  if(c == '.')
205  {
206  if(substring.empty())
207  throw Invalid_OID(oid);
208  oid_elems.push_back(to_u32bit(substring));
209  substring.clear();
210  }
211  else
212  substring += c;
213  }
214 
215  if(substring.empty())
216  throw Invalid_OID(oid);
217  oid_elems.push_back(to_u32bit(substring));
218 
219  if(oid_elems.size() < 2)
220  throw Invalid_OID(oid);
221 
222  return oid_elems;
223  }
224 
225 /*
226 * X.500 String Comparison
227 */
228 bool x500_name_cmp(const std::string& name1, const std::string& name2)
229  {
230  auto p1 = name1.begin();
231  auto p2 = name2.begin();
232 
233  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
234  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
235 
236  while(p1 != name1.end() && p2 != name2.end())
237  {
238  if(Charset::is_space(*p1))
239  {
240  if(!Charset::is_space(*p2))
241  return false;
242 
243  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
244  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
245 
246  if(p1 == name1.end() && p2 == name2.end())
247  return true;
248  if(p1 == name1.end() || p2 == name2.end())
249  return false;
250  }
251 
252  if(!Charset::caseless_cmp(*p1, *p2))
253  return false;
254  ++p1;
255  ++p2;
256  }
257 
258  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
259  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
260 
261  if((p1 != name1.end()) || (p2 != name2.end()))
262  return false;
263  return true;
264  }
265 
266 /*
267 * Convert a decimal-dotted string to binary IP
268 */
269 uint32_t string_to_ipv4(const std::string& str)
270  {
271  std::vector<std::string> parts = split_on(str, '.');
272 
273  if(parts.size() != 4)
274  throw Decoding_Error("Invalid IP string " + str);
275 
276  uint32_t ip = 0;
277 
278  for(auto part = parts.begin(); part != parts.end(); ++part)
279  {
280  uint32_t octet = to_u32bit(*part);
281 
282  if(octet > 255)
283  throw Decoding_Error("Invalid IP string " + str);
284 
285  ip = (ip << 8) | (octet & 0xFF);
286  }
287 
288  return ip;
289  }
290 
291 /*
292 * Convert an IP address to decimal-dotted string
293 */
294 std::string ipv4_to_string(uint32_t ip)
295  {
296  std::string str;
297 
298  for(size_t i = 0; i != sizeof(ip); ++i)
299  {
300  if(i)
301  str += ".";
302  str += std::to_string(get_byte(i, ip));
303  }
304 
305  return str;
306  }
307 
/*
* Return a copy of str with every character found in chars removed
*/
std::string erase_chars(const std::string& str, const std::set<char>& chars)
   {
   std::string kept;

   for(const char ch : str)
      {
      const bool unwanted = (chars.find(ch) != chars.end());

      if(!unwanted)
         kept.push_back(ch);
      }

   return kept;
   }
318 
/*
* Return a copy of str in which every character found in chars has been
* replaced by to_char
*/
std::string replace_chars(const std::string& str,
                          const std::set<char>& chars,
                          char to_char)
   {
   std::string result(str);

   for(char& ch : result)
      {
      if(chars.find(ch) != chars.end())
         ch = to_char;
      }

   return result;
   }
331 
/*
* Return a copy of str in which every occurrence of from_char has been
* replaced by to_char
*/
std::string replace_char(const std::string& str, char from_char, char to_char)
   {
   std::string result(str);
   std::replace(result.begin(), result.end(), from_char, to_char);
   return result;
   }
342 
namespace {

/*
* Return a copy of the input in which every character that std::isalpha
* accepts has been passed through std::tolower
*/
std::string tolower_string(const std::string& in)
   {
   std::string lowered(in);

   for(char& ch : lowered)
      {
      const unsigned char uch = static_cast<unsigned char>(ch);

      if(std::isalpha(uch))
         ch = std::tolower(uch);
      }

   return lowered;
   }

}

/*
* Check whether a hostname matches an issued name that may contain a
* single '*' wildcard
*
* Both names are lowercased before comparison. The match is accepted when
* the names are identical, or when the issued name contains exactly one
* '*' in its leftmost component, that '*' stands for zero or more
* characters none of which is '.', the rest of the two names is identical
* and the issued name has at least three components.
*
* The match is rejected outright when either name is empty, the issued
* name contains a NUL or more than one '*', or the hostname contains a
* '*', ends in '.' or contains an empty component ("..").
*/
bool host_wildcard_match(const std::string& issued_, const std::string& host_)
   {
   const std::string issued = tolower_string(issued_);
   const std::string host = tolower_string(host_);

   if(issued.empty() || host.empty())
      return false;

   // An embedded NUL in the issued name is never legitimate
   if(issued.find('\0') != std::string::npos)
      return false;

   // More than one wildcard makes the issued name invalid
   const size_t star_pos = issued.find('*');
   const bool has_wildcard = (star_pos != std::string::npos);

   if(has_wildcard && issued.find('*', star_pos + 1) != std::string::npos)
      return false;

   // '*' is not a valid character in DNS names so should not appear on
   // the host side
   if(host.find('*') != std::string::npos)
      return false;

   // A DNS name cannot end in '.'
   if(host.back() == '.')
      return false;

   // Nor can it have an empty name component
   if(host.find("..") != std::string::npos)
      return false;

   // Exact match: accept
   if(issued == host)
      return true;

   /*
   Otherwise it might be a wildcard

   If the issued name is more than one character longer than the
   hostname it cannot match even with a wildcard. The one extra
   character allowed is the '*' itself when it stands for nothing
   (eg www.example.com matches www*.example.com)
   */
   if(issued.size() > host.size() + 1)
      return false;

   // Not identical and no wildcard to make up the difference
   if(!has_wildcard)
      return false;

   /*
   With a single '*' the issued name is prefix + '*' + suffix, and the
   wildcard must stand for exactly len(host) - len(issued) + 1 characters
   of the hostname. Check each of the three pieces in turn.
   */
   const size_t wildcard_len = host.size() - issued.size() + 1;
   const size_t suffix_len = issued.size() - star_pos - 1;

   // The wildcard can only come in the leftmost component, so no '.'
   // may precede it
   if(issued.find('.') < star_pos)
      return false;

   // Literal text before the wildcard must match
   if(host.compare(0, star_pos, issued, 0, star_pos) != 0)
      return false;

   // The wildcard must not swallow any '.' of the hostname
   const auto span_begin = host.begin() + star_pos;
   const auto span_end = span_begin + wildcard_len;

   if(std::find(span_begin, span_end, '.') != span_end)
      return false;

   // Literal text after the wildcard must match the tail of the hostname
   if(host.compare(star_pos + wildcard_len, suffix_len,
                   issued, star_pos + 1, suffix_len) != 0)
      return false;

   // Wildcard issued name must have at least 3 components
   return std::count(issued.begin(), issued.end(), '.') >= 2;
   }
476 
477 }
uint16_t to_uint16(const std::string &str)
Definition: parsing.cpp:21
std::string erase_chars(const std::string &str, const std::set< char > &chars)
Definition: parsing.cpp:308
std::vector< std::string > split_on(const std::string &str, char delim)
Definition: parsing.cpp:144
std::vector< std::string > split_on_pred(const std::string &str, std::function< bool(char)> pred)
Definition: parsing.cpp:149
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:210
uint32_t to_u32bit(const std::string &str)
Definition: parsing.cpp:31
uint32_t timespec_to_u32bit(const std::string &timespec)
Definition: parsing.cpp:60
bool caseless_cmp(char a, char b)
Definition: charset.cpp:275
bool host_wildcard_match(const std::string &issued_, const std::string &host_)
Definition: parsing.cpp:358
std::string ipv4_to_string(uint32_t ip)
Definition: parsing.cpp:294
std::string replace_char(const std::string &str, char from_char, char to_char)
Definition: parsing.cpp:332
bool x500_name_cmp(const std::string &name1, const std::string &name2)
Definition: parsing.cpp:228
bool is_space(char c)
Definition: charset.cpp:221
std::vector< std::string > parse_algorithm_name(const std::string &namex)
Definition: parsing.cpp:91
Definition: alg_id.cpp:13
uint32_t string_to_ipv4(const std::string &str)
Definition: parsing.cpp:269
std::string replace_chars(const std::string &str, const std::set< char > &chars, char to_char)
Definition: parsing.cpp:319
bool is_digit(char c)
Definition: charset.cpp:210
uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:39
std::vector< uint32_t > parse_asn1_oid(const std::string &oid)
Definition: parsing.cpp:195
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition: parsing.cpp:178