Botan  2.8.0
Crypto and TLS for C++11
parsing.cpp
Go to the documentation of this file.
1 /*
2 * Various string utils and parsing functions
3 * (C) 1999-2007,2013,2014,2015,2018 Jack Lloyd
4 * (C) 2015 Simon Warta (Kullo GmbH)
5 * (C) 2017 René Korthaus, Rohde & Schwarz Cybersecurity
6 *
7 * Botan is released under the Simplified BSD License (see license.txt)
8 */
9 
10 #include <botan/parsing.h>
11 #include <botan/exceptn.h>
12 #include <botan/charset.h>
13 #include <botan/loadstor.h>
14 #include <algorithm>
15 #include <cctype>
16 #include <limits>
17 #include <set>
18 
19 namespace Botan {
20 
21 uint16_t to_uint16(const std::string& str)
22  {
23  const uint32_t x = to_u32bit(str);
24 
25  if(x >> 16)
26  throw Invalid_Argument("Integer value exceeds 16 bit range");
27 
28  return static_cast<uint16_t>(x);
29  }
30 
31 uint32_t to_u32bit(const std::string& str)
32  {
33  // std::stoul is not strict enough. Ensure that str is digit only [0-9]*
34  for(const char chr : str)
35  {
36  if(chr < '0' || chr > '9')
37  {
38  std::string chrAsString(1, chr);
39  throw Invalid_Argument("String contains non-digit char: " + chrAsString);
40  }
41  }
42 
43  const unsigned long int x = std::stoul(str);
44 
45  if(sizeof(unsigned long int) > 4)
46  {
47  // x might be uint64
48  if (x > std::numeric_limits<uint32_t>::max())
49  {
50  throw Invalid_Argument("Integer value of " + str + " exceeds 32 bit range");
51  }
52  }
53 
54  return static_cast<uint32_t>(x);
55  }
56 
57 /*
58 * Convert a string into a time duration
59 */
60 uint32_t timespec_to_u32bit(const std::string& timespec)
61  {
62  if(timespec.empty())
63  return 0;
64 
65  const char suffix = timespec[timespec.size()-1];
66  std::string value = timespec.substr(0, timespec.size()-1);
67 
68  uint32_t scale = 1;
69 
70  if(Charset::is_digit(suffix))
71  value += suffix;
72  else if(suffix == 's')
73  scale = 1;
74  else if(suffix == 'm')
75  scale = 60;
76  else if(suffix == 'h')
77  scale = 60 * 60;
78  else if(suffix == 'd')
79  scale = 24 * 60 * 60;
80  else if(suffix == 'y')
81  scale = 365 * 24 * 60 * 60;
82  else
83  throw Decoding_Error("timespec_to_u32bit: Bad input " + timespec);
84 
85  return scale * to_u32bit(value);
86  }
87 
88 /*
89 * Parse a SCAN-style algorithm name
90 */
91 std::vector<std::string> parse_algorithm_name(const std::string& namex)
92  {
93  if(namex.find('(') == std::string::npos &&
94  namex.find(')') == std::string::npos)
95  return std::vector<std::string>(1, namex);
96 
97  std::string name = namex, substring;
98  std::vector<std::string> elems;
99  size_t level = 0;
100 
101  elems.push_back(name.substr(0, name.find('(')));
102  name = name.substr(name.find('('));
103 
104  for(auto i = name.begin(); i != name.end(); ++i)
105  {
106  char c = *i;
107 
108  if(c == '(')
109  ++level;
110  if(c == ')')
111  {
112  if(level == 1 && i == name.end() - 1)
113  {
114  if(elems.size() == 1)
115  elems.push_back(substring.substr(1));
116  else
117  elems.push_back(substring);
118  return elems;
119  }
120 
121  if(level == 0 || (level == 1 && i != name.end() - 1))
122  throw Invalid_Algorithm_Name(namex);
123  --level;
124  }
125 
126  if(c == ',' && level == 1)
127  {
128  if(elems.size() == 1)
129  elems.push_back(substring.substr(1));
130  else
131  elems.push_back(substring);
132  substring.clear();
133  }
134  else
135  substring += c;
136  }
137 
138  if(!substring.empty())
139  throw Invalid_Algorithm_Name(namex);
140 
141  return elems;
142  }
143 
144 std::vector<std::string> split_on(const std::string& str, char delim)
145  {
146  return split_on_pred(str, [delim](char c) { return c == delim; });
147  }
148 
149 std::vector<std::string> split_on_pred(const std::string& str,
150  std::function<bool (char)> pred)
151  {
152  std::vector<std::string> elems;
153  if(str.empty()) return elems;
154 
155  std::string substr;
156  for(auto i = str.begin(); i != str.end(); ++i)
157  {
158  if(pred(*i))
159  {
160  if(!substr.empty())
161  elems.push_back(substr);
162  substr.clear();
163  }
164  else
165  substr += *i;
166  }
167 
168  if(substr.empty())
169  throw Invalid_Argument("Unable to split string: " + str);
170  elems.push_back(substr);
171 
172  return elems;
173  }
174 
/**
* Join a list of strings with a delimiter character.
*
* @param strs the strings to join
* @param delim the character placed between consecutive strings
* @return the concatenation of strs separated by delim; empty if strs
*         is empty
*/
std::string string_join(const std::vector<std::string>& strs, char delim)
   {
   std::string joined;
   bool first = true;

   for(const std::string& piece : strs)
      {
      if(!first)
         joined.push_back(delim);

      joined.append(piece);
      first = false;
      }

   return joined;
   }
191 
192 /*
193 * Parse an ASN.1 OID string
194 */
195 std::vector<uint32_t> parse_asn1_oid(const std::string& oid)
196  {
197  std::string substring;
198  std::vector<uint32_t> oid_elems;
199 
200  for(auto i = oid.begin(); i != oid.end(); ++i)
201  {
202  char c = *i;
203 
204  if(c == '.')
205  {
206  if(substring.empty())
207  throw Invalid_OID(oid);
208  oid_elems.push_back(to_u32bit(substring));
209  substring.clear();
210  }
211  else
212  substring += c;
213  }
214 
215  if(substring.empty())
216  throw Invalid_OID(oid);
217  oid_elems.push_back(to_u32bit(substring));
218 
219  if(oid_elems.size() < 2)
220  throw Invalid_OID(oid);
221 
222  return oid_elems;
223  }
224 
225 /*
226 * X.500 String Comparison
227 */
228 bool x500_name_cmp(const std::string& name1, const std::string& name2)
229  {
230  auto p1 = name1.begin();
231  auto p2 = name2.begin();
232 
233  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
234  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
235 
236  while(p1 != name1.end() && p2 != name2.end())
237  {
238  if(Charset::is_space(*p1))
239  {
240  if(!Charset::is_space(*p2))
241  return false;
242 
243  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
244  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
245 
246  if(p1 == name1.end() && p2 == name2.end())
247  return true;
248  if(p1 == name1.end() || p2 == name2.end())
249  return false;
250  }
251 
252  if(!Charset::caseless_cmp(*p1, *p2))
253  return false;
254  ++p1;
255  ++p2;
256  }
257 
258  while((p1 != name1.end()) && Charset::is_space(*p1)) ++p1;
259  while((p2 != name2.end()) && Charset::is_space(*p2)) ++p2;
260 
261  if((p1 != name1.end()) || (p2 != name2.end()))
262  return false;
263  return true;
264  }
265 
266 /*
267 * Convert a decimal-dotted string to binary IP
268 */
269 uint32_t string_to_ipv4(const std::string& str)
270  {
271  std::vector<std::string> parts = split_on(str, '.');
272 
273  if(parts.size() != 4)
274  throw Decoding_Error("Invalid IP string " + str);
275 
276  uint32_t ip = 0;
277 
278  for(auto part = parts.begin(); part != parts.end(); ++part)
279  {
280  uint32_t octet = to_u32bit(*part);
281 
282  if(octet > 255)
283  throw Decoding_Error("Invalid IP string " + str);
284 
285  ip = (ip << 8) | (octet & 0xFF);
286  }
287 
288  return ip;
289  }
290 
291 /*
292 * Convert an IP address to decimal-dotted string
293 */
294 std::string ipv4_to_string(uint32_t ip)
295  {
296  std::string str;
297 
298  for(size_t i = 0; i != sizeof(ip); ++i)
299  {
300  if(i)
301  str += ".";
302  str += std::to_string(get_byte(i, ip));
303  }
304 
305  return str;
306  }
307 
/**
* Return a copy of a string with selected characters removed.
*
* @param str the input string
* @param chars the set of characters to drop
* @return str with every character found in chars removed
*/
std::string erase_chars(const std::string& str, const std::set<char>& chars)
   {
   std::string kept;

   for(const char c : str)
      {
      const bool drop = (chars.find(c) != chars.end());

      if(!drop)
         kept.push_back(c);
      }

   return kept;
   }
318 
/**
* Return a copy of a string with selected characters replaced.
*
* @param str the input string
* @param chars the set of characters to replace
* @param to_char the character substituted for each match
* @return str with every character found in chars replaced by to_char
*/
std::string replace_chars(const std::string& str,
                          const std::set<char>& chars,
                          char to_char)
   {
   std::string result = str;

   for(char& ch : result)
      {
      if(chars.find(ch) != chars.end())
         ch = to_char;
      }

   return result;
   }
331 
/**
* Return a copy of a string with one character replaced by another.
*
* @param str the input string
* @param from_char the character to look for
* @param to_char the character substituted for each match
* @return str with every from_char replaced by to_char
*/
std::string replace_char(const std::string& str, char from_char, char to_char)
   {
   std::string result = str;
   std::replace(result.begin(), result.end(), from_char, to_char);
   return result;
   }
342 
namespace {

/**
* Return a lower-cased copy of a string.
*
* Each character is passed to the <cctype> functions as an unsigned
* char value; only characters std::isalpha accepts are converted.
*
* @param in the input string
* @return in with alphabetic characters converted by std::tolower
*/
std::string tolower_string(const std::string& in)
   {
   std::string lowered = in;

   for(char& ch : lowered)
      {
      // <cctype> functions require a value representable as unsigned char
      const int as_uchar = static_cast<unsigned char>(ch);

      if(std::isalpha(as_uchar))
         ch = static_cast<char>(std::tolower(as_uchar));
      }

   return lowered;
   }

}
358 
359 bool host_wildcard_match(const std::string& issued_, const std::string& host_)
360  {
361  const std::string issued = tolower_string(issued_);
362  const std::string host = tolower_string(host_);
363 
364  if(host.empty() || issued.empty())
365  return false;
366 
367  /*
368  If there are embedded nulls in your issued name
369  Well I feel bad for you son
370  */
371  if(std::count(issued.begin(), issued.end(), char(0)) > 0)
372  return false;
373 
374  // If more than one wildcard, then issued name is invalid
375  const size_t stars = std::count(issued.begin(), issued.end(), '*');
376  if(stars > 1)
377  return false;
378 
379  // '*' is not a valid character in DNS names so should not appear on the host side
380  if(std::count(host.begin(), host.end(), '*') != 0)
381  return false;
382 
383  // Similarly a DNS name can't end in .
384  if(host[host.size() - 1] == '.')
385  return false;
386 
387  // And a host can't have an empty name component, so reject that
388  if(host.find("..") != std::string::npos)
389  return false;
390 
391  // Exact match: accept
392  if(issued == host)
393  {
394  return true;
395  }
396 
397  /*
398  Otherwise it might be a wildcard
399 
400  If the issued size is strictly longer than the hostname size it
401  couldn't possibly be a match, even if the issued value is a
402  wildcard. The only exception is when the wildcard ends up empty
403  (eg www.example.com matches www*.example.com)
404  */
405  if(issued.size() > host.size() + 1)
406  {
407  return false;
408  }
409 
410  // If no * at all then not a wildcard, and so not a match
411  if(stars != 1)
412  {
413  return false;
414  }
415 
416  /*
417  Now walk through the issued string, making sure every character
418  matches. When we come to the (singular) '*', jump forward in the
419  hostname by the corresponding amount. We know exactly how much
420  space the wildcard takes because it must be exactly `len(host) -
421  len(issued) + 1 chars`.
422 
423  We also verify that the '*' comes in the leftmost component, and
424  doesn't skip over any '.' in the hostname.
425  */
426  size_t dots_seen = 0;
427  size_t host_idx = 0;
428 
429  for(size_t i = 0; i != issued.size(); ++i)
430  {
431  dots_seen += (issued[i] == '.');
432 
433  if(issued[i] == '*')
434  {
435  // Fail: wildcard can only come in leftmost component
436  if(dots_seen > 0)
437  {
438  return false;
439  }
440 
441  /*
442  Since there is only one * we know the tail of the issued and
443  hostname must be an exact match. In this case advance host_idx
444  to match.
445  */
446  const size_t advance = (host.size() - issued.size() + 1);
447 
448  if(host_idx + advance > host.size()) // shouldn't happen
449  return false;
450 
451  // Can't be any intervening .s that we would have skipped
452  if(std::count(host.begin() + host_idx,
453  host.begin() + host_idx + advance, '.') != 0)
454  return false;
455 
456  host_idx += advance;
457  }
458  else
459  {
460  if(issued[i] != host[host_idx])
461  {
462  return false;
463  }
464 
465  host_idx += 1;
466  }
467  }
468 
469  // Wildcard issued name must have at least 3 components
470  if(dots_seen < 2)
471  {
472  return false;
473  }
474 
475  return true;
476  }
477 
478 }
uint16_t to_uint16(const std::string &str)
Definition: parsing.cpp:21
std::string erase_chars(const std::string &str, const std::set< char > &chars)
Definition: parsing.cpp:308
std::vector< std::string > split_on(const std::string &str, char delim)
Definition: parsing.cpp:144
std::vector< std::string > split_on_pred(const std::string &str, std::function< bool(char)> pred)
Definition: parsing.cpp:149
std::string to_string(const BER_Object &obj)
Definition: asn1_obj.cpp:210
uint32_t to_u32bit(const std::string &str)
Definition: parsing.cpp:31
uint32_t timespec_to_u32bit(const std::string &timespec)
Definition: parsing.cpp:60
bool caseless_cmp(char a, char b)
Definition: charset.cpp:275
bool host_wildcard_match(const std::string &issued_, const std::string &host_)
Definition: parsing.cpp:359
std::string name
std::string ipv4_to_string(uint32_t ip)
Definition: parsing.cpp:294
std::string replace_char(const std::string &str, char from_char, char to_char)
Definition: parsing.cpp:332
bool x500_name_cmp(const std::string &name1, const std::string &name2)
Definition: parsing.cpp:228
bool is_space(char c)
Definition: charset.cpp:221
std::vector< std::string > parse_algorithm_name(const std::string &namex)
Definition: parsing.cpp:91
Definition: alg_id.cpp:13
uint32_t string_to_ipv4(const std::string &str)
Definition: parsing.cpp:269
std::string replace_chars(const std::string &str, const std::set< char > &chars, char to_char)
Definition: parsing.cpp:319
bool is_digit(char c)
Definition: charset.cpp:210
uint8_t get_byte(size_t byte_num, T input)
Definition: loadstor.h:39
std::vector< uint32_t > parse_asn1_oid(const std::string &oid)
Definition: parsing.cpp:195
std::string string_join(const std::vector< std::string > &strs, char delim)
Definition: parsing.cpp:178