Botan 3.12.0
Crypto and TLS for C&&
charset.h
Go to the documentation of this file.
1/*
2* Character Set Conversions
3* (C) 1999-2007 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#ifndef BOTAN_CHARSET_H_
9#define BOTAN_CHARSET_H_
10
11#include <botan/types.h>
12#include <string>
13#include <string_view>
14#include <vector>
15
16namespace Botan {
17
18// TODO convert these to take arguments as spans or std::string_view
19
20/**
21* Convert a sequence of UCS-2 (big endian) characters to a UTF-8 string
22* This is used for ASN.1 BMPString type
23* @param ucs2 the sequence of UCS-2 characters
24* @param len length of ucs2 in bytes, must be a multiple of 2
25*/
26BOTAN_TEST_API std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len);
27
28/**
29 * Convert a UTF-8 string to a sequence of UCS-2 (big endian) characters
30 * This is used for ASN.1 BMPString type
31 * @param utf8 the UTF-8 string
32 * @return a vector of bytes containing the UCS-2 (big endian) encoding
33 * @throws Decoding_Error if the input is not valid UTF-8 (including overlong encodings,
34 * surrogate code points, or values outside Unicode), or if a code point exceeds
35 * U+FFFF and cannot be represented in UCS-2
36 */
37BOTAN_TEST_API std::vector<uint8_t> utf8_to_ucs2(const std::string& utf8);
38
39/**
40* Convert a sequence of UCS-4 (big endian) characters to a UTF-8 string
41* This is used for ASN.1 UniversalString type
42* @param ucs4 the sequence of UCS-4 characters
43* @param len length of ucs4 in bytes, must be a multiple of 4
44*/
45BOTAN_TEST_API std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len);
46
47/**
48 * Convert a UTF-8 string to a sequence of UCS-4 (big endian) characters
49 * This is used for ASN.1 UniversalString type
50 * @param utf8 the UTF-8 string
51 * @return a vector of bytes containing the UCS-4 (big endian) encoding
52 * @throws Decoding_Error if the input is not valid UTF-8 (including overlong encodings,
53 * surrogate code points, or values outside the Unicode scalar value range U+0000..U+10FFFF)
54 */
55BOTAN_TEST_API std::vector<uint8_t> utf8_to_ucs4(const std::string& utf8);
56
57BOTAN_TEST_API std::string latin1_to_utf8(const uint8_t latin1[], size_t len);
58
59/**
60* Return true if this string seems to contain a valid sequence of UTF-8
61*/
62bool is_valid_utf8(const std::string& str);
63
64/**
65* Return a string containing 'c', quoted and possibly escaped
66*
67* This is used when creating an error message noting an invalid character
68* in some codec (for example during hex decoding)
69*
70* Currently this function escapes tab, newlines and carriage return
71* as "\t", "\n", and "\r", and also escapes characters > 0x7F as
72* "\xHH" where HH is the hex code.
73*/
74std::string format_char_for_display(char c);
75
76} // namespace Botan
77
78#endif
#define BOTAN_TEST_API
Definition api.h:41
std::string format_char_for_display(char c)
Definition charset.cpp:197
std::vector< uint8_t > utf8_to_ucs4(const std::string &utf8)
Definition charset.cpp:169
bool is_valid_utf8(const std::string &utf8)
Definition charset.cpp:106
std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len)
Definition charset.cpp:119
std::string latin1_to_utf8(const uint8_t chars[], size_t len)
Definition charset.cpp:188
std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
Definition charset.cpp:153
std::vector< uint8_t > utf8_to_ucs2(const std::string &utf8)
Definition charset.cpp:135