Botan 3.11.1
Crypto and TLS for C++
charset.h
Go to the documentation of this file.
1/*
2* Character Set Conversions
3* (C) 1999-2007 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#ifndef BOTAN_CHARSET_H_
9#define BOTAN_CHARSET_H_
10
11#include <botan/types.h>
12#include <string>
13#include <vector>
14
15namespace Botan {
16
17/**
18* Convert a sequence of UCS-2 (big endian) characters to a UTF-8 string
19* This is used for ASN.1 BMPString type
20* @param ucs2 the sequence of UCS-2 characters
21* @param len length of ucs2 in bytes, must be a multiple of 2
* @return a string containing the UTF-8 encoding of the input characters
22*/
23BOTAN_TEST_API std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len);
24
25/**
26 * Convert a UTF-8 string to a sequence of UCS-2 (big endian) characters
27 * This is used for ASN.1 BMPString type
28 * @param utf8 the UTF-8 string
29 * @return a vector of bytes containing the UCS-2 (big endian) encoding, two bytes per character
30 * @throws Decoding_Error if the input is not valid UTF-8 (including overlong encodings,
31 * surrogate code points, or values outside the Unicode range), or if a code point exceeds
32 * U+FFFF and cannot be represented in UCS-2
33 */
34BOTAN_TEST_API std::vector<uint8_t> utf8_to_ucs2(const std::string& utf8);
35
36/**
37* Convert a sequence of UCS-4 (big endian) characters to a UTF-8 string
38* This is used for ASN.1 UniversalString type
39* @param ucs4 the sequence of UCS-4 characters
40* @param len length of ucs4 in bytes, must be a multiple of 4
* @return a string containing the UTF-8 encoding of the input characters
41*/
42BOTAN_TEST_API std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len);
43
44/**
45 * Convert a UTF-8 string to a sequence of UCS-4 (big endian) characters
46 * This is used for ASN.1 UniversalString type
47 * @param utf8 the UTF-8 string
48 * @return a vector of bytes containing the UCS-4 (big endian) encoding, four bytes per character
49 * @throws Decoding_Error if the input is not valid UTF-8 (including overlong encodings,
50 * surrogate code points, or values outside the Unicode scalar value range U+0000..U+10FFFF)
51 */
52BOTAN_TEST_API std::vector<uint8_t> utf8_to_ucs4(const std::string& utf8);
53
/**
* Convert a sequence of Latin-1 characters to a UTF-8 string
* @param latin1 the sequence of Latin-1 characters
* @param len length of latin1 in bytes
* @return a string containing the UTF-8 encoding of the input characters
*/
54BOTAN_TEST_API std::string latin1_to_utf8(const uint8_t latin1[], size_t len);
55
56/**
57* Return a string containing 'c', quoted and possibly escaped
58*
59* This is used when creating an error message noting an invalid character
60* in some codec (for example during hex decoding)
61*
62* Currently this function escapes tab, newlines and carriage return
63* as "\t", "\n", and "\r", and also escapes characters > 0x7F as
64* "\xHH" where HH is the hex code.
*
* @param c the character to format
* @return the quoted (and, where needed, escaped) representation of c
65*/
66std::string format_char_for_display(char c);
67
68} // namespace Botan
69
70#endif
#define BOTAN_TEST_API
Definition api.h:41
std::string format_char_for_display(char c)
Definition charset.cpp:184
std::vector< uint8_t > utf8_to_ucs4(const std::string &utf8)
Definition charset.cpp:156
std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len)
Definition charset.cpp:106
std::string latin1_to_utf8(const uint8_t chars[], size_t len)
Definition charset.cpp:175
std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
Definition charset.cpp:140
std::vector< uint8_t > utf8_to_ucs2(const std::string &utf8)
Definition charset.cpp:122