Botan 3.0.0
Crypto and TLS for C++
charset.cpp
Go to the documentation of this file.
1/*
2* Character Set Handling
3* (C) 1999-2007,2021 Jack Lloyd
4*
5* Botan is released under the Simplified BSD License (see license.txt)
6*/
7
8#include <botan/internal/charset.h>
9#include <botan/internal/loadstor.h>
10#include <botan/exceptn.h>
11#include <sstream>
12
13namespace Botan {
14
15namespace {
16
17void append_utf8_for(std::string& s, uint32_t c)
18 {
19 if(c >= 0xD800 && c < 0xE000)
20 throw Decoding_Error("Invalid Unicode character");
21
22 if(c <= 0x7F)
23 {
24 const uint8_t b0 = static_cast<uint8_t>(c);
25 s.push_back(static_cast<char>(b0));
26 }
27 else if(c <= 0x7FF)
28 {
29 const uint8_t b0 = 0xC0 | static_cast<uint8_t>(c >> 6);
30 const uint8_t b1 = 0x80 | static_cast<uint8_t>(c & 0x3F);
31 s.push_back(static_cast<char>(b0));
32 s.push_back(static_cast<char>(b1));
33 }
34 else if(c <= 0xFFFF)
35 {
36 const uint8_t b0 = 0xE0 | static_cast<uint8_t>(c >> 12);
37 const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
38 const uint8_t b2 = 0x80 | static_cast<uint8_t>(c & 0x3F);
39 s.push_back(static_cast<char>(b0));
40 s.push_back(static_cast<char>(b1));
41 s.push_back(static_cast<char>(b2));
42 }
43 else if(c <= 0x10FFFF)
44 {
45 const uint8_t b0 = 0xF0 | static_cast<uint8_t>(c >> 18);
46 const uint8_t b1 = 0x80 | static_cast<uint8_t>((c >> 12) & 0x3F);
47 const uint8_t b2 = 0x80 | static_cast<uint8_t>((c >> 6) & 0x3F);
48 const uint8_t b3 = 0x80 | static_cast<uint8_t>(c & 0x3F);
49 s.push_back(static_cast<char>(b0));
50 s.push_back(static_cast<char>(b1));
51 s.push_back(static_cast<char>(b2));
52 s.push_back(static_cast<char>(b3));
53 }
54 else
55 throw Decoding_Error("Invalid Unicode character");
56
57 }
58
59}
60
61std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len)
62 {
63 if(len % 2 != 0)
64 throw Decoding_Error("Invalid length for UCS-2 string");
65
66 const size_t chars = len / 2;
67
68 std::string s;
69 for(size_t i = 0; i != chars; ++i)
70 {
71 const uint32_t c = load_be<uint16_t>(ucs2, i);
72 append_utf8_for(s, c);
73 }
74
75 return s;
76 }
77
78std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
79 {
80 if(len % 4 != 0)
81 throw Decoding_Error("Invalid length for UCS-4 string");
82
83 const size_t chars = len / 4;
84
85 std::string s;
86 for(size_t i = 0; i != chars; ++i)
87 {
88 const uint32_t c = load_be<uint32_t>(ucs4, i);
89 append_utf8_for(s, c);
90 }
91
92 return s;
93 }
94
95/*
96* Convert from ISO 8859-1 to UTF-8
97*/
98std::string latin1_to_utf8(const uint8_t chars[], size_t len)
99 {
100 std::string s;
101 for(size_t i = 0; i != len; ++i)
102 {
103 const uint32_t c = static_cast<uint8_t>(chars[i]);
104 append_utf8_for(s, c);
105 }
106 return s;
107 }
108
/*
* Render a single char as a quoted, printable token for use in messages.
*
* - Tab, newline and carriage return become the escapes \t, \n and \r.
* - Any other byte that is not printable ASCII (C0 controls below 0x20,
*   DEL 0x7F, and everything from 0x80 up) becomes \xNN with exactly two
*   uppercase hex digits, so raw control bytes such as NUL or ESC never
*   reach the output.
* - Printable ASCII (0x20..0x7E) is emitted unchanged.
*
* The result is always wrapped in single quotes, e.g. 'a', '\n', '\xFF'.
*/
std::string format_char_for_display(char c)
   {
   // Work on the unsigned value so the range checks do not depend on
   // whether plain char is signed on this platform
   const unsigned char z = static_cast<unsigned char>(c);

   std::string out = "'";

   if(c == '\t')
      {
      out += "\\t";
      }
   else if(c == '\n')
      {
      out += "\\n";
      }
   else if(c == '\r')
      {
      out += "\\r";
      }
   else if(z < 0x20 || z >= 0x7F)
      {
      static const char hex_digits[] = "0123456789ABCDEF";

      out += "\\x";
      out.push_back(hex_digits[z >> 4]);
      out.push_back(hex_digits[z & 0x0F]);
      }
   else
      {
      out.push_back(c);
      }

   out += "'";

   return out;
   }
133
134}
135
Definition: alg_id.cpp:12
std::string format_char_for_display(char c)
Definition: charset.cpp:109
std::string ucs2_to_utf8(const uint8_t ucs2[], size_t len)
Definition: charset.cpp:61
constexpr uint32_t load_be< uint32_t >(const uint8_t in[], size_t off)
Definition: loadstor.h:190
constexpr uint16_t load_be< uint16_t >(const uint8_t in[], size_t off)
Definition: loadstor.h:150
std::string latin1_to_utf8(const uint8_t chars[], size_t len)
Definition: charset.cpp:98
std::string ucs4_to_utf8(const uint8_t ucs4[], size_t len)
Definition: charset.cpp:78