|
|
|
@ -116,18 +116,119 @@ std::string ReplaceAll(std::string result, const std::string& src, const std::st |
|
|
|
} |
|
|
|
|
|
|
|
/// Converts a UTF-16 string to UTF-8.
///
/// Surrogate pairs are combined into one code point and written as a
/// four-byte sequence. An unpaired surrogate (a high surrogate that is not
/// followed by a low surrogate, or a low surrogate on its own) has no
/// well-formed UTF-8 encoding, so it is replaced with U+FFFD.
///
/// @param input UTF-16 code units to convert; may be empty.
/// @return The UTF-8 encoding of `input`.
std::string UTF16ToUTF8(std::u16string_view input) {
  constexpr uint32_t kReplacement = 0xFFFD;

  std::string result;
  // Every code unit produces at least one byte, so this is a lower bound that
  // avoids the first few reallocations.
  result.reserve(input.size());

  for (size_t i = 0; i < input.size(); ++i) {
    uint32_t codepoint = input[i];

    if (codepoint >= 0xD800 && codepoint <= 0xDBFF) {
      // High surrogate: only valid when immediately followed by a low one.
      const bool has_next = i + 1 < input.size();
      const uint32_t low = has_next ? static_cast<uint32_t>(input[i + 1]) : 0u;
      if (low >= 0xDC00 && low <= 0xDFFF) {
        codepoint = ((codepoint - 0xD800) << 10) + (low - 0xDC00) + 0x10000;
        ++i;  // The low surrogate has been consumed as part of the pair.
      } else {
        // Leave the following unit alone so it is processed on its own.
        codepoint = kReplacement;
      }
    } else if (codepoint >= 0xDC00 && codepoint <= 0xDFFF) {
      // Low surrogate with no preceding high surrogate.
      codepoint = kReplacement;
    }

    if (codepoint <= 0x7F) {
      result.push_back(static_cast<char>(codepoint));
    } else if (codepoint <= 0x7FF) {
      result.push_back(static_cast<char>(0xC0 | (codepoint >> 6)));
      result.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
    } else if (codepoint <= 0xFFFF) {
      result.push_back(static_cast<char>(0xE0 | (codepoint >> 12)));
      result.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F)));
      result.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
    } else {
      result.push_back(static_cast<char>(0xF0 | (codepoint >> 18)));
      result.push_back(static_cast<char>(0x80 | ((codepoint >> 12) & 0x3F)));
      result.push_back(static_cast<char>(0x80 | ((codepoint >> 6) & 0x3F)));
      result.push_back(static_cast<char>(0x80 | (codepoint & 0x3F)));
    }
  }

  return result;
}
|
|
|
|
|
|
|
/// Converts a UTF-8 byte string to UTF-16.
///
/// Code points above U+FFFF are written as a surrogate pair. Every malformed
/// sequence produces exactly one U+FFFD in the output: an invalid lead byte,
/// a lead byte whose continuation bytes are missing or wrong, an overlong
/// encoding, an encoded surrogate (U+D800..U+DFFF), or a value above U+10FFFF.
/// A byte that fails the continuation check is not consumed; decoding resumes
/// at that byte so a valid character following a broken one is preserved.
///
/// @param input UTF-8 bytes to convert; may be empty.
/// @return The UTF-16 encoding of `input`.
std::u16string UTF8ToUTF16(std::string_view input) {
  constexpr uint32_t kReplacement = 0xFFFD;

  std::u16string result;
  // Each output code unit consumes at least one input byte (a 4-byte sequence
  // yields two units), so the byte count is an upper bound.
  result.reserve(input.size());

  size_t i = 0;
  while (i < input.size()) {
    const unsigned char lead = static_cast<unsigned char>(input[i]);
    uint32_t codepoint = 0;
    size_t extra = 0;        // Number of continuation bytes the lead announces.
    uint32_t min_value = 0;  // Smallest code point allowed for this length.

    if ((lead & 0x80) == 0) {
      codepoint = lead;
    } else if ((lead & 0xE0) == 0xC0) {
      codepoint = lead & 0x1F;
      extra = 1;
      min_value = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {
      codepoint = lead & 0x0F;
      extra = 2;
      min_value = 0x800;
    } else if ((lead & 0xF8) == 0xF0) {
      codepoint = lead & 0x07;
      extra = 3;
      min_value = 0x10000;
    } else {
      // Stray continuation byte or a lead byte that UTF-8 never uses.
      result.push_back(static_cast<char16_t>(kReplacement));
      ++i;
      continue;
    }

    // `consumed` counts the lead plus every continuation byte accepted so far.
    size_t consumed = 1;
    bool well_formed = true;
    while (consumed <= extra) {
      if (i + consumed >= input.size()) {
        well_formed = false;  // Input ends in the middle of the sequence.
        break;
      }
      const unsigned char next = static_cast<unsigned char>(input[i + consumed]);
      if ((next & 0xC0) != 0x80) {
        well_formed = false;  // Not a continuation byte; leave it unconsumed.
        break;
      }
      codepoint = (codepoint << 6) | (next & 0x3F);
      ++consumed;
    }

    const bool is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
    if (!well_formed || codepoint < min_value || codepoint > 0x10FFFF ||
        is_surrogate) {
      codepoint = kReplacement;
    }

    if (codepoint <= 0xFFFF) {
      result.push_back(static_cast<char16_t>(codepoint));
    } else {
      const uint32_t offset = codepoint - 0x10000;
      result.push_back(static_cast<char16_t>(0xD800 + (offset >> 10)));
      result.push_back(static_cast<char16_t>(0xDC00 + (offset & 0x3FF)));
    }

    i += consumed;
  }

  return result;
}
|
|
|
|
|
|
|
/// Converts a UTF-8 byte string to UTF-32.
///
/// Every malformed sequence produces exactly one U+FFFD in the output: an
/// invalid lead byte, a lead byte whose continuation bytes are missing or
/// wrong, an overlong encoding, an encoded surrogate (U+D800..U+DFFF), or a
/// value above U+10FFFF. A byte that fails the continuation check is not
/// consumed; decoding resumes at that byte so a valid character following a
/// broken one is preserved.
///
/// @param input UTF-8 bytes to convert; may be empty.
/// @return The decoded code points, one per element.
std::u32string UTF8ToUTF32(std::string_view input) {
  constexpr uint32_t kReplacement = 0xFFFD;

  std::u32string result;
  // Each output element consumes at least one input byte, so the byte count
  // is an upper bound on the number of code points.
  result.reserve(input.size());

  size_t i = 0;
  while (i < input.size()) {
    const unsigned char lead = static_cast<unsigned char>(input[i]);
    uint32_t codepoint = 0;
    size_t extra = 0;        // Number of continuation bytes the lead announces.
    uint32_t min_value = 0;  // Smallest code point allowed for this length.

    if ((lead & 0x80) == 0) {
      codepoint = lead;
    } else if ((lead & 0xE0) == 0xC0) {
      codepoint = lead & 0x1F;
      extra = 1;
      min_value = 0x80;
    } else if ((lead & 0xF0) == 0xE0) {
      codepoint = lead & 0x0F;
      extra = 2;
      min_value = 0x800;
    } else if ((lead & 0xF8) == 0xF0) {
      codepoint = lead & 0x07;
      extra = 3;
      min_value = 0x10000;
    } else {
      // Stray continuation byte or a lead byte that UTF-8 never uses.
      result.push_back(static_cast<char32_t>(kReplacement));
      ++i;
      continue;
    }

    // `consumed` counts the lead plus every continuation byte accepted so far.
    size_t consumed = 1;
    bool well_formed = true;
    while (consumed <= extra) {
      if (i + consumed >= input.size()) {
        well_formed = false;  // Input ends in the middle of the sequence.
        break;
      }
      const unsigned char next = static_cast<unsigned char>(input[i + consumed]);
      if ((next & 0xC0) != 0x80) {
        well_formed = false;  // Not a continuation byte; leave it unconsumed.
        break;
      }
      codepoint = (codepoint << 6) | (next & 0x3F);
      ++consumed;
    }

    const bool is_surrogate = codepoint >= 0xD800 && codepoint <= 0xDFFF;
    if (!well_formed || codepoint < min_value || codepoint > 0x10FFFF ||
        is_surrogate) {
      codepoint = kReplacement;
    }

    result.push_back(static_cast<char32_t>(codepoint));
    i += consumed;
  }

  return result;
}
|
|
|
|
|
|
|
#ifdef _WIN32
|
|
|
|
|