1 C++ UTF8编码下的全角字符转半角字符

如果输入的 std::string 字符串是 UTF-8 编码,由于 UTF-8 是变长编码(每个字符占用 1~4 个字节),无法直接按字节逐个判断和替换字符,因此需要先将 UTF-8 编码的字符串转换为宽字符(Unicode 编码)的字符串,再进行全角到半角字符的转换,具体的转换过程参考以下代码:

#include <iostream>
#include <locale>
#include <codecvt>

/// Decodes a UTF-8 encoded byte string into a wide-character string.
///
/// The conversion is performed with std::wstring_convert using the
/// std::codecvt_utf8<wchar_t> facet.
///
/// @param utf8Str  Bytes to decode.
/// @return The decoded wide string, or an empty string when the conversion
///         raises any std::exception (the error is deliberately swallowed,
///         so callers cannot tell a failure from an empty input).
///
/// NOTE(review): where wchar_t is 16 bits wide this facet presumably cannot
/// represent characters outside the Basic Multilingual Plane - confirm before
/// relying on it for such input.
std::wstring Std_UTF8ToUnicode(const std::string& utf8Str)
{
    using Utf8Converter = std::wstring_convert<std::codecvt_utf8<wchar_t>>;

    try
    {
        Utf8Converter converter;
        return converter.from_bytes(utf8Str);
    }
    catch (const std::exception&)
    {
        // Best-effort contract: a failed conversion yields an empty result.
        return std::wstring();
    }
}

/// Encodes a wide-character string as a UTF-8 byte string.
///
/// The conversion is performed with std::wstring_convert using the
/// std::codecvt_utf8<wchar_t> facet.
///
/// @param unicodeStr  Wide string to encode.
/// @return The UTF-8 bytes, or an empty string when the conversion raises any
///         std::exception (the error is deliberately swallowed, so callers
///         cannot tell a failure from an empty input).
std::string Std_UnicodeToUTF8(const std::wstring& unicodeStr)
{
    using Utf8Converter = std::wstring_convert<std::codecvt_utf8<wchar_t>>;

    try
    {
        Utf8Converter converter;
        return converter.to_bytes(unicodeStr);
    }
    catch (const std::exception&)
    {
        // Best-effort contract: a failed conversion yields an empty result.
        return std::string();
    }
}


/// Replaces fullwidth characters in a UTF-8 string with their halfwidth
/// (ASCII) equivalents.
///
/// The input is decoded to a wide string with Std_UTF8ToUnicode, mapped one
/// character at a time, and re-encoded with Std_UnicodeToUTF8:
///   - U+3000 (ideographic space) becomes U+0020 (space);
///   - U+FF01..U+FF5E become U+0021..U+007E (offset 0xFEE0);
///   - every other character is copied unchanged.
///
/// @param srcStr  UTF-8 encoded input text.
/// @return The converted text as UTF-8. Because both conversion helpers return
///         an empty string on failure, input that cannot be decoded produces
///         an empty result.
static std::string DoubleByteCharToSingleByteChar_UTF8(const std::string& srcStr)
{
    constexpr wchar_t kIdeographicSpace = 0x3000;  // fullwidth space
    constexpr wchar_t kFullwidthFirst   = 0xFF01;  // fullwidth '!'
    constexpr wchar_t kFullwidthLast    = 0xFF5E;  // fullwidth '~'
    constexpr wchar_t kFullwidthOffset  = 0xFEE0;  // 0xFF01 - 0x21

    // Decode first: UTF-8 is variable-length, so the mapping cannot be done
    // byte by byte.
    const std::wstring wideSrc = Std_UTF8ToUnicode(srcStr);

    std::wstring wideDst;
    wideDst.reserve(wideSrc.size());  // the mapping is one-to-one

    for (const wchar_t ch : wideSrc)
    {
        if (ch == kIdeographicSpace)
        {
            wideDst += L' ';
        }
        else if (ch >= kFullwidthFirst && ch <= kFullwidthLast)
        {
            // The subtraction promotes to int; narrow back explicitly.
            wideDst += static_cast<wchar_t>(ch - kFullwidthOffset);
        }
        else
        {
            wideDst += ch;
        }
    }

    // Re-encode the mapped text as UTF-8.
    return Std_UnicodeToUTF8(wideDst);
}

/// Demo driver: runs one sample string through the fullwidth-to-halfwidth
/// conversion and prints the result to standard output.
int main()
{
    const std::string sample = u8"55555。你好。";

    std::cout << DoubleByteCharToSingleByteChar_UTF8(sample) << std::endl;

    return 0;
}