You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

215 lines
4.5 KiB
C++

/* Copyright (C) 2011 Que Rongwen
*
* This is free, open-source software; you may freely modify and redistribute it.
* Commercial use is prohibited.
*
* Contact the original author: querw@sina.com
*/
// ATW: implementation of the ANSI / wide-character / UTF-8 string conversion helpers declared in ATW.h.
// by Ted.Que - Que's C++ Studio
// 2010-11-12
// Character encoding conversion
#include "pch.h"
#include "ATW.h"
// Converts a null-terminated wide string to a multibyte string in the given code page.
//
// pwszText  - null-terminated wide string; NULL yields an empty string.
// uCodePage - target code page passed straight to WideCharToMultiByte
//             (the callers in this file pass CP_ACP or CP_UTF8).
//
// Returns the converted text without the terminating null, or an empty string
// if either WideCharToMultiByte call reports failure.
std::string __do_w_to_a_utf8(const wchar_t* pwszText, UINT uCodePage)
{
    // Null pointer: nothing to convert.
    if (pwszText == NULL) return std::string();

    // First pass: query the required size in bytes. Because the input length is
    // given as -1, the reported size includes the terminating null.
    const int nNeedSize = WideCharToMultiByte(uCodePage, 0, pwszText, -1, NULL, 0, NULL, NULL);
    if (nNeedSize <= 0) return std::string();

    // Second pass: convert straight into a std::string so the buffer is released
    // automatically on every exit path (the former new[]/delete[] pair leaked
    // if the string assignment threw).
    std::string strRet(static_cast<std::string::size_type>(nNeedSize), '\0');
    const int nWritten = WideCharToMultiByte(uCodePage, 0, pwszText, -1, &strRet[0], nNeedSize, NULL, NULL);
    if (nWritten <= 0) return std::string();

    // Drop the terminating null that the API wrote as the last character.
    strRet.resize(static_cast<std::string::size_type>(nWritten - 1));
    return strRet;
}
// Converts a null-terminated multibyte string in the given code page to a wide string.
//
// pszText   - null-terminated multibyte string; NULL yields an empty string.
// uCodePage - source code page passed straight to MultiByteToWideChar
//             (the callers in this file pass CP_ACP or CP_UTF8).
//
// Returns the converted text without the terminating null, or an empty string
// if either MultiByteToWideChar call reports failure.
std::wstring __do_a_utf8_to_w(const char* pszText, UINT uCodePage)
{
    // Null pointer: nothing to convert.
    if (pszText == NULL) return std::wstring();

    // First pass: query the required size in wide characters. Because the input
    // length is given as -1, the reported size includes the terminating null.
    const int nNeedSize = MultiByteToWideChar(uCodePage, 0, pszText, -1, NULL, 0);
    if (nNeedSize <= 0) return std::wstring();

    // Second pass: convert straight into a std::wstring so the buffer is released
    // automatically on every exit path (the former new[]/delete[] pair leaked
    // if the string assignment threw).
    std::wstring strRet(static_cast<std::wstring::size_type>(nNeedSize), L'\0');
    const int nWritten = MultiByteToWideChar(uCodePage, 0, pszText, -1, &strRet[0], nNeedSize);
    if (nWritten <= 0) return std::wstring();

    // Drop the terminating null that the API wrote as the last character.
    strRet.resize(static_cast<std::wstring::size_type>(nWritten - 1));
    return strRet;
}
// Wide string -> multibyte string in the CP_ACP code page.
// The text is handed on as a null-terminated C string, so conversion stops at
// the first embedded null character, if there is one.
std::string WtoA(const std::wstring& strText)
{
    const wchar_t* const pwszWide = strText.c_str();
    return __do_w_to_a_utf8(pwszWide, CP_ACP);
}
// Null-terminated wide string -> multibyte string in the CP_ACP code page.
// The pointer is forwarded unchanged to __do_w_to_a_utf8, which returns an
// empty string for NULL.
std::string WtoA(const wchar_t* pwszText)
{
    std::string strAnsi = __do_w_to_a_utf8(pwszText, CP_ACP);
    return strAnsi;
}
// Multibyte string in the CP_ACP code page -> wide string.
// The text is handed on as a null-terminated C string, so conversion stops at
// the first embedded null character, if there is one.
std::wstring AtoW(const std::string& strText)
{
    const char* const pszAnsi = strText.c_str();
    return __do_a_utf8_to_w(pszAnsi, CP_ACP);
}
// Null-terminated multibyte string in the CP_ACP code page -> wide string.
// The pointer is forwarded unchanged to __do_a_utf8_to_w, which returns an
// empty string for NULL.
std::wstring AtoW(const char* pszText)
{
    std::wstring strWide = __do_a_utf8_to_w(pszText, CP_ACP);
    return strWide;
}
// Wide string -> UTF-8 encoded string (CP_UTF8).
// The text is handed on as a null-terminated C string, so conversion stops at
// the first embedded null character, if there is one.
std::string WtoUTF8(const std::wstring& strText)
{
    const wchar_t* const pwszWide = strText.c_str();
    return __do_w_to_a_utf8(pwszWide, CP_UTF8);
}
// Null-terminated wide string -> UTF-8 encoded string (CP_UTF8).
// The pointer is forwarded unchanged to __do_w_to_a_utf8, which returns an
// empty string for NULL.
std::string WtoUTF8(const wchar_t* pwszText)
{
    std::string strUtf8 = __do_w_to_a_utf8(pwszText, CP_UTF8);
    return strUtf8;
}
// UTF-8 encoded string (CP_UTF8) -> wide string.
// The text is handed on as a null-terminated C string, so conversion stops at
// the first embedded null character, if there is one.
std::wstring UTF8toW(const std::string& strText)
{
    const char* const pszUtf8 = strText.c_str();
    return __do_a_utf8_to_w(pszUtf8, CP_UTF8);
}
// Null-terminated UTF-8 encoded string (CP_UTF8) -> wide string.
// The pointer is forwarded unchanged to __do_a_utf8_to_w, which returns an
// empty string for NULL.
std::wstring UTF8toW(const char* pszText)
{
    std::wstring strWide = __do_a_utf8_to_w(pszText, CP_UTF8);
    return strWide;
}
// UTF-8 string -> CP_ACP multibyte string, going through a wide-string
// intermediate (UTF8toW followed by WtoA).
std::string UTF8toA(const std::string& src)
{
    const std::wstring wstrWide = UTF8toW(src);
    return WtoA(wstrWide);
}
// Null-terminated UTF-8 string -> CP_ACP multibyte string, going through a
// wide-string intermediate (UTF8toW followed by WtoA).
std::string UTF8toA(const char* src)
{
    const std::wstring wstrWide = UTF8toW(src);
    return WtoA(wstrWide);
}
// CP_ACP multibyte string -> UTF-8 string, going through a wide-string
// intermediate (AtoW followed by WtoUTF8).
std::string AtoUTF8(const std::string& src)
{
    const std::wstring wstrWide = AtoW(src);
    return WtoUTF8(wstrWide);
}
// Null-terminated CP_ACP multibyte string -> UTF-8 string, going through a
// wide-string intermediate (AtoW followed by WtoUTF8).
std::string AtoUTF8(const char* src)
{
    const std::wstring wstrWide = AtoW(src);
    return WtoUTF8(wstrWide);
}
/*
A UTF-8 encoded character may occupy up to 6 bytes:
1 byte   0xxxxxxx
2 bytes  110xxxxx 10xxxxxx
3 bytes  1110xxxx 10xxxxxx 10xxxxxx
4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
5 bytes  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
6 bytes  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
*/
// Return value:
// 0 -> the input string conforms to the UTF-8 encoding rules
// -1 -> an invalid UTF-8 lead byte was detected
// -2 -> an invalid UTF-8 continuation byte was detected.
// Checks whether a null-terminated byte string is structurally valid UTF-8.
//
// Only the byte layout is verified: every lead byte must be followed by the
// number of 10xxxxxx continuation bytes it announces. The 5- and 6-byte forms
// are accepted; overlong encodings and surrogate code points are not rejected.
// A string that starts with the UTF-8 BOM (EF BB BF) is accepted immediately
// without inspecting the rest.
//
// pszSrc - null-terminated input; NULL is treated like an empty string.
//
// Returns:
//    0 -> the input is structurally valid UTF-8 (also for NULL / empty input)
//   -1 -> an invalid lead byte was found
//   -2 -> a continuation byte was expected but the byte found is not 10xxxxxx,
//         or the string ended in the middle of a multi-byte sequence
int IsTextUTF8(const char* pszSrc)
{
    // Guard against a null pointer, as the conversion helpers in this file do.
    if (pszSrc == NULL) return 0;

    // Inspect the data as unsigned bytes: with a signed char the high-bit bytes
    // would be negative and the shift comparisons below would not work.
    const unsigned char* pCur = reinterpret_cast<const unsigned char*>(pszSrc);

    // BOM present. Short-circuit evaluation ensures no byte past the terminating
    // null is ever read.
    if (pCur[0] == 0xEF && pCur[1] == 0xBB && pCur[2] == 0xBF)
    {
        return 0;
    }

    // No BOM: walk the bytes and validate the lead/continuation structure.
    int nPending = 0; // continuation bytes still owed by the current character
    for (; *pCur != 0; ++pCur)
    {
        const unsigned char ch = *pCur;

        if (nPending > 0)
        {
            // Inside a multi-byte character: the byte must look like 10xxxxxx.
            if ((ch >> 6) != 0x02) return -2;
            --nPending;
            continue;
        }

        // Start of a new character: classify the lead byte by its high bits.
        if ((ch >> 7) == 0x00)      nPending = 0; // 0xxxxxxx, single byte
        else if ((ch >> 5) == 0x06) nPending = 1; // 110xxxxx, 2-byte character
        else if ((ch >> 4) == 0x0E) nPending = 2; // 1110xxxx, 3-byte character
        else if ((ch >> 3) == 0x1E) nPending = 3; // 11110xxx, 4-byte character
        else if ((ch >> 2) == 0x3E) nPending = 4; // 111110xx, 5-byte character
        else if ((ch >> 1) == 0x7E) nPending = 5; // 1111110x, 6-byte character
        else return -1;                           // not a valid lead byte
    }

    // If the string ended while continuation bytes were still owed, the last
    // character is truncated; report that as a missing continuation byte
    // instead of leaking the internal counter to the caller.
    return (nPending == 0) ? 0 : -2;
}