|
|
|
|
/* Copyright (C) 2011 <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
*
|
|
|
|
|
* <EFBFBD><EFBFBD><EFBFBD><EFBFBD>һ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>Դ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ɵ<EFBFBD><EFBFBD>ĺͷ<EFBFBD><EFBFBD><EFBFBD>.
|
|
|
|
|
* <EFBFBD><EFBFBD>ֹ<EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>ҵ<EFBFBD><EFBFBD>;.
|
|
|
|
|
*
|
|
|
|
|
* <EFBFBD><EFBFBD>ϵԭ<EFBFBD><EFBFBD><EFBFBD><EFBFBD>: querw@sina.com
|
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
// ATW.h: interface for the CBase64 class.
|
|
|
|
|
// by Ted.Que - Que's C++ Studio
|
|
|
|
|
// 2010-11-12
|
|
|
|
|
// Converts strings between character encodings.
|
|
|
|
|
#include "pch.h"
|
|
|
|
|
#include "ATW.h"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
std::string __do_w_to_a_utf8(const wchar_t* pwszText, UINT uCodePage)
|
|
|
|
|
{
|
|
|
|
|
// <20><>ָ<EFBFBD><D6B8><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
|
|
|
|
if (pwszText == NULL) return "";
|
|
|
|
|
|
|
|
|
|
// <20><EFBFBD><DEB7><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>Ҫ<EFBFBD>ij<EFBFBD><C4B3><EFBFBD>.
|
|
|
|
|
int nNeedSize = WideCharToMultiByte(uCodePage, 0, pwszText, -1, NULL, 0, NULL, NULL);
|
|
|
|
|
if (0 == nNeedSize) return "";
|
|
|
|
|
|
|
|
|
|
// <20><><EFBFBD><EFBFBD><EFBFBD>ռ<EFBFBD>,ת<><D7AA>.
|
|
|
|
|
char* pRet = new char[nNeedSize + 1]; // <20><>Ȼ<EFBFBD><C8BB><EFBFBD><EFBFBD>WideCharToMultiByte<74>ij<EFBFBD><C4B3><EFBFBD><EFBFBD>ǰ<EFBFBD><C7B0><EFBFBD> null <20>ַ<EFBFBD><D6B7>ij<EFBFBD><C4B3><EFBFBD>, <20><><EFBFBD>Ƕ<EFBFBD>+һ<><D2BB><EFBFBD>ַ<EFBFBD>.
|
|
|
|
|
memset(pRet, 0, nNeedSize + 1);
|
|
|
|
|
|
|
|
|
|
std::string strRet("");
|
|
|
|
|
if (0 == WideCharToMultiByte(uCodePage, 0, pwszText, -1, pRet, nNeedSize, NULL, NULL))
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
strRet = pRet;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
delete[]pRet;
|
|
|
|
|
return strRet;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::wstring __do_a_utf8_to_w(const char* pszText, UINT uCodePage)
|
|
|
|
|
{
|
|
|
|
|
// <20><>ָ<EFBFBD><D6B8>
|
|
|
|
|
if (pszText == NULL) return L"";
|
|
|
|
|
|
|
|
|
|
// <20><><EFBFBD>㳤<EFBFBD><E3B3A4>
|
|
|
|
|
int nNeedSize = MultiByteToWideChar(uCodePage, 0, pszText, -1, NULL, 0);
|
|
|
|
|
if (0 == nNeedSize) return L"";
|
|
|
|
|
|
|
|
|
|
// <20><><EFBFBD><EFBFBD><EFBFBD>ռ<EFBFBD>,ת<><D7AA>
|
|
|
|
|
std::wstring strRet(L"");
|
|
|
|
|
wchar_t* pRet = new wchar_t[nNeedSize + 1];
|
|
|
|
|
memset(pRet, 0, (nNeedSize + 1) * sizeof(wchar_t));
|
|
|
|
|
if (0 == MultiByteToWideChar(uCodePage, 0, pszText, -1, pRet, nNeedSize))
|
|
|
|
|
{
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
{
|
|
|
|
|
strRet = pRet;
|
|
|
|
|
}
|
|
|
|
|
delete[]pRet;
|
|
|
|
|
return strRet;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string WtoA(const std::wstring& strText)
|
|
|
|
|
{
|
|
|
|
|
return __do_w_to_a_utf8(strText.c_str(), CP_ACP);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string WtoA(const wchar_t* pwszText)
|
|
|
|
|
{
|
|
|
|
|
return __do_w_to_a_utf8(pwszText, CP_ACP);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::wstring AtoW(const std::string& strText)
|
|
|
|
|
{
|
|
|
|
|
return __do_a_utf8_to_w(strText.c_str(), CP_ACP);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::wstring AtoW(const char* pszText)
|
|
|
|
|
{
|
|
|
|
|
return __do_a_utf8_to_w(pszText, CP_ACP);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string WtoUTF8(const std::wstring& strText)
|
|
|
|
|
{
|
|
|
|
|
return __do_w_to_a_utf8(strText.c_str(), CP_UTF8);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string WtoUTF8(const wchar_t* pwszText)
|
|
|
|
|
{
|
|
|
|
|
return __do_w_to_a_utf8(pwszText, CP_UTF8);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::wstring UTF8toW(const std::string& strText)
|
|
|
|
|
{
|
|
|
|
|
return __do_a_utf8_to_w(strText.c_str(), CP_UTF8);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::wstring UTF8toW(const char* pszText)
|
|
|
|
|
{
|
|
|
|
|
return __do_a_utf8_to_w(pszText, CP_UTF8);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string UTF8toA(const std::string& src)
|
|
|
|
|
{
|
|
|
|
|
return WtoA(UTF8toW(src));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string UTF8toA(const char* src)
|
|
|
|
|
{
|
|
|
|
|
return WtoA(UTF8toW(src));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string AtoUTF8(const std::string& src)
|
|
|
|
|
{
|
|
|
|
|
return WtoUTF8(AtoW(src));
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
std::string AtoUTF8(const char* src)
|
|
|
|
|
{
|
|
|
|
|
return WtoUTF8(AtoW(src));
|
|
|
|
|
}
|
|
|
|
|
/*
UTF-8 encodes one character in at most 6 bytes:

1 byte   0xxxxxxx
2 bytes  110xxxxx 10xxxxxx
3 bytes  1110xxxx 10xxxxxx 10xxxxxx
4 bytes  11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
5 bytes  111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
6 bytes  1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx

*/
|
|
|
|
|
|
|
|
|
|
// Return values:
//  0 -> the input string conforms to the UTF-8 encoding rules
// -1 -> an invalid UTF-8 leading byte was detected
// -2 -> an invalid UTF-8 continuation byte was detected
|
|
|
|
|
|
|
|
|
|
// Checks whether a NUL-terminated byte string is structurally valid UTF-8.
//
// Only the lead-byte / continuation-byte bit patterns are examined: the legacy
// 5- and 6-byte forms are accepted and overlong encodings are not rejected.
// A string that begins with the UTF-8 BOM (EF BB BF) is accepted immediately
// without inspecting the rest of the text.
//
// pszSrc - NUL-terminated text to inspect; NULL is treated as an empty string.
//
// Returns:
//    0 -> the text follows the UTF-8 byte patterns (also for NULL/empty input)
//   -1 -> an invalid lead byte was found
//   -2 -> a continuation byte is invalid, or the text ends in the middle of a
//         multi-byte sequence
int IsTextUTF8(const char* pszSrc)
{
    // Guard against a null pointer instead of dereferencing it.
    if (pszSrc == NULL) return 0;

    // The bytes must be inspected as unsigned: with a signed char the high bit
    // would make the value negative and the shift comparisons below would fail.
    const unsigned char* pCur = reinterpret_cast<const unsigned char*>(pszSrc);

    // BOM check (EF BB BF). Each matched byte is non-zero, so the short-circuit
    // evaluation never reads past the terminating NUL.
    if (pCur[0] == 0xEF && pCur[1] == 0xBB && pCur[2] == 0xBF)
    {
        return 0;
    }

    // No BOM: walk the text, tracking how many continuation bytes the current
    // character still needs.
    int nPending = 0;
    for (; *pCur != 0; ++pCur)
    {
        const unsigned char ch = *pCur;

        if (nPending == 0)
        {
            // Expecting the first byte of a character.
            if ((ch >> 7) == 0)         { /* 0xxxxxxx: single-byte (ASCII) */ }
            else if ((ch >> 5) == 0x06) { nPending = 1; } // 110xxxxx: 2-byte sequence
            else if ((ch >> 4) == 0x0E) { nPending = 2; } // 1110xxxx: 3-byte sequence
            else if ((ch >> 3) == 0x1E) { nPending = 3; } // 11110xxx: 4-byte sequence
            else if ((ch >> 2) == 0x3E) { nPending = 4; } // 111110xx: 5-byte sequence
            else if ((ch >> 1) == 0x7E) { nPending = 5; } // 1111110x: 6-byte sequence
            else
            {
                return -1; // not a legal lead byte
            }
        }
        else
        {
            // Expecting a continuation byte, which must look like 10xxxxxx.
            if ((ch >> 6) != 0x02)
            {
                return -2;
            }
            --nPending;
        }
    }

    // Text that stops while continuation bytes are still owed is truncated;
    // report it with the documented continuation-byte error code.
    return (nPending == 0) ? 0 : -2;
}
|