[C++]中文Windows系统处理base64的正确方式
由于Visual Studio在中文系统下默认使用GB2312编码处理字符串，如果base64结果要与其他语言交互，特别是一般默认使用UTF-8编码的网页开发环境，就会出现乱码。
解决方法就是先转换文本的编码，再进行base64编码。最终结果使用这个在线工具验证通过。
先看main函数
int main() { //这是WINDOWS API函数 LANGID lang = GetUserDefaultLangID(); cout << lang << endl; string strOriginal = "随意吧输入一定文字*()————abcdefg"; string strTemp; TextConvert convertor; // 对简体中文编码,从GB2312转为UTF8 // 东亚有些语言编码也会引起问题,需要用特殊的方法转换 if (lang == 0x804) { convertor.GB2312ToUTF_8(strTemp, strOriginal.c_str(), strOriginal.length()); } else { strTemp = strOriginal; } Base64Convert base64; string encode = base64.Encode((unsigned char*)strTemp.c_str(), strTemp.length()); cout << encode << endl; strTemp = base64.Decode(encode.c_str(), encode.length()); string decode; convertor.UTF_8ToGB2312(decode, strTemp.c_str(), strTemp.length()); cout << decode << endl; }
TextConvert.h文件
#pragma once #include <iostream> #include <string> #include <windows.h> using namespace std; class TextConvert { public: TextConvert(); ~TextConvert(); void UTF_8ToGB2312(string &pOut, const char *pText, int pLen);//utf_8转为gb2312 void GB2312ToUTF_8(string &pOut, const char *pText, int pLen); //gb2312 转utf_8 string UrlGB2312(char * str); //urlgb2312编码 string UrlUTF8(char * str); //urlutf8 编码 string UrlUTF8Decode(string str); //urlutf8解码 string UrlGB2312Decode(string str); //urlgb2312解码 private: void Gb2312ToUnicode(WCHAR* pOut, const char *gbBuffer); void UTF_8ToUnicode(WCHAR* pOut, const char *pText); void UnicodeToUTF_8(char* pOut, WCHAR* pText); void UnicodeToGB2312(char* pOut, WCHAR uData); char CharToInt(char ch); char StrToBin(char *str); };
TextConvert.cpp文件
#include "stdafx.h"
#include "TextConvert.h"

#include <cctype>
#include <cstring>

namespace
{
    // Converts `len` bytes of `text`, encoded in `codePage`, to UTF-16.
    // Returns an empty string for null/empty input or when the API fails.
    std::wstring ToWide(UINT codePage, const char *text, int len)
    {
        if (text == NULL || len <= 0) {
            return std::wstring();
        }
        // First call with a null output buffer only asks for the required size.
        const int wideLen = ::MultiByteToWideChar(codePage, 0, text, len, NULL, 0);
        if (wideLen <= 0) {
            return std::wstring();
        }
        std::wstring wide(static_cast<size_t>(wideLen), L'\0');
        if (::MultiByteToWideChar(codePage, 0, text, len, &wide[0], wideLen) <= 0) {
            return std::wstring();
        }
        return wide;
    }

    // Converts UTF-16 text to a byte string encoded in `codePage`.
    // Returns an empty string for empty input or when the API fails.
    std::string FromWide(UINT codePage, const std::wstring &wide)
    {
        if (wide.empty()) {
            return std::string();
        }
        const int wideLen = static_cast<int>(wide.length());
        const int byteLen = ::WideCharToMultiByte(codePage, 0, wide.data(), wideLen,
                                                  NULL, 0, NULL, NULL);
        if (byteLen <= 0) {
            return std::string();
        }
        std::string bytes(static_cast<size_t>(byteLen), '\0');
        if (::WideCharToMultiByte(codePage, 0, wide.data(), wideLen,
                                  &bytes[0], byteLen, NULL, NULL) <= 0) {
            return std::string();
        }
        return bytes;
    }

    // Form-style URL encoding of raw bytes: alphanumerics are kept, whitespace
    // becomes '+', and every other byte becomes %XX with uppercase hex digits.
    std::string UrlEncodeBytes(const std::string &bytes)
    {
        static const char kHex[] = "0123456789ABCDEF";

        std::string encoded;
        encoded.reserve(bytes.length() * 3);
        for (size_t i = 0; i < bytes.length(); ++i) {
            // <cctype> functions require a value representable as unsigned char.
            const unsigned char b = static_cast<unsigned char>(bytes[i]);
            if (isalnum(b)) {
                encoded += static_cast<char>(b);
            } else if (isspace(b)) {
                encoded += '+';
            } else {
                encoded += '%';
                encoded += kHex[b >> 4];
                encoded += kHex[b & 0x0F];
            }
        }
        return encoded;
    }
}

TextConvert::TextConvert()
{
}

TextConvert::~TextConvert()
{
}

// ---------------------------------------------------------------------------
// Single-character helpers. They are declared in TextConvert.h and therefore
// kept, but the public conversions below no longer call them: they convert the
// whole buffer at once, which handles every UTF-8 sequence length.
// ---------------------------------------------------------------------------

// Converts one double-byte ANSI character (2 bytes at gbBuffer) to UTF-16.
void TextConvert::Gb2312ToUnicode(WCHAR* pOut, const char *gbBuffer)
{
    ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, 2, pOut, 1);
}

// Decodes exactly one 3-byte UTF-8 sequence at pText into a UTF-16 code unit.
// The caller must guarantee that three bytes are readable.
void TextConvert::UTF_8ToUnicode(WCHAR* pOut, const char *pText)
{
    const unsigned int b0 = static_cast<unsigned char>(pText[0]);
    const unsigned int b1 = static_cast<unsigned char>(pText[1]);
    const unsigned int b2 = static_cast<unsigned char>(pText[2]);
    *pOut = static_cast<WCHAR>(((b0 & 0x0F) << 12) | ((b1 & 0x3F) << 6) | (b2 & 0x3F));
}

// Encodes one UTF-16 code unit as a 3-byte UTF-8 sequence (no terminator is
// written). Only meaningful for code points in U+0800..U+FFFF.
void TextConvert::UnicodeToUTF_8(char* pOut, WCHAR* pText)
{
    const unsigned int cp = *pText;
    pOut[0] = static_cast<char>(0xE0 | (cp >> 12));
    pOut[1] = static_cast<char>(0x80 | ((cp >> 6) & 0x3F));
    pOut[2] = static_cast<char>(0x80 | (cp & 0x3F));
}

// Converts one UTF-16 code unit to the ANSI code page; pOut must have room
// for sizeof(WCHAR) bytes.
void TextConvert::UnicodeToGB2312(char* pOut, WCHAR uData)
{
    ::WideCharToMultiByte(CP_ACP, 0, &uData, 1, pOut, sizeof(WCHAR), NULL, NULL);
}

// Used for URL decoding: maps one hex digit to its value 0..15, or -1 when
// ch is not a hex digit.
char TextConvert::CharToInt(char ch)
{
    if (ch >= '0' && ch <= '9') return static_cast<char>(ch - '0');
    if (ch >= 'a' && ch <= 'f') return static_cast<char>(ch - 'a' + 10);
    if (ch >= 'A' && ch <= 'F') return static_cast<char>(ch - 'A' + 10);
    return -1;
}

// Combines two hex digits at str[0] and str[1] into one byte, e.g. "B0" ->
// 0xB0. The result is unspecified if either character is not a hex digit.
char TextConvert::StrToBin(char *str)
{
    const char high = CharToInt(str[0]);
    const char low = CharToInt(str[1]);
    return static_cast<char>((high << 4) | low);
}

// Converts pLen bytes of UTF-8 text to the system ANSI code page (CP_ACP),
// which is GB2312/GBK on a Simplified Chinese Windows installation.
// pOut is replaced with the result and is empty for null or empty input.
void TextConvert::UTF_8ToGB2312(std::string &pOut, const char *pText, int pLen)
{
    pOut = FromWide(CP_ACP, ToWide(CP_UTF8, pText, pLen));
}

// Converts pLen bytes of text in the system ANSI code page (CP_ACP) to UTF-8.
// pOut is replaced with the result and is empty for null or empty input.
void TextConvert::GB2312ToUTF_8(std::string& pOut, const char *pText, int pLen)
{
    pOut = FromWide(CP_UTF8, ToWide(CP_ACP, pText, pLen));
}

// URL-encodes the bytes of str as they are (GB2312 url encode): alphanumerics
// are unchanged and each byte of a double-byte character becomes %XX.
// Returns an empty string when str is null.
std::string TextConvert::UrlGB2312(char * str)
{
    if (str == NULL) {
        return std::string();
    }
    return UrlEncodeBytes(std::string(str));
}

// Converts str to UTF-8 first and then URL-encodes it (UTF-8 url encode):
// alphanumerics are unchanged and each UTF-8 byte becomes %XX.
// Returns an empty string when str is null.
std::string TextConvert::UrlUTF8(char * str)
{
    if (str == NULL) {
        return std::string();
    }
    std::string utf8;
    GB2312ToUTF_8(utf8, str, static_cast<int>(strlen(str)));
    return UrlEncodeBytes(utf8);
}

// Decodes a URL-encoded string to raw bytes: %XX becomes one byte and '+'
// becomes a space. A '%' that is not followed by two hex digits is copied
// through unchanged rather than read past the end of the string.
std::string TextConvert::UrlGB2312Decode(std::string str)
{
    const char kInvalidDigit = static_cast<char>(-1);
    const size_t len = str.length();

    std::string output;
    output.reserve(len);

    size_t i = 0;
    while (i < len) {
        const char c = str[i];
        if (c == '%' && i + 2 < len
            && CharToInt(str[i + 1]) != kInvalidDigit
            && CharToInt(str[i + 2]) != kInvalidDigit) {
            char hex[2] = { str[i + 1], str[i + 2] };
            output += StrToBin(hex);
            i += 3;
        } else if (c == '+') {
            output += ' ';
            ++i;
        } else {
            output += c;
            ++i;
        }
    }
    return output;
}

// Decodes a URL-encoded UTF-8 string and converts the result to the system
// ANSI code page.
std::string TextConvert::UrlUTF8Decode(std::string str)
{
    const std::string utf8 = UrlGB2312Decode(str);
    std::string output;
    // Use the stored length rather than strlen so the text is not cut off at
    // an embedded NUL.
    UTF_8ToGB2312(output, utf8.data(), static_cast<int>(utf8.length()));
    return output;
}
Base64Convert.h文件
#pragma once

#include <string>

using namespace std;

/**
 * Base64 encoder / decoder.
 * @author liruixing
 */
class Base64Convert
{
public:
    Base64Convert()
        : _base64_table("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/") /* the standard Base64 alphabet */
    {
    }

    /**
     * The input must be of unsigned type; otherwise encoding Chinese text
     * goes wrong.
     */
    std::string Encode(const unsigned char * str, int bytes);

    std::string Decode(const char *str, int bytes);

    //void Debug(bool open = true);

private:
    std::string _base64_table;
    static const char base64_pad = '=';
};
Base64Convert.cpp文件
#include "stdafx.h"
#include <iostream>
#include "Base64Convert.h"

namespace
{
    // Maps one input byte to its 6-bit Base64 value (0..63).
    // Returns -1 for the whitespace bytes tab, LF, CR and space, and -2 for
    // any other byte outside the alphabet. Taking an unsigned char keeps
    // bytes >= 0x80 from ever becoming a negative index or value.
    int DecodeSymbol(unsigned char c)
    {
        if (c >= 'A' && c <= 'Z') return c - 'A';
        if (c >= 'a' && c <= 'z') return c - 'a' + 26;
        if (c >= '0' && c <= '9') return c - '0' + 52;
        if (c == '+') return 62;
        if (c == '/') return 63;
        if (c == '\t' || c == '\n' || c == '\r' || c == ' ') return -1;
        return -2;
    }
}

/**
 * Encodes `bytes` bytes starting at `str` as Base64 with '=' padding.
 *
 * @param str   input bytes; unsigned so that bytes >= 0x80 shift correctly
 * @param bytes number of input bytes
 * @return the encoded text; empty when str is null or bytes <= 0
 */
std::string Base64Convert::Encode(const unsigned char * str, int bytes)
{
    std::string encoded;
    if (str == NULL || bytes <= 0) {
        return encoded;
    }
    encoded.reserve(((static_cast<size_t>(bytes) + 2) / 3) * 4);

    const unsigned char *current = str;

    // Full groups: 3 input bytes -> 4 output characters.
    while (bytes > 2) {
        encoded += _base64_table[current[0] >> 2];
        encoded += _base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
        encoded += _base64_table[((current[1] & 0x0f) << 2) + (current[2] >> 6)];
        encoded += _base64_table[current[2] & 0x3f];
        current += 3;
        bytes -= 3;
    }

    // Tail: 1 or 2 remaining bytes, padded to 4 characters.
    if (bytes == 1) {
        encoded += _base64_table[current[0] >> 2];
        encoded += _base64_table[(current[0] & 0x03) << 4];
        encoded += "==";
    } else if (bytes == 2) {
        encoded += _base64_table[current[0] >> 2];
        encoded += _base64_table[((current[0] & 0x03) << 4) + (current[1] >> 4)];
        encoded += _base64_table[(current[1] & 0x0f) << 2];
        encoded += "=";
    }
    return encoded;
}

/**
 * Decodes Base64 text.
 *
 * Reads at most `length` characters and stops early at a NUL. Whitespace and
 * any other character outside the Base64 alphabet is skipped.
 *
 * @param str    Base64 text
 * @param length maximum number of characters to read from str
 * @return the decoded bytes; an empty string when str is null, length <= 0,
 *         or the padding is malformed
 */
std::string Base64Convert::Decode(const char *str, int length)
{
    std::string decoded;
    if (str == NULL || length <= 0) {
        return decoded;
    }

    int bin = 0;    // bits carried over from the previous symbol
    int count = 0;  // number of alphabet symbols consumed so far

    // The length check comes first so no byte beyond `length` is ever read.
    for (int pos = 0; pos < length && str[pos] != '\0'; ++pos) {
        const char raw = str[pos];

        if (raw == base64_pad) {
            /* Symbols are decoded in groups of four. If '=' appears as the
               second character of a group and is not followed by another
               '=', the input is invalid and an empty string is returned.
               Any other '=' is simply skipped. */
            const bool nextIsPad = (pos + 1 < length) && (str[pos + 1] == base64_pad);
            if (!nextIsPad && (count % 4) == 1) {
                return std::string();
            }
            continue;
        }

        const int value = DecodeSymbol(static_cast<unsigned char>(raw));
        if (value < 0) {
            /* a space or some other separator character, we simply skip over */
            continue;
        }

        switch (count % 4) {
        case 0:
            bin = value << 2;
            break;
        case 1:
            bin |= value >> 4;
            decoded += static_cast<char>(bin);
            bin = (value & 0x0f) << 4;
            break;
        case 2:
            bin |= value >> 2;
            decoded += static_cast<char>(bin);
            bin = (value & 0x03) << 6;
            break;
        case 3:
            bin |= value;
            decoded += static_cast<char>(bin);
            break;
        }
        ++count;
    }
    return decoded;
}
注: 以上两个类来自不同的博文
发表评论