[C++]中文Windows系统处理base64的正确方式

由于Visual Studio在简体中文Windows系统下默认按GB2312编码处理字符串,而其他语言(特别是网页开发)一般默认使用UTF-8编码,如果直接对字符串做base64编码,得到的结果就会与其他语言的结果不一致,对方解码后会出现乱码。
解决方法就是在做base64编码之前,先把文本的编码转换为UTF-8。最终结果已使用在线base64工具验证通过。
先看main函数

int main()
{
	//这是WINDOWS API函数
	LANGID lang = GetUserDefaultLangID();
	cout << lang << endl;

	string strOriginal = "随意吧输入一定文字*()————abcdefg";
	string strTemp;
	TextConvert convertor;
	// 对简体中文编码,从GB2312转为UTF8
	// 东亚有些语言编码也会引起问题,需要用特殊的方法转换
	if (lang == 0x804) {
		convertor.GB2312ToUTF_8(strTemp, strOriginal.c_str(), strOriginal.length());
	}
	else {
		strTemp = strOriginal;
	}
	Base64Convert base64;
	string encode = base64.Encode((unsigned char*)strTemp.c_str(), strTemp.length());
	cout  << encode << endl;
	
	strTemp = base64.Decode(encode.c_str(), encode.length());
	string decode;
	convertor.UTF_8ToGB2312(decode, strTemp.c_str(), strTemp.length());
	cout << decode << endl;
}

TextConvert.h文件

#pragma once
#include <iostream>
#include <string>
#include <windows.h>
using namespace std;

// Text encoding helper: converts between UTF-8 and the system ANSI code page
// (CP_ACP, which the original author treats as GB2312) and provides URL
// encoding / decoding on top of those conversions.
class TextConvert
{
public:
	TextConvert();
	~TextConvert();

	// Converts pLen bytes of UTF-8 text at pText and stores the result in pOut.
	void UTF_8ToGB2312(string &pOut, const char *pText, int pLen);// UTF-8 -> GB2312
	// Converts pLen bytes of GB2312 text at pText to UTF-8 and stores the result in pOut.
	void GB2312ToUTF_8(string &pOut, const char *pText, int pLen); // GB2312 -> UTF-8
	string UrlGB2312(char * str);                           // URL-encode the bytes of str as they are
	string UrlUTF8(char * str);                             // convert str to UTF-8, then URL-encode it
	string UrlUTF8Decode(string str);                  // URL-decode, then convert UTF-8 -> GB2312
	string UrlGB2312Decode(string str);                // URL-decode without any charset conversion

private:
	// Converts the 2-byte multibyte character at gbBuffer into one WCHAR.
	void Gb2312ToUnicode(WCHAR* pOut, const char *gbBuffer);
	// Decodes the 3-byte UTF-8 sequence at pText into one WCHAR.
	void UTF_8ToUnicode(WCHAR* pOut, const char *pText);
	// Encodes the WCHAR at pText as UTF-8 bytes written to pOut.
	void UnicodeToUTF_8(char* pOut, WCHAR* pText);
	// Converts one WCHAR to multibyte form (CP_ACP), writing at most 2 bytes to pOut.
	void UnicodeToGB2312(char* pOut, WCHAR uData);
	// Returns the value (0-15) of a hexadecimal digit, or -1 if ch is not one.
	char  CharToInt(char ch);
	// Combines the two hexadecimal digits str[0] and str[1] into one byte.
	char StrToBin(char *str);
};

TextConvert.cpp文件

#include "stdafx.h"
#include "TextConvert.h"


// Default constructor: the class holds no state, so there is nothing to set up.
TextConvert::TextConvert() {}


// Destructor: no resources are owned, so nothing needs releasing.
TextConvert::~TextConvert() {}

// Converts the 2-byte multibyte character at gbBuffer (interpreted in the
// system ANSI code page, CP_ACP) into a single UTF-16 code unit.
//   pOut     - receives the converted code unit.
//   gbBuffer - points at the two bytes of the character to convert.
// If the conversion does not yield exactly one code unit, U+FFFD (the Unicode
// replacement character) is stored so the caller never sees an indeterminate value.
void TextConvert::Gb2312ToUnicode(WCHAR* pOut, const char *gbBuffer)
{
	const int written = ::MultiByteToWideChar(CP_ACP, MB_PRECOMPOSED, gbBuffer, 2, pOut, 1);
	if (written != 1)
	{
		*pOut = 0xFFFD;
	}
}
// Decodes one 3-byte UTF-8 sequence (1110xxxx 10yyyyyy 10zzzzzz) at pText into
// the 16-bit value xxxxyyyy yyzzzzzz stored at pOut.
// Only the 3-byte form is handled; the lead byte is not validated.
void TextConvert::UTF_8ToUnicode(WCHAR* pOut, const char *pText)
{
	const char lead = pText[0];
	const char middle = pText[1];
	const char tail = pText[2];

	// The WCHAR is filled byte by byte: index 0 is the low byte, index 1 the high byte.
	char* const outBytes = (char *)pOut;

	// Low byte: bottom 2 payload bits of the middle byte + 6 payload bits of the tail.
	outBytes[0] = static_cast<char>(((middle & 0x03) << 6) + (tail & 0x3F));
	// High byte: 4 payload bits of the lead byte + top 4 payload bits of the middle byte.
	outBytes[1] = static_cast<char>(((lead & 0x0F) << 4) + ((middle >> 2) & 0x0F));
}

// Encodes the single UTF-16 code unit at pText as UTF-8.
//   pOut  - receives 1 to 3 bytes; must have room for at least 3 bytes.
//   pText - points at the code unit to encode.
// Code points below U+0800 use the 1- or 2-byte form (always emitting 3 bytes
// would be an overlong, invalid encoding). Results shorter than 3 bytes are
// followed by a NUL, so a caller that appends the buffer as a C string picks
// up exactly the encoded bytes. A 3-byte result is not terminated here, so no
// more than 3 bytes are ever written.
void TextConvert::UnicodeToUTF_8(char* pOut, WCHAR* pText)
{
	const unsigned int code = static_cast<unsigned int>(*pText) & 0xFFFFu;

	if (code < 0x80u)
	{
		// 0xxxxxxx
		pOut[0] = static_cast<char>(code);
		pOut[1] = '\0';
	}
	else if (code < 0x800u)
	{
		// 110xxxxx 10xxxxxx
		pOut[0] = static_cast<char>(0xC0u | (code >> 6));
		pOut[1] = static_cast<char>(0x80u | (code & 0x3Fu));
		pOut[2] = '\0';
	}
	else
	{
		// 1110xxxx 10xxxxxx 10xxxxxx
		pOut[0] = static_cast<char>(0xE0u | (code >> 12));
		pOut[1] = static_cast<char>(0x80u | ((code >> 6) & 0x3Fu));
		pOut[2] = static_cast<char>(0x80u | (code & 0x3Fu));
	}
}
void TextConvert::UnicodeToGB2312(char* pOut, WCHAR uData)
{
	WideCharToMultiByte(CP_ACP, NULL, &uData, 1, pOut, sizeof(WCHAR), NULL, NULL);
	return;
}

//做为解Url使用
// Used by URL decoding: maps a hexadecimal digit character to its value.
// Returns 0-15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for any other character.
char TextConvert::CharToInt(char ch) {
	char value = -1;

	if ('0' <= ch && ch <= '9') {
		value = (char)(ch - '0');
	}
	else if ('a' <= ch && ch <= 'f') {
		value = (char)(ch - 'a' + 10);
	}
	else if ('A' <= ch && ch <= 'F') {
		value = (char)(ch - 'A' + 10);
	}

	return value;
}
// Combines two hexadecimal digit characters into one byte, e.g. "B0" -> 0xB0.
// str[0] supplies the high nibble and str[1] the low nibble; the digits are
// not validated here.
char TextConvert::StrToBin(char *str) {
	const char highNibble = CharToInt(str[0]);   // e.g. 'B' -> 11 (00001011)
	const char lowNibble = CharToInt(str[1]);    // e.g. '0' -> 0  (00000000)

	// Shift the first digit into the upper four bits and merge in the second.
	const char combined = (highNibble << 4) | lowNibble;
	return combined;
}


//UTF_8 转gb2312
// Converts UTF-8 text to the system ANSI code page (CP_ACP; this is
// GB2312/GBK when the system code page is Simplified Chinese).
//   pOut  - receives the converted text; left empty on empty input or failure.
//   pText - UTF-8 input, need not be NUL-terminated.
//   pLen  - number of bytes to read from pText.
// The whole buffer goes through the Windows conversion APIs (UTF-8 -> UTF-16
// -> ANSI), so 1- to 4-byte UTF-8 sequences and truncated input are handled
// without reading past pLen.
void TextConvert::UTF_8ToGB2312(string &pOut, const char *pText, int pLen)
{
	pOut.clear();
	if (pText == NULL || pLen <= 0)
	{
		return;
	}

	// Step 1: UTF-8 -> UTF-16. The first call only measures the required size.
	const int wideLen = ::MultiByteToWideChar(CP_UTF8, 0, pText, pLen, NULL, 0);
	if (wideLen <= 0)
	{
		return;
	}
	std::wstring wide(static_cast<size_t>(wideLen), L'\0');
	if (::MultiByteToWideChar(CP_UTF8, 0, pText, pLen, &wide[0], wideLen) <= 0)
	{
		return;
	}

	// Step 2: UTF-16 -> ANSI code page, again measuring first.
	const int ansiLen = ::WideCharToMultiByte(CP_ACP, 0, wide.c_str(), wideLen, NULL, 0, NULL, NULL);
	if (ansiLen <= 0)
	{
		return;
	}
	string ansi(static_cast<size_t>(ansiLen), '\0');
	if (::WideCharToMultiByte(CP_ACP, 0, wide.c_str(), wideLen, &ansi[0], ansiLen, NULL, NULL) <= 0)
	{
		return;
	}

	pOut.swap(ansi);
}

//GB2312 转为 UTF-8
// Converts text in the system ANSI code page (CP_ACP; this is GB2312/GBK when
// the system code page is Simplified Chinese) to UTF-8.
//   pOut  - receives the UTF-8 text; left empty on empty input or failure.
//   pText - input bytes, need not be NUL-terminated.
//   pLen  - number of bytes to read from pText.
// The whole buffer goes through the Windows conversion APIs (ANSI -> UTF-16
// -> UTF-8), so characters below U+0800 get the correct 1- or 2-byte form and
// a dangling lead byte at the end of the input is not read past.
void TextConvert::GB2312ToUTF_8(string& pOut, const char *pText, int pLen)
{
	pOut.clear();
	if (pText == NULL || pLen <= 0)
	{
		return;
	}

	// Step 1: ANSI code page -> UTF-16. The first call only measures the size.
	const int wideLen = ::MultiByteToWideChar(CP_ACP, 0, pText, pLen, NULL, 0);
	if (wideLen <= 0)
	{
		return;
	}
	std::wstring wide(static_cast<size_t>(wideLen), L'\0');
	if (::MultiByteToWideChar(CP_ACP, 0, pText, pLen, &wide[0], wideLen) <= 0)
	{
		return;
	}

	// Step 2: UTF-16 -> UTF-8. The two default-character arguments are passed
	// as NULL for CP_UTF8.
	const int utf8Len = ::WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), wideLen, NULL, 0, NULL, NULL);
	if (utf8Len <= 0)
	{
		return;
	}
	string utf8(static_cast<size_t>(utf8Len), '\0');
	if (::WideCharToMultiByte(CP_UTF8, 0, wide.c_str(), wideLen, &utf8[0], utf8Len, NULL, NULL) <= 0)
	{
		return;
	}

	pOut.swap(utf8);
}
//把str编码为网页中的 GB2312 url encode ,英文不变,汉字双字节  如%3D%AE%88
// URL-encodes the bytes of str as they are (no charset conversion): ASCII
// letters and digits are copied, a space becomes '+', and every other byte
// becomes %XX with uppercase hex digits, so a 2-byte Chinese character
// yields two %XX groups.
//   str - NUL-terminated input; NULL yields an empty result.
// Only the space character maps to '+'. Tab, CR, LF and other whitespace are
// percent-encoded so that decoding restores them exactly.
string TextConvert::UrlGB2312(char * str)
{
	static const char kHexDigits[] = "0123456789ABCDEF";

	string encoded;
	if (str == NULL)
	{
		return encoded;
	}

	const size_t len = strlen(str);
	encoded.reserve(len * 3);
	for (size_t i = 0; i < len; ++i)
	{
		const unsigned char byte = static_cast<unsigned char>(str[i]);
		// Explicit ASCII ranges: unlike isalnum(), this cannot be affected by
		// the C locale treating bytes >= 0x80 as alphanumeric.
		const bool isAsciiAlnum = (byte >= '0' && byte <= '9')
			|| (byte >= 'A' && byte <= 'Z')
			|| (byte >= 'a' && byte <= 'z');

		if (isAsciiAlnum)
		{
			encoded += static_cast<char>(byte);
		}
		else if (byte == ' ')
		{
			encoded += '+';
		}
		else
		{
			encoded += '%';
			encoded += kHexDigits[byte >> 4];
			encoded += kHexDigits[byte & 0x0F];
		}
	}
	return encoded;
}

//把str编码为网页中的 UTF-8 url encode ,英文不变,汉字三字节  如%3D%AE%88

// Converts str to UTF-8 with GB2312ToUTF_8 and URL-encodes the result: ASCII
// letters and digits are copied, a space becomes '+', and every other byte
// becomes %XX with uppercase hex digits, so a 3-byte UTF-8 character yields
// three %XX groups.
//   str - NUL-terminated input; NULL yields an empty result.
// Only the space character maps to '+'. Tab, CR, LF and other whitespace are
// percent-encoded so that decoding restores them exactly.
string TextConvert::UrlUTF8(char * str)
{
	static const char kHexDigits[] = "0123456789ABCDEF";

	string encoded;
	if (str == NULL)
	{
		return encoded;
	}

	string utf8;
	GB2312ToUTF_8(utf8, str, (int)strlen(str));

	const size_t len = utf8.length();
	encoded.reserve(len * 3);
	for (size_t i = 0; i < len; ++i)
	{
		const unsigned char byte = static_cast<unsigned char>(utf8[i]);
		// Explicit ASCII ranges: unlike isalnum(), this cannot be affected by
		// the C locale treating bytes >= 0x80 as alphanumeric.
		const bool isAsciiAlnum = (byte >= '0' && byte <= '9')
			|| (byte >= 'A' && byte <= 'Z')
			|| (byte >= 'a' && byte <= 'z');

		if (isAsciiAlnum)
		{
			encoded += static_cast<char>(byte);
		}
		else if (byte == ' ')
		{
			encoded += '+';
		}
		else
		{
			encoded += '%';
			encoded += kHexDigits[byte >> 4];
			encoded += kHexDigits[byte & 0x0F];
		}
	}
	return encoded;
}
//把url GB2312解码
// URL-decodes str without any charset conversion: %XX becomes the byte 0xXX,
// '+' becomes a space, and everything else is copied unchanged.
// A '%' that is not followed by two hexadecimal digits (including one at the
// very end of the string) is copied literally instead of being decoded, so
// the input is never indexed out of bounds and no garbage byte is produced.
string TextConvert::UrlGB2312Decode(string str)
{
	string output;
	const size_t len = str.length();
	output.reserve(len);

	size_t i = 0;
	while (i < len) {
		const char ch = str[i];

		if (ch == '%' && i + 2 < len) {
			// CharToInt yields 0-15 for a hex digit; anything else is invalid.
			const int high = CharToInt(str[i + 1]);
			const int low = CharToInt(str[i + 2]);
			if (high >= 0 && high < 16 && low >= 0 && low < 16) {
				output += static_cast<char>((high << 4) | low);
				i += 3;
				continue;
			}
		}

		output += (ch == '+') ? ' ' : ch;
		++i;
	}

	return output;
}
//把url utf8解码
// URL-decodes str with UrlGB2312Decode and then converts the resulting UTF-8
// bytes with UTF_8ToGB2312.
// The decoded length comes from the string itself rather than strlen(), which
// would depend on NUL termination and stop at a decoded %00 byte.
string TextConvert::UrlUTF8Decode(string str)
{
	const string utf8Bytes = UrlGB2312Decode(str);

	string output;
	UTF_8ToGB2312(output, utf8Bytes.c_str(), static_cast<int>(utf8Bytes.length()));
	return output;
}

Base64Convert.h文件

#pragma once
#include <string>
using namespace std;
/**
* Base64 encoder / decoder.
* @author liruixing
*/
class Base64Convert {
public:
	/** Sets up the standard Base64 alphabet used by Encode. */
	Base64Convert()
		: _base64_table("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/")
	{
	}

	/**
	* Encodes `bytes` bytes starting at `str`.
	* The input must be unsigned, otherwise encoding Chinese text goes wrong.
	*/
	std::string Encode(const unsigned char * str, int bytes);

	/** Decodes up to `bytes` characters of Base64 text starting at `str`. */
	std::string Decode(const char *str, int bytes);
	//void Debug(bool open = true);

private:
	std::string _base64_table;            // the 64-character encoding alphabet
	static const char base64_pad = '=';   // padding character
};

Base64Convert.cpp文件

#include "stdafx.h"
#include <iostream>
#include "Base64Convert.h"

// Base64-encodes `bytes` bytes starting at `str` using _base64_table.
// Every full group of 3 input bytes yields 4 output characters; a trailing
// group of 1 or 2 bytes is padded with "==" or "=" respectively.
// A byte count of zero or less yields an empty string.
std::string Base64Convert::Encode(const unsigned char * str, int bytes) {
	std::string encoded;
	const unsigned char *cursor = str;
	int remaining = bytes;

	// Full 3-byte groups.
	for (; remaining > 2; cursor += 3, remaining -= 3) {
		const unsigned char first = cursor[0];
		const unsigned char second = cursor[1];
		const unsigned char third = cursor[2];

		encoded += _base64_table[first >> 2];
		encoded += _base64_table[((first & 0x03) << 4) + (second >> 4)];
		encoded += _base64_table[((second & 0x0f) << 2) + (third >> 6)];
		encoded += _base64_table[third & 0x3f];
	}

	// Trailing partial group, if any.
	if (remaining == 1) {
		encoded += _base64_table[cursor[0] >> 2];
		encoded += _base64_table[(cursor[0] & 0x03) << 4];
		encoded += "==";
	}
	else if (remaining == 2) {
		encoded += _base64_table[cursor[0] >> 2];
		encoded += _base64_table[((cursor[0] & 0x03) << 4) + (cursor[1] >> 4)];
		encoded += _base64_table[(cursor[1] & 0x0f) << 2];
		encoded += "=";
	}

	return encoded;
}
// Decodes Base64 text.
//   str    - input characters; decoding stops at a NUL or after `length`
//            characters, whichever comes first. NULL yields an empty result.
//   length - maximum number of characters to read from str.
// Returns the decoded bytes. Whitespace and any other character outside the
// Base64 alphabet is skipped. Returns an empty string when the input is
// malformed, i.e. a '=' sits in the second slot of a 4-character group and is
// not followed by another '='.
std::string Base64Convert::Decode(const char *str, int length) {
	// Decoding table indexed by byte value: 0-63 for alphabet characters,
	// -1 for whitespace, -2 for everything else. Declared signed so the
	// negative markers work regardless of whether plain char is signed.
	static const signed char DecodeTable[256] =
	{
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -1, -1, -2, -2, -1, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-1, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, 62, -2, -2, -2, 63,
		52, 53, 54, 55, 56, 57, 58, 59, 60, 61, -2, -2, -2, -2, -2, -2,
		-2,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
		15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -2, -2, -2, -2, -2,
		-2, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40,
		41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2,
		-2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2, -2
	};

	std::string decoded;
	if (str == NULL || length <= 0) {
		return decoded;
	}
	decoded.reserve((static_cast<size_t>(length) / 4 + 1) * 3);

	int bin = 0;   // bits carried over from the previous alphabet character
	int i = 0;     // number of alphabet characters consumed so far

	// The length bound is tested before the character is read, so a buffer
	// that is not NUL-terminated is never read past `length`.
	for (int pos = 0; pos < length && str[pos] != '\0'; ++pos)
	{
		const unsigned char raw = static_cast<unsigned char>(str[pos]);

		if (raw == base64_pad) {
			// Characters are matched in groups of four. A '=' in the second
			// slot is only tolerated when the next character is '=' as well;
			// otherwise the input is invalid and an empty string is returned.
			// A '=' in any other slot is simply skipped.
			const bool nextIsPad = (pos + 1 < length) && (str[pos + 1] == base64_pad);
			if (!nextIsPad && (i % 4) == 1) {
				return std::string();
			}
			continue;
		}

		// Indexing with the unsigned byte keeps bytes >= 0x80 inside the table.
		const int value = DecodeTable[raw];
		if (value < 0) {
			// Whitespace or another separator / illegal character: skip it.
			continue;
		}

		switch (i % 4)
		{
		case 0:
			bin = value << 2;
			break;
		case 1:
			bin |= value >> 4;
			decoded += static_cast<char>(bin);
			bin = (value & 0x0f) << 4;
			break;
		case 2:
			bin |= value >> 2;
			decoded += static_cast<char>(bin);
			bin = (value & 0x03) << 6;
			break;
		case 3:
			bin |= value;
			decoded += static_cast<char>(bin);
			break;
		}
		i++;
	}
	return decoded;
}

注: 以上两个类来自不同的博文