#include "precompile.h"
#include "charset.h"
#include "memutil.h"

/*
 * NOTE(review): the <...> header names were missing from the copy of this file
 * that was reviewed. The list below covers every standard-library / platform
 * name used in this translation unit. WLog_DBG / WLog_ERR / TOOLKIT_TAG /
 * TOOLKIT_API / char_encoding are assumed to come from the project headers
 * above -- confirm and re-add any project or third-party include that was lost.
 */
#include <errno.h>
#include <locale.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>

#ifdef _WIN32
#include <windows.h>
#else
#include <iconv.h>
#endif

#define TAG TOOLKIT_TAG("charaset")

// Copyright (c) 2008-2009 Bjoern Hoehrmann
// See http://bjoern.hoehrmann.de/utf-8/decoder/dfa/ for details.

#define UTF8_ACCEPT 0
#define UTF8_REJECT 1

/* First 256 entries: byte -> character class. Remaining entries: DFA transitions. */
static const uint8_t utf8d[] = {
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 00..1f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 20..3f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 40..5f
    0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, // 60..7f
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9,9, // 80..9f
    7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7, // a0..bf
    8,8,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, // c0..df
    0xa,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x3,0x4,0x3,0x3, // e0..ef
    0xb,0x6,0x6,0x6,0x5,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8,0x8, // f0..ff
    0x0,0x1,0x2,0x3,0x5,0x8,0x7,0x1,0x1,0x1,0x4,0x6,0x1,0x1,0x1,0x1, // s0..s0
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,1,1,1,1,0,1,0,1,1,1,1,1,1, // s1..s2
    1,2,1,1,1,1,1,2,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1, // s3..s4
    1,2,1,1,1,1,1,1,1,2,1,1,1,1,1,1,1,1,1,1,1,1,1,3,1,3,1,1,1,1,1,1, // s5..s6
    1,3,1,1,1,1,1,3,1,3,1,1,1,1,1,1,1,3,1,1,1,1,1,1,1,1,1,1,1,1,1,1, // s7..s8
};

/*
 * Feed one byte to the UTF-8 DFA.
 * Updates *state and accumulates the code point in *codep; returns the new
 * state (UTF8_ACCEPT when a full code point was decoded, UTF8_REJECT on error).
 */
static uint32_t decode(uint32_t* state, uint32_t* codep, uint32_t byte)
{
    uint32_t type = utf8d[byte];

    *codep = (*state != UTF8_ACCEPT)
        ? (byte & 0x3fu) | (*codep << 6)
        : (0xff >> type) & (byte);
    *state = utf8d[256 + *state * 16 + type];
    return *state;
}

/*
 * Run the DFA over the first len bytes of str.
 * Returns UTF8_ACCEPT (0) when the bytes form complete, valid UTF-8,
 * UTF8_REJECT (1) at the first invalid byte, or another non-zero state when
 * the input ends in the middle of a multi-byte sequence.
 */
uint32_t validate_utf8(char* str, size_t len)
{
    size_t i;
    uint32_t type;
    uint32_t state = UTF8_ACCEPT;

    for (i = 0; i < len; i++) {
        // We don't care about the codepoint, so this is a simplified version of the decode function.
        type = utf8d[(uint8_t)str[i]];
        state = utf8d[256 + (state) * 16 + type];
        if (state == UTF8_REJECT)
            break;
    }
    return state;
}

#ifndef _WIN32
/*
 * Convert inlen bytes of inbuf from from_charset to to_charset with iconv.
 * outbuf (outlen bytes) is zero-filled first, so the result is NUL-terminated
 * whenever it does not fill the whole buffer.
 * Returns 0 on success, -1 when the converter cannot be opened or the
 * conversion fails.
 */
int code_convert(char* from_charset, char* to_charset, char* inbuf, size_t inlen, char* outbuf, size_t outlen)
{
    char* in = inbuf;
    char* out = outbuf;
    int rc = 0;
    iconv_t cd = iconv_open(to_charset, from_charset);

    /* iconv_open reports failure with (iconv_t)-1, not 0. */
    if (cd == (iconv_t)-1)
        return -1;

    memset(outbuf, 0, outlen);
    if (iconv(cd, &in, &inlen, &out, &outlen) == (size_t)-1)
        rc = -1;
    else if (outlen > 0)
        *out = '\0';

    /* Close on every path so the descriptor never leaks. */
    iconv_close(cd);
    return rc;
}
#endif //NOT _WIN32

#ifdef _WIN32
/*
 * Convert the NUL-terminated string src from code page from_cp to to_cp via
 * UTF-16 and store it in dst (capacity len bytes). dst is left untouched when
 * the converted string, including its terminator, does not fit.
 */
static void convert_codepage_into(const char* src, char* dst, int len, UINT from_cp, UINT to_cp)
{
    WCHAR* wide;
    int need;
    int wlen = MultiByteToWideChar(from_cp, 0, src, -1, NULL, 0);

    if (wlen <= 0) {
        printf("ERROR.");
        return;
    }
    wide = (WCHAR*)malloc((size_t)wlen * sizeof(WCHAR));
    if (wide == NULL)
        return;
    MultiByteToWideChar(from_cp, 0, src, -1, wide, wlen);

    /* With a source length of -1 the count includes the terminator, so the
     * converted output is already NUL-terminated inside the first `need` bytes. */
    need = WideCharToMultiByte(to_cp, 0, wide, -1, NULL, 0, NULL, NULL);
    if (need > 0 && len >= need)
        WideCharToMultiByte(to_cp, 0, wide, -1, dst, need, NULL, NULL);

    free(wide);
}

/* Convert UTF-8 src to the ANSI code page; dst has room for len bytes. */
void utf82gbk_2(const char* src, char* dst, int len)
{
    convert_codepage_into(src, dst, len, CP_UTF8, CP_ACP);
}

/* Convert ANSI code page src to UTF-8; dst has room for len bytes. */
void gbk2utf8_2(const char* src, char* dst, int len)
{
    convert_codepage_into(src, dst, len, CP_ACP, CP_UTF8);
}
#else //Linux
/*
 * Convert the NUL-terminated string src (terminator included) from
 * from_charset to to_charset into dst (capacity len bytes).
 *
 * iconv wants a mutable input pointer, so src is copied first. Because the
 * input is a private copy, src == dst (in-place conversion) is safe.
 * On a conversion error the output is terminated at the failure point when
 * there is room, so dst always holds a valid string.
 */
static void iconv_convert_into(const char* src, char* dst, int len, const char* to_charset, const char* from_charset)
{
    // starkwong: In iconv implementations, inlen and outlen should be type of size_t not uint, which is different in length on Mac
    size_t inlen;
    size_t outlen;
    char* inbuf;
    char* inbuf_hold;
    char* outbuf = dst;
    iconv_t cd;

    if (src == NULL || dst == NULL || len <= 0)
        return;

    inlen = strlen(src) + 1;
    outlen = (size_t)len;

    /* Size the copy by the source length, not by the destination capacity. */
    inbuf_hold = (char*)malloc(inlen);
    if (inbuf_hold == NULL)
        return;
    memcpy(inbuf_hold, src, inlen);
    inbuf = inbuf_hold; // iconv advances inbuf, so keep the original for free()

    cd = iconv_open(to_charset, from_charset);
    if (cd != (iconv_t)-1) {
        /* Only (size_t)-1 is an error; a positive result counts
         * non-reversible conversions and is still a success. */
        if (iconv(cd, &inbuf, &inlen, &outbuf, &outlen) == (size_t)-1) {
            printf("iconv failed err: %s\n", strerror(errno));
            if (outlen > 0)
                *outbuf = '\0';
        }
        iconv_close(cd);
    }
    free(inbuf_hold); // Don't pass in inbuf as it may have been modified
}

/* Convert UTF-8 src to GBK; dst has room for len bytes. */
void utf82gbk_2(const char* src, char* dst, int len)
{
    iconv_convert_into(src, dst, len, "GBK", "UTF-8");
}

/* Convert GBK src to UTF-8; dst has room for len bytes. src may equal dst. */
void gbk2utf8_2(const char* src, char* dst, int len)
{
    iconv_convert_into(src, dst, len, "UTF-8", "GBK");
}
#endif

/* Non-zero when lo <= b <= hi. */
static int byte_in(unsigned char b, unsigned char lo, unsigned char hi)
{
    return lo <= b && b <= hi;
}

/*
 * Strict check that the NUL-terminated string str is well-formed UTF-8.
 * Rejects overlong forms, surrogates, code points above U+10FFFF and ASCII
 * control characters other than TAB, LF and CR. NULL counts as valid.
 * Returns 1 when valid, 0 otherwise.
 *
 * Each && chain tests the bytes left to right, so a trailing byte is only
 * read after the previous one was found to be non-NUL.
 */
static int is_utf8(const char* str)
{
    const unsigned char* p = (const unsigned char*)str;

    if (str == NULL)
        return 1;

    while (*p) {
        const unsigned char b0 = p[0];

        // ASCII
        // use b0 <= 0x7F to allow ASCII control characters
        if (b0 == 0x09 || b0 == 0x0A || b0 == 0x0D || byte_in(b0, 0x20, 0x7E)) {
            p += 1;
            continue;
        }

        // non-overlong 2-byte
        if (byte_in(b0, 0xC2, 0xDF) && byte_in(p[1], 0x80, 0xBF)) {
            p += 2;
            continue;
        }

        if (// excluding overlongs
            (b0 == 0xE0 && byte_in(p[1], 0xA0, 0xBF) && byte_in(p[2], 0x80, 0xBF)) ||
            // straight 3-byte
            ((byte_in(b0, 0xE1, 0xEC) || b0 == 0xEE || b0 == 0xEF) &&
             byte_in(p[1], 0x80, 0xBF) && byte_in(p[2], 0x80, 0xBF)) ||
            // excluding surrogates
            (b0 == 0xED && byte_in(p[1], 0x80, 0x9F) && byte_in(p[2], 0x80, 0xBF))) {
            p += 3;
            continue;
        }

        if (// planes 1-3
            (b0 == 0xF0 && byte_in(p[1], 0x90, 0xBF) &&
             byte_in(p[2], 0x80, 0xBF) && byte_in(p[3], 0x80, 0xBF)) ||
            // planes 4-15
            (byte_in(b0, 0xF1, 0xF3) && byte_in(p[1], 0x80, 0xBF) &&
             byte_in(p[2], 0x80, 0xBF) && byte_in(p[3], 0x80, 0xBF)) ||
            // plane 16
            (b0 == 0xF4 && byte_in(p[1], 0x80, 0x8F) &&
             byte_in(p[2], 0x80, 0xBF) && byte_in(p[3], 0x80, 0xBF))) {
            p += 4;
            continue;
        }

        return 0;
    }
    return 1;
}

/*
 * Decode every code point of the NUL-terminated string str and check it is a
 * valid, shortest-form Unicode scalar value. Unlike is_utf8 this accepts all
 * ASCII control characters. NULL counts as valid.
 * Returns 1 when valid, 0 otherwise.
 */
int is_valid_utf8(const char* str)
{
    const unsigned char* bytes = (const unsigned char*)str;
    unsigned int cp;
    int num, i;

    if (!str)
        return 1;

    while (*bytes != 0x00) {
        if ((*bytes & 0x80) == 0x00) {
            // U+0000 to U+007F
            cp = (*bytes & 0x7F);
            num = 1;
        } else if ((*bytes & 0xE0) == 0xC0) {
            // U+0080 to U+07FF
            cp = (*bytes & 0x1F);
            num = 2;
        } else if ((*bytes & 0xF0) == 0xE0) {
            // U+0800 to U+FFFF
            cp = (*bytes & 0x0F);
            num = 3;
        } else if ((*bytes & 0xF8) == 0xF0) {
            // U+10000 to U+10FFFF
            cp = (*bytes & 0x07);
            num = 4;
        } else {
            return 0;
        }

        bytes += 1;
        /* A NUL here fails the continuation test, so the loop never runs past the terminator. */
        for (i = 1; i < num; ++i) {
            if ((*bytes & 0xC0) != 0x80)
                return 0;
            cp = (cp << 6) | (*bytes & 0x3F);
            bytes += 1;
        }

        /* Out of range, surrogate, or encoded with more bytes than necessary. */
        if ((cp > 0x10FFFF) ||
            ((cp >= 0xD800) && (cp <= 0xDFFF)) ||
            ((cp <= 0x007F) && (num != 1)) ||
            ((cp >= 0x0080) && (cp <= 0x07FF) && (num != 2)) ||
            ((cp >= 0x0800) && (cp <= 0xFFFF) && (num != 3)) ||
            ((cp >= 0x10000) && (cp <= 0x1FFFFF) && (num != 4)))
            return 0;
    }
    return 1;
}

#ifndef _MSC_VER
/*
 * Return a heap copy of the current LC_CTYPE locale name (NULL on failure).
 * The pointer returned by setlocale() may be overwritten by a later
 * setlocale() call, so it has to be copied before the locale is switched.
 */
static char* save_ctype_locale(void)
{
    const char* current = setlocale(LC_CTYPE, NULL);
    size_t size;
    char* copy;

    if (current == NULL)
        return NULL;
    size = strlen(current) + 1;
    copy = (char*)malloc(size);
    if (copy != NULL)
        memcpy(copy, current, size);
    return copy;
}

/* Restore the locale saved by save_ctype_locale() and release the copy. */
static void restore_ctype_locale(char* saved)
{
    if (saved != NULL)
        setlocale(LC_CTYPE, saved);
    free(saved);
}
#endif

/*
 * Convert the multibyte string inbuf to wide characters.
 *
 * locale_charset  locale name used for the conversion (non-MSVC builds only).
 * inbuf / inlen   NUL-terminated input; inlen == 0 means "nothing to do".
 * outbuf / outlen destination and its capacity in wchar_t units. Pass
 *                 NULL / 0 to query the required capacity (terminator included).
 *
 * Returns the required capacity for a query. Otherwise: non-MSVC returns the
 * number of wide characters written (terminator excluded); MSVC returns the
 * count reported by MultiByteToWideChar (terminator included). Returns 0 for
 * empty input and, on MSVC, for any failure. Non-MSVC returns -1 on failure.
 *
 * NOTE(review): the MSVC branch ignores locale_charset and always converts
 * from CP_ACP -- confirm this is intended for the utf8/gbk callers below.
 */
int s2w(char* locale_charset, char* inbuf, size_t inlen, wchar_t* outbuf, size_t outlen)
{
#ifdef _MSC_VER
    int n = MultiByteToWideChar(CP_ACP, 0, inbuf, -1, NULL, 0);

    if (outbuf == NULL || outlen == 0)
        return n;
    if (n <= 0)
        return 0;
    if ((size_t)n > outlen) {
        /* Too small: report failure instead of tripping the CRT invalid-parameter handler. */
        outbuf[0] = L'\0';
        return 0;
    }
    MultiByteToWideChar(CP_ACP, 0, inbuf, -1, outbuf, n);
    return n;
#else
    size_t needed;
    size_t converted;
    char* origin;
    int result = -1;

    if (inbuf == NULL || inlen == 0) {
        return 0;
    }

    origin = save_ctype_locale();
    WLog_DBG(TAG, "%s: origin locale: %s, aim locale: %s, data:%s", __FUNCTION__, origin ? origin : "", locale_charset, inbuf);
    if (NULL == setlocale(LC_CTYPE, locale_charset)) {
        WLog_DBG(TAG, "setlocale with \"%s\" failed: %d", locale_charset, errno);
        free(origin); /* the locale was not changed, nothing to restore */
        return -1;
    }

    needed = mbstowcs(NULL, inbuf, 0);
    if (needed == (size_t)-1) {
        WLog_DBG(TAG, "mbstowcs failed: %d, %s", errno, strerror(errno));
        goto on_end;
    }
    needed += 1; /* room for the terminator */

    if (outbuf == NULL || outlen == 0) {
        WLog_DBG(TAG, "mbstowcs to fetch need capacity: %d", (int)needed);
        result = (int)needed;
        goto on_end;
    }

    wmemset(outbuf, 0, outlen);
    converted = mbstowcs(outbuf, inbuf, outlen > needed ? needed : outlen);
    if (converted == (size_t)-1) {
        WLog_DBG(TAG, "mbstowcs after new malloc failed: %d", errno);
        goto on_end;
    }
    WLog_DBG(TAG, "after mbstowcs returned: %ls(%d)", outbuf, (int)converted);
    result = (int)converted;

on_end:
    restore_ctype_locale(origin);
    return result;
#endif
}

/*
 * Convert the wide string inbuf to a multibyte string.
 * Parameters and return values mirror s2w(); outlen is in bytes.
 *
 * NOTE(review): the MSVC branch ignores locale_charset and always converts to
 * CP_ACP. The non-MSVC branch logs a failed setlocale() but still converts
 * with the current locale (s2w returns -1 instead) -- kept as found; confirm
 * which behaviour is wanted.
 */
int w2s(char* locale_charset, wchar_t* inbuf, size_t inlen, char* outbuf, size_t outlen)
{
#if defined(_MSC_VER)
    int n = WideCharToMultiByte(CP_ACP, 0, inbuf, -1, NULL, 0, NULL, NULL);

    if (outbuf == NULL || outlen == 0)
        return n;
    if (n <= 0)
        return 0;
    memset(outbuf, 0, sizeof(char) * (outlen));
    if ((size_t)n > outlen)
        return 0; /* too small: outbuf stays an empty string */
    WideCharToMultiByte(CP_ACP, 0, inbuf, -1, outbuf, n, NULL, NULL);
    return n;
#else
    size_t needed;
    size_t converted;
    char* origin;
    int result = -1;

    /* Validate before touching inbuf. */
    if (inbuf == NULL || inlen == 0) {
        return 0;
    }

    origin = save_ctype_locale();
    WLog_DBG(TAG, "%s: origin locale: %s, aim locale: %s, data: %ls", __FUNCTION__, origin ? origin : "", locale_charset, inbuf);
    if (NULL == setlocale(LC_CTYPE, locale_charset)) {
        WLog_ERR(TAG, "setlocale with \"%s\" failed: %d", locale_charset, errno);
    }

    needed = wcstombs(NULL, inbuf, 0);
    if (needed == (size_t)-1) {
        WLog_DBG(TAG, "wcstombs failed: %d", errno);
        goto on_end;
    }
    needed += 1; /* room for the terminator */

    if (outbuf == NULL || outlen == 0) {
        WLog_DBG(TAG, "wcstombs to fetch need capacity: %d", (int)needed);
        result = (int)needed;
        goto on_end;
    }

    memset(outbuf, 0, outlen * sizeof(char));
    converted = wcstombs(outbuf, inbuf, needed > outlen ? outlen : needed);
    if (converted == (size_t)-1) {
        WLog_DBG(TAG, "wcstombs after new malloc failed: %d", errno);
        goto on_end;
    }
    WLog_DBG(TAG, "after wcstombs returned: %s(%d)", outbuf, (int)converted);
    result = (int)converted;

on_end:
    restore_ctype_locale(origin);
    return result;
#endif //_MSC_VER
}

/*
 * Re-encode inbuf from from_locale to to_locale by going through wchar_t.
 * Returns what w2s() returns on success, or the failing s2w() result
 * (<= 0) / 0 on allocation failure.
 */
static int convert_between_locales(char* from_locale, char* to_locale, char* inbuf, size_t inlen, char* outbuf, size_t outlen)
{
    wchar_t* wide;
    int n = s2w(from_locale, inbuf, inlen, NULL, 0);

    if (n <= 0) {
        return n;
    }
    wide = (wchar_t*)calloc((size_t)n, sizeof(wchar_t));
    if (wide == NULL) {
        return 0;
    }
    n = s2w(from_locale, inbuf, inlen, wide, (size_t)n);
    /* Do not hand a failed (negative) count to w2s as a length. */
    if (n > 0) {
        n = w2s(to_locale, wide, (size_t)n, outbuf, outlen);
    }
    free(wide);
    return n;
}

/* Convert UTF-8 inbuf to GBK in outbuf (outlen bytes). See convert_between_locales for the result. */
TOOLKIT_API int toolkit_utf82gbk(char* inbuf, size_t inlen, char* outbuf, size_t outlen)
{
    return convert_between_locales("zh_CN.utf8", "zh_CN.gbk", inbuf, inlen, outbuf, outlen);
}

/* Convert GBK inbuf to UTF-8 in outbuf (outlen bytes). See convert_between_locales for the result. */
TOOLKIT_API int toolkit_gbk2utf8(char* inbuf, size_t inlen, char* outbuf, size_t outlen)
{
    return convert_between_locales("zh_CN.gbk", "zh_CN.utf8", inbuf, inlen, outbuf, outlen);
}

/*
 * Guess a file's encoding from its byte-order mark.
 * Returns unicode (FF FE), unicode_with_big_endian (FE FF),
 * unicode_with_bom (EF BB BF), ansi for anything else that is at least two
 * bytes long, and unknown when the file cannot be opened or is shorter.
 *
 * Only the BOM is inspected, so a UTF-8 file without a BOM is reported as ansi.
 */
TOOLKIT_API char_encoding detect_file_encoding(const char* file_path)
{
    unsigned char head[3] = { 0, 0, 0 };
    char_encoding result = unknown;
    size_t got;
    FILE* fp = fopen(file_path, "rb");

    if (NULL == fp) {
        WLog_ERR(TAG, "fopen(%s) failed: %d", file_path, errno);
        return result;
    }

    /* Read three bytes so the whole UTF-8 BOM can be checked. */
    got = fread(head, 1, sizeof(head), fp);
    if (got >= 2) {
        if (head[0] == 0xFF && head[1] == 0xFE)
            result = unicode;
        else if (head[0] == 0xFE && head[1] == 0xFF)
            result = unicode_with_big_endian;
        else if (got >= 3 && head[0] == 0xEF && head[1] == 0xBB && head[2] == 0xBF)
            result = unicode_with_bom;
        else
            result = ansi;
    }

    WLog_DBG(TAG, "firstchar 0x%X, 0x%X: %d", head[0], head[1], result);
    fclose(fp);
    return result;
}

/*
 * Load the whole file and run is_utf8() on it.
 * Returns 1 when the content is valid UTF-8 (an empty file counts as valid),
 * 0 when it is not, and -1 when the file cannot be opened, sized or buffered.
 * The check stops at the first NUL byte in the file.
 */
TOOLKIT_API int toolkit_detect_utf8_file(const char* file_path)
{
    long file_size;
    size_t got;
    char* buf;
    int result;
    FILE* f = fopen(file_path, "rb");

    if (NULL == f) {
        return -1;
    }

    if (fseek(f, 0, SEEK_END) != 0 || (file_size = ftell(f)) < 0) {
        fclose(f);
        return -1;
    }
    if (0 == file_size) {
        fclose(f);
        return 1;
    }
    if (fseek(f, 0, SEEK_SET) != 0) {
        fclose(f);
        return -1;
    }

    buf = (char*)calloc((size_t)file_size + 1, sizeof(char));
    if (buf == NULL) {
        fclose(f);
        return -1;
    }

    got = fread(buf, 1, (size_t)file_size, f);
    fclose(f);
    buf[got] = '\0'; /* terminate after the bytes actually read */

    result = is_utf8(buf);
    free(buf);
    return result;
}

/* Strict UTF-8 check of a NUL-terminated string; see is_utf8(). Returns 1 or 0. */
TOOLKIT_API int toolkit_detect_utf8_str(const char* str)
{
    return is_utf8(str);
}

/*
 * Lenient structural UTF-8 check: only verifies that every lead byte is
 * followed by the right number of 10xxxxxx continuation bytes. The legacy
 * 5- and 6-byte forms are accepted; overlong forms and surrogates are not
 * detected. Pure ASCII and NULL count as valid.
 * Returns 1 when the structure is consistent, 0 otherwise.
 */
TOOLKIT_API int toolkit_detect_utf8_str2(const char* str)
{
    const unsigned char* p = (const unsigned char*)str;
    unsigned int pending = 0; /* continuation bytes still expected */

    if (str == NULL)
        return 1;

    for (; *p != '\0'; ++p) {
        const unsigned char chr = *p;

        if (pending != 0) {
            /* Inside a multi-byte character: must be 10xxxxxx. */
            if ((chr & 0xC0) != 0x80)
                return 0;
            pending--;
            continue;
        }

        if (chr < 0x80)
            continue; /* ASCII, 0xxxxxxx */

        /* Lead byte: work out how many continuation bytes follow. */
        if (chr >= 0xFE)
            return 0; /* 0xFE / 0xFF never appear in UTF-8 */
        else if (chr >= 0xFC)
            pending = 5;
        else if (chr >= 0xF8)
            pending = 4;
        else if (chr >= 0xF0)
            pending = 3;
        else if (chr >= 0xE0)
            pending = 2;
        else if (chr >= 0xC0)
            pending = 1;
        else
            return 0; /* stray continuation byte */
    }

    /* A character cut off by the terminator breaks the encoding rules. */
    return pending == 0;
}

/* Copy src into dst (dst_size > 0 wchar_t slots), truncating if needed; always terminates. */
static void copy_wcs_bounded(wchar_t* dst, size_t dst_size, const wchar_t* src)
{
    size_t count = wcslen(src);

    if (count >= dst_size)
        count = dst_size - 1;
    memcpy(dst, src, count * sizeof(wchar_t));
    dst[count] = L'\0';
}

/* Copy src into dst (dst_size > 0 bytes), truncating if needed; always terminates. */
static void copy_str_bounded(char* dst, size_t dst_size, const char* src)
{
    size_t count = strlen(src);

    if (count >= dst_size)
        count = dst_size - 1;
    memcpy(dst, src, count);
    dst[count] = '\0';
}

/*
 * Convert the multibyte string src to wide characters using the environment's
 * locale (CP_ACP on MSVC).
 *
 * dst / dst_size  destination and its capacity in wchar_t units; the result is
 *                 truncated to fit and always terminated. Pass dst == NULL to
 *                 query the required capacity (terminator included).
 *
 * Returns 0 for NULL/empty input or dst_size == 0. Non-MSVC: the length of
 * the stored string, or (size_t)-1 on failure. MSVC: the count reported by
 * MultiByteToWideChar (terminator included), or 0 on failure.
 */
size_t toolkit_mbs2wcs(const char* src, wchar_t* dst, size_t dst_size)
{
#ifdef _MSC_VER
    wchar_t* wstr;
    int n;

    if (src == NULL)
        return 0;
    n = MultiByteToWideChar(CP_ACP, 0, src, -1, NULL, 0);
    if (n <= 0)
        return 0;
    if (dst == NULL)
        return (size_t)n;
    if (dst_size == 0)
        return 0;

    wstr = (wchar_t*)calloc((size_t)n + 1, sizeof(wchar_t));
    if (wstr == NULL) {
        return 0;
    }
    MultiByteToWideChar(CP_ACP, 0, src, -1, &wstr[0], n);
    copy_wcs_bounded(dst, dst_size, wstr);
    free(wstr);
    return (size_t)n;
#else
    size_t len;
    wchar_t* tmp = NULL;
    char* origin;

    if (src == NULL || src[0] == '\0') {
        return 0;
    }

    origin = save_ctype_locale();
    WLog_DBG(TAG, "%s: origin locale: %s, data:%s", __FUNCTION__, origin ? origin : "", src);
    setlocale(LC_CTYPE, "");

    len = mbstowcs(NULL, src, 0);
    if (len == (size_t)-1) {
        WLog_DBG(TAG, "mbstowcs failed: %d", errno);
        goto on_end;
    }
    len += 1; /* room for the terminator */

    if (dst == NULL) {
        WLog_DBG(TAG, "mbstowcs to fetch need capacity: %d", (int)len);
        goto on_end;
    }
    if (dst_size == 0) {
        len = 0;
        goto on_end;
    }

    /* Convert into a scratch buffer so dst is untouched when conversion fails. */
    tmp = (wchar_t*)calloc(len, sizeof(wchar_t));
    if (tmp == NULL) {
        len = (size_t)-1;
        goto on_end;
    }
    len = mbstowcs(tmp, src, len);
    if (len == (size_t)-1) {
        WLog_DBG(TAG, "mbstowcs after new malloc failed: %d", errno);
        goto on_del;
    }

    wmemset(dst, 0, dst_size);
    copy_wcs_bounded(dst, dst_size, tmp);
    len = wcslen(dst);
    WLog_DBG(TAG, "after mbstowcs returned: %ls(%d)", dst, (int)len);

on_del:
    free(tmp);
on_end:
    restore_ctype_locale(origin);
    return len;
#endif
}

/*
 * Convert the wide string src to a multibyte string using the environment's
 * locale (CP_ACP on MSVC).
 *
 * dst / dst_size  destination and its capacity in bytes; the result is
 *                 truncated to fit (possibly in the middle of a multibyte
 *                 character) and always terminated. Pass dst == NULL to query
 *                 the required capacity (terminator included).
 *
 * Returns 0 for NULL/empty input or dst_size == 0. Non-MSVC: the length of
 * the stored string, or (size_t)-1 on failure. MSVC: the count reported by
 * WideCharToMultiByte (terminator included), or 0 on failure.
 */
size_t toolkit_wcs2mbs(const wchar_t* src, char* dst, size_t dst_size)
{
#if defined(_MSC_VER)
    char* str;
    int n;

    if (src == NULL)
        return 0;
    n = WideCharToMultiByte(CP_ACP, 0, src, -1, NULL, 0, NULL, NULL);
    if (n <= 0)
        return 0;
    if (dst == NULL)
        return (size_t)n;
    if (dst_size == 0)
        return 0;

    str = (char*)calloc((size_t)n + 1, sizeof(char));
    if (str == NULL) {
        return 0;
    }
    WideCharToMultiByte(CP_ACP, 0, src, -1, &str[0], n, NULL, NULL);
    memset(dst, 0, sizeof(char) * (dst_size));
    copy_str_bounded(dst, dst_size, str);
    free(str);
    return (size_t)n;
#else
    size_t len;
    char* tmp = NULL;
    char* origin;

    if (src == NULL || src[0] == L'\0') {
        return 0;
    }

    origin = save_ctype_locale();
    WLog_DBG(TAG, "%s: origin locale: %s, data: %ls", __FUNCTION__, origin ? origin : "", src);
    setlocale(LC_CTYPE, "");

    len = wcstombs(NULL, src, 0);
    if (len == (size_t)-1) {
        WLog_DBG(TAG, "wcstombs failed: %d", errno);
        goto on_end;
    }
    len += 1; /* room for the terminator */

    if (dst == NULL) {
        WLog_DBG(TAG, "wcstombs to fetch need capacity: %d", (int)len);
        goto on_end;
    }
    if (dst_size == 0) {
        len = 0;
        goto on_end;
    }

    /* Convert into a scratch buffer so dst is untouched when conversion fails. */
    tmp = (char*)calloc(len, sizeof(char));
    if (tmp == NULL) {
        len = (size_t)-1;
        goto on_end;
    }
    len = wcstombs(tmp, src, len);
    if (len == (size_t)-1) {
        WLog_DBG(TAG, "wcstombs after new malloc failed: %d", errno);
        goto on_del;
    }

    memset(dst, 0, dst_size * sizeof(char));
    copy_str_bounded(dst, dst_size, tmp);
    len = strlen(dst);
    WLog_DBG(TAG, "after wcstombs returned: %s(%d)", dst, (int)len);

on_del:
    free(tmp);
on_end:
    restore_ctype_locale(origin);
    return len;
#endif //_MSC_VER
}

#ifdef _MSC_VER
/*
 * Convert the NUL-terminated string src from code page from_cp to to_cp.
 * Returns a malloc'ed, NUL-terminated string the caller must free(), or NULL
 * when memory cannot be allocated. When out_len is not NULL it receives the
 * converted length without the terminator.
 */
static char* convert_codepage_alloc(const char* src, UINT from_cp, UINT to_cp, int* out_len)
{
    WCHAR* wide;
    char* out;
    int len;
    int wlen = MultiByteToWideChar(from_cp, 0, src, -1, NULL, 0);

    if (wlen < 0)
        wlen = 0;
    wide = (WCHAR*)calloc((size_t)wlen + 1, sizeof(WCHAR));
    if (wide == NULL)
        return NULL;
    MultiByteToWideChar(from_cp, 0, src, -1, wide, wlen);

    len = WideCharToMultiByte(to_cp, 0, wide, -1, NULL, 0, NULL, NULL);
    if (len < 0)
        len = 0;
    out = (char*)calloc((size_t)len + 1, sizeof(char));
    if (out != NULL) {
        WideCharToMultiByte(to_cp, 0, wide, -1, out, len, NULL, NULL);
        if (out_len != NULL)
            *out_len = len - 1;
    }
    free(wide);
    return out;
}
#endif

/*
 * MSVC only: convert an ANSI-code-page string to a newly allocated UTF-8
 * string; *n (when n is not NULL) receives its length without the terminator.
 * The caller frees the result. Returns NULL on allocation failure and always
 * on non-MSVC builds, where n is not written.
 */
TOOLKIT_API char* ConvertGBKToUtf8(const char* gbk, int* n)
{
#ifdef _MSC_VER
    return convert_codepage_alloc(gbk, CP_ACP, CP_UTF8, n);
#else
    return NULL;
#endif
}

/*
 * MSVC only: convert a UTF-8 string to a newly allocated ANSI-code-page
 * string. The caller frees the result. Returns NULL on allocation failure and
 * always on non-MSVC builds.
 */
TOOLKIT_API char* ConvertUtf8ToGBK(const char* strUtf8)
{
#ifdef _MSC_VER
    return convert_codepage_alloc(strUtf8, CP_UTF8, CP_ACP, NULL);
#else
    return NULL;
#endif
}