From f069db91c7d26faf97e74dea1e6ff62fdfbb1779 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?D=C4=81vis=20Mos=C4=81ns?= <davispuh@gmail.com> Date: Wed, 9 Nov 2016 23:27:53 +0200 Subject: [PATCH] Encoding: Fix conversion of strings that contain null bytes The `std::string` and `std::wstring` types are allowed to contain null (`0`) bytes. Teach Encoding::{ToNarrow,ToWide} to preserve them. Change-Id: If9ddb77d275c1365016f2f1fa811d0c06e46d5f2 --- CMakeLists.txt | 2 +- EncodingCXX.cxx | 60 ++++++++++++++++++++++++++++++++++++++++++++++-- testEncoding.cxx | 31 +++++++++++++++++++++++++ 3 files changed, 90 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index ac6be34..9eb3b2d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -879,7 +879,7 @@ ENDIF() IF(KWSYS_USE_Encoding) # Set default 8 bit encoding in "EndcodingC.c". - SET_PROPERTY(SOURCE EncodingC.c APPEND PROPERTY COMPILE_DEFINITIONS + SET_PROPERTY(SOURCE EncodingC.c EncodingCXX.cxx APPEND PROPERTY COMPILE_DEFINITIONS KWSYS_ENCODING_DEFAULT_CODEPAGE=${KWSYS_ENCODING_DEFAULT_CODEPAGE}) ENDIF() diff --git a/EncodingCXX.cxx b/EncodingCXX.cxx index 5c58bcb..e904c1a 100644 --- a/EncodingCXX.cxx +++ b/EncodingCXX.cxx @@ -125,12 +125,68 @@ char const* const* Encoding::CommandLineArguments::argv() const std::wstring Encoding::ToWide(const std::string& str) { - return ToWide(str.c_str()); + std::wstring wstr; +#if defined(_WIN32) + const int wlength = MultiByteToWideChar( + KWSYS_ENCODING_DEFAULT_CODEPAGE, 0, str.data(), int(str.size()), NULL, 0); + if (wlength > 0) { + wchar_t* wdata = new wchar_t[wlength]; + int r = MultiByteToWideChar(KWSYS_ENCODING_DEFAULT_CODEPAGE, 0, str.data(), + int(str.size()), wdata, wlength); + if (r > 0) { + wstr = std::wstring(wdata, wlength); + } + delete[] wdata; + } +#else + size_t pos = 0; + size_t nullPos = 0; + do { + if (pos < str.size() && str.at(pos) != '\0') { + wstr += ToWide(str.c_str() + pos); + } + nullPos = str.find('\0', pos); + if (nullPos != str.npos) { + pos = nullPos + 1; + wstr += wchar_t('\0'); + } + } while (nullPos != str.npos); +#endif + return wstr; } std::string Encoding::ToNarrow(const std::wstring& str) { - return ToNarrow(str.c_str()); + std::string nstr; +#if defined(_WIN32) + int length = + WideCharToMultiByte(KWSYS_ENCODING_DEFAULT_CODEPAGE, 0, str.c_str(), + int(str.size()), NULL, 0, NULL, NULL); + if (length > 0) { + char* data = new char[length]; + int r = + WideCharToMultiByte(KWSYS_ENCODING_DEFAULT_CODEPAGE, 0, str.c_str(), + int(str.size()), data, length, NULL, NULL); + if (r > 0) { + nstr = std::string(data, length); + } + delete[] data; + } +#else + size_t pos = 0; + size_t nullPos = 0; + do { + if (pos < str.size() && str.at(pos) != '\0') { + nstr += ToNarrow(str.c_str() + pos); + } + nullPos = str.find(wchar_t('\0'), pos); + if (nullPos != str.npos) { + pos = nullPos + 1; + nstr += '\0'; + } + } while (nullPos != str.npos); +#endif + return nstr; } std::wstring Encoding::ToWide(const char* cstr) diff --git a/testEncoding.cxx b/testEncoding.cxx index 996976f..03f2ec9 100644 --- a/testEncoding.cxx +++ b/testEncoding.cxx @@ -9,6 +9,7 @@ #include KWSYS_HEADER(Encoding.hxx) #include KWSYS_HEADER(Encoding.h) +#include <algorithm> #include <iostream> #include <locale.h> #include <stdlib.h> @@ -124,6 +125,35 @@ static int testRobustEncoding() return ret; } +static int testWithNulls() +{ + int ret = 0; + std::vector<std::string> strings; + strings.push_back(std::string("ab") + '\0' + 'c'); + strings.push_back(std::string("d") + '\0' + '\0' + 'e'); + strings.push_back(std::string() + '\0' + 'f'); + strings.push_back(std::string() + '\0' + '\0' + "gh"); + strings.push_back(std::string("ij") + '\0'); + strings.push_back(std::string("k") + '\0' + '\0'); + strings.push_back(std::string("\0\0\0\0", 4) + "lmn" + + std::string("\0\0\0\0", 4)); + for (std::vector<std::string>::iterator it = strings.begin(); + it != strings.end(); ++it) { + std::wstring wstr = kwsys::Encoding::ToWide(*it); + std::string str = kwsys::Encoding::ToNarrow(wstr); + std::string s(*it); + std::replace(s.begin(), s.end(), '\0', ' '); + std::cout << "'" << s << "' (" << it->size() << ")" << std::endl; + if (str != *it) { + std::replace(str.begin(), str.end(), '\0', ' '); + std::cout << "string with null was different: '" << str << "' (" + << str.size() << ")" << std::endl; + ret++; + } + } + return ret; +} + static int testCommandLineArguments() { int status = 0; @@ -165,6 +195,7 @@ int testEncoding(int, char* []) ret |= testHelloWorldEncoding(); ret |= testRobustEncoding(); ret |= testCommandLineArguments(); + ret |= testWithNulls(); return ret; } -- GitLab