From 0c2ff1f738471f32162abe74372cb7984aaec044 Mon Sep 17 00:00:00 2001 From: Clinton Stimpson <clinton@elemtech.com> Date: Wed, 30 Oct 2013 21:40:03 -0600 Subject: [PATCH] Encoding: Add Encoding module. The 8 bit encoding is UTF-8. Change-Id: If54262c09777effcbffac30481405e56c6605dd7 --- CMakeLists.txt | 30 +++++-- Configure.hxx.in | 4 + Encoding.h.in | 79 +++++++++++++++++++ Encoding.hxx.in | 57 ++++++++++++++ EncodingC.c | 79 +++++++++++++++++++ EncodingCXX.cxx | 88 +++++++++++++++++++++ kwsysPlatformTestsCXX.cxx | 6 ++ testEncoding.cxx | 159 ++++++++++++++++++++++++++++++++++++++ 8 files changed, 497 insertions(+), 5 deletions(-) create mode 100644 Encoding.h.in create mode 100644 Encoding.hxx.in create mode 100644 EncodingC.c create mode 100644 EncodingCXX.cxx create mode 100644 testEncoding.cxx diff --git a/CMakeLists.txt b/CMakeLists.txt index 0f27836..c26cdb7 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -112,6 +112,7 @@ IF(KWSYS_STANDALONE OR CMake_SOURCE_DIR) SET(KWSYS_USE_Base64 1) SET(KWSYS_USE_Directory 1) SET(KWSYS_USE_DynamicLoader 1) + SET(KWSYS_USE_Encoding 1) SET(KWSYS_USE_Glob 1) SET(KWSYS_USE_MD5 1) SET(KWSYS_USE_Process 1) @@ -506,6 +507,12 @@ IF(KWSYS_USE_FundamentalType) "Checking whether char is signed" DIRECT) ENDIF(KWSYS_USE_FundamentalType) +IF(KWSYS_USE_Encoding) + # Look for type size helper macros. + KWSYS_PLATFORM_CXX_TEST(KWSYS_STL_HAS_WSTRING + "Checking whether wstring is available" DIRECT) +ENDIF(KWSYS_USE_Encoding) + IF(KWSYS_USE_IOStream) # Determine whether iostreams support long long. SET(KWSYS_PLATFORM_CXX_TEST_DEFINES @@ -861,7 +868,7 @@ SET(KWSYS_HXX_FILES Configure String # Add selected C++ classes. SET(cppclasses - Directory DynamicLoader Glob RegularExpression SystemTools + Directory DynamicLoader Encoding Glob RegularExpression SystemTools CommandLineArguments IOStream SystemInformation ) FOREACH(cpp ${cppclasses}) @@ -878,7 +885,7 @@ ENDFOREACH(cpp) # Add selected C components. FOREACH(c - Process Base64 FundamentalType MD5 Terminal System String CPU + Process Base64 Encoding FundamentalType MD5 Terminal System String CPU ) IF(KWSYS_USE_${c}) # Use the corresponding header file. @@ -909,16 +916,24 @@ IF(KWSYS_USE_Process) ENDIF(KWSYS_USE_Process) # Add selected C sources. -FOREACH(c Base64 MD5 Terminal System String) +FOREACH(c Base64 Encoding MD5 Terminal System String) IF(KWSYS_USE_${c}) - SET(KWSYS_C_SRCS ${KWSYS_C_SRCS} ${c}.c) + IF(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${c}C.c) + LIST(APPEND KWSYS_C_SRCS ${c}C.c) + ELSE() + LIST(APPEND KWSYS_C_SRCS ${c}.c) + ENDIF() ENDIF(KWSYS_USE_${c}) ENDFOREACH(c) # Configure headers of C++ classes and construct the list of sources. FOREACH(c ${KWSYS_CLASSES}) # Add this source to the list of source files for the library. - SET(KWSYS_CXX_SRCS ${KWSYS_CXX_SRCS} ${c}.cxx) + IF(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${c}CXX.cxx) + LIST(APPEND KWSYS_CXX_SRCS ${c}CXX.cxx) + ELSE() + LIST(APPEND KWSYS_CXX_SRCS ${c}.cxx) + ENDIF() # Configure the header for this class. CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/${c}.hxx.in ${KWSYS_HEADER_DIR}/${c}.hxx @@ -1087,6 +1102,11 @@ IF(KWSYS_STANDALONE OR CMake_SOURCE_DIR) testCommandLineArguments testCommandLineArguments1 ) + IF(KWSYS_STL_HAS_WSTRING) + SET(KWSYS_CXX_TESTS ${KWSYS_CXX_TESTS} + testEncoding + ) + ENDIF(KWSYS_STL_HAS_WSTRING) IF(KWSYS_USE_SystemInformation) SET(KWSYS_CXX_TESTS ${KWSYS_CXX_TESTS} testSystemInformation) ENDIF(KWSYS_USE_SystemInformation) diff --git a/Configure.hxx.in b/Configure.hxx.in index 716b84f..8f5ace2 100644 --- a/Configure.hxx.in +++ b/Configure.hxx.in @@ -36,6 +36,9 @@ /* Whether STL is in std namespace. */ #define @KWSYS_NAMESPACE@_STL_HAVE_STD @KWSYS_STL_HAVE_STD@ +/* Whether wstring is available. */ +#define @KWSYS_NAMESPACE@_STL_HAS_WSTRING @KWSYS_STL_HAS_WSTRING@ + /* Whether the STL string has operator<< for ostream. */ #define @KWSYS_NAMESPACE@_STL_STRING_HAVE_OSTREAM @KWSYS_STL_STRING_HAVE_OSTREAM@ @@ -170,6 +173,7 @@ # define KWSYS_STL_HAS_ALLOCATOR_TEMPLATE @KWSYS_NAMESPACE@_STL_HAS_ALLOCATOR_TEMPLATE # define KWSYS_STL_HAS_ALLOCATOR_NONTEMPLATE @KWSYS_NAMESPACE@_STL_HAS_ALLOCATOR_NONTEMPLATE # define KWSYS_STL_HAS_ALLOCATOR_OBJECTS @KWSYS_NAMESPACE@_STL_HAS_ALLOCATOR_OBJECTS +# define KWSYS_STL_HAS_WSTRING @KWSYS_NAMESPACE@_STL_HAS_WSTRING #endif #endif diff --git a/Encoding.h.in b/Encoding.h.in new file mode 100644 index 0000000..591c5a8 --- /dev/null +++ b/Encoding.h.in @@ -0,0 +1,79 @@ +/*============================================================================ + KWSys - Kitware System Library + Copyright 2000-2009 Kitware, Inc., Insight Software Consortium + + Distributed under the OSI-approved BSD License (the "License"); + see accompanying file Copyright.txt for details. + + This software is distributed WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the License for more information. +============================================================================*/ +#ifndef @KWSYS_NAMESPACE@_Encoding_h +#define @KWSYS_NAMESPACE@_Encoding_h + +#include <@KWSYS_NAMESPACE@/Configure.h> +#include <wchar.h> + +/* Redefine all public interface symbol names to be in the proper + namespace. These macros are used internally to kwsys only, and are + not visible to user code. Use kwsysHeaderDump.pl to reproduce + these macros after making changes to the interface. */ +#if !defined(KWSYS_NAMESPACE) +# define kwsys_ns(x) @KWSYS_NAMESPACE@##x +# define kwsysEXPORT @KWSYS_NAMESPACE@_EXPORT +#endif +#if !@KWSYS_NAMESPACE@_NAME_IS_KWSYS +# define kwsysEncoding kwsys_ns(Encoding) +# define kwsysEncoding_mbstowcs kwsys_ns(Encoding_mbstowcs) +# define kwsysEncoding_DupToWide kwsys_ns(Encoding_DupToWide) +# define kwsysEncoding_wcstombs kwsys_ns(Encoding_wcstombs) +# define kwsysEncoding_DupToNarrow kwsys_ns(Encoding_DupToNarrow) +#endif + +#if defined(__cplusplus) +extern "C" +{ +#endif + + +/* Convert a narrow string to a wide string. + On Windows, UTF-8 is assumed, and on other platforms, + the current locale is assumed. + */ +kwsysEXPORT size_t kwsysEncoding_mbstowcs(wchar_t* dest, const char* src, size_t n); + +/* Convert a narrow string to a wide string. + This can return NULL if the conversion fails. */ +kwsysEXPORT wchar_t* kwsysEncoding_DupToWide(const char* src); + + +/* Convert a wide string to a narrow string. + On Windows, UTF-8 is assumed, and on other platforms, + the current locale is assumed. */ +kwsysEXPORT size_t kwsysEncoding_wcstombs(char* dest, const wchar_t* src, size_t n); + +/* Convert a wide string to a narrow string. + This can return NULL if the conversion fails. */ +kwsysEXPORT char* kwsysEncoding_DupToNarrow(const wchar_t* str); + + +#if defined(__cplusplus) +} /* extern "C" */ +#endif + +/* If we are building a kwsys .c or .cxx file, let it use these macros. + Otherwise, undefine them to keep the namespace clean. */ +#if !defined(KWSYS_NAMESPACE) +# undef kwsys_ns +# undef kwsysEXPORT +# if !defined(KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS +# undef kwsysEncoding +# undef kwsysEncoding_mbstowcs +# undef kwsysEncoding_DupToWide +# undef kwsysEncoding_wcstombs +# undef kwsysEncoding_DupToNarrow +# endif +#endif + +#endif diff --git a/Encoding.hxx.in b/Encoding.hxx.in new file mode 100644 index 0000000..8278d68 --- /dev/null +++ b/Encoding.hxx.in @@ -0,0 +1,57 @@ +/*============================================================================ + KWSys - Kitware System Library + Copyright 2000-2009 Kitware, Inc., Insight Software Consortium + + Distributed under the OSI-approved BSD License (the "License"); + see accompanying file Copyright.txt for details. + + This software is distributed WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the License for more information. +============================================================================*/ +#ifndef @KWSYS_NAMESPACE@_Encoding_hxx +#define @KWSYS_NAMESPACE@_Encoding_hxx + +#include <@KWSYS_NAMESPACE@/Configure.hxx> +#include <@KWSYS_NAMESPACE@/String.h> +#include <@KWSYS_NAMESPACE@/stl/string> + +/* Define these macros temporarily to keep the code readable. */ +#if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS +# define kwsys_stl @KWSYS_NAMESPACE@_stl +#endif + +namespace @KWSYS_NAMESPACE@ +{ +class @KWSYS_NAMESPACE@_EXPORT Encoding +{ +public: + /** + * Convert between char and wchar_t + */ + +#if @KWSYS_NAMESPACE@_STL_HAS_WSTRING + + // Convert a narrow string to a wide string. + // On Windows, UTF-8 is assumed, and on other platforms, + // the current locale is assumed. + static kwsys_stl::wstring ToWide(const kwsys_stl::string& str); + static kwsys_stl::wstring ToWide(const char* str); + + // Convert a wide string to a narrow string. + // On Windows, UTF-8 is assumed, and on other platforms, + // the current locale is assumed. + static kwsys_stl::string ToNarrow(const kwsys_stl::wstring& str); + static kwsys_stl::string ToNarrow(const wchar_t* str); + +#endif // @KWSYS_NAMESPACE@_STL_HAS_WSTRING + +}; // class Encoding +} // namespace @KWSYS_NAMESPACE@ + +/* Undefine temporary macros. */ +#if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS +# undef kwsys_stl +#endif + +#endif diff --git a/EncodingC.c b/EncodingC.c new file mode 100644 index 0000000..feff969 --- /dev/null +++ b/EncodingC.c @@ -0,0 +1,79 @@ +/*============================================================================ + KWSys - Kitware System Library + Copyright 2000-2009 Kitware, Inc., Insight Software Consortium + + Distributed under the OSI-approved BSD License (the "License"); + see accompanying file Copyright.txt for details. + + This software is distributed WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the License for more information. +============================================================================*/ +#include "kwsysPrivate.h" +#include KWSYS_HEADER(Encoding.h) + +/* Work-around CMake dependency scanning limitation. This must + duplicate the above list of headers. */ +#if 0 +# include "Encoding.h.in" +#endif + +#include <stdlib.h> + +#ifdef _WIN32 +#include <windows.h> +#endif + +size_t kwsysEncoding_mbstowcs(wchar_t* dest, const char* str, size_t n) +{ + if(str == 0) + { + return (size_t)-1; + } +#ifdef _WIN32 + return MultiByteToWideChar(CP_UTF8, 0, + str, -1, dest, (int)n) - 1; +#else + return mbstowcs(dest, str, n); +#endif +} + +wchar_t* kwsysEncoding_DupToWide(const char* str) +{ + wchar_t* ret = NULL; + size_t length = kwsysEncoding_mbstowcs(NULL, str, 0) + 1; + if(length > 0) + { + ret = malloc((length)*sizeof(wchar_t)); + ret[0] = 0; + kwsysEncoding_mbstowcs(ret, str, length); + } + return ret; +} + +size_t kwsysEncoding_wcstombs(char* dest, const wchar_t* str, size_t n) +{ + if(str == 0) + { + return (size_t)-1; + } +#ifdef _WIN32 + return WideCharToMultiByte(CP_UTF8, 0, str, -1, + dest, (int)n, NULL, NULL) - 1; +#else + return wcstombs(dest, str, n); +#endif +} + +char* kwsysEncoding_DupToNarrow(const wchar_t* str) +{ + char* ret = NULL; + size_t length = kwsysEncoding_wcstombs(0, str, 0); + if(length > 0) + { + ret = malloc(length); + ret[0] = 0; + kwsysEncoding_wcstombs(ret, str, length); + } + return ret; +} diff --git a/EncodingCXX.cxx b/EncodingCXX.cxx new file mode 100644 index 0000000..aebc148 --- /dev/null +++ b/EncodingCXX.cxx @@ -0,0 +1,88 @@ +/*============================================================================ + KWSys - Kitware System Library + Copyright 2000-2009 Kitware, Inc., Insight Software Consortium + + Distributed under the OSI-approved BSD License (the "License"); + see accompanying file Copyright.txt for details. + + This software is distributed WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the License for more information. +============================================================================*/ + +#ifdef __osf__ +# define _OSF_SOURCE +# define _POSIX_C_SOURCE 199506L +# define _XOPEN_SOURCE_EXTENDED +#endif + +#include "kwsysPrivate.h" +#include KWSYS_HEADER(Encoding.hxx) +#include KWSYS_HEADER(Encoding.h) +#include KWSYS_HEADER(stl/vector) + +// Work-around CMake dependency scanning limitation. This must +// duplicate the above list of headers. +#if 0 +# include "Encoding.hxx.in" +# include "Encoding.h.in" +#endif + +#include <stdlib.h> + +#ifdef _MSC_VER +# pragma warning (disable: 4786) +#endif + +// Windows API. +#if defined(_WIN32) +# include <windows.h> +#endif + +namespace KWSYS_NAMESPACE +{ + +#if KWSYS_STL_HAS_WSTRING + +kwsys_stl::wstring Encoding::ToWide(const kwsys_stl::string& str) +{ + return ToWide(str.c_str()); +} + +kwsys_stl::string Encoding::ToNarrow(const kwsys_stl::wstring& str) +{ + return ToNarrow(str.c_str()); +} + +kwsys_stl::wstring Encoding::ToWide(const char* cstr) +{ + kwsys_stl::wstring wstr; + size_t length = kwsysEncoding_mbstowcs(0, cstr, 0) + 1; + if(length > 0) + { + kwsys_stl::vector<wchar_t> wchars(length); + if(kwsysEncoding_mbstowcs(&wchars[0], cstr, length) > 0) + { + wstr = &wchars[0]; + } + } + return wstr; +} + +kwsys_stl::string Encoding::ToNarrow(const wchar_t* wcstr) +{ + kwsys_stl::string str; + size_t length = kwsysEncoding_wcstombs(0, wcstr, 0) + 1; + if(length > 0) + { + std::vector<char> chars(length); + if(kwsysEncoding_wcstombs(&chars[0], wcstr, length) > 0) + { + str = &chars[0]; + } + } + return str; +} +#endif // KWSYS_STL_HAS_WSTRING + +} // namespace KWSYS_NAMESPACE diff --git a/kwsysPlatformTestsCXX.cxx b/kwsysPlatformTestsCXX.cxx index be7a09e..3f947f3 100644 --- a/kwsysPlatformTestsCXX.cxx +++ b/kwsysPlatformTestsCXX.cxx @@ -674,3 +674,9 @@ int main() return a; } #endif + +#ifdef TEST_KWSYS_STL_HAS_WSTRING +#include <string> +void f(std ::wstring*) {} +int main() { return 0; } +#endif diff --git a/testEncoding.cxx b/testEncoding.cxx new file mode 100644 index 0000000..8e74a50 --- /dev/null +++ b/testEncoding.cxx @@ -0,0 +1,159 @@ +/*============================================================================ + KWSys - Kitware System Library + Copyright 2000-2009 Kitware, Inc., Insight Software Consortium + + Distributed under the OSI-approved BSD License (the "License"); + see accompanying file Copyright.txt for details. + + This software is distributed WITHOUT ANY WARRANTY; without even the + implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + See the License for more information. +============================================================================*/ +#include "kwsysPrivate.h" + +#if defined(_MSC_VER) +# pragma warning (disable:4786) +#endif + +#include KWSYS_HEADER(Encoding.hxx) +#include KWSYS_HEADER(ios/iostream) + +#include <locale.h> + +// Work-around CMake dependency scanning limitation. This must +// duplicate the above list of headers. +#if 0 +# include "Encoding.hxx.in" +# include "kwsys_ios_iostream.h.in" +#endif + +//---------------------------------------------------------------------------- +static const unsigned char helloWorldStrings[][32] = +{ + // English + {'H','e','l','l','o',' ','W','o','r','l','d',0}, + // Japanese + {0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, + 0x81, 0xA1, 0xE3, 0x81, 0xAF, 0xE4, 0xB8, 0x96, 0xE7, 0x95, + 0x8C, 0}, + // Arabic + {0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7, + 0x20, 0xD8, 0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, + 0x84, 0xD9, 0x85, 0}, + // Yiddish + {0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90, 0x20, 0xD7, + 0x95, 0xD7, 0x95, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x98, 0}, + // Russian + {0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, + 0xD1, 0x82, 0x20, 0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80, 0}, + // Latin + {0x4D, 0x75, 0x6E, 0x64, 0x75, 0x73, 0x20, 0x73, 0x61, 0x6C, + 0x76, 0x65, 0}, + // Swahili + {0x68, 0x75, 0x6A, 0x61, 0x6D, 0x62, 0x6F, 0x20, 0x44, 0x75, + 0x6E, 0x69, 0x61, 0}, + // Icelandic + {0x48, 0x61, 0x6C, 0x6C, 0xC3, 0xB3, 0x20, 0x68, 0x65, 0x69, + 0x6D, 0x75, 0x72, 0}, + {0} +}; + +//---------------------------------------------------------------------------- +static int testHelloWorldEncoding() +{ + int ret = 0; + for(int i=0; helloWorldStrings[i][0] != 0; i++) + { + std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]); + std::cout << str << std::endl; + std::wstring wstr = kwsys::Encoding::ToWide(str); + std::string str2 = kwsys::Encoding::ToNarrow(wstr); + if(!wstr.empty() && str != str2) + { + std::cout << "converted string was different: " << str2 << std::endl; + ret++; + } + } + return ret; +} + +static int testRobustEncoding() +{ + // test that the conversion functions handle invalid + // unicode correctly/gracefully + + int ret = 0; + char cstr[] = {(char)-1, 0}; + // this conversion could fail + std::wstring wstr = kwsys::Encoding::ToWide(cstr); + + wstr = kwsys::Encoding::ToWide(NULL); + if(wstr != L"") + { + const wchar_t* wcstr = wstr.c_str(); + std::cout << "ToWide(NULL) returned"; + for(size_t i=0; i<wstr.size(); i++) + { + std::cout << " " << std::hex << (int)wcstr[i]; + } + std::cout << std::endl; + ret++; + } + wstr = kwsys::Encoding::ToWide(""); + if(wstr != L"") + { + const wchar_t* wcstr = wstr.c_str(); + std::cout << "ToWide(\"\") returned"; + for(size_t i=0; i<wstr.size(); i++) + { + std::cout << " " << std::hex << (int)wcstr[i]; + } + std::cout << std::endl; + ret++; + } + +#ifdef WIN32 + // 16 bit wchar_t - we make an invalid surrogate pair + wchar_t cwstr[] = {0xD801, 0xDA00, 0}; + // this conversion could fail + std::string win_str = kwsys::Encoding::ToNarrow(cwstr); +#endif + + std::string str = kwsys::Encoding::ToNarrow(NULL); + if(str != "") + { + std::cout << "ToNarrow(NULL) returned " << str << std::endl; + ret++; + } + + str = kwsys::Encoding::ToNarrow(L""); + if(wstr != L"") + { + std::cout << "ToNarrow(\"\") returned " << str << std::endl; + ret++; + } + + return ret; +} + + +//---------------------------------------------------------------------------- +int testEncoding(int, char*[]) +{ + const char* loc = setlocale(LC_ALL, ""); + if(loc) + { + std::cout << "Locale: " << loc << std::endl; + } + else + { + std::cout << "Locale: None" << std::endl; + } + + int ret = 0; + + ret |= testHelloWorldEncoding(); + ret |= testRobustEncoding(); + + return ret; +} -- GitLab