Commit 0c2ff1f7 authored by Clinton Stimpson's avatar Clinton Stimpson

Encoding: Add Encoding module.

The 8 bit encoding is UTF-8.

Change-Id: If54262c09777effcbffac30481405e56c6605dd7
parent f67bb2ba
......@@ -112,6 +112,7 @@ IF(KWSYS_STANDALONE OR CMake_SOURCE_DIR)
SET(KWSYS_USE_Base64 1)
SET(KWSYS_USE_Directory 1)
SET(KWSYS_USE_DynamicLoader 1)
SET(KWSYS_USE_Encoding 1)
SET(KWSYS_USE_Glob 1)
SET(KWSYS_USE_MD5 1)
SET(KWSYS_USE_Process 1)
......@@ -506,6 +507,12 @@ IF(KWSYS_USE_FundamentalType)
"Checking whether char is signed" DIRECT)
ENDIF(KWSYS_USE_FundamentalType)
IF(KWSYS_USE_Encoding)
# Check whether the C++ standard library provides wstring.
KWSYS_PLATFORM_CXX_TEST(KWSYS_STL_HAS_WSTRING
"Checking whether wstring is available" DIRECT)
ENDIF(KWSYS_USE_Encoding)
IF(KWSYS_USE_IOStream)
# Determine whether iostreams support long long.
SET(KWSYS_PLATFORM_CXX_TEST_DEFINES
......@@ -861,7 +868,7 @@ SET(KWSYS_HXX_FILES Configure String
# Add selected C++ classes.
SET(cppclasses
Directory DynamicLoader Glob RegularExpression SystemTools
Directory DynamicLoader Encoding Glob RegularExpression SystemTools
CommandLineArguments IOStream SystemInformation
)
FOREACH(cpp ${cppclasses})
......@@ -878,7 +885,7 @@ ENDFOREACH(cpp)
# Add selected C components.
FOREACH(c
Process Base64 FundamentalType MD5 Terminal System String CPU
Process Base64 Encoding FundamentalType MD5 Terminal System String CPU
)
IF(KWSYS_USE_${c})
# Use the corresponding header file.
......@@ -909,16 +916,24 @@ IF(KWSYS_USE_Process)
ENDIF(KWSYS_USE_Process)
# Add selected C sources.
FOREACH(c Base64 MD5 Terminal System String)
FOREACH(c Base64 Encoding MD5 Terminal System String)
IF(KWSYS_USE_${c})
SET(KWSYS_C_SRCS ${KWSYS_C_SRCS} ${c}.c)
IF(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${c}C.c)
LIST(APPEND KWSYS_C_SRCS ${c}C.c)
ELSE()
LIST(APPEND KWSYS_C_SRCS ${c}.c)
ENDIF()
ENDIF(KWSYS_USE_${c})
ENDFOREACH(c)
# Configure headers of C++ classes and construct the list of sources.
FOREACH(c ${KWSYS_CLASSES})
# Add this source to the list of source files for the library.
SET(KWSYS_CXX_SRCS ${KWSYS_CXX_SRCS} ${c}.cxx)
IF(EXISTS ${CMAKE_CURRENT_SOURCE_DIR}/${c}CXX.cxx)
LIST(APPEND KWSYS_CXX_SRCS ${c}CXX.cxx)
ELSE()
LIST(APPEND KWSYS_CXX_SRCS ${c}.cxx)
ENDIF()
# Configure the header for this class.
CONFIGURE_FILE(${PROJECT_SOURCE_DIR}/${c}.hxx.in ${KWSYS_HEADER_DIR}/${c}.hxx
......@@ -1087,6 +1102,11 @@ IF(KWSYS_STANDALONE OR CMake_SOURCE_DIR)
testCommandLineArguments
testCommandLineArguments1
)
IF(KWSYS_STL_HAS_WSTRING)
SET(KWSYS_CXX_TESTS ${KWSYS_CXX_TESTS}
testEncoding
)
ENDIF(KWSYS_STL_HAS_WSTRING)
IF(KWSYS_USE_SystemInformation)
SET(KWSYS_CXX_TESTS ${KWSYS_CXX_TESTS} testSystemInformation)
ENDIF(KWSYS_USE_SystemInformation)
......
......@@ -36,6 +36,9 @@
/* Whether STL is in std namespace. */
#define @KWSYS_NAMESPACE@_STL_HAVE_STD @KWSYS_STL_HAVE_STD@
/* Whether wstring is available. */
#define @KWSYS_NAMESPACE@_STL_HAS_WSTRING @KWSYS_STL_HAS_WSTRING@
/* Whether the STL string has operator<< for ostream. */
#define @KWSYS_NAMESPACE@_STL_STRING_HAVE_OSTREAM @KWSYS_STL_STRING_HAVE_OSTREAM@
......@@ -170,6 +173,7 @@
# define KWSYS_STL_HAS_ALLOCATOR_TEMPLATE @KWSYS_NAMESPACE@_STL_HAS_ALLOCATOR_TEMPLATE
# define KWSYS_STL_HAS_ALLOCATOR_NONTEMPLATE @KWSYS_NAMESPACE@_STL_HAS_ALLOCATOR_NONTEMPLATE
# define KWSYS_STL_HAS_ALLOCATOR_OBJECTS @KWSYS_NAMESPACE@_STL_HAS_ALLOCATOR_OBJECTS
# define KWSYS_STL_HAS_WSTRING @KWSYS_NAMESPACE@_STL_HAS_WSTRING
#endif
#endif
/*============================================================================
KWSys - Kitware System Library
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#ifndef @KWSYS_NAMESPACE@_Encoding_h
#define @KWSYS_NAMESPACE@_Encoding_h
#include <@KWSYS_NAMESPACE@/Configure.h>
#include <wchar.h>
/* Redefine all public interface symbol names to be in the proper
namespace. These macros are used internally to kwsys only, and are
not visible to user code. Use kwsysHeaderDump.pl to reproduce
these macros after making changes to the interface. */
#if !defined(KWSYS_NAMESPACE)
# define kwsys_ns(x) @KWSYS_NAMESPACE@##x
# define kwsysEXPORT @KWSYS_NAMESPACE@_EXPORT
#endif
#if !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
# define kwsysEncoding kwsys_ns(Encoding)
# define kwsysEncoding_mbstowcs kwsys_ns(Encoding_mbstowcs)
# define kwsysEncoding_DupToWide kwsys_ns(Encoding_DupToWide)
# define kwsysEncoding_wcstombs kwsys_ns(Encoding_wcstombs)
# define kwsysEncoding_DupToNarrow kwsys_ns(Encoding_DupToNarrow)
#endif
#if defined(__cplusplus)
extern "C"
{
#endif
/* Convert a narrow string to a wide string.
On Windows, UTF-8 is assumed, and on other platforms,
the current locale is assumed.
The value n is handed to the underlying conversion routine as the size
limit of dest.  Calling with dest == NULL and n == 0 is how the Dup
helper below queries the converted length.
Returns (size_t)-1 if src is NULL or the conversion fails.
*/
kwsysEXPORT size_t kwsysEncoding_mbstowcs(wchar_t* dest, const char* src, size_t n);
/* Convert a narrow string to a wide string.
This can return NULL if the conversion fails.
A non-NULL result is allocated with malloc; the caller releases it
with free. */
kwsysEXPORT wchar_t* kwsysEncoding_DupToWide(const char* src);
/* Convert a wide string to a narrow string.
On Windows, UTF-8 is assumed, and on other platforms,
the current locale is assumed.
The value n is handed to the underlying conversion routine as the size
limit of dest.
Returns (size_t)-1 if src is NULL or the conversion fails. */
kwsysEXPORT size_t kwsysEncoding_wcstombs(char* dest, const wchar_t* src, size_t n);
/* Convert a wide string to a narrow string.
This can return NULL if the conversion fails.
A non-NULL result is allocated with malloc; the caller releases it
with free. */
kwsysEXPORT char* kwsysEncoding_DupToNarrow(const wchar_t* str);
#if defined(__cplusplus)
} /* extern "C" */
#endif
/* If we are building a kwsys .c or .cxx file, let it use these macros.
Otherwise, undefine them to keep the namespace clean. */
#if !defined(KWSYS_NAMESPACE)
# undef kwsys_ns
# undef kwsysEXPORT
# if !defined(KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
# undef kwsysEncoding
# undef kwsysEncoding_mbstowcs
# undef kwsysEncoding_DupToWide
# undef kwsysEncoding_wcstombs
# undef kwsysEncoding_DupToNarrow
# endif
#endif
#endif
/*============================================================================
KWSys - Kitware System Library
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#ifndef @KWSYS_NAMESPACE@_Encoding_hxx
#define @KWSYS_NAMESPACE@_Encoding_hxx
#include <@KWSYS_NAMESPACE@/Configure.hxx>
#include <@KWSYS_NAMESPACE@/String.h>
#include <@KWSYS_NAMESPACE@/stl/string>
/* Define these macros temporarily to keep the code readable. */
#if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
# define kwsys_stl @KWSYS_NAMESPACE@_stl
#endif
namespace @KWSYS_NAMESPACE@
{
// Collection of static helpers that convert between narrow (char) and
// wide (wchar_t) strings.  The conversions are only declared when the
// configured STL provides wstring.
class @KWSYS_NAMESPACE@_EXPORT Encoding
{
public:
/**
* Convert between char and wchar_t
*/
#if @KWSYS_NAMESPACE@_STL_HAS_WSTRING
// Convert a narrow string to a wide string.
// On Windows, UTF-8 is assumed, and on other platforms,
// the current locale is assumed.
// The string overload forwards str.c_str() to the pointer overload.
// An empty wide string is returned when str is NULL or the
// conversion fails.
static kwsys_stl::wstring ToWide(const kwsys_stl::string& str);
static kwsys_stl::wstring ToWide(const char* str);
// Convert a wide string to a narrow string.
// On Windows, UTF-8 is assumed, and on other platforms,
// the current locale is assumed.
// The wstring overload forwards str.c_str() to the pointer overload.
// An empty narrow string is returned when str is NULL or the
// conversion fails.
static kwsys_stl::string ToNarrow(const kwsys_stl::wstring& str);
static kwsys_stl::string ToNarrow(const wchar_t* str);
#endif // @KWSYS_NAMESPACE@_STL_HAS_WSTRING
}; // class Encoding
} // namespace @KWSYS_NAMESPACE@
/* Undefine temporary macros. */
#if !defined (KWSYS_NAMESPACE) && !@KWSYS_NAMESPACE@_NAME_IS_KWSYS
# undef kwsys_stl
#endif
#endif
/*============================================================================
KWSys - Kitware System Library
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#include "kwsysPrivate.h"
#include KWSYS_HEADER(Encoding.h)
/* Work-around CMake dependency scanning limitation. This must
duplicate the above list of headers. */
#if 0
# include "Encoding.h.in"
#endif
#include <stdlib.h>
#ifdef _WIN32
#include <windows.h>
#endif
/* Convert the narrow string str into wide characters stored in dest.
   Windows builds decode str as UTF-8; all other builds defer to the C
   library's mbstowcs.  A NULL str is reported as (size_t)-1. */
size_t kwsysEncoding_mbstowcs(wchar_t* dest, const char* str, size_t n)
{
  size_t converted = (size_t)-1;
  if(str != 0)
    {
#ifdef _WIN32
    /* The API count includes the terminator (and is 0 on failure), so
       subtracting one yields the length or, after conversion, (size_t)-1. */
    converted = MultiByteToWideChar(CP_UTF8, 0,
                                    str, -1, dest, (int)n) - 1;
#else
    converted = mbstowcs(dest, str, n);
#endif
    }
  return converted;
}
/* Duplicate the narrow string str as a newly allocated wide string.
   Returns NULL if str is NULL, the conversion fails, or memory cannot be
   allocated.  A non-NULL result comes from malloc and must be released
   by the caller with free. */
wchar_t* kwsysEncoding_DupToWide(const char* str)
{
  wchar_t* ret = NULL;
  /* The length query excludes the terminator, so add one for it.  A failed
     query yields (size_t)-1, which wraps to 0 and is rejected below. */
  size_t length = kwsysEncoding_mbstowcs(NULL, str, 0) + 1;
  if(length > 0)
    {
    ret = (wchar_t*)malloc((length)*sizeof(wchar_t));
    /* Do not touch the buffer if the allocation failed. */
    if(ret)
      {
      ret[0] = 0;
      kwsysEncoding_mbstowcs(ret, str, length);
      }
    }
  return ret;
}
/* Convert the wide string str into narrow characters stored in dest.
   Windows builds encode the result as UTF-8; all other builds defer to
   the C library's wcstombs.  A NULL str is reported as (size_t)-1. */
size_t kwsysEncoding_wcstombs(char* dest, const wchar_t* str, size_t n)
{
  size_t converted = (size_t)-1;
  if(str != 0)
    {
#ifdef _WIN32
    /* The API count includes the terminator (and is 0 on failure), so
       subtracting one yields the length or, after conversion, (size_t)-1. */
    converted = WideCharToMultiByte(CP_UTF8, 0, str, -1,
                                    dest, (int)n, NULL, NULL) - 1;
#else
    converted = wcstombs(dest, str, n);
#endif
    }
  return converted;
}
/* Duplicate the wide string str as a newly allocated narrow string.
   Returns NULL if str is NULL, the conversion fails, or memory cannot be
   allocated.  A non-NULL result comes from malloc and must be released
   by the caller with free. */
char* kwsysEncoding_DupToNarrow(const wchar_t* str)
{
  char* ret = NULL;
  /* The length query excludes the terminator, so add one for it.  A failed
     query yields (size_t)-1, which wraps to 0 and is rejected below instead
     of being passed to malloc as a huge size. */
  size_t length = kwsysEncoding_wcstombs(0, str, 0) + 1;
  if(length > 0)
    {
    ret = (char*)malloc(length);
    /* Do not touch the buffer if the allocation failed. */
    if(ret)
      {
      ret[0] = 0;
      kwsysEncoding_wcstombs(ret, str, length);
      }
    }
  return ret;
}
/*============================================================================
KWSys - Kitware System Library
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#ifdef __osf__
# define _OSF_SOURCE
# define _POSIX_C_SOURCE 199506L
# define _XOPEN_SOURCE_EXTENDED
#endif
#include "kwsysPrivate.h"
#include KWSYS_HEADER(Encoding.hxx)
#include KWSYS_HEADER(Encoding.h)
#include KWSYS_HEADER(stl/vector)
// Work-around CMake dependency scanning limitation. This must
// duplicate the above list of headers.
#if 0
# include "Encoding.hxx.in"
# include "Encoding.h.in"
#endif
#include <stdlib.h>
#ifdef _MSC_VER
# pragma warning (disable: 4786)
#endif
// Windows API.
#if defined(_WIN32)
# include <windows.h>
#endif
namespace KWSYS_NAMESPACE
{
#if KWSYS_STL_HAS_WSTRING
// Widen an STL string by delegating to the C-string overload; only the
// characters up to the first NUL of str.c_str() take part in the conversion.
kwsys_stl::wstring Encoding::ToWide(const kwsys_stl::string& str)
{
  const char* const narrow = str.c_str();
  return ToWide(narrow);
}
// Narrow an STL wide string by delegating to the C-string overload; only the
// characters up to the first NUL of str.c_str() take part in the conversion.
kwsys_stl::string Encoding::ToNarrow(const kwsys_stl::wstring& str)
{
  const wchar_t* const wide = str.c_str();
  return ToNarrow(wide);
}
// Convert a NUL-terminated narrow string to a wide string.  The result is
// empty when cstr is NULL or cannot be converted.
kwsys_stl::wstring Encoding::ToWide(const char* cstr)
{
  kwsys_stl::wstring result;
  // First pass: query the converted length and add room for the terminator.
  // A failed query returns (size_t)-1, which wraps around to zero here.
  const size_t needed = kwsysEncoding_mbstowcs(0, cstr, 0) + 1;
  if(needed == 0)
    {
    return result;
    }
  // Second pass: convert into a zero-initialized scratch buffer.
  kwsys_stl::vector<wchar_t> buffer(needed);
  const size_t written = kwsysEncoding_mbstowcs(&buffer[0], cstr, needed);
  if(written != 0)
    {
    result = &buffer[0];
    }
  return result;
}
// Convert a NUL-terminated wide string to a narrow string.  The result is
// empty when wcstr is NULL or cannot be converted.
kwsys_stl::string Encoding::ToNarrow(const wchar_t* wcstr)
{
  kwsys_stl::string str;
  // Query the converted length and add room for the terminator.  A failed
  // query returns (size_t)-1, which wraps around to zero and skips the body.
  size_t length = kwsysEncoding_wcstombs(0, wcstr, 0) + 1;
  if(length > 0)
    {
    // Use the kwsys_stl alias like the rest of this file so the code also
    // builds where the STL does not live in namespace std.
    kwsys_stl::vector<char> chars(length);
    if(kwsysEncoding_wcstombs(&chars[0], wcstr, length) > 0)
      {
      str = &chars[0];
      }
    }
  return str;
}
#endif // KWSYS_STL_HAS_WSTRING
} // namespace KWSYS_NAMESPACE
......@@ -674,3 +674,9 @@ int main()
return a;
}
#endif
#ifdef TEST_KWSYS_STL_HAS_WSTRING
// Compile-only probe: this translation unit builds only if <string>
// declares wstring.  Presumably selected by the KWSYS_STL_HAS_WSTRING
// platform check in the build system -- confirm against CMakeLists.txt.
#include <string>
// Naming the type in a signature is enough to require its declaration.
void f(std ::wstring*) {}
int main() { return 0; }
#endif
/*============================================================================
KWSys - Kitware System Library
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#include "kwsysPrivate.h"
#if defined(_MSC_VER)
# pragma warning (disable:4786)
#endif
#include KWSYS_HEADER(Encoding.hxx)
#include KWSYS_HEADER(ios/iostream)
#include <locale.h>
// Work-around CMake dependency scanning limitation. This must
// duplicate the above list of headers.
#if 0
# include "Encoding.hxx.in"
# include "kwsys_ios_iostream.h.in"
#endif
//----------------------------------------------------------------------------
// UTF-8 byte sequences spelling a greeting in several languages.  Every row
// is a NUL-terminated string; a row whose first byte is zero ends the table.
// Bytes are grouped one word per line.
static const unsigned char helloWorldStrings[][32] =
{
  // English
  {'H','e','l','l','o',
   ' ',
   'W','o','r','l','d',
   0},
  // Japanese
  {0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB,
   0xE3, 0x81, 0xA1, 0xE3, 0x81, 0xAF,
   0xE4, 0xB8, 0x96, 0xE7, 0x95, 0x8C,
   0},
  // Arabic
  {0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7,
   0x20,
   0xD8, 0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x85,
   0},
  // Yiddish
  {0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90,
   0x20,
   0xD7, 0x95, 0xD7, 0x95, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x98,
   0},
  // Russian
  {0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5, 0xD1, 0x82,
   0x20,
   0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80,
   0},
  // Latin
  {0x4D, 0x75, 0x6E, 0x64, 0x75, 0x73,
   0x20,
   0x73, 0x61, 0x6C, 0x76, 0x65,
   0},
  // Swahili
  {0x68, 0x75, 0x6A, 0x61, 0x6D, 0x62, 0x6F,
   0x20,
   0x44, 0x75, 0x6E, 0x69, 0x61,
   0},
  // Icelandic
  {0x48, 0x61, 0x6C, 0x6C, 0xC3, 0xB3,
   0x20,
   0x68, 0x65, 0x69, 0x6D, 0x75, 0x72,
   0},
  // End-of-table sentinel
  {0}
};
//----------------------------------------------------------------------------
// Round-trip every entry of helloWorldStrings through ToWide and ToNarrow.
// Returns the number of entries whose round trip did not reproduce the input.
// Entries that widen to an empty string are not counted as failures.
static int testHelloWorldEncoding()
{
  int failures = 0;
  int index = 0;
  // The table ends at the first row that starts with a zero byte.
  while(helloWorldStrings[index][0] != 0)
    {
    const std::string original =
      reinterpret_cast<const char*>(helloWorldStrings[index]);
    std::cout << original << std::endl;
    const std::wstring wide = kwsys::Encoding::ToWide(original);
    const std::string roundTrip = kwsys::Encoding::ToNarrow(wide);
    const bool converted = !wide.empty();
    if(converted && roundTrip != original)
      {
      std::cout << "converted string was different: " << roundTrip << std::endl;
      failures++;
      }
    index++;
    }
  return failures;
}
// Exercise the conversion functions with invalid, NULL and empty input.
// Returns the number of checks that failed; NULL and empty input must
// convert to an empty string in both directions.
static int testRobustEncoding()
{
  // test that the conversion functions handle invalid
  // unicode correctly/gracefully
  int ret = 0;
  char cstr[] = {(char)-1, 0};
  // this conversion could fail
  std::wstring wstr = kwsys::Encoding::ToWide(cstr);
  wstr = kwsys::Encoding::ToWide(NULL);
  if(wstr != L"")
    {
    const wchar_t* wcstr = wstr.c_str();
    std::cout << "ToWide(NULL) returned";
    for(size_t i=0; i<wstr.size(); i++)
      {
      std::cout << " " << std::hex << (int)wcstr[i];
      }
    // std::hex is sticky; switch back so later output is decimal again.
    std::cout << std::dec << std::endl;
    ret++;
    }
  wstr = kwsys::Encoding::ToWide("");
  if(wstr != L"")
    {
    const wchar_t* wcstr = wstr.c_str();
    std::cout << "ToWide(\"\") returned";
    for(size_t i=0; i<wstr.size(); i++)
      {
      std::cout << " " << std::hex << (int)wcstr[i];
      }
    // std::hex is sticky; switch back so later output is decimal again.
    std::cout << std::dec << std::endl;
    ret++;
    }
  // Use the compiler-defined _WIN32 like the library sources do; WIN32 is
  // only available when the build system or a header happens to define it.
#ifdef _WIN32
  // 16 bit wchar_t - we make an invalid surrogate pair
  wchar_t cwstr[] = {0xD801, 0xDA00, 0};
  // this conversion could fail
  std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
#endif
  std::string str = kwsys::Encoding::ToNarrow(NULL);
  if(str != "")
    {
    std::cout << "ToNarrow(NULL) returned " << str << std::endl;
    ret++;
    }
  str = kwsys::Encoding::ToNarrow(L"");
  // Check the narrow result just produced, not the stale wide string.
  if(str != "")
    {
    std::cout << "ToNarrow(\"\") returned " << str << std::endl;
    ret++;
    }
  return ret;
}
//----------------------------------------------------------------------------
// Test driver: select the environment's locale, report it, then run both
// encoding test groups.  The result is zero only if neither group failed.
int testEncoding(int, char*[])
{
  const char* loc = setlocale(LC_ALL, "");
  // setlocale yields NULL when the requested locale cannot be installed.
  std::cout << "Locale: " << (loc ? loc : "None") << std::endl;
  const int helloResult = testHelloWorldEncoding();
  const int robustResult = testRobustEncoding();
  return helloResult | robustResult;
}
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment