testEncoding.cxx 6 KB
Newer Older
1 2
/* Distributed under the OSI-approved BSD 3-Clause License.  See accompanying
   file Copyright.txt or https://cmake.org/licensing#kwsys for details.  */
3 4 5
#include "kwsysPrivate.h"

#if defined(_MSC_VER)
6
#pragma warning(disable : 4786)
7 8 9
#endif

#include KWSYS_HEADER(Encoding.hxx)
10
#include KWSYS_HEADER(Encoding.h)
11

12
#include <algorithm>
13
#include <iostream>
14
#include <locale.h>
15
#include <stdlib.h>
16
#include <string.h>
17 18 19 20

// Work-around CMake dependency scanning limitation.  This must
// duplicate the above list of headers.
#if 0
21 22
#include "Encoding.h.in"
#include "Encoding.hxx.in"
23 24 25
#endif

//----------------------------------------------------------------------------
26
// UTF-8 encoded "hello world" greetings, one NUL-terminated string per row.
// Rows shorter than 32 bytes are zero-filled by aggregate initialization.
// The table ends with a sentinel row whose first byte is 0; loops over the
// table stop when they reach it.
static const unsigned char helloWorldStrings[][32] = {
  // English
  { 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd', 0 },
  // Japanese
  { 0xE3, 0x81, 0x93, 0xE3, 0x82, 0x93, 0xE3, 0x81, 0xAB, 0xE3, 0x81,
    0xA1, 0xE3, 0x81, 0xAF, 0xE4, 0xB8, 0x96, 0xE7, 0x95, 0x8C, 0 },
  // Arabic
  { 0xD9, 0x85, 0xD8, 0xB1, 0xD8, 0xAD, 0xD8, 0xA8, 0xD8, 0xA7, 0x20, 0xD8,
    0xA7, 0xD9, 0x84, 0xD8, 0xB9, 0xD8, 0xA7, 0xD9, 0x84, 0xD9, 0x85, 0 },
  // Yiddish
  { 0xD7, 0x94, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x90, 0x20, 0xD7,
    0x95, 0xD7, 0x95, 0xD7, 0xA2, 0xD7, 0x9C, 0xD7, 0x98, 0 },
  // Russian
  { 0xD0, 0xBF, 0xD1, 0x80, 0xD0, 0xB8, 0xD0, 0xB2, 0xD0, 0xB5,
    0xD1, 0x82, 0x20, 0xD0, 0xBC, 0xD0, 0xB8, 0xD1, 0x80, 0 },
  // Latin
  { 0x4D, 0x75, 0x6E, 0x64, 0x75, 0x73, 0x20, 0x73, 0x61, 0x6C, 0x76, 0x65,
    0 },
  // Swahili
  { 0x68, 0x75, 0x6A, 0x61, 0x6D, 0x62, 0x6F, 0x20, 0x44, 0x75, 0x6E, 0x69,
    0x61, 0 },
  // Icelandic
  { 0x48, 0x61, 0x6C, 0x6C, 0xC3, 0xB3, 0x20, 0x68, 0x65, 0x69, 0x6D, 0x75,
    0x72, 0 },
  // Sentinel: first byte 0 marks the end of the table.
  { 0 }
};

//----------------------------------------------------------------------------
static int testHelloWorldEncoding()
{
  int ret = 0;
57
  for (int i = 0; helloWorldStrings[i][0] != 0; i++) {
58 59 60 61
    std::string str = reinterpret_cast<const char*>(helloWorldStrings[i]);
    std::cout << str << std::endl;
    std::wstring wstr = kwsys::Encoding::ToWide(str);
    std::string str2 = kwsys::Encoding::ToNarrow(wstr);
62 63
    wchar_t* c_wstr = kwsysEncoding_DupToWide(str.c_str());
    char* c_str2 = kwsysEncoding_DupToNarrow(c_wstr);
64
    if (!wstr.empty() && (str != str2 || strcmp(c_str2, str.c_str()))) {
65
      std::cout << "converted string was different: " << str2 << std::endl;
66
      std::cout << "converted string was different: " << c_str2 << std::endl;
67
      ret++;
68
    }
69 70
    free(c_wstr);
    free(c_str2);
71
  }
72 73 74 75 76 77 78 79 80
  return ret;
}

// Checks that the conversion functions cope with invalid, NULL and empty
// input.  The conversions of deliberately invalid input are only executed;
// their results are not inspected.  NULL and empty input must convert to an
// empty string in both directions.
//
// Returns the number of failed checks (0 on success).
static int testRobustEncoding()
{
  int ret = 0;

  // A lone 0xFF byte followed by the terminator.
  char cstr[] = { static_cast<char>(-1), 0 };
  // this conversion could fail
  std::wstring wstr = kwsys::Encoding::ToWide(cstr);

  wstr = kwsys::Encoding::ToWide(NULL);
  if (wstr != L"") {
    const wchar_t* wcstr = wstr.c_str();
    std::cout << "ToWide(NULL) returned";
    for (size_t i = 0; i < wstr.size(); i++) {
      std::cout << " " << std::hex << static_cast<int>(wcstr[i]);
    }
    // std::hex is sticky; restore decimal output for later messages.
    std::cout << std::dec << std::endl;
    ret++;
  }

  wstr = kwsys::Encoding::ToWide("");
  if (wstr != L"") {
    const wchar_t* wcstr = wstr.c_str();
    std::cout << "ToWide(\"\") returned";
    for (size_t i = 0; i < wstr.size(); i++) {
      std::cout << " " << std::hex << static_cast<int>(wcstr[i]);
    }
    // std::hex is sticky; restore decimal output for later messages.
    std::cout << std::dec << std::endl;
    ret++;
  }

#ifdef _WIN32
  // 16 bit wchar_t - we make an invalid surrogate pair
  wchar_t cwstr[] = { 0xD801, 0xDA00, 0 };
  // this conversion could fail
  std::string win_str = kwsys::Encoding::ToNarrow(cwstr);
#endif

  std::string str = kwsys::Encoding::ToNarrow(NULL);
  if (str != "") {
    std::cout << "ToNarrow(NULL) returned " << str << std::endl;
    ret++;
  }

  str = kwsys::Encoding::ToNarrow(L"");
  // Inspect the narrow result that was just produced.  The earlier code
  // compared the unrelated wide string here, so this check could never
  // detect a bad ToNarrow(L"") result.
  if (str != "") {
    std::cout << "ToNarrow(\"\") returned " << str << std::endl;
    ret++;
  }

  return ret;
}

128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
// Verifies that std::string values containing embedded NUL bytes (leading,
// interior and trailing) survive a ToWide -> ToNarrow round trip with their
// full length intact.  Each input is echoed with NULs shown as spaces,
// followed by its size.
//
// Returns the number of inputs that came back different.
static int testWithNulls()
{
  // Raw bytes plus explicit length, since the literals contain NULs.
  static const struct
  {
    const char* bytes;
    size_t length;
  } cases[] = {
    { "ab\0c", 4 },
    { "d\0\0e", 4 },
    { "\0f", 2 },
    { "\0\0gh", 4 },
    { "ij\0", 3 },
    { "k\0\0", 3 },
    { "\0\0\0\0lmn\0\0\0\0", 11 },
  };
  const size_t caseCount = sizeof(cases) / sizeof(cases[0]);

  int failures = 0;
  for (size_t idx = 0; idx < caseCount; ++idx) {
    const std::string input(cases[idx].bytes, cases[idx].length);

    // Convert first, then report, so the output order matches the checks.
    std::string roundTrip =
      kwsys::Encoding::ToNarrow(kwsys::Encoding::ToWide(input));

    std::string shown(input);
    std::replace(shown.begin(), shown.end(), '\0', ' ');
    std::cout << "'" << shown << "' (" << input.size() << ")" << std::endl;

    if (roundTrip == input) {
      continue;
    }

    // Mismatch: print what came back, again with NULs made visible.
    std::replace(roundTrip.begin(), roundTrip.end(), '\0', ' ');
    std::cout << "string with null was different: '" << roundTrip << "' ("
              << roundTrip.size() << ")" << std::endl;
    ++failures;
  }
  return failures;
}

157 158 159 160
static int testCommandLineArguments()
{
  int status = 0;

161
  char const* argv[2] = { "./app.exe", (char const*)helloWorldStrings[1] };
162 163 164 165 166 167

  kwsys::Encoding::CommandLineArguments args(2, argv);
  kwsys::Encoding::CommandLineArguments arg2 =
    kwsys::Encoding::CommandLineArguments(args);

  char const* const* u8_argv = args.argv();
168
  for (int i = 0; i < args.argc(); i++) {
169
    char const* u8_arg = u8_argv[i];
170 171 172
    if (strcmp(argv[i], u8_arg) != 0) {
      std::cout << "argv[" << i << "] " << argv[i] << " != " << u8_arg
                << std::endl;
173 174 175 176 177 178 179 180 181
      status++;
    }
  }

  kwsys::Encoding::CommandLineArguments args3 =
    kwsys::Encoding::CommandLineArguments::Main(2, argv);

  return status;
}
182 183

//----------------------------------------------------------------------------
184
int testEncoding(int, char* [])
185 186
{
  const char* loc = setlocale(LC_ALL, "");
187
  if (loc) {
188
    std::cout << "Locale: " << loc << std::endl;
189
  } else {
190
    std::cout << "Locale: None" << std::endl;
191
  }
192 193 194 195 196

  int ret = 0;

  ret |= testHelloWorldEncoding();
  ret |= testRobustEncoding();
197
  ret |= testCommandLineArguments();
198
  ret |= testWithNulls();
199 200 201

  return ret;
}