Skip to content
Snippets Groups Projects
Commit b3b20cc0 authored by Clinton Stimpson
Browse files

FStream: Add ability to detect BOM.

Change-Id: I4d06782730fca0fd68fb62c418b4e2d95f550625
parent b3db597b
No related branches found
No related tags found
No related merge requests found
/*============================================================================
KWSys - Kitware System Library
Copyright 2000-2009 Kitware, Inc., Insight Software Consortium
Distributed under the OSI-approved BSD License (the "License");
see accompanying file Copyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even the
implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the License for more information.
============================================================================*/
#include "kwsysPrivate.h"
#include KWSYS_HEADER(FStream.hxx)
// Work-around CMake dependency scanning limitation. This must
// duplicate the above list of headers.
#if 0
# include "FStream.hxx.in"
#endif
namespace KWSYS_NAMESPACE
{
namespace FStream
{
// Detect a Unicode byte-order mark (BOM) at the current read position.
//
// Recognized marks:
//   EF BB BF     -> BOM_UTF8
//   FE FF        -> BOM_UTF16BE
//   FF FE        -> BOM_UTF16LE
//   00 00 FE FF  -> BOM_UTF32BE
//   FF FE 00 00  -> BOM_UTF32LE
//
// When a BOM is found the stream is left positioned just after it.
// Otherwise BOM_None is returned and the stream is put back at the
// position it had on entry, with any error state raised by the probing
// reads cleared so that the caller can keep reading.  If the stream is
// not good() on entry it is left untouched and BOM_None is returned.
//
// The stream must support absolute seeking.
BOM ReadBOM(std::istream& in)
{
  if(!in.good())
    {
    return BOM_None;
    }
  // Keep the position as a stream position object.  Converting it to an
  // integer truncates large offsets where "unsigned long" is 32 bits and
  // mangles the pos_type(-1) value tellg() reports on failure.
  const std::istream::pos_type orig = in.tellg();
  unsigned char bom[4];
  in.read(reinterpret_cast<char*>(bom), 2);
  if(!in.good())
    {
    // A short read sets failbit, and seekg() does nothing while the
    // stream is in a failed state, so the state must be cleared first.
    in.clear();
    in.seekg(orig);
    return BOM_None;
    }
  if(bom[0] == 0xEF && bom[1] == 0xBB)
    {
    in.read(reinterpret_cast<char*>(bom+2), 1);
    if(in.good() && bom[2] == 0xBF)
      {
      return BOM_UTF8;
      }
    }
  else if(bom[0] == 0xFE && bom[1] == 0xFF)
    {
    return BOM_UTF16BE;
    }
  else if(bom[0] == 0x00 && bom[1] == 0x00)
    {
    in.read(reinterpret_cast<char*>(bom+2), 2);
    if(in.good() && bom[2] == 0xFE && bom[3] == 0xFF)
      {
      return BOM_UTF32BE;
      }
    }
  else if(bom[0] == 0xFF && bom[1] == 0xFE)
    {
    // FF FE is a complete UTF-16LE mark and also the first half of the
    // UTF-32LE mark, so peek at the next two bytes to tell them apart.
    const std::istream::pos_type p = in.tellg();
    in.read(reinterpret_cast<char*>(bom+2), 2);
    if(in.good() && bom[2] == 0x00 && bom[3] == 0x00)
      {
      return BOM_UTF32LE;
      }
    // Fewer than two bytes may follow the mark; clear the resulting
    // eof/fail state so the seek back to just after the mark succeeds.
    in.clear();
    in.seekg(p);
    return BOM_UTF16LE;
    }
  // No complete mark was matched.  A partial match may have hit the end
  // of the input, so clear the state before rewinding.
  in.clear();
  in.seekg(orig);
  return BOM_None;
}
} // FStream namespace
} //KWSYS_NAMESPACE
......@@ -165,8 +165,24 @@ class basic_ofstream : public std::basic_ostream<CharType,Traits>
using @KWSYS_NAMESPACE@_ios_namespace::ifstream;
#endif
namespace FStream
{
  // Byte-order marks that ReadBOM() can report.
  enum BOM
  {
    BOM_None    = 0,  // no byte-order mark found
    BOM_UTF8    = 1,  // EF BB BF
    BOM_UTF16BE = 2,  // FE FF
    BOM_UTF16LE = 3,  // FF FE
    BOM_UTF32BE = 4,  // 00 00 FE FF
    BOM_UTF32LE = 5   // FF FE 00 00
  };

  // Look for a byte-order mark at the current read position of "in".
  // Returns the kind of mark found, or BOM_None when there is none.
  // When a mark is found the stream is advanced past it; otherwise the
  // function seeks back to the position it started from.
  // The stream has to support seeking to an absolute position;
  // relative seeking is not needed.
  BOM ReadBOM(std::istream& in);
}
}
#endif
......@@ -16,11 +16,17 @@
#endif
#include KWSYS_HEADER(FStream.hxx)
#include KWSYS_HEADER(ios/iostream)
#include <string.h>
#ifdef __BORLANDC__
# include <mem.h> /* memcmp */
#endif
// Work-around CMake dependency scanning limitation. This must
// duplicate the above list of headers.
#if 0
# include "FStream.hxx.in"
# include "kwsys_ios_iostream.h.in"
#endif
......@@ -36,6 +42,141 @@ static int testNoFile()
return 0;
}
// BOM value that ReadBOM() must report for each of the five binary-mode
// cases in testBOM().  Index i here pairs with row i of
// expected_bom_data (the mark written to the file) and row i of
// file_data (the payload written after the mark).
static kwsys::FStream::BOM expected_bom[5] =
{
kwsys::FStream::BOM_UTF8,
kwsys::FStream::BOM_UTF16LE,
kwsys::FStream::BOM_UTF16BE,
kwsys::FStream::BOM_UTF32LE,
kwsys::FStream::BOM_UTF32BE
};
// Raw byte-order mark written at the start of each test file.
// Row layout: element 0 is the number of mark bytes, elements 1..N are
// the mark itself (testBOM() writes N bytes starting at element 1).
// Rows are in the same order as expected_bom.
static unsigned char expected_bom_data[5][5] =
{
// UTF-8
{3, 0xEF, 0xBB, 0xBF},
// UTF-16 little endian
{2, 0xFF, 0xFE},
// UTF-16 big endian
{2, 0xFE, 0xFF},
// UTF-32 little endian
{4, 0xFF, 0xFE, 0x00, 0x00},
// UTF-32 big endian
{4, 0x00, 0x00, 0xFE, 0xFF},
};
// Payload written after the byte-order mark: the text "Hello World" in
// the encoding matching the same row of expected_bom.
// Row layout: element 0 is the payload length in bytes, followed by the
// payload.  The longest payload is 44 bytes, hence 45 elements per row.
static unsigned char file_data[5][45] =
{
// one byte per character
{11, 'H', 'e', 'l', 'l', 'o', ' ', 'W', 'o', 'r', 'l', 'd'},
// two bytes per character, low byte first
{22, 0x48, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00, 0x20, 0x00,
0x57, 0x00, 0x6F, 0x00, 0x72, 0x00, 0x6C, 0x00, 0x64, 0x00},
// two bytes per character, high byte first
{22, 0x00, 0x48, 0x00, 0x65, 0x00, 0x6C, 0x00, 0x6C, 0x00, 0x6F, 0x00, 0x20,
0x00, 0x57, 0x00, 0x6F, 0x00, 0x72, 0x00, 0x6C, 0x00, 0x64},
// four bytes per character, low byte first
{44, 0x48, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00,
0x6C, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x20, 0x00, 0x00, 0x00,
0x57, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x72, 0x00, 0x00, 0x00,
0x6C, 0x00, 0x00, 0x00, 0x64, 0x00, 0x00, 0x00},
// four bytes per character, high byte first
{44, 0x00, 0x00, 0x00, 0x48, 0x00, 0x00, 0x00, 0x65, 0x00, 0x00, 0x00, 0x6C,
0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x20,
0x00, 0x00, 0x00, 0x57, 0x00, 0x00, 0x00, 0x6F, 0x00, 0x00, 0x00, 0x72,
0x00, 0x00, 0x00, 0x6C, 0x00, 0x00, 0x00, 0x64},
};
//----------------------------------------------------------------------------
// Exercise kwsys::FStream::ReadBOM() against files written to "bom.txt"
// in the current working directory.
//
// Three groups of checks are run:
//   1. For each row of expected_bom_data/file_data, a binary-mode file
//      holding the mark followed by the payload.
//   2. A text-mode file with no mark at all.
//   3. A text-mode file starting with the UTF-8 mark.
// In every case the reported BOM must match and the bytes read after
// the call must equal the payload, which verifies the stream position
// left behind by ReadBOM().
//
// Returns 0 when every check passes, or 1 at the first failure after
// printing a diagnostic.
static int testBOM()
{
  // test various encodings in binary mode
  for(int i=0; i<5; i++)
    {
      {
      // Scoped so the file is flushed and closed before it is reopened.
      kwsys::ofstream out("bom.txt", kwsys::ofstream::binary);
      out.write(reinterpret_cast<const char*>(expected_bom_data[i]+1),
                *expected_bom_data[i]);
      out.write(reinterpret_cast<const char*>(file_data[i]+1),
                file_data[i][0]);
      }

    kwsys::ifstream in("bom.txt", kwsys::ifstream::binary);
    kwsys::FStream::BOM bom = kwsys::FStream::ReadBOM(in);
    if(bom != expected_bom[i])
      {
      kwsys_ios::cout << "Unexpected BOM " << i << kwsys_ios::endl;
      return 1;
      }
    char data[45];
    in.read(data, file_data[i][0]);
    if(!in.good())
      {
      kwsys_ios::cout << "Unable to read data " << i << kwsys_ios::endl;
      return 1;
      }

    if(memcmp(data, file_data[i]+1, file_data[i][0]) != 0)
      {
      kwsys_ios::cout << "Incorrect read data " << i << kwsys_ios::endl;
      return 1;
      }
    }

  // test text file without bom
  {
    {
    kwsys::ofstream out("bom.txt");
    out << "Hello World";
    }

    kwsys::ifstream in("bom.txt");
    kwsys::FStream::BOM bom = kwsys::FStream::ReadBOM(in);
    if(bom != kwsys::FStream::BOM_None)
      {
      kwsys_ios::cout << "Unexpected BOM for none case" << kwsys_ios::endl;
      return 1;
      }
    // With no mark present the stream must be back at the start, so
    // the whole payload is still readable.
    char data[45];
    in.read(data, file_data[0][0]);
    if(!in.good())
      {
      kwsys_ios::cout << "Unable to read data for none case"
                      << kwsys_ios::endl;
      return 1;
      }
    if(memcmp(data, file_data[0]+1, file_data[0][0]) != 0)
      {
      kwsys_ios::cout << "Incorrect read data for none case"
                      << kwsys_ios::endl;
      return 1;
      }
  }

  // test text file with utf-8 bom
  {
    {
    kwsys::ofstream out("bom.txt");
    out.write(reinterpret_cast<const char*>(expected_bom_data[0]+1),
              *expected_bom_data[0]);
    out << "Hello World";
    }

    kwsys::ifstream in("bom.txt");
    kwsys::FStream::BOM bom = kwsys::FStream::ReadBOM(in);
    if(bom != kwsys::FStream::BOM_UTF8)
      {
      kwsys_ios::cout << "Unexpected BOM for utf-8 case" << kwsys_ios::endl;
      return 1;
      }
    char data[45];
    in.read(data, file_data[0][0]);
    if(!in.good())
      {
      kwsys_ios::cout << "Unable to read data for utf-8 case"
                      << kwsys_ios::endl;
      return 1;
      }
    if(memcmp(data, file_data[0]+1, file_data[0][0]) != 0)
      {
      kwsys_ios::cout << "Incorrect read data for utf-8 case"
                      << kwsys_ios::endl;
      return 1;
      }
  }

  return 0;
}
//----------------------------------------------------------------------------
int testFStream(int, char*[])
......@@ -43,6 +184,7 @@ int testFStream(int, char*[])
int ret = 0;
ret |= testNoFile();
ret |= testBOM();
return ret;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment