Skip to content
GitLab
Menu
Projects
Groups
Snippets
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
Menu
Open sidebar
Chuck Atkins
KWSys
Commits
b8ed3605
Commit
b8ed3605
authored
Jul 28, 2005
by
Sebastien Barre
Browse files
ENH: add method to attempt to check if a file is text or binary
parent
19432982
Changes
2
Hide whitespace changes
Inline
Side-by-side
SystemTools.cxx
View file @
b8ed3605
...
...
@@ -2748,6 +2748,65 @@ bool SystemTools::FileHasSignature(const char *filename,
return
res
;
}
// Attempt to classify a file as text or binary by sampling its first bytes.
//
// Up to 'length' bytes are read from the start of 'filename' (opened in
// binary mode). A byte counts as textual when it lies in [0x20, 0x7F] or is
// one of '\n', '\r', '\t'. The file is reported as binary when the fraction
// of non-textual bytes among the bytes actually read is greater than or
// equal to 'percent_bin' (a ratio, e.g. 0.05 for 5%), and as text otherwise.
//
// Returns FileTypeUnknown when 'filename' is null, 'percent_bin' is
// negative, the file cannot be opened, or no byte could be read.
SystemTools::FileTypeEnum
SystemTools::DetectFileType(const char *filename,
                            unsigned long length,
                            double percent_bin)
{
  if (!filename || percent_bin < 0)
    {
    return SystemTools::FileTypeUnknown;
    }

  FILE *fp = fopen(filename, "rb");
  if (!fp)
    {
    return SystemTools::FileTypeUnknown;
    }

  // Allocate buffer and read bytes
  unsigned char *buffer = new unsigned char [length];
  size_t read_length = fread(buffer, 1, length, fp);
  fclose(fp);
  if (read_length == 0)
    {
    // Nothing was read (empty file, read error, or length == 0). The buffer
    // must still be released here, otherwise this early return leaks it.
    delete [] buffer;
    return SystemTools::FileTypeUnknown;
    }

  // Loop over contents and count the textual bytes
  size_t text_count = 0;
  const unsigned char *buffer_end = buffer + read_length;
  for (const unsigned char *ptr = buffer; ptr != buffer_end; ++ptr)
    {
    if ((*ptr >= 0x20 && *ptr <= 0x7F) ||
        *ptr == '\n' ||
        *ptr == '\r' ||
        *ptr == '\t')
      {
      ++text_count;
      }
    }

  delete [] buffer;

  // read_length is non-zero here, so the division is well defined.
  double current_percent_bin =
    static_cast<double>(read_length - text_count) /
    static_cast<double>(read_length);

  if (current_percent_bin >= percent_bin)
    {
    return SystemTools::FileTypeBinary;
    }

  return SystemTools::FileTypeText;
}
bool
SystemTools
::
LocateFileInDir
(
const
char
*
filename
,
const
char
*
dir
,
kwsys_stl
::
string
&
filename_found
,
...
...
SystemTools.hxx.in
View file @
b8ed3605
...
...
@@ -507,11 +507,31 @@ public:
static bool FileIsSymlink(const char* name);
/**
 * return true if the file has a given signature (first set of bytes)
 * Return true if the file has a given signature (first set of bytes)
*/
static bool FileHasSignature(
const char* filename, const char *signature, long offset = 0);
/**
* Attempt to detect and return the type of a file.
 * Up to 'length' bytes are read from the file; if the fraction of
 * non-textual bytes among them is at least 'percent_bin' (a ratio, e.g.
 * 0.05 for 5%), the file is considered binary, otherwise textual. Textual
 * elements are bytes in the ASCII [0x20, 0x7F] range, plus \n, \r and \t.
* The algorithm is simplistic, and should probably check for usual file
* extensions, 'magic' signature, unicode, etc.
*/
// Result of DetectFileType().
enum FileTypeEnum
{
// The type could not be determined: null filename, negative threshold,
// file could not be opened, or no byte could be read from it.
FileTypeUnknown,
// The fraction of non-textual bytes in the sample reached the threshold.
FileTypeBinary,
// The fraction of non-textual bytes in the sample stayed below the threshold.
FileTypeText
};
// Classify 'filename' as text or binary by sampling its first bytes.
//   filename    - path of the file to inspect; a null pointer yields
//                 FileTypeUnknown.
//   length      - maximum number of bytes read from the start of the file
//                 (256 by default).
//   percent_bin - threshold, expressed as a ratio (0.05 by default, i.e. 5%),
//                 on the fraction of non-textual bytes at or above which the
//                 file is reported as binary; a negative value yields
//                 FileTypeUnknown.
static SystemTools::FileTypeEnum DetectFileType(
const char* filename,
unsigned long length = 256,
double percent_bin = 0.05);
/**
* Try to locate the file 'filename' in the directory 'dir'.
* If 'filename' is a fully qualified filename, the basename of the file is
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment