Commit 52a5c4a8 authored by Kitware Robot's avatar Kitware Robot Committed by Brad King
Browse files

KWSys 2017-12-05 (9376537e)

Code extracted from:

    https://gitlab.kitware.com/utils/kwsys.git

at commit 9376537ec0e4770a28f4b1705cfacf79650f71b6 (master).

Upstream Shortlog
-----------------

Brad King (1):
      e9557f37 RegularExpression: Fix regression in 'find' method

Sebastian Holtermann (4):
      4d1e8738 RegularExpression: Make compile() reentrant (thread safe)
      64f80068 RegularExpression: Make find() reentrant (thread safe)
      bbc94ba8 RegularExpression: Remove unused code
      cff58f07 RegularExpression: New RegularExpressionMatch class
parent 740ccb1f
......@@ -45,9 +45,9 @@ RegularExpression::RegularExpression(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
this->startp[0] = rxp.startp[0]; // Copy pointers into last
this->endp[0] = rxp.endp[0]; // Successful "find" operation
this->regmust = rxp.regmust; // Copy field
// Copy pointers into last successful "find" operation
this->regmatch = rxp.regmatch;
this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
......@@ -78,9 +78,9 @@ RegularExpression& RegularExpression::operator=(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
this->startp[0] = rxp.startp[0]; // Copy pointers into last
this->endp[0] = rxp.endp[0]; // Successful "find" operation
this->regmust = rxp.regmust; // Copy field
// Copy pointers into last successful "find" operation
this->regmatch = rxp.regmatch;
this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
......@@ -123,8 +123,9 @@ bool RegularExpression::deep_equal(const RegularExpression& rxp) const
while (ind-- != 0) // Else while still characters
if (this->program[ind] != rxp.program[ind]) // If regexp are different
return false; // Return failure
return (this->startp[0] == rxp.startp[0] && // Else if same start/end ptrs,
this->endp[0] == rxp.endp[0]); // Return true
// Else if same start/end ptrs, return true
return (this->regmatch.start() == rxp.regmatch.start() &&
this->regmatch.end() == rxp.regmatch.end());
}
// The remaining code in this file is derived from the regular expression code
......@@ -276,31 +277,35 @@ const unsigned char MAGIC = 0234;
/////////////////////////////////////////////////////////////////////////
/*
* Global work variables for compile().
* Read only utility variables.
*/
static const char* regparse; // Input-scan pointer.
static int regnpar; // () count.
static char regdummy;
static char* regcode; // Code-emit pointer; &regdummy = don't.
static long regsize; // Code size.
static char* const regdummyptr = &regdummy;
/*
* Forward declarations for compile()'s friends.
* Utility class for RegularExpression::compile().
*/
// #ifndef static
// #define static static
// #endif
static char* reg(int, int*);
static char* regbranch(int*);
static char* regpiece(int*);
static char* regatom(int*);
static char* regnode(char);
class RegExpCompile
{
public:
const char* regparse; // Input-scan pointer.
int regnpar; // () count.
char* regcode; // Code-emit pointer; regdummyptr = don't.
long regsize; // Code size.
char* reg(int, int*);
char* regbranch(int*);
char* regpiece(int*);
char* regatom(int*);
char* regnode(char);
void regc(char);
void reginsert(char, char*);
static void regtail(char*, const char*);
static void regoptail(char*, const char*);
};
static const char* regnext(const char*);
static char* regnext(char*);
static void regc(char);
static void reginsert(char, char*);
static void regtail(char*, const char*);
static void regoptail(char*, const char*);
#ifdef STRCSPN
static int strcspn();
......@@ -337,19 +342,20 @@ bool RegularExpression::compile(const char* exp)
}
// First pass: determine size, legality.
regparse = exp;
regnpar = 1;
regsize = 0L;
regcode = &regdummy;
regc(static_cast<char>(MAGIC));
if (!reg(0, &flags)) {
RegExpCompile comp;
comp.regparse = exp;
comp.regnpar = 1;
comp.regsize = 0L;
comp.regcode = regdummyptr;
comp.regc(static_cast<char>(MAGIC));
if (!comp.reg(0, &flags)) {
printf("RegularExpression::compile(): Error in compile.\n");
return false;
}
this->startp[0] = this->endp[0] = this->searchstring = 0;
this->regmatch.clear();
// Small enough for pointer-storage convention?
if (regsize >= 32767L) { // Probably could be 65535L.
if (comp.regsize >= 32767L) { // Probably could be 65535L.
// RAISE Error, SYM(RegularExpression), SYM(Expr_Too_Big),
printf("RegularExpression::compile(): Expression too big.\n");
return false;
......@@ -360,8 +366,8 @@ bool RegularExpression::compile(const char* exp)
if (this->program != 0)
delete[] this->program;
//#endif
this->program = new char[regsize];
this->progsize = static_cast<int>(regsize);
this->program = new char[comp.regsize];
this->progsize = static_cast<int>(comp.regsize);
if (this->program == 0) {
// RAISE Error, SYM(RegularExpression), SYM(Out_Of_Memory),
......@@ -370,11 +376,11 @@ bool RegularExpression::compile(const char* exp)
}
// Second pass: emit code.
regparse = exp;
regnpar = 1;
regcode = this->program;
regc(static_cast<char>(MAGIC));
reg(0, &flags);
comp.regparse = exp;
comp.regnpar = 1;
comp.regcode = this->program;
comp.regc(static_cast<char>(MAGIC));
comp.reg(0, &flags);
// Dig out information for optimizations.
this->regstart = '\0'; // Worst-case defaults.
......@@ -423,7 +429,7 @@ bool RegularExpression::compile(const char* exp)
* is a trifle forced, but the need to tie the tails of the branches to what
* follows makes it hard to avoid.
*/
static char* reg(int paren, int* flagp)
char* RegExpCompile::reg(int paren, int* flagp)
{
char* ret;
char* br;
......@@ -435,7 +441,7 @@ static char* reg(int paren, int* flagp)
// Make an OPEN node, if parenthesized.
if (paren) {
if (regnpar >= RegularExpression::NSUBEXP) {
if (regnpar >= RegularExpressionMatch::NSUBEXP) {
// RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens),
printf("RegularExpression::compile(): Too many parentheses.\n");
return 0;
......@@ -501,7 +507,7 @@ static char* reg(int paren, int* flagp)
*
* Implements the concatenation operator.
*/
static char* regbranch(int* flagp)
char* RegExpCompile::regbranch(int* flagp)
{
char* ret;
char* chain;
......@@ -538,7 +544,7 @@ static char* regbranch(int* flagp)
* It might seem that this node could be dispensed with entirely, but the
* endmarker role is not redundant.
*/
static char* regpiece(int* flagp)
char* RegExpCompile::regpiece(int* flagp)
{
char* ret;
char op;
......@@ -605,7 +611,7 @@ static char* regpiece(int* flagp)
* faster to run. Backslashed characters are exceptions, each becoming a
* separate node; the code is simpler that way and it's not worth fixing.
*/
static char* regatom(int* flagp)
char* RegExpCompile::regatom(int* flagp)
{
char* ret;
int flags;
......@@ -724,13 +730,13 @@ static char* regatom(int* flagp)
- regnode - emit a node
Location.
*/
static char* regnode(char op)
char* RegExpCompile::regnode(char op)
{
char* ret;
char* ptr;
ret = regcode;
if (ret == &regdummy) {
if (ret == regdummyptr) {
regsize += 3;
return (ret);
}
......@@ -747,9 +753,9 @@ static char* regnode(char op)
/*
- regc - emit (if appropriate) a byte of code
*/
static void regc(char b)
void RegExpCompile::regc(char b)
{
if (regcode != &regdummy)
if (regcode != regdummyptr)
*regcode++ = b;
else
regsize++;
......@@ -760,13 +766,13 @@ static void regc(char b)
*
* Means relocating the operand.
*/
static void reginsert(char op, char* opnd)
void RegExpCompile::reginsert(char op, char* opnd)
{
char* src;
char* dst;
char* place;
if (regcode == &regdummy) {
if (regcode == regdummyptr) {
regsize += 3;
return;
}
......@@ -786,13 +792,13 @@ static void reginsert(char op, char* opnd)
/*
- regtail - set the next-pointer at the end of a node chain
*/
static void regtail(char* p, const char* val)
void RegExpCompile::regtail(char* p, const char* val)
{
char* scan;
char* temp;
int offset;
if (p == &regdummy)
if (p == regdummyptr)
return;
// Find last node.
......@@ -815,10 +821,10 @@ static void regtail(char* p, const char* val)
/*
- regoptail - regtail on operand of first argument; nop if operandless
*/
static void regoptail(char* p, const char* val)
void RegExpCompile::regoptail(char* p, const char* val)
{
// "Operandless" and "op != BRANCH" are synonymous in practice.
if (p == 0 || p == &regdummy || OP(p) != BRANCH)
if (p == 0 || p == regdummyptr || OP(p) != BRANCH)
return;
regtail(OPERAND(p), val);
}
......@@ -830,34 +836,30 @@ static void regoptail(char* p, const char* val)
////////////////////////////////////////////////////////////////////////
/*
* Global work variables for find().
* Utility class for RegularExpression::find().
*/
static const char* reginput; // String-input pointer.
static const char* regbol; // Beginning of input, for ^ check.
static const char** regstartp; // Pointer to startp array.
static const char** regendp; // Ditto for endp.
class RegExpFind
{
public:
const char* reginput; // String-input pointer.
const char* regbol; // Beginning of input, for ^ check.
const char** regstartp; // Pointer to startp array.
const char** regendp; // Ditto for endp.
/*
* Forwards.
*/
static int regtry(const char*, const char**, const char**, const char*);
static int regmatch(const char*);
static int regrepeat(const char*);
#ifdef DEBUG
int regnarrate = 0;
void regdump();
static char* regprop();
#endif
int regtry(const char*, const char**, const char**, const char*);
int regmatch(const char*);
int regrepeat(const char*);
};
// find -- Matches the regular expression to the given string.
// Returns true if found, and sets start and end indexes accordingly.
bool RegularExpression::find(const char* string)
bool RegularExpression::find(char const* string,
RegularExpressionMatch& rmatch) const
{
const char* s;
this->searchstring = string;
rmatch.clear();
rmatch.searchstring = string;
if (!this->program) {
return false;
......@@ -868,7 +870,7 @@ bool RegularExpression::find(const char* string)
// RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
printf(
"RegularExpression::find(): Compiled regular expression corrupted.\n");
return 0;
return false;
}
// If there is a "must appear" string, look for it.
......@@ -880,42 +882,45 @@ bool RegularExpression::find(const char* string)
s++;
}
if (s == 0) // Not present.
return (0);
return false;
}
RegExpFind regFind;
// Mark beginning of line for ^ .
regbol = string;
regFind.regbol = string;
// Simplest case: anchored match need be tried only once.
if (this->reganch)
return (regtry(string, this->startp, this->endp, this->program) != 0);
return (
regFind.regtry(string, rmatch.startp, rmatch.endp, this->program) != 0);
// Messy cases: unanchored match.
s = string;
if (this->regstart != '\0')
// We know what char it must start with.
while ((s = strchr(s, this->regstart)) != 0) {
if (regtry(s, this->startp, this->endp, this->program))
return (1);
if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
return true;
s++;
}
else
// We don't -- general case.
do {
if (regtry(s, this->startp, this->endp, this->program))
return (1);
if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
return true;
} while (*s++ != '\0');
// Failure.
return (0);
return false;
}
/*
- regtry - try match at specific point
0 failure, 1 success
*/
static int regtry(const char* string, const char** start, const char** end,
const char* prog)
int RegExpFind::regtry(const char* string, const char** start,
const char** end, const char* prog)
{
int i;
const char** sp1;
......@@ -927,7 +932,7 @@ static int regtry(const char* string, const char** start, const char** end,
sp1 = start;
ep = end;
for (i = RegularExpression::NSUBEXP; i > 0; i--) {
for (i = RegularExpressionMatch::NSUBEXP; i > 0; i--) {
*sp1++ = 0;
*ep++ = 0;
}
......@@ -950,7 +955,7 @@ static int regtry(const char* string, const char** start, const char** end,
* by recursion.
* 0 failure, 1 success
*/
static int regmatch(const char* prog)
int RegExpFind::regmatch(const char* prog)
{
const char* scan; // Current node.
const char* next; // Next node.
......@@ -1129,7 +1134,7 @@ static int regmatch(const char* prog)
/*
- regrepeat - repeatedly match something simple, report how many
*/
static int regrepeat(const char* p)
int RegExpFind::regrepeat(const char* p)
{
int count = 0;
const char* scan;
......@@ -1176,7 +1181,7 @@ static const char* regnext(const char* p)
{
int offset;
if (p == &regdummy)
if (p == regdummyptr)
return (0);
offset = NEXT(p);
......@@ -1193,7 +1198,7 @@ static char* regnext(char* p)
{
int offset;
if (p == &regdummy)
if (p == regdummyptr)
return (0);
offset = NEXT(p);
......
......@@ -34,6 +34,115 @@
namespace @KWSYS_NAMESPACE@ {
// Forward declaration
class RegularExpression;
/** \class RegularExpressionMatch
* \brief Stores the pattern matches of a RegularExpression
*/
class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch
{
public:
RegularExpressionMatch();
bool isValid() const;
void clear();
std::string::size_type start() const;
std::string::size_type end() const;
std::string::size_type start(int n) const;
std::string::size_type end(int n) const;
std::string match(int n) const;
enum
{
NSUBEXP = 10
};
private:
friend class RegularExpression;
const char* startp[NSUBEXP];
const char* endp[NSUBEXP];
const char* searchstring;
};
/**
* \brief Creates an invalid match object
*/
inline RegularExpressionMatch::RegularExpressionMatch()
{
startp[0] = 0;
endp[0] = 0;
searchstring = 0;
}
/**
* \brief Returns true if the match pointers are valid
*/
inline bool RegularExpressionMatch::isValid() const
{
return (this->startp[0] != 0);
}
/**
* \brief Resets to the (invalid) construction state.
*/
inline void RegularExpressionMatch::clear()
{
startp[0] = 0;
endp[0] = 0;
searchstring = 0;
}
/**
* \brief Returns the start index of the full match.
*/
inline std::string::size_type RegularExpressionMatch::start() const
{
return static_cast<std::string::size_type>(this->startp[0] - searchstring);
}
/**
* \brief Returns the end index of the full match.
*/
inline std::string::size_type RegularExpressionMatch::end() const
{
return static_cast<std::string::size_type>(this->endp[0] - searchstring);
}
/**
* \brief Returns the start index of nth submatch.
* start(0) is the start of the full match.
*/
inline std::string::size_type RegularExpressionMatch::start(int n) const
{
return static_cast<std::string::size_type>(this->startp[n] -
this->searchstring);
}
/**
* \brief Returns the end index of nth submatch.
* end(0) is the end of the full match.
*/
inline std::string::size_type RegularExpressionMatch::end(int n) const
{
return static_cast<std::string::size_type>(this->endp[n] -
this->searchstring);
}
/**
* \brief Returns the nth submatch as a string.
*/
inline std::string RegularExpressionMatch::match(int n) const
{
if (this->startp[n] == 0) {
return std::string();
} else {
return std::string(this->startp[n], static_cast<std::string::size_type>(
this->endp[n] - this->startp[n]));
}
}
/** \class RegularExpression
* \brief Implements pattern matching with regular expressions.
*
......@@ -170,6 +279,9 @@ namespace @KWSYS_NAMESPACE@ {
* the same as the two characters before the first p encounterd in
* the line. It would match "drepa qrepb" in "rep drepa qrepb".
*
* All methods of RegularExpression can be called simultaneously from
* different threads but only if each invocation uses an own instance of
* RegularExpression.
*/
class @KWSYS_NAMESPACE@_EXPORT RegularExpression
{
......@@ -211,11 +323,21 @@ public:
*/
inline bool compile(std::string const&);
/**
* Matches the regular expression to the given string.
* Returns true if found, and sets start and end indexes
* in the RegularExpressionMatch instance accordingly.
*
* This method is thread safe when called with different
* RegularExpressionMatch instances.
*/
bool find(char const*, RegularExpressionMatch&) const;
/**
* Matches the regular expression to the given string.
* Returns true if found, and sets start and end indexes accordingly.
*/
bool find(char const*);
inline bool find(char const*);
/**
* Matches the regular expression to the given std string.
......@@ -224,14 +346,18 @@ public:
inline bool find(std::string const&);
/**
* Index to start of first find.
* Match indices
*/
inline RegularExpressionMatch const& regMatch() const;
inline std::string::size_type start() const;
inline std::string::size_type end() const;
inline std::string::size_type start(int n) const;
inline std::string::size_type end(int n) const;
/**
* Index to end of first find.
* Match strings
*/
inline std::string::size_type end() const;
inline std::string match(int n) const;
/**
* Copy the given regular expression.
......@@ -266,29 +392,14 @@ public:
*/
inline void set_invalid();
/**
* Destructor.
*/
// awf added
std::string::size_type start(int n) const;
std::string::size_type end(int n) const;
std::string match(int n) const;
enum
{
NSUBEXP = 10
};
private:
const char* startp[NSUBEXP];
const char* endp[NSUBEXP];
RegularExpressionMatch regmatch;
char regstart; // Internal use only
char reganch; // Internal use only
const char* regmust; // Internal use only
std::string::size_type regmlen; // Internal use only
char* program;
int progsize;
const char* searchstring;
};
/**
......@@ -344,51 +455,42 @@ inline bool RegularExpression::compile(std::string const& s)
* Matches the regular expression to the given std string.
* Returns true if found, and sets start and end indexes accordingly.
*/
inline bool RegularExpression::find(std::string const& s)
inline bool RegularExpression::find(const char* s)
{
return this->find(s.c_str());
return this->find(s, this->regmatch);
}
/**
* Set the start position for the regular expression.
* Matches the regular expression to the given std string.