Commit cff58f07 authored by Sebastian Holtermann's avatar Sebastian Holtermann
Browse files

RegularExpression: New RegularExpressionMatch class

The new RegularExpressionMatch class makes it possible to keep
multiple match results of a RegularExpression.

The startp and endp pointers of RegularExpression are replaced
by an instance of RegularExpressionMatch.

There is also a new thread-safe version of RegularExpression::find()
that allows multiple threads to concurrently use the same
RegularExpression on different input strings with independent
RegularExpressionMatch results.
parent bbc94ba8
......@@ -45,9 +45,9 @@ RegularExpression::RegularExpression(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
this->startp[0] = rxp.startp[0]; // Copy pointers into last
this->endp[0] = rxp.endp[0]; // Successful "find" operation
this->regmust = rxp.regmust; // Copy field
// Copy pointers into last successful "find" operation
this->regmatch = rxp.regmatch;
this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
......@@ -78,9 +78,9 @@ RegularExpression& RegularExpression::operator=(const RegularExpression& rxp)
this->program = new char[this->progsize]; // Allocate storage
for (ind = this->progsize; ind-- != 0;) // Copy regular expresion
this->program[ind] = rxp.program[ind];
this->startp[0] = rxp.startp[0]; // Copy pointers into last
this->endp[0] = rxp.endp[0]; // Successful "find" operation
this->regmust = rxp.regmust; // Copy field
// Copy pointers into last successful "find" operation
this->regmatch = rxp.regmatch;
this->regmust = rxp.regmust; // Copy field
if (rxp.regmust != 0) {
char* dum = rxp.program;
ind = 0;
......@@ -123,8 +123,9 @@ bool RegularExpression::deep_equal(const RegularExpression& rxp) const
while (ind-- != 0) // Else while still characters
if (this->program[ind] != rxp.program[ind]) // If regexp are different
return false; // Return failure
return (this->startp[0] == rxp.startp[0] && // Else if same start/end ptrs,
this->endp[0] == rxp.endp[0]); // Return true
// Else if same start/end ptrs, return true
return (this->regmatch.start() == rxp.regmatch.start() &&
this->regmatch.end() == rxp.regmatch.end());
}
// The remaining code in this file is derived from the regular expression code
......@@ -351,7 +352,7 @@ bool RegularExpression::compile(const char* exp)
printf("RegularExpression::compile(): Error in compile.\n");
return false;
}
this->startp[0] = this->endp[0] = this->searchstring = 0;
this->regmatch.clear();
// Small enough for pointer-storage convention?
if (comp.regsize >= 32767L) { // Probably could be 65535L.
......@@ -440,7 +441,7 @@ char* RegExpCompile::reg(int paren, int* flagp)
// Make an OPEN node, if parenthesized.
if (paren) {
if (regnpar >= RegularExpression::NSUBEXP) {
if (regnpar >= RegularExpressionMatch::NSUBEXP) {
// RAISE Error, SYM(RegularExpression), SYM(Too_Many_Parens),
printf("RegularExpression::compile(): Too many parentheses.\n");
return 0;
......@@ -852,12 +853,13 @@ public:
// find -- Matches the regular expression to the given string.
// Returns true if found, and sets start and end indexes accordingly.
bool RegularExpression::find(const char* string)
bool RegularExpression::find(char const* string,
RegularExpressionMatch& rmatch) const
{
const char* s;
this->searchstring = string;
rmatch.clear();
rmatch.searchstring = string;
if (!this->program) {
return false;
......@@ -868,7 +870,7 @@ bool RegularExpression::find(const char* string)
// RAISE Error, SYM(RegularExpression), SYM(Internal_Error),
printf(
"RegularExpression::find(): Compiled regular expression corrupted.\n");
return 0;
return false;
}
// If there is a "must appear" string, look for it.
......@@ -880,7 +882,7 @@ bool RegularExpression::find(const char* string)
s++;
}
if (s == 0) // Not present.
return (0);
return false;
}
RegExpFind regFind;
......@@ -890,27 +892,27 @@ bool RegularExpression::find(const char* string)
// Simplest case: anchored match need be tried only once.
if (this->reganch)
return (regFind.regtry(string, this->startp, this->endp, this->program) !=
0);
return (
regFind.regtry(string, rmatch.startp, rmatch.endp, this->program) != 0);
// Messy cases: unanchored match.
s = string;
if (this->regstart != '\0')
// We know what char it must start with.
while ((s = strchr(s, this->regstart)) != 0) {
if (regFind.regtry(s, this->startp, this->endp, this->program))
return (1);
// regtry() succeeded at this candidate position: report the match.
// (Must be true, like the general-case loop below; returning false here
// would discard every match of a pattern with a known first character.)
if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
return true;
s++;
}
else
// We don't -- general case.
do {
if (regFind.regtry(s, this->startp, this->endp, this->program))
return (1);
if (regFind.regtry(s, rmatch.startp, rmatch.endp, this->program))
return true;
} while (*s++ != '\0');
// Failure.
return (0);
return false;
}
/*
......@@ -930,7 +932,7 @@ int RegExpFind::regtry(const char* string, const char** start,
sp1 = start;
ep = end;
for (i = RegularExpression::NSUBEXP; i > 0; i--) {
for (i = RegularExpressionMatch::NSUBEXP; i > 0; i--) {
*sp1++ = 0;
*ep++ = 0;
}
......
......@@ -34,6 +34,115 @@
namespace @KWSYS_NAMESPACE@ {
// Forward declaration
class RegularExpression;
/** \class RegularExpressionMatch
* \brief Stores the pattern matches of a RegularExpression
*
* Holds the result of one search: for each of the NSUBEXP slots a start and
* an end pointer, plus the string that was searched. Slot 0 describes the
* full match. Only raw pointers are stored (the searched string is not
* copied), so the searched string presumably has to stay alive for as long
* as match() is used on this object.
*/
class @KWSYS_NAMESPACE@_EXPORT RegularExpressionMatch
{
public:
// Creates an invalid (empty) match object.
RegularExpressionMatch();
// True if the start pointer of the full match (slot 0) is set.
bool isValid() const;
// Resets the object to the invalid state.
void clear();
// Start / end index of the full match, relative to the searched string.
std::string::size_type start() const;
std::string::size_type end() const;
// Start / end index of slot n, relative to the searched string.
// n is used directly as an array index; it is not range checked.
std::string::size_type start(int n) const;
std::string::size_type end(int n) const;
// Text of slot n, or an empty string if that slot has no start pointer.
std::string match(int n) const;
// Number of match slots held by this object (slot 0 is the full match).
enum
{
NSUBEXP = 10
};
private:
// RegularExpression::find() writes the members below directly.
friend class RegularExpression;
const char* startp[NSUBEXP]; // start pointer of each slot
const char* endp[NSUBEXP]; // end pointer of each slot
const char* searchstring; // string the indices are relative to
};
/**
 * \brief Creates an invalid match object
 *
 * Every slot is nulled, not only slot 0. The arrays are plain pointer
 * arrays, so any slot left untouched here would hold an indeterminate
 * value that match(n), start(n), end(n) or a copy of this object would
 * then read.
 */
inline RegularExpressionMatch::RegularExpressionMatch()
{
  for (int i = 0; i < NSUBEXP; ++i) {
    startp[i] = 0;
    endp[i] = 0;
  }
  searchstring = 0;
}
/**
 * \brief Tells whether this object currently holds a match.
 *
 * A match is considered present when the start pointer of the full
 * match (slot 0) is non-null.
 */
inline bool RegularExpressionMatch::isValid() const
{
  const char* const fullMatchBegin = this->startp[0];
  return fullMatchBegin != 0;
}
/**
* \brief Resets to the (invalid) construction state.
*/
inline void RegularExpressionMatch::clear()
{
startp[0] = 0;
endp[0] = 0;
searchstring = 0;
}
/**
 * \brief Returns the start index of the full match.
 *
 * The index is the distance from the beginning of the searched string
 * to the start pointer stored in slot 0.
 */
inline std::string::size_type RegularExpressionMatch::start() const
{
  const char* const matchBegin = this->startp[0];
  return static_cast<std::string::size_type>(matchBegin - this->searchstring);
}
/**
 * \brief Returns the end index of the full match.
 *
 * The index is the distance from the beginning of the searched string
 * to the end pointer stored in slot 0.
 */
inline std::string::size_type RegularExpressionMatch::end() const
{
  const char* const matchEnd = this->endp[0];
  return static_cast<std::string::size_type>(matchEnd - this->searchstring);
}
/**
 * \brief Returns the start index of the nth submatch.
 * start(0) is the start of the full match.
 *
 * n is used directly as the slot index; it is not range checked.
 */
inline std::string::size_type RegularExpressionMatch::start(int n) const
{
  const char* const slotBegin = this->startp[n];
  return static_cast<std::string::size_type>(slotBegin - this->searchstring);
}
/**
 * \brief Returns the end index of the nth submatch.
 * end(0) is the end of the full match.
 *
 * n is used directly as the slot index; it is not range checked.
 */
inline std::string::size_type RegularExpressionMatch::end(int n) const
{
  const char* const slotEnd = this->endp[n];
  return static_cast<std::string::size_type>(slotEnd - this->searchstring);
}
/**
 * \brief Returns the nth submatch as a string.
 *
 * Yields an empty string when slot n has no start pointer; otherwise the
 * characters between the slot's start and end pointers are copied.
 * n is used directly as the slot index; it is not range checked.
 */
inline std::string RegularExpressionMatch::match(int n) const
{
  const char* const first = this->startp[n];
  if (first == 0) {
    return std::string();
  }
  const char* const last = this->endp[n];
  return std::string(first, static_cast<std::string::size_type>(last - first));
}
/** \class RegularExpression
* \brief Implements pattern matching with regular expressions.
*
......@@ -170,6 +279,9 @@ namespace @KWSYS_NAMESPACE@ {
* the same as the two characters before the first p encountered in
* the line. It would match "drepa qrepb" in "rep drepa qrepb".
*
* All methods of RegularExpression can be called simultaneously from
* different threads, but only if each thread uses its own instance of
* RegularExpression.
*/
class @KWSYS_NAMESPACE@_EXPORT RegularExpression
{
......@@ -211,11 +323,21 @@ public:
*/
inline bool compile(std::string const&);
/**
* Matches the regular expression to the given string.
* Returns true if found, and sets start and end indexes
* in the RegularExpressionMatch instance accordingly.
*
* This method is thread safe when called with different
* RegularExpressionMatch instances.
*/
bool find(char const*, RegularExpressionMatch&) const;
/**
* Matches the regular expression to the given string.
* Returns true if found, and sets start and end indexes accordingly.
*/
bool find(char const*);
inline bool find(char const*);
/**
* Matches the regular expression to the given std string.
......@@ -224,14 +346,18 @@ public:
inline bool find(std::string const&);
/**
* Index to start of first find.
* Match indices
*/
inline RegularExpressionMatch const& regMatch() const;
inline std::string::size_type start() const;
inline std::string::size_type end() const;
inline std::string::size_type start(int n) const;
inline std::string::size_type end(int n) const;
/**
* Index to end of first find.
* Match strings
*/
inline std::string::size_type end() const;
inline std::string match(int n) const;
/**
* Copy the given regular expression.
......@@ -266,29 +392,14 @@ public:
*/
inline void set_invalid();
/**
* Destructor.
*/
// awf added
std::string::size_type start(int n) const;
std::string::size_type end(int n) const;
std::string match(int n) const;
enum
{
NSUBEXP = 10
};
private:
const char* startp[NSUBEXP];
const char* endp[NSUBEXP];
RegularExpressionMatch regmatch;
char regstart; // Internal use only
char reganch; // Internal use only
const char* regmust; // Internal use only
std::string::size_type regmlen; // Internal use only
char* program;
int progsize;
const char* searchstring;
};
/**
......@@ -344,51 +455,42 @@ inline bool RegularExpression::compile(std::string const& s)
* Matches the regular expression to the given std string.
* Returns true if found, and sets start and end indexes accordingly.
*/
inline bool RegularExpression::find(std::string const& s)
inline bool RegularExpression::find(const char* s)
{
return this->find(s.c_str());
return this->find(s, this->regmatch);
}
/**
* Set the start position for the regular expression.
* Matches the regular expression to the given std string.
* Returns true if found, and sets start and end indexes accordingly.
*/
inline std::string::size_type RegularExpression::start() const
inline bool RegularExpression::find(std::string const& s)
{
return static_cast<std::string::size_type>(this->startp[0] - searchstring);
return this->find(s.c_str());
}
/**
* Returns the start/end index of the last item found.
* Returns the internal match object
*/
inline std::string::size_type RegularExpression::end() const
inline RegularExpressionMatch const& RegularExpression::regMatch() const
{
return static_cast<std::string::size_type>(this->endp[0] - searchstring);
return this->regmatch;
}
/**
* Returns true if two regular expressions have different
* compiled program for pattern matching.
* Returns the start index of the full match.
*/
inline bool RegularExpression::operator!=(const RegularExpression& r) const
inline std::string::size_type RegularExpression::start() const
{
return (!(*this == r));
return regmatch.start();
}
/**
* Returns true if a valid regular expression is compiled
* and ready for pattern matching.
* Returns the end index of the full match.
*/
inline bool RegularExpression::is_valid() const
{
return (this->program != 0);
}
inline void RegularExpression::set_invalid()
inline std::string::size_type RegularExpression::end() const
{
//#ifndef _WIN32
delete[] this->program;
//#endif
this->program = 0;
return regmatch.end();
}
/**
......@@ -396,7 +498,7 @@ inline void RegularExpression::set_invalid()
*/
inline std::string::size_type RegularExpression::start(int n) const
{
return static_cast<std::string::size_type>(this->startp[n] - searchstring);
return regmatch.start(n);
}
/**
......@@ -404,7 +506,7 @@ inline std::string::size_type RegularExpression::start(int n) const
*/
inline std::string::size_type RegularExpression::end(int n) const
{
return static_cast<std::string::size_type>(this->endp[n] - searchstring);
return regmatch.end(n);
}
/**
......@@ -412,12 +514,33 @@ inline std::string::size_type RegularExpression::end(int n) const
*/
inline std::string RegularExpression::match(int n) const
{
if (this->startp[n] == 0) {
return std::string("");
} else {
return std::string(this->startp[n], static_cast<std::string::size_type>(
this->endp[n] - this->startp[n]));
}
return regmatch.match(n);
}
/**
 * Inequality test: the exact negation of operator== applied to the
 * same pair of regular expressions.
 */
inline bool RegularExpression::operator!=(const RegularExpression& r) const
{
  const bool same = (*this == r);
  return !same;
}
/**
 * Reports whether a compiled program is present, i.e. whether the
 * internal program pointer is non-null.
 */
inline bool RegularExpression::is_valid() const
{
  const char* const compiled = this->program;
  return compiled != 0;
}
/**
 * Frees the compiled program and nulls the program pointer.
 * (A former _WIN32 guard around the delete is disabled in this file.)
 */
inline void RegularExpression::set_invalid()
{
  char* const stale = this->program;
  this->program = 0;
  delete[] stale;
}
} // namespace @KWSYS_NAMESPACE@
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment