#ifndef Common_DataModel_Grammar_Identifier_h
#define Common_DataModel_Grammar_Identifier_h
#include <vtk_pegtl.h>

// Not included to avoid a dependency on ICU libraries.
// #include VTK_PEGTL(pegtl/contrib/icu/utf8.hpp)

#include VTK_PEGTL(pegtl/utf8.hpp)

namespace token
{

/// Short alias for the PEGTL namespace; grammar rules below refer to PEGTL's
/// rule templates as rule::seq, rule::sor, rule::utf8::one, and so on.
namespace rule = tao::pegtl;

/// A single line terminator.
///
/// Accepts a line feed, a carriage return, or a form feed. A carriage return
/// immediately followed by a line feed is consumed as one terminator.
struct newline :
  rule::sor<
    rule::one<'\n'>,
    rule::seq<rule::one<'\r'>, rule::opt<rule::one<'\n'>>>,
    rule::one<'\f'>
  >
{
};

/// A comment delimited by "/*" and the first "*/" that follows it.
///
/// NOTE(review): rule::until is expected to fail when the input ends before a
/// closing "*/" is found; unterminated comments are presumably what
/// token::bad_comment is for — confirm against the PEGTL rule reference.
struct comment :
  rule::seq<
    rule::string<'/', '*'>,
    rule::until<rule::utf8::string<'*', '/'>>
  >
{
};

/// An unterminated comment: "/*" followed by the remainder of the input.
///
/// After the opening "/*", zero or more UTF-8 code points are consumed and
/// the rule then requires end-of-input.
///
/// NOTE(review): the rule::minus presumably only excludes the case where the
/// text after "/*" is exactly "*/" (PEGTL documents minus<M, S> as failing
/// when S matches all of what M matched); it does not by itself reject a "*/"
/// in the middle of the text. This rule therefore appears to rely on being
/// tried only after token::comment has failed, as it is in token::whitespace —
/// confirm before using it on its own.
struct bad_comment :
  rule::seq<
    rule::string<'/', '*'>,
    rule::minus<rule::star<rule::utf8::any>, rule::utf8::string<'*', '/'>>,
    rule::eof
  >
{
};

/// This avoids a dependency on ICU libraries
struct utf8_whitespace_chars :
  rule::utf8::one<
    0x0009, // horizontal tab
    0x000A, // line feed
    0x000B, // vertical tab
    0x000C, // form feed
    0x000D, // carriage return
    0x0020, // space
    0x0085, // next line
    0x00A0, // non-breaking space (&nbsp;)
    0x1680, // Ogham space
    0x2000, // en quad
    0x2001, // em quad
    0x2002, // en space
    0x2003, // em space
    0x2004, // three-per em space
    0x2005, // four-per em space
    0x2006, // six-per em space
    0x2007, // figure space
    0x2008, // punctuation space
    0x2009, // thin space
    0x200A, // hair space
    0x2028, // line separator
    0x2029, // paragraph separator
    0x202F, // narrow no-break space
    0x205F, // medium math space
    0x3000  // ideographic space
  >
{
};


/// A run of one or more whitespace items.
///
/// Besides whitespace code points and line terminators, comments (both
/// terminated and unterminated) are treated as whitespace. The code points
/// come from utf8_whitespace_chars rather than rule::utf8::icu::white_space
/// in order to avoid a dependency on the ICU libraries.
struct whitespace :
  rule::plus<
    rule::sor<
      utf8_whitespace_chars,
      comment,
      bad_comment,
      newline
    >
  >
{
};

/// Whitespace that may be absent: zero or more whitespace tokens.
struct optional_whitespace : rule::star<whitespace>
{
};

/// Any single code point in the inclusive range U+00A0 through U+10FFFF.
struct non_ascii :
  rule::utf8::ranges<0x00A0, 0x10FFFF>
{
};

struct hex_number : rule::rep_min_max<1, 6, rule::utf8::ranges<'0', '9', 'a', 'f', 'A', 'F'>>
{
};

/// A backslash escape.
///
/// The backslash is followed by a character-specifier, which is either a
/// hex number naming a unicode code-point or any single code point other
/// than a line feed, carriage return, or form feed. The second form lets
/// characters that the tokenizer/parser would otherwise act on (for example
/// a string terminator inside a string) be included literally.
struct escape :
  rule::seq<
    rule::one<'\\'>,
    rule::sor<
      hex_number,
      rule::utf8::not_one<'\n', '\r', '\f'>
    >
  >
{
};

/// A single ASCII digit or ASCII letter (either case).
struct letters_digits :
  rule::utf8::ranges<'0', '9', 'A', 'Z', 'a', 'z'>
{
};

/// Everything after the start of an identifier.
///
/// Zero or more of: an escape, an ASCII letter or digit, a hyphen, an
/// underscore, or a non-ASCII code point.
struct ident_suffix :
  rule::star<
    rule::sor<
      escape,
      letters_digits,
      rule::one<'-', '_'>,
      non_ascii
    >
  >
{
};

/// A CSS identifier.
///
/// Depending on context an identifier may be an element/tag name, a
/// pseudo-class, a property name, a property value, etc.
///
/// An identifier starts with either two hyphens, or an optional single
/// hyphen followed by an ASCII letter, an underscore, a non-ASCII code point,
/// or an escape. The start is followed by ident_suffix.
struct ident :
  rule::seq<
    rule::sor<
      rule::string<'-', '-'>,
      rule::seq<
        rule::opt<rule::one<'-'>>,
        rule::sor<
          rule::utf8::ranges<'a', 'z', 'A', 'Z', '_'>,
          non_ascii,
          escape
        >
      >
    >,
    ident_suffix
  >
{
};

} // namespace token

#endif // Common_DataModel_Grammar_Identifier_h
