Commit 987d98dd authored by David Gobbi
Browse files

Add evaluation of unicode char literals.

The wrapper's char literal evaluator did not recognize the '\u' and '\U'
escape codes or evaluate utf-8 encoded char constants.  Also, the
CPRE_IDGIT character type has been split into CPRE_XID (which allows
non-ASCII chars) and CPRE_XDIGIT (which is restricted to ASCII).
parent 2318e520
......@@ -5104,7 +5104,7 @@ const char *raw_string(const char *begin)
*dp++ = c;
c = input();
}
while (vtkParse_CharType(c, CPRE_IDGIT));
while (vtkParse_CharType(c, CPRE_XID));
}
unput(c);
......
......@@ -1171,7 +1171,7 @@ const char *raw_string(const char *begin)
*dp++ = c;
c = input();
}
while (vtkParse_CharType(c, CPRE_IDGIT));
while (vtkParse_CharType(c, CPRE_XID));
}
unput(c);
......
......@@ -1211,7 +1211,7 @@ void prepend_scope(char *cp, const char *arg)
n = strlen(arg);
i = m;
while (i > 0 &&
(vtkParse_CharType(cp[i-1], CPRE_IDGIT) ||
(vtkParse_CharType(cp[i-1], CPRE_XID) ||
cp[i-1] == ':' || cp[i-1] == '>'))
{
i--;
......@@ -8854,7 +8854,7 @@ yyuserAction (yyRuleNum yyn, size_t yyrhslen, yyGLRStackItem* yyvsp,
postSig(" ");
}
postSig((((yyGLRStackItem const *)yyvsp)[YYFILL (0)].yystate.yysemantics.yysval.str));
if (vtkParse_CharType(c1, (CPRE_IDGIT|CPRE_QUOTE)) ||
if (vtkParse_CharType(c1, (CPRE_XID|CPRE_QUOTE)) ||
c1 == ')' || c1 == ']')
{
postSig(" ");
......@@ -8903,7 +8903,7 @@ yyuserAction (yyRuleNum yyn, size_t yyrhslen, yyGLRStackItem* yyvsp,
cp = getSig();
l = getSigLength();
if (l != 0) { c1 = cp[l-1]; }
while (vtkParse_CharType(c1, CPRE_IDGIT) && l != 0)
while (vtkParse_CharType(c1, CPRE_XID) && l != 0)
{
--l;
c1 = cp[l-1];
......
......@@ -1172,7 +1172,7 @@ void prepend_scope(char *cp, const char *arg)
n = strlen(arg);
i = m;
while (i > 0 &&
(vtkParse_CharType(cp[i-1], CPRE_IDGIT) ||
(vtkParse_CharType(cp[i-1], CPRE_XID) ||
cp[i-1] == ':' || cp[i-1] == '>'))
{
i--;
......@@ -3185,7 +3185,7 @@ common_bracket_item_no_scope_operator:
postSig(" ");
}
postSig($<str>1);
if (vtkParse_CharType(c1, (CPRE_IDGIT|CPRE_QUOTE)) ||
if (vtkParse_CharType(c1, (CPRE_XID|CPRE_QUOTE)) ||
c1 == ')' || c1 == ']')
{
postSig(" ");
......@@ -3210,7 +3210,7 @@ common_bracket_item_no_scope_operator:
cp = getSig();
l = getSigLength();
if (l != 0) { c1 = cp[l-1]; }
while (vtkParse_CharType(c1, CPRE_IDGIT) && l != 0)
while (vtkParse_CharType(c1, CPRE_XID) && l != 0)
{
--l;
c1 = cp[l-1];
......
......@@ -779,7 +779,7 @@ size_t vtkParse_ValueInfoFromString(
pointer_bits = (pointer_bits << 2);
while (vtkParse_CharType(*cp, CPRE_HSPACE)) { cp++; }
if (strncmp(cp, "const", 5) == 0 &&
!vtkParse_CharType(cp[5], CPRE_IDGIT))
!vtkParse_CharType(cp[5], CPRE_XID))
{
cp += 5;
while (vtkParse_CharType(*cp, CPRE_HSPACE)) { cp++; }
......
......@@ -377,17 +377,26 @@ static int preproc_skip_parentheses(StringTokenizer *tokens)
return VTK_PARSE_SYNTAX_ERROR;
}
/** Evaluate a char literal to an integer value. */
static int preproc_evaluate_char(
const char *cp, preproc_int_t *val, int *is_unsigned)
{
if (cp[0] == '\'')
size_t i = 0;
preproc_int_t code = 0;
int typecode = 0;
if (cp[0] == 'u' || cp[0] == 'U' || cp[0] == 'L')
{
typecode = cp[0];
cp++;
}
if (*cp == '\'')
{
cp++;
if (*cp != '\\')
{
*val = *cp;
code = vtkParse_DecodeUtf8(&cp, NULL);
}
else if (*cp != '\'' && *cp != '\n' && *cp != '\0')
{
......@@ -405,14 +414,26 @@ static int preproc_evaluate_char(
else if (*cp == '\?') { *val = '\?'; cp++; }
else if (*cp >= '0' && *cp <= '7')
{
*val = (char)string_to_preproc_int(cp, 8);
do { cp++; } while (*cp >= '0' && *cp <= '7');
code = string_to_preproc_int(cp, 8);
do { cp++; i++; } while (i < 4 && *cp >= '0' && *cp <= '7');
}
else if (*cp == 'x')
{
*val = (char)string_to_preproc_int(cp+1, 16);
code = string_to_preproc_int(cp+1, 16);
do { cp++; } while (vtkParse_CharType(*cp, CPRE_HEX));
}
else if (*cp == 'u')
{
code = string_to_preproc_int(cp+1, 16);
do { cp++; i++; } while (i < 5 && vtkParse_CharType(*cp, CPRE_HEX));
if (i != 5) { cp -= i; }
}
else if (*cp == 'U')
{
code = string_to_preproc_int(cp+1, 16);
do { cp++; i++; } while (i < 9 && vtkParse_CharType(*cp, CPRE_HEX));
if (i != 9) { cp -= i; }
}
}
if (*cp != '\'')
{
......@@ -421,10 +442,21 @@ static int preproc_evaluate_char(
#endif
return VTK_PARSE_SYNTAX_ERROR;
}
if (typecode == 0)
{
*val = (char)code;
}
else if (typecode == 'L')
{
*val = (wchar_t)code;
}
else
{
*val = code;
}
*is_unsigned = 0;
return VTK_PARSE_OK;
}
#if PREPROC_DEBUG
fprintf(stderr, "syntax error %d\n", __LINE__);
#endif
......@@ -1551,7 +1583,7 @@ const char *preproc_find_include_file(
/* check for absolute path of form DRIVE: or /path/to/file */
j = 0;
while (vtkParse_CharType(filename[j], CPRE_IDGIT)) { j++; }
while (vtkParse_CharType(filename[j], CPRE_XID)) { j++; }
if (filename[j] == ':' || filename[0] == '/' || filename[0] == '\\')
{
......@@ -2066,13 +2098,13 @@ static int preproc_include_file(
((j > 2 &&
(line[j-3] == 'u' || line[j-2] == '8') &&
(j == 3 ||
!vtkParse_CharType(line[j-4], CPRE_IDGIT|CPRE_QUOTE))) ||
!vtkParse_CharType(line[j-4], CPRE_XID|CPRE_QUOTE))) ||
(j > 1 &&
(line[j-2] == 'u' || line[j-2] == 'U' || line[j-2] == 'L') &&
(j == 2 ||
!vtkParse_CharType(line[j-3], CPRE_IDGIT|CPRE_QUOTE))) ||
!vtkParse_CharType(line[j-3], CPRE_XID|CPRE_QUOTE))) ||
(j == 1 ||
!vtkParse_CharType(line[j-2], CPRE_IDGIT|CPRE_QUOTE))))
!vtkParse_CharType(line[j-2], CPRE_XID|CPRE_QUOTE))))
{
state = '(';
d = j + 1;
......
......@@ -50,33 +50,35 @@ unsigned char parse_charbits[256] = {
CPRE_DIGIT|CPRE_HEX, CPRE_DIGIT|CPRE_HEX,
CPRE_DIGIT|CPRE_HEX, /* 9 */
0, 0, 0, 0, 0, 0, 0, /* :;<=>?@ */
CPRE_ID|CPRE_HEX, /* A */
CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* BCD */
CPRE_ID|CPRE_HEX|CPRE_EXP, /* E */
CPRE_ID|CPRE_HEX, CPRE_ID, CPRE_ID, CPRE_ID, /* FGHI */
CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* ABC */
CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* DEF */
CPRE_ID, CPRE_ID, CPRE_ID, /* GHI */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* JKLM */
CPRE_ID, CPRE_ID, CPRE_ID|CPRE_EXP, CPRE_ID, /* NOPQ */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* NOPQ */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* RSTU */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* VWXY */
CPRE_ID, /* Z */
0, 0, 0, 0, /* [\\]^ */
CPRE_ID, /* _ */
0, /* ` */
CPRE_ID|CPRE_HEX, /* a */
CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* bcd */
CPRE_ID|CPRE_HEX|CPRE_EXP, /* e */
CPRE_ID|CPRE_HEX, CPRE_ID, CPRE_ID, CPRE_ID, /* fghi */
CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* abc */
CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, CPRE_ID|CPRE_HEX, /* def */
CPRE_ID, CPRE_ID, CPRE_ID, /* ghi */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* jklm */
CPRE_ID, CPRE_ID, CPRE_ID|CPRE_EXP, CPRE_ID, /* nopq */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* nopq */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* rstu */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, /* vwxy */
CPRE_ID, /* z */
0, 0, 0, 0, /* {|}~ */
0, /* '\x7f' */
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID, CPRE_ID,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND, CPRE_EXTEND,
};
#define parse_chartype(c, bits) \
......@@ -203,13 +205,12 @@ size_t vtkParse_SkipNumber(const char *text)
do
{
char c = *cp++;
if (parse_chartype(c, CPRE_EXP) &&
parse_chartype(*cp, CPRE_SIGN))
if (parse_chartype(*cp, CPRE_SIGN) && (c == 'e' || c == 'E'))
{
cp++;
}
}
while (parse_chartype(*cp, CPRE_IDGIT) || *cp == '.');
while (parse_chartype(*cp, CPRE_XID) || *cp == '.');
}
return cp - text;
......@@ -226,7 +227,7 @@ size_t vtkParse_SkipId(const char *text)
{
cp++;
}
while (parse_chartype(*cp, CPRE_IDGIT));
while (parse_chartype(*cp, CPRE_XID));
}
return cp - text;
......@@ -236,7 +237,7 @@ size_t vtkParse_SkipId(const char *text)
#define parse_hash_name(cp, h) \
h = 5381; \
do { h = (h << 5) + h + (unsigned char)*cp++; } \
while (parse_chartype(*cp, CPRE_IDGIT));
while (parse_chartype(*cp, CPRE_XID));
unsigned int vtkParse_HashId(const char *cp)
{
......@@ -250,7 +251,77 @@ unsigned int vtkParse_HashId(const char *cp)
return h;
}
/** Skip a string or */
/** Decode a single unicode character from utf8, but if utf8 decoding
 * fails, assume ISO-8859 and return the first octet.
 *
 * cpp        - in/out: points at the first octet to decode; on return it
 *              points just past the octets that were consumed (the whole
 *              sequence on success, exactly one octet on failure).
 * error_flag - optional (may be NULL): set to 0 on success, 1 on failure.
 *
 * Decoding fails for a stray continuation octet, an invalid lead octet,
 * a missing or malformed continuation octet, an overlong encoding, a
 * surrogate code point (U+D800..U+DFFF), or a value above U+10FFFF.
 * A NUL terminator is never skipped, since it is not a continuation octet.
 */
unsigned int vtkParse_DecodeUtf8(const char **cpp, int *error_flag)
{
  /* smallest code point that needs 1, 2, 3 and 4 octets respectively,
   * used to reject overlong encodings */
  static const unsigned int min_code[4] = { 0x0, 0x80, 0x800, 0x10000 };

  const unsigned char *start = (const unsigned char *)(*cpp);
  const unsigned char *cp = start;
  unsigned int code = *cp++;
  unsigned int trail = 0; /* number of continuation octets expected */
  unsigned int i;
  int good = 1;

  if ((code & 0x80) != 0)
  {
    /* classify the lead octet and keep only its payload bits */
    if ((code & 0xE0) == 0xC0)
    {
      code &= 0x1F;
      trail = 1;
    }
    else if ((code & 0xF0) == 0xE0)
    {
      code &= 0x0F;
      trail = 2;
    }
    else if ((code & 0xF8) == 0xF0)
    {
      code &= 0x07;
      trail = 3;
    }
    else
    {
      /* continuation octet in lead position, or 0xF8..0xFF */
      good = 0;
    }

    /* accumulate six payload bits per continuation octet, stopping at
     * the first octet that is not of the form 10xxxxxx */
    for (i = 0; good && i < trail; i++)
    {
      unsigned int s = *cp;
      good = ((s & 0xC0) == 0x80);
      if (good)
      {
        code = (code << 6) | (s & 0x3F);
        cp++;
      }
    }

    /* well-formed octets can still encode a forbidden value */
    if (good &&
        (code < min_code[trail] ||
         code > 0x10FFFF ||
         (code >= 0xD800 && code <= 0xDFFF)))
    {
      good = 0;
    }

    if (!good)
    {
      /* fall back to treating the first octet as a single character */
      cp = start;
      code = *cp++;
    }
  }

  if (error_flag)
  {
    *error_flag = !good;
  }

  *cpp = (const char *)(cp);
  return code;
}
/** Skip a string or char literal */
size_t parse_skip_quotes_with_suffix(const char *cp)
{
size_t l = vtkParse_SkipQuotes(cp);
......
......@@ -54,11 +54,13 @@ extern "C" {
*/
typedef enum _parse_char_type
{
CPRE_ID = 0x01, /* A-Z a-z and _ */
CPRE_NONDIGIT = 0x01, /* A-Z a-z and _ */
CPRE_DIGIT = 0x02, /* 0-9 */
CPRE_IDGIT = 0x03, /* 0-9 A-Z a-z and _ */
CPRE_HEX = 0x04, /* 0-9A-Fa-f */
CPRE_EXP = 0x08, /* EPep (exponents for floats) */
CPRE_XDIGIT = 0x03, /* 0-9 A-Z a-z and _ */
CPRE_EXTEND = 0x04, /* non-ascii character */
CPRE_ID = 0x05, /* starting char for identifier */
CPRE_XID = 0x07, /* continuing char for identifier */
CPRE_HEX = 0x08, /* 0-9 A-F a-f hexadecimal digits */
CPRE_SIGN = 0x10, /* +- (sign for floats) */
CPRE_QUOTE = 0x20, /* " and ' */
CPRE_HSPACE = 0x40, /* space, tab, carriage return */
......@@ -199,6 +201,12 @@ size_t vtkParse_SkipId(const char *cp);
*/
unsigned int vtkParse_HashId(const char *cp);
/**
 * Decode a single unicode character from utf8, or set error flag to 1.
 * On success the character pointer is advanced past every octet of the
 * decoded sequence and the error flag is set to 0.
 * The character pointer will be advanced by one if an error occurred,
 * and the return value will be the value of the first octet.
 * The error flag pointer may be NULL if the caller does not need it.
 */
unsigned int vtkParse_DecodeUtf8(const char **cpp, int *error_flag);
/**
* StringCache provides a simple way of allocating strings centrally.
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment