[nasm:master] preproc: when parsing a # marker, use C-style string unquoting
nasm-bot for H. Peter Anvin (Intel)
hpa at zytor.com
Mon Jul 13 14:15:05 PDT 2020
Commit-ID: 1d151a8558f1ba7ef971a3b5af960db0031a0383
Gitweb: http://repo.or.cz/w/nasm.git?a=commitdiff;h=1d151a8558f1ba7ef971a3b5af960db0031a0383
Author: H. Peter Anvin (Intel) <hpa at zytor.com>
AuthorDate: Mon, 13 Jul 2020 14:10:16 -0700
Committer: H. Peter Anvin (Intel) <hpa at zytor.com>
CommitDate: Mon, 13 Jul 2020 14:14:28 -0700
preproc: when parsing a # marker, use C-style string unquoting
To handle escape codes in filename strings after # markers correctly,
we need nasm_unquote() to be aware that it is using C escapes;
otherwise things like "foo`bar" will break.
Signed-off-by: H. Peter Anvin (Intel) <hpa at zytor.com>
---
asm/preproc.c | 71 ++++++++++++++++++++++++-------------------------------
asm/quote.c | 75 ++++++++++++++++++++++++++++++++---------------------------
asm/quote.h | 15 ++++++++++++
3 files changed, 87 insertions(+), 74 deletions(-)
diff --git a/asm/preproc.c b/asm/preproc.c
index 6a71ad00..0dfde9a5 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -721,30 +721,37 @@ static inline bool tok_isnt(const Token *x, char c)
* Unquote a token if it is a string, and set its type to
* TOK_INTERNAL_STRING.
*/
-static const char *unquote_token(Token *t)
+
+/*
+ * Common version for any kind of quoted string; see asm/quote.c for
+ * information about the arguments.
+ */
+static const char *unquote_token_anystr(Token *t, uint32_t badctl, char qstart)
{
+ size_t nlen, olen;
+ char *p;
+
if (t->type != TOK_STRING)
return tok_text(t);
+ olen = t->len;
+ p = (olen > INLINE_TEXT) ? t->text.p.ptr : t->text.a;
+ t->len = nlen = nasm_unquote_anystr(p, NULL, badctl, qstart);
t->type = TOK_INTERNAL_STRING;
- if (t->len > INLINE_TEXT) {
- char *p = t->text.p.ptr;
+ if (olen <= INLINE_TEXT || nlen > INLINE_TEXT)
+ return p;
- t->len = nasm_unquote(p, NULL);
+ nasm_zero(t->text.a);
+ memcpy(t->text.a, p, nlen);
+ nasm_free(p);
+ return t->text.a;
+}
- if (t->len <= INLINE_TEXT) {
- nasm_zero(t->text.a);
- memcpy(t->text.a, p, t->len);
- nasm_free(p);
- return t->text.a;
- } else {
- return p;
- }
- } else {
- t->len = nasm_unquote(t->text.a, NULL);
- return t->text.a;
- }
+/* Unquote any string, can produce any arbitrary binary output */
+static const char *unquote_token(Token *t)
+{
+ return unquote_token_anystr(t, 0, STR_NASM);
}
/*
@@ -753,28 +760,7 @@ static const char *unquote_token(Token *t)
*/
static const char *unquote_token_cstr(Token *t)
{
- if (t->type != TOK_STRING)
- return tok_text(t);
-
- t->type = TOK_INTERNAL_STRING;
-
- if (t->len > INLINE_TEXT) {
- char *p = t->text.p.ptr;
-
- t->len = nasm_unquote_cstr(p, NULL);
-
- if (t->len <= INLINE_TEXT) {
- nasm_zero(t->text.a);
- memcpy(t->text.a, p, t->len);
- nasm_free(p);
- return t->text.a;
- } else {
- return p;
- }
- } else {
- t->len = nasm_unquote_cstr(t->text.a, NULL);
- return t->text.a;
- }
+ return unquote_token_anystr(t, BADCTL, STR_NASM);
}
/*
@@ -3389,14 +3375,19 @@ static int line_directive(Token *origline, Token *tline)
tline = skip_white(tline);
if (tline) {
if (tline->type == TOK_STRING) {
+ const char *fname;
/*
* If this is a quoted string, ignore anything after
* it; this allows for compatiblity with gcc's
* additional flags options.
*/
- src_set_fname(unquote_token(tline));
+
+ fname = unquote_token_anystr(tline, BADCTL,
+ dname[0] == '#' ? STR_C : STR_NASM);
+ src_set_fname(fname);
} else {
- char *fname = detoken(tline, false);
+ char *fname;
+ fname = detoken(tline, false);
src_set_fname(fname);
nasm_free(fname);
}
diff --git a/asm/quote.c b/asm/quote.c
index 58bb5a10..301abed7 100644
--- a/asm/quote.c
+++ b/asm/quote.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2019 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2020 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@@ -291,10 +291,17 @@ char *nasm_quote_cstr(const char *str, size_t *lenp)
* corresponding to bits set in badctl; in that case, the output
* string, but not *ep, is truncated before the first invalid
* character.
+ *
+ * badctl is a bitmask of control characters (0-31) which are forbidden
+ * from appearing in the final output.
+ *
+ * The qstart character can be either '`' (NASM style) or '\"' (C style),
+ * to indicate the lead marker of a quoted string. If it is '\"', then
+ * '`' is not a special character at all.
*/
-static size_t nasm_unquote_common(char *str, char **ep,
- const uint32_t badctl)
+size_t nasm_unquote_anystr(char *str, char **ep, const uint32_t badctl,
+ const char qstart)
{
unsigned char bq;
const unsigned char *p;
@@ -319,15 +326,7 @@ static size_t nasm_unquote_common(char *str, char **ep,
if (!bq)
return 0;
- switch (bq) {
- case '\'':
- case '\"':
- /* '...' or "..." string */
- while ((c = *p++) && (c != bq))
- EMIT(c);
- break;
-
- case '`':
+ if (bq == (unsigned char)qstart) {
/* `...` string */
state = st_start;
@@ -335,18 +334,13 @@ static size_t nasm_unquote_common(char *str, char **ep,
c = *p++;
switch (state) {
case st_start:
- switch (c) {
- case '\\':
+ if (c == '\\') {
state = st_backslash;
- break;
- case '`':
- case '\0':
+ } else if ((c == '\0') | (c == bq)) {
state = st_done;
- break;
- default:
+ } else {
EMIT(c);
- break;
- }
+ }
break;
case st_backslash:
@@ -450,14 +444,19 @@ static size_t nasm_unquote_common(char *str, char **ep,
default:
panic();
}
- }
- break;
-
- default:
+ }
+ } else if (bq == '\'' || bq == '\"') {
+ /*
+ * '...' or "..." string, NASM legacy style (no escapes of
+ * any kind, including collapsing double quote marks.)
+ * A "..." string obviously can't get here if qstart == '\"'.
+ */
+ while ((c = *p++) && (c != bq))
+ EMIT(c);
+ } else {
/* Not a quoted string, just return the input... */
while ((c = *p++))
EMIT(c);
- break;
}
/* Zero-terminate the output */
@@ -472,24 +471,30 @@ static size_t nasm_unquote_common(char *str, char **ep,
}
#undef EMIT
+/*
+ * Unquote any arbitrary string; may produce any bytes, including embedded
+ * control- and NUL characters.
+ */
size_t nasm_unquote(char *str, char **ep)
{
- return nasm_unquote_common(str, ep, 0);
+ return nasm_unquote_anystr(str, ep, 0, STR_NASM);
}
+
+/*
+ * Unquote a string intended to be used as a C string; most control
+ * characters are rejected, including whitespace characters that
+ * would imply line endings and so on.
+ */
size_t nasm_unquote_cstr(char *str, char **ep)
{
- /*
- * These are the only control characters permitted: BEL BS TAB ESC
- */
- const uint32_t okctl = (1 << '\a') | (1 << '\b') | (1 << '\t') | (1 << 27);
-
- return nasm_unquote_common(str, ep, ~okctl);
+ return nasm_unquote_anystr(str, ep, BADCTL, STR_NASM);
}
/*
* Find the end of a quoted string; returns the pointer to the terminating
* character (either the ending quote or the null character, if unterminated.)
* If the input is not a quoted string, return NULL.
+ * This applies to NASM style strings only.
*/
char *nasm_skip_string(const char *str)
{
@@ -537,7 +542,9 @@ char *nasm_skip_string(const char *str)
* Note: for the purpose of finding the end of the string,
* all successor states to st_backslash are functionally
* equivalent to st_start, since either a backslash or
- * a backquote will force a return to the st_start state.
+ * a backquote will force a return to the st_start state,
+ * and any possible multi-character state will terminate
+ * for any non-alphanumeric character.
*/
state = c ? st_start : st_done;
break;
diff --git a/asm/quote.h b/asm/quote.h
index 7259f7cd..d8226cdb 100644
--- a/asm/quote.h
+++ b/asm/quote.h
@@ -38,9 +38,24 @@
char *nasm_quote(const char *str, size_t *len);
char *nasm_quote_cstr(const char *str, size_t *len);
+size_t nasm_unquote_anystr(char *str, char **endptr,
+ uint32_t badctl, char qstart);
size_t nasm_unquote(char *str, char **endptr);
size_t nasm_unquote_cstr(char *str, char **endptr);
char *nasm_skip_string(const char *str);
+/* Arguments used with nasm_unquote_anystr() */
+
+/*
+ * These are the only control characters permitted when we produce a C string:
+ * BEL BS TAB ESC
+ */
+#define OKCTL ((1U << '\a') | (1U << '\b') | (1U << '\t') | (1U << 27))
+#define BADCTL (~(uint32_t)OKCTL)
+
+/* Initial quotation mark */
+#define STR_C '\"'
+#define STR_NASM '`'
+
#endif /* NASM_QUOTE_H */
More information about the Nasm-commits
mailing list