[nasm:nasm-2.16.xx] BR 3392906: error out on bad syntax "db 1 2"

nasm-bot for H. Peter Anvin hpa at zytor.com
Wed Oct 11 12:09:04 PDT 2023


Commit-ID:  73676357de6bf4028a9ccfb0bd443ed57935870c
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=73676357de6bf4028a9ccfb0bd443ed57935870c
Author:     H. Peter Anvin <hpa at zytor.com>
AuthorDate: Wed, 11 Oct 2023 12:01:22 -0700
Committer:  H. Peter Anvin <hpa at zytor.com>
CommitDate: Wed, 11 Oct 2023 12:06:58 -0700

BR 3392906: error out on bad syntax "db 1 2"

NASM would try to "eat the comma token" in db expressions, even for
cases where the token was not a comma. Fix that and error out
properly.

To give better error messages, track where in the input string a token
starts or ends. This information is only valid as long as the input
string is kept, but that is just fine for error messages during
parsing.

Reported-by: Peter Cordes <pcordes at gmail.com>
Signed-off-by: H. Peter Anvin <hpa at zytor.com>


---
 asm/parser.c                 | 60 +++++++++++++++++++++++++++++++-------------
 asm/stdscan.c                | 24 +++++++++++++++---
 include/nasm.h               |  2 ++
 test/baddb.asm               |  5 ++++
 travis/test/utf-error.stderr | 12 ++++-----
 5 files changed, 76 insertions(+), 27 deletions(-)

diff --git a/asm/parser.c b/asm/parser.c
index 6b19ffac..26673196 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2023 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -55,6 +55,21 @@ static int end_expression_next(void);
 
 static struct tokenval tokval;
 
+/*
+ * Human-readable description of a token, intended for error messages.
+ * The resulting string needs to be freed.
+ */
+static char *tokstr(const struct tokenval *tok)
+{
+    if (tok->t_type == TOKEN_EOS) {
+        return nasm_strdup("end of line");
+    } else if (tok->t_len) {
+        return nasm_asprintf("`%.*s'", tok->t_len, tok->t_start);
+    } else {
+        return nasm_strdup("invalid token");
+    }
+}
+
 static void process_size_override(insn *result, operand *op)
 {
     if (tasm_compatible_mode) {
@@ -384,6 +399,7 @@ static int parse_eops(extop **result, bool critical, int elem)
 
     /* End of string is obvious; ) ends a sub-expression list e.g. DUP */
     for (i = tokval.t_type; i != TOKEN_EOS; i = stdscan(NULL, &tokval)) {
+        bool skip;
         char endparen = ')';   /* Is a right paren the end of list? */
 
         if (i == ')')
@@ -397,12 +413,9 @@ static int parse_eops(extop **result, bool critical, int elem)
         }
         sign = +1;
 
-        /*
-         * end_expression_next() here is to distinguish this from
-         * a string used as part of an expression...
-         */
         if (i == TOKEN_QMARK) {
             eop->type = EOT_DB_RESERVE;
+            skip = true;
         } else if (do_subexpr && i == '(') {
             extop *subexpr;
 
@@ -432,12 +445,13 @@ static int parse_eops(extop **result, bool critical, int elem)
 
             /* We should have ended on a closing paren */
             if (tokval.t_type != ')') {
-                nasm_nonfatal("expected `)' after subexpression, got `%s'",
-                              i == TOKEN_EOS ?
-                              "end of line" : tokval.t_charptr);
+                char *tp = tokstr(&tokval);
+                nasm_nonfatal("expected `)' after subexpression, got %s", tp);
+                nasm_free(tp);
                 goto fail;
             }
             endparen = 0;       /* This time the paren is not the end */
+            skip = true;
         } else if (i == '%') {
             /* %(expression_list) */
             do_subexpr = true;
@@ -448,9 +462,14 @@ static int parse_eops(extop **result, bool critical, int elem)
             do_subexpr = true;
             continue;
         } else if (i == TOKEN_STR && end_expression_next()) {
+            /*
+             * end_expression_next() is to distinguish this from
+             * a string used as part of an expression...
+             */
             eop->type            = EOT_DB_STRING;
             eop->val.string.data = tokval.t_charptr;
             eop->val.string.len  = tokval.t_inttwo;
+            skip = true;
         } else if (i == TOKEN_STRFUNC) {
             bool parens = false;
             const char *funcname = tokval.t_charptr;
@@ -463,8 +482,10 @@ static int parse_eops(extop **result, bool critical, int elem)
                 i = stdscan(NULL, &tokval);
             }
             if (i != TOKEN_STR) {
-                nasm_nonfatal("%s must be followed by a string constant",
-                              funcname);
+                char *tp = tokstr(&tokval);
+                nasm_nonfatal("%s must be followed by a string constant, got %s",
+                              funcname, tp);
+                nasm_free(tp);
                 eop->type = EOT_NOTHING;
             } else {
                 eop->type = EOT_DB_STRING_FREE;
@@ -481,6 +502,7 @@ static int parse_eops(extop **result, bool critical, int elem)
                 if (i != ')')
                     nasm_nonfatal("unterminated %s function", funcname);
             }
+            skip = i != ',';
         } else if (i == '-' || i == '+') {
             char *save = stdscan_get();
             struct tokenval tmptok;
@@ -522,6 +544,7 @@ static int parse_eops(extop **result, bool critical, int elem)
             }
             if (!eop->val.string.len)
                 eop->type = EOT_NOTHING;
+            skip = true;
         } else {
             /* anything else, assume it is an expression */
             expr *value;
@@ -548,6 +571,7 @@ static int parse_eops(extop **result, bool critical, int elem)
             if (value_to_extop(value, eop, location.segment)) {
                 nasm_nonfatal("expression is not simple or relocatable");
             }
+            skip = false;
         }
 
         if (eop->dup == 0 || eop->type == EOT_NOTHING) {
@@ -568,6 +592,11 @@ static int parse_eops(extop **result, bool critical, int elem)
         oper_num++;
         eop = NULL;             /* Done with this operand */
 
+        if (skip) {
+            /* Consume the (last) token if that didn't happen yet */
+            i = stdscan(NULL, &tokval);
+        }
+
         /*
          * We're about to call stdscan(), which will eat the
          * comma that we're currently sitting on between
@@ -577,13 +606,10 @@ static int parse_eops(extop **result, bool critical, int elem)
         if (i == TOKEN_EOS || i == endparen)	/* Already at end? */
             break;
         if (i != ',') {
-            i = stdscan(NULL, &tokval);		/* eat the comma or final paren */
-            if (i == TOKEN_EOS || i == ')')	/* got end of expression */
-                break;
-            if (i != ',') {
-                nasm_nonfatal("comma expected after operand");
-                goto fail;
-            }
+            char *tp = tokstr(&tokval);
+            nasm_nonfatal("comma expected after operand, got %s", tp);
+            nasm_free(tp);
+            goto fail;
         }
     }
 
diff --git a/asm/stdscan.c b/asm/stdscan.c
index cbc0fc77..b8f0c850 100644
--- a/asm/stdscan.c
+++ b/asm/stdscan.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *
- *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2023 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -122,19 +122,33 @@ static int stdscan_handle_brace(struct tokenval *tv)
     return tv->t_type;
 }
 
+static int stdscan_token(struct tokenval *tv);
+
 int stdscan(void *private_data, struct tokenval *tv)
 {
-    const char *r;
+    int i;
 
     (void)private_data;         /* Don't warn that this parameter is unused */
 
     nasm_zero(*tv);
 
     stdscan_bufptr = nasm_skip_spaces(stdscan_bufptr);
+    tv->t_start = stdscan_bufptr;
+
     if (!*stdscan_bufptr)
         return tv->t_type = TOKEN_EOS;
 
-    /* we have a token; either an id, a number or a char */
+    i = stdscan_token(tv);
+    tv->t_len = stdscan_bufptr - tv->t_start;
+
+    return i;
+}
+
+static int stdscan_token(struct tokenval *tv)
+{
+    const char *r;
+
+    /* we have a token; either an id, a number, operator or char */
     if (nasm_isidstart(*stdscan_bufptr) ||
         (*stdscan_bufptr == '$' && nasm_isidstart(stdscan_bufptr[1]))) {
         /* now we've got an identifier */
@@ -341,6 +355,8 @@ int stdscan(void *private_data, struct tokenval *tv)
     } else if (stdscan_bufptr[0] == '|' && stdscan_bufptr[1] == '|') {
         stdscan_bufptr += 2;
         return tv->t_type = TOKEN_DBL_OR;
-    } else                      /* just an ordinary char */
+    } else {
+        /* just an ordinary char */
         return tv->t_type = (uint8_t)(*stdscan_bufptr++);
+    }
 }
diff --git a/include/nasm.h b/include/nasm.h
index 8b017f3e..45b07c9b 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -316,6 +316,8 @@ struct tokenval {
     int64_t             t_inttwo;
     enum token_type     t_type;
     int8_t              t_flag;
+    const char		*t_start; /* Pointer to token in input buffer */
+    int			t_len;    /* Length of token in input buffer */
 };
 typedef int (*scanner)(void *private_data, struct tokenval *tv);
 
diff --git a/test/baddb.asm b/test/baddb.asm
new file mode 100644
index 00000000..1ff4fab4
--- /dev/null
+++ b/test/baddb.asm
@@ -0,0 +1,5 @@
+	;; This should error
+	db 1 2
+
+	;; This should work
+	db 1, 2
diff --git a/travis/test/utf-error.stderr b/travis/test/utf-error.stderr
index 5d2786da..f972605c 100644
--- a/travis/test/utf-error.stderr
+++ b/travis/test/utf-error.stderr
@@ -1,15 +1,15 @@
-./travis/test/utf.asm:63: error: __?utf16?__ must be followed by a string constant
-./travis/test/utf.asm:64: error: __?utf16?__ must be followed by a string constant
+./travis/test/utf.asm:63: error: __?utf16?__ must be followed by a string constant, got `33'
+./travis/test/utf.asm:64: error: __?utf16?__ must be followed by a string constant, got `,'
 ./travis/test/utf.asm:65: error: unterminated __?utf16?__ function
 ./travis/test/utf.asm:66: error: unterminated __?utf16?__ function
 ./travis/test/utf.asm:67: error: invalid input string to __?utf16?__
-./travis/test/utf.asm:69: error: __?utf16le?__ must be followed by a string constant
-./travis/test/utf.asm:70: error: __?utf16le?__ must be followed by a string constant
+./travis/test/utf.asm:69: error: __?utf16le?__ must be followed by a string constant, got `33'
+./travis/test/utf.asm:70: error: __?utf16le?__ must be followed by a string constant, got `,'
 ./travis/test/utf.asm:71: error: unterminated __?utf16le?__ function
 ./travis/test/utf.asm:72: error: unterminated __?utf16le?__ function
 ./travis/test/utf.asm:73: error: invalid input string to __?utf16le?__
-./travis/test/utf.asm:75: error: __?utf16be?__ must be followed by a string constant
-./travis/test/utf.asm:76: error: __?utf16be?__ must be followed by a string constant
+./travis/test/utf.asm:75: error: __?utf16be?__ must be followed by a string constant, got `33'
+./travis/test/utf.asm:76: error: __?utf16be?__ must be followed by a string constant, got `,'
 ./travis/test/utf.asm:77: error: unterminated __?utf16be?__ function
 ./travis/test/utf.asm:78: error: unterminated __?utf16be?__ function
 ./travis/test/utf.asm:79: error: invalid input string to __?utf16be?__


More information about the Nasm-commits mailing list