[nasm:rebin] preproc: add conditional-string smacro parameters; simplify functions

nasm-bot for H. Peter Anvin hpa at zytor.com
Mon Nov 21 11:00:06 PST 2022


Commit-ID:  b018ba0f45fadcd359a525452163ab8ad1df37d0
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=b018ba0f45fadcd359a525452163ab8ad1df37d0
Author:     H. Peter Anvin <hpa at zytor.com>
AuthorDate: Mon, 21 Nov 2022 10:47:08 -0800
Committer:  H. Peter Anvin <hpa at zytor.com>
CommitDate: Mon, 21 Nov 2022 10:51:48 -0800

preproc: add conditional-string smacro parameters; simplify functions

Add the option of having strings only conditionally quoted (&&) -- do
not quote an already quoted string again -- as opposed to always
quoting a string.

This makes a lot of the string functions way simpler to implement, and
removes the need to share ad hoc parsing code with directives.

Signed-off-by: H. Peter Anvin <hpa at zytor.com>


---
 asm/preproc.c   | 168 ++++++++++++++++++++++++++++++--------------------------
 doc/nasmdoc.src |  29 +++++++---
 2 files changed, 110 insertions(+), 87 deletions(-)

diff --git a/asm/preproc.c b/asm/preproc.c
index c59ba4fb..a613af2e 100644
--- a/asm/preproc.c
+++ b/asm/preproc.c
@@ -206,13 +206,14 @@ typedef Token *(*ExpandSMacro)(const SMacro *s, Token **params, int nparams);
  * if SPARM_GREEDY is set.
  */
 enum sparmflags {
-    SPARM_PLAIN    =  0,
-    SPARM_EVAL     =  1,     /* Evaluate as a numeric expression (=) */
-    SPARM_STR      =  2,     /* Convert to quoted string ($) */
-    SPARM_NOSTRIP  =  4,     /* Don't strip braces (!) */
-    SPARM_GREEDY   =  8,     /* Greedy final parameter (+) */
-    SPARM_VARADIC  = 16,     /* Any number of separate arguments */
-    SPARM_OPTIONAL = 32      /* Optional argument */
+    SPARM_PLAIN     =  0,
+    SPARM_EVAL      =  1,   /* Evaluate as a numeric expression (=) */
+    SPARM_STR       =  2,   /* Convert to quoted string ($) */
+    SPARM_NOSTRIP   =  4,   /* Don't strip braces (!) */
+    SPARM_GREEDY    =  8,   /* Greedy final parameter (+) */
+    SPARM_VARADIC   = 16,   /* Any number of separate arguments */
+    SPARM_OPTIONAL  = 32,   /* Optional argument */
+    SPARM_CONDQUOTE = 64    /* With SPARM_STR, don't re-quote a string */
 };
 
 struct smac_param {
@@ -2875,11 +2876,11 @@ list_smacro_def(enum preproc_token op, const Context *ctx, const SMacro *m)
     if (m->nparam) {
         /*
          * Space for ( and either , or ) around each
-         * parameter, plus up to 4 flags.
+         * parameter, plus up to 5 flags.
          */
         int i;
 
-        size += 1 + 4 * m->nparam;
+        size += 1 + 5 * m->nparam;
         for (i = 0; i < m->nparam; i++)
             size += m->params[i].name.len;
     }
@@ -2910,8 +2911,11 @@ list_smacro_def(enum preproc_token op, const Context *ctx, const SMacro *m)
 
             if (flags & SPARM_NOSTRIP)
                 *--p = '!';
-            if (flags & SPARM_STR)
+            if (flags & SPARM_STR) {
                 *--p = '&';
+                if (flags & SPARM_CONDQUOTE)
+                    *--p = '&';
+            }
             if (flags & SPARM_EVAL)
                 *--p = '=';
             *--p = ',';
@@ -3019,6 +3023,9 @@ static int parse_smacro_template(Token ***tpp, SMacro *tmpl)
         case '&':
             flags |= SPARM_STR;
             break;
+        case TOKEN_DBL_AND:
+            flags |= SPARM_STR|SPARM_CONDQUOTE;
+            break;
         case '!':
             flags |= SPARM_NOSTRIP;
             break;
@@ -3691,15 +3698,16 @@ err:
     return res;
 }
 
+
 /*
  * Implement substring extraction as used by the %substr directive
  * and function.
  */
+static Token *pp_substr_common(Token *t, int64_t start, int64_t count);
+
 static Token *pp_substr(Token *tline, const char *dname)
 {
     int64_t start, count;
-    const char *txt;
-    size_t len;
     struct ppscan pps;
     Token *t;
     Token *res = NULL;
@@ -3730,7 +3738,7 @@ static Token *pp_substr(Token *tline, const char *dname)
         nasm_nonfatal("non-constant value given to `%s'", dname);
         goto err;
     }
-    start = evalresult->value - 1;
+    start = evalresult->value;
 
     pps.tptr = skip_white(pps.tptr);
     if (!pps.tptr) {
@@ -3747,10 +3755,24 @@ static Token *pp_substr(Token *tline, const char *dname)
         count = evalresult->value;
     }
 
+    res = pp_substr_common(t, start, count);
+
+err:
+    free_tlist(tline);
+    return res;
+}
+
+static Token *pp_substr_common(Token *t, int64_t start, int64_t count)
+{
+    size_t len;
+    const char *txt;
+
     unquote_token(t);
     len = t->len;
 
     /* make start and count being in range */
+    start -= 1;                 /* First character is 1 */
+
     if (start < 0)
         start = 0;
     if (count < 0)
@@ -3761,10 +3783,7 @@ static Token *pp_substr(Token *tline, const char *dname)
         start = -1, count = 0; /* empty string */
 
     txt = (start < 0) ? "" : tok_text(t) + start;
-    res = make_tok_qstr_len(NULL, txt, count);
-err:
-    free_tlist(tline);
-    return res;
+    return make_tok_qstr_len(NULL, txt, count);
 }
 
 /**
@@ -5893,14 +5912,19 @@ static SMacro *expand_one_smacro(Token ***tpp)
 
             if (flags & SPARM_STR) {
                 /* Convert expansion to a quoted string */
-                char *arg;
                 Token *qs;
 
                 qs = expand_smacro_noreset(params[i]);
-                arg = detoken(qs, false);
-                free_tlist(qs);
-                params[i] = make_tok_qstr(NULL, arg);
-                nasm_free(arg);
+                if ((flags & SPARM_CONDQUOTE) &&
+                    tok_is(qs, TOKEN_STR) && !qs->next) {
+                    /* A single quoted string token */
+                    params[i] = qs;
+                } else {
+                    char *arg = detoken(qs, false);
+                    free_tlist(qs);
+                    params[i] = make_tok_qstr(NULL, arg);
+                    nasm_free(arg);
+                }
             }
         }
     }
@@ -7051,78 +7075,61 @@ stdmac_join(const SMacro *s, Token **params, int nparams)
 static Token *
 stdmac_strcat(const SMacro *s, Token **params, int nparams)
 {
-    Token *tline;
-    (void)nparams;
+    int i;
+    size_t len = 0;
+    char *str, *p;
 
-    tline = params[0];
-    params[0] = NULL;           /* Don't free this later */
-    return pp_strcat(expand_smacro_noreset(tline), s->name);
+    (void)s;
+
+    for (i = 0; i < nparams; i++) {
+        unquote_token(params[i]);
+        len += params[i]->len;
+    }
+
+    nasm_newn(str, len+1);
+    p = str;
+
+    for (i = 0; i < nparams; i++) {
+        p = mempcpy(p, tok_text(params[i]), params[i]->len);
+    }
+
+    return make_tok_qstr_len(NULL, str, len);
 }
 
 /* %substr() function */
 static Token *
 stdmac_substr(const SMacro *s, Token **params, int nparams)
 {
-    Token *tline;
-    (void)nparams;
-
-    tline = params[0];
-    params[0] = NULL;           /* Don't free this later */
-    return pp_substr(expand_smacro_noreset(tline), s->name);
-}
-
-/* Expand a the argument and enforce it being a single quoted string */
-static Token *expand_to_string(Token **tp, const char *dname)
-{
-    Token *tlist, *t;
-
-    tlist = *tp;
-    *tp = NULL;                 /* Don't free this later */
-    t = zap_white(expand_smacro_noreset(tlist));
+    int64_t start, count;
 
-    if (!tok_is(t, TOKEN_STR)) {
-        nasm_nonfatal("`%s' requires string as parameter", dname);
-        return NULL;
-    }
+    (void)nparams;
+    (void)s;
 
-    t->next = zap_white(t->next);
-    if (t->next) {
-        nasm_nonfatal("`%s' requires exactly one string as parameter", dname);
-        return NULL;
-    }
+    start = get_tok_num(params[1], NULL);
+    count = get_tok_num(params[2], NULL);
 
-    return t;
+    return pp_substr_common(params[0], start, count);
 }
 
 /* %strlen() function */
 static Token *
 stdmac_strlen(const SMacro *s, Token **params, int nparams)
 {
-    Token *t;
-
     (void)nparams;
+    (void)s;
 
-    t = expand_to_string(&params[0], s->name);
-    if (!t)
-        return NULL;
-
-    unquote_token(t);
-    return make_tok_num(NULL, t->len);
+    unquote_token(params[0]);
+    return make_tok_num(NULL, params[0]->len);
 }
 
 /* %tok() function */
 static Token *
 stdmac_tok(const SMacro *s, Token **params, int nparams)
 {
-    Token *t;
-
     (void)nparams;
+    (void)s;
 
-    t = expand_to_string(&params[0], s->name);
-    if (!t)
-        return NULL;
-
-    return reverse_tokens(tokenize(unquote_token_cstr(t)));
+    return reverse_tokens(tokenize(unquote_token_cstr(params[0])));
 }
 
 /* %cond() or %sel() */
@@ -7272,12 +7279,6 @@ struct magic_macros {
     ExpandSMacro func;
 };
 
-struct num_macros {
-    const char name[6];
-    uint8_t base;
-    char prefix;
-};
-
 static void pp_add_magic_stdmac(void)
 {
     static const struct magic_macros magic_macros[] = {
@@ -7289,10 +7290,9 @@ static void pp_add_magic_stdmac(void)
         { "%count",     false, 1, SPARM_VARADIC, stdmac_count },
         { "%eval",      false, 1, SPARM_EVAL|SPARM_VARADIC, stdmac_join },
         { "%str",       false, 1, SPARM_GREEDY|SPARM_STR, stdmac_join },
-        { "%strcat",    false, 1, SPARM_GREEDY, stdmac_strcat },
-        { "%strlen",    false, 1, 0, stdmac_strlen },
-        { "%substr",    false, 1, SPARM_GREEDY, stdmac_substr },
-        { "%tok",       false, 1, 0, stdmac_tok },
+        { "%strcat",    false, 1, SPARM_STR|SPARM_CONDQUOTE|SPARM_VARADIC, stdmac_strcat },
+        { "%strlen",    false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_strlen },
+        { "%tok",       false, 1, SPARM_STR|SPARM_CONDQUOTE, stdmac_tok },
         { NULL, false, 0, 0, NULL }
     };
     const struct magic_macros *m;
@@ -7361,6 +7361,18 @@ static void pp_add_magic_stdmac(void)
     tmpl.params[2].def   = make_tok_num(NULL, 10);
     define_smacro("%num", false, NULL, &tmpl);
 
+    /* %substr() function */
+    nasm_zero(tmpl);
+    tmpl.nparam = 3;
+    tmpl.expand = stdmac_substr;
+    tmpl.recursive = true;
+    nasm_newn(tmpl.params, tmpl.nparam);
+    tmpl.params[0].flags  = SPARM_STR|SPARM_CONDQUOTE;
+    tmpl.params[1].flags  = SPARM_EVAL;
+    tmpl.params[2].flags  = SPARM_EVAL|SPARM_OPTIONAL;
+    tmpl.params[2].def    = make_tok_num(NULL, -1);
+    define_smacro("%substr", false, NULL, &tmpl);
+
     /* %is...() macro functions */
     nasm_zero(tmpl);
     tmpl.nparam  = 1;
diff --git a/doc/nasmdoc.src b/doc/nasmdoc.src
index 0883be4b..bd933db5 100644
--- a/doc/nasmdoc.src
+++ b/doc/nasmdoc.src
@@ -2424,17 +2424,22 @@ A single pair of parentheses is a subcase of a single, unused argument:
 
 This is similar to the behavior of the C preprocessor.
 
-\b If declared with an \c{=}, NASM will evaluate the argument as an
-expression after expansion.
+\b If declared with an \c{=}, NASM will expand the argument and then
+evaluate it as a numeric expression.
 
-\b If an argument declared with an \c{&}, a macro parameter will be
-turned into a quoted string after expansion.
+\b If declared with an \c{&}, NASM will expand the argument and then
+turn it into a quoted string; if the argument already \e{is} a quoted
+string, it will be quoted again.
+
+\b If declared with \c{&&}, NASM will expand the argument and then
+turn it into a quoted string, but if the argument already is a quoted
+string, it will \e{not} be re-quoted.
 
 \b If declared with a \c{+}, it is a greedy or variadic parameter; it
-includes any subsequent commas and parameters.
+will include any subsequent commas and parameters.
 
 \b If declared with an \c{!}, NASM will not strip whitespace and
-braces (useful in conjunction with \c{&}).
+braces (potentially useful in conjunction with \c{&} or \c{&&}).
 
 For example:
 
@@ -2849,7 +2854,9 @@ means "until N-1 characters before the end of string", i.e. \c{-1}
 means until end of string, \c{-2} until one character before, etc.
 
 The corresponding preprocessor function is \c{%substr()}, see
-\k{f_substr}.
+\k{f_substr}; note, however, that the default value for the
+length parameter, if omitted, is \c{-1} rather than \c{1} for
+\c{%substr()}.
 
 
 \H{ppfunc} \i{Preprocessor Functions}
@@ -3016,13 +3023,17 @@ in the same way the \i\c{%strlen} directive would, see \k{strlen}.
 
 The \c{%substr()} function extracts a substring of a quoted string, in
 the same way the \i\c{%substr} directive would, see \k{substr}. Note
-that unlike the \c{%substr} directive, a comma is required after the
-string argument.
+that unlike the \c{%substr} directive, commas are required between all
+parameters, including after the string argument, and that the
+default for the length argument, if omitted, is \c{-1} (i.e. the
+remainder of the string) rather than \c{1}.
 
 \c ; The following lines are all equivalent
 \c %define  mychar 'yzw'
 \c %substr  mychar 'xyzw' 2,-1
+\c %xdefine mychar %substr('xyzw',2,3)
 \c %xdefine mychar %substr('xyzw',2,-1)
+\c %xdefine mychar %substr('xyzw',2)
 
 
 \S{f_tok} \i\c{%tok()} function


More information about the Nasm-commits mailing list