[nasm:master] Add {rex} prefix, simplify prefix handling, better error messages
nasm-bot for H. Peter Anvin
hpa at zytor.com
Mon Nov 7 17:12:13 PST 2022
Commit-ID: 2469b8b66e09213346071209e5631be3cbaee8e5
Gitweb: http://repo.or.cz/w/nasm.git?a=commitdiff;h=2469b8b66e09213346071209e5631be3cbaee8e5
Author: H. Peter Anvin <hpa at zytor.com>
AuthorDate: Tue, 27 Apr 2021 11:37:42 -0700
Committer: H. Peter Anvin <hpa at zytor.com>
CommitDate: Tue, 27 Apr 2021 11:37:42 -0700
Add {rex} prefix, simplify prefix handling, better error messages
Add a {rex} prefix to force REX encoding (typically a redundant 40h
prefix).
For prefix parsing, we can use t_inttwo to encode the prefix slot
number.
Give more verbose error messages for encoding mismatches.
---
asm/assemble.c | 70 ++++++++++++++++++++---------
asm/parser.c | 115 +++++++++++++++++-------------------------------
asm/tokens.dat | 30 +++++++++----
common/common.c | 7 +--
include/nasm.h | 19 ++++----
test/vex.asm | 30 +++++++++++++
travis/test/vex.asm | 30 +++++++++++++
travis/test/vex.bin.t | Bin 25 -> 103 bytes
travis/test/vex.json | 17 +++++--
travis/test/vex.stderr | 1 -
travis/test/vex1.stderr | 3 ++
travis/test/vex2.stderr | 3 ++
12 files changed, 204 insertions(+), 121 deletions(-)
diff --git a/asm/assemble.c b/asm/assemble.c
index ecf5c093..46ce9303 100644
--- a/asm/assemble.c
+++ b/asm/assemble.c
@@ -935,15 +935,13 @@ int64_t assemble(int32_t segment, int64_t start, int bits, insn *instruction)
nasm_nonfatal("instruction not supported in %d-bit mode", bits);
break;
case MERR_ENCMISMATCH:
- nasm_nonfatal("specific encoding scheme not available");
+ nasm_nonfatal("instruction not encodable with %s prefix",
+ prefix_name(instruction->prefixes[PPS_REX]));
break;
case MERR_BADBND:
- nasm_nonfatal("bnd prefix is not allowed");
- break;
case MERR_BADREPNE:
nasm_nonfatal("%s prefix is not allowed",
- (has_prefix(instruction, PPS_REP, P_REPNE) ?
- "repne" : "repnz"));
+ prefix_name(instruction->prefixes[PPS_REP]));
break;
case MERR_REGSETSIZE:
nasm_nonfatal("invalid register set size");
@@ -1644,16 +1642,22 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
ins->rex &= ~REX_P; /* Don't force REX prefix due to high reg */
}
- switch (ins->prefixes[PPS_VEX]) {
+ switch (ins->prefixes[PPS_REX]) {
case P_EVEX:
if (!(ins->rex & REX_EV))
return -1;
break;
+ case P_VEX:
case P_VEX3:
case P_VEX2:
if (!(ins->rex & REX_V))
return -1;
break;
+ case P_REX:
+ if (ins->rex & (REX_V|REX_EV))
+ return -1;
+ ins->rex |= REX_P; /* Force REX prefix */
+ break;
default:
break;
}
@@ -1687,16 +1691,19 @@ static int64_t calcsize(int32_t segment, int64_t offset, int bits,
nasm_nonfatal("invalid high-16 register in non-AVX-512");
return -1;
}
- if (ins->rex & REX_EV)
+ if (ins->rex & REX_EV) {
length += 4;
- else if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)) ||
- ins->prefixes[PPS_VEX] == P_VEX3)
+ } else if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)) ||
+ ins->prefixes[PPS_REX] == P_VEX3) {
+ if (ins->prefixes[PPS_REX] == P_VEX2)
+ nasm_nonfatal("instruction not encodable with {vex2} prefix");
length += 3;
- else
+ } else {
length += 2;
+ }
} else if (ins->rex & REX_MASK) {
if (ins->rex & REX_H) {
- nasm_nonfatal("cannot use high register in rex instruction");
+ nasm_nonfatal("cannot use high byte register in rex instruction");
return -1;
} else if (bits == 64) {
length++;
@@ -1849,6 +1856,8 @@ static int emit_prefix(struct out_data *data, const int bits, insn *ins)
case P_OSP:
c = 0x66;
break;
+ case P_REX:
+ case P_VEX:
case P_EVEX:
case P_VEX3:
case P_VEX2:
@@ -1994,7 +2003,7 @@ static void gencode(struct out_data *data, insn *ins)
case 0172:
{
- int mask = ins->prefixes[PPS_VEX] == P_EVEX ? 7 : 15;
+ int mask = ins->prefixes[PPS_REX] == P_EVEX ? 7 : 15;
const struct operand *opy;
c = *codes++;
@@ -2054,7 +2063,7 @@ static void gencode(struct out_data *data, insn *ins)
case 0270:
codes += 2;
if (ins->vex_cm != 1 || (ins->rex & (REX_W|REX_X|REX_B)) ||
- ins->prefixes[PPS_VEX] == P_VEX3) {
+ ins->prefixes[PPS_REX] == P_VEX3) {
bytes[0] = (ins->vex_cm >> 6) ? 0x8f : 0xc4;
bytes[1] = (ins->vex_cm & 31) | ((~ins->rex & 7) << 5);
bytes[2] = ((ins->rex & REX_W) << (7-3)) |
@@ -2383,11 +2392,12 @@ static enum match_result find_match(const struct itemplate **tempp,
int i;
/* broadcasting uses a different data element size */
- for (i = 0; i < instruction->operands; i++)
+ for (i = 0; i < instruction->operands; i++) {
if (i == broadcast)
xsizeflags[i] = instruction->oprs[i].decoflags & BRSIZE_MASK;
else
xsizeflags[i] = instruction->oprs[i].type & SIZE_MASK;
+ }
merr = MERR_INVALOP;
@@ -2507,18 +2517,24 @@ static enum match_result matches(const struct itemplate *itemp,
return MERR_INVALOP;
/*
- * {evex} available?
+ * {rex/vexn/evex} available?
*/
- switch (instruction->prefixes[PPS_VEX]) {
+ switch (instruction->prefixes[PPS_REX]) {
case P_EVEX:
if (!itemp_has(itemp, IF_EVEX))
return MERR_ENCMISMATCH;
break;
+ case P_VEX:
case P_VEX3:
case P_VEX2:
if (!itemp_has(itemp, IF_VEX))
return MERR_ENCMISMATCH;
break;
+ case P_REX:
+ if (itemp_has(itemp, IF_VEX) || itemp_has(itemp, IF_EVEX) ||
+ bits != 64)
+ return MERR_ENCMISMATCH;
+ break;
default:
break;
}
@@ -2667,6 +2683,9 @@ static enum match_result matches(const struct itemplate *itemp,
* considered a wildcard match rather than an error.
*/
opsizemissing = true;
+ } else if (is_class(REG_HIGH, type) &&
+ instruction->prefixes[PPS_REX]) {
+ return MERR_ENCMISMATCH;
}
} else if (is_broadcast &&
(brcast_num !=
@@ -2764,13 +2783,14 @@ static enum match_result matches(const struct itemplate *itemp,
static enum ea_type process_ea(operand *input, ea *output, int bits,
int rfield, opflags_t rflags, insn *ins,
- const char **errmsg)
+ const char **errmsgp)
{
bool forw_ref = !!(input->opflags & OPFLAG_UNKNOWN);
int addrbits = ins->addr_size;
int eaflags = input->eaflags;
+ const char *errmsg = NULL;
- *errmsg = "invalid effective address"; /* Default error message */
+ errmsg = NULL;
output->type = EA_SCALAR;
output->rip = false;
@@ -2793,7 +2813,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
/* broadcasting is not available with a direct register operand. */
if (input->decoflags & BRDCAST_MASK) {
- *errmsg = "broadcast not allowed with register operand";
+ errmsg = "broadcast not allowed with register operand";
goto err;
}
@@ -2809,7 +2829,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
/* Embedded rounding or SAE is not available with a mem ref operand. */
if (input->decoflags & (ER | SAE)) {
- *errmsg = "embedded rounding is available only with "
+ errmsg = "embedded rounding is available only with "
"register-register operations";
goto err;
}
@@ -2838,7 +2858,7 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
}
if (bits == 64 && !(IP_REL & ~input->type) && (eaflags & EAF_SIB)) {
- *errmsg = "instruction requires SIB encoding, cannot be RIP-relative";
+ errmsg = "instruction requires SIB encoding, cannot be RIP-relative";
goto err;
}
@@ -3224,6 +3244,14 @@ static enum ea_type process_ea(operand *input, ea *output, int bits,
return output->type;
err:
+ if (!errmsg) {
+ /* Default error message */
+ static char invalid_address_msg[40];
+ snprintf(invalid_address_msg, sizeof invalid_address_msg,
+ "invalid %d-bit effective address", bits);
+ errmsg = invalid_address_msg;
+ }
+ *errmsgp = errmsg;
return output->type = EA_INVALID;
}
diff --git a/asm/parser.c b/asm/parser.c
index 584e40c9..daafa920 100644
--- a/asm/parser.c
+++ b/asm/parser.c
@@ -55,50 +55,6 @@ static int end_expression_next(void);
static struct tokenval tokval;
-static int prefix_slot(int prefix)
-{
- switch (prefix) {
- case P_WAIT:
- return PPS_WAIT;
- case R_CS:
- case R_DS:
- case R_SS:
- case R_ES:
- case R_FS:
- case R_GS:
- return PPS_SEG;
- case P_LOCK:
- return PPS_LOCK;
- case P_REP:
- case P_REPE:
- case P_REPZ:
- case P_REPNE:
- case P_REPNZ:
- case P_XACQUIRE:
- case P_XRELEASE:
- case P_BND:
- case P_NOBND:
- return PPS_REP;
- case P_O16:
- case P_O32:
- case P_O64:
- case P_OSP:
- return PPS_OSIZE;
- case P_A16:
- case P_A32:
- case P_A64:
- case P_ASP:
- return PPS_ASIZE;
- case P_EVEX:
- case P_VEX3:
- case P_VEX2:
- return PPS_VEX;
- default:
- nasm_panic("Invalid value %d passed to prefix_slot()", prefix);
- return -1;
- }
-}
-
static void process_size_override(insn *result, operand *op)
{
if (tasm_compatible_mode) {
@@ -185,7 +141,7 @@ static void process_size_override(insn *result, operand *op)
}
/*
- * Brace decorators are are parsed here. opmask and zeroing
+ * Braced keywords are parsed here. opmask and zeroing
* decorators can be placed in any order. e.g. zmm1 {k2}{z} or zmm2
* {z}{k3} decorator(s) are placed at the end of an operand.
*/
@@ -715,42 +671,51 @@ restart_parse:
if (i == TOKEN_EOS)
goto fail;
- while (i == TOKEN_PREFIX ||
- (i == TOKEN_REG && IS_SREG(tokval.t_integer))) {
- first = false;
+ while (i) {
+ int slot = PPS_SEG;
- /*
- * Handle special case: the TIMES prefix.
- */
- if (i == TOKEN_PREFIX && tokval.t_integer == P_TIMES) {
- expr *value;
+ if (i == TOKEN_PREFIX) {
+ slot = tokval.t_inttwo;
- i = stdscan(NULL, &tokval);
- value = evaluate(stdscan, NULL, &tokval, NULL, pass_stable(), NULL);
- i = tokval.t_type;
- if (!value) /* Error in evaluator */
- goto fail;
- if (!is_simple(value)) {
- nasm_nonfatal("non-constant argument supplied to TIMES");
- result->times = 1L;
- } else {
- result->times = value->value;
- if (value->value < 0) {
- nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
- result->times = 0;
+ if (slot == PPS_TIMES) {
+ /* TIMES is a very special prefix */
+ expr *value;
+
+ i = stdscan(NULL, &tokval);
+ value = evaluate(stdscan, NULL, &tokval, NULL,
+ pass_stable(), NULL);
+ i = tokval.t_type;
+ if (!value) /* Error in evaluator */
+ goto fail;
+ if (!is_simple(value)) {
+ nasm_nonfatal("non-constant argument supplied to TIMES");
+ result->times = 1;
+ } else {
+ result->times = value->value;
+ if (value->value < 0) {
+ nasm_nonfatalf(ERR_PASS2, "TIMES value %"PRId64" is negative", value->value);
+ result->times = 0;
+ }
}
+ first = false;
+ continue;
}
+ } else if (i == TOKEN_REG && IS_SREG(tokval.t_integer)) {
+ slot = PPS_SEG;
+ first = false;
} else {
- int slot = prefix_slot(tokval.t_integer);
- if (result->prefixes[slot]) {
- if (result->prefixes[slot] == tokval.t_integer)
- nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
- else
- nasm_nonfatal("instruction has conflicting prefixes");
- }
- result->prefixes[slot] = tokval.t_integer;
- i = stdscan(NULL, &tokval);
+ break; /* Not a prefix */
}
+
+ if (result->prefixes[slot]) {
+ if (result->prefixes[slot] == tokval.t_integer)
+ nasm_warn(WARN_OTHER, "instruction has redundant prefixes");
+ else
+ nasm_nonfatal("instruction has conflicting prefixes");
+ }
+ result->prefixes[slot] = tokval.t_integer;
+ i = stdscan(NULL, &tokval);
+ first = false;
}
if (i != TOKEN_INSN) {
diff --git a/asm/tokens.dat b/asm/tokens.dat
index 356b39a2..0a782464 100644
--- a/asm/tokens.dat
+++ b/asm/tokens.dat
@@ -1,6 +1,6 @@
## --------------------------------------------------------------------------
##
-## Copyright 1996-2016 The NASM Authors - All Rights Reserved
+## Copyright 1996-2021 The NASM Authors - All Rights Reserved
## See the file AUTHORS included with the NASM distribution for
## the specific copyright holders.
##
@@ -46,28 +46,45 @@
% TOKEN_QMARK, 0, 0, 0
?
-% TOKEN_PREFIX, 0, 0, P_*
+% TOKEN_PREFIX, PPS_ASIZE, 0, P_*
a16
a32
a64
asp
+
+% TOKEN_PREFIX, PPS_LOCK, 0, P_*
lock
+
+% TOKEN_PREFIX, PPS_OSIZE, 0, P_*
o16
o32
o64
osp
+
+% TOKEN_PREFIX, PPS_REP, 0, P_*
rep
repe
repne
repnz
repz
-times
-wait
xacquire
xrelease
bnd
nobnd
+% TOKEN_PREFIX, PPS_TIMES, 0, P_*
+times
+
+% TOKEN_PREFIX, PPS_WAIT, 0, P_*
+wait
+
+% TOKEN_PREFIX, PPS_REX, TFLAG_BRC, P_*
+rex
+evex
+vex
+vex3
+vex2
+
% TOKEN_SIZE, SIZE_*, 0, S_*
byte
word
@@ -154,11 +171,6 @@ rz-sae
sae
z
-% TOKEN_PREFIX, 0, TFLAG_BRC, P_*
-evex
-vex3
-vex2
-
# Multi-character operators. Used in ppscan().
% TOKEN_SHR, 0, 0, 0
>>
diff --git a/common/common.c b/common/common.c
index 13237994..aa778055 100644
--- a/common/common.c
+++ b/common/common.c
@@ -1,6 +1,6 @@
/* ----------------------------------------------------------------------- *
*
- * Copyright 1996-2016 The NASM Authors - All Rights Reserved
+ * Copyright 1996-2021 The NASM Authors - All Rights Reserved
* See the file AUTHORS included with the NASM distribution for
* the specific copyright holders.
*
@@ -46,14 +46,15 @@
int globalbits = 0;
/*
* Common list of prefix names; ideally should be auto-generated
- * from tokens.dat
+ * from tokens.dat. This MUST match the enum in include/nasm.h.
*/
const char *prefix_name(int token)
{
static const char *prefix_names[] = {
"a16", "a32", "a64", "asp", "lock", "o16", "o32", "o64", "osp",
"rep", "repe", "repne", "repnz", "repz", "times", "wait",
- "xacquire", "xrelease", "bnd"
+ "xacquire", "xrelease", "bnd", "nobnd", "{rex}", "{evex}", "{vex}",
+ "{vex3}", "{vex2}"
};
unsigned int prefix = token-PREFIX_ENUM_START;
diff --git a/include/nasm.h b/include/nasm.h
index 922a1bb2..4d0dcafd 100644
--- a/include/nasm.h
+++ b/include/nasm.h
@@ -619,7 +619,9 @@ enum prefixes { /* instruction prefixes */
P_XRELEASE,
P_BND,
P_NOBND,
+ P_REX,
P_EVEX,
+ P_VEX,
P_VEX3,
P_VEX2,
PREFIX_ENUM_LIMIT
@@ -715,14 +717,15 @@ enum ea_type {
* the introduction of HLE.
*/
enum prefix_pos {
- PPS_WAIT, /* WAIT (technically not a prefix!) */
- PPS_REP, /* REP/HLE prefix */
- PPS_LOCK, /* LOCK prefix */
- PPS_SEG, /* Segment override prefix */
- PPS_OSIZE, /* Operand size prefix */
- PPS_ASIZE, /* Address size prefix */
- PPS_VEX, /* VEX type */
- MAXPREFIX /* Total number of prefix slots */
+ PPS_TIMES = -1, /* TIMES (not a slot, handled separately) */
+ PPS_WAIT = 0, /* WAIT (technically not a prefix!) */
+ PPS_REP, /* REP/HLE prefix */
+ PPS_LOCK, /* LOCK prefix */
+ PPS_SEG, /* Segment override prefix */
+ PPS_OSIZE, /* Operand size prefix */
+ PPS_ASIZE, /* Address size prefix */
+ PPS_REX, /* REX/VEX type */
+ MAXPREFIX /* Total number of prefix slots */
};
/*
diff --git a/test/vex.asm b/test/vex.asm
index 6772c7ce..7c1b3f82 100644
--- a/test/vex.asm
+++ b/test/vex.asm
@@ -1,9 +1,39 @@
bits 64
+ add eax,edx
+ {rex} add eax,edx
+ add al,dl
+ {rex} add al,dl
+ add ah,dl
+ comisd xmm0,xmm1
+ {rex} comisd xmm0,xmm1
vcomisd xmm0,xmm31
vcomisd xmm0,xmm1
+ {vex} vcomisd xmm0,xmm1
{vex2} vcomisd xmm0,xmm1
{vex3} vcomisd xmm0,xmm1
{evex} vcomisd xmm0,xmm1
+ {vex2} vcomisd xmm0,xmm1
+ {vex3} vcomisd xmm0,xmm1
+ {evex} vcomisd xmm0,xmm1
+ {vex} vcomisd xmm0,[r8+rax*1]
+ {vex3} vcomisd xmm0,[r8+rax*1]
+ {evex} vcomisd xmm0,[r8+rax*1]
+ {vex} vcomisd xmm0,[rax+r8*2]
+ {vex3} vcomisd xmm0,[rax+r8*2]
+ {evex} vcomisd xmm0,[rax+r8*2]
+
+ ;; These errors may be caught in different passes, so
+ ;; some shadow the others...
%ifdef ERROR
+ %if ERROR <= 1
+ {vex2} vcomisd xmm0,[rax+r8*2]
+ {rex} add ah,dl
+ bits 32
+ mov eax,[r8d]
+ %endif
+ %if ERROR <= 2
+ {rex} vcomisd xmm0,xmm1
+ {vex} add eax,edx
{vex3} add eax,edx
+ %endif
%endif
diff --git a/travis/test/vex.asm b/travis/test/vex.asm
index 6772c7ce..7c1b3f82 100644
--- a/travis/test/vex.asm
+++ b/travis/test/vex.asm
@@ -1,9 +1,39 @@
bits 64
+ add eax,edx
+ {rex} add eax,edx
+ add al,dl
+ {rex} add al,dl
+ add ah,dl
+ comisd xmm0,xmm1
+ {rex} comisd xmm0,xmm1
vcomisd xmm0,xmm31
vcomisd xmm0,xmm1
+ {vex} vcomisd xmm0,xmm1
{vex2} vcomisd xmm0,xmm1
{vex3} vcomisd xmm0,xmm1
{evex} vcomisd xmm0,xmm1
+ {vex2} vcomisd xmm0,xmm1
+ {vex3} vcomisd xmm0,xmm1
+ {evex} vcomisd xmm0,xmm1
+ {vex} vcomisd xmm0,[r8+rax*1]
+ {vex3} vcomisd xmm0,[r8+rax*1]
+ {evex} vcomisd xmm0,[r8+rax*1]
+ {vex} vcomisd xmm0,[rax+r8*2]
+ {vex3} vcomisd xmm0,[rax+r8*2]
+ {evex} vcomisd xmm0,[rax+r8*2]
+
+ ;; These errors may be caught in different passes, so
+ ;; some shadow the others...
%ifdef ERROR
+ %if ERROR <= 1
+ {vex2} vcomisd xmm0,[rax+r8*2]
+ {rex} add ah,dl
+ bits 32
+ mov eax,[r8d]
+ %endif
+ %if ERROR <= 2
+ {rex} vcomisd xmm0,xmm1
+ {vex} add eax,edx
{vex3} add eax,edx
+ %endif
%endif
diff --git a/travis/test/vex.bin.t b/travis/test/vex.bin.t
index 2145d4cf..4f015064 100644
Binary files a/travis/test/vex.bin.t and b/travis/test/vex.bin.t differ
diff --git a/travis/test/vex.json b/travis/test/vex.json
index e1db1923..ff5337d4 100644
--- a/travis/test/vex.json
+++ b/travis/test/vex.json
@@ -1,6 +1,6 @@
[
{
- "description": "Test VEX2/VEX3/EVEX prefix",
+ "description": "Test explicit REX/VEX2/VEX3/EVEX prefix",
"id": "vex",
"format": "bin",
"source": "vex.asm",
@@ -10,11 +10,20 @@
]
},
{
- "description": "Test VEX3 prefix error",
+ "description": "Test early REX/VEX prefix errors",
"ref": "vex",
- "option": "-Ox -DERROR -o vex.bin.err",
+ "option": "-Ox -DERROR=1 -o vex1.bin.err",
"target": [
- { "stderr": "vex.stderr" }
+ { "stderr": "vex1.stderr" }
+ ],
+ "error": "expected"
+ },
+ {
+ "description": "Test late REX/VEX prefix errors",
+ "ref": "vex",
+ "option": "-Ox -DERROR=2 -o vex2.bin.err",
+ "target": [
+ { "stderr": "vex2.stderr" }
],
"error": "expected"
}
diff --git a/travis/test/vex.stderr b/travis/test/vex.stderr
deleted file mode 100644
index b7645c38..00000000
--- a/travis/test/vex.stderr
+++ /dev/null
@@ -1 +0,0 @@
-./travis/test/vex.asm:8: error: specific encoding scheme not available
diff --git a/travis/test/vex1.stderr b/travis/test/vex1.stderr
new file mode 100644
index 00000000..0e9425f6
--- /dev/null
+++ b/travis/test/vex1.stderr
@@ -0,0 +1,3 @@
+./travis/test/vex.asm:29: error: instruction not encodable with {vex2} prefix
+./travis/test/vex.asm:30: error: cannot use high byte register in rex instruction
+./travis/test/vex.asm:32: error: invalid operands in non-64-bit mode
diff --git a/travis/test/vex2.stderr b/travis/test/vex2.stderr
new file mode 100644
index 00000000..f50b5406
--- /dev/null
+++ b/travis/test/vex2.stderr
@@ -0,0 +1,3 @@
+./travis/test/vex.asm:35: error: instruction not encodable with {rex} prefix
+./travis/test/vex.asm:36: error: instruction not encodable with {vex} prefix
+./travis/test/vex.asm:37: error: instruction not encodable with {vex3} prefix
More information about the Nasm-commits
mailing list