[nasm:nasm-2.15.xx] BR 3392705: AVX512: reinstate the SSE-like opcodes for VPCMPEQ/GT

nasm-bot for H. Peter Anvin (Intel) hpa at zytor.com
Thu Jul 30 17:00:03 PDT 2020


Commit-ID:  bae6b070ffdcf250fb84abc75768daa5ed2391f5
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=bae6b070ffdcf250fb84abc75768daa5ed2391f5
Author:     H. Peter Anvin (Intel) <hpa at zytor.com>
AuthorDate: Thu, 30 Jul 2020 16:56:52 -0700
Committer:  H. Peter Anvin (Intel) <hpa at zytor.com>
CommitDate: Thu, 30 Jul 2020 16:56:52 -0700

BR 3392705: AVX512: reinstate the SSE-like opcodes for VPCMPEQ/GT

The VPCMP instructions are controlled by an immediate byte, but there
is also a set of SSE-derived legacy opcodes for VPCMPEQ and
VPCMPGT. For the specific cases of VPCMPEQ and VPCMPGT, prefer those
opcodes since they are one byte shorter.

Reported-by: ig <glucksmann at avast.com>
Signed-off-by: H. Peter Anvin (Intel) <hpa at zytor.com>


---
 test/vpcmp.asm | 27 +++++++++++++++++++++++++++
 x86/insns.dat  | 27 +++++++++++++++++++++++++++
 2 files changed, 54 insertions(+)

diff --git a/test/vpcmp.asm b/test/vpcmp.asm
new file mode 100644
index 00000000..16377cb0
--- /dev/null
+++ b/test/vpcmp.asm
@@ -0,0 +1,27 @@
+	bits 64
+	vpcmpeqb k2{k2},zmm0,zmm1
+	vpcmpgtb k2{k2},zmm0,zmm1
+	vpcmpeqw k2{k2},zmm0,zmm1
+	vpcmpgtw k2{k2},zmm0,zmm1
+	vpcmpeqd k2{k2},zmm0,zmm1
+	vpcmpgtd k2{k2},zmm0,zmm1
+	vpcmpeqq k2{k2},zmm0,zmm1
+	vpcmpgtq k2{k2},zmm0,zmm1
+
+	vpcmpb k2{k2},zmm0,zmm1,0
+	vpcmpb k2{k2},zmm0,zmm1,6
+	vpcmpw k2{k2},zmm0,zmm1,0
+	vpcmpw k2{k2},zmm0,zmm1,6
+	vpcmpd k2{k2},zmm0,zmm1,0
+	vpcmpd k2{k2},zmm0,zmm1,6
+	vpcmpq k2{k2},zmm0,zmm1,0
+	vpcmpq k2{k2},zmm0,zmm1,6
+
+	vpcmpneqb k2{k2},zmm0,zmm1
+	vpcmpleb k2{k2},zmm0,zmm1
+	vpcmpneqw k2{k2},zmm0,zmm1
+	vpcmplew k2{k2},zmm0,zmm1
+	vpcmpneqd k2{k2},zmm0,zmm1
+	vpcmpled k2{k2},zmm0,zmm1
+	vpcmpneqq k2{k2},zmm0,zmm1
+	vpcmpleq k2{k2},zmm0,zmm1
diff --git a/x86/insns.dat b/x86/insns.dat
index e24c2479..18b573a6 100644
--- a/x86/insns.dat
+++ b/x86/insns.dat
@@ -4811,6 +4811,33 @@ VPBROADCASTW    ymmreg|mask|z,reg64                 [rm: evex.256.66.0f38.w0 7b
 VPBROADCASTW    zmmreg|mask|z,reg16                 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
 VPBROADCASTW    zmmreg|mask|z,reg32                 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
 VPBROADCASTW    zmmreg|mask|z,reg64                 [rm: evex.512.66.0f38.w0 7b /r ] AVX512BW,FUTURE
+; VPCMPEQx and VPCMPGTx can be encoded two ways: with the SSE-like opcodes, or as VPCMP with an
+; immediate. Both encodings are valid, but prefer the SSE-like one as it is one byte shorter.
+VPCMPEQB        kreg|mask,xmmreg,xmmrm128           [rvm:fvm: evex.nds.128.66.0f.wig 74 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQB        kreg|mask,ymmreg,ymmrm256           [rvm:fvm: evex.nds.256.66.0f.wig 74 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQB        kreg|mask,zmmreg,zmmrm512           [rvm:fvm: evex.nds.512.66.0f.wig 74 /r ] AVX512BW,FUTURE
+VPCMPEQD        kreg|mask,xmmreg,xmmrm128|b32       [rvm:fv: evex.nds.128.66.0f.w0 76 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQD        kreg|mask,ymmreg,ymmrm256|b32       [rvm:fv: evex.nds.256.66.0f.w0 76 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQD        kreg|mask,zmmreg,zmmrm512|b32       [rvm:fv: evex.nds.512.66.0f.w0 76 /r ] AVX512,FUTURE
+VPCMPEQQ        kreg|mask,xmmreg,xmmrm128|b64       [rvm:fv: evex.nds.128.66.0f38.w1 29 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQQ        kreg|mask,ymmreg,ymmrm256|b64       [rvm:fv: evex.nds.256.66.0f38.w1 29 /r ] AVX512VL,AVX512,FUTURE
+VPCMPEQQ        kreg|mask,zmmreg,zmmrm512|b64       [rvm:fv: evex.nds.512.66.0f38.w1 29 /r ] AVX512,FUTURE
+VPCMPEQW        kreg|mask,xmmreg,xmmrm128           [rvm:fvm: evex.nds.128.66.0f.wig 75 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQW        kreg|mask,ymmreg,ymmrm256           [rvm:fvm: evex.nds.256.66.0f.wig 75 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPEQW        kreg|mask,zmmreg,zmmrm512           [rvm:fvm: evex.nds.512.66.0f.wig 75 /r ] AVX512BW,FUTURE
+VPCMPGTB        kreg|mask,xmmreg,xmmrm128           [rvm:fvm: evex.nds.128.66.0f.wig 64 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTB        kreg|mask,ymmreg,ymmrm256           [rvm:fvm: evex.nds.256.66.0f.wig 64 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTB        kreg|mask,zmmreg,zmmrm512           [rvm:fvm: evex.nds.512.66.0f.wig 64 /r ] AVX512BW,FUTURE
+VPCMPGTD        kreg|mask,xmmreg,xmmrm128|b32       [rvm:fv: evex.nds.128.66.0f.w0 66 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTD        kreg|mask,ymmreg,ymmrm256|b32       [rvm:fv: evex.nds.256.66.0f.w0 66 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTD        kreg|mask,zmmreg,zmmrm512|b32       [rvm:fv: evex.nds.512.66.0f.w0 66 /r ] AVX512,FUTURE
+VPCMPGTQ        kreg|mask,xmmreg,xmmrm128|b64       [rvm:fv: evex.nds.128.66.0f38.w1 37 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTQ        kreg|mask,ymmreg,ymmrm256|b64       [rvm:fv: evex.nds.256.66.0f38.w1 37 /r ] AVX512VL,AVX512,FUTURE
+VPCMPGTQ        kreg|mask,zmmreg,zmmrm512|b64       [rvm:fv: evex.nds.512.66.0f38.w1 37 /r ] AVX512,FUTURE
+VPCMPGTW        kreg|mask,xmmreg,xmmrm128           [rvm:fvm: evex.nds.128.66.0f.wig 65 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTW        kreg|mask,ymmreg,ymmrm256           [rvm:fvm: evex.nds.256.66.0f.wig 65 /r ] AVX512VL,AVX512BW,FUTURE
+VPCMPGTW        kreg|mask,zmmreg,zmmrm512           [rvm:fvm: evex.nds.512.66.0f.wig 65 /r ] AVX512BW,FUTURE
+; The systematic VPCMP with immediate instructions
 VPCMPEQB          kreg|mask,xmmreg,xmmrm128      [rvmi:fvm: evex.nds.128.66.0f3a.w0 3f /r 00 ] AVX512VL,AVX512BW,FUTURE
 VPCMPEQB          kreg|mask,ymmreg,ymmrm256      [rvmi:fvm: evex.nds.256.66.0f3a.w0 3f /r 00 ] AVX512VL,AVX512BW,FUTURE
 VPCMPEQB          kreg|mask,zmmreg,zmmrm512      [rvmi:fvm: evex.nds.512.66.0f3a.w0 3f /r 00 ] AVX512BW,FUTURE


More information about the Nasm-commits mailing list