[nasm:master] phash: simplify the code generators

nasm-bot for H. Peter Anvin (Intel) hpa at zytor.com
Fri Jul 10 19:30:05 PDT 2020


Commit-ID:  b14dbb95a17a3dda2fe9a6948bdbe978a120b209
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=b14dbb95a17a3dda2fe9a6948bdbe978a120b209
Author:     H. Peter Anvin (Intel) <hpa at zytor.com>
AuthorDate: Fri, 10 Jul 2020 19:26:52 -0700
Committer:  H. Peter Anvin (Intel) <hpa at zytor.com>
CommitDate: Fri, 10 Jul 2020 19:26:52 -0700

phash: simplify the code generators

Simplify the code generators by merging the two hash constant arrays
into one. The hash is effectively the same, although the order of the
constants differ (possibly in a way which makes the indexing easier.)
The main difference is the amount of code is necessary to generate
each of the output C files.

Signed-off-by: H. Peter Anvin (Intel) <hpa at zytor.com>


---
 asm/pptok.pl        | 112 +++++++++++++++++++++++++---------------------------
 asm/tokhash.pl      |  21 ++++------
 macros/macros.pl    |  18 +++------
 nasmlib/perfhash.c  |   4 +-
 nasmlib/perfhash.pl |  11 ++----
 perllib/phash.ph    |   5 ++-
 6 files changed, 76 insertions(+), 95 deletions(-)

diff --git a/asm/pptok.pl b/asm/pptok.pl
index 2e763b31..47924897 100755
--- a/asm/pptok.pl
+++ b/asm/pptok.pl
@@ -1,7 +1,7 @@
 #!/usr/bin/perl
 ## --------------------------------------------------------------------------
 ##
-##   Copyright 1996-2019 The NASM Authors - All Rights Reserved
+##   Copyright 1996-2020 The NASM Authors - All Rights Reserved
 ##   See the file AUTHORS included with the NASM distribution for
 ##   the specific copyright holders.
 ##
@@ -168,6 +168,36 @@ if ($what eq 'c') {
     print OUT "/* Do not edit */\n";
     print OUT "\n";
 
+    print OUT "#include \"compiler.h\"\n";
+    print OUT "#include \"nctype.h\"\n";
+    print OUT "#include \"nasmlib.h\"\n";
+    print OUT "#include \"hashtbl.h\"\n";
+    print OUT "#include \"preproc.h\"\n";
+    print OUT "\n";
+
+    # Note that this is global.
+    printf OUT "const char * const pp_directives[%d] = {\n", scalar(@pptok);
+    foreach $d (@pptok) {
+	if (defined($d) && $d !~ /[A-Z]/) {
+	    print OUT "    \"%$d\",\n";
+	} else {
+	    print OUT "    NULL,\n";
+	}
+    }
+    print OUT  "};\n";
+
+    printf OUT "const uint8_t pp_directives_len[%d] = {\n", scalar(@pptok);
+    foreach $d (@pptok) {
+	printf OUT "    %d,\n", defined($d) ? length($d)+1 : 0;
+    }
+    print OUT  "};\n";
+
+    # Put a large value in unused hash slots.  This makes it extremely
+    # unlikely that any combination that involves unused slot will
+    # pass the range test.  This speeds up rejection of unrecognized
+    # tokens, i.e. identifiers.
+    print OUT "\n#define INVALID_HASH_ENTRY (65535/3)\n";
+
     my %tokens = ();
     my @tokendata = ();
 
@@ -196,54 +226,19 @@ if ($what eq 'c') {
 
     ($n, $sv, $g) = @hashinfo;
     die if ($n & ($n-1));
+    $n <<= 1;
 
-    print OUT "#include \"compiler.h\"\n";
-    print OUT "#include \"nctype.h\"\n";
-    print OUT "#include \"nasmlib.h\"\n";
-    print OUT "#include \"hashtbl.h\"\n";
-    print OUT "#include \"preproc.h\"\n";
-    print OUT "\n";
-
-    # Note that this is global.
-    printf OUT "const char * const pp_directives[%d] = {\n", scalar(@pptok);
-    foreach $d (@pptok) {
-	if (defined($d) && $d !~ /[A-Z]/) {
-	    print OUT "    \"%$d\",\n";
-	} else {
-	    print OUT "    NULL,\n";
-	}
-    }
-    print OUT  "};\n";
-
-    printf OUT "const uint8_t pp_directives_len[%d] = {\n", scalar(@pptok);
-    foreach $d (@pptok) {
-	printf OUT "    %d,\n", defined($d) ? length($d)+1 : 0;
-    }
-    print OUT  "};\n";
-
-    # Put a large value in unused slots.  This makes it extremely unlikely
-    # that any combination that involves unused slot will pass the range test.
-    # This speeds up rejection of unrecognized tokens, i.e. identifiers.
-    print OUT "\n#define INVALID_HASH_ENTRY (65535/3)\n";
 
     print OUT "\n\n/* Primary preprocessor token hash */\n\n";
 
     print OUT "enum preproc_token pp_token_hash(const char *token)\n";
     print OUT "{\n";
-    print OUT "    static const int16_t hash1[$n] = {\n";
-    for ($i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+0];
-	print OUT "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
-    }
-    print OUT "    };\n";
-
-    print OUT "    static const int16_t hash2[$n] = {\n";
+    print OUT "    static const int16_t hashdata[$n] = {\n";
     for ($i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+1];
+	my $h = ${$g}[$i];
 	print OUT "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
     }
     print OUT "    };\n";
-
     print OUT  "    uint32_t k1, k2;\n";
     print OUT  "    uint64_t crc;\n";
     # For correct overflow behavior, "ix" should be unsigned of the same
@@ -253,10 +248,10 @@ if ($what eq 'c') {
 
     printf OUT "    crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
 	$$sv[0], $$sv[1];
-    print  OUT "    k1 = (uint32_t)crc;\n";
-    print  OUT "    k2 = (uint32_t)(crc >> 32);\n";
+    printf OUT "    k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
+    printf OUT "    k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
     print  OUT "\n";
-    printf OUT "    ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+    print  OUT "    ix = hashdata[k1] + hashdata[k2];\n";
     printf OUT "    if (ix >= %d)\n", scalar(@pptok);
     print OUT  "        return PP_INVALID;\n";
     print OUT  "\n";
@@ -288,25 +283,18 @@ if ($what eq 'c') {
 
     ($n, $sv, $g) = @hashinfo;
     die if ($n & ($n-1));
+    $n <<= 1;
 
     print OUT "\n\n/* TASM compatibility preprocessor token hash */\n";
 
     print OUT "enum preproc_token pp_tasm_token_hash(const char *token)\n";
     print OUT "{\n";
-    print OUT "    static const int16_t hash1[$n] = {\n";
+    print OUT "    static const int16_t hashdata[$n] = {\n";
     for ($i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+0];
+	my $h = ${$g}[$i];
 	print OUT "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
     }
     print OUT "    };\n";
-
-    print OUT "    static const int16_t hash2[$n] = {\n";
-    for ($i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+1];
-	print OUT "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
-    }
-    print OUT "    };\n";
-
     print OUT  "    uint32_t k1, k2;\n";
     print OUT  "    uint64_t crc;\n";
     # For correct overflow behavior, "ix" should be unsigned of the same
@@ -316,10 +304,12 @@ if ($what eq 'c') {
 
     printf OUT "    crc = crc64i(UINT64_C(0x%08x%08x), token);\n",
 	$$sv[0], $$sv[1];
-    print  OUT "    k1 = (uint32_t)crc;\n";
-    print  OUT "    k2 = (uint32_t)(crc >> 32);\n";
+    printf OUT "    k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
+    printf OUT "    k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
     print  OUT "\n";
-    printf OUT "    ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+    printf OUT "    ix = hashdata[k1] + hashdata[k2];\n", $n-1, $n-1;
+    # Comparing to pptok here is correct, because this hash produces
+    # an enum preproc_token value directly.
     printf OUT "    if (ix >= %d)\n", scalar(@pptok);
     print OUT  "        return PP_INVALID;\n";
     print OUT  "\n";
@@ -337,9 +327,8 @@ if ($what eq 'c') {
 if ($what eq 'ph') {
     print OUT "# Automatically generated from $in by $0\n";
     print OUT "# Do not edit\n";
-    print OUT "\n";
 
-    print OUT "%pptok_hash = (\n";
+    print OUT "\n\%pptok_hash = (\n";
     $n = 0;
     foreach $tok (@pptok) {
 	if (defined($tok)) {
@@ -348,5 +337,12 @@ if ($what eq 'ph') {
 	$n++;
     }
     print OUT ");\n";
-    print OUT "1;\n";
+
+    print OUT "\n\@pptok_list = (\n";
+    foreach $tok (@pptok) {
+	print OUT "    ", (defined($tok) ? "'\%$tok'" : 'undef'), ",\n";
+    }
+    print OUT ");\n";
+
+    print OUT "\n1;\n";
 }
diff --git a/asm/tokhash.pl b/asm/tokhash.pl
index 2d6538fe..566a3aa3 100755
--- a/asm/tokhash.pl
+++ b/asm/tokhash.pl
@@ -1,7 +1,7 @@
 #!/usr/bin/perl
 ## --------------------------------------------------------------------------
 ##
-##   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+##   Copyright 1996-2020 The NASM Authors - All Rights Reserved
 ##   See the file AUTHORS included with the NASM distribution for
 ##   the specific copyright holders.
 ##
@@ -201,6 +201,7 @@ if ($output eq 'h') {
 
     ($n, $sv, $g) = @hashinfo;
     die if ($n & ($n-1));
+    $n <<= 1;
 
     print "/*\n";
     print " * This file is generated from insns.dat, regs.dat and token.dat\n";
@@ -236,16 +237,9 @@ if ($output eq 'h') {
     # This speeds up rejection of unrecognized tokens, i.e. identifiers.
     print "#define INVALID_HASH_ENTRY (65535/3)\n";
 
-    print "    static const int16_t hash1[$n] = {\n";
+    printf "    static const int16_t hashdata[%d] = {\n", $n;
     for ($i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+0];
-	print "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
-    }
-    print "    };\n";
-
-    print "    static const int16_t hash2[$n] = {\n";
-    for ($i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+1];
+	my $h = ${$g}[$i];
 	print "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
     }
     print "    };\n";
@@ -275,10 +269,11 @@ if ($output eq 'h') {
     print  "        crc = crc64_byte(crc, c);\n";
     print  "    };\n";
     print  "\n";
-    print  "    k1 = (uint32_t)crc;\n";
-    print  "    k2 = (uint32_t)(crc >> 32);\n";
+    printf "    k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
+    printf "    k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
     print  "\n";
-    printf "    ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+    printf "    ix = hashdata[k1] + hashdata[k2];\n",
+	$n-2, $n-2;
     printf "    if (ix >= %d)\n", scalar(@tokendata);
     print  "        goto notfound;\n";
     print  "\n";
diff --git a/macros/macros.pl b/macros/macros.pl
index 1e9cc110..03a6486c 100755
--- a/macros/macros.pl
+++ b/macros/macros.pl
@@ -260,6 +260,7 @@ if (!@hashinfo) {
 verify_hash_table(\%pkg_number, \@hashinfo);
 my ($n, $sv, $g) = @hashinfo;
 die if ($n & ($n-1));
+$n <<= 1;
 
 printf OUT "const int use_package_count = %d;\n\n", $npkg;
 
@@ -278,16 +279,9 @@ print OUT "    };\n";
 # This speeds up rejection of unrecognized tokens, i.e. identifiers.
 print OUT "#define INVALID_HASH_ENTRY (65535/3)\n";
 
-print OUT "    static const int16_t hash1[$n] = {\n";
+print OUT "    static const int16_t hashdata[$n] = {\n";
 for ($i = 0; $i < $n; $i++) {
-    my $h = ${$g}[$i*2+0];
-    print OUT "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
-}
-print OUT "    };\n";
-
-print OUT "    static const int16_t hash2[$n] = {\n";
-for ($i = 0; $i < $n; $i++) {
-    my $h = ${$g}[$i*2+1];
+    my $h = ${$g}[$i];
     print OUT "        ", defined($h) ? $h : 'INVALID_HASH_ENTRY', ",\n";
 }
 print OUT "    };\n";
@@ -301,10 +295,10 @@ print OUT  "\n";
 
 printf OUT "    crc = crc64i(UINT64_C(0x%08x%08x), name);\n",
     $$sv[0], $$sv[1];
-print  OUT "    k1 = (uint32_t)crc;\n";
-print  OUT "    k2 = (uint32_t)(crc >> 32);\n";
+printf OUT "    k1 = ((uint32_t)crc & 0x%x) + 0;\n", $n-2;
+printf OUT "    k2 = ((uint32_t)(crc >> 32) & 0x%x) + 1;\n", $n-2;
 print  OUT "\n";
-printf OUT "    ix = hash1[k1 & 0x%x] + hash2[k2 & 0x%x];\n", $n-1, $n-1;
+printf OUT "    ix = hashdata[k1] + hashdata[k2];\n";
 printf OUT "    if (ix >= %d)\n", scalar(@pkg_list);
 print OUT  "        return NULL;\n";
 print OUT  "\n";
diff --git a/nasmlib/perfhash.c b/nasmlib/perfhash.c
index 5cd6714e..acd3d289 100644
--- a/nasmlib/perfhash.c
+++ b/nasmlib/perfhash.c
@@ -42,9 +42,9 @@ int perfhash_find(const struct perfect_hash *hash, const char *str)
 
     crc = crc64i(hash->crcinit, str);
     k1 = (uint32_t)crc & hash->hashmask;
-    k2 = (uint32_t)(crc >> 32) & hash->hashmask;
+    k2 = ((uint32_t)(crc >> 32) & hash->hashmask) + 1;
 
-    ix = hash->hashvals[k1] + hash->hashvals[k2 + hash->hashmask + 1];
+    ix = hash->hashvals[k1] + hash->hashvals[k2];
 
     if (ix >= hash->tbllen ||
         !hash->strings[ix] ||
diff --git a/nasmlib/perfhash.pl b/nasmlib/perfhash.pl
index 036fdf1a..0c885e8f 100755
--- a/nasmlib/perfhash.pl
+++ b/nasmlib/perfhash.pl
@@ -338,13 +338,8 @@ if ($output eq 'h') {
 
     printf F "static const int16_t %s_hashvals[%d] = ", $name, $n*2;
     $c = '{';
-    for (my $i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+0];
-	print F "$c\n    ", defined($h) ? $h : 'INVALID_HASH_ENTRY';
-	$c = ',';
-    }
-    for (my $i = 0; $i < $n; $i++) {
-	my $h = ${$g}[$i*2+1];
+    for (my $i = 0; $i < $n*2; $i++) {
+	my $h = ${$g}[$i];
 	print F "$c\n    ", defined($h) ? $h : 'INVALID_HASH_ENTRY';
 	$c = ',';
     }
@@ -352,7 +347,7 @@ if ($output eq 'h') {
 
     print F "const struct perfect_hash ${name}_hash = {\n";
     printf F "    UINT64_C(0x%08x%08x),\n", $$sv[0], $$sv[1]; # crcinit
-    printf F "    UINT32_C(0x%x),\n", $n-1;		      # hashmask
+    printf F "    UINT32_C(0x%x),\n", ($n-1) << 1;	      # hashmask
     printf F "    UINT32_C(%u),\n", $tbllen;		      # tbllen
     printf F "    %d,\n", $tbloffs;			      # tbloffs
     printf F "    (%s),\n", $errval;			      # errval
diff --git a/perllib/phash.ph b/perllib/phash.ph
index 133a28b0..693909b8 100644
--- a/perllib/phash.ph
+++ b/perllib/phash.ph
@@ -15,10 +15,11 @@ require 'crc64.ph';
 sub prehash($$$) {
     my($key, $n, $sv) = @_;
     my @c = crc64($sv, $key);
+    my $nmask = ($n << 1) - 2;
 
     # Create a bipartite graph...
-    $k1 = (($c[1] & ($n-1)) << 1) + 0; # low word
-    $k2 = (($c[0] & ($n-1)) << 1) + 1; # high word
+    $k1 = ($c[1] & $nmask) + 0; # low word
+    $k2 = ($c[0] & $nmask) + 1; # high word
 
     return ($k1, $k2);
 }


More information about the Nasm-commits mailing list