[nasm:master] doc: allow replicated index entries (\IR), make index sorting smarter

nasm-bot for H. Peter Anvin hpa at zytor.com
Wed Nov 9 19:06:03 PST 2022


Commit-ID:  f7163e343cb689d6ee414a72e059f1d4a75cac35
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=f7163e343cb689d6ee414a72e059f1d4a75cac35
Author:     H. Peter Anvin <hpa at zytor.com>
AuthorDate: Wed, 9 Nov 2022 18:38:45 -0800
Committer:  H. Peter Anvin <hpa at zytor.com>
CommitDate: Wed, 9 Nov 2022 18:38:45 -0800

doc: allow replicated index entries (\IR), make index sorting smarter

Allow a single index entry key to be defined with \IR more than once,
generating multiple entries in the index. This is useful, for example,
to always generate both "macros, single-line" and "single-line macros"
entries, which sort at different places in the index.

Be smarter about sorting the index: sort (nearly) all special
characters before alphanumerics, and (attempt to) sort numbers in
numerical order rather than alphabetical (so BITS8 sorts before
BITS16).

Signed-off-by: H. Peter Anvin <hpa at zytor.com>


---
 doc/rdsrc.pl | 132 ++++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 77 insertions(+), 55 deletions(-)

diff --git a/doc/rdsrc.pl b/doc/rdsrc.pl
index f1e49352..f0d570e7 100644
--- a/doc/rdsrc.pl
+++ b/doc/rdsrc.pl
@@ -157,7 +157,7 @@ $MAXLEVEL = 10;  # really 3, but play safe ;-)
 
 # Read the file; pass a paragraph at a time to the paragraph processor.
 print "Reading input...";
-$pname = "para000000";
+$pname = [];
 @pnames = @pflags = ();
 $para = undef;
 foreach $file (@files) {
@@ -268,9 +268,9 @@ sub include {
 sub got_para {
   local ($_) = @_;
   my $pflags = "", $i, $w, $l, $t;
-  return if !/\S/;
+  my @para = ();
 
-  @$pname = ();
+  return if !/\S/;
 
   # Replace metadata macros
   while (/^(.*)\\m\{([^\}]*)\}(.*)$/) {
@@ -294,7 +294,7 @@ sub got_para {
       $l =~ s/\\\{/\{/g;
       $l =~ s/\\\}/}/g;
       $l =~ s/\\\\/\\/g;
-      push @$pname, $l;
+      push @para, $l;
     }
     $_ = ''; # suppress word-by-word code
   } elsif (/^\\C/) {
@@ -389,11 +389,11 @@ sub got_para {
     $pflags = "norm";
   }
 
-  # The word-by-word code: unless @$pname is already defined (which it
+  # The word-by-word code: unless @para is already defined (which it
   # will be in the case of a code paragraph), split the paragraph up
-  # into words and push each on @$pname.
+  # into words and push each on @para.
   #
-  # Each thing pushed on @$pname should have a two-character type
+  # Each thing pushed on @para should have a two-character type
   # code followed by the text.
   #
   # Type codes are:
@@ -416,7 +416,7 @@ sub got_para {
   #      index-items arrays
   # "sp" for space
   while (/\S/) {
-    s/^\s*//, push @$pname, "sp" if /^\s/;
+    s/^\s*//, push @para, "sp" if /^\s/;
     $indexing = $qindex = 0;
     if (/^(\\[iI])?\\c/) {
       $qindex = 1 if $1 eq "\\I";
@@ -429,9 +429,8 @@ sub got_para {
       $w =~ s/\\\}/\}/g;
       $w =~ s/\\-/-/g;
       $w =~ s/\\\\/\\/g;
-      (push @$pname,"i"),$lastp = $#$pname if $indexing;
-      push @$pname,"c $w" if !$qindex;
-      $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
+      push(@para, addidx($node, $w, "c $w")) if ($indexing);
+      push(@para, "c $w") if (!$qindex);
     } elsif (/^\\[iIe]/) {
       /^(\\[iI])?(\\e)?/;
       $emph = 0;
@@ -448,19 +447,25 @@ sub got_para {
       $w =~ s/\\\\/\\/g;
       $t = $emph ? "es" : "n ";
       @ientry = ();
-      (push @$pname,"i"),$lastp = $#$pname if $indexing;
+      @pentry = ();
       foreach $i (split /\s+/,$w) {  # \e and \i can be multiple words
-        push @$pname,"$t$i","sp" if !$qindex;
-	($ii=$i) =~ tr/A-Z/a-z/, push @ientry,"n $ii","sp" if $indexing;
+        push @pentry, "$t$i","sp";
+	($ii=$i) =~ tr/A-Z/a-z/, push @ientry,"n $ii","sp";
 	$t = $emph ? "e " : "n ";
       }
-      $w =~ tr/A-Z/a-z/, pop @ientry if $indexing;
-      $$pname[$lastp] = &addidx($node, $w, @ientry) if $indexing;
-      pop @$pname if !$qindex; # remove final space
-      if (substr($$pname[$#$pname],0,2) eq "es" && !$qindex) {
-        substr($$pname[$#$pname],0,2) = "eo";
-      } elsif ($emph && !$qindex) {
-        substr($$pname[$#$pname],0,2) = "ee";
+      if ($indexing) {
+	  $w =~ tr/A-Z/a-z/;
+	  pop @ientry;		# remove final space
+	  push(@para, addidx($node, $w, @ientry));
+      }
+      if (!$qindex) {
+	  pop @pentry;		# remove final space
+	  if (substr($pentry[-1],0,2) eq 'es') {
+	      substr($pentry[-1],0,2) = 'eo';
+	  } elsif ($emph) {
+	      substr($pentry[-1],0,2) = 'ee';
+	  }
+	  push(@para, @pentry);
       }
     } elsif (/^\\[kK]/) {
       $t = "k ";
@@ -468,7 +473,7 @@ sub got_para {
       s/^\\[kK]//;
       die "badly formatted \\k: \\k$_\n" if !/\{([^\}]*)\}(.*)$/;
       $_ = $2;
-      push @$pname,"$t$1";
+      push @para,"$t$1";
     } elsif (/^\\W/) {
       s/^\\W//;
       die "badly formatted \\W: \\W$_\n"
@@ -483,9 +488,8 @@ sub got_para {
       $w =~ s/\\\}/\}/g;
       $w =~ s/\\-/-/g;
       $w =~ s/\\\\/\\/g;
-      (push @$pname,"i"),$lastp = $#$pname if $indexing;
-      push @$pname,"$t<$l>$w";
-      $$pname[$lastp] = &addidx($node, $w, "c $w") if $indexing;
+      push(@para, addidx($node, $w, "c $w")) if $indexing;
+      push(@para, "$t<$l>$w");
     } else {
       die "what the hell? $_\n" if !/^(([^\s\\\-]|\\[\\{}\-])*-?)(.*)$/;
       die "painful death! $_\n" if !length $1;
@@ -496,53 +500,71 @@ sub got_para {
       $w =~ s/\\-/-/g;
       $w =~ s/\\\\/\\/g;
       if ($w eq '--') {
-	  push @$pname, 'dm';
+	  push @para, 'dm';
       } elsif ($w eq '-') {
-        push @$pname, 'da';
+        push @para, 'da';
       } else {
-        push @$pname,"n $w";
+        push @para,"n $w";
       }
     }
   }
   if ($irewrite ne undef) {
-    &addidx(undef, $irewrite, @$pname);
-    @$pname = ();
+    addidx(undef, $irewrite, @para);
   } else {
-    push @pnames, $pname;
+    push @pnames, [@para];
     push @pflags, $pflags;
-    $pname++;
   }
 }
 
-sub addidx {
-  my ($node, $text, @ientry) = @_;
+sub addidx($$@) {
+  my($node, $text, @ientry) = @_;
   $text = $idxalias{$text} || $text;
-  if ($node eq undef || !$idxmap{$text}) {
-    @$ientry = @ientry;
-    $idxmap{$text} = $ientry;
-    $ientry++;
-  }
-  if ($node) {
-    $idxnodes{$node,$text} = 1;
-    return "i $text";
+  if (!exists($idxmap{$text})) {
+      $idxmap{$text} = [@ientry];
+      $idxdup{$text} = [$text];
+  } elsif (!defined($node)) {
+      my $dummy = sprintf('%s    #%05d', $text, $#{$idxdup{$text}} + 2);
+      $idxmap{$dummy} = [@ientry];
+      push(@{$idxdup{$text}}, $dummy);
   }
+
+  return undef if (!defined($node));
+
+  return map { $idxnodes{$node,$_} = 1; "i $_" } @{$idxdup{$text}};
 }
 
 sub indexsort {
   my $iitem, $ientry, $i, $piitem, $pcval, $cval, $clrcval;
 
   @itags = map { # get back the original data as the 1st elt of each list
-             $_->[0]
-	   } sort { # compare auxiliary (non-first) elements of lists
-	     $a->[1] cmp $b->[1] ||
-	     $a->[2] cmp $b->[2] ||
-	     $a->[0] cmp $b->[0]
-           } map { # transform array into list of 3-element lists
-	     my $ientry = $idxmap{$_};
-	     my $a = substr($$ientry[0],2);
-	     $a =~ tr/A-Za-z0-9//cd;
-	     [$_, uc($a), substr($$ientry[0],0,2)]
-	   } keys %idxmap;
+      $_->[0]
+  } sort { # compare auxiliary (non-first) elements of lists
+      my $d = 0;
+      for (my $i = 1; defined($a->[$i]) || defined($b->[$i]); $i++) {
+	  $d = $a->[$i] cmp $b->[$i];
+	  last if ($d);
+      }
+      $d
+  } map { # transform array into list of 3-element lists
+      my $ientry = $idxmap{$_};
+      my $b = lc(join(' ', map { substr($_,2) } @$ientry));
+      $b =~ s/([][(){}]+|\B,)//g;
+      $b =~ s/\s+/ /g;
+      my $a = $b;
+      $a =~ s/([[:alpha:]])/Z$1/g;
+      # From this point on [A-Z] means an already classed character
+	  # Try to sort numbers in numerical order (e.g. 8 before 16)
+      while ($a =~ /^(|.*?[^A-Z])(\d+)(\.\d+)?(.*)$/) {
+	  my $p = $1; my $s = $4;
+	  my $nn = ('0' x (24 - length($2))) . $2 . $3;
+	  $nn =~ s/(.)/D$1/g;
+	  $a = $p . $nn . $s;
+      }
+      $a =~ s/([^A-Z\s])/A$1/g;
+      my $c = join(' ', map { substr($_,0,2) } @$ientry);
+      my $v = [$_, $a, $b, $_, $c];
+      $v
+  } keys %idxmap;
 
   # Having done that, check for comma-hood.
   $cval = 0;
@@ -594,8 +616,8 @@ sub fixup_xrefs {
     next if $pflags[$p] eq "code";
     $pname = $pnames[$p];
     for ($i=$#$pname; $i >= 0; $i--) {
-      if ($$pname[$i] =~ /^k/) {
-        $k = $$pname[$i];
+	$k = $$pname[$i];
+      if ($k =~ /^k/) {
         $caps = ($k =~ /^kK/);
 	$k = substr($k,2);
         $repl = $refs{$k};


More information about the Nasm-commits mailing list