[nasm:master] outlib: add infrastructure for common section and symbol handling

nasm-bot for H. Peter Anvin (Intel) hpa at zytor.com
Tue Jun 30 17:03:06 PDT 2020


Commit-ID:  2770fc7ac6674a7fd3ef895cecae5d9b5b722dc1
Gitweb:     http://repo.or.cz/w/nasm.git?a=commitdiff;h=2770fc7ac6674a7fd3ef895cecae5d9b5b722dc1
Author:     H. Peter Anvin (Intel) <hpa at zytor.com>
AuthorDate: Sat, 27 Jun 2020 22:08:26 -0700
Committer:  H. Peter Anvin (Intel) <hpa at zytor.com>
CommitDate: Sat, 27 Jun 2020 22:11:27 -0700

outlib: add infrastructure for common section and symbol handling

Pretty much all the backends have to do the same gymnastics to handle
symbols and sections. In the future, this should be done by the
assembly core, but in order to prepare the ground and get the
performance benefits as soon as possible, implement a library of
functions which can manage symbols and sections and their respective
hashes, trees, and tables.

This infrastructure is not yet used by any backend, that porting work
has to come next.

Signed-off-by: H. Peter Anvin (Intel) <hpa at zytor.com>


---
 output/outlib.c | 278 ++++++++++++++++++++++++++++++++++++++++++++++++++++++--
 output/outlib.h | 252 +++++++++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 522 insertions(+), 8 deletions(-)

diff --git a/output/outlib.c b/output/outlib.c
index c60de055..fa7db15e 100644
--- a/output/outlib.c
+++ b/output/outlib.c
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
- *   
- *   Copyright 1996-2009 The NASM Authors - All Rights Reserved
+ *
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -14,7 +14,7 @@
  *     copyright notice, this list of conditions and the following
  *     disclaimer in the documentation and/or other materials provided
  *     with the distribution.
- *     
+ *
  *     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  *     CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  *     INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
@@ -32,14 +32,13 @@
  * ----------------------------------------------------------------------- */
 
 /*
- * libout.c
+ * outlib.c
  *
  * Common routines for the output backends.
  */
 
-#include "compiler.h"
-#include "nasm.h"
 #include "outlib.h"
+#include "raa.h"
 
 uint64_t realsize(enum out_type type, uint64_t size)
 {
@@ -56,3 +55,270 @@ uint64_t realsize(enum out_type type, uint64_t size)
 	return size;
     }
 }
+
+/* Common section/symbol handling */
+
+struct ol_sect *_ol_sect_list;
+uint64_t _ol_nsects;             /* True sections, not external symbols */
+static struct ol_sect **ol_sect_tail = &_ol_sect_list;
+static struct hash_table ol_secthash;
+static struct RAA *ol_sect_index_tbl;
+
+struct ol_sym *_ol_sym_list;
+uint64_t _ol_nsyms;
+static struct ol_sym **ol_sym_tail = &_ol_sym_list;
+static struct hash_table ol_symhash;
+
+void ol_init(void)
+{
+}
+
+static void ol_free_symbols(void)
+{
+    struct ol_sym *s, *stmp;
+
+    hash_free(&ol_symhash);
+
+    list_for_each_safe(s, stmp, _ol_sym_list) {
+        nasm_free((char *)s->name);
+        nasm_free(s);
+    }
+
+    _ol_nsyms = 0;
+    _ol_sym_list = NULL;
+    ol_sym_tail = &_ol_sym_list;
+}
+
+static void ol_free_sections(void)
+{
+    struct ol_sect *s, *stmp;
+
+    hash_free(&ol_secthash);
+    raa_free(ol_sect_index_tbl);
+    ol_sect_index_tbl = NULL;
+
+    list_for_each_safe(s, stmp, _ol_sect_list) {
+        saa_free(s->data);
+        saa_free(s->reloc);
+        nasm_free((char *)s->name);
+        nasm_free(s);
+    }
+
+    _ol_nsects = 0;
+    _ol_sect_list = NULL;
+    ol_sect_tail = &_ol_sect_list;
+}
+
+void ol_cleanup(void)
+{
+    ol_free_symbols();
+    ol_free_sections();
+}
+
+/*
+ * Allocate a section index and add a section, subsection, or external
+ * symbol to the section-by-index table. If the index provided is zero,
+ * allocate a new index via seg_alloc().
+ */
+static uint32_t ol_seg_alloc(void *s, uint32_t ix)
+{
+    if (!ix)
+        ix = seg_alloc();
+    ol_sect_index_tbl = raa_write_ptr(ol_sect_index_tbl, ix >> 1, s);
+    return ix;
+}
+
+/*
+ * Find a section or create a new section structure if it does not exist
+ * and allocate it an index value via seg_alloc().
+ */
+struct ol_sect *_ol_get_sect(const char *name, size_t ssize, size_t rsize)
+{
+    struct ol_sect *s, **sp;
+    struct hash_insert hi;
+
+    sp = (struct ol_sect **)hash_find(&ol_secthash, name, &hi);
+    if (sp)
+        return *sp;
+
+    s             = nasm_zalloc(ssize);
+    s->syml.tail  = &s->syml.head;
+    s->name       = nasm_strdup(name);
+    s->data       = saa_init(1);
+    s->reloc      = saa_init(rsize);
+    *ol_sect_tail = s;
+    ol_sect_tail  = &s->next;
+    _ol_nsects++;
+    s->index     = s->subindex = ol_seg_alloc(s, 0);
+
+    hash_add(&hi, s->name, s);
+    return s;
+}
+
+/* Find a section by name without creating one */
+struct ol_sect *_ol_sect_by_name(const char *name)
+{
+    struct ol_sect **sp;
+
+    sp = (struct ol_sect **)hash_find(&ol_secthash, name, NULL);
+    return sp ? *sp : NULL;
+}
+
+/* Find a section or external symbol by index; NULL if not valid */
+struct ol_sect *_ol_sect_by_index(int32_t index)
+{
+    uint32_t ix = index;
+
+    if (unlikely(ix >= SEG_ABS))
+        return NULL;
+
+    return raa_read_ptr(ol_sect_index_tbl, ix >> 1);
+}
+
+/*
+ * Start a new subsection for the given section. At the moment, once a
+ * subsection has been created, it is not possible to revert to an
+ * earlier subsection. ol_sect_by_index() will return the main section
+ * structure. Returns the new section index.  This is used to prevent
+ * the front end from optimizing across subsection boundaries.
+ */
+int32_t _ol_new_subsection(struct ol_sect *sect)
+{
+    if (unlikely(!sect))
+        return NO_SEG;
+
+    return sect->subindex = ol_seg_alloc(sect, 0);
+}
+
+/*
+ * Insert a symbol into a list; need to use upcasting using container_of()
+ * to walk the list later.
+ */
+void ol_add_sym_to(struct ol_symlist *syml, struct ol_symhead *head,
+                   uint64_t offset)
+{
+    syml->tree.key = offset;
+    head->tree     = rb_insert(head->tree, &syml->tree);
+    *head->tail    = syml;
+    head->tail     = &syml->next;
+    head->n++;
+}
+
+/*
+ * Create a location structure from seg:offs
+ */
+void ol_mkloc(struct ol_loc *loc, int64_t offs, int32_t seg)
+{
+    nasm_zero(*loc);
+    loc->offs = offs;
+
+    if (unlikely((uint32_t)seg >= SEG_ABS)) {
+        if (likely(seg == NO_SEG)) {
+            loc->seg.t     = OS_NOSEG;
+        } else {
+            loc->seg.t     = OS_ABS;
+            loc->seg.index = seg - SEG_ABS;
+        }
+    } else {
+        loc->seg.index  = seg & ~1;
+        loc->seg.t      = OS_SECT | (seg & 1);
+        loc->seg.s.sect = _ol_sect_by_index(loc->seg.index);
+    }
+}
+
+/*
+ * Create a new symbol. If this symbol is OS_OFFS, add it to the relevant
+ * section, too. If the symbol already exists, return NULL; this is
+ * different from ol_get_sect(), as a single section may be requested
+ * many times. In contrast, the front end will prevent a single symbol
+ * from being defined more than once.
+ *
+ * If flags has OF_GLOBAL set, add it to the global symbol list for
+ * the containing section if applicable.
+ *
+ * If flags has OF_IMPSEC set, allocate a segment index for it via
+ * seg_alloc() unless v->seg.index is already set, and add it to the
+ * section-by-index table.
+ */
+struct ol_sym *_ol_new_sym(const char *name, const struct ol_loc *v,
+                           uint32_t flags, size_t size)
+{
+    struct hash_insert hi;
+    struct ol_sym *sym;
+
+    if (hash_find(&ol_symhash, name, &hi))
+        return NULL;            /* Symbol already exists */
+
+    flags     |= OF_SYMBOL;
+
+    sym        = nasm_zalloc(size);
+    sym->name  = nasm_strdup(name);
+    sym->v     = *v;
+
+    if (sym->v.seg.t & OS_SECT) {
+        struct ol_sect *sect = sym->v.seg.s.sect;
+
+        if (!sect || (sect->flags & OF_SYMBOL))
+            /* Must be an external or common reference */
+            flags |= OF_IMPSEC;
+
+        if (flags & OF_IMPSEC) {
+            /* Metasection */
+            if (!sym->v.seg.s.sym) {
+                sym->v.seg.s.sym = sym;
+                sym->v.seg.index = ol_seg_alloc(sym, sym->v.seg.index);
+            }
+        } else if (sym->v.seg.t == OS_OFFS) {
+            struct ol_sect * const sect = sym->v.seg.s.sect;
+            const uint64_t offs = sym->v.offs;
+
+            ol_add_sym_to(&sym->syml, &sect->syml, offs);
+            if (flags & OF_GLOBAL)
+                ol_add_sym_to(&sym->symg, &sect->symg, offs);
+        }
+    }
+    sym->flags = flags;
+
+    *ol_sym_tail = sym;
+    ol_sym_tail  = &sym->next;
+    _ol_nsyms++;
+
+    hash_add(&hi, sym->name, sym);
+    return sym;
+}
+
+/* Find a symbol in the global namespace */
+struct ol_sym *_ol_sym_by_name(const char *name)
+{
+    struct ol_sym **symp;
+
+    symp = (struct ol_sym **)hash_find(&ol_symhash, name, NULL);
+    return symp ? *symp : NULL;
+}
+
+/*
+ * Find a symbol by address in a specific section. If no symbol is defined
+ * at that exact address, return the immediately previously defined one.
+ * If global is set, then only return global symbols.
+ */
+struct ol_sym *_ol_sym_by_address(struct ol_sect *sect, int64_t addr,
+                                  bool global)
+{
+    struct ol_symhead *head;
+    size_t t_offs;
+    struct rbtree *t;
+
+    if (global) {
+        head = &sect->symg;
+        t_offs = offsetof(struct ol_sym, symg.tree);
+    } else {
+        head = &sect->syml;
+        t_offs = offsetof(struct ol_sym, syml.tree);
+    }
+
+    t = rb_search(head->tree, addr);
+    if (!t)
+        return NULL;
+
+    return (struct ol_sym *)((char *)t - t_offs);
+}
diff --git a/output/outlib.h b/output/outlib.h
index 30f2c0b2..a0b31245 100644
--- a/output/outlib.h
+++ b/output/outlib.h
@@ -1,6 +1,6 @@
 /* ----------------------------------------------------------------------- *
  *   
- *   Copyright 1996-2018 The NASM Authors - All Rights Reserved
+ *   Copyright 1996-2020 The NASM Authors - All Rights Reserved
  *   See the file AUTHORS included with the NASM distribution for
  *   the specific copyright holders.
  *
@@ -34,8 +34,12 @@
 #ifndef NASM_OUTLIB_H
 #define NASM_OUTLIB_H
 
+#include "compiler.h"
 #include "nasm.h"
 #include "error.h"
+#include "hashtbl.h"
+#include "saa.h"
+#include "rbtree.h"
 
 uint64_t realsize(enum out_type type, uint64_t size);
 
@@ -61,5 +65,249 @@ extern const struct dfmt * const null_debug_arr[2];
 /* Wrapper for unported backends */
 void nasm_do_legacy_output(const struct out_data *data);
 
-#endif /* NASM_OUTLIB_H */
+/*
+ * Common routines for tasks that really should migrate into the core.
+ * This provides a common interface for maintaining sections and symbols,
+ * and provide quick lookups as well as declared-order sequential walks.
+ *
+ * These structures are intended to be embedded at the *top* of a
+ * backend-specific structure containing additional information.
+ *
+ * The tokens O_Section, O_Symbol and O_Reloc are intended to be
+ * defined as macros by the backend before including this file!
+ */
+
+struct ol_sect;
+struct ol_sym;
+
+#ifndef O_Section
+typedef struct ol_sect O_Section;
+#endif
+#ifndef O_Symbol
+typedef struct ol_sym O_Symbol;
+#endif
+#ifndef O_Reloc
+typedef void * O_Reloc;
+#endif
+
+/* Common section structure */
+
+/*
+ * Common flags for sections and symbols; low values reserved for
+ * backend.  Note that both ol_sect and ol_sym begin with a flags
+ * field, so if a section pointer points to an external symbol instead
+ * they can be trivially resolved.
+ */
+#define OF_SYMBOL 0x80000000
+#define OF_GLOBAL 0x40000000
+#define OF_IMPSEC 0x20000000
+#define OF_COMMON 0x10000000
+
+struct ol_sym;
+
+struct ol_symlist {
+    struct ol_symlist *next;
+    struct rbtree tree;
+};
+struct ol_symhead {
+    struct ol_symlist *head, **tail;
+    struct rbtree *tree;
+    uint64_t n;
+};
+
+struct ol_sect {
+    uint32_t flags;             /* Section/symbol flags */
+    struct ol_sect *next;       /* Next section in declared order */
+    const char *name;           /* Name of section */
+    struct ol_symhead syml;     /* All symbols in this section */
+    struct ol_symhead symg;     /* Global symbols in this section */
+    struct SAA *data;           /* Contents of section */
+    struct SAA *reloc;          /* Section relocations */
+    uint32_t index;             /* Primary section index */
+    uint32_t subindex;          /* Current subsection index */
+};
+
+/* Segment reference */
+enum ol_seg_type {
+    OS_NOSEG  = 0,                /* Plain number (no segment) */
+    OS_SEGREF = 1,                /* It is a segment reference */
+    OS_ABS    = 1,                /* Absolute segment reference */
+    OS_SECT   = 2,                /* It is a real section */
+    OS_OFFS   = OS_SECT,          /* Offset reference in section */
+    OS_SEG    = OS_SECT|OS_SEGREF /* Section reference */
+};
+
+union ol_segval {
+    struct ol_sect *sect;   /* Section structure */
+    struct ol_sym  *sym;    /* External symbol structure */
+};
+
+struct ol_seg {
+    union ol_segval  s;
+    enum ol_seg_type t;
+
+    /*
+     * For a section:          subsection index
+     * For a metasymbol:       virtual segment index
+     * For an absolute symbol: absolute value
+     */
+    uint32_t index;
+};
+
+/* seg:offs representing the full location value and type */
+struct ol_loc {
+    int64_t offs;
+    struct ol_seg seg;
+};
+
+/* Common symbol structure */
+struct ol_sym {
+    uint32_t flags;             /* Section/symbol flags */
+    uint32_t size;              /* Size value (for backend) */
+    struct ol_sym *next;       	/* Next symbol in declared order */
+    const char *name;           /* Symbol name */
+    struct ol_symlist syml;     /* Section-local symbol list */
+    struct ol_symlist symg;     /* Section-local global symbol list */
+    struct ol_loc p;            /* Symbol position ("where") */
+    struct ol_loc v;            /* Symbol value ("what") */
+};
+
+/*
+ * Operations
+ */
+void ol_init(void);
+void ol_cleanup(void);
 
+/* Convert offs:seg to a location structure */
+extern void
+ol_mkloc(struct ol_loc *loc, int64_t offs, int32_t seg);
+
+/* Get the section or external symbol from a struct ol_seg */
+static inline O_Section *seg_sect(struct ol_seg *seg)
+{
+    return (O_Section *)seg->s.sect;
+}
+static inline O_Symbol *seg_xsym(struct ol_seg *seg)
+{
+    return (O_Symbol *)seg->s.sym;
+}
+
+/*
+ * Return a pointer to the symbol structure if and only if a section is
+ * really a symbol of some kind (extern, common...)
+ */
+static inline struct ol_sym *_seg_extsym(struct ol_sect *sect)
+{
+    return (sect->flags & OF_SYMBOL) ? (struct ol_sym *)sect : NULL;
+}
+static inline O_Symbol *seg_extsym(O_Section *sect)
+{
+    return (O_Symbol *)_seg_extsym((struct ol_sect *)sect);
+}
+
+/*
+ * Find a section or create a new section structure if it does not exist
+ * and allocate it an index value via seg_alloc().
+ */
+extern struct ol_sect *
+_ol_get_sect(const char *name, size_t ssize, size_t rsize);
+static inline O_Section *ol_get_sect(const char *name)
+{
+    return (O_Section *)_ol_get_sect(name, sizeof(O_Section), sizeof(O_Reloc));
+}
+
+/* Find a section by name without creating one */
+extern struct ol_sect *_ol_sect_by_name(const char *);
+static inline O_Section *ol_sect_by_name(const char *name)
+{
+    return (O_Section *)_ol_sect_by_name(name);
+}
+
+/* Find a section or external symbol by index; NULL if not valid */
+extern struct ol_sect *_ol_sect_by_index(int32_t index);
+static inline O_Section *ol_sect_by_index(int32_t index)
+{
+    return (O_Section *)_ol_sect_by_index(index);
+}
+
+/* Global list of sections (not including external symbols) */
+extern struct ol_sect *_ol_sect_list;
+static inline O_Section *ol_sect_list(void)
+{
+    return (O_Section *)_ol_sect_list;
+}
+
+/* Count of sections (not including external symbols) */
+extern uint64_t _ol_nsects;
+static inline uint64_t ol_nsects(void)
+{
+    return _ol_nsects;
+}
+
+/*
+ * Start a new subsection for the given section. At the moment, once a
+ * subsection has been created, it is not possible to revert to an
+ * earlier one. ol_sect_by_index() will return the main section structure.
+ * Returns the new section index (NO_SEG if sect is NULL).  This is used
+ * to prevent the front end from optimizing across subsection boundaries.
+ */
+extern int32_t _ol_new_subsection(struct ol_sect *sect);
+static inline int32_t ol_new_subsection(O_Section *sect)
+{
+    return _ol_new_subsection((struct ol_sect *)sect);
+}
+
+/*
+ * Create a new symbol. If this symbol is OS_OFFS, add it to the relevant
+ * section, too. If the symbol already exists, return NULL; this is
+ * different from ol_get_sect(), as a single section may be requested
+ * many times. In contrast, the front end will prevent a single symbol
+ * from being defined more than once.
+ *
+ * If flags has OF_GLOBAL set, add it to the containing section's global
+ * symbol list if applicable. If flags has OF_IMPSEC set, give it a segment
+ * index (seg_alloc() unless v->seg.index is set) in the by-index table.
+ */
+extern struct ol_sym *_ol_new_sym(const char *name, const struct ol_loc *v,
+                                  uint32_t flags, size_t size);
+static inline O_Symbol *ol_new_sym(const char *name, const struct ol_loc *v,
+                                   uint32_t flags)
+{
+    return (O_Symbol *)_ol_new_sym(name, v, flags, sizeof(O_Symbol));
+}
+
+/* Find a symbol by name in the global namespace */
+extern struct ol_sym *_ol_sym_by_name(const char *name);
+static inline O_Symbol *ol_sym_by_name(const char *name)
+{
+    return (O_Symbol *)_ol_sym_by_name(name);
+}
+
+/*
+ * Find a symbol by address in a specific section. If no symbol is defined
+ * at that exact address, return the immediately previously defined one.
+ * If global is set, then only return global symbols.
+ */
+extern struct ol_sym *_ol_sym_by_address(struct ol_sect *sect, int64_t addr,
+                                         bool global);
+static inline O_Symbol *ol_sym_by_address(O_Section *sect, int64_t addr,
+                                          bool global)
+{
+    return (O_Symbol *)_ol_sym_by_address((struct ol_sect *)sect, addr, global);
+}
+
+/* Global list of symbols */
+extern struct ol_sym *_ol_sym_list;
+static inline O_Symbol *ol_sym_list(void)
+{
+    return (O_Symbol *)_ol_sym_list;
+}
+
+/* Global count of symbols */
+extern uint64_t _ol_nsyms;
+static inline uint64_t ol_nsyms(void)
+{
+    return _ol_nsyms;
+}
+
+#endif /* NASM_OUTLIB_H */


More information about the Nasm-commits mailing list