slas/asmdata.h

311 lines
12 KiB
C
Raw Permalink Normal View History

/* Data model declared by this header: sections, symbols, references and a string-keyed map. */
#ifndef ASMDATA_H
#define ASMDATA_H
#include "asmln.h"
/* Capacity limits. The commented-out arrays inside struct asmdata use the section, symbol and
 * reference limits as array sizes. NOTE(review): where these limits are enforced is not visible
 * in this header - confirm against the implementation file. */
#define ASMDATA_MAXSECTIONS 200
/* 1 GiB. NOTE(review): presumably a cap on filled section bytes - confirm. */
#define ASMDATA_MAXFILLED (1024*1024*1024)
#define ASMDATA_MAXSYMBOLS 400000
#define ASMDATA_MAXREFERENCES 400000
/* Forward typedefs so the struct definitions below can refer to each other by their _t names. */
typedef struct asmdata asmdata_t;
typedef struct asmdata_section asmdata_section_t;
typedef struct asmdata_symbol asmdata_symbol_t;
typedef struct asmdata_reference asmdata_reference_t;
typedef struct asmdata_mapentry asmdata_mapentry_t;
typedef struct asmdata_map asmdata_map_t;
/* One reference record. NOTE(review): the field names suggest it ties a symbol (by index) to a
 * location (section index + offset within that section) - confirm against the implementation. */
struct asmdata_reference {
int32_t symbolindex;
int32_t sectionindex;
int64_t sectionoffset;
int8_t size; // 0 for 8bit, 1 for 16bit, 2 for 32bit, 3 for 64bit
int8_t baseflags; // NOTE(review): flag meanings are not visible in this header
int16_t extflags; // NOTE(review): flag meanings are not visible in this header
int32_t extdata;
};
/* Single-bit flag values occupying bits 8 through 11.
 * NOTE(review): presumably combined into asmdata_symbol.flags - confirm against the implementation. */
#define ASMDATA_SYMBOL_DUMMY (1<<8)
#define ASMDATA_SYMBOL_EXPR (1<<9)
#define ASMDATA_SYMBOL_CONST (1<<10)
#define ASMDATA_SYMBOL_OP (1<<11)
/* One symbol table entry. The per-field comments give the values used for a symbol that has
 * not been defined yet. */
struct asmdata_symbol {
const char* namecopy; // NOTE(review): the name suggests an owned copy of the symbol name - confirm ownership
int64_t sectionoffset; // 0 if not defined, also reused for constant values in expressions (but with -1 for section)
int32_t flags; // 0 if not defined
int32_t sectionindex; // -1 if not defined
int32_t firstreferenceindex; // -1 if not defined
// NOTE(review): x_lhs / x_op / x_rhs look like the operands and operator of an expression
// symbol (compare ASMDATA_SYMBOL_EXPR / ASMDATA_SYMBOL_OP) - confirm against the implementation.
int32_t x_lhs;
int32_t x_op;
int32_t x_rhs;
int32_t reserved;
};
/* One section: a named byte buffer plus the size/offset bookkeeping used by the inline
 * helpers in this header. */
struct asmdata_section {
int32_t sectionnumber;
bool bigendian; // byte order used by asmdata_section_appendword (true = most significant byte first)
const char* namecopy;
uint8_t* buffer; // backing storage; asmdata_section_fill returns pointers into it
int32_t buffersize; // NOTE(review): presumably the allocated capacity of buffer - confirm
int32_t bufferfilled; // NOTE(review): presumably the number of buffer bytes in use - confirm
int64_t reservedsize; // offset at which asmdata_section_fill starts writing; asmdata_section_align tests it against the alignment
int64_t virtualoffset; // NOTE(review): meaning is not visible in this header
};
/* Top-level container holding the sections, symbols and references, with a count for each. */
struct asmdata {
bool finalised; // NOTE(review): presumably set by asmdata_finalise - confirm
bool extsyntax;
int32_t nsections;
int32_t nsymbols;
int32_t nreferences;
int32_t pad;
asmdata_section_t* activesection; // NULL until a section is selected; set by asmdata_selectsection
asmdata_section_t** sections;
asmdata_symbol_t* symbols;
asmdata_reference_t* references;
// The comment below shows a fixed-size array layout for the three pointer members above.
/*
asmdata_section_t* sections[ASMDATA_MAXSECTIONS];
asmdata_symbol_t symbols[ASMDATA_MAXSYMBOLS];
asmdata_reference_t references[ASMDATA_MAXREFERENCES];
*/
};
/* Callback type receiving a map, a key and a value. */
typedef void (*asmdata_iterf_t)(asmdata_map_t*, char*, void*);
/* One key/value entry; entries link to each other through `next`. */
struct asmdata_mapentry {
int hash; // NOTE(review): presumably the cached hash of key - confirm
char* key;
void* value;
asmdata_mapentry_t* next;
};
/* String-keyed map built from a table of entry pointers. */
struct asmdata_map {
asmdata_mapentry_t** table;
int ntable; // Number of slots allocated in the table
asmdata_iterf_t deletef; // NOTE(review): presumably invoked per entry when the map is deleted - confirm
};
/* Map API. NOTE(review): the meaning of `n` (presumably the initial table size) and key
 * ownership are not visible in this header - confirm against the implementation. */
asmdata_map_t* asmdata_map_new(int n, asmdata_iterf_t deletef);
void asmdata_map_set(asmdata_map_t* map, char* key, void* value);
void* asmdata_map_get(asmdata_map_t* map, char* key);
void asmdata_map_delete(asmdata_map_t* map);
/* Section lifecycle. */
asmdata_section_t* asmdata_section_new(const char* name);
/* Reserves nbytes more in the section; the inline helpers in this header treat a false return
 * as failure. asmdata_section_fill passes willfill=true and then writes through section->buffer,
 * while asmdata_section_align passes willfill=false. */
bool asmdata_section_reserveextra(asmdata_section_t* section, int64_t nbytes, bool willfill);
/* NOTE(review): the meaning of the returned pointer is not visible here (possibly always NULL) - confirm. */
void* asmdata_section_delete(asmdata_section_t* section);
/* Pads the section one byte at a time (reserved without being filled) until its reserved
 * size is a multiple of `alignment`.
 *
 * Returns true once the reserved size is aligned. Returns false if `alignment` is not
 * positive or if reserving a padding byte fails.
 */
ASMLN_INLINE bool asmdata_section_align(asmdata_section_t* section, int64_t alignment) {
    /* A zero alignment would make the modulo below undefined (division by zero), and a
     * negative alignment has no sensible meaning, so reject both before looping. */
    if (alignment <= 0) {
        return false;
    }
    while (section->reservedsize % alignment != 0) {
        if (!asmdata_section_reserveextra(section, 1, false)) {
            return false;
        }
    }
    return true;
}
/* Reserves `nbytes` at the current end of the section (for filling) and returns a pointer to
 * the first reserved byte inside the section buffer.
 *
 * Returns NULL when the current reserved size or `nbytes` does not survive a round trip
 * through int32_t, or when asmdata_section_reserveextra reports failure.
 */
ASMLN_INLINE uint8_t* asmdata_section_fill(asmdata_section_t* section, int64_t nbytes) {
    const int64_t start = section->reservedsize;
    const int32_t start32 = (int32_t)start;
    const int32_t nbytes32 = (int32_t)nbytes;
    const bool fits = ((int64_t)start32 == start) && ((int64_t)nbytes32 == nbytes);

    /* section->buffer is read only after the reservation call has returned, so the pointer
     * is computed from whatever buffer the section holds at that point. */
    if (fits && asmdata_section_reserveextra(section, nbytes, true)) {
        return section->buffer + start32;
    }
    return NULL;
}
/* Word size codes: each value is log2 of the byte count (asmdata_section_appendword reserves
 * 1 << size bytes), matching the encoding documented on asmdata_reference.size. */
#define ASMDATA_SIZE_8BIT ((int8_t)0)
#define ASMDATA_SIZE_16BIT ((int8_t)1)
#define ASMDATA_SIZE_32BIT ((int8_t)2)
#define ASMDATA_SIZE_64BIT ((int8_t)3)
/* Appends one integer word to the section in the section's byte order.
 *
 * section: destination section; its `bigendian` flag selects the byte order.
 * word:    value to store; only the low (1 << size) bytes are written.
 * size:    one of the ASMDATA_SIZE_* codes (0..3, i.e. log2 of the byte count).
 *
 * Returns true on success. Returns false, without reserving any space, when `size` is not a
 * valid size code, and false when space for the word cannot be reserved.
 */
ASMLN_INLINE bool asmdata_section_appendword(asmdata_section_t* section, int64_t word, int8_t size) {
    /* Validate before shifting: a negative or oversized shift count is undefined behaviour,
     * and an unsupported size must not reserve bytes that are never written. */
    if (size < ASMDATA_SIZE_8BIT || size > ASMDATA_SIZE_64BIT) {
        return false;
    }
    const int nbytes = 1 << size;
    uint8_t* target = asmdata_section_fill(section, nbytes);
    if (target == NULL) {
        return false;
    }
    /* Shift an unsigned copy so that extracting bytes from a negative word does not rely on
     * implementation-defined signed right shifts. */
    const uint64_t bits = (uint64_t)word;
    int i;
    for (i = 0; i < nbytes; i++) {
        /* Big-endian stores the most significant byte first, little-endian the least. */
        const int shift = 8 * (section->bigendian ? (nbytes - 1 - i) : i);
        target[i] = (uint8_t)(bits >> shift);
    }
    return true;
}
/* Copies `nbytes` bytes from `source` onto the end of the section.
 *
 * Returns false when asmdata_section_fill cannot provide the space, true otherwise.
 */
ASMLN_INLINE bool asmdata_section_appendbytes(asmdata_section_t* section, uint8_t* source, int32_t nbytes) {
    uint8_t* dest = asmdata_section_fill(section, nbytes);
    if (dest != NULL) {
        int32_t copied = 0;
        while (copied < nbytes) {
            dest[copied] = source[copied];
            copied++;
        }
        return true;
    }
    return false;
}
/* Creation and destruction of the top-level asmdata structure.
 * NOTE(review): failure behaviour of asmdata_new and the meaning of the pointer returned by
 * asmdata_delete are not visible in this header - confirm against the implementation. */
asmdata_t* asmdata_new();
void* asmdata_delete(asmdata_t* asmdata);
/* This function should be called (exactly) once before extracting data, assuming you want linkage information retained.
* It will assemble the symbol and reference lists (and possibly any additional metadata) into their own special sections.
* You generally shouldn't assemble anything else after finalising the asmdata structure (the API will still let you though,
* in case you want to add e.g. a special checksum or signature section based on the finalised contents of the other sections,
* but defining or using any symbols after this point is an error).
* NOTE(review): the bool result presumably reports success - confirm against the implementation.
*/
bool asmdata_finalise(asmdata_t* asmdata);
/* Produces a simple file header. This is added as the last section (typically, but not necessarily, after finalisation). A
* full binary file can then be produced by writing - firstly - the header section, and then each section (including the header
* again) and padding to the given page boundary after every section (including the first and last copies of the header section).
* Two "hint" strings (references into the strings section) can be provided as a simple means to classify file types within
* higher-level environments (e.g. a system might use a convention like hint1="program" hint2="generic-dynamic-64bit", and might
* use different hints like hint1="library" vs hint1="program" to distinguish components, but no convention is mandated specifically).
* The hint strings are only intended to confirm type information and may be empty or ignored. An additional "inthint" field is
* also added in case high level systems need to be able to quickly identify things like architecture flags without loading strings.
* No additional metadata (e.g. filename, architecture, build time) is added in the file header. This is by design (it can easily
* be added in another section if necessary). A simple checksum of each section is added with its record in the header, while
* the checksum of the file header itself is calculated with its own checksum field set to zero (before it has been set!). The
* second copy of the file header is added primarily for integrity (i.e. if a section may be corrupted, how do we know the header
* or checksum itself is not corrupted? With a second copy of course), however its secondary purpose is to verify the end of file
* for a loader. The file header will also contain a version number (currently 1), which should be taken as the version of the
* file header/format only (not necessarily related to the version of the assembler/compiler/architecture or even necessarily of
* the symbols/references sections which may not even exist in the output, but the strings section must be compatible for the
* hints to be used).
*
* Note on duplicating header: Having the header in its own section may also be convenient for special cases e.g. having to
* inspect the header itself in "readable" assembly output or, in the future, including alternative headers for different
* environments or files with multiple sub-files/file-headers for the purposes of optimisation. In these cases, a definitive
* file header is still given at the start of the file, but an interpreter may use that header's section list to find the most
* suitable alternative file header for its environment.
*
* Note on page sizes: For compact binaries, a page size of 1 will leave no padding between sections, but other considerations:
* Larger page sizes are primarily useful for optimising specific cases and specifically for sharing (at page granularity)
* these pages between multiple program instances in a modern (multitasking) operating system. A page size of up to around
* 64kb might make sense in some cases but larger page sizes (e.g. 2MB) typically lead to far-oversized program files. In
* any case, a loader can still determine the same program contents regardless of the page size, so a smaller size (either 1
* or a value such as 4, 8, or 16 to ensure basic alignment of fields) is generally a better default than a larger one. An
* interpreter may expect a specific page size matching its own environmental considerations (e.g. if it's too large a
* smaller machine might run out of memory, and if it's too small it might have to copy all the data for alignment).
*/
bool asmdata_produceheader(asmdata_t* asmdata, int32_t pagesize, const char* hint1, const char* hint2, int32_t inthint);
/* Looks up a section by name; the inline helpers below treat a NULL result as "no section".
 * asmdata_hassection calls it with autocreate=false and asmdata_selectsection with autocreate=true.
 * NOTE(review): presumably autocreate=true creates the section when it is missing - confirm. */
asmdata_section_t* asmdata_findsection(asmdata_t* asmdata, const char* sectionname, bool autocreate);
/* True when asmdata_findsection, called with autocreate disabled, returns a non-NULL section
 * for `sectionname`. */
ASMLN_INLINE bool asmdata_hassection(asmdata_t* asmdata, const char* sectionname) {
    asmdata_section_t* found = asmdata_findsection(asmdata, sectionname, false);
    if (found == NULL) {
        return false;
    }
    return true;
}
/* Finds the named section (with autocreate enabled) and makes it the active section.
 *
 * Returns the section, or NULL when asmdata_findsection returns NULL; in that case the
 * active section is left as it was.
 */
ASMLN_INLINE asmdata_section_t* asmdata_selectsection(asmdata_t* asmdata, const char* sectionname) {
    asmdata_section_t* section = asmdata_findsection(asmdata, sectionname, true);
    if (section == NULL) {
        return NULL;
    }
    asmdata->activesection = section;
    return section;
}
/* Returns the active section. When none has been selected yet, selects the section named
 * "data" and returns the result of that selection (which may be NULL). */
ASMLN_INLINE asmdata_section_t* asmdata_activesection(asmdata_t* asmdata) {
    if (asmdata->activesection != NULL) {
        return asmdata->activesection;
    }
    return asmdata_selectsection(asmdata, "data");
}
/* Appends one word of the given ASMDATA_SIZE_* size to the active section.
 *
 * Returns false when no active section is available or when the append itself fails.
 */
ASMLN_INLINE bool asmdata_appendword(asmdata_t* asmdata, int64_t word, int8_t size) {
    asmdata_section_t* section = asmdata_activesection(asmdata);
    /* asmdata_activesection can return NULL when the default section cannot be selected;
     * passing that on would dereference a NULL section. */
    if (section == NULL) {
        return false;
    }
    return asmdata_section_appendword(section, word, size);
}
/* Appends `nbytes` bytes from `source` to the active section.
 *
 * Returns false when no active section is available or when the append itself fails.
 */
ASMLN_INLINE bool asmdata_appendbytes(asmdata_t* asmdata, uint8_t* source, int32_t nbytes) {
    asmdata_section_t* section = asmdata_activesection(asmdata);
    /* asmdata_activesection can return NULL when the default section cannot be selected;
     * passing that on would dereference a NULL section. */
    if (section == NULL) {
        return false;
    }
    return asmdata_section_appendbytes(section, source, nbytes);
}
/* File bracketing. NOTE(review): semantics are not visible in this header - confirm against the implementation. */
bool asmdata_beginfile(asmdata_t* asmdata, const char* name);
bool asmdata_endfile(asmdata_t* asmdata, const char* name);
/* Symbol and reference API. NOTE(review): the int32_t results are presumably indices (compare the
 * -1 "not defined" convention on asmdata_symbol) - confirm against the implementation. */
int32_t asmdata_findsymbol(asmdata_t* asmdata, const char* name, bool autocreate);
int32_t asmdata_symbolhere(asmdata_t* asmdata, const char* name);
int32_t asmdata_appendreferenceword(asmdata_t* asmdata, const char* name, int8_t size);
/* Operations on an already-parsed line (asmln_t); the inline string-based wrappers that follow
 * build an asmln_t from text with asmln_new. */
bool asmdata_isvalidasmln(asmdata_t* asmdata, asmln_t* asmln);
bool asmdata_asmln(asmdata_t* asmdata, asmln_t* asmln);
/* Builds a temporary asmln_t from the text `ln`, checks it with asmdata_isvalidasmln and
 * releases it again.
 *
 * Returns false when asmln_new returns NULL; otherwise returns the result of the check.
 */
ASMLN_INLINE bool asmdata_isvalidln(asmdata_t* asmdata, const char* ln) {
    bool valid = false;
    asmln_t* parsed = asmln_new(ln);
    if (parsed != NULL) {
        valid = asmdata_isvalidasmln(asmdata, parsed);
        asmln_delete(parsed);
    }
    return valid;
}
/* Builds a temporary asmln_t from the text `ln`, processes it with asmdata_asmln and
 * releases it again.
 *
 * Returns false when asmln_new returns NULL; otherwise returns the result of asmdata_asmln.
 */
ASMLN_INLINE bool asmdata_ln(asmdata_t* asmdata, const char* ln) {
    asmln_t* asmln = asmln_new(ln);
    if (asmln == NULL) {
        return false;
    }
    /* Must call asmdata_asmln here: calling asmdata_isvalidasmln (as asmdata_isvalidln does)
     * would only validate the line and never process it. */
    bool result = asmdata_asmln(asmdata, asmln);
    asmln_delete(asmln);
    return result;
}
/* From ifndef at top of file: */
#endif