311 lines
12 KiB
C
311 lines
12 KiB
C
|
#ifndef ASMDATA_H
|
||
|
#define ASMDATA_H
|
||
|
|
||
|
#include "asmln.h"
|
||
|
|
||
|
/*
 * Hard upper limits. Each name states the quantity it bounds; where exactly
 * each limit is enforced is not visible in this header.
 */
#define ASMDATA_MAXSECTIONS   200
#define ASMDATA_MAXFILLED     (1024*1024*1024)   /* 2^30, i.e. 1 GiB when counted in bytes */
#define ASMDATA_MAXSYMBOLS    400000
#define ASMDATA_MAXREFERENCES 400000
|
||
|
|
||
|
/* Short `_t` aliases for the structure tags used throughout this header. */
typedef struct asmdata           asmdata_t;
typedef struct asmdata_section   asmdata_section_t;
typedef struct asmdata_symbol    asmdata_symbol_t;
typedef struct asmdata_reference asmdata_reference_t;
typedef struct asmdata_mapentry  asmdata_mapentry_t;
typedef struct asmdata_map       asmdata_map_t;
|
||
|
|
||
|
struct asmdata_reference {
|
||
|
int32_t symbolindex;
|
||
|
int32_t sectionindex;
|
||
|
int64_t sectionoffset;
|
||
|
int8_t size; // 0 for 8bit, 1 for 16bit, 2 for 32bit, 3 for 64bit
|
||
|
int8_t baseflags;
|
||
|
int16_t extflags;
|
||
|
int32_t extdata;
|
||
|
};
|
||
|
|
||
|
/*
 * Symbol flag bits (bits 8..11).
 * NOTE(review): presumably stored in asmdata_symbol.flags; confirm.
 */
#define ASMDATA_SYMBOL_DUMMY (1 << 8)
#define ASMDATA_SYMBOL_EXPR  (1 << 9)
#define ASMDATA_SYMBOL_CONST (1 << 10)
#define ASMDATA_SYMBOL_OP    (1 << 11)
|
||
|
|
||
|
struct asmdata_symbol {
|
||
|
const char* namecopy;
|
||
|
int64_t sectionoffset; // 0 if not defined, also reused for constant values in expressions (but with -1 for section)
|
||
|
int32_t flags; // 0 if not defined
|
||
|
int32_t sectionindex; // -1 if not defined
|
||
|
int32_t firstreferenceindex; // -1 if not defined
|
||
|
int32_t x_lhs;
|
||
|
int32_t x_op;
|
||
|
int32_t x_rhs;
|
||
|
int32_t reserved;
|
||
|
};
|
||
|
|
||
|
struct asmdata_section {
|
||
|
int32_t sectionnumber;
|
||
|
bool bigendian;
|
||
|
const char* namecopy;
|
||
|
uint8_t* buffer;
|
||
|
int32_t buffersize;
|
||
|
int32_t bufferfilled;
|
||
|
int64_t reservedsize;
|
||
|
int64_t virtualoffset;
|
||
|
};
|
||
|
|
||
|
struct asmdata {
|
||
|
bool finalised;
|
||
|
bool extsyntax;
|
||
|
int32_t nsections;
|
||
|
int32_t nsymbols;
|
||
|
int32_t nreferences;
|
||
|
int32_t pad;
|
||
|
asmdata_section_t* activesection;
|
||
|
asmdata_section_t** sections;
|
||
|
asmdata_symbol_t* symbols;
|
||
|
asmdata_reference_t* references;
|
||
|
/*
|
||
|
asmdata_section_t* sections[ASMDATA_MAXSECTIONS];
|
||
|
asmdata_symbol_t symbols[ASMDATA_MAXSYMBOLS];
|
||
|
asmdata_reference_t references[ASMDATA_MAXREFERENCES];
|
||
|
*/
|
||
|
};
|
||
|
|
||
|
/*
 * Callback type stored in asmdata_map.deletef.
 * NOTE(review): parameter names are inferred from asmdata_map_set()
 * (map, key, value); confirm against the map implementation.
 */
typedef void (*asmdata_iterf_t)(asmdata_map_t* map, char* key, void* value);
|
||
|
|
||
|
struct asmdata_mapentry {
|
||
|
int hash;
|
||
|
char* key;
|
||
|
void* value;
|
||
|
asmdata_mapentry_t* next;
|
||
|
};
|
||
|
|
||
|
struct asmdata_map {
|
||
|
asmdata_mapentry_t** table;
|
||
|
int ntable; // Number of slots allocated in the table
|
||
|
asmdata_iterf_t deletef;
|
||
|
};
|
||
|
|
||
|
/*
 * Map API (implemented outside this header).
 * NOTE(review): `n` is presumably the number of table slots to allocate and
 * `deletef` is presumably invoked per entry on deletion; confirm in the
 * implementation.
 */

/* Creates a map; `deletef` is kept as the map's callback. */
asmdata_map_t* asmdata_map_new(int n, asmdata_iterf_t deletef);

/* Associates `value` with `key` in `map`. */
void asmdata_map_set(asmdata_map_t* map, char* key, void* value);

/* Looks up `key` in `map` and returns the associated value pointer. */
void* asmdata_map_get(asmdata_map_t* map, char* key);

/* Destroys `map`. */
void asmdata_map_delete(asmdata_map_t* map);
|
||
|
|
||
|
asmdata_section_t* asmdata_section_new(const char* name);
|
||
|
bool asmdata_section_reserveextra(asmdata_section_t* section, int64_t nbytes, bool willfill);
|
||
|
void* asmdata_section_delete(asmdata_section_t* section);
|
||
|
|
||
|
/*
 * Pads `section` until its reservedsize is a multiple of `alignment`.
 *
 * Padding is requested one byte at a time through
 * asmdata_section_reserveextra() with willfill = false.
 *
 * Returns true once the section is aligned (including when no padding was
 * needed). Returns false if `alignment` is zero or if a padding byte could
 * not be reserved.
 */
ASMLN_INLINE bool asmdata_section_align(asmdata_section_t* section, int64_t alignment) {
    /* `x % 0` is undefined behaviour in C; report failure instead of crashing. */
    if (alignment == 0) {
        return false;
    }

    while (section->reservedsize % alignment != 0) {
        if (!asmdata_section_reserveextra(section, 1, false)) {
            return false;
        }
    }

    return true;
}
|
||
|
|
||
|
/*
 * Reserves `nbytes` at the current end of `section` (its reservedsize) and
 * returns a pointer to the first of those bytes inside section->buffer.
 *
 * Returns NULL when the current offset or `nbytes` does not survive a round
 * trip through int32_t, or when asmdata_section_reserveextra() fails.
 */
ASMLN_INLINE uint8_t* asmdata_section_fill(asmdata_section_t* section, int64_t nbytes) {
    const int64_t start = section->reservedsize;
    const int32_t start32 = (int32_t)start;
    const int32_t nbytes32 = (int32_t)nbytes;

    /* Both values must be representable as int32_t. */
    const bool fits = ((int64_t)start32 == start) && ((int64_t)nbytes32 == nbytes);

    if (fits && asmdata_section_reserveextra(section, nbytes, true)) {
        /* Read section->buffer only after reserving, as the original does. */
        return &(section->buffer[start32]);
    }

    return NULL;
}
|
||
|
|
||
|
/*
 * Word-size codes: each value is log2 of the byte count, so `1 << code`
 * yields 1, 2, 4 or 8 bytes.
 */
#define ASMDATA_SIZE_8BIT  ((int8_t)0)
#define ASMDATA_SIZE_16BIT ((int8_t)1)
#define ASMDATA_SIZE_32BIT ((int8_t)2)
#define ASMDATA_SIZE_64BIT ((int8_t)3)
|
||
|
/*
 * Appends the low 1, 2, 4 or 8 bytes of `word` to `section`, in the byte
 * order selected by section->bigendian.
 *
 * `size` is one of the ASMDATA_SIZE_* codes (log2 of the byte count).
 *
 * Returns true on success. Returns false if `size` is not a valid code
 * (nothing is reserved in that case) or if asmdata_section_fill() fails.
 */
ASMLN_INLINE bool asmdata_section_appendword(asmdata_section_t* section, int64_t word, int8_t size) {
    /*
     * Validate before reserving: an out-of-range size would otherwise be used
     * as a shift count and leave unwanted bytes in the section.
     */
    if (size < ASMDATA_SIZE_8BIT || size > ASMDATA_SIZE_64BIT) {
        return false;
    }

    const int32_t nbytes = (int32_t)1 << size;   /* 1, 2, 4 or 8 */
    uint8_t* target = asmdata_section_fill(section, nbytes);

    if (target == NULL) {
        return false;
    }

    /* Shift as unsigned so the result is well defined for negative words. */
    const uint64_t bits = (uint64_t)word;

    for (int32_t i = 0; i < nbytes; i++) {
        /* Big endian stores the most significant byte first, little endian the least. */
        const int32_t shift = 8 * (section->bigendian ? (nbytes - 1 - i) : i);
        target[i] = (uint8_t)(bits >> shift);
    }

    return true;
}
|
||
|
|
||
|
/*
 * Appends `nbytes` bytes read from `source` to `section`.
 *
 * Returns false (copying nothing) if asmdata_section_fill() cannot provide
 * the space, true otherwise.
 */
ASMLN_INLINE bool asmdata_section_appendbytes(asmdata_section_t* section, uint8_t* source, int32_t nbytes) {
    uint8_t* dst = asmdata_section_fill(section, nbytes);

    if (dst != NULL) {
        uint8_t* src = source;
        int32_t remaining;

        /* Plain forward byte copy; runs zero times when nbytes <= 0. */
        for (remaining = nbytes; remaining > 0; remaining--) {
            *dst++ = *src++;
        }

        return true;
    }

    return false;
}
|
||
|
|
||
|
/*
 * Creates a new asmdata structure (implemented outside this header).
 * Declared with `(void)` so that calls passing arguments are diagnosed; an
 * empty `()` would leave the parameters unspecified in C11.
 */
asmdata_t* asmdata_new(void);

/* Destroys `asmdata`. NOTE(review): meaning of the returned pointer is not visible here. */
void* asmdata_delete(asmdata_t* asmdata);
|
||
|
|
||
|
/* This function should be called (exactly) once before extracting data, assuming you want linkage information retained.
|
||
|
* It will assemble the symbol and references list (and possibly any additional metadata) into their own special sections.
|
||
|
* You generally shouldn't assemble anything else after finalising the asmdata structure (the API will still let you though,
|
||
|
* in case you want to add e.g. a special checksum or signature section based on the finalised contents of the other sections,
|
||
|
* but defining or using any symbols after this point is an error).
|
||
|
*/
|
||
|
bool asmdata_finalise(asmdata_t* asmdata);
|
||
|
|
||
|
/* Produces a simple file header. This is added as the last section (typically, but not necessarily, after finalisation). A
|
||
|
* full binary file can then be produced by writing - firstly - the header section, and then each section (including the header
|
||
|
* again) and padding to the given page boundary after every section (including the first and last copies of the header section).
|
||
|
* Two "hint" strings (references into the strings section) can be provided as a simple means to classify file types within
|
||
|
* higher-level environments (e.g. a system might use a conventions like hint1="program" hint2="generic-dynamic-64bit", and might
|
||
|
* use different hints like hint1="library" vs hint1="program" to distinguish components, but no convention is mandated specifically).
|
||
|
* The hint strings are only intended to confirm type information and may be empty or ignored. An additional "inthint" field is
|
||
|
* also added in case high level systems need to be able to quickly identify things like architecture flags without loading strings.
|
||
|
* No additional metadata (e.g. filename, architecture, build time) is added in the file header. This is by design (it can easily
|
||
|
* be added in another section if necessary). A simple checksum of each section is added with it's record in the header, while
|
||
|
* the checksum of the file header itself is calculated with it's own checksum field set to zero (before it has been set!). The
|
||
|
* second copy of the file header is added primarily for integrity (i.e. if a section may be corrupted, how do we know the header
|
||
|
* or checksum itself is not corrupted? With a second copy of course), however it's secondary purpose is to verify the end of file
|
||
|
* for a loader. The file header will also contain a version number (currently 1), which should be taken as the version of the
|
||
|
* file header/format only (not necessarily related to the version of the assembler/compiler/architecture or even necessarily of
|
||
|
* the symbols/references sections which may not even exist in the output, but the strings section must be compatible for the
|
||
|
* hints to be used).
|
||
|
*
|
||
|
* Note on duplicating header: Having the header in it's own section may also be convenient for special cases e.g. having to
|
||
|
* inspect the header itself in "readable" assembly output or, in the future, including alternative headers for different
|
||
|
* environments or files with multiple sub-files/file-headers for the purposes of optimisation. In these cases, a definitive
|
||
|
* file header is still given at the start of the file, but an interpreter may use that header's section list to find the most
|
||
|
* suitable alternative file header for it's environment.
|
||
|
*
|
||
|
* Note on page sizes: For compact binaries, a page size of 1 will leave no padding between sections, but other considerations:
|
||
|
* Larger page sizes are primarily useful for optimising specific cases and specifically for sharing (at page granularity)
|
||
|
* these pages between multiple program instances in a modern (multitasking) operating system. A page size of up to around
|
||
|
* 64kb might make sense in some cases but larger page sizes (e.g. 2MB) typically lead to far-oversized program files. In
|
||
|
* any case, a loader can still determine the same program contents regardless of the page size, so a smaller size (either 1
|
||
|
* or a value such as 4, 8, or 16 to ensure basic alignment of fields) is generally a better default than a larger one. An
|
||
|
* interpreter may expect a specific page size matching it's own environmental considerations (e.g. if it's too large a
|
||
|
* smaller machine might run out of memory, and if it's too small it might have to copy all the data for alignment).
|
||
|
*/
|
||
|
bool asmdata_produceheader(asmdata_t* asmdata, int32_t pagesize, const char* hint1, const char* hint2, int32_t inthint);
|
||
|
|
||
|
/*
 * Looks up the section called `sectionname` (implemented outside this header).
 * The inline helpers below treat a NULL result as "no such section".
 * NOTE(review): `autocreate` presumably creates the section when it is
 * missing; confirm in the implementation.
 */
asmdata_section_t* asmdata_findsection(
    asmdata_t* asmdata,
    const char* sectionname,
    bool autocreate);
|
||
|
|
||
|
/*
 * Returns true if asmdata_findsection() finds `sectionname` when called with
 * autocreate = false.
 */
ASMLN_INLINE bool asmdata_hassection(asmdata_t* asmdata, const char* sectionname) {
    asmdata_section_t* found = asmdata_findsection(asmdata, sectionname, false);

    if (found == NULL) {
        return false;
    }

    return true;
}
|
||
|
|
||
|
/*
 * Makes `sectionname` the active section, looking it up with autocreate = true.
 *
 * Returns the section, or NULL if asmdata_findsection() returned NULL; in
 * that case the previously active section is left unchanged.
 */
ASMLN_INLINE asmdata_section_t* asmdata_selectsection(asmdata_t* asmdata, const char* sectionname) {
    asmdata_section_t* found = asmdata_findsection(asmdata, sectionname, true);

    if (found == NULL) {
        return NULL;
    }

    asmdata->activesection = found;
    return found;
}
|
||
|
|
||
|
/*
 * Returns the active section. If none has been selected yet, selects the
 * section named "data" and returns that (NULL if the selection fails).
 */
ASMLN_INLINE asmdata_section_t* asmdata_activesection(asmdata_t* asmdata) {
    asmdata_section_t* current = asmdata->activesection;

    if (current != NULL) {
        return current;
    }

    return asmdata_selectsection(asmdata, "data");
}
|
||
|
|
||
|
/*
 * Appends `word` to the active section; see asmdata_section_appendword() for
 * the meaning of `size`.
 *
 * Returns false if no active section could be obtained or if the append fails.
 */
ASMLN_INLINE bool asmdata_appendword(asmdata_t* asmdata, int64_t word, int8_t size) {
    asmdata_section_t* section = asmdata_activesection(asmdata);

    /* asmdata_activesection() can return NULL; do not pass that on to be dereferenced. */
    if (section == NULL) {
        return false;
    }

    return asmdata_section_appendword(section, word, size);
}
|
||
|
|
||
|
/*
 * Appends `nbytes` bytes from `source` to the active section.
 *
 * Returns false if no active section could be obtained or if the append fails.
 */
ASMLN_INLINE bool asmdata_appendbytes(asmdata_t* asmdata, uint8_t* source, int32_t nbytes) {
    asmdata_section_t* section = asmdata_activesection(asmdata);

    /* asmdata_activesection() can return NULL; do not pass that on to be dereferenced. */
    if (section == NULL) {
        return false;
    }

    return asmdata_section_appendbytes(section, source, nbytes);
}
|
||
|
|
||
|
/*
 * File bracketing (implemented outside this header).
 * NOTE(review): presumably called before and after the lines of the input
 * file `name` are assembled; confirm in the implementation.
 */
bool asmdata_beginfile(asmdata_t* asmdata, const char* name);
bool asmdata_endfile(asmdata_t* asmdata, const char* name);
|
||
|
|
||
|
int32_t asmdata_findsymbol(asmdata_t* asmdata, const char* name, bool autocreate);
|
||
|
int32_t asmdata_symbolhere(asmdata_t* asmdata, const char* name);
|
||
|
int32_t asmdata_appendreferenceword(asmdata_t* asmdata, const char* name, int8_t size);
|
||
|
|
||
|
/*
 * Line-level entry points taking an already constructed asmln_t (implemented
 * outside this header).
 * NOTE(review): judging by the names, the first only validates the line and
 * the second assembles it; confirm in the implementation.
 */
bool asmdata_isvalidasmln(asmdata_t* asmdata, asmln_t* asmln);
bool asmdata_asmln(asmdata_t* asmdata, asmln_t* asmln);
|
||
|
|
||
|
/*
 * Text-line wrapper around asmdata_isvalidasmln().
 *
 * Builds a temporary asmln_t from `ln` with asmln_new(), passes it to
 * asmdata_isvalidasmln() and releases it with asmln_delete().
 * Returns false if asmln_new() returns NULL, otherwise the check's result.
 */
ASMLN_INLINE bool asmdata_isvalidln(asmdata_t* asmdata, const char* ln) {
    bool valid = false;
    asmln_t* parsed = asmln_new(ln);

    if (parsed != NULL) {
        valid = asmdata_isvalidasmln(asmdata, parsed);
        asmln_delete(parsed);
    }

    return valid;
}
|
||
|
|
||
|
/*
 * Text-line wrapper around asmdata_asmln().
 *
 * Builds a temporary asmln_t from `ln` with asmln_new(), passes it to
 * asmdata_asmln() and releases it with asmln_delete().
 * Returns false if asmln_new() returns NULL, otherwise asmdata_asmln()'s
 * result.
 *
 * Fix: this previously called asmdata_isvalidasmln(), making it a duplicate
 * of asmdata_isvalidln() that never reached asmdata_asmln().
 */
ASMLN_INLINE bool asmdata_ln(asmdata_t* asmdata, const char* ln) {
    asmln_t* asmln = asmln_new(ln);

    if (asmln == NULL) {
        return false;
    }

    bool result = asmdata_asmln(asmdata, asmln);

    asmln_delete(asmln);

    return result;
}
|
||
|
|
||
|
/* From ifndef at top of file: */
|
||
|
#endif
|