From a79c7ffaf2177e8d72197801c6ff1db8452f902a Mon Sep 17 00:00:00 2001 From: Zak Yani Star Fenton Date: Thu, 5 Jun 2025 02:50:01 +1000 Subject: [PATCH] Initial commit of the new version of my assembler, complete with macro definitions for RV64 instructions --- asmdata.c | 897 +++++++++++++++++++++++++++++++++++++++++++++++++++++ asmdata.h | 310 ++++++++++++++++++ asmln.c | 388 +++++++++++++++++++++++ asmln.h | 356 +++++++++++++++++++++ asmpp.c | 421 +++++++++++++++++++++++++ asmpp.h | 76 +++++ assemble.c | 478 ++++++++++++++++++++++++++++ rv64.inc | 276 +++++++++++++++++ 8 files changed, 3202 insertions(+) create mode 100644 asmdata.c create mode 100644 asmdata.h create mode 100644 asmln.c create mode 100644 asmln.h create mode 100644 asmpp.c create mode 100644 asmpp.h create mode 100644 assemble.c create mode 100644 rv64.inc diff --git a/asmdata.c b/asmdata.c new file mode 100644 index 0000000..8c715e3 --- /dev/null +++ b/asmdata.c @@ -0,0 +1,897 @@ +#include "asmdata.h" +#include +//#include //Just for debuging. TODO: Remove. 
+ +static int32_t asmdata_strlen_(const char* str) { + if (str == NULL) { + return 0; + } + + int32_t l = 0; + + while (str[l] != 0) { + l++; + } + + return l; +} + +static const char* asmdata_strndup_(const char* str, int maxlen) { + char* result = calloc(maxlen + 1, 1); + + if (result != NULL && str != NULL) { + int32_t i; + for (i = 0; i < maxlen; i++) { + if (str[i] == 0) { + return (const char*)result; + } + result[i] = str[i]; + } + } + + return (const char*)result; +} + +char* asmdata_strdup_(const char* str) { + /*if (str == NULL) { + return NULL; + }*/ + return asmdata_strndup_(str, asmdata_strlen_(str)); +} + +bool asmdata_streq_(const char* a, const char* b) { + if (a == b) { + return true; + } + + int32_t la = asmdata_strlen_(a); + int32_t lb = asmdata_strlen_(b); + + if (la != lb) { + return false; + } + + int32_t i; + for (i = 0; i < la; i++) { + if (a[i] != b[i]) { + return false; + } + } + + return true; +} + +int asmdata_atoll_val_(char a, int r) { + switch (a) { + case '0': + return 0; + case '1': + return 1; + case '2': + return (r >= 2) ? 2 : -1; + case '3': + return (r >= 3) ? 3 : -1; + case '4': + return (r >= 4) ? 4 : -1; + case '5': + return (r >= 5) ? 5 : -1; + case '6': + return (r >= 6) ? 6 : -1; + case '7': + return (r >= 7) ? 7 : -1; + case '8': + return (r >= 8) ? 8 : -1; + case '9': + return (r >= 9) ? 9 : -1; + case 'a': + case 'A': + return (r >= 0xA) ? 0xA : -1; + case 'b': + case 'B': + return (r >= 0xB) ? 0xB : -1; + case 'c': + case 'C': + return (r >= 0xC) ? 0xC : -1; + case 'd': + case 'D': + return (r >= 0xD) ? 0xD : -1; + case 'e': + case 'E': + return (r >= 0xE) ? 0xE : -1; + case 'f': + case 'F': + return (r >= 0xF) ? 
0xF : -1; + default: + return -1; + } +} + +long long asmdata_atoll_r_(const char* a, int r) { + long long result = 0; + int v = -1; + while (*a != 0 && (v = asmdata_atoll_val_(*a, r)) >= 0) { + result *= r; + result += v; + a++; + } + + return result; +} + +long long atoll(const char*a); + +long long asmdata_atoll_(const char* a) { + if (a == NULL) { + return 0; + } + if (a[0] == '0' && a[1] == 'x') { + return asmdata_atoll_r_(a+2, 16); + } else if (a[0] == '0' && a[1] == 'b') { + return asmdata_atoll_r_(a+2, 2); + } else { + return atoll(a); + } +} + +asmdata_map_t* asmdata_map_new(int n, asmdata_iterf_t deletef) { + asmdata_map_t* map = malloc(sizeof(asmdata_map_t)); + if (map == NULL) { + return NULL; + } + + map->table = calloc(n, sizeof(void*)); + if (map->table == NULL) { + free(map); + return NULL; + } + map->ntable = n; + map->deletef = deletef; + //TODO map->udata = 0; + + return map; +} + +int asmdata_map_hash(char* key) { + int i = 0; + int h = 456123; + while (key[i] != 0) { + h = (h * 33) + key[i]; + i++; + } + return h & 0x0FFFFFFF; +} + +void asmdata_map_set(asmdata_map_t* map, char* key, void* value) { + int hash = asmdata_map_hash(key); + int idx = hash % map->ntable; + asmdata_mapentry_t* e = map->table[idx]; + while (e != NULL) { + if (e->hash == hash && asmdata_streq_(key, e->key)) { + asmdata_iterf_t deletef = map->deletef; + if (deletef != NULL) { + deletef(map, e->key, e->value); + } + e->key = key; + e->value = value; + return; + } + e = e->next; + } + // If not present, create a new one. + e = malloc(sizeof(asmdata_mapentry_t)); + if (e == NULL) { + // TODO: Report fatal error. 
+ } + e->hash = hash; + e->key = key; + e->value = value; + e->next = map->table[idx]; + map->table[idx] = e; +} + +void* asmdata_map_get(asmdata_map_t* map, char* key) { + int hash = asmdata_map_hash(key); + asmdata_mapentry_t* e = map->table[hash % map->ntable]; + while (e != NULL) { + if (e->hash == hash && asmdata_streq_(key, e->key)) { + return e->value; + } + e = e->next; + } + return NULL; +} + +void asmdata_map_delete(asmdata_map_t* map) { + asmdata_iterf_t deletef = map->deletef; + int i; + for (i = 0; i < map->ntable; i++) { + asmdata_mapentry_t* e = map->table[i]; + while (e != NULL) { + //if (deletef != NULL) { + // deletef(map, e->key, e->value); + //} + void* oldptr = e; + e = e->next; + free(e); + } + } + free(map); +} + +asmdata_section_t* asmdata_section_new(const char* name) { + asmdata_section_t* result = calloc(1, sizeof(asmdata_section_t)); + + if (result != NULL) { + result->namecopy = asmdata_strdup_(name); + } + + return result; +} +bool asmdata_section_reserveextra(asmdata_section_t* section, int64_t nbytes, bool willfill) { + int32_t granularity = 1024; // Resize the buffer about kilobyte or so at a time to prevent unnecessary realloc calls + /* Firstly, the reserved total needs to be incremented whether filled or not. */ + section->reservedsize += nbytes; + + /* If we're not filling it, we can just leave it on the reserved total (not necessarily any need to allocate/use extra memory!). */ + if (!willfill) { + return true; + } + + /* Make sure it fits in 32 bits (so we don't have to worry whether size_t is 32- or 64- bits) and is below the maximum. 
*/ + if (((int64_t)((int32_t)(section->reservedsize))) != section->reservedsize || section->reservedsize > ASMDATA_MAXFILLED) { + return false; + } + + if (section->reservedsize > section->buffersize) { + section->buffersize = (int32_t)(section->reservedsize); // Already checked that it can fit in 32 bits if filled + while (section->buffersize % granularity != 0) { + section->buffersize++; + } + if (section->buffer == NULL) { + section->buffer = calloc(1, section->buffersize); + } + else { + section->buffer = realloc(section->buffer, section->buffersize); + } + if (section->buffer == NULL) { + return false; + } + } + + section->bufferfilled = (int32_t)(section->reservedsize); + return true; +} + +void* asmdata_section_delete(asmdata_section_t* section) { + if (section != NULL) { + if (section->namecopy != NULL) { + free(section->namecopy); + section->namecopy = NULL; + } + if (section->buffer != NULL) { + free(section->buffer); + section->buffer = NULL; + section->bigendian = false; + section->bufferfilled = 0; + section->buffersize = 0; + section->reservedsize = 0; + section->virtualoffset = 0; + } + + free(section); + } + return NULL; +} + +asmdata_t* asmdata_new() { + asmdata_t* result = calloc(1, sizeof(asmdata_t)); + + if (result != NULL) { + result->extsyntax = true; + result->sections = calloc(ASMDATA_MAXSECTIONS, sizeof(asmdata_section_t*)); + if (!result->sections) { + free(result); + return NULL; + } + result->symbols = calloc(ASMDATA_MAXSYMBOLS, sizeof(asmdata_symbol_t)); + if (!result->sections) { + free(result->sections); + free(result); + return NULL; + } + result->references = calloc(ASMDATA_MAXREFERENCES, sizeof(asmdata_reference_t)); + if (!result->sections) { + free(result->sections); + free(result->references); + free(result); + return NULL; + } + } + + return result; +} +void* asmdata_delete(asmdata_t* asmdata) { + if (asmdata != NULL) { + asmdata->activesection = NULL; + int32_t i; + for (i = 0; i < asmdata->nsections; i++) { + 
asmdata->sections[i] = asmdata_section_delete(asmdata->sections[i]); + } + asmdata->nsections = 0; + free(asmdata); + } + + return NULL; +} + +bool asmdata_finalise(asmdata_t* asmdata) { + /* Finalisation will fail if there's any hint that the code has already been finalised (even if it was done manually by defining symbol + * table sections in assembly code). + */ + if (asmdata->finalised || asmdata_hassection(asmdata, "asmdata.strings") || asmdata_hassection(asmdata, "asmdata.symbols") || asmdata_hassection(asmdata, "asmdata.references")) { + return false; + } + + /* The three linkage sections are created in "optimised" order: You generally need to have read the symbols section to make sense of the + * references section, so symbols are written before references. Strings (and maybe later extra debug info) are written last because they + * need not actually be read in many cases. + */ + if (asmdata_selectsection(asmdata, "asmdata.symbols") == NULL) { + return false; + } + if (asmdata_selectsection(asmdata, "asmdata.references") == NULL) { + return false; + } + if (asmdata_selectsection(asmdata, "asmdata.strings") == NULL) { + return false; + } + + /* The first string will be at offset 0 (which might also imply NULL), so we can start by adding a NULL string there. An interpreter + * should generally handle NULL strings differently, but in case any are automatically read the result will be a marker: */ + asmdata_selectsection(asmdata, "asmdata.strings"); + const char* nullstr = ""; + asmdata_appendbytes(asmdata, (uint8_t*)nullstr, asmdata_strlen_(nullstr) + 1); // Be sure to include the terminating zero byte! 
+ + int32_t i; + for (i = 0; i < asmdata->nsymbols; i++) { + asmdata_section_t* str = asmdata_selectsection(asmdata, "asmdata.strings"); + int64_t stroffset = str->reservedsize; + int32_t strlen = asmdata_strlen_(asmdata->symbols[i].namecopy); + asmdata_appendbytes(asmdata, (uint8_t*)(asmdata->symbols[i].namecopy), strlen + 1); // Make sure we include terminating zero. + asmdata_section_t* sym = asmdata_selectsection(asmdata, "asmdata.symbols"); + asmdata_appendword(asmdata, asmdata->symbols[i].flags, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, asmdata->symbols[i].sectionindex, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, asmdata->symbols[i].sectionoffset, ASMDATA_SIZE_64BIT); + asmdata_appendword(asmdata, stroffset, ASMDATA_SIZE_64BIT); + asmdata_appendword(asmdata, asmdata->symbols[i].x_lhs, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, asmdata->symbols[i].x_op, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, asmdata->symbols[i].x_rhs, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, asmdata->symbols[i].reserved, ASMDATA_SIZE_32BIT); + } + asmdata_section_t* refs = asmdata_selectsection(asmdata, "asmdata.references"); + for (i = 0; i < asmdata->nreferences; i++) { + asmdata_appendword(asmdata, asmdata->references[i].baseflags, ASMDATA_SIZE_8BIT); + asmdata_appendword(asmdata, asmdata->references[i].size, ASMDATA_SIZE_8BIT); + asmdata_appendword(asmdata, asmdata->references[i].extflags, ASMDATA_SIZE_16BIT); + asmdata_appendword(asmdata, asmdata->references[i].sectionindex, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, asmdata->references[i].sectionoffset, ASMDATA_SIZE_64BIT); + asmdata_appendword(asmdata, asmdata->references[i].symbolindex, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, asmdata->references[i].extdata, ASMDATA_SIZE_32BIT); + } + + asmdata->finalised = true; + + return true; +} + +bool asmdata_produceheader(asmdata_t* asmdata, int32_t pagesize, const char* hint1, const char* hint2, int32_t inthint) { + /* Exclude any 
unreasonably small/large page sizes, but allow weird/unaligned ones just in case. */ + if (pagesize < 1 || pagesize >(1024 * 1024 * 1024)) { + return false; + } + /* The header can only be produced once, otherwise it would get too confusing... */ + if (asmdata_hassection(asmdata, "asmdata.fileheader")) { + return false; + } + /* If the strings section hasn't already been initialised, we should do the same initialisation that would happen in + * asmdata_finalise(), ensuring that the strings section is created before the file header. + */ + if (!asmdata_hassection(asmdata, "asmdata.strings")) { + /* The first string will be at offset 0 (which might also imply NULL), so we can start by adding a NULL string there. An interpreter + * should generally handle NULL strings differently, but in case any are automatically read the result will be a marker: */ + if (asmdata_selectsection(asmdata, "asmdata.strings") == NULL) { + return false; + } + const char* nullstr = ""; + asmdata_appendbytes(asmdata, (uint8_t*)nullstr, asmdata_strlen_(nullstr) + 1); // Be sure to include the terminating zero byte! 
+ } + int32_t nstrsection = asmdata_selectsection(asmdata, "asmdata.strings")->sectionnumber; + + /* Now we can start with the file header itself: */ + if (asmdata_selectsection(asmdata, "asmdata.fileheader") == NULL) { + return false; + } + int32_t nhdrsection = asmdata_selectsection(asmdata, "asmdata.fileheader")->sectionnumber; + + asmdata_appendbytes(asmdata, (uint8_t*)"ASMDATA1", 8); + + asmdata_appendword(asmdata, 1, ASMDATA_SIZE_32BIT); // Version number + asmdata_appendword(asmdata, pagesize, ASMDATA_SIZE_32BIT); + + if (hint1 == NULL) { + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); + } + else { + int64_t stroffset = asmdata_selectsection(asmdata, "asmdata.strings")->reservedsize; + asmdata_appendbytes(asmdata, (uint8_t*)hint1, asmdata_strlen_(hint1) + 1); // Include terminating zero byte + asmdata_selectsection(asmdata, "asmdata.fileheader"); + asmdata_appendword(asmdata, stroffset, ASMDATA_SIZE_64BIT); + } + if (hint2 == NULL) { + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); + } + else { + int64_t stroffset = asmdata_selectsection(asmdata, "asmdata.strings")->reservedsize; + asmdata_appendbytes(asmdata, (uint8_t*)hint2, asmdata_strlen_(hint2) + 1); // Include terminating zero byte + asmdata_selectsection(asmdata, "asmdata.fileheader"); + asmdata_appendword(asmdata, stroffset, ASMDATA_SIZE_64BIT); + } + asmdata_appendword(asmdata, inthint, ASMDATA_SIZE_32BIT); + + asmdata_appendword(asmdata, nstrsection, ASMDATA_SIZE_32BIT); + asmdata_appendword(asmdata, nhdrsection, ASMDATA_SIZE_32BIT); + + asmdata_appendword(asmdata, asmdata->nsections, ASMDATA_SIZE_32BIT); + + /* We need to add the section name strings before generating the section list, otherwise the newly-added strings + * won't all get added to the string section before it's totals are recorded in the header. 
+ */ + int64_t nameoffsets[ASMDATA_MAXSECTIONS]; + int32_t i; + for (i = 0; i < asmdata->nsections; i++) { + nameoffsets[i] = asmdata_selectsection(asmdata, "asmdata.strings")->reservedsize; + asmdata_appendbytes(asmdata, (uint8_t*)(asmdata->sections[i]->namecopy), asmdata_strlen_(asmdata->sections[i]->namecopy) + 1); // Include terminating zero byte + } + + /* TODO: Hash values. */ + int64_t hdrsize = asmdata_selectsection(asmdata, "asmdata.fileheader")->reservedsize; + hdrsize += asmdata->nsections * 8 * 8; + hdrsize += 16; // Final file size and checksum fields + // NOTE: The header size is double-checked against the produced header at the end of this function + + int64_t sectionoffset = hdrsize; + while (sectionoffset % pagesize != 0) { + sectionoffset++; + } + for (i = 0; i < asmdata->nsections; i++) { + asmdata_section_t* s = asmdata->sections[i]; + asmdata_appendword(asmdata, s->bigendian ? 1 : 0, ASMDATA_SIZE_32BIT); // Encoding flags (in future may also specify compression etc.) + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_32BIT); // Content flags (in future may specify mapped/unmapped/executable/writable/etc.) + asmdata_appendword(asmdata, sectionoffset, ASMDATA_SIZE_64BIT); // Offset in file + asmdata_appendword(asmdata, s->virtualoffset, ASMDATA_SIZE_64BIT); + if (s == asmdata->activesection) { /* The size field needs to be hard-coded for the header since we're still creating it! 
*/ + asmdata_appendword(asmdata, hdrsize, ASMDATA_SIZE_64BIT); // Size in file (not including page boundary padding) + asmdata_appendword(asmdata, hdrsize, ASMDATA_SIZE_64BIT); // Size in memory (for header section, is the same as + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Reserved + asmdata_appendword(asmdata, nameoffsets[i], ASMDATA_SIZE_64BIT); + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Hash (TODO) + sectionoffset += hdrsize; + } + else { + asmdata_appendword(asmdata, s->bufferfilled, ASMDATA_SIZE_64BIT); // Size in file (not including page boundary padding) + asmdata_appendword(asmdata, s->reservedsize, ASMDATA_SIZE_64BIT); // Size in memory (padded with zero by the loader) + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Reserved + asmdata_appendword(asmdata, nameoffsets[i], ASMDATA_SIZE_64BIT); + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Hash (TODO) + sectionoffset += s->bufferfilled; + } + while (sectionoffset % pagesize != 0) { + sectionoffset++; + } + } + asmdata_appendword(asmdata, sectionoffset, ASMDATA_SIZE_64BIT); // Total file size + asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Checksum (TODO) + + /* Make sure the written header size exactly matches the size we calculated before writing. 
*/ + if (asmdata->activesection->bufferfilled != hdrsize) { + return false; + } + + return true; +} + +asmdata_section_t* asmdata_findsection(asmdata_t* asmdata, const char* sectionname, bool autocreate) { + int32_t i; + for (i = 0; i < asmdata->nsections; i++) { + if (asmdata_streq_(asmdata->sections[i]->namecopy, sectionname)) { + return asmdata->sections[i]; + } + } + if (autocreate && asmdata->nsections < ASMDATA_MAXSECTIONS) { + asmdata_section_t* result = asmdata_section_new(sectionname); + if (result == NULL) { + return NULL; + } + asmdata->sections[asmdata->nsections] = result; + result->sectionnumber = asmdata->nsections; + asmdata->nsections++; + return result; + } + return NULL; +} + + + +bool asmdata_beginfile(asmdata_t* asmdata, const char* name) { + return true; // TODO init? +} + +bool asmdata_endfile(asmdata_t* asmdata, const char* name) { + return true; // TODO cleanup/checks? +} + +static int32_t asmdata_dummysymbol(asmdata_t* asmdata, const char* dummyname) { + int32_t i; + + if (asmdata->nsymbols >= ASMDATA_MAXSYMBOLS) { + return -1; + } + i = asmdata->nsymbols; + asmdata->symbols[i].flags |= ASMDATA_SYMBOL_DUMMY; + asmdata->symbols[i].namecopy = asmdata_strdup_(dummyname); + asmdata->symbols[i].sectionindex = -1; + asmdata->nsymbols++; + return i; +} + +int32_t asmdata_findsymbol(asmdata_t* asmdata, const char* name, bool autocreate) { + int32_t i; + for (i = 0; i < asmdata->nsymbols; i++) { + if (asmdata_streq_(asmdata->symbols[i].namecopy, name)) { + return i; + } + } + if (!autocreate || asmdata->nsymbols >= ASMDATA_MAXSYMBOLS) { + return -1; + } + i = asmdata->nsymbols; + asmdata->symbols[i].namecopy = asmdata_strdup_(name); + asmdata->symbols[i].sectionindex = -1; + asmdata->nsymbols++; + return i; +} + +int32_t asmdata_symbolhere(asmdata_t* asmdata, const char* name) { + int32_t idx = asmdata_findsymbol(asmdata, name, true); + if (idx < 0) { + return idx; + } + asmdata_symbol_t* symbol = &(asmdata->symbols[idx]); + if 
(symbol->sectionindex != -1) { + return -1; // It's already defined? + } + symbol->sectionindex = asmdata_activesection(asmdata)->sectionnumber; + symbol->sectionoffset = asmdata_activesection(asmdata)->reservedsize; + // TODO: Clear flags etc.? + return idx; +} + +int32_t asmdata_appendreferenceword(asmdata_t* asmdata, const char* name, int8_t size) { + int64_t startoffset = asmdata_activesection(asmdata)->reservedsize; + if (!asmdata_appendword(asmdata, 0, size)) { + return -1; + } + if (asmdata->nreferences >= ASMDATA_MAXREFERENCES) { + return -1; + } + int32_t idx = asmdata->nreferences; + asmdata_reference_t* reference = &(asmdata->references[idx]); + reference->symbolindex = asmdata_findsymbol(asmdata, name, true); + if (reference->symbolindex < 0) { + return -1; + } + reference->sectionindex = asmdata_activesection(asmdata)->sectionnumber; + //printf("Note: Reference '%s' at offset %d\n", name, startoffset); + reference->sectionoffset = startoffset; + reference->size = size; + // TODO: Clear flags etc.? + asmdata->nreferences++; + return idx; +} + +static int32_t asmdata_appendsubref_(asmdata_t* asmdata, int32_t symbol, int8_t size) { + int64_t startoffset = asmdata_activesection(asmdata)->reservedsize; + if (!asmdata_appendword(asmdata, 0, size)) { + return -1; + } + if (asmdata->nreferences >= ASMDATA_MAXREFERENCES) { + return -1; + } + int32_t idx = asmdata->nreferences; + asmdata_reference_t* reference = &(asmdata->references[idx]); + reference->symbolindex = symbol; + if (reference->symbolindex < 0) { + return -1; + } + reference->sectionindex = asmdata_activesection(asmdata)->sectionnumber; + //printf("Note: Reference '%s' at offset %d\n", name, startoffset); + reference->sectionoffset = startoffset; + reference->size = size; + // TODO: Clear flags etc.? 
+ asmdata->nreferences++; + return idx; +} + +int32_t asmdata_subx_(asmdata_t* asmdata, int32_t tt, const char* tokenstr, asmlnx_t* expr); +int32_t asmdata_subx_(asmdata_t* asmdata, int32_t tt, const char* tokenstr, asmlnx_t* expr) { + if (tt == ASMT_TOKENTYPE_NAME) { // Shortcut if this part of the expression is just a simple symbol name + return asmdata_findsymbol(asmdata, tokenstr, true); + } + + int32_t result = asmdata_dummysymbol(asmdata, tokenstr); + + if (result >= 0) { + asmdata_symbol_t* sym = &asmdata->symbols[result]; + switch (tt) { + case ASMT_TOKENTYPE_NUMBER: + sym->flags |= ASMDATA_SYMBOL_CONST; + sym->sectionoffset = asmdata_atoll_(tokenstr); + break; + case ASMT_TOKENTYPE_OPENBR: + sym->flags |= ASMDATA_SYMBOL_EXPR; + sym->x_lhs = asmdata_subx_(asmdata, expr->lhstype, expr->lhscopy, expr->lhsx); + if (sym->x_lhs < 0) { + return -1; + } + sym->x_op = asmdata_dummysymbol(asmdata, expr->opcopy); + if (sym->x_op < 0) { + return -1; + } + asmdata->symbols[sym->x_op].flags |= ASMDATA_SYMBOL_OP; + sym->x_rhs = asmdata_subx_(asmdata, expr->rhstype, expr->rhscopy, expr->rhsx); + if (sym->x_rhs < 0) { + return -1; + } + //sym->flags |= ASMDATA_SYMBOL_CONST; + break; + default: + return -1; + } + } + + return result; +} + +static bool asmdata_appendvalue_(asmdata_t* asmdata, int32_t tokentype, const char* tokenstr, asmlnx_t* expr, int8_t primsize) { + if (tokentype == ASMT_TOKENTYPE_NUMBER) { + long long x = asmdata_atoll_(tokenstr); + //printf("Got number %lld\n", x); + return asmdata_appendword(asmdata, (int64_t)x, primsize); + } + else if (tokentype == ASMT_TOKENTYPE_STRING) { + int32_t l = asmdata_strlen_(tokenstr); + int32_t i; + for (i = 0; i < l; i++) { + if (!asmdata_appendword(asmdata, (int64_t)((int8_t)(tokenstr[i])), primsize)) { + return false; + } + } + return true; + } + else if (tokentype == ASMT_TOKENTYPE_NAME) { + int32_t refidx = asmdata_appendreferenceword(asmdata, tokenstr, primsize); + if (refidx < 0) { + return false; + } + else { + 
return true; + } + } + else if (tokentype == ASMT_TOKENTYPE_OPENBR) { + int32_t exprsym = asmdata_subx_(asmdata, tokentype, tokenstr, expr); + if (exprsym < 0) { + return false; + } + int32_t ref = asmdata_appendsubref_(asmdata, exprsym, primsize); + if (ref < 0) { + return false; + } + else { + return true; + } + } + else { + return false; + } +} + +bool asmdata_isvalidasmln(asmdata_t* asmdata, asmln_t* asmln) { + if (asmln == NULL || asmln->errorcopy != NULL) { + return false; // If there's a major error it's obviously not a valid data line + } + + if (asmln->instrcopy == NULL) { + return true; // If there's no "instruction" part then it's either a plain label or an empty/comment line, which is perfectly valid + } + + if (asmdata->extsyntax) { + if (asmdata_streq_(asmln->instrcopy, ".text") || asmdata_streq_(asmln->instrcopy, ".data")) { + return true; + } + if (asmdata_streq_(asmln->instrcopy, ".string") && asmln->nparams == 1) { + return true; + } + if (asmdata_streq_(asmln->instrcopy, ".long") || asmdata_streq_(asmln->instrcopy, ".long")) { + return true; + } + } + + // For normal lines (with an "instruction" part) then we recognise anything that is data or simple section/symbol/linkage instruction + return asmdata_streq_(asmln->instrcopy, "data8") || asmdata_streq_(asmln->instrcopy, "data16") + || asmdata_streq_(asmln->instrcopy, "data32") || asmdata_streq_(asmln->instrcopy, "data64") + || asmdata_streq_(asmln->instrcopy, "data.section") || asmdata_streq_(asmln->instrcopy, "data.symbol") + || asmdata_streq_(asmln->instrcopy, "align") || asmdata_streq_(asmln->instrcopy, "reserve"); +} + +bool asmdata_asmln(asmdata_t* asmdata, asmln_t* asmln) { + if (!asmdata_isvalidasmln(asmdata, asmln)) { + if (asmln != NULL && asmln->errorcopy == NULL) { + asmln->errorcopy = asmdata_strdup_("Not a valid data instruction"); + } + return false; + } + + if (asmln->labelcopy != NULL) { + int32_t idx = asmdata_symbolhere(asmdata, asmln->labelcopy); + //printf("SYMBOL '%s'\n", 
asmln->labelcopy); + if (idx < 0) { + asmln->errorcopy = asmdata_strdup_("Failed to create symbol (label already defined?)"); + return false; + } + } + + if (asmln->instrcopy == NULL && asmln->nparams == 0) { + // No instruction. We're done! (Parameters were also checked to be 0, just to be pedantic.) + return true; + } + + if (asmdata->extsyntax && (asmdata_streq_(asmln->instrcopy, ".text") || asmdata_streq_(asmln->instrcopy, ".data"))) { + asmdata_selectsection(asmdata, asmln->instrcopy+1); + return true; + } + else if (asmdata_streq_(asmln->instrcopy, "data.section")) { + if (asmln->nparams == 0) { + asmdata->activesection = NULL; // Reset the section + return true; + } + else if (asmln->nparams == 1) { + if (asmln->paramtype[0] != ASMT_TOKENTYPE_NAME && asmln->paramtype[0] != ASMT_TOKENTYPE_STRING) { + asmln->errorcopy = asmdata_strdup_("section instruction expects section identified by a name or string (but got a different parameter)"); + return false; + } + asmdata_selectsection(asmdata, asmln->paramcopy[0]); + return true; + } + else { + asmln->errorcopy = asmdata_strdup_("TODO: Additional section parameters"); + return false; + } + } + else if (asmdata_streq_(asmln->instrcopy, "data.symbol")) { + if (asmln->nparams != 2 || asmln->paramtype[0] != ASMT_TOKENTYPE_NAME || asmln->paramtype[1] != ASMT_TOKENTYPE_NUMBER) { + asmln->errorcopy = asmdata_strdup_("data.symbol special instruction requires a symbol name and flag integer"); + return false; + } + int32_t idx = asmdata_findsymbol(asmdata, asmln->paramcopy[0], true); + if (idx < 0) { + return idx; + } + asmdata_symbol_t* symbol = &(asmdata->symbols[idx]); + if (symbol->sectionindex != -1) { + return -1; // It's already defined? 
+ } + long x = asmdata_atoll_(asmln->paramcopy[1]); + symbol->flags |= (int)x; + return true; + } + else if (asmdata_streq_(asmln->instrcopy, "align")) { + if (asmln->nparams != 1 || asmln->paramtype[0] != ASMT_TOKENTYPE_NUMBER) { + asmln->errorcopy = asmdata_strdup_("align special instruction requires a simple integer"); + return false; + } + long x = asmdata_atoll_(asmln->paramcopy[0]); + while ((asmdata_activesection(asmdata)->reservedsize % x) != 0) { + asmdata_activesection(asmdata)->reservedsize++; + } + return true; + } + else if (asmdata_streq_(asmln->instrcopy, "reserve")) { + if (asmln->nparams != 1 || asmln->paramtype[0] != ASMT_TOKENTYPE_NUMBER) { + asmln->errorcopy = asmdata_strdup_("reserve special instruction requires a simple integer"); + return false; + } + long x = asmdata_atoll_(asmln->paramcopy[0]); + while (x > 0) { + asmdata_activesection(asmdata)->reservedsize++; + x--; + } + return true; + } + else if (asmdata_streq_(asmln->instrcopy, "data8")) { + if (asmln->nparams < 1) { + asmln->errorcopy = asmdata_strdup_("data instructions generally expect at least one value, otherwise they are probably erroneous"); + return false; + } + int32_t i; + for (i = 0; i < asmln->nparams; i++) { + //printf("DOING ARG %d\n", i); + if (!asmdata_appendvalue_(asmdata, asmln->paramtype[i], asmln->paramcopy[i], asmln->paramx[i], ASMDATA_SIZE_8BIT)) { + asmln->errorcopy = asmdata_strdup_("invalid data value"); + return false; + } + } + return true; + } + else if (asmdata->extsyntax && asmdata_streq_(asmln->instrcopy, ".string")) { + if (asmln->nparams < 1) { + asmln->errorcopy = asmdata_strdup_("data instructions generally expect at least one value, otherwise they are probably erroneous"); + return false; + } + int32_t i; + for (i = 0; i < asmln->nparams; i++) { + //printf("DOING ARG %d\n", i); + if (!asmdata_appendvalue_(asmdata, asmln->paramtype[i], asmln->paramcopy[i], asmln->paramx[i], ASMDATA_SIZE_8BIT)) { + asmln->errorcopy = asmdata_strdup_("invalid data 
value"); + return false; + } + } + asmdata_appendvalue_(asmdata, ASMT_TOKENTYPE_NUMBER, "0", NULL, ASMDATA_SIZE_8BIT); + return true; + } + else if (asmdata_streq_(asmln->instrcopy, "data16")) { + if (asmln->nparams < 1) { + asmln->errorcopy = asmdata_strdup_("data instructions generally expect at least one value, otherwise they are probably erroneous"); + return false; + } + int32_t i; + for (i = 0; i < asmln->nparams; i++) { + if (!asmdata_appendvalue_(asmdata, asmln->paramtype[i], asmln->paramcopy[i], asmln->paramx[i], ASMDATA_SIZE_16BIT)) { + asmln->errorcopy = asmdata_strdup_("invalid data value"); + return false; + } + } + return true; + } + else if (asmdata_streq_(asmln->instrcopy, "data32")) { + if (asmln->nparams < 1) { + asmln->errorcopy = asmdata_strdup_("data instructions generally expect at least one value, otherwise they are probably erroneous"); + return false; + } + int32_t i; + for (i = 0; i < asmln->nparams; i++) { + if (!asmdata_appendvalue_(asmdata, asmln->paramtype[i], asmln->paramcopy[i], asmln->paramx[i], ASMDATA_SIZE_32BIT)) { + asmln->errorcopy = asmdata_strdup_("invalid data value"); + return false; + } + } + return true; + } + else if (asmdata_streq_(asmln->instrcopy, "data64") || (asmdata->extsyntax && asmdata_streq_(asmln->instrcopy, ".long"))) { + if (asmln->nparams < 1) { + asmln->errorcopy = asmdata_strdup_("data instructions generally expect at least one value, otherwise they are probably erroneous"); + return false; + } + int32_t i; + for (i = 0; i < asmln->nparams; i++) { + if (!asmdata_appendvalue_(asmdata, asmln->paramtype[i], asmln->paramcopy[i], asmln->paramx[i], ASMDATA_SIZE_64BIT)) { + asmln->errorcopy = asmdata_strdup_("invalid data value"); + return false; + } + } + return true; + } + else { + asmln->errorcopy = asmdata_strdup_("internal error, asmdata recognised input but didn't translate it properly"); + return false; + } +} diff --git a/asmdata.h b/asmdata.h new file mode 100644 index 0000000..f4c947e --- /dev/null +++ 
b/asmdata.h @@ -0,0 +1,310 @@ +#ifndef ASMDATA_H +#define ASMDATA_H + +#include "asmln.h" + +#define ASMDATA_MAXSECTIONS 200 +#define ASMDATA_MAXFILLED (1024*1024*1024) +#define ASMDATA_MAXSYMBOLS 400000 +#define ASMDATA_MAXREFERENCES 400000 + +typedef struct asmdata asmdata_t; +typedef struct asmdata_section asmdata_section_t; +typedef struct asmdata_symbol asmdata_symbol_t; +typedef struct asmdata_reference asmdata_reference_t; +typedef struct asmdata_mapentry asmdata_mapentry_t; +typedef struct asmdata_map asmdata_map_t; + +struct asmdata_reference { + int32_t symbolindex; + int32_t sectionindex; + int64_t sectionoffset; + int8_t size; // 0 for 8bit, 1 for 16bit, 2 for 32bit, 3 for 64bit + int8_t baseflags; + int16_t extflags; + int32_t extdata; +}; + +#define ASMDATA_SYMBOL_DUMMY (1<<8) +#define ASMDATA_SYMBOL_EXPR (1<<9) +#define ASMDATA_SYMBOL_CONST (1<<10) +#define ASMDATA_SYMBOL_OP (1<<11) + +struct asmdata_symbol { + const char* namecopy; + int64_t sectionoffset; // 0 if not defined, also reused for constant values in expressions (but with -1 for section) + int32_t flags; // 0 if not defined + int32_t sectionindex; // -1 if not defined + int32_t firstreferenceindex; // -1 if not defined + int32_t x_lhs; + int32_t x_op; + int32_t x_rhs; + int32_t reserved; +}; + +struct asmdata_section { + int32_t sectionnumber; + bool bigendian; + const char* namecopy; + uint8_t* buffer; + int32_t buffersize; + int32_t bufferfilled; + int64_t reservedsize; + int64_t virtualoffset; +}; + +struct asmdata { + bool finalised; + bool extsyntax; + int32_t nsections; + int32_t nsymbols; + int32_t nreferences; + int32_t pad; + asmdata_section_t* activesection; + asmdata_section_t** sections; + asmdata_symbol_t* symbols; + asmdata_reference_t* references; + /* + asmdata_section_t* sections[ASMDATA_MAXSECTIONS]; + asmdata_symbol_t symbols[ASMDATA_MAXSYMBOLS]; + asmdata_reference_t references[ASMDATA_MAXREFERENCES]; + */ +}; + +typedef void (*asmdata_iterf_t)(asmdata_map_t*, char*, 
void*); + +struct asmdata_mapentry { + int hash; + char* key; + void* value; + asmdata_mapentry_t* next; +}; + +struct asmdata_map { + asmdata_mapentry_t** table; + int ntable; // Number of slots allocated in the table + asmdata_iterf_t deletef; +}; + +asmdata_map_t* asmdata_map_new(int n, asmdata_iterf_t deletef); +void asmdata_map_set(asmdata_map_t* map, char* key, void* value); +void* asmdata_map_get(asmdata_map_t* map, char* key); +void asmdata_map_delete(asmdata_map_t* map); + +asmdata_section_t* asmdata_section_new(const char* name); +bool asmdata_section_reserveextra(asmdata_section_t* section, int64_t nbytes, bool willfill); +void* asmdata_section_delete(asmdata_section_t* section); + +ASMLN_INLINE bool asmdata_section_align(asmdata_section_t* section, int64_t alignment) { + while (section->reservedsize % alignment != 0) { + if (!asmdata_section_reserveextra(section, 1, false)) { + return false; + } + } + return true; +} + +ASMLN_INLINE uint8_t* asmdata_section_fill(asmdata_section_t* section, int64_t nbytes) { + int64_t offset = section->reservedsize; + + if (((int64_t)((int32_t)offset)) != offset || ((int64_t)((int32_t)nbytes)) != nbytes) { + return NULL; + } + + if (!asmdata_section_reserveextra(section, nbytes, true)) { + return NULL; + } + + return &(section->buffer[(int32_t)offset]); +} + +#define ASMDATA_SIZE_8BIT ((int8_t)0) +#define ASMDATA_SIZE_16BIT ((int8_t)1) +#define ASMDATA_SIZE_32BIT ((int8_t)2) +#define ASMDATA_SIZE_64BIT ((int8_t)3) +ASMLN_INLINE bool asmdata_section_appendword(asmdata_section_t* section, int64_t word, int8_t size) { + uint8_t* target = asmdata_section_fill(section, 1LL << size); + + if (target == NULL) { + return false; + } + + switch (size) { + case ASMDATA_SIZE_8BIT: + target[0] = (uint8_t)word; + return true; + case ASMDATA_SIZE_16BIT: + if (section->bigendian) { + target[0] = (uint8_t)(word >> 8); + target[1] = (uint8_t)(word); + } + else { + target[0] = (uint8_t)(word); + target[1] = (uint8_t)(word >> 8); + } + 
return true; + case ASMDATA_SIZE_32BIT: + if (section->bigendian) { + target[0] = (uint8_t)(word >> 24); + target[1] = (uint8_t)(word >> 16); + target[2] = (uint8_t)(word >> 8); + target[3] = (uint8_t)(word); + } + else { + target[0] = (uint8_t)(word); + target[1] = (uint8_t)(word >> 8); + target[2] = (uint8_t)(word >> 16); + target[3] = (uint8_t)(word >> 24); + } + return true; + case ASMDATA_SIZE_64BIT: + if (section->bigendian) { + target[0] = (uint8_t)(word >> 56); + target[1] = (uint8_t)(word >> 48); + target[2] = (uint8_t)(word >> 40); + target[3] = (uint8_t)(word >> 32); + target[4] = (uint8_t)(word >> 24); + target[5] = (uint8_t)(word >> 16); + target[6] = (uint8_t)(word >> 8); + target[7] = (uint8_t)(word); + } + else { + target[0] = (uint8_t)(word); + target[1] = (uint8_t)(word >> 8); + target[2] = (uint8_t)(word >> 16); + target[3] = (uint8_t)(word >> 24); + target[4] = (uint8_t)(word >> 32); + target[5] = (uint8_t)(word >> 40); + target[6] = (uint8_t)(word >> 48); + target[7] = (uint8_t)(word >> 58); + } + return true; + default: + return false; + } +} + +ASMLN_INLINE bool asmdata_section_appendbytes(asmdata_section_t* section, uint8_t* source, int32_t nbytes) { + uint8_t* target = asmdata_section_fill(section, nbytes); + + if (target == NULL) { + return false; + } + + int32_t i; + for (i = 0; i < nbytes; i++) { + target[i] = source[i]; + } + + return true; +} + +asmdata_t* asmdata_new(); +void* asmdata_delete(asmdata_t* asmdata); + +/* This function should be called (exactly) once before extracting data, assuming you want linkage information retained. + * It will assemble the symbol and references list (and possibly any additional metadata) into their own special sections. + * You generally shouldn't assemble anything else after finalising the asmdata structure (the API will still let you though, + * in case you want to add e.g. 
a special checksum or signature section based on the finalised contents of the other sections, + * but defining or using any symbols after this point is an error). + */ +bool asmdata_finalise(asmdata_t* asmdata); + +/* Produces a simple file header. This is added as the last section (typically, but not necessarily, after finalisation). A + * full binary file can then be produced by writing - firstly - the header section, and then each section (including the header + * again) and padding to the given page boundary after every section (including the first and last copies of the header section). + * Two "hint" strings (references into the strings section) can be provided as a simple means to classify file types within + * higher-level environments (e.g. a system might use a convention like hint1="program" hint2="generic-dynamic-64bit", and might + * use different hints like hint1="library" vs hint1="program" to distinguish components, but no convention is mandated specifically). + * The hint strings are only intended to confirm type information and may be empty or ignored. An additional "inthint" field is + * also added in case high level systems need to be able to quickly identify things like architecture flags without loading strings. + * No additional metadata (e.g. filename, architecture, build time) is added in the file header. This is by design (it can easily + * be added in another section if necessary). A simple checksum of each section is added with its record in the header, while + * the checksum of the file header itself is calculated with its own checksum field set to zero (before it has been set!). The + * second copy of the file header is added primarily for integrity (i.e. if a section may be corrupted, how do we know the header + * or checksum itself is not corrupted? With a second copy of course), however its secondary purpose is to verify the end of file + * for a loader. 
The file header will also contain a version number (currently 1), which should be taken as the version of the + * file header/format only (not necessarily related to the version of the assembler/compiler/architecture or even necessarily of + * the symbols/references sections which may not even exist in the output, but the strings section must be compatible for the + * hints to be used). + * + * Note on duplicating header: Having the header in its own section may also be convenient for special cases e.g. having to + * inspect the header itself in "readable" assembly output or, in the future, including alternative headers for different + * environments or files with multiple sub-files/file-headers for the purposes of optimisation. In these cases, a definitive + * file header is still given at the start of the file, but an interpreter may use that header's section list to find the most + * suitable alternative file header for its environment. + * + * Note on page sizes: For compact binaries, a page size of 1 will leave no padding between sections, but there are other considerations: + * Larger page sizes are primarily useful for optimising specific cases and specifically for sharing (at page granularity) + * these pages between multiple program instances in a modern (multitasking) operating system. A page size of up to around + * 64kb might make sense in some cases but larger page sizes (e.g. 2MB) typically lead to far-oversized program files. In + * any case, a loader can still determine the same program contents regardless of the page size, so a smaller size (either 1 + * or a value such as 4, 8, or 16 to ensure basic alignment of fields) is generally a better default than a larger one. An + * interpreter may expect a specific page size matching its own environmental considerations (e.g. if it's too large a + * smaller machine might run out of memory, and if it's too small it might have to copy all the data for alignment). 
+ */ +bool asmdata_produceheader(asmdata_t* asmdata, int32_t pagesize, const char* hint1, const char* hint2, int32_t inthint); + +asmdata_section_t* asmdata_findsection(asmdata_t* asmdata, const char* sectionname, bool autocreate); + +ASMLN_INLINE bool asmdata_hassection(asmdata_t* asmdata, const char* sectionname) { + return asmdata_findsection(asmdata, sectionname, false) != NULL; +} + +ASMLN_INLINE asmdata_section_t* asmdata_selectsection(asmdata_t* asmdata, const char* sectionname) { + asmdata_section_t* result = asmdata_findsection(asmdata, sectionname, true); + + if (result != NULL) { + asmdata->activesection = result; + } + + return result; +} + +ASMLN_INLINE asmdata_section_t* asmdata_activesection(asmdata_t* asmdata) { + if (asmdata->activesection == NULL) { + return asmdata_selectsection(asmdata, "data"); + } + return asmdata->activesection; +} + +ASMLN_INLINE bool asmdata_appendword(asmdata_t* asmdata, int64_t word, int8_t size) { + return asmdata_section_appendword(asmdata_activesection(asmdata), word, size); +} + +ASMLN_INLINE bool asmdata_appendbytes(asmdata_t* asmdata, uint8_t* source, int32_t nbytes) { + return asmdata_section_appendbytes(asmdata_activesection(asmdata), source, nbytes); +} + +bool asmdata_beginfile(asmdata_t* asmdata, const char* name); +bool asmdata_endfile(asmdata_t* asmdata, const char* name); + +int32_t asmdata_findsymbol(asmdata_t* asmdata, const char* name, bool autocreate); +int32_t asmdata_symbolhere(asmdata_t* asmdata, const char* name); +int32_t asmdata_appendreferenceword(asmdata_t* asmdata, const char* name, int8_t size); + +bool asmdata_isvalidasmln(asmdata_t* asmdata, asmln_t* asmln); +bool asmdata_asmln(asmdata_t* asmdata, asmln_t* asmln); + +ASMLN_INLINE bool asmdata_isvalidln(asmdata_t* asmdata, const char* ln) { + asmln_t* asmln = asmln_new(ln); + if (asmln == NULL) { + return false; + } + bool result = asmdata_isvalidasmln(asmdata, asmln); + asmln_delete(asmln); + return result; +} + +ASMLN_INLINE bool 
asmdata_ln(asmdata_t* asmdata, const char* ln) { + asmln_t* asmln = asmln_new(ln); + if (asmln == NULL) { + return false; + } + bool result = asmdata_isvalidasmln(asmdata, asmln); + asmln_delete(asmln); + return result; +} + +/* From ifndef at top of file: */ +#endif diff --git a/asmln.c b/asmln.c new file mode 100644 index 0000000..f916f91 --- /dev/null +++ b/asmln.c @@ -0,0 +1,388 @@ +#include "asmln.h" +#include +#include +#include // TODO: Remove this, it's only for debugging/testing.. + +static int32_t asmln_strlen_(const char* str) { + if (str == NULL) { + return 0; + } + + int32_t l = 0; + + while (str[l] != 0) { + l++; + } + + return l; +} + +static char* asmln_strndup_(char* str, int maxlen) { + char* result = calloc(maxlen + 1, 1); + + if (result != NULL && str != NULL) { + int32_t i; + for (i = 0; i < maxlen; i++) { + if (str[i] == 0) { + return (char*)result; + } + result[i] = str[i]; + } + } + + return (char*)result; +} + +static char* asmln_strdup_(char* str) { + return asmln_strndup_(str, asmln_strlen_(str)); +} + +static char* asmln_tokendup_(asmt_t* asmt) { + int32_t t = asmt_tokentype(asmt); + int32_t len = asmt_tokenlength(asmt); + if (len <= 0) { + return NULL; + } + if (t == ASMT_TOKENTYPE_STRING) { + return asmln_strndup_(asmt->input + (asmt->index + 1), len - 2); // Skip quotes + } + if (t == ASMT_TOKENTYPE_LABEL) { + len--; + } + //printf("Duplicating token type %d length %d\n", t, len); + + const char* result = asmln_strndup_(asmt->input + asmt->index, len); + + //printf("Got '%s'\n", result); + + return result; +} + +bool asmlnx_parse_subexpression(asmln_t* asmln, asmt_t* asmt, int32_t* type_var, char** copy_var, asmlnx_t** x_var, int32_t* incr_var) { + bool mayhavemoreparams = true; + int32_t tt = asmt_tokentype(asmt); + int32_t subc = 0; + //fprintf(stderr, "Got token type #%d\n", tt); + switch (tt) + { + case ASMT_TOKENTYPE_OPENBR: + x_var[0] = calloc(1, sizeof(asmlnx_t)); + if (x_var[0] == NULL) { + fprintf(stderr, "MEMORY FAILURE\n"); 
+ return false; // TODO: Better error handling here? + } + type_var[0] = tt; + copy_var[0] = asmln_strdup_("(...)"); // TODO: Copy the whole expression source for debugging? (Probably not worthwhile.) + incr_var[0]++; + asmt_skiptoken(asmt); + if (asmlnx_parse_subexpression(asmln, asmt, &(x_var[0]->lhstype), &(x_var[0]->lhscopy), &(x_var[0]->lhsx), &subc)) { + fprintf(stderr, "BAD LHS\n"); + return false; + } + if (asmt_tokentype(asmt) != ASMT_TOKENTYPE_NAME) { + fprintf(stderr, "NOT A NAME\n"); + return false; + } + x_var[0]->opcopy = asmln_tokendup_(asmt); + //fprintf(stderr, "GOT OPERATOR '%s'\n", x_var[0]->opcopy); + asmt_skiptoken(asmt); + if (asmlnx_parse_subexpression(asmln, asmt, &(x_var[0]->rhstype), &(x_var[0]->rhscopy), &(x_var[0]->rhsx), &subc)) { + fprintf(stderr, "BAD RHS\n"); + return false; + } + if (asmt_tokentype(asmt) != ASMT_TOKENTYPE_CLOSEBR) { + fprintf(stderr, "MISSING CLOSE\n"); + return false; + } + asmt_skiptoken(asmt); + if (asmt_tokentype(asmt) == ASMT_TOKENTYPE_COMMA) { + //printf("I got a comma\n"); + asmt_skiptoken(asmt); + } + else { + mayhavemoreparams = false; + } + break; + case ASMT_TOKENTYPE_NAME: + case ASMT_TOKENTYPE_NUMBER: + case ASMT_TOKENTYPE_STRING: + type_var[0] = tt; + copy_var[0] = asmln_tokendup_(asmt); + x_var[0] = NULL; + incr_var[0]++; + asmt_skiptoken(asmt); + // A hack to allow GNU-style offsets like in ld a0, 0(sp) + // This translates e.g. 
0(sp) to (0 OFF sp) + if (/*tt == ASMT_TOKENTYPE_NUMBER && */asmt_tokentype(asmt) == ASMT_TOKENTYPE_OPENBR) { + asmt_skiptoken(asmt); + x_var[0] = calloc(1, sizeof(asmlnx_t)); + x_var[0]->lhstype = type_var[0]; + x_var[0]->lhscopy = asmln_strdup_(copy_var[0]); + x_var[0]->lhsx = NULL; + x_var[0]->opcopy = asmln_strdup_("OFF"); + //printf("Copied '%s'\n", x_var[0]->lhscopy); + copy_var[0] = NULL; + type_var[0] = ASMT_TOKENTYPE_OPENBR; + + + if (asmlnx_parse_subexpression(asmln, asmt, &(x_var[0]->rhstype), &(x_var[0]->rhscopy), &(x_var[0]->rhsx), &subc)) { + fprintf(stderr, "BAD RHS\n"); + return false; + } + if (asmt_tokentype(asmt) != ASMT_TOKENTYPE_CLOSEBR) { + fprintf(stderr, "MISSING CLOSE\n"); + return false; + } + asmt_skiptoken(asmt); + } + if (asmt_tokentype(asmt) == ASMT_TOKENTYPE_COMMA) { + //printf("I got a comma\n"); + asmt_skiptoken(asmt); + } else { + mayhavemoreparams = false; + } + break; + default: + mayhavemoreparams = false; + break; + } + + return mayhavemoreparams; +} + + + +// Parses a C-style string +static char* asmt_cstringhack(asmt_t* asmt) { + asmt_skipspaces(asmt); + if (asmt_isend(asmt) || !asmt_isstringstart(asmt)) { + return NULL; + } + asmt->index++; + if (asmt_isend(asmt)) { + return NULL; + } + char* result = calloc(asmt->length+1,1); + if (result == NULL) { + return NULL; + } + result[0] = '\"'; + int resulti = 1; + bool finished = false; + while (!asmt_isend(asmt) && !finished) { + if (asmt->input[asmt->index] == '\\') { + asmt->index++; + if (asmt_isend(asmt)) break; + switch (asmt->input[asmt->index]) { + case 'r': + result[resulti] = '\r'; + resulti++; + break; + case 'n': + result[resulti] = '\n'; + resulti++; + break; + case 't': + result[resulti] = '\t'; + resulti++; + break; + case '\'': + result[resulti] = '\n'; + resulti++; + break; + case '\"': + result[resulti] = '\"'; + resulti++; + break; + case '\\': + result[resulti] = '\\'; + resulti++; + break; + default: + result[resulti] = asmt->input[asmt->index]; + resulti++; 
+ } + asmt->index++; + } else { + result[resulti] = asmt->input[asmt->index]; + if (result[resulti] == '\"') { + finished = true; + } + resulti++; + asmt->index++; + } + } + result[resulti] = 0; + if (!finished) { + free(result); + return NULL; + } + return result; +} + +static void asmln_parse_inner(asmln_t* asmln, const char* sourceline) { + if (asmln->errorcopy != NULL || asmln->instrcopy != NULL || asmln->labelcopy != NULL || asmln->commentcopy != NULL || asmln->nparams != 0) { + asmln->errorcopy = asmln_strdup_("Assembler structure reused improperly"); + return; + } + if (sourceline == NULL) { + asmln->errorcopy = asmln_strdup_("Source line is NULL"); + return; + } + asmt_t asmt; + asmt.input = sourceline; + asmt.length = asmln_strlen_(sourceline); + asmt.index = 0; + + if (asmt_tokentype(&asmt) == ASMT_TOKENTYPE_LABEL) { + asmln->labelcopy = asmln_tokendup_(&asmt); + asmt_skiptoken(&asmt); + } + else { + asmln->labelcopy = NULL; + } + asmln->nparams = 0; + if (asmt_tokentype(&asmt) == ASMT_TOKENTYPE_NAME) { + asmln->instrcopy = asmln_tokendup_(&asmt); + asmt_skiptoken(&asmt); + + bool mayhavemoreparams = true; + // For compatibility with .string "Hello\n" type strings + if (asmln->instrcopy[0] == '.' 
+ && asmln->instrcopy[1] == 's' + && asmln->instrcopy[2] == 't' + && asmln->instrcopy[3] == 'r' + && asmln->instrcopy[4] == 'i' + && asmln->instrcopy[5] == 'n' + && asmln->instrcopy[6] == 'g' + && asmln->instrcopy[7] == 0) { + asmln->paramcopy[0] = asmt_cstringhack(&asmt); + if (asmln->paramcopy[0] != NULL) { + asmln->paramtype[0] = ASMT_TOKENTYPE_STRING; + asmln->nparams = 1; + mayhavemoreparams = false; + } + } + + int32_t tt; + while (mayhavemoreparams) { + if (asmln->nparams >= ASMLN_MAXPARAMS) { + asmln->errorcopy = asmln_strdup_("Too many parameters"); + return; + } + mayhavemoreparams = asmlnx_parse_subexpression(asmln, &asmt, &asmln->paramtype[asmln->nparams], &asmln->paramcopy[asmln->nparams], &asmln->paramx[asmln->nparams], &asmln->nparams); + /*switch (tt = asmt_tokentype(&asmt)) + { + case ASMT_TOKENTYPE_OPENBR: + + case ASMT_TOKENTYPE_NAME: + case ASMT_TOKENTYPE_NUMBER: + case ASMT_TOKENTYPE_STRING: + if (asmln->nparams >= ASMLN_MAXPARAMS) { + asmln->errorcopy = asmln_strdup_("Too many parameters"); + return; + } + asmln->paramtype[asmln->nparams] = tt; + asmln->paramcopy[asmln->nparams] = asmln_tokendup_(&asmt); + asmln->nparams++; + asmt_skiptoken(&asmt); + if (asmt_tokentype(&asmt) == ASMT_TOKENTYPE_COMMA) { + //printf("I got a comma\n"); + asmt_skiptoken(&asmt); + } else { + mayhavemoreparams = false; + } + break; + default: + mayhavemoreparams = false; + break; + }*/ + } + } + else { + asmln->instrcopy = NULL; + } + if (asmt_tokentype(&asmt) == ASMT_TOKENTYPE_COMMENT) { + asmln->commentcopy = asmln_tokendup_(&asmt); + asmt_skiptoken(&asmt); + } + else { + asmln->commentcopy = NULL; + } + if (asmt_tokentype(&asmt) != ASMT_TOKENTYPE_END) { + asmln->errorcopy = asmln_strdup_("Unexpected token (the source doesn't seem to be a line of valid assembler)"); + return; + } +} + +asmln_t* asmln_new(const char* sourceline) { + asmln_t* result = calloc(1, sizeof(asmln_t)); + + if (result != NULL) { + asmln_parse_inner(result, sourceline); + } + + return 
result; +} + +void* asmlnx_delete(asmlnx_t* asmlnx) { + if (asmlnx->lhscopy != NULL) { + free(asmlnx->lhscopy); + } + if (asmlnx->lhsx != NULL) { + asmlnx_delete(asmlnx->lhsx); + } + + if (asmlnx->opcopy != NULL) { + free(asmlnx->opcopy); + } + + if (asmlnx->rhscopy != NULL) { + free(asmlnx->rhscopy); + } + if (asmlnx->rhsx != NULL) { + asmlnx_delete(asmlnx->rhsx); + } + + free(asmlnx); + + return NULL; +} + +void* asmln_delete(asmln_t* asmln) { + if (asmln != NULL) { + if (asmln->labelcopy != NULL) { + free(asmln->labelcopy); + asmln->labelcopy = NULL; + } + if (asmln->instrcopy != NULL) { + free(asmln->instrcopy); + asmln->instrcopy = NULL; + } + + if (asmln->errorcopy != NULL) { + free(asmln->errorcopy); + asmln->errorcopy = NULL; + } + if (asmln->commentcopy != NULL) { + free(asmln->commentcopy); + asmln->commentcopy = NULL; + } + int32_t i; + for (i = 0; i < asmln->nparams; i++) { + asmln->paramtype[i] = ASMT_TOKENTYPE_END; + if (asmln->paramcopy[i] != NULL) { + free(asmln->paramcopy[i]); + asmln->paramcopy[i] = NULL; + } + if (asmln->paramx[i] != NULL) { + asmlnx_delete(asmln->paramx[i]); + asmln->paramx[i] = NULL; + } + } + asmln->nparams = 0; + + free(asmln); + } + return NULL; +} diff --git a/asmln.h b/asmln.h new file mode 100644 index 0000000..0028428 --- /dev/null +++ b/asmln.h @@ -0,0 +1,356 @@ +#ifndef ASMLN_H +#define ASMLN_H +#define _CRT_SECURE_NO_WARNINGS + +#include +#include +#include + +// XXX TODO: Memory management sorta fails when you're dealing with that many +// substructures, should use garbage collection. +//#define free fakefree +//static void fakefree(void*p) {} + +/* This can be defined explicitly to change how the functions are defined (may be needed on some compilers or targets). 
*/ +#ifndef ASMLN_INLINE +#define ASMLN_INLINE static inline +#endif + +/* Character functions: */ + +ASMLN_INLINE bool asmc_isalpha(char c) { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); +} + +ASMLN_INLINE int32_t asmc_digitval(char c) { + if (c >= '0' && c <= '9') { + return c - '0'; + } + else if (c >= 'a' && c <= 'f') { + return c - 'a'; + } + else if (c >= 'A' && c <= 'F') { + return c - 'A'; + } + else { + return -1; + } +} + +ASMLN_INLINE bool asmc_isdigit(char c, int base) { + int v = asmc_digitval(c); + if (v < 0) { + return false; + } + else if (v < base) { + return true; + } + else { + return false; + } +} + +ASMLN_INLINE bool asmc_isdec(char c) { + return asmc_isdigit(c, 10); +} + +ASMLN_INLINE bool asmc_ishex(char c) { + return asmc_isdigit(c, 16); +} + +ASMLN_INLINE bool asmc_isvalidlabelhead(char c) { + return asmc_isalpha(c) || c == '_' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '<' || c == '>' || c == '|' || c == '&' || c == '$' || c == '.'; +} + +ASMLN_INLINE bool asmc_isvalidlabeltail(char c) { + return asmc_isvalidlabelhead(c) || asmc_isdec(c); +} + +ASMLN_INLINE bool asmc_isspace(char c) { + return c == ' ' || c == '\t' || c == '\r' || c == '\n'; +} + +/* Tokeniser functions, these should be useful whether the structure is set up for a line of input or for a whole input file at a time. 
*/ + +typedef struct asmt asmt_t; + +struct asmt { + const char* input; + int32_t length; + int32_t index; +}; + +ASMLN_INLINE bool asmt_isend(asmt_t* asmt) { + return asmt->index < 0 || asmt->index >= asmt->length; +} + +ASMLN_INLINE void asmt_skipspaces(asmt_t* asmt) { + while (!asmt_isend(asmt) && asmc_isspace(asmt->input[asmt->index])) { + asmt->index++; + } +} + +ASMLN_INLINE bool asmt_isnumberstart(asmt_t* asmt) { + return !asmt_isend(asmt) && (asmc_isdec(asmt->input[asmt->index]) || ((asmt->input[asmt->index] == '-' || asmt->input[asmt->index] == '+') && asmc_isdec(asmt->input[asmt->index+1]))); +} + +ASMLN_INLINE bool asmt_isnameorlabelstart(asmt_t* asmt) { + return !asmt_isend(asmt) && asmc_isvalidlabelhead(asmt->input[asmt->index])/* && !asmt_isnumberstart(asmt)*/; +} + +ASMLN_INLINE bool asmt_isstringstart(asmt_t* asmt) { + return !asmt_isend(asmt) && asmt->input[asmt->index] == '\"'; +} + +ASMLN_INLINE bool asmt_iscommentstart(asmt_t* asmt) { + return !asmt_isend(asmt) && (asmt->input[asmt->index] == ';' || asmt->input[asmt->index] == '#'); +} + +ASMLN_INLINE int32_t asmt_numberlength(asmt_t* asmt) { + if (asmt_isnumberstart(asmt)) { + int32_t len = 1; + /*if (asmt->input[asmt->index] == '-' || asmt->input[asmt->index] == '+') { + len++; + }*/ + // Detect hex/binary format TODO: Combine with handling of negatives + if (asmt->index + 2 < asmt->length && ((asmt->input[asmt->index + 1] == 'x') || (asmt->input[asmt->index + 1] == 'b'))) { + len++; + } + while (!asmt_isend(asmt) && asmt->index + len < asmt->length && asmc_isdec(asmt->input[asmt->index + len])) { + len++; + } + return len; + } + else { + return -1; + } +} + +ASMLN_INLINE int32_t asmt_namelength(asmt_t* asmt) { + if (asmt_isnameorlabelstart(asmt)) { + int32_t len = 1; + while (!asmt_isend(asmt) && asmt->index + len < asmt->length && asmc_isvalidlabeltail(asmt->input[asmt->index + len])) { + len++; + } + return len; + } + else { + return -1; + } +} + +ASMLN_INLINE int32_t 
asmt_stringlength(asmt_t* asmt) { + if (asmt_isstringstart(asmt)) { + int32_t len = 1; + while (!asmt_isend(asmt) && asmt->index + len < asmt->length && asmt->input[asmt->index + len] != '\"') { + len++; + } + if (asmt_isend(asmt) || asmt->index + len >= asmt->length) { + return -1; + } + return len + 1; + } + else { + return -1; + } +} + +ASMLN_INLINE int32_t asmt_commentlength(asmt_t* asmt) { + if (asmt_iscommentstart(asmt)) { + int32_t len = 1; + while (!asmt_isend(asmt) && asmt->index + len < asmt->length && asmt->input[asmt->index + len] != '\r' && asmt->input[asmt->index + len] != '\n') { + len++; + } + return len; + } + else { + return -1; + } +} + +ASMLN_INLINE bool asmt_islabelstart(asmt_t* asmt) { + int32_t namelen = asmt_namelength(asmt); + if (namelen > 0 && asmt->index + namelen < asmt->length && asmt->input[asmt->index + namelen] == ':') { + return true; + } + else { + return false; + } +} + +ASMLN_INLINE bool asmt_isnamestart(asmt_t* asmt) { + return asmt_isnameorlabelstart(asmt) && !asmt_islabelstart(asmt); +} + +#define ASMT_TOKENTYPE_ERROR -1 +#define ASMT_TOKENTYPE_END 0 +#define ASMT_TOKENTYPE_LABEL 1 +#define ASMT_TOKENTYPE_NAME 2 +#define ASMT_TOKENTYPE_NUMBER 3 +#define ASMT_TOKENTYPE_STRING 4 +#define ASMT_TOKENTYPE_COMMA 5 +#define ASMT_TOKENTYPE_COMMENT 6 +#define ASMT_TOKENTYPE_OPENBR 7 +#define ASMT_TOKENTYPE_CLOSEBR 8 + +ASMLN_INLINE int32_t asmt_tokentype(asmt_t* asmt) { + asmt_skipspaces(asmt); + if (asmt_isend(asmt)) { + return ASMT_TOKENTYPE_END; + } + else if (asmt->input[asmt->index] == '-' && asmc_isdec(asmt->input[asmt->index+1])) { + return ASMT_TOKENTYPE_NUMBER; + } + else if (asmt_isnamestart(asmt)) { + return ASMT_TOKENTYPE_NAME; + } + else if (asmt_islabelstart(asmt)) { + return ASMT_TOKENTYPE_LABEL; + } + else if (asmt_isnumberstart(asmt)) { + return ASMT_TOKENTYPE_NUMBER; + } + else if (asmt_isstringstart(asmt)) { + return ASMT_TOKENTYPE_STRING; + } + else if (asmt_iscommentstart(asmt)) { + return ASMT_TOKENTYPE_COMMENT; 
+ } + else if (asmt->input[asmt->index] == ',') { + return ASMT_TOKENTYPE_COMMA; + } + else if (asmt->input[asmt->index] == '(') { + return ASMT_TOKENTYPE_OPENBR; + } + else if (asmt->input[asmt->index] == ')') { + return ASMT_TOKENTYPE_CLOSEBR; + } + else { + return ASMT_TOKENTYPE_ERROR; + } +} + +ASMLN_INLINE int32_t asmt_tokenlength(asmt_t* asmt) { + switch (asmt_tokentype(asmt)) { + case ASMT_TOKENTYPE_NAME: + return asmt_namelength(asmt); + case ASMT_TOKENTYPE_LABEL: + return asmt_namelength(asmt) + 1; + case ASMT_TOKENTYPE_NUMBER: + return asmt_numberlength(asmt); + case ASMT_TOKENTYPE_STRING: + return asmt_stringlength(asmt); + case ASMT_TOKENTYPE_COMMENT: + return asmt_commentlength(asmt); + case ASMT_TOKENTYPE_COMMA: + case ASMT_TOKENTYPE_OPENBR: + case ASMT_TOKENTYPE_CLOSEBR: + return 1; + case ASMT_TOKENTYPE_ERROR: + case ASMT_TOKENTYPE_END: + default: + return -1; + } +} + +ASMLN_INLINE bool asmt_skiptoken(asmt_t* asmt) { + asmt_skipspaces(asmt); // Just in case of edge cases... + if (asmt_isend(asmt)) { + return false; + } + else { + int32_t l = asmt_tokenlength(asmt); + if (l <= 0) { + return false; + } + else { + asmt->index += l; + return true; + } + } +} + +ASMLN_INLINE void asmt_skipcomments(asmt_t* asmt) { + while (asmt_iscommentstart(asmt)) { + asmt_skiptoken(asmt); + } +} + +/* Main API: Deals with one line of assembly code at a time, in format like [labelname:] [instrname param1 , param2 , ... , paramN] [; comment] */ + +/* Set a reasonably high maximum, assuming data strings can go on for a while... */ +#define ASMLN_MAXPARAMS 100 + +typedef struct asmln asmln_t; +typedef struct asmlnx asmlnx_t; + +struct asmln { + char* labelcopy; + char* instrcopy; + int32_t nparams; + int32_t paramtype[ASMLN_MAXPARAMS]; + char* paramcopy[ASMLN_MAXPARAMS]; + asmlnx_t* paramx[ASMLN_MAXPARAMS]; + char* commentcopy; + char* errorcopy; +}; + +/* Subexpressions, e.g. (1 + (2 * 3)) need to be split into a tree structure. 
Expressions are given the type ASMT_TOKENTYPE_OPENBR (like their first token). + * These expressions could be optimised/reduced by the assembler in the future, but otherwise are encoded directly in the output, allowing the linker/loader to + * resolve complex expressions involving linker symbols. + */ +struct asmlnx { + int32_t lhstype; + char* lhscopy; + asmlnx_t* lhsx; + char* opcopy; + int32_t rhstype; + char* rhscopy; + asmlnx_t* rhsx; +}; + +asmln_t* asmln_new(const char* sourceline); + +void* asmln_delete(asmln_t* asmln); + +bool asmlnx_parse_subexpression(asmln_t* asmln, asmt_t* asmt, int32_t* type_var, char** copy_var, asmlnx_t** x_var, int32_t* incr_var); +void* asmlnx_delete(asmlnx_t* asmlnx); + +/* +ASMLN_INLINE int32_t asmln_nextnonspace(const char* l, int32_t n, int32_t i) { + while (i < n) { + if (!asmc_isspace(l[i])) { + return i; + } + i++; + } + return -1; +} + +ASMLN_INLINE int32_t asmln_nextinstance(const char* l, int32_t n, int32_t i, char c) { + while (i < n) { + if (l[i] == c) { + return i; + } + i++; + } + return -1; +}*/ + +//ASMLN_INLINE int32_t asmln_lab + +/* Convenience functions: */ +/* +ASMLN_INLINE int32_t asml_len(const char* l) { + if (l == NULL) { + return 0; + } + int32_t i = 0; + while (l[i] != 0) { + i++; + } + return i; +}*/ + +/* From ifndef at top of file: */ +#endif diff --git a/asmpp.c b/asmpp.c new file mode 100644 index 0000000..7d38d50 --- /dev/null +++ b/asmpp.c @@ -0,0 +1,421 @@ +#include "asmpp.h" +#include +#include + +static void asmpp_mapdelete(asmdata_map_t* map, char* name, void* poo) { +} + +asmpp_t* asmpp_new(asmpp_systemf_t outputf, void* udata) { + asmpp_t* pp = malloc(sizeof(asmpp_t)); + if (pp == NULL) { + return NULL; + } + + pp->outputf = outputf; + pp->udata = udata; + + pp->defs = asmdata_map_new(543, &asmpp_mapdelete); + if (pp->defs == NULL) { + free(pp); + return NULL; + } + pp->macros = asmdata_map_new(543, &asmpp_mapdelete); + if (pp->macros == NULL) { + free(pp); + return NULL; + } + + pp->context 
= NULL; + + return pp; +} + +void asmpp_delete(asmpp_t* pp) { + if (pp == NULL) { + return; + } + + asmdata_map_delete(pp->defs); + asmdata_map_delete(pp->macros); + free(pp); +} + +const char* asmpp_contexttypename(asmpp_t* pp, int type) { + switch (type) { + case ASMPP_CONTEXT_OUTER: return "OUTER"; + case ASMPP_CONTEXT_MACRO_EXPAND: return "MACRO_EXPAND"; + case ASMPP_CONTEXT_MACRO_COLLECT: return "MACRO_COLLECT"; + case ASMPP_CONTEXT_IF_EXPAND: return "IF_EXPAND"; + case ASMPP_CONTEXT_IF_PARSEONLY: return "IF_PARSEONLY"; + default: "Invalid"; + } +} +asmpp_context_t* asmpp_enter(asmpp_t* pp, int type, asmpp_macro_t* macro) { + //printf("ENTER CONTEXT %s(#%i) %s\n", asmpp_contexttypename(pp, type), type, macro == NULL ? "(not a macro)" : macro->proto->instrcopy); + asmpp_context_t* c = malloc(sizeof(asmpp_context_t)); + if (c == NULL) { + return NULL; + } + c->type = type; + c->macro = macro; + c->locals = NULL; + c->ifvalue = 0; + c->next = pp->context; + pp->context = c; + return c; +} + +void asmpp_dumpctx(asmpp_t* pp, asmpp_context_t* ctx) { + printf("Context %i %s @%p\n", ctx->type, ctx->macro == NULL ? "(not a macro)" : ctx->macro->proto->instrcopy, ctx); +} + +void asmpp_dumpctxs(asmpp_t* pp) { + asmpp_context_t* ctx = pp->context; + while (ctx != NULL) { + asmpp_dumpctx(pp, ctx); + ctx = ctx->next; + } +} + +void asmpp_exit(asmpp_t* pp, asmpp_context_t* context) { + if (context != pp->context) { + printf("WARNING: Trying to exit from wrong context\n"); + asmpp_dumpctx(pp, context); + printf("--- STACK: ---\n"); + asmpp_dumpctxs(pp); + return; + } + //printf("EXIT CONTEXT %i %s\n", context->type, context->macro == NULL ? 
"(not a macro)" : context->macro->proto->instrcopy); + pp->context = context->next; + if (context->locals != NULL) { + asmdata_map_delete(context->locals); + } + free(context); +} + +asmpp_macro_t* asmpp_quickmacro(asmpp_t* pp, asmln_t* proto, asmpp_systemf_t systemf) { + asmpp_macro_t* macro = malloc(sizeof(asmpp_macro_t)); + if (macro == NULL) { + return NULL; + } + macro->proto = proto; + macro->systemf = systemf; + macro->lines = NULL; + macro->next = NULL; + + asmpp_macro_t* oldmacro = asmdata_map_get(pp->macros, proto->instrcopy); + macro->next = oldmacro; + asmdata_map_set(pp->macros, proto->instrcopy, macro); + return macro; +} + +asmpp_def_t* asmpp_allocdef(asmpp_t* pp, int t, char* str, asmlnx_t* x) { + asmpp_def_t* d = malloc(sizeof(asmpp_def_t)); + if (d == NULL) { + return NULL; + } + d->t = t; + d->value = str; + d->x = x; + return d; +} + +asmpp_def_t* asmpp_finddef(asmpp_t* pp, char* name) { + asmpp_context_t* ctx = pp->context; + if (ctx != NULL && ctx->locals != NULL) { + asmpp_def_t* localresult = asmdata_map_get(ctx->locals, name); + if (localresult != NULL) { + return localresult; + } + } + return asmdata_map_get(pp->defs, name); +} + +asmpp_macro_t* asmpp_findmacro(asmpp_t* pp, char* name, int nparams) { + asmpp_macro_t* firstm = asmdata_map_get(pp->macros, name); + asmpp_macro_t* m = firstm; + while (m != NULL) { + if (m->proto->nparams == nparams) { + return m; + } + m = m->next; + } + return NULL; +} + +char* asmdata_strdup_(const char* str); + +char* asmpp_strdupnull_(char* str) { + return (str == NULL) ? NULL : asmdata_strdup_(str); +} + +/*int asmpp_copyxln_param(asmpp_t* pp, int* paramtp, char** strp, asmlnx_t** xp, bool expandparams) { + int t = *paramtp; + if (t == ASMT_TOKENTYPE_NAME) { + asmpp_def_t* def = asmpp_finddef(pp, *strp); + if (def != NULL) { + *paramtp = def->t; + *strp = asmpp_strdupnull_(def->value); + } + } + *strp = asmpp_strdupnull_(*strp); // NOTE: This must handle NULLs! 
+}*/
+
+/* Copies one (type, text, expression) triple from src* to dst*.
+ * When xp is true and the source is a NAME token with a known definition, the
+ * definition's type/text/expression are copied instead of the name itself.
+ * Expects a variable `pp` in scope. The dst* arguments must be lvalues;
+ * the src* arguments may be evaluated more than once. */
+#define ASMPP_COPYXLN_EXPAND(dstt,dstc,dstx,srct,srcc,srcx,xp) \
+    do { \
+        bool _done = false; \
+        if ((xp) && (srct) == ASMT_TOKENTYPE_NAME) { \
+            asmpp_def_t* _def = asmpp_finddef(pp, (srcc)); \
+            if (_def != NULL) { \
+                _done = true; \
+                (dstt) = _def->t; \
+                (dstc) = asmpp_strdupnull_(_def->value); \
+                (dstx) = asmpp_copyxln_x(pp, _def->x, (xp)); \
+            } \
+        } \
+        if (!_done) { \
+            (dstt) = (srct); \
+            (dstc) = asmpp_strdupnull_((srcc)); \
+            (dstx) = asmpp_copyxln_x(pp, (srcx), (xp)); \
+        } \
+    } while (0)
+
+asmlnx_t* asmpp_copyxln_x(asmpp_t* pp, asmlnx_t* x, bool expandparams);
+
+/* Deep-copies a bracketed expression node (lhs, operator, rhs), optionally expanding
+ * defined names on either side. Returns NULL if x is NULL or allocation fails. */
+asmlnx_t* asmpp_copyxln_x(asmpp_t* pp, asmlnx_t* x, bool expandparams) {
+    if (x == NULL) {
+        return NULL;
+    }
+    asmlnx_t* result = calloc(1, sizeof(asmlnx_t));
+    if (result == NULL) {
+        return NULL;
+    }
+
+    ASMPP_COPYXLN_EXPAND(result->lhstype,result->lhscopy,result->lhsx,x->lhstype,x->lhscopy,x->lhsx,expandparams);
+    //printf("Converted '%s' to '%s'\n", x->lhscopy, result->lhscopy);
+    result->opcopy = asmpp_strdupnull_(x->opcopy);
+    ASMPP_COPYXLN_EXPAND(result->rhstype,result->rhscopy,result->rhsx,x->rhstype,x->rhscopy,x->rhsx,expandparams);
+    //printf("Converted '%s' to '%s'\n", x->rhscopy, result->rhscopy);
+    return result;
+}
+
+/* Deep-copies a parsed line. With expandparams set, parameters that are defined names
+ * are replaced by their definitions. Returns NULL if ln is NULL or allocation fails. */
+asmln_t* asmpp_copyxln(asmpp_t* pp, asmln_t* ln, bool expandparams) {
+    if (ln == NULL) {
+        return NULL;
+    }
+    /* calloc rather than malloc: any field or parameter slot that is not filled in below
+     * must read as zero/NULL, not as uninitialised memory. */
+    asmln_t* xln = calloc(1, sizeof(asmln_t));
+    if (xln == NULL) {
+        return NULL;
+    }
+    xln->labelcopy = asmpp_strdupnull_(ln->labelcopy);
+    xln->instrcopy = asmpp_strdupnull_(ln->instrcopy);
+    xln->nparams = ln->nparams;
+    xln->commentcopy = asmpp_strdupnull_(ln->commentcopy);
+    xln->errorcopy = asmpp_strdupnull_(ln->errorcopy);
+    int i;
+    for (i = 0; i < xln->nparams; i++) {
+        ASMPP_COPYXLN_EXPAND(xln->paramtype[i],xln->paramcopy[i],xln->paramx[i],ln->paramtype[i],ln->paramcopy[i],ln->paramx[i],expandparams);
+    }
+    return xln;
+}
+
+bool asmdata_streq_(const char* a, const char* b);
+
+/* Returns true unless compilation is disabled by an #if or equivalent context.
*/
+bool asmpp_ifcompiling(asmpp_t* pp) {
+    asmpp_context_t* ctx = pp->context;
+    while (ctx != NULL) {
+        /* IF_PARSEONLY marks an %if whose remaining branches must all be skipped:
+         * either it is nested in a disabled region or one of its branches was already taken. */
+        if (ctx->type == ASMPP_CONTEXT_IF_PARSEONLY) {
+            return false;
+        }
+        if (ctx->type == ASMPP_CONTEXT_IF_EXPAND && ctx->ifvalue == 0) {
+            return false;
+        }
+        ctx = ctx->next;
+    }
+    return true;
+}
+
+/* Feeds one parsed line through the preprocessor.
+ *  - While a %macro body is being collected, the line is stored in the macro.
+ *  - %if / %elseif / %else / %endif, %macro: and %def: are handled here.
+ *  - A line naming a known macro (matching name and parameter count) is expanded,
+ *    recursively; any other line is passed to pp->outputf.
+ * The line itself is never kept or freed: everything retained is a copy.
+ * Returns the number of lines reported by the output/builtin handlers, or a negative value on error. */
+int asmpp_expand(asmpp_t* pp, asmln_t* ln) {
+    if (pp == NULL || ln == NULL) {
+        return -1;
+    }
+    if (pp->context != NULL && pp->context->type == ASMPP_CONTEXT_MACRO_COLLECT) {
+        //printf("GOT MACRO LINE %s\n", ln->instrcopy);
+        if (asmdata_streq_(ln->instrcopy, "%endmacro")) {
+            asmpp_exit(pp, pp->context);
+            return 0;
+        }
+        asmpp_lines_t* nl = malloc(sizeof(asmpp_lines_t));
+        if (nl == NULL) {
+            return -1;
+        }
+        nl->line = asmpp_copyxln(pp, ln, false);
+        nl->next = NULL;
+        if (nl->line == NULL) {
+            free(nl);
+            return -1;
+        }
+        /* Append at the tail so the body keeps its source order. */
+        asmpp_lines_t* ll = pp->context->macro->lines;
+        while (ll != NULL && ll->next != NULL) {
+            ll = ll->next;
+        }
+        if (ll != NULL) {
+            ll->next = nl;
+        } else {
+            pp->context->macro->lines = nl;
+        }
+        return 0;
+    }
+    if (asmdata_streq_(ln->instrcopy, "%endif")) {
+        asmpp_context_t* ifctx = pp->context;
+        if (ifctx == NULL || (ifctx->type != ASMPP_CONTEXT_IF_EXPAND && ifctx->type != ASMPP_CONTEXT_IF_PARSEONLY)) {
+            printf("%%endif without a matching %%if\n");
+            return -1;
+        }
+        /* %if contexts only ever borrow the locals of the enclosing context (see %if below);
+         * drop the borrowed pointer so that popping the context cannot delete the map. */
+        ifctx->locals = NULL;
+        asmpp_exit(pp, ifctx);
+        return 0;
+    } else if (!asmpp_ifcompiling(pp) && !asmdata_streq_(ln->instrcopy, "%else") && !asmdata_streq_(ln->instrcopy, "%elseif")) {
+        /* Disabled region: only track nesting so the matching %endif is found. */
+        if (asmdata_streq_(ln->instrcopy, "%if")) {
+            if (asmpp_enter(pp, ASMPP_CONTEXT_IF_PARSEONLY, NULL) == NULL) {
+                return -1;
+            }
+        }
+        return 0;
+    } else if (asmdata_streq_(ln->labelcopy, "%macro")) {
+        /* "%macro: name params" parses with "%macro" as the label and the macro name as the instruction. */
+        asmln_t* proto = asmpp_copyxln(pp, ln, false);
+        asmpp_macro_t* mac = (proto == NULL) ? NULL : asmpp_quickmacro(pp, proto, NULL);
+        if (mac == NULL) {
+            if (proto != NULL) {
+                asmln_delete(proto);
+            }
+            return -1;
+        }
+        //printf("Got macro '%s'\n", ln->instrcopy);
+        if (asmpp_enter(pp, ASMPP_CONTEXT_MACRO_COLLECT, mac) == NULL) {
+            return -1;
+        }
+        return 0;
+    } else if (asmdata_streq_(ln->labelcopy, "%def")) {
+        asmln_t* defln = asmpp_copyxln(pp, ln, true);
+        if (defln == NULL) {
+            return -1;
+        }
+        if (defln->nparams < 1) {
+            printf("%%def requires a value\n");
+            asmln_delete(defln);
+            return -1;
+        }
+        asmpp_def_t* def = asmpp_allocdef(pp, defln->paramtype[0], defln->paramcopy[0], defln->paramx[0]);
+        if (def == NULL) {
+            asmln_delete(defln);
+            return -1;
+        }
+        /* defln is deliberately kept alive: the definition points into its parameter storage. */
+        asmdata_map_set(pp->defs, defln->instrcopy, def);
+        return 0;
+    }
+    //dump_asmln(stdout, ln);
+    asmln_t* xln = asmpp_copyxln(pp, ln, true);
+    if (xln == NULL) {
+        return -1;
+    }
+    //dump_asmln(stdout, xln);
+    if (asmdata_streq_(xln->instrcopy, "%if")) {
+        uint64_t val = 0;
+        char* err = NULL;
+        int sig = 0;
+        if (xln->nparams >= 1 && asmpp_calc(pp, xln->paramtype[0], xln->paramcopy[0], xln->paramx[0], &val, &err, &sig)) {
+            //printf("Got if with value %llu\n", val);
+        } else {
+            printf("Calculation of the if factor failed: %s\n", err == NULL ? "(error string was not set)" : err);
+        }
+        asmln_delete(xln);
+        asmpp_context_t* ifctx = asmpp_enter(pp, ASMPP_CONTEXT_IF_EXPAND, NULL);
+        if (ifctx == NULL) {
+            return -1;
+        }
+        ifctx->ifvalue = val;
+        /* asmpp_finddef only looks at the innermost context, so share the locals
+         * of the enclosing context to keep macro arguments visible inside the %if. */
+        if (ifctx->next != NULL) {
+            ifctx->locals = ifctx->next->locals;
+        }
+        return 0;
+    } else if (asmdata_streq_(xln->instrcopy, "%elseif")) {
+        asmpp_context_t* ifctx = pp->context;
+        int rc = 0;
+        if (ifctx == NULL || (ifctx->type != ASMPP_CONTEXT_IF_EXPAND && ifctx->type != ASMPP_CONTEXT_IF_PARSEONLY)) {
+            printf("%%elseif without a matching %%if\n");
+            rc = -1;
+        } else if (ifctx->type == ASMPP_CONTEXT_IF_PARSEONLY) {
+            /* Whole chain is disabled; nothing to evaluate. */
+        } else if (ifctx->ifvalue != 0) {
+            /* A previous branch was compiled, so every later branch must be skipped. */
+            ifctx->type = ASMPP_CONTEXT_IF_PARSEONLY;
+        } else {
+            uint64_t val = 0;
+            char* err = NULL;
+            int sig = 0;
+            if (xln->nparams >= 1 && asmpp_calc(pp, xln->paramtype[0], xln->paramcopy[0], xln->paramx[0], &val, &err, &sig)) {
+                //printf("Got if with value %llu\n", val);
+            } else {
+                printf("Calculation of the if factor failed: %s\n", err == NULL ? "(error string was not set)" : err);
+            }
+            ifctx->ifvalue = val;
+        }
+        asmln_delete(xln);
+        return rc;
+    } else if (asmdata_streq_(xln->instrcopy, "%else")) {
+        asmpp_context_t* ifctx = pp->context;
+        int rc = 0;
+        if (ifctx == NULL || (ifctx->type != ASMPP_CONTEXT_IF_EXPAND && ifctx->type != ASMPP_CONTEXT_IF_PARSEONLY)) {
+            printf("%%else without a matching %%if\n");
+            rc = -1;
+        } else if (ifctx->type == ASMPP_CONTEXT_IF_EXPAND) {
+            if (ifctx->ifvalue != 0) {
+                ifctx->type = ASMPP_CONTEXT_IF_PARSEONLY;
+            } else {
+                ifctx->ifvalue = 1;
+            }
+        }
+        asmln_delete(xln);
+        return rc;
+    }
+    asmpp_macro_t* m = (xln->instrcopy == NULL) ? NULL : asmpp_findmacro(pp, xln->instrcopy, xln->nparams);
+    int nexpanded = 0;
+    if (m != NULL) {
+        asmpp_context_t* ctx = asmpp_enter(pp, ASMPP_CONTEXT_MACRO_EXPAND, m);
+        if (ctx == NULL) {
+            asmln_delete(xln);
+            return -1;
+        }
+        if (m->systemf != NULL) {
+            asmpp_systemf_t sysf = m->systemf;
+            nexpanded = sysf(pp, xln);
+        } else {
+            bool failed = false;
+            ctx->locals = asmdata_map_new(23, &asmpp_mapdelete);
+            if (ctx->locals == NULL) {
+                failed = true;
+            }
+            int argi;
+            for (argi = 0; !failed && argi < m->proto->nparams; argi++) {
+                if (m->proto->paramtype[argi] == ASMT_TOKENTYPE_OPENBR) {
+                    /* Bracketed prototype parameter such as off(src): bind lhs, operator and rhs separately. */
+                    asmlnx_t* px = m->proto->paramx[argi];
+                    asmlnx_t* ax = xln->paramx[argi];
+                    if (px == NULL || ax == NULL) {
+                        printf("Macro '%s': argument %d does not have the bracketed form the macro expects\n", xln->instrcopy, argi);
+                        failed = true;
+                        break;
+                    }
+                    asmpp_def_t* deflhs = asmpp_allocdef(pp, ax->lhstype, ax->lhscopy, ax->lhsx);
+                    asmpp_def_t* defop = asmpp_allocdef(pp, ASMT_TOKENTYPE_NAME, ax->opcopy, NULL);
+                    asmpp_def_t* defrhs = asmpp_allocdef(pp, ax->rhstype, ax->rhscopy, ax->rhsx);
+                    asmdata_map_set(ctx->locals, px->lhscopy, deflhs);
+                    //printf("Mapped '%s' to '%s'\n", px->lhscopy, deflhs->value);
+                    asmdata_map_set(ctx->locals, px->opcopy, defop);
+                    asmdata_map_set(ctx->locals, px->rhscopy, defrhs);
+                } else {
+                    asmpp_def_t* def = asmpp_allocdef(pp, xln->paramtype[argi], xln->paramcopy[argi], xln->paramx[argi]);
+                    asmdata_map_set(ctx->locals, m->proto->paramcopy[argi] == NULL ? "?" : m->proto->paramcopy[argi], def);
+                }
+            }
+            asmpp_lines_t* l = m->lines;
+            while (!failed && l != NULL) {
+                int n = asmpp_expand(pp, l->line);
+                if (n < 0) {
+                    /* Propagate the failure instead of folding -1 into the line count. */
+                    failed = true;
+                    break;
+                }
+                nexpanded += n;
+                l = l->next;
+            }
+            if (failed) {
+                nexpanded = -1;
+            }
+        }
+        asmpp_exit(pp, ctx);
+    } else {
+        asmpp_systemf_t outputf = pp->outputf;
+        nexpanded = outputf(pp, xln);
+        if (nexpanded < 0) {
+            nexpanded = -1;
+        }
+    }
+    asmln_delete(xln);
+    return nexpanded;
+}
+
+/* Applies the binary operator `op` ("&&" or "||") to two already-evaluated operands.
+ * On success stores 1 or 0 in *resultp, 0 in *signp, and returns true.
+ * For any other operator stores a message in *errp and returns false.
+ * The return value reports success only; it is not the truth value of the expression. */
+bool asmpp_binop(asmpp_t* pp, uint64_t lhsresult, int lhssig, char* op, uint64_t rhsresult, int rhssig, uint64_t* resultp, char** errp, int* signp) {
+    uint64_t value;
+    if (asmdata_streq_(op, "&&")) {
+        value = (lhsresult && rhsresult) ? 1 : 0;
+    } else if (asmdata_streq_(op, "||")) {
+        value = (lhsresult || rhsresult) ? 1 : 0;
+    } else {
+        if (errp != NULL) {
+            *errp = "Unknown binary operator";
+        }
+        return false;
+    }
+    if (resultp != NULL) {
+        *resultp = value;
+    }
+    if (signp != NULL) {
+        *signp = 0;
+    }
+    return true;
+}
+
+long long asmdata_atoll_(const char* a);
+
+/* Evaluates a preprocessor expression given as token type t, text str and, for
+ * bracketed expressions, the node x. NUMBER tokens and bracketed binary expressions
+ * are supported; every other token type is reported as not calculated.
+ * *resultp, *errp and *sigp (each optional) are always written, also on failure.
+ * Returns true if a value was calculated. */
+bool asmpp_calc(asmpp_t* pp, int t, char* str, asmlnx_t* x, uint64_t* resultp, char** errp, int* sigp) {
+    char* err = NULL;
+    int sig = 0;
+    uint64_t result = 0;
+    bool calculated = false;
+
+    switch (t) {
+        case ASMT_TOKENTYPE_NUMBER:
+            result = asmdata_atoll_(str); // This handles hex/binary, TODO: floats
+            calculated = true;
+            break;
+        case ASMT_TOKENTYPE_OPENBR: {
+            /* Braces are required: before C23 a declaration may not directly follow a case label. */
+            uint64_t lhsresult = 0;
+            char* lhserror = NULL;
+            int lhssig = 0;
+            uint64_t rhsresult = 0;
+            char* rhserror = NULL;
+            int rhssig = 0;
+            if (x == NULL) {
+                err = "Bracketed expression has no contents";
+                break;
+            }
+            if (!asmpp_calc(pp, x->lhstype, x->lhscopy, x->lhsx, &lhsresult, &lhserror, &lhssig)) {
+                err = lhserror;
+                break;
+            }
+            if (!asmpp_calc(pp, x->rhstype, x->rhscopy, x->rhsx, &rhsresult, &rhserror, &rhssig)) {
+                err = rhserror;
+                break;
+            }
+            calculated = asmpp_binop(pp, lhsresult, lhssig, x->opcopy, rhsresult, rhssig, &result, &err, &sig);
+            break;
+        }
+        default:
+            calculated = false;
+            break;
+    }
+
+    if (resultp != NULL) {
+        *resultp = result;
+    }
+    if (errp != NULL) {
+        *errp = err;
+    }
+    if (sigp != NULL) {
+        *sigp = sig;
+    }
+    return calculated;
+}
diff --git a/asmpp.h b/asmpp.h
new file mode 100644
index 0000000..6cc7134
--- /dev/null
+++ b/asmpp.h
@@ -0,0 +1,76 @@
+#ifndef ASMPP_H
+#define ASMPP_H
+
+#include "asmln.h"
+#include "asmdata.h"
+#include <stdint.h>
+#include <stdbool.h>
+
+typedef struct asmpp_def asmpp_def_t;
+typedef struct asmpp_lines asmpp_lines_t;
+typedef struct asmpp_macro asmpp_macro_t;
+typedef struct asmpp_context asmpp_context_t;
+typedef struct asmpp asmpp_t;
+
+// Either outputs a line or handles some builtin macro
+typedef int (*asmpp_systemf_t)(asmpp_t* pp, asmln_t* ln);
+
+struct asmpp_def { // A %def value or a bound macro argument
+    int t; // Token type of the value
+    char* value;
+    asmlnx_t* x; // Expression node for bracketed values, otherwise NULL
+};
+
+struct asmpp_lines { // Singly linked list of the lines of a macro body
+    asmln_t* line;
+    asmpp_lines_t* next;
+};
+
+struct asmpp_macro {
+    asmln_t* proto; // Name and parameter list
+    asmpp_systemf_t systemf; // Builtin handler, or NULL for a user macro
+    asmpp_lines_t* lines;
+    asmpp_macro_t* next; // Next macro registered under the same name
+};
+
+#define ASMPP_CONTEXT_OUTER 0
+#define ASMPP_CONTEXT_MACRO_EXPAND 1
+#define ASMPP_CONTEXT_MACRO_COLLECT 2
+#define ASMPP_CONTEXT_IF_EXPAND 3
+#define ASMPP_CONTEXT_IF_PARSEONLY 4
+struct asmpp_context {
+    //bool collecting;
+    int type; // One of ASMPP_CONTEXT_*
+    uint64_t ifvalue; // For IF_EXPAND: nonzero while the current branch is compiled
+    asmpp_macro_t* macro;
+    asmdata_map_t* locals; // Macro arguments; %if contexts borrow the enclosing map
+    asmpp_context_t* next; // Enclosing context
+};
+
+struct asmpp {
+    void* udata;
+    asmpp_systemf_t outputf; // Receives every line that is not a macro or directive
+    asmdata_map_t* defs;
+    asmdata_map_t* macros;
+    asmpp_context_t* context; // Innermost context, or NULL
+};
+
+asmpp_t* asmpp_new(asmpp_systemf_t outputf, void* udata);
+void asmpp_delete(asmpp_t* pp);
+
+asmpp_context_t* asmpp_enter(asmpp_t* pp, int type, asmpp_macro_t* macro);
+void asmpp_exit(asmpp_t* pp, asmpp_context_t* context);
+
+asmpp_macro_t* asmpp_quickmacro(asmpp_t* pp, asmln_t* proto, asmpp_systemf_t systemf);
+
+asmpp_def_t* asmpp_finddef(asmpp_t* pp, char* name);
+
+asmpp_macro_t* asmpp_findmacro(asmpp_t* pp, char* name, int nparams);
+
+int asmpp_expand(asmpp_t* pp, asmln_t* ln);
+
+bool asmpp_binop(asmpp_t* pp, uint64_t lhsresult, int lhssig, char* op, uint64_t rhsresult, int rhssig, uint64_t* resultp, char**
errp, int* signp);
+bool asmpp_calc(asmpp_t* pp, int t, char* str, asmlnx_t* x, uint64_t* resultp, char** errp, int* signp);
+
+// From ifndef at top of file:
+#endif
diff --git a/assemble.c b/assemble.c
new file mode 100644
index 0000000..90a7d0f
--- /dev/null
+++ b/assemble.c
@@ -0,0 +1,478 @@
+#include "asmln.h"
+#include "asmdata.h"
+#include "asmpp.h"
+#ifdef OLD_CODE
+#include "asmgeneric_old.h"
+#include "asmgen1.h"
+#endif
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "asmln.c"
+#include "asmdata.c"
+#include "asmpp.c"
+
+/* Prints command-line help to stderr (still a placeholder). */
+void usage(int argc, char** argv, int argi) {
+    fprintf(stderr, "TODO: USAGE\n");
+}
+
+/* Placeholder: accepts every line without doing anything. */
+bool assemble_line(void* assembler, const char* line) {
+    return true;
+}
+
+/* Writes indentString `indent` times to output. */
+void dump_indent(FILE* output, const char* indentString, int indent) {
+    while (indent-- > 0) {
+        fprintf(output, "%s", indentString);
+    }
+}
+
+void dump_x(FILE* output, int32_t type, asmlnx_t* x, const char* indentString, int indent);
+
+/* Recursively prints a bracketed expression (lhs, operator, rhs), one level of
+ * indentation per nesting level. Does nothing unless type is OPENBR and x is non-NULL. */
+void dump_x(FILE* output, int32_t type, asmlnx_t* x, const char* indentString, int indent) {
+    if (type != ASMT_TOKENTYPE_OPENBR || x == NULL) {
+        return;
+    }
+
+    dump_indent(output, indentString, indent);
+    fprintf(output, "[LHS]\t[type %d]\t'%s'\n", x->lhstype, x->lhscopy);
+    dump_x(output, x->lhstype, x->lhsx, indentString, indent + 1);
+
+    dump_indent(output, indentString, indent);
+    fprintf(output, "[OP]\t'%s'\n", x->opcopy);
+
+    dump_indent(output, indentString, indent);
+    fprintf(output, "[RHS]\t[type %d]\t'%s'\n", x->rhstype, x->rhscopy);
+    dump_x(output, x->rhstype, x->rhsx, indentString, indent + 1);
+}
+
+/* Prints a parsed line (error, label, instruction, comment and every parameter) for diagnostics. */
+void dump_asmln(FILE* output, asmln_t* asmln) {
+    if (asmln == NULL) {
+        fprintf(output, "ERROR: NULL asmln\n");
+        return; /* nothing further can be printed; the code below dereferences asmln */
+    }
+    if (asmln->errorcopy != NULL) {
+        fprintf(output, "ERROR: %s\n", asmln->errorcopy);
+    }
+    fprintf(output, "%s:\t%s\t[%d params]\t; %s\n",
+        asmln->labelcopy == NULL ? "[no label]" : asmln->labelcopy,
+        asmln->instrcopy == NULL ? "[no instr]" : asmln->instrcopy,
+        asmln->nparams,
+        asmln->commentcopy == NULL ? "[no comment]" : asmln->commentcopy);
+    int32_t i;
+    for (i = 0; i < asmln->nparams; i++) {
+        fprintf(output, "\t[param %d]\t[type %d]\t'%s'\n", i, asmln->paramtype[i], asmln->paramcopy[i]);
+        dump_x(output, asmln->paramtype[i], asmln->paramx[i], "\t", 2);
+    }
+}
+
+/* Builtin "warn x" macro: dumps the invoking line to stderr. Always returns 0. */
+int assemble_warn(asmpp_t* pp, asmln_t* asmln) {
+    dump_asmln(stderr, asmln);
+    return 0;
+}
+
+/* Builtin "include x" macro: preprocesses every line of the file named by the first parameter.
+ * Returns the summed result of asmpp_expand over all lines, or a negative value if the
+ * file cannot be opened, memory runs out, or a line fails to parse or expand. */
+int assemble_include(asmpp_t* pp, asmln_t* asmln) {
+    if (asmln == NULL || asmln->nparams < 1 || asmln->paramcopy[0] == NULL) {
+        fprintf(stderr, "Bad filename?");
+        return -1;
+    }
+    char* filename = asmln->paramcopy[0];
+    FILE* input = fopen(filename, "r");
+
+    if (input == NULL) {
+        fprintf(stderr, "Bad filename?");
+        return -1;
+    }
+
+    int xnum = 0;
+    int32_t buffermax = 1024 * 1024;
+    char* buffer = calloc(buffermax, 1);
+    if (buffer == NULL) {
+        fprintf(stderr, "Out of memory?");
+        fclose(input);
+        return -1;
+    }
+
+    const char* line;
+    while ((line = fgets(buffer, buffermax, input)) != NULL) {
+        /* First parse the line.*/
+        asmln_t* parsed = asmln_new(line);
+        /* Now check for parse errors. */
+        if (parsed == NULL) {
+            fprintf(stderr, "asmln_new failed entirely!");
+            xnum = -1;
+            break;
+        }
+        if (parsed->errorcopy != NULL) {
+            dump_asmln(stdout, parsed);
+            asmln_delete(parsed);
+            xnum = -1;
+            break;
+        }
+        int n = asmpp_expand(pp, parsed);
+        /* asmpp_expand works on copies, so the parsed line can be released right away. */
+        asmln_delete(parsed);
+        if (n < 0) {
+            xnum = n;
+            break;
+        }
+        xnum += n;
+    }
+
+    /* Single exit: the buffer and the file are released on success and failure alike. */
+    free(buffer);
+    fclose(input);
+
+    return xnum;
+}
+
+/* Output callback of the preprocessor: hands a fully expanded line to the data assembler
+ * stored in pp->udata. Returns 1 on success and -1 on failure. */
+int assemble_ppout(asmpp_t* pp, asmln_t* asmln) {
+    if (asmln == NULL) {
+        fprintf(stderr, "preprocessor failed entirely!");
+        return -1;
+    }
+
+    if (asmln->errorcopy != NULL && asmln->errorcopy[0] != 0) {
+        dump_asmln(stdout, asmln);
+        return -1;
+    }
+    /* If it's a plain data line, just assemble it: */
+    if (asmdata_isvalidasmln(pp->udata, asmln)) {
+        bool tmp = asmdata_asmln(pp->udata, asmln);
+        //printf("Done.\n");
+        if (!tmp) {
+            fprintf(stderr, "Failed to interpret plain data line:\n");
+            //fprintf(stderr, "Around line %d:\t", lnum);
+            dump_asmln(stderr, asmln);
+            return -1;
+        }
+        return 1;
+    } else {
+        fprintf(stderr, "This instruction can't be interpreted:\n");
+        //fprintf(stderr, "Around line %d:\t", lnum);
+        
dump_asmln(stderr, asmln);
+        return -1;
+    }
+}
+
+/* Assembly modes accepted by assemble_input/assemble. */
+#define MODE_DATA 0
+#define MODE_GEN1 1
+#define MODE_GEN1X 2
+#define MODE_GENERIC 3
+#define MODE_PP 4
+
+
+/* Assembles every line read from `input` into `assembler`.
+ * In MODE_PP each line goes through a fresh preprocessor that knows the builtin
+ * "include" and "warn" macros; if modestr is non-NULL, "<modestr>.inc" is included first.
+ * In the other modes only plain data lines (and, with OLD_CODE, the legacy generators) are accepted.
+ * Returns true on success; on failure a diagnostic is printed and false is returned.
+ * All temporary resources are released on every path. */
+bool assemble_input(void* assembler, FILE* input, int32_t mode, const char* modestr) {
+    bool ok = false;
+    int32_t lnum = 1;
+    int32_t buffermax = 1024 * 1024;
+    asmpp_t* pp = NULL;
+    asmln_t* asmln = NULL;
+    char* buffer = calloc(buffermax, 1);
+    if (buffer == NULL) {
+        fprintf(stderr, "Out of memory?");
+        return false;
+    }
+
+    pp = asmpp_new(&assemble_ppout, assembler);
+    if (pp == NULL) {
+        fprintf(stderr, "Out of memory?");
+        goto done;
+    }
+    asmln_t* includeproto = asmln_new("include x");
+    asmln_t* warnproto = asmln_new("warn x");
+    if (includeproto == NULL || warnproto == NULL
+        || asmpp_quickmacro(pp, includeproto, &assemble_include) == NULL
+        || asmpp_quickmacro(pp, warnproto, &assemble_warn) == NULL) {
+        fprintf(stderr, "Out of memory?");
+        goto done;
+    }
+
+    if (modestr != NULL) {
+        /* 20 extra bytes cover "include " (8), ".inc" (4) and the terminator. */
+        char* modebuf = calloc(20+strlen(modestr),1);
+        if (modebuf == NULL) {
+            fprintf(stderr, "Out of memory?");
+            goto done;
+        }
+        strcat(modebuf, "include ");
+        strcat(modebuf, modestr);
+        strcat(modebuf, ".inc");
+        asmln_t* modeln = asmln_new(modebuf);
+        int nmode = (modeln == NULL) ? -1 : assemble_include(pp, modeln);
+        if (modeln != NULL) {
+            asmln_delete(modeln);
+        }
+        free(modebuf);
+        if (nmode < 0) {
+            fprintf(stderr, "Failed to include the definitions for mode '%s'\n", modestr);
+            goto done;
+        }
+    }
+
+    const char* line;
+    while ((line = fgets(buffer, buffermax, input)) != NULL) {
+        /* First parse the line.*/
+        asmln = asmln_new(line);
+        /* Now check for parse errors. */
+        if (asmln == NULL) {
+            fprintf(stderr, "asmln_new failed entirely!");
+            goto done;
+        }
+        if (asmln->errorcopy != NULL) {
+            dump_asmln(stdout, asmln);
+            goto done;
+        }
+        if (mode == MODE_PP) {
+            int n = asmpp_expand(pp, asmln);
+            if (n < 0) {
+                fprintf(stderr, "Failed to interpret preprocessed data around line %d:\t", lnum);
+                //dump_asmln(stderr, asmln);
+                goto done;
+            }
+        }
+        /* If it's a plain data line, just assemble it: */
+        else if (asmdata_isvalidasmln(assembler, asmln)) {
+            bool tmp = asmdata_asmln(assembler, asmln);
+            //printf("Done.\n");
+            if (!tmp) {
+                fprintf(stderr, "Failed to interpret plain data line:\n");
+                fprintf(stderr, "Around line %d:\t", lnum);
+                dump_asmln(stderr, asmln);
+                goto done;
+            }
+        }
+        #ifdef OLD_CODE
+        else if (mode == MODE_GEN1 || mode == MODE_GEN1X) {
+            bool tmp = asmgen1_asmln(assembler, asmln);
+            //printf("Done.\n");
+            if (!tmp) {
+                fprintf(stderr, "Failed to interpret generic instruction line:\n");
+                fprintf(stderr, "Around line %d:\t", lnum);
+                dump_asmln(stderr, asmln);
+                goto done;
+            }
+        }
+        else if (mode == MODE_GENERIC) {
+            bool tmp = asmgeneric_asmln(assembler, asmln);
+            //printf("Done.\n");
+            if (!tmp) {
+                fprintf(stderr, "Failed to interpret generic instruction line:\n");
+                fprintf(stderr, "Around line %d:\t", lnum);
+                dump_asmln(stderr, asmln);
+                goto done;
+            }
+        }
+        #endif
+        else {
+            fprintf(stderr, "This instruction can't be interpreted:\n");
+            fprintf(stderr, "Around line %d:\t", lnum);
+            dump_asmln(stderr, asmln);
+            goto done;
+        }
+
+        asmln_delete(asmln);
+        asmln = NULL;
+        /* fgets always terminates the string, so the 1 MiB buffer need not be cleared per line. */
+        lnum++;
+    }
+    ok = true;
+
+done:
+    if (asmln != NULL) {
+        asmln_delete(asmln);
+    }
+    asmpp_delete(pp); /* accepts NULL */
+    free(buffer);
+    return ok;
+}
+
+/* Opens `filename` for reading and assembles it with assemble_input.
+ * Returns false if the file cannot be opened or assembly fails. */
+bool assemble(void* assembler, const char* filename, int32_t mode, const char* modestr) {
+
+    FILE* input = fopen(filename, "r");
+
+    if (input == NULL) {
+        fprintf(stderr, "Bad filename?");
+        return false;
+    }
+
+    bool ok = assemble_input(assembler, input, mode, modestr);
+
+    fclose(input);
+
+    return ok;
+}
+
+/* In readable mode prints a hex dump of section `sectionnum`, 16 bytes per row, preceded by
+ * its base/reserved/filled/buffered sizes. In non-readable mode nothing is written.
+ * Always returns true. */
+bool produce_section(void* assembler, int sectionnum, FILE* output, bool readable) {
+    asmdata_t* asmdata = assembler;
+    asmdata_section_t* section = asmdata->sections[sectionnum];
+    if (readable) {
+        fprintf(output, "\t[base=0x%016llX reserved=0x%016llX filled=0x%08llX buffered=0x%08llX]\n", (long long)(section->virtualoffset), (long long)(section->reservedsize), (long long)(section->bufferfilled), (long long)(section->buffersize));
+        int32_t i = 0;
+        while (i < section->bufferfilled) {
+            if (i > 0 && i % 16 == 0) {
+                fprintf(output, "\n");
+            }
+            if (i % 16 == 0) {
+                fprintf(output, "\tS%04d+0x%016x:\t", sectionnum, i);
+            }
+            /* Go through unsigned char so a byte >= 0x80 in a plain char buffer prints as two digits.
+             * NOTE(review): assumes buffer holds bytes, as the fwrite calls in produce_output suggest. */
+            fprintf(output, "%02x ", (unsigned int)(unsigned char)section->buffer[i]);
+            i++;
+        }
+        fprintf(output, "\n\n");
+    }
+    return true;
+}
+
+bool produce_output(void* assembler,
FILE* output, bool readable, bool header, int32_t pagesize) {
+    /* Writes the assembled result to `output`.
+     *  - header && readable: lists the sections and hex-dumps each of them, then the
+     *    references and symbols.
+     *  - header only: writes the "asmdata.fileheader" section followed by every section,
+     *    each zero-padded up to a multiple of pagesize bytes.
+     *  - readable only: prints the reference and symbol tables.
+     * Returns false if a write fails, the header section is missing, or pagesize is not positive
+     * when padding is required. */
+    asmdata_t* asmdata = assembler;
+    int32_t sectionnum;
+    int32_t offset = 0;
+    if (header && readable) {
+        fprintf(output, "SECTIONS (%d):\n", asmdata->nsections);
+        for (sectionnum = 0; sectionnum < asmdata->nsections; sectionnum++) {
+            fprintf(output, "\tSECTION %04d: '%s'\n", sectionnum, asmdata->sections[sectionnum]->namecopy);
+        }
+        for (sectionnum = 0; sectionnum < asmdata->nsections; sectionnum++) {
+            fprintf(output, "[SECTION %04d: '%s']\n", sectionnum, asmdata->sections[sectionnum]->namecopy);
+            if (!produce_section(assembler, sectionnum, output, readable)) {
+                return false;
+            }
+        }
+        fprintf(output, "\n");
+    }
+    else if (header) {
+        if (pagesize <= 0) {
+            /* The padding loops below compute offset % pagesize. */
+            return false;
+        }
+        asmdata_section_t* hdrsec = asmdata_findsection(asmdata, "asmdata.fileheader", false);
+        if (hdrsec == NULL || hdrsec->buffer == NULL) {
+            return false;
+        }
+        if (fwrite(hdrsec->buffer, 1, hdrsec->bufferfilled, output) != (size_t)hdrsec->bufferfilled) {
+            return false;
+        }
+        offset += hdrsec->bufferfilled;
+        while ((offset % pagesize) != 0) {
+            char c = 0;
+            if (fwrite(&c, 1, 1, output) != 1) {
+                return false;
+            }
+            offset++;
+        }
+        for (sectionnum = 0; sectionnum < asmdata->nsections; sectionnum++) {
+            asmdata_section_t* sec = asmdata->sections[sectionnum];
+            fprintf(stderr, "Writing section '%s' (#%d) at file offset %d\n", sec->namecopy, sectionnum, offset);
+            if (sec->bufferfilled > 0 && fwrite(sec->buffer, 1, sec->bufferfilled, output) != (size_t)sec->bufferfilled) {
+                return false;
+            }
+            offset += sec->bufferfilled;
+            while ((offset % pagesize) != 0) {
+                char c = 0;
+                if (fwrite(&c, 1, 1, output) != 1) {
+                    return false;
+                }
+                offset++;
+            }
+        }
+        // We're done now
+        return true;
+    }
+    if (readable) {
+        fprintf(output, "REFERENCES (%d):\n", asmdata->nreferences);
+        int32_t refnum;
+        for (refnum = 0; refnum < asmdata->nreferences; refnum++) {
+            fprintf(output, "\tREFERENCE %04d -> SYMBOL %04d\n", refnum, asmdata->references[refnum].symbolindex);
+        }
+        fprintf(output, "\n");
+        fprintf(output, "SYMBOLS (%d):\n", asmdata->nsymbols);
+        int32_t symnum;
+        for (symnum = 0; symnum < asmdata->nsymbols; symnum++) {
+            fprintf(output, "\tSYMBOL %04d -> '%s'\n", symnum, asmdata->symbols[symnum].namecopy);
+        }
+        fprintf(output, "\n");
+    }
+    return true;
+}
+
+/* Command-line driver. Arguments are processed left to right, so options such as --mode
+ * only affect the inputs that follow them. Every argument that is not an option is
+ * assembled as an input file. Returns 0 on success and -1 on any error. */
+int main(int argc, char** argv) {
+    //argc = 4;
+    //argv = (char* []){ "test", "--stdout", "--readable", "--stdin"/*"--ast-only","--input","C:\\Users\\Zak\\source\\repos\\ZCC\\Debug\\test2.c"*/ };
+    int argi = 1;
+    FILE* output = NULL;
+    void* assembler = asmdata_new();
+    bool somethingtodo = false;
+    bool readable = false;
+    bool finalise = true;
+    bool produceheader = true;
+
+    int32_t pagesize = 1024;
+    const char* hint1 = "gen1";
+    const char* hint2 = NULL;
+    int32_t inthint = 0;
+    int32_t mode = MODE_PP;
+    const char* modestr = NULL;
+
+    if (assembler == NULL) {
+        fprintf(stderr, "ERROR: Failed to create the assembler state\n");
+        return -1;
+    }
+
+    while (argi < argc) {
+        if (!strcmp(argv[argi], "--usage")) {
+            usage(argc, argv, argi);
+            return 0;
+        }
+        else if (!strcmp(argv[argi], "--mode")) {
+            /* Only the missing argument is an error here; whether an output
+             * has already been opened is irrelevant to --mode. */
+            if (argi + 1 >= argc) {
+                usage(argc, argv, argi);
+                return -1;
+            }
+            argi++;
+            modestr = argv[argi];
+        }
+        else if (!strcmp(argv[argi], "--output") || (argv[argi][0] == '-' && argv[argi][1] == 'o' && argv[argi][2] == 0)) {
+            if (output != NULL || argi + 1 >= argc) {
+                usage(argc, argv, argi);
+                return -1;
+            }
+            argi++;
+            /* Binary mode: the non-readable output is raw section data and must not
+             * undergo newline translation on platforms that distinguish text files. */
+            output = fopen(argv[argi], "w+b");
+            if (output == NULL) {
+                fprintf(stderr, "ERROR: Failed to open output file '%s'\n", argv[argi]);
+                return -1;
+            }
+        }
+        else if (argv[argi][0] == '-' && argv[argi][1] == 'o') {
+            /* "-oFILE" form: the file name follows the option letter directly. */
+            if (output != NULL) {
+                usage(argc, argv, argi);
+                return -1;
+            }
+            output = fopen(argv[argi]+2, "w+b");
+            if (output == NULL) {
+                fprintf(stderr, "ERROR: Failed to open output file '%s'\n", argv[argi]+2);
+                return -1;
+            }
+        }
+        else if (!strcmp(argv[argi], "--stdin")) {
+            fprintf(stderr, "NOTE: Assembling from standard input\n");
+            if (!assemble_input(assembler, stdin, mode, modestr)) {
+                fprintf(stderr, "ERROR: Failed to assemble from standard input\n");
+                
return -1;
+            }
+            somethingtodo = true;
+        }
+        else if (!strcmp(argv[argi], "--stdout")) {
+            fprintf(stderr, "NOTE: Dumping to standard output (use with --readable to easily inspect output)\n");
+            output = stdout;
+        }
+        else if (!strcmp(argv[argi], "--readable")) {
+            fprintf(stderr, "NOTE: Will attempt to produce 'readable' output\n");
+            readable = true;
+        }
+        else if (!strcmp(argv[argi], "--nofinalise")) {
+            fprintf(stderr, "NOTE: Output will not be finalised\n");
+            finalise = false;
+        }
+        else if (!strcmp(argv[argi], "--noheader")) {
+            fprintf(stderr, "NOTE: Output will not include file header\n");
+            produceheader = false;
+        }
+        else {
+            /* Anything that is not an option is an input file. */
+            if (!assemble(assembler, argv[argi], mode, modestr)) {
+                fprintf(stderr, "ERROR: Failed to assemble '%s'\n", argv[argi]);
+                return -1;
+            }
+            somethingtodo = true;
+        }
+        argi++;
+    }
+
+    if (!somethingtodo || output == NULL) {
+        fprintf(stderr, "ERROR: Nothing to do? Please define an input and an output!\n");
+        usage(argc, argv, 0);
+        return -1;
+    }
+
+    if (finalise && !asmdata_finalise(assembler)) {
+        fprintf(stderr, "ERROR: Finalisation failed.\n");
+        return -1;
+    }
+
+    if (produceheader && !asmdata_produceheader(assembler, pagesize, hint1, hint2, inthint)) {
+        fprintf(stderr, "ERROR: Failed to produce header\n");
+        /* Stop here like every other failure instead of writing output without a valid header. */
+        return -1;
+    }
+
+    if (!produce_output(assembler, output, readable, produceheader, pagesize)) {
+        fprintf(stderr, "ERROR: Failed to produce output\n");
+        return -1;
+    }
+
+    if (output != stdout) {
+        fclose(output);
+    }
+    output = NULL;
+
+    fprintf(stderr, "FINISHED.\n");
+
+    return 0;
+}
diff --git a/rv64.inc b/rv64.inc
new file mode 100644
index 0000000..97e74ac
--- /dev/null
+++ b/rv64.inc
@@ -0,0 +1,276 @@
+%def: x0 0
+%def: x1 1
+%def: x2 2
+%def: x3 3
+%def: x4 4
+%def: x5 5
+%def: x6 6
+%def: x7 7
+%def: x8 8
+%def: x9 9
+%def: x10 10
+%def: x11 11
+%def: x12 12
+%def: x13 13
+%def: x14 14
+%def: x15 15
+%def: x16 16
+%def: x17 17
+%def: x18 18
+%def: x19 19
+%def: x20 20
+%def: x21 21
+%def: x22 22
+%def: x23 23
+%def: x24 24
+%def: x25 25
+%def: x26 26
+%def: x27 27
+%def: x28 28
+%def: x29 29
+%def: x30 30
+%def: x31 31
+
+; ABI register names
+%def: zero x0
+%def: ra x1
+%def: sp x2
+%def: gp x3
+%def: tp x4
+%def: t0 x5
+%def: t1 x6
+%def: t2 x7
+%def: s0 x8
+%def: s1 x9
+%def: a0 x10
+%def: a1 x11
+%def: a2 x12
+%def: a3 x13
+%def: a4 x14
+%def: a5 x15
+%def: a6 x16
+%def: a7 x17
+
+%def: fp s0
+
+; Instruction format encoders. Each one emits a single 32-bit word.
+%macro: rv.rtype op, dst, fn3, src1, src2, fn7
+    data32 (((((op | (dst << 7)) | (fn3 << 12)) | (src1 << 15)) | (src2 << 20)) | (fn7 << 25))
+%endmacro
+
+%macro: rv.itype op, dst, fn3, src1, imm
+    data32 ((((op | (dst << 7)) | (fn3 << 12)) | (src1 << 15)) | (imm << 20))
+%endmacro
+
+; S-type with the immediate already split: imm04 goes to bits 11:7, imm511 to bits 31:25.
+%macro: rv.stype op, imm04, fn3, src1, src2, imm511
+    data32 (((((op | (imm04 << 7)) | (fn3 << 12)) | (src1 << 15)) | (src2 << 20)) | (imm511 << 25))
+%endmacro
+
+; S-type taking the whole 12-bit immediate.
+%macro: rv.stype op, imm, fn3, src1, src2
+    rv.stype op, (imm & 0x1F), fn3, src1, src2, (imm >> 5)
+%endmacro
+
+%macro: rv.utype op, dst, imm
+    data32 ((op | (dst << 7)) | (imm << 12))
+%endmacro
+
+; B-type: the 12-bit S-type field carries imm[12] in bit 11, imm[10:5] in bits 10:5,
+; imm[4:1] in bits 4:1 and imm[11] in bit 0. The mask must be 0x7FE so imm[10] is kept.
+%macro: rv.btype op, imm, fn3, src1, src2
+    rv.stype op, (((((imm >> 12) & 1) << 11) | (imm & 0x7FE)) | ((imm >> 11) & 1)), fn3, src1, src2
+%endmacro
+
+; J-type: the 20-bit U-type field carries imm[20] in bit 19, imm[10:1] in bits 18:9,
+; imm[11] in bit 8 and imm[19:12] in bits 7:0.
+%macro: rv.jtype op, dst, imm
+    rv.utype op, dst, ((((((imm >> 20) & 1) << 19) | (((imm >> 1) & 0x3FF) << 9)) | (((imm >> 11) & 1) << 8)) | ((imm >> 12) & 0xFF))
+%endmacro
+
+%def: OP_IMM 0b0010011
+%def: OP_ALU 0b0110011
+
+%def: FN3_ADD 0b000
+%def: FN3_SLL 0b001
+%def: FN3_SLT 0b010
+%def: FN3_SLTU 0b011
+%def: FN3_XOR 0b100
+%def: FN3_SRL 0b101
+%def: FN3_OR 0b110
+%def: FN3_AND 0b111
+
+; Register-immediate ALU instructions
+%macro: addi dst, src, n
+    rv.itype OP_IMM, dst, FN3_ADD, src, n
+%endmacro
+%macro: slti dst, src, n
+    rv.itype OP_IMM, dst, FN3_SLT, src, n
+%endmacro
+%macro: sltiu dst, src, n
+    rv.itype OP_IMM, dst, FN3_SLTU, src, n
+%endmacro
+%macro: xori dst, src, n
+    rv.itype OP_IMM, dst, FN3_XOR, src, n
+%endmacro
+%macro: ori dst, src, n
+    rv.itype OP_IMM, dst, FN3_OR, src, n
+%endmacro
+%macro: andi dst, src, n
+    rv.itype OP_IMM, dst, FN3_AND, src, n
+%endmacro
+%macro: slli dst, src, n
+    rv.itype OP_IMM, dst, FN3_SLL, src, n
+%endmacro
+%macro: srli dst, src, n
+    rv.itype OP_IMM, dst, FN3_SRL, src, n
+%endmacro
+
+; Register-register ALU instructions (R-type; the last argument is funct7)
+%macro: add dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_ADD, src1, src2, 0b0000000
+%endmacro
+%macro: sub dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_ADD, src1, src2, 0b0100000
+%endmacro
+%macro: sll dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_SLL, src1, src2, 0b0000000
+%endmacro
+%macro: srl dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_SRL, src1, src2, 0b0000000
+%endmacro
+%macro: sra dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_SRL, src1, src2, 0b0100000
+%endmacro
+%macro: slt dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_SLT, src1, src2, 0b0000000
+%endmacro
+%macro: sltu dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_SLTU, src1, src2, 0b0000000
+%endmacro
+%macro: or dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_OR, src1, src2, 0b0000000
+%endmacro
+%macro: and dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_AND, src1, src2, 0b0000000
+%endmacro
+%macro: xor dst, src1, src2
+    rv.rtype OP_ALU, dst, FN3_XOR, src1, src2, 0b0000000
+%endmacro
+
+%macro: snez dst, src2
+    sltu dst, zero, src2
+%endmacro
+
+; Loads: dst = memory[src + off]
+%macro: lb dst, off(src)
+    rv.itype 0b0000011, dst, 0b000, src, off
+%endmacro
+%macro: lbu dst, off(src)
+    rv.itype 0b0000011, dst, 0b100, src, off
+%endmacro
+%macro: lh dst, off(src)
+    rv.itype 0b0000011, dst, 0b001, src, off
+%endmacro
+%macro: lhu dst, off(src)
+    rv.itype 0b0000011, dst, 0b101, src, off
+%endmacro
+%macro: lw dst, off(src)
+    rv.itype 0b0000011, dst, 0b010, src, off
+%endmacro
+%macro: ld dst, off(src)
+    rv.itype 0b0000011, dst, 0b011, src, off
+%endmacro
+
+; Stores: memory[src2 + off] = src1
+%macro: sb src1, off(src2)
+    rv.stype 0b0100011, off, 0b000, src2, src1
+%endmacro
+%macro: sh src1, off(src2)
+    rv.stype 0b0100011, off, 0b001, src2, src1
+%endmacro
+%macro: sw src1, off(src2)
+    rv.stype 0b0100011, off, 0b010, src2, src1
+%endmacro
+%macro: sd src1, off(src2)
+    rv.stype 0b0100011, off, 0b011, src2, src1
+%endmacro
+
+
+; Jumps
+%macro: jal dst, imm
+    rv.jtype 0b1101111, dst, imm
+%endmacro
+%macro: jalr dst, src, imm
+    rv.itype 0b1100111, dst, 0b000, src, imm
+%endmacro
+%macro: jalr src
+    jalr x1, src, 0
+%endmacro
+%macro: j imm
+    jal zero, imm
+%endmacro
+
+; Conditional branches
+%macro: beq src1, src2, imm
+    rv.btype 0b1100011, imm, 0b000, src1, src2
+%endmacro
+%macro: bne src1, src2, imm
+    rv.btype 0b1100011, imm, 0b001, src1, src2
+%endmacro
+
+%macro: beqz src, imm
+    beq src, zero, imm
+%endmacro
+
+; NOTE(review): a three-parameter sltiu macro is also defined earlier in this file.
+%macro: sltiu dst, src, imm
+    rv.itype 0b0010011, dst, 0b011, src, imm
+%endmacro
+%macro: seqz dst, src
+    sltiu dst, src, 1
+%endmacro
+
+%macro: lui dst, imm
+    rv.utype 0b0110111, dst, imm
+%endmacro
+
+; li/la load the upper 20 bits with lui and add the low 12 bits with addi.
+; NOTE(review): addi sign-extends its 12-bit immediate, so values with bit 11 set
+; presumably need the lui part rounded up by 0x800 first - confirm and fix.
+%macro: li dst, imm
+    lui dst, (imm >> 12)
+    addi dst, dst, (imm & 0xFFF)
+%endmacro
+%macro: la dst, imm
+    lui dst, (imm >> 12)
+    addi dst, dst, (imm & 0xFFF)
+%endmacro
+
+; call links through x1 (ra); ret jumps back through it.
+%macro: call imm
+    jal x1, imm
+%endmacro
+%macro: ret
+    jalr x0, x1, 0
+%endmacro
+
+%macro: mv dst, src
+    addi dst, src, 0
+%endmacro
+
+# RV32M
+%macro: mul dst, src1, src2
+    rv.rtype 0b0110011, dst, 0b000, src1, src2, 0b0000001
+%endmacro
+%macro: div dst, src1, src2
+    rv.rtype 0b0110011, dst, 0b100, src1, src2, 0b0000001
+%endmacro
+%macro: divu dst, src1, src2
+    rv.rtype 0b0110011, dst, 0b101, src1, src2, 0b0000001
+%endmacro
+%macro: rem dst, src1, src2
+    rv.rtype 0b0110011, dst, 0b110, src1, src2, 0b0000001
+%endmacro
+%macro: remu dst, src1, src2
+    rv.rtype 0b0110011, dst, 0b111, src1, src2, 0b0000001
+%endmacro
+
+# RV64I
+%macro: addiw dst, src, imm
+    rv.itype 0b0011011, dst, FN3_ADD, src, imm
+%endmacro
+
+%macro: sext.w dst, src
+    addiw dst, src, 0
+%endmacro
+
+;rv.itype OP_IMM, 8, FN3_ADD, 8, 8
+;addi 8, 8, 8
+;ld dst, 8(8)
+
+%macro: .global n
+    data.symbol n, 4096
+%endmacro
+