Initial commit of the new version of my assembler, complete with macro definitions for RV64 instructions

This commit is contained in:
Zak Yani Star Fenton 2025-06-05 02:50:01 +10:00
parent 8b549c70e3
commit a79c7ffaf2
8 changed files with 3202 additions and 0 deletions

897
asmdata.c Normal file
View File

@ -0,0 +1,897 @@
#include "asmdata.h"
#include <stdlib.h>
//#include <stdio.h> // Just for debugging. TODO: Remove.
/* Counts the bytes before the terminating NUL of str.
 * A NULL pointer is treated as a string of length 0.
 */
static int32_t asmdata_strlen_(const char* str) {
    int32_t count = 0;
    if (str != NULL) {
        const char* cursor;
        for (cursor = str; *cursor != '\0'; cursor++) {
            count++;
        }
    }
    return count;
}
/* Allocates a zero-filled buffer of maxlen + 1 bytes and copies at most maxlen
 * characters of str into it, stopping early at the first NUL.
 * A NULL str produces an empty string. Returns NULL only if the allocation fails.
 * The caller owns the returned buffer.
 */
static const char* asmdata_strndup_(const char* str, int maxlen) {
    char* copy = calloc(maxlen + 1, 1);
    if (copy == NULL || str == NULL) {
        return (const char*)copy;
    }
    int32_t pos = 0;
    while (pos < maxlen && str[pos] != 0) {
        copy[pos] = str[pos];
        pos++;
    }
    /* The buffer was zero-filled, so the copy is already terminated. */
    return (const char*)copy;
}
/* Returns a freshly allocated copy of str that the caller owns.
 * Because the length helper treats NULL as length 0, a NULL str yields an
 * allocated empty string rather than NULL. Returns NULL only when the
 * allocation itself fails.
 */
char* asmdata_strdup_(const char* str) {
    /* The helper hands back a const-qualified pointer to memory we just
     * allocated; the explicit cast makes dropping the qualifier intentional. */
    return (char*)asmdata_strndup_(str, asmdata_strlen_(str));
}
/* Byte-wise string equality.
 * Identical pointers compare equal immediately. NULL is measured as length 0,
 * so a NULL pointer compares equal to an empty string.
 */
bool asmdata_streq_(const char* a, const char* b) {
    if (a == b) {
        return true;
    }
    const int32_t len = asmdata_strlen_(a);
    if (len != asmdata_strlen_(b)) {
        return false;
    }
    int32_t k = 0;
    while (k < len && a[k] == b[k]) {
        k++;
    }
    return k == len;
}
/* Converts one digit character to its numeric value in radix r.
 *
 * a: the character to convert ('0'-'9', 'a'-'f' or 'A'-'F').
 * r: the radix, expected to be in the range 2..16.
 *
 * Returns the digit value, or -1 if the character is not a digit or its value
 * is not strictly below the radix (so '2' is rejected in binary and 'f' is
 * only accepted when r is 16).
 */
int asmdata_atoll_val_(char a, int r) {
    static const char lower[] = "0123456789abcdef";
    static const char upper[] = "0123456789ABCDEF";
    int d;
    /* Only the 16 digit characters are scanned, never the table terminators. */
    for (d = 0; d < 16; d++) {
        if (a == lower[d] || a == upper[d]) {
            return (d < r) ? d : -1;
        }
    }
    return -1;
}
/* Parses the leading digits of a as an unsigned number in radix r.
 * Parsing stops at the terminating NUL or at the first character that is not
 * accepted as a digit; a string with no leading digits yields 0.
 * No sign, prefix or overflow handling is performed here.
 */
long long asmdata_atoll_r_(const char* a, int r) {
    long long total = 0;
    const char* p;
    for (p = a; *p != 0; p++) {
        int digit = asmdata_atoll_val_(*p, r);
        if (digit < 0) {
            break;
        }
        total = total * r + digit;
    }
    return total;
}
/* Local redeclaration of the C library's atoll (used for the decimal case below). */
long long atoll(const char*a);
/* Converts a numeric token to an integer.
 * A "0x" prefix selects hexadecimal and "0b" selects binary (lowercase prefixes
 * only); everything else is handed to atoll(). A NULL token yields 0.
 */
long long asmdata_atoll_(const char* a) {
    if (a == NULL) {
        return 0;
    }
    /* a[1] is only inspected once a[0] is known to be '0', so it is never read past the terminator. */
    if (a[0] != '0') {
        return atoll(a);
    }
    switch (a[1]) {
    case 'x':
        return asmdata_atoll_r_(a + 2, 16);
    case 'b':
        return asmdata_atoll_r_(a + 2, 2);
    default:
        return atoll(a);
    }
}
/* Creates an empty hash map with n bucket slots.
 *
 * n:       number of buckets; must be at least 1 because lookups compute
 *          hash % ntable.
 * deletef: optional callback, invoked by asmdata_map_set() on the old key/value
 *          when an existing entry is replaced; may be NULL.
 *
 * Returns the new map, or NULL if n is not positive or an allocation fails.
 */
asmdata_map_t* asmdata_map_new(int n, asmdata_iterf_t deletef) {
    if (n < 1) {
        return NULL; // A zero-sized table would make every later "hash % ntable" divide by zero
    }
    asmdata_map_t* map = malloc(sizeof(asmdata_map_t));
    if (map == NULL) {
        return NULL;
    }
    map->table = calloc(n, sizeof(void*));
    if (map->table == NULL) {
        free(map);
        return NULL;
    }
    map->ntable = n;
    map->deletef = deletef;
    //TODO map->udata = 0;
    return map;
}
/* Hashes a NUL-terminated key with a multiply-by-33 rolling hash seeded with
 * 456123. A NULL key hashes like the empty string.
 *
 * The accumulation is done in unsigned arithmetic so that long keys wrap around
 * in a well-defined way instead of overflowing a signed int. The result is
 * masked to 28 bits, so it is always non-negative and safe to use with "%".
 */
int asmdata_map_hash(char* key) {
    unsigned int h = 456123u;
    if (key != NULL) {
        const char* p;
        for (p = key; *p != 0; p++) {
            h = (h * 33u) + (unsigned int)*p;
        }
    }
    return (int)(h & 0x0FFFFFFFu);
}
/* Associates value with key in map.
 *
 * If an entry with an equal key already exists, the map's deletef callback (if
 * any) is invoked on the old key/value and the entry is then overwritten with
 * the new key pointer and value. Otherwise a new entry is pushed onto the front
 * of the key's bucket chain.
 *
 * The key pointer is stored as-is, not copied. If a new entry cannot be
 * allocated the map is left unchanged; the function has no way to report this.
 */
void asmdata_map_set(asmdata_map_t* map, char* key, void* value) {
    int hash = asmdata_map_hash(key);
    int idx = hash % map->ntable;
    asmdata_mapentry_t* e;
    for (e = map->table[idx]; e != NULL; e = e->next) {
        if (e->hash == hash && asmdata_streq_(key, e->key)) {
            asmdata_iterf_t deletef = map->deletef;
            if (deletef != NULL) {
                deletef(map, e->key, e->value);
            }
            e->key = key;
            e->value = value;
            return;
        }
    }
    // If not present, create a new one.
    e = malloc(sizeof(asmdata_mapentry_t));
    if (e == NULL) {
        // TODO: Report fatal error. Until then, bail out rather than dereference NULL.
        return;
    }
    e->hash = hash;
    e->key = key;
    e->value = value;
    e->next = map->table[idx];
    map->table[idx] = e;
}
/* Looks up key in map and returns the stored value, or NULL when no entry has
 * an equal key. A stored NULL value is indistinguishable from "not found".
 */
void* asmdata_map_get(asmdata_map_t* map, char* key) {
    const int wanted = asmdata_map_hash(key);
    asmdata_mapentry_t* node;
    for (node = map->table[wanted % map->ntable]; node != NULL; node = node->next) {
        /* Compare the cached hash first so most mismatches skip the string compare. */
        if (node->hash != wanted) {
            continue;
        }
        if (asmdata_streq_(key, node->key)) {
            return node->value;
        }
    }
    return NULL;
}
/* Releases a map created by asmdata_map_new(): every chained entry, the bucket
 * table and the map itself. Passing NULL is a no-op.
 *
 * Keys and values are NOT released here; the deletef callback is deliberately
 * not invoked during teardown (it is only used when an entry is replaced).
 */
void asmdata_map_delete(asmdata_map_t* map) {
    if (map == NULL) {
        return;
    }
    int i;
    for (i = 0; i < map->ntable; i++) {
        asmdata_mapentry_t* e = map->table[i];
        while (e != NULL) {
            /* Remember the successor before freeing the current node. */
            asmdata_mapentry_t* next = e->next;
            free(e);
            e = next;
        }
        map->table[i] = NULL;
    }
    free(map->table);
    free(map);
}
/* Allocates a zero-initialised section and gives it its own copy of name.
 * Returns NULL if the section itself cannot be allocated. The result of the
 * name copy is stored without being checked.
 */
asmdata_section_t* asmdata_section_new(const char* name) {
    asmdata_section_t* section = calloc(1, sizeof(asmdata_section_t));
    if (section == NULL) {
        return NULL;
    }
    section->namecopy = asmdata_strdup_(name);
    return section;
}
/* Extends a section by nbytes.
 *
 * section:  the section to grow.
 * nbytes:   number of bytes to add to the reserved total.
 * willfill: false to only bump the reserved size (no memory is touched);
 *           true when the caller is about to write the new bytes, in which case
 *           the backing buffer is grown (in 1024-byte steps) to cover the whole
 *           reserved size and bufferfilled is advanced to match.
 *
 * Returns false if the filled size would not fit in 32 bits, would exceed
 * ASMDATA_MAXFILLED, or the buffer cannot be grown. On an allocation failure
 * the existing buffer and its recorded size are left intact. Note that the
 * reserved total has already been incremented when false is returned.
 *
 * Newly grown buffer space is always zeroed, so bytes that were reserved but
 * never written read back as zero.
 */
bool asmdata_section_reserveextra(asmdata_section_t* section, int64_t nbytes, bool willfill) {
    const int32_t granularity = 1024; // Resize the buffer about a kilobyte or so at a time to prevent unnecessary realloc calls
    /* Firstly, the reserved total needs to be incremented whether filled or not. */
    section->reservedsize += nbytes;
    /* If we're not filling it, we can just leave it on the reserved total (not necessarily any need to allocate/use extra memory!). */
    if (!willfill) {
        return true;
    }
    /* Make sure it fits in 32 bits (so we don't have to worry whether size_t is 32- or 64- bits) and is below the maximum. */
    if (((int64_t)((int32_t)(section->reservedsize))) != section->reservedsize || section->reservedsize > ASMDATA_MAXFILLED) {
        return false;
    }
    if (section->reservedsize > section->buffersize) {
        int32_t newsize = (int32_t)(section->reservedsize); // Already checked that it can fit in 32 bits if filled
        int32_t remainder = newsize % granularity;
        if (remainder != 0) {
            newsize += granularity - remainder; // Round up to the next multiple of the granularity
        }
        const bool hadbuffer = (section->buffer != NULL);
        uint8_t* grown;
        if (!hadbuffer) {
            grown = calloc(1, newsize);
        }
        else {
            /* Keep the old pointer until we know the resize worked, otherwise a failure would leak it. */
            grown = realloc(section->buffer, newsize);
        }
        if (grown == NULL) {
            return false;
        }
        if (hadbuffer) {
            /* realloc leaves the extra space uninitialised; zero it to match the calloc'd initial buffer. */
            int32_t i;
            for (i = section->buffersize; i < newsize; i++) {
                grown[i] = 0;
            }
        }
        section->buffer = grown;
        section->buffersize = newsize;
    }
    section->bufferfilled = (int32_t)(section->reservedsize);
    return true;
}
/* Releases a section together with its name copy and data buffer.
 * Accepts NULL. Always returns NULL so callers can write
 * "ptr = asmdata_section_delete(ptr);".
 */
void* asmdata_section_delete(asmdata_section_t* section) {
    if (section == NULL) {
        return NULL;
    }
    if (section->namecopy != NULL) {
        free((void*)section->namecopy);
        section->namecopy = NULL;
    }
    uint8_t* data = section->buffer;
    if (data != NULL) {
        free(data);
        section->buffer = NULL;
        section->bigendian = false;
        section->bufferfilled = 0;
        section->buffersize = 0;
        section->reservedsize = 0;
        section->virtualoffset = 0;
    }
    free(section);
    return NULL;
}
/* Allocates a new assembler-data context with extended syntax enabled and
 * zero-initialised, fixed-capacity tables for sections (ASMDATA_MAXSECTIONS),
 * symbols (ASMDATA_MAXSYMBOLS) and references (ASMDATA_MAXREFERENCES).
 *
 * Returns NULL if any of the allocations fails; nothing is leaked in that case.
 */
asmdata_t* asmdata_new() {
    asmdata_t* result = calloc(1, sizeof(asmdata_t));
    if (result == NULL) {
        return NULL;
    }
    result->extsyntax = true;
    result->sections = calloc(ASMDATA_MAXSECTIONS, sizeof(asmdata_section_t*));
    result->symbols = calloc(ASMDATA_MAXSYMBOLS, sizeof(asmdata_symbol_t));
    result->references = calloc(ASMDATA_MAXREFERENCES, sizeof(asmdata_reference_t));
    /* Each table must be checked individually; free(NULL) is harmless for the ones that failed. */
    if (result->sections == NULL || result->symbols == NULL || result->references == NULL) {
        free(result->sections);
        free(result->symbols);
        free(result->references);
        free(result);
        return NULL;
    }
    return result;
}
/* Destroys a context created by asmdata_new(): all sections, the symbol name
 * copies, the three tables and the context itself. Accepts NULL.
 * Always returns NULL so callers can write "ptr = asmdata_delete(ptr);".
 *
 * NOTE(review): symbol names are freed on the assumption that every namecopy
 * was produced by asmdata_strdup_, as is the case for all assignments in this file.
 */
void* asmdata_delete(asmdata_t* asmdata) {
    if (asmdata == NULL) {
        return NULL;
    }
    int32_t i;
    asmdata->activesection = NULL;
    if (asmdata->sections != NULL) {
        for (i = 0; i < asmdata->nsections; i++) {
            asmdata->sections[i] = asmdata_section_delete(asmdata->sections[i]);
        }
        free(asmdata->sections);
        asmdata->sections = NULL;
    }
    asmdata->nsections = 0;
    if (asmdata->symbols != NULL) {
        for (i = 0; i < asmdata->nsymbols; i++) {
            free((void*)asmdata->symbols[i].namecopy);
            asmdata->symbols[i].namecopy = NULL;
        }
        free(asmdata->symbols);
        asmdata->symbols = NULL;
    }
    asmdata->nsymbols = 0;
    free(asmdata->references);
    asmdata->references = NULL;
    asmdata->nreferences = 0;
    free(asmdata);
    return NULL;
}
/* Serialises the symbol and reference tables into three dedicated sections
 * ("asmdata.symbols", "asmdata.references" and "asmdata.strings") and marks
 * the context as finalised.
 *
 * Returns false, without writing anything, if the context is already finalised
 * or any of the three sections already exists, and false if one of them cannot
 * be selected/created. Returns true otherwise.
 *
 * NOTE(review): the asmdata_append* calls presumably write to the section most
 * recently chosen with asmdata_selectsection(); their return values are not
 * checked here, so a failed append is not reported - confirm whether that is
 * acceptable.
 */
bool asmdata_finalise(asmdata_t* asmdata) {
/* Finalisation will fail if there's any hint that the code has already been finalised (even if it was done manually by defining symbol
* table sections in assembly code).
*/
if (asmdata->finalised || asmdata_hassection(asmdata, "asmdata.strings") || asmdata_hassection(asmdata, "asmdata.symbols") || asmdata_hassection(asmdata, "asmdata.references")) {
return false;
}
/* The three linkage sections are created in "optimised" order: You generally need to have read the symbols section to make sense of the
* references section, so symbols are written before references. Strings (and maybe later extra debug info) are written last because they
* need not actually be read in many cases.
*/
if (asmdata_selectsection(asmdata, "asmdata.symbols") == NULL) {
return false;
}
if (asmdata_selectsection(asmdata, "asmdata.references") == NULL) {
return false;
}
if (asmdata_selectsection(asmdata, "asmdata.strings") == NULL) {
return false;
}
/* The first string will be at offset 0 (which might also imply NULL), so we can start by adding a NULL string there. An interpreter
* should generally handle NULL strings differently, but in case any are automatically read the result will be a marker: */
asmdata_selectsection(asmdata, "asmdata.strings");
const char* nullstr = "<ASMDATA-NULL-STRING>";
asmdata_appendbytes(asmdata, (uint8_t*)nullstr, asmdata_strlen_(nullstr) + 1); // Be sure to include the terminating zero byte!
int32_t i;
/* One record per symbol: the name goes into the strings section first so that its offset can be stored in the symbol record. */
for (i = 0; i < asmdata->nsymbols; i++) {
asmdata_section_t* str = asmdata_selectsection(asmdata, "asmdata.strings");
int64_t stroffset = str->reservedsize; // Offset the name is about to be written at
int32_t strlen = asmdata_strlen_(asmdata->symbols[i].namecopy);
asmdata_appendbytes(asmdata, (uint8_t*)(asmdata->symbols[i].namecopy), strlen + 1); // Make sure we include terminating zero.
/* The returned section pointer is not used; the call is made to switch the active section. */
asmdata_section_t* sym = asmdata_selectsection(asmdata, "asmdata.symbols");
/* Symbol record fields, in order: flags, section index, section offset, name offset, lhs, op, rhs, reserved. */
asmdata_appendword(asmdata, asmdata->symbols[i].flags, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->symbols[i].sectionindex, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->symbols[i].sectionoffset, ASMDATA_SIZE_64BIT);
asmdata_appendword(asmdata, stroffset, ASMDATA_SIZE_64BIT);
asmdata_appendword(asmdata, asmdata->symbols[i].x_lhs, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->symbols[i].x_op, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->symbols[i].x_rhs, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->symbols[i].reserved, ASMDATA_SIZE_32BIT);
}
/* As above, the returned pointer is unused; only the section switch matters. */
asmdata_section_t* refs = asmdata_selectsection(asmdata, "asmdata.references");
/* Reference record fields, in order: base flags, size code, extended flags, section index, section offset, symbol index, extended data. */
for (i = 0; i < asmdata->nreferences; i++) {
asmdata_appendword(asmdata, asmdata->references[i].baseflags, ASMDATA_SIZE_8BIT);
asmdata_appendword(asmdata, asmdata->references[i].size, ASMDATA_SIZE_8BIT);
asmdata_appendword(asmdata, asmdata->references[i].extflags, ASMDATA_SIZE_16BIT);
asmdata_appendword(asmdata, asmdata->references[i].sectionindex, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->references[i].sectionoffset, ASMDATA_SIZE_64BIT);
asmdata_appendword(asmdata, asmdata->references[i].symbolindex, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->references[i].extdata, ASMDATA_SIZE_32BIT);
}
asmdata->finalised = true;
return true;
}
/* Builds the "asmdata.fileheader" section describing every section of the output file.
 *
 * asmdata:  the context to describe.
 * pagesize: alignment applied to section file offsets; must be between 1 and 1 GiB.
 * hint1:    optional string stored in the strings section (NULL is recorded as offset 0).
 * hint2:    optional second string, handled like hint1.
 * inthint:  integer written verbatim into the header.
 *
 * Returns false if pagesize is out of range, the header section already exists,
 * a required section cannot be selected/created, or the number of bytes written
 * does not match the precomputed header size. Returns true otherwise.
 *
 * NOTE(review): the asmdata_append* calls presumably write to the section most
 * recently chosen with asmdata_selectsection(); their return values are not
 * checked individually, only the final size comparison catches a failure.
 */
bool asmdata_produceheader(asmdata_t* asmdata, int32_t pagesize, const char* hint1, const char* hint2, int32_t inthint) {
/* Exclude any unreasonably small/large page sizes, but allow weird/unaligned ones just in case. */
if (pagesize < 1 || pagesize >(1024 * 1024 * 1024)) {
return false;
}
/* The header can only be produced once, otherwise it would get too confusing... */
if (asmdata_hassection(asmdata, "asmdata.fileheader")) {
return false;
}
/* If the strings section hasn't already been initialised, we should do the same initialisation that would happen in
* asmdata_finalise(), ensuring that the strings section is created before the file header.
*/
if (!asmdata_hassection(asmdata, "asmdata.strings")) {
/* The first string will be at offset 0 (which might also imply NULL), so we can start by adding a NULL string there. An interpreter
* should generally handle NULL strings differently, but in case any are automatically read the result will be a marker: */
if (asmdata_selectsection(asmdata, "asmdata.strings") == NULL) {
return false;
}
const char* nullstr = "<ASMDATA-NULL-STRING>";
asmdata_appendbytes(asmdata, (uint8_t*)nullstr, asmdata_strlen_(nullstr) + 1); // Be sure to include the terminating zero byte!
}
int32_t nstrsection = asmdata_selectsection(asmdata, "asmdata.strings")->sectionnumber;
/* Now we can start with the file header itself: */
if (asmdata_selectsection(asmdata, "asmdata.fileheader") == NULL) {
return false;
}
int32_t nhdrsection = asmdata_selectsection(asmdata, "asmdata.fileheader")->sectionnumber;
/* Fixed part of the header: magic, version, page size, two hint string offsets, integer hint, then the section bookkeeping words. */
asmdata_appendbytes(asmdata, (uint8_t*)"ASMDATA1", 8);
asmdata_appendword(asmdata, 1, ASMDATA_SIZE_32BIT); // Version number
asmdata_appendword(asmdata, pagesize, ASMDATA_SIZE_32BIT);
if (hint1 == NULL) {
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT);
}
else {
/* Store the hint text in the strings section, then switch back and record its offset in the header. */
int64_t stroffset = asmdata_selectsection(asmdata, "asmdata.strings")->reservedsize;
asmdata_appendbytes(asmdata, (uint8_t*)hint1, asmdata_strlen_(hint1) + 1); // Include terminating zero byte
asmdata_selectsection(asmdata, "asmdata.fileheader");
asmdata_appendword(asmdata, stroffset, ASMDATA_SIZE_64BIT);
}
if (hint2 == NULL) {
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT);
}
else {
int64_t stroffset = asmdata_selectsection(asmdata, "asmdata.strings")->reservedsize;
asmdata_appendbytes(asmdata, (uint8_t*)hint2, asmdata_strlen_(hint2) + 1); // Include terminating zero byte
asmdata_selectsection(asmdata, "asmdata.fileheader");
asmdata_appendword(asmdata, stroffset, ASMDATA_SIZE_64BIT);
}
asmdata_appendword(asmdata, inthint, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, nstrsection, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, nhdrsection, ASMDATA_SIZE_32BIT);
asmdata_appendword(asmdata, asmdata->nsections, ASMDATA_SIZE_32BIT);
/* We need to add the section name strings before generating the section list, otherwise the newly-added strings
* won't all get added to the string section before its totals are recorded in the header.
*/
int64_t nameoffsets[ASMDATA_MAXSECTIONS];
int32_t i;
for (i = 0; i < asmdata->nsections; i++) {
nameoffsets[i] = asmdata_selectsection(asmdata, "asmdata.strings")->reservedsize;
asmdata_appendbytes(asmdata, (uint8_t*)(asmdata->sections[i]->namecopy), asmdata_strlen_(asmdata->sections[i]->namecopy) + 1); // Include terminating zero byte
}
/* TODO: Hash values. */
/* Predict the final header size: what has been written so far, plus one record per section, plus the trailer. */
int64_t hdrsize = asmdata_selectsection(asmdata, "asmdata.fileheader")->reservedsize;
hdrsize += asmdata->nsections * 8 * 8; // Each section record below is two 32-bit words and seven 64-bit words
hdrsize += 16; // Final file size and checksum fields
// NOTE: The header size is double-checked against the produced header at the end of this function
/* File offsets are assigned starting from the header size rounded up to the next page boundary. */
int64_t sectionoffset = hdrsize;
while (sectionoffset % pagesize != 0) {
sectionoffset++;
}
for (i = 0; i < asmdata->nsections; i++) {
asmdata_section_t* s = asmdata->sections[i];
asmdata_appendword(asmdata, s->bigendian ? 1 : 0, ASMDATA_SIZE_32BIT); // Encoding flags (in future may also specify compression etc.)
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_32BIT); // Content flags (in future may specify mapped/unmapped/executable/writable/etc.)
asmdata_appendword(asmdata, sectionoffset, ASMDATA_SIZE_64BIT); // Offset in file
asmdata_appendword(asmdata, s->virtualoffset, ASMDATA_SIZE_64BIT);
if (s == asmdata->activesection) { /* The size field needs to be hard-coded for the header since we're still creating it! */
asmdata_appendword(asmdata, hdrsize, ASMDATA_SIZE_64BIT); // Size in file (not including page boundary padding)
asmdata_appendword(asmdata, hdrsize, ASMDATA_SIZE_64BIT); // Size in memory (for the header section this is the same value as the size in file)
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Reserved
asmdata_appendword(asmdata, nameoffsets[i], ASMDATA_SIZE_64BIT);
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Hash (TODO)
sectionoffset += hdrsize;
}
else {
asmdata_appendword(asmdata, s->bufferfilled, ASMDATA_SIZE_64BIT); // Size in file (not including page boundary padding)
asmdata_appendword(asmdata, s->reservedsize, ASMDATA_SIZE_64BIT); // Size in memory (padded with zero by the loader)
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Reserved
asmdata_appendword(asmdata, nameoffsets[i], ASMDATA_SIZE_64BIT);
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Hash (TODO)
sectionoffset += s->bufferfilled;
}
/* Round the running offset up so the next section starts on a page boundary. */
while (sectionoffset % pagesize != 0) {
sectionoffset++;
}
}
asmdata_appendword(asmdata, sectionoffset, ASMDATA_SIZE_64BIT); // Total file size
asmdata_appendword(asmdata, 0, ASMDATA_SIZE_64BIT); // Checksum (TODO)
/* Make sure the written header size exactly matches the size we calculated before writing. */
if (asmdata->activesection->bufferfilled != hdrsize) {
return false;
}
return true;
}
/* Finds the section called sectionname.
 * When no such section exists and autocreate is set, a new section is created
 * and appended to the table, provided the table has room.
 * Returns the section, or NULL if it was not found and could not be (or was
 * not asked to be) created.
 */
asmdata_section_t* asmdata_findsection(asmdata_t* asmdata, const char* sectionname, bool autocreate) {
    int32_t n = 0;
    while (n < asmdata->nsections) {
        asmdata_section_t* candidate = asmdata->sections[n];
        if (asmdata_streq_(candidate->namecopy, sectionname)) {
            return candidate;
        }
        n++;
    }
    if (!autocreate || asmdata->nsections >= ASMDATA_MAXSECTIONS) {
        return NULL;
    }
    asmdata_section_t* created = asmdata_section_new(sectionname);
    if (created == NULL) {
        return NULL;
    }
    /* The new section's number is its index in the table. */
    created->sectionnumber = asmdata->nsections;
    asmdata->sections[asmdata->nsections] = created;
    asmdata->nsections++;
    return created;
}
/* Hook called when an input file starts. Currently a stub that ignores both
 * arguments and always reports success.
 */
bool asmdata_beginfile(asmdata_t* asmdata, const char* name) {
    (void)asmdata;
    (void)name;
    return true; // TODO init?
}
/* Hook called when an input file ends. Currently a stub that ignores both
 * arguments and always reports success.
 */
bool asmdata_endfile(asmdata_t* asmdata, const char* name) {
    (void)asmdata;
    (void)name;
    return true; // TODO cleanup/checks?
}
/* Appends a new symbol flagged ASMDATA_SYMBOL_DUMMY, named with a copy of
 * dummyname and with no section (-1). No lookup is done, so several dummy
 * symbols may share a name.
 * Returns the new symbol's index, or -1 if the symbol table is full.
 */
static int32_t asmdata_dummysymbol(asmdata_t* asmdata, const char* dummyname) {
    const int32_t slot = asmdata->nsymbols;
    if (slot >= ASMDATA_MAXSYMBOLS) {
        return -1;
    }
    asmdata_symbol_t* entry = &asmdata->symbols[slot];
    entry->flags |= ASMDATA_SYMBOL_DUMMY;
    entry->namecopy = asmdata_strdup_(dummyname);
    entry->sectionindex = -1;
    asmdata->nsymbols = slot + 1;
    return slot;
}
/* Returns the index of the first symbol whose name equals name (linear search).
 * If there is none and autocreate is set, a new undefined symbol (section
 * index -1) with a copy of the name is appended and its index returned.
 * Returns -1 when the symbol is missing and was not created, including when
 * the table is full.
 */
int32_t asmdata_findsymbol(asmdata_t* asmdata, const char* name, bool autocreate) {
    int32_t k = 0;
    while (k < asmdata->nsymbols) {
        if (asmdata_streq_(asmdata->symbols[k].namecopy, name)) {
            return k;
        }
        k++;
    }
    if (!autocreate) {
        return -1;
    }
    if (asmdata->nsymbols >= ASMDATA_MAXSYMBOLS) {
        return -1;
    }
    const int32_t slot = asmdata->nsymbols;
    asmdata_symbol_t* fresh = &asmdata->symbols[slot];
    fresh->namecopy = asmdata_strdup_(name);
    fresh->sectionindex = -1;
    asmdata->nsymbols = slot + 1;
    return slot;
}
/* Defines the symbol called name at the current end of the active section,
 * creating the symbol first if it does not exist yet.
 * Returns the symbol index, or a negative value if the symbol could not be
 * created or already has a section assigned.
 */
int32_t asmdata_symbolhere(asmdata_t* asmdata, const char* name) {
    const int32_t slot = asmdata_findsymbol(asmdata, name, true);
    if (slot < 0) {
        return slot;
    }
    asmdata_symbol_t* target = asmdata->symbols + slot;
    if (target->sectionindex != -1) {
        return -1; // It's already defined?
    }
    target->sectionindex = asmdata_activesection(asmdata)->sectionnumber;
    target->sectionoffset = asmdata_activesection(asmdata)->reservedsize;
    // TODO: Clear flags etc.?
    return slot;
}
/* Appends a zero placeholder word of the given size code and records a
 * reference to the symbol called name (created on demand) at the offset where
 * the placeholder starts.
 * Returns the index of the new reference, or -1 on failure. The placeholder
 * is written before the reference table is checked, so it stays in the section
 * if the table is full or the symbol cannot be found/created.
 */
int32_t asmdata_appendreferenceword(asmdata_t* asmdata, const char* name, int8_t size) {
    const int64_t wordoffset = asmdata_activesection(asmdata)->reservedsize;
    if (!asmdata_appendword(asmdata, 0, size)) {
        return -1;
    }
    const int32_t slot = asmdata->nreferences;
    if (slot >= ASMDATA_MAXREFERENCES) {
        return -1;
    }
    asmdata_reference_t* ref = asmdata->references + slot;
    ref->symbolindex = asmdata_findsymbol(asmdata, name, true);
    if (ref->symbolindex < 0) {
        return -1;
    }
    ref->sectionindex = asmdata_activesection(asmdata)->sectionnumber;
    ref->sectionoffset = wordoffset;
    ref->size = size;
    // TODO: Clear flags etc.?
    asmdata->nreferences = slot + 1;
    return slot;
}
/* Same as asmdata_appendreferenceword(), except that the target symbol is
 * given directly by index instead of being looked up by name.
 * Returns the index of the new reference, or -1 if the placeholder could not
 * be appended, the reference table is full, or symbol is negative.
 */
static int32_t asmdata_appendsubref_(asmdata_t* asmdata, int32_t symbol, int8_t size) {
    const int64_t wordoffset = asmdata_activesection(asmdata)->reservedsize;
    if (!asmdata_appendword(asmdata, 0, size)) {
        return -1;
    }
    const int32_t slot = asmdata->nreferences;
    if (slot >= ASMDATA_MAXREFERENCES) {
        return -1;
    }
    asmdata_reference_t* ref = asmdata->references + slot;
    ref->symbolindex = symbol;
    if (ref->symbolindex < 0) {
        return -1;
    }
    ref->sectionindex = asmdata_activesection(asmdata)->sectionnumber;
    ref->sectionoffset = wordoffset;
    ref->size = size;
    // TODO: Clear flags etc.?
    asmdata->nreferences = slot + 1;
    return slot;
}
int32_t asmdata_subx_(asmdata_t* asmdata, int32_t tt, const char* tokenstr, asmlnx_t* expr);
/* Turns one operand of an expression into a symbol index.
 *
 * - A name token resolves to the (possibly newly created) named symbol.
 * - A number token becomes a dummy symbol flagged CONST whose value is kept
 *   in sectionoffset.
 * - An open-bracket token becomes a dummy symbol flagged EXPR whose x_lhs,
 *   x_op and x_rhs refer to symbols built, in that order, from expr's
 *   left operand, operator text and right operand (recursively).
 *
 * Returns the symbol index, or -1 for any other token type or if a symbol
 * could not be created. Dummy symbols created before a failure are not rolled back.
 */
int32_t asmdata_subx_(asmdata_t* asmdata, int32_t tt, const char* tokenstr, asmlnx_t* expr) {
    if (tt == ASMT_TOKENTYPE_NAME) { // Shortcut if this part of the expression is just a simple symbol name
        return asmdata_findsymbol(asmdata, tokenstr, true);
    }
    const int32_t self = asmdata_dummysymbol(asmdata, tokenstr);
    if (self < 0) {
        return self;
    }
    asmdata_symbol_t* node = &asmdata->symbols[self];
    if (tt == ASMT_TOKENTYPE_NUMBER) {
        node->flags |= ASMDATA_SYMBOL_CONST;
        node->sectionoffset = asmdata_atoll_(tokenstr);
        return self;
    }
    if (tt != ASMT_TOKENTYPE_OPENBR) {
        return -1;
    }
    node->flags |= ASMDATA_SYMBOL_EXPR;
    node->x_lhs = asmdata_subx_(asmdata, expr->lhstype, expr->lhscopy, expr->lhsx);
    if (node->x_lhs < 0) {
        return -1;
    }
    node->x_op = asmdata_dummysymbol(asmdata, expr->opcopy);
    if (node->x_op < 0) {
        return -1;
    }
    asmdata->symbols[node->x_op].flags |= ASMDATA_SYMBOL_OP;
    node->x_rhs = asmdata_subx_(asmdata, expr->rhstype, expr->rhscopy, expr->rhsx);
    if (node->x_rhs < 0) {
        return -1;
    }
    return self;
}
/* Appends one parameter value to the active section using words of size primsize.
 *
 * - Number: the parsed integer is appended as one word.
 * - String: every character is appended as its own word (no terminator is added);
 *   characters are converted via int8_t, so bytes above 0x7F are sign-extended.
 * - Name: a placeholder word plus a reference to the named symbol is appended.
 * - Open bracket: the expression is converted to a symbol tree and a
 *   placeholder word referencing it is appended.
 *
 * Returns false for any other token type or if appending fails.
 */
static bool asmdata_appendvalue_(asmdata_t* asmdata, int32_t tokentype, const char* tokenstr, asmlnx_t* expr, int8_t primsize) {
    if (tokentype == ASMT_TOKENTYPE_NUMBER) {
        const long long number = asmdata_atoll_(tokenstr);
        return asmdata_appendword(asmdata, (int64_t)number, primsize);
    }
    if (tokentype == ASMT_TOKENTYPE_STRING) {
        const int32_t count = asmdata_strlen_(tokenstr);
        int32_t k = 0;
        while (k < count) {
            const int64_t ch = (int64_t)((int8_t)(tokenstr[k]));
            if (!asmdata_appendword(asmdata, ch, primsize)) {
                return false;
            }
            k++;
        }
        return true;
    }
    if (tokentype == ASMT_TOKENTYPE_NAME) {
        return asmdata_appendreferenceword(asmdata, tokenstr, primsize) >= 0;
    }
    if (tokentype == ASMT_TOKENTYPE_OPENBR) {
        const int32_t exprsym = asmdata_subx_(asmdata, tokentype, tokenstr, expr);
        if (exprsym < 0) {
            return false;
        }
        return asmdata_appendsubref_(asmdata, exprsym, primsize) >= 0;
    }
    return false;
}
/* Reports whether a parsed line is something asmdata_asmln() can handle.
 *
 * A NULL line or one that already carries an error is invalid. A line without
 * an instruction (plain label, empty or comment line) is valid. Otherwise the
 * instruction must be one of the data/section/symbol/align/reserve
 * instructions or, with extended syntax enabled, ".text", ".data", ".long"
 * or a ".string" with exactly one parameter.
 */
bool asmdata_isvalidasmln(asmdata_t* asmdata, asmln_t* asmln) {
    static const char* const recognised[] = {
        "data8", "data16", "data32", "data64",
        "data.section", "data.symbol", "align", "reserve"
    };
    if (asmln == NULL || asmln->errorcopy != NULL) {
        return false; // If there's a major error it's obviously not a valid data line
    }
    if (asmln->instrcopy == NULL) {
        return true; // No "instruction" part: a plain label or an empty/comment line, which is perfectly valid
    }
    if (asmdata->extsyntax) {
        if (asmdata_streq_(asmln->instrcopy, ".text") || asmdata_streq_(asmln->instrcopy, ".data")) {
            return true;
        }
        if (asmdata_streq_(asmln->instrcopy, ".string") && asmln->nparams == 1) {
            return true;
        }
        // NOTE(review): the original tested ".long" twice; a second directive may have been intended.
        if (asmdata_streq_(asmln->instrcopy, ".long")) {
            return true;
        }
    }
    // For normal lines we recognise anything that is data or a simple section/symbol/linkage instruction
    unsigned k;
    for (k = 0; k < sizeof(recognised) / sizeof(recognised[0]); k++) {
        if (asmdata_streq_(asmln->instrcopy, recognised[k])) {
            return true;
        }
    }
    return false;
}
/* Appends every parameter of asmln to the active section as a value of size
 * primsize. Fails (recording an error message on the line) if the line has no
 * parameters or if any value cannot be appended.
 */
static bool asmdata_appendparams_(asmdata_t* asmdata, asmln_t* asmln, int8_t primsize) {
    if (asmln->nparams < 1) {
        asmln->errorcopy = asmdata_strdup_("data instructions generally expect at least one value, otherwise they are probably erroneous");
        return false;
    }
    int32_t i;
    for (i = 0; i < asmln->nparams; i++) {
        if (!asmdata_appendvalue_(asmdata, asmln->paramtype[i], asmln->paramcopy[i], asmln->paramx[i], primsize)) {
            asmln->errorcopy = asmdata_strdup_("invalid data value");
            return false;
        }
    }
    return true;
}
/* Processes one parsed assembly line that asmdata_isvalidasmln() accepts.
 *
 * A label on the line is defined at the current position first. The
 * instruction is then handled:
 *   .text / .data (extended syntax)  select the section "text" / "data"
 *   data.section [name]              select a section, or reset the active one
 *   data.symbol name flags           OR flags into a not-yet-defined symbol
 *   align n                          pad the reserved size to a multiple of n
 *   reserve n                        add n bytes to the reserved size
 *   data8/16/32/64                   append each parameter as a word
 *   .string (extended syntax)        like data8, followed by a zero byte
 *   .long (extended syntax)          treated like data64
 *
 * Returns true on success. On failure returns false and, where asmln is not
 * NULL, stores an allocated error message in asmln->errorcopy.
 */
bool asmdata_asmln(asmdata_t* asmdata, asmln_t* asmln) {
    if (!asmdata_isvalidasmln(asmdata, asmln)) {
        if (asmln != NULL && asmln->errorcopy == NULL) {
            asmln->errorcopy = asmdata_strdup_("Not a valid data instruction");
        }
        return false;
    }
    if (asmln->labelcopy != NULL) {
        int32_t idx = asmdata_symbolhere(asmdata, asmln->labelcopy);
        if (idx < 0) {
            asmln->errorcopy = asmdata_strdup_("Failed to create symbol (label already defined?)");
            return false;
        }
    }
    if (asmln->instrcopy == NULL && asmln->nparams == 0) {
        // No instruction. We're done! (Parameters were also checked to be 0, just to be pedantic.)
        return true;
    }
    if (asmdata->extsyntax && (asmdata_streq_(asmln->instrcopy, ".text") || asmdata_streq_(asmln->instrcopy, ".data"))) {
        asmdata_selectsection(asmdata, asmln->instrcopy+1); // Skip the leading dot to get the section name
        return true;
    }
    else if (asmdata_streq_(asmln->instrcopy, "data.section")) {
        if (asmln->nparams == 0) {
            asmdata->activesection = NULL; // Reset the section
            return true;
        }
        else if (asmln->nparams == 1) {
            if (asmln->paramtype[0] != ASMT_TOKENTYPE_NAME && asmln->paramtype[0] != ASMT_TOKENTYPE_STRING) {
                asmln->errorcopy = asmdata_strdup_("section instruction expects section identified by a name or string (but got a different parameter)");
                return false;
            }
            asmdata_selectsection(asmdata, asmln->paramcopy[0]);
            return true;
        }
        else {
            asmln->errorcopy = asmdata_strdup_("TODO: Additional section parameters");
            return false;
        }
    }
    else if (asmdata_streq_(asmln->instrcopy, "data.symbol")) {
        if (asmln->nparams != 2 || asmln->paramtype[0] != ASMT_TOKENTYPE_NAME || asmln->paramtype[1] != ASMT_TOKENTYPE_NUMBER) {
            asmln->errorcopy = asmdata_strdup_("data.symbol special instruction requires a symbol name and flag integer");
            return false;
        }
        int32_t idx = asmdata_findsymbol(asmdata, asmln->paramcopy[0], true);
        if (idx < 0) {
            /* A negative index converted to bool would read as success, so report the failure explicitly. */
            asmln->errorcopy = asmdata_strdup_("Failed to create symbol (symbol table full?)");
            return false;
        }
        asmdata_symbol_t* symbol = &(asmdata->symbols[idx]);
        if (symbol->sectionindex != -1) {
            asmln->errorcopy = asmdata_strdup_("data.symbol cannot be applied to a symbol that is already defined");
            return false;
        }
        long long x = asmdata_atoll_(asmln->paramcopy[1]);
        symbol->flags |= (int32_t)x;
        return true;
    }
    else if (asmdata_streq_(asmln->instrcopy, "align")) {
        if (asmln->nparams != 1 || asmln->paramtype[0] != ASMT_TOKENTYPE_NUMBER) {
            asmln->errorcopy = asmdata_strdup_("align special instruction requires a simple integer");
            return false;
        }
        long long x = asmdata_atoll_(asmln->paramcopy[0]);
        if (x <= 0) {
            /* Aligning to zero would divide by zero below. */
            asmln->errorcopy = asmdata_strdup_("align special instruction requires a positive alignment");
            return false;
        }
        asmdata_section_t* section = asmdata_activesection(asmdata);
        int64_t remainder = section->reservedsize % x;
        if (remainder != 0) {
            /* Advance to the next multiple of x (a negative remainder only needs to reach zero). */
            section->reservedsize += (remainder > 0) ? (x - remainder) : -remainder;
        }
        return true;
    }
    else if (asmdata_streq_(asmln->instrcopy, "reserve")) {
        if (asmln->nparams != 1 || asmln->paramtype[0] != ASMT_TOKENTYPE_NUMBER) {
            asmln->errorcopy = asmdata_strdup_("reserve special instruction requires a simple integer");
            return false;
        }
        long long x = asmdata_atoll_(asmln->paramcopy[0]);
        if (x > 0) { // Zero or negative counts reserve nothing
            asmdata_activesection(asmdata)->reservedsize += x;
        }
        return true;
    }
    else if (asmdata_streq_(asmln->instrcopy, "data8")) {
        return asmdata_appendparams_(asmdata, asmln, ASMDATA_SIZE_8BIT);
    }
    else if (asmdata->extsyntax && asmdata_streq_(asmln->instrcopy, ".string")) {
        if (!asmdata_appendparams_(asmdata, asmln, ASMDATA_SIZE_8BIT)) {
            return false;
        }
        /* Strings written with .string are zero-terminated. */
        if (!asmdata_appendvalue_(asmdata, ASMT_TOKENTYPE_NUMBER, "0", NULL, ASMDATA_SIZE_8BIT)) {
            asmln->errorcopy = asmdata_strdup_("invalid data value");
            return false;
        }
        return true;
    }
    else if (asmdata_streq_(asmln->instrcopy, "data16")) {
        return asmdata_appendparams_(asmdata, asmln, ASMDATA_SIZE_16BIT);
    }
    else if (asmdata_streq_(asmln->instrcopy, "data32")) {
        return asmdata_appendparams_(asmdata, asmln, ASMDATA_SIZE_32BIT);
    }
    else if (asmdata_streq_(asmln->instrcopy, "data64") || (asmdata->extsyntax && asmdata_streq_(asmln->instrcopy, ".long"))) {
        return asmdata_appendparams_(asmdata, asmln, ASMDATA_SIZE_64BIT);
    }
    else {
        asmln->errorcopy = asmdata_strdup_("internal error, asmdata recognised input but didn't translate it properly");
        return false;
    }
}

310
asmdata.h Normal file
View File

@ -0,0 +1,310 @@
#ifndef ASMDATA_H
#define ASMDATA_H
#include "asmln.h"
/* Capacity of the section table (number of section pointers allocated per context). */
#define ASMDATA_MAXSECTIONS 200
/* Upper bound on the filled (buffer-backed) size of a single section, in bytes. */
#define ASMDATA_MAXFILLED (1024*1024*1024)
/* Capacity of the symbol table (entries allocated per context). */
#define ASMDATA_MAXSYMBOLS 400000
/* Capacity of the reference table (entries allocated per context). */
#define ASMDATA_MAXREFERENCES 400000
/* Forward declarations of the types defined below. */
typedef struct asmdata asmdata_t;
typedef struct asmdata_section asmdata_section_t;
typedef struct asmdata_symbol asmdata_symbol_t;
typedef struct asmdata_reference asmdata_reference_t;
typedef struct asmdata_mapentry asmdata_mapentry_t;
typedef struct asmdata_map asmdata_map_t;
/* A placeholder word in a section that refers to a symbol. */
struct asmdata_reference {
int32_t symbolindex; // Index of the referenced symbol in the symbol table
int32_t sectionindex; // Number of the section containing the placeholder word
int64_t sectionoffset; // Offset of the placeholder word within that section
int8_t size; // 0 for 8bit, 1 for 16bit, 2 for 32bit, 3 for 64bit
int8_t baseflags; // Written out on finalisation; NOTE(review): not assigned anywhere in the visible source
int16_t extflags; // Written out on finalisation; NOTE(review): not assigned anywhere in the visible source
int32_t extdata; // Written out on finalisation; NOTE(review): not assigned anywhere in the visible source
};
/* Symbol flag bits used for the internal symbols that represent expressions. */
#define ASMDATA_SYMBOL_DUMMY (1<<8) // Internally generated symbol (not looked up by name)
#define ASMDATA_SYMBOL_EXPR (1<<9) // Expression node: x_lhs, x_op and x_rhs are symbol indices
#define ASMDATA_SYMBOL_CONST (1<<10) // Numeric constant: the value is stored in sectionoffset
#define ASMDATA_SYMBOL_OP (1<<11) // Operator: the symbol name holds the operator text
/* A named location, or an internal node of an expression tree. */
struct asmdata_symbol {
const char* namecopy; // Copy of the symbol's name (or of the token text for dummy symbols)
int64_t sectionoffset; // 0 if not defined, also reused for constant values in expressions (but with -1 for section)
int32_t flags; // 0 if not defined
int32_t sectionindex; // -1 if not defined
int32_t firstreferenceindex; // -1 if not defined; NOTE(review): never assigned in the visible source, so it stays 0 from calloc
int32_t x_lhs; // Expression nodes only: symbol index of the left operand
int32_t x_op; // Expression nodes only: symbol index of the operator
int32_t x_rhs; // Expression nodes only: symbol index of the right operand
int32_t reserved; // Written out on finalisation; otherwise unused in the visible source
};
/* A named run of output bytes. Only the first bufferfilled bytes are backed by
 * memory; reservedsize may be larger when space was reserved without filling it.
 */
struct asmdata_section {
int32_t sectionnumber; // Index of this section in the context's section table
bool bigendian; // Selects big-endian byte order when words are appended (little-endian otherwise)
const char* namecopy; // Copy of the section name, released together with the section
uint8_t* buffer; // Backing storage, NULL until something is filled in
int32_t buffersize; // Number of bytes allocated for buffer
int32_t bufferfilled; // Number of bytes of buffer in use (set to reservedsize on each fill)
int64_t reservedsize; // Total size of the section, including reserved-but-unfilled space
int64_t virtualoffset; // Written into the file header; NOTE(review): not assigned anywhere in the visible source
};
/* The assembler-data context: all sections, symbols and references produced so far. */
struct asmdata {
bool finalised; // Set once asmdata_finalise() has written the linkage sections
bool extsyntax; // Enables the dot-prefixed directives (.text, .data, .string, .long); on by default
int32_t nsections; // Number of entries in use in sections
int32_t nsymbols; // Number of entries in use in symbols
int32_t nreferences; // Number of entries in use in references
int32_t pad; // Unused in the visible source
asmdata_section_t* activesection; // Section currently being written to, or NULL after a reset
asmdata_section_t** sections; // Table with room for ASMDATA_MAXSECTIONS section pointers
asmdata_symbol_t* symbols; // Table with room for ASMDATA_MAXSYMBOLS symbols
asmdata_reference_t* references; // Table with room for ASMDATA_MAXREFERENCES references
/* Earlier layout with the tables embedded in the struct, kept for reference:
asmdata_section_t* sections[ASMDATA_MAXSECTIONS];
asmdata_symbol_t symbols[ASMDATA_MAXSYMBOLS];
asmdata_reference_t references[ASMDATA_MAXREFERENCES];
*/
};
/* Callback type taking the map, an entry's key and the entry's value. */
typedef void (*asmdata_iterf_t)(asmdata_map_t*, char*, void*);
/* One key/value pair in a bucket's singly linked chain. */
struct asmdata_mapentry {
int hash; // Cached hash of key, compared before the strings are
char* key; // Key pointer as supplied by the caller (not copied)
void* value; // Value pointer as supplied by the caller
asmdata_mapentry_t* next; // Next entry in the same bucket, or NULL
};
/* A string-keyed hash map with separate chaining. */
struct asmdata_map {
asmdata_mapentry_t** table; // Array of bucket heads
int ntable; // Number of slots allocated in the table
asmdata_iterf_t deletef; // Optional callback invoked on the old key/value when an entry is replaced
};
/* Creates a map with n buckets; deletef may be NULL. Returns NULL on allocation failure. */
asmdata_map_t* asmdata_map_new(int n, asmdata_iterf_t deletef);
/* Inserts or replaces the value stored under key. The key pointer is stored, not copied. */
void asmdata_map_set(asmdata_map_t* map, char* key, void* value);
/* Returns the value stored under key, or NULL if there is none. */
void* asmdata_map_get(asmdata_map_t* map, char* key);
/* Destroys a map created by asmdata_map_new(). */
void asmdata_map_delete(asmdata_map_t* map);
/* Allocates an empty section with its own copy of name. Returns NULL on allocation failure. */
asmdata_section_t* asmdata_section_new(const char* name);
/* Grows the section by nbytes; when willfill is true the backing buffer is grown as well. */
bool asmdata_section_reserveextra(asmdata_section_t* section, int64_t nbytes, bool willfill);
/* Releases a section (NULL is accepted). Always returns NULL. */
void* asmdata_section_delete(asmdata_section_t* section);
/* Pads the section's reserved size (without filling) up to the next multiple
 * of alignment.
 * Returns false if alignment is not positive or the padding cannot be
 * reserved; true otherwise, including when the section is already aligned.
 */
ASMLN_INLINE bool asmdata_section_align(asmdata_section_t* section, int64_t alignment) {
    if (alignment <= 0) {
        return false; // A zero alignment would divide by zero below
    }
    int64_t remainder = section->reservedsize % alignment;
    if (remainder == 0) {
        return true;
    }
    /* A negative remainder (negative reserved size) only needs to be brought up to zero. */
    int64_t padding = (remainder > 0) ? (alignment - remainder) : -remainder;
    return asmdata_section_reserveextra(section, padding, false);
}
/* Reserves nbytes of buffer-backed space at the end of the section and returns
 * a pointer to the start of the new space for the caller to write into.
 * Returns NULL if the current size or nbytes does not fit in 32 bits, or if
 * the space cannot be reserved.
 */
ASMLN_INLINE uint8_t* asmdata_section_fill(asmdata_section_t* section, int64_t nbytes) {
    const int64_t start = section->reservedsize;
    const bool startfits = ((int64_t)((int32_t)start)) == start;
    const bool countfits = ((int64_t)((int32_t)nbytes)) == nbytes;
    if (!startfits || !countfits) {
        return NULL;
    }
    if (!asmdata_section_reserveextra(section, nbytes, true)) {
        return NULL;
    }
    /* The buffer may have moved while growing, so index it only after reserving. */
    return section->buffer + (int32_t)start;
}
/* Word size codes: a word of size code s occupies (1 << s) bytes. */
#define ASMDATA_SIZE_8BIT ((int8_t)0)
#define ASMDATA_SIZE_16BIT ((int8_t)1)
#define ASMDATA_SIZE_32BIT ((int8_t)2)
#define ASMDATA_SIZE_64BIT ((int8_t)3)
/* Appends the low (1 << size) bytes of word to the section, in the section's
 * byte order (big-endian if section->bigendian, little-endian otherwise).
 *
 * section: the section to append to.
 * word:    the value; only its low-order bytes are stored for sizes below 64 bits.
 * size:    one of the ASMDATA_SIZE_* codes.
 *
 * Returns false, without reserving any space, for an unknown size code, and
 * false if the space cannot be reserved. Returns true on success.
 */
ASMLN_INLINE bool asmdata_section_appendword(asmdata_section_t* section, int64_t word, int8_t size) {
    /* Validate before shifting or reserving: a bad size must not grow the section. */
    if (size < ASMDATA_SIZE_8BIT || size > ASMDATA_SIZE_64BIT) {
        return false;
    }
    const int32_t nbytes = 1 << size;
    uint8_t* target = asmdata_section_fill(section, nbytes);
    if (target == NULL) {
        return false;
    }
    /* Shift an unsigned copy so that negative values are handled portably. */
    const uint64_t bits = (uint64_t)word;
    int32_t i;
    for (i = 0; i < nbytes; i++) {
        /* Byte i of the output holds the most significant byte first when big-endian. */
        int32_t significance = section->bigendian ? (nbytes - 1 - i) : i;
        target[i] = (uint8_t)(bits >> (8 * significance));
    }
    return true;
}
/* Copies nbytes bytes from source onto the end of the section.
 * Returns false (copying nothing) if the space could not be reserved, true otherwise.
 */
ASMLN_INLINE bool asmdata_section_appendbytes(asmdata_section_t* section, uint8_t* source, int32_t nbytes) {
    uint8_t* dst = asmdata_section_fill(section, nbytes);
    if (dst != NULL) {
        const uint8_t* src = source;
        int32_t remaining = nbytes;
        while (remaining > 0) {
            *dst++ = *src++;
            remaining--;
        }
        return true;
    }
    return false;
}
/* Creates an empty assembler state; presumably returns NULL on allocation failure - TODO confirm. */
asmdata_t* asmdata_new();
/* Destroys an assembler state; presumably always returns NULL like the other *_delete functions - TODO confirm. */
void* asmdata_delete(asmdata_t* asmdata);
/* This function should be called (exactly) once before extracting data, assuming you want linkage information retained.
* It will assemble the symbol and references list (and possibly any additional metadata) into their own special sections.
* You generally shouldn't assemble anything else after finalising the asmdata structure (the API will still let you though,
* in case you want to add e.g. a special checksum or signature section based on the finalised contents of the other sections,
* but defining or using any symbols after this point is an error).
*/
bool asmdata_finalise(asmdata_t* asmdata);
/* Produces a simple file header. This is added as the last section (typically, but not necessarily, after finalisation). A
* full binary file can then be produced by writing - firstly - the header section, and then each section (including the header
* again) and padding to the given page boundary after every section (including the first and last copies of the header section).
* Two "hint" strings (references into the strings section) can be provided as a simple means to classify file types within
* higher-level environments (e.g. a system might use conventions like hint1="program" hint2="generic-dynamic-64bit", and might
* use different hints like hint1="library" vs hint1="program" to distinguish components, but no convention is mandated specifically).
* The hint strings are only intended to confirm type information and may be empty or ignored. An additional "inthint" field is
* also added in case high level systems need to be able to quickly identify things like architecture flags without loading strings.
* No additional metadata (e.g. filename, architecture, build time) is added in the file header. This is by design (it can easily
* be added in another section if necessary). A simple checksum of each section is added with its record in the header, while
* the checksum of the file header itself is calculated with its own checksum field set to zero (before it has been set!). The
* second copy of the file header is added primarily for integrity (i.e. if a section may be corrupted, how do we know the header
* or checksum itself is not corrupted? With a second copy of course), however its secondary purpose is to verify the end of file
* for a loader. The file header will also contain a version number (currently 1), which should be taken as the version of the
* file header/format only (not necessarily related to the version of the assembler/compiler/architecture or even necessarily of
* the symbols/references sections which may not even exist in the output, but the strings section must be compatible for the
* hints to be used).
*
* Note on duplicating header: Having the header in its own section may also be convenient for special cases e.g. having to
* inspect the header itself in "readable" assembly output or, in the future, including alternative headers for different
* environments or files with multiple sub-files/file-headers for the purposes of optimisation. In these cases, a definitive
* file header is still given at the start of the file, but an interpreter may use that header's section list to find the most
* suitable alternative file header for its environment.
*
* Note on page sizes: For compact binaries, a page size of 1 will leave no padding between sections, but other considerations:
* Larger page sizes are primarily useful for optimising specific cases and specifically for sharing (at page granularity)
* these pages between multiple program instances in a modern (multitasking) operating system. A page size of up to around
* 64kb might make sense in some cases but larger page sizes (e.g. 2MB) typically lead to far-oversized program files. In
* any case, a loader can still determine the same program contents regardless of the page size, so a smaller size (either 1
* or a value such as 4, 8, or 16 to ensure basic alignment of fields) is generally a better default than a larger one. An
* interpreter may expect a specific page size matching its own environmental considerations (e.g. if it's too large a
* smaller machine might run out of memory, and if it's too small it might have to copy all the data for alignment).
*/
bool asmdata_produceheader(asmdata_t* asmdata, int32_t pagesize, const char* hint1, const char* hint2, int32_t inthint);
/* Looks up a section by name and returns NULL if none is available. asmdata_hassection passes autocreate=false
 * to test for existence; asmdata_selectsection passes autocreate=true, presumably so that a missing section is
 * created on demand - TODO confirm in the implementation.
 */
asmdata_section_t* asmdata_findsection(asmdata_t* asmdata, const char* sectionname, bool autocreate);
/* Returns true if a lookup of sectionname (without auto-creation) finds a section. */
ASMLN_INLINE bool asmdata_hassection(asmdata_t* asmdata, const char* sectionname) {
    asmdata_section_t* found = asmdata_findsection(asmdata, sectionname, false);
    return found != NULL;
}
/* Finds the named section (with autocreate requested) and makes it the active section.
 * Returns the section, or NULL if the lookup failed, in which case the active section is left unchanged.
 */
ASMLN_INLINE asmdata_section_t* asmdata_selectsection(asmdata_t* asmdata, const char* sectionname) {
    asmdata_section_t* section = asmdata_findsection(asmdata, sectionname, true);
    if (section == NULL) {
        return NULL;
    }
    asmdata->activesection = section;
    return section;
}
/* Returns the active section. If none has been selected yet, selects the section named "data" and returns
 * the result of that selection (which may be NULL).
 */
ASMLN_INLINE asmdata_section_t* asmdata_activesection(asmdata_t* asmdata) {
    asmdata_section_t* current = asmdata->activesection;
    return (current != NULL) ? current : asmdata_selectsection(asmdata, "data");
}
/* Appends a word of the given ASMDATA_SIZE_* size to the active section (selecting "data" if none is active).
 * Returns false if no section is available or if the append fails.
 */
ASMLN_INLINE bool asmdata_appendword(asmdata_t* asmdata, int64_t word, int8_t size) {
    asmdata_section_t* section = asmdata_activesection(asmdata);
    if (section == NULL) {
        /* asmdata_activesection can fail; the section-level function dereferences its argument. */
        return false;
    }
    return asmdata_section_appendword(section, word, size);
}
/* Appends nbytes bytes from source to the active section (selecting "data" if none is active).
 * Returns false if no section is available or if the append fails.
 */
ASMLN_INLINE bool asmdata_appendbytes(asmdata_t* asmdata, uint8_t* source, int32_t nbytes) {
    asmdata_section_t* section = asmdata_activesection(asmdata);
    if (section == NULL) {
        /* asmdata_activesection can fail; the section-level function dereferences its argument. */
        return false;
    }
    return asmdata_section_appendbytes(section, source, nbytes);
}
/* The implementations of the following functions are not in this header; the notes are to be confirmed against them. */
// presumably mark the start and end of input from the named source file - TODO confirm
bool asmdata_beginfile(asmdata_t* asmdata, const char* name);
bool asmdata_endfile(asmdata_t* asmdata, const char* name);
// Looks up a symbol by name; presumably returns its index (creating it if autocreate is true) and a negative value on failure - TODO confirm
int32_t asmdata_findsymbol(asmdata_t* asmdata, const char* name, bool autocreate);
// presumably defines the named symbol at the current position of the active section - TODO confirm
int32_t asmdata_symbolhere(asmdata_t* asmdata, const char* name);
// presumably appends a placeholder word of the given ASMDATA_SIZE_* size and records a reference to the named symbol - TODO confirm
int32_t asmdata_appendreferenceword(asmdata_t* asmdata, const char* name, int8_t size);
// Checks a parsed line; used by asmdata_isvalidln below
bool asmdata_isvalidasmln(asmdata_t* asmdata, asmln_t* asmln);
// presumably assembles one parsed line into the output - TODO confirm
bool asmdata_asmln(asmdata_t* asmdata, asmln_t* asmln);
/* Parses one line of source text and reports whether asmdata_isvalidasmln accepts it.
 * Returns false if the line structure could not be allocated. The parsed line is always released.
 */
ASMLN_INLINE bool asmdata_isvalidln(asmdata_t* asmdata, const char* ln) {
    bool valid = false;
    asmln_t* parsed = asmln_new(ln);
    if (parsed != NULL) {
        valid = asmdata_isvalidasmln(asmdata, parsed);
        asmln_delete(parsed);
    }
    return valid;
}
/* Parses one line of source text and hands it to asmdata_asmln.
 * Returns the result of asmdata_asmln, or false if the line structure could not be allocated.
 * The parsed line is always released before returning.
 */
ASMLN_INLINE bool asmdata_ln(asmdata_t* asmdata, const char* ln) {
    asmln_t* asmln = asmln_new(ln);
    if (asmln == NULL) {
        return false;
    }
    /* This previously called asmdata_isvalidasmln, making it a duplicate of asmdata_isvalidln. */
    bool result = asmdata_asmln(asmdata, asmln);
    asmln_delete(asmln);
    return result;
}
/* From ifndef at top of file: */
#endif

388
asmln.c Normal file
View File

@ -0,0 +1,388 @@
#include "asmln.h"
#include <memory.h>
#include <stdlib.h>
#include <stdio.h> // TODO: Remove this, it's only for debugging/testing..
/* Returns the number of characters before the terminating NUL, or 0 for a NULL string. */
static int32_t asmln_strlen_(const char* str) {
    int32_t count = 0;
    if (str != NULL) {
        const char* p;
        for (p = str; *p != 0; p++) {
            count++;
        }
    }
    return count;
}
/* Returns a newly allocated, NUL-terminated copy of at most maxlen characters of str.
 * The allocation is always maxlen + 1 bytes and zero-filled, so a NULL str yields an empty string.
 * Returns NULL if maxlen is negative or if allocation fails. The caller owns the result and releases it with free().
 * The source is taken as const because it is only read (callers pass tokeniser input, which is const).
 */
static char* asmln_strndup_(const char* str, int maxlen) {
    if (maxlen < 0) {
        return NULL;
    }
    char* result = calloc((size_t)maxlen + 1, 1);
    if (result == NULL || str == NULL) {
        return result;
    }
    int32_t i;
    for (i = 0; i < maxlen && str[i] != 0; i++) {
        result[i] = str[i];
    }
    return result;
}
/* Returns a newly allocated copy of the whole string; a NULL str has length 0 and is passed on unchanged. */
static char* asmln_strdup_(char* str) {
    int32_t len = asmln_strlen_(str);
    return asmln_strndup_(str, len);
}
/* Returns a newly allocated copy of the text of the current token, or NULL if there is no valid token
 * (token length <= 0) or allocation fails. String tokens are copied without their surrounding quotes
 * and label tokens without their trailing ':'. The caller owns the result.
 */
static char* asmln_tokendup_(asmt_t* asmt) {
    int32_t t = asmt_tokentype(asmt);
    int32_t len = asmt_tokenlength(asmt);
    if (len <= 0) {
        return NULL;
    }
    if (t == ASMT_TOKENTYPE_STRING) {
        return asmln_strndup_(asmt->input + (asmt->index + 1), len - 2); // Skip quotes
    }
    if (t == ASMT_TOKENTYPE_LABEL) {
        len--; // Drop the ':' that terminates the label
    }
    /* The copy is freshly allocated and owned by the caller, so it is a plain char* (not const). */
    char* result = asmln_strndup_(asmt->input + asmt->index, len);
    return result;
}
/* Parses one operand at the tokeniser's current position and stores it through the output pointers.
 *
 * Accepted forms:
 *   - a name, number or string token: *type_var is the token type, *copy_var a copy of its text, *x_var NULL;
 *   - a bracketed binary expression "(lhs op rhs)": *type_var is ASMT_TOKENTYPE_OPENBR, *copy_var is the
 *     placeholder text "(...)" and *x_var a newly allocated expression node;
 *   - the GNU-style offset form "value(inner)", e.g. 0(sp): stored as the expression (value OFF inner) with
 *     *type_var set to ASMT_TOKENTYPE_OPENBR and *copy_var set to NULL.
 * *incr_var is incremented once for each operand stored. For any other token nothing is stored.
 *
 * Returns true only if the operand was followed by a comma (which is consumed), i.e. another operand may
 * follow. Returns false otherwise, including on errors, which are reported on stderr. Nested operands must
 * not be followed by a comma, which is why a true result from a recursive call is treated as an error.
 * The asmln parameter is only passed through to recursive calls.
 */
bool asmlnx_parse_subexpression(asmln_t* asmln, asmt_t* asmt, int32_t* type_var, char** copy_var, asmlnx_t** x_var, int32_t* incr_var) {
    bool mayhavemoreparams = true;
    int32_t tt = asmt_tokentype(asmt);
    int32_t subc = 0; // Receives the operand count of nested calls; the value is not used
    switch (tt)
    {
    case ASMT_TOKENTYPE_OPENBR:
        x_var[0] = calloc(1, sizeof(asmlnx_t));
        if (x_var[0] == NULL) {
            fprintf(stderr, "MEMORY FAILURE\n");
            return false; // TODO: Better error handling here?
        }
        type_var[0] = tt;
        copy_var[0] = asmln_strdup_("(...)"); // TODO: Copy the whole expression source for debugging? (Probably not worthwhile.)
        incr_var[0]++;
        asmt_skiptoken(asmt);
        if (asmlnx_parse_subexpression(asmln, asmt, &(x_var[0]->lhstype), &(x_var[0]->lhscopy), &(x_var[0]->lhsx), &subc)) {
            fprintf(stderr, "BAD LHS\n");
            return false;
        }
        /* Operators are tokenised as names. */
        if (asmt_tokentype(asmt) != ASMT_TOKENTYPE_NAME) {
            fprintf(stderr, "NOT A NAME\n");
            return false;
        }
        x_var[0]->opcopy = asmln_tokendup_(asmt);
        asmt_skiptoken(asmt);
        if (asmlnx_parse_subexpression(asmln, asmt, &(x_var[0]->rhstype), &(x_var[0]->rhscopy), &(x_var[0]->rhsx), &subc)) {
            fprintf(stderr, "BAD RHS\n");
            return false;
        }
        if (asmt_tokentype(asmt) != ASMT_TOKENTYPE_CLOSEBR) {
            fprintf(stderr, "MISSING CLOSE\n");
            return false;
        }
        asmt_skiptoken(asmt);
        if (asmt_tokentype(asmt) == ASMT_TOKENTYPE_COMMA) {
            asmt_skiptoken(asmt);
        }
        else {
            mayhavemoreparams = false;
        }
        break;
    case ASMT_TOKENTYPE_NAME:
    case ASMT_TOKENTYPE_NUMBER:
    case ASMT_TOKENTYPE_STRING:
        type_var[0] = tt;
        copy_var[0] = asmln_tokendup_(asmt);
        x_var[0] = NULL;
        incr_var[0]++;
        asmt_skiptoken(asmt);
        // A hack to allow GNU-style offsets like in ld a0, 0(sp)
        // This translates e.g. 0(sp) to (0 OFF sp)
        if (asmt_tokentype(asmt) == ASMT_TOKENTYPE_OPENBR) {
            asmlnx_t* off = calloc(1, sizeof(asmlnx_t));
            if (off == NULL) {
                fprintf(stderr, "MEMORY FAILURE\n");
                return false;
            }
            asmt_skiptoken(asmt);
            off->lhstype = type_var[0];
            /* The operand text moves into the node instead of being duplicated and then dropped. */
            off->lhscopy = copy_var[0];
            off->lhsx = NULL;
            off->opcopy = asmln_strdup_("OFF");
            x_var[0] = off;
            copy_var[0] = NULL;
            type_var[0] = ASMT_TOKENTYPE_OPENBR;
            if (asmlnx_parse_subexpression(asmln, asmt, &(off->rhstype), &(off->rhscopy), &(off->rhsx), &subc)) {
                fprintf(stderr, "BAD RHS\n");
                return false;
            }
            if (asmt_tokentype(asmt) != ASMT_TOKENTYPE_CLOSEBR) {
                fprintf(stderr, "MISSING CLOSE\n");
                return false;
            }
            asmt_skiptoken(asmt);
        }
        if (asmt_tokentype(asmt) == ASMT_TOKENTYPE_COMMA) {
            asmt_skiptoken(asmt);
        } else {
            mayhavemoreparams = false;
        }
        break;
    default:
        mayhavemoreparams = false;
        break;
    }
    return mayhavemoreparams;
}
// Parses a C-style string
/* Reads a double-quoted string with backslash escapes starting at the next non-space character.
 * Recognised escapes are \r, \n and \t; any other escaped character (including \', \" and \\) stands for
 * itself. The result is newly allocated, begins with a '"' and ends with the closing '"' (the quotes are
 * kept, unlike plain string tokens), and must be released by the caller with free().
 * Returns NULL if the input is not at a string, the string is unterminated, or allocation fails.
 * The tokeniser index is advanced past whatever was consumed, also in the failing cases.
 */
static char* asmt_cstringhack(asmt_t* asmt) {
    asmt_skipspaces(asmt);
    if (asmt_isend(asmt) || !asmt_isstringstart(asmt)) {
        return NULL;
    }
    asmt->index++;
    if (asmt_isend(asmt)) {
        return NULL;
    }
    /* Each input character yields at most one output character, so length+1 bytes suffice. */
    char* result = calloc(asmt->length+1,1);
    if (result == NULL) {
        return NULL;
    }
    result[0] = '\"';
    int resulti = 1;
    bool finished = false;
    while (!asmt_isend(asmt) && !finished) {
        char c = asmt->input[asmt->index];
        if (c == '\\') {
            asmt->index++;
            if (asmt_isend(asmt)) break;
            char decoded = asmt->input[asmt->index];
            switch (decoded) {
            case 'r':
                decoded = '\r';
                break;
            case 'n':
                decoded = '\n';
                break;
            case 't':
                decoded = '\t';
                break;
            default:
                /* \' \" \\ and unknown escapes all produce the escaped character itself. */
                break;
            }
            result[resulti] = decoded;
            resulti++;
        } else {
            result[resulti] = c;
            resulti++;
            if (c == '\"') {
                finished = true; // Unescaped quote closes the string
            }
        }
        asmt->index++;
    }
    result[resulti] = 0;
    if (!finished) {
        free(result);
        return NULL;
    }
    return result;
}
/* Returns true if instr is exactly the directive name ".string" (a NULL instr never matches). */
static bool asmln_isstringdirective_(const char* instr) {
    static const char directive[] = ".string";
    int32_t i = 0;
    if (instr == NULL) {
        return false;
    }
    while (directive[i] != 0 && instr[i] == directive[i]) {
        i++;
    }
    return directive[i] == 0 && instr[i] == 0;
}
/* Parses one source line of the form [label:] [instr param1, param2, ...] [comment] into a zeroed asmln.
 * Each component that is present is stored as an owned copy; absent components stay NULL.
 * On failure errorcopy is set to a message and parsing stops (components parsed so far remain set).
 */
static void asmln_parse_inner(asmln_t* asmln, const char* sourceline) {
    if (asmln->errorcopy != NULL || asmln->instrcopy != NULL || asmln->labelcopy != NULL || asmln->commentcopy != NULL || asmln->nparams != 0) {
        asmln->errorcopy = asmln_strdup_("Assembler structure reused improperly");
        return;
    }
    if (sourceline == NULL) {
        asmln->errorcopy = asmln_strdup_("Source line is NULL");
        return;
    }
    asmt_t asmt;
    asmt.input = sourceline;
    asmt.length = asmln_strlen_(sourceline);
    asmt.index = 0;
    if (asmt_tokentype(&asmt) == ASMT_TOKENTYPE_LABEL) {
        asmln->labelcopy = asmln_tokendup_(&asmt);
        asmt_skiptoken(&asmt);
    }
    else {
        asmln->labelcopy = NULL;
    }
    asmln->nparams = 0;
    if (asmt_tokentype(&asmt) == ASMT_TOKENTYPE_NAME) {
        asmln->instrcopy = asmln_tokendup_(&asmt);
        if (asmln->instrcopy == NULL) {
            /* The token exists, so a NULL copy means the allocation failed. */
            asmln->errorcopy = asmln_strdup_("Out of memory");
            return;
        }
        asmt_skiptoken(&asmt);
        bool mayhavemoreparams = true;
        // For compatibility with .string "Hello\n" type strings
        if (asmln_isstringdirective_(asmln->instrcopy)) {
            asmln->paramcopy[0] = asmt_cstringhack(&asmt);
            if (asmln->paramcopy[0] != NULL) {
                asmln->paramtype[0] = ASMT_TOKENTYPE_STRING;
                asmln->nparams = 1;
                mayhavemoreparams = false;
            }
        }
        while (mayhavemoreparams) {
            if (asmln->nparams >= ASMLN_MAXPARAMS) {
                asmln->errorcopy = asmln_strdup_("Too many parameters");
                return;
            }
            /* The parser fills the next free slot and bumps nparams itself when it stores an operand. */
            int32_t slot = asmln->nparams;
            mayhavemoreparams = asmlnx_parse_subexpression(asmln, &asmt, &asmln->paramtype[slot], &asmln->paramcopy[slot], &asmln->paramx[slot], &asmln->nparams);
        }
    }
    else {
        asmln->instrcopy = NULL;
    }
    if (asmt_tokentype(&asmt) == ASMT_TOKENTYPE_COMMENT) {
        asmln->commentcopy = asmln_tokendup_(&asmt);
        asmt_skiptoken(&asmt);
    }
    else {
        asmln->commentcopy = NULL;
    }
    /* Anything left over after the optional comment means the line did not match the expected shape. */
    if (asmt_tokentype(&asmt) != ASMT_TOKENTYPE_END) {
        asmln->errorcopy = asmln_strdup_("Unexpected token (the source doesn't seem to be a line of valid assembler)");
        return;
    }
}
/* Allocates a zeroed line structure and parses sourceline into it.
 * Returns NULL only if the allocation fails; parse errors are reported through the errorcopy field
 * of the returned structure. Release the result with asmln_delete.
 */
asmln_t* asmln_new(const char* sourceline) {
    asmln_t* line = calloc(1, sizeof(asmln_t));
    if (line == NULL) {
        return NULL;
    }
    asmln_parse_inner(line, sourceline);
    return line;
}
/* Releases an expression node, its copied strings and, recursively, both sub-expressions.
 * Accepts NULL (nothing is done), matching asmln_delete. Always returns NULL so that callers can write
 * x = asmlnx_delete(x).
 */
void* asmlnx_delete(asmlnx_t* asmlnx) {
    if (asmlnx == NULL) {
        return NULL;
    }
    /* free(NULL) is a no-op, so the string members need no individual checks. */
    free(asmlnx->lhscopy);
    asmlnx_delete(asmlnx->lhsx);
    free(asmlnx->opcopy);
    free(asmlnx->rhscopy);
    asmlnx_delete(asmlnx->rhsx);
    free(asmlnx);
    return NULL;
}
/* Releases a parsed line together with every string and expression tree it holds.
 * Accepts NULL. Always returns NULL.
 */
void* asmln_delete(asmln_t* asmln) {
    if (asmln == NULL) {
        return NULL;
    }
    /* free(NULL) does nothing, so absent components need no special casing. */
    free(asmln->labelcopy);
    asmln->labelcopy = NULL;
    free(asmln->instrcopy);
    asmln->instrcopy = NULL;
    free(asmln->errorcopy);
    asmln->errorcopy = NULL;
    free(asmln->commentcopy);
    asmln->commentcopy = NULL;
    int32_t slot = 0;
    while (slot < asmln->nparams) {
        asmln->paramtype[slot] = ASMT_TOKENTYPE_END;
        free(asmln->paramcopy[slot]);
        asmln->paramcopy[slot] = NULL;
        if (asmln->paramx[slot] != NULL) {
            asmlnx_delete(asmln->paramx[slot]);
            asmln->paramx[slot] = NULL;
        }
        slot++;
    }
    asmln->nparams = 0;
    free(asmln);
    return NULL;
}

356
asmln.h Normal file
View File

@ -0,0 +1,356 @@
#ifndef ASMLN_H
#define ASMLN_H
#define _CRT_SECURE_NO_WARNINGS
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
// XXX TODO: Memory management sorta fails when you're dealing with that many
// substructures, should use garbage collection.
//#define free fakefree
//static void fakefree(void*p) {}
/* This can be defined explicitly to change how the functions are defined (may be needed on some compilers or targets). */
#ifndef ASMLN_INLINE
#define ASMLN_INLINE static inline
#endif
/* Character functions: */
/* Returns true for the ASCII letters A-Z and a-z. */
ASMLN_INLINE bool asmc_isalpha(char c) {
    if (c >= 'A' && c <= 'Z') {
        return true;
    }
    if (c >= 'a' && c <= 'z') {
        return true;
    }
    return false;
}
/* Returns the numeric value of a digit character: 0-9 for '0'-'9' and 10-15 for 'a'-'f' / 'A'-'F'.
 * Returns -1 for any other character.
 */
ASMLN_INLINE int32_t asmc_digitval(char c) {
    if (c >= '0' && c <= '9') {
        return c - '0';
    }
    else if (c >= 'a' && c <= 'f') {
        return c - 'a' + 10;
    }
    else if (c >= 'A' && c <= 'F') {
        return c - 'A' + 10;
    }
    else {
        return -1;
    }
}
/* Returns true if c is a valid digit in the given base (bases up to 16 are meaningful). */
ASMLN_INLINE bool asmc_isdigit(char c, int base) {
    int v = asmc_digitval(c);
    return v >= 0 && v < base;
}
/* Returns true for the decimal digits '0'-'9'. */
ASMLN_INLINE bool asmc_isdec(char c) {
    return asmc_isdigit(c, 10);
}
/* Returns true for hexadecimal digits in either letter case. */
ASMLN_INLINE bool asmc_ishex(char c) {
    return asmc_isdigit(c, 16);
}
/* Returns true if c may begin a name or label: a letter or one of _ + - * / % < > | & $ . */
ASMLN_INLINE bool asmc_isvalidlabelhead(char c) {
    return asmc_isalpha(c) || c == '_' || c == '+' || c == '-' || c == '*' || c == '/' || c == '%' || c == '<' || c == '>' || c == '|' || c == '&' || c == '$' || c == '.';
}
/* Returns true if c may continue a name or label: any head character or a decimal digit. */
ASMLN_INLINE bool asmc_isvalidlabeltail(char c) {
    return asmc_isvalidlabelhead(c) || asmc_isdec(c);
}
/* Returns true for space, tab, carriage return and line feed. */
ASMLN_INLINE bool asmc_isspace(char c) {
    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
/* Tokeniser functions, these should be useful whether the structure is set up for a line of input or for a whole input file at a time. */
typedef struct asmt asmt_t;
/* Tokeniser cursor over a caller-owned input buffer. */
struct asmt {
    const char* input; // Text being tokenised (not owned)
    int32_t length; // Number of characters of input to consider
    int32_t index; // Current position within input
};
/* Returns true if the cursor is outside the range [0, length). */
ASMLN_INLINE bool asmt_isend(asmt_t* asmt) {
    return asmt->index < 0 || asmt->index >= asmt->length;
}
/* Advances the cursor past any whitespace. */
ASMLN_INLINE void asmt_skipspaces(asmt_t* asmt) {
    while (!asmt_isend(asmt) && asmc_isspace(asmt->input[asmt->index])) {
        asmt->index++;
    }
}
/* Returns true if the cursor is at a decimal digit, or at a '+' or '-' immediately followed by one.
 * The character after a sign is read without a bounds check, so the input needs to stay readable one
 * character past length (e.g. its NUL terminator).
 */
ASMLN_INLINE bool asmt_isnumberstart(asmt_t* asmt) {
    return !asmt_isend(asmt) && (asmc_isdec(asmt->input[asmt->index]) || ((asmt->input[asmt->index] == '-' || asmt->input[asmt->index] == '+') && asmc_isdec(asmt->input[asmt->index+1])));
}
/* Returns true if the cursor is at a character that can begin a name or a label. */
ASMLN_INLINE bool asmt_isnameorlabelstart(asmt_t* asmt) {
    return !asmt_isend(asmt) && asmc_isvalidlabelhead(asmt->input[asmt->index])/* && !asmt_isnumberstart(asmt)*/;
}
/* Returns true if the cursor is at a double quote. */
ASMLN_INLINE bool asmt_isstringstart(asmt_t* asmt) {
    return !asmt_isend(asmt) && asmt->input[asmt->index] == '\"';
}
/* Returns true if the cursor is at a comment marker (';' or '#'). */
ASMLN_INLINE bool asmt_iscommentstart(asmt_t* asmt) {
    return !asmt_isend(asmt) && (asmt->input[asmt->index] == ';' || asmt->input[asmt->index] == '#');
}
/* Returns the length of the number token at the cursor, or -1 if the cursor is not at a number.
 * A number is an optional sign, a leading decimal digit, an optional base marker ('x' or 'b', accepted
 * only when at least one more character follows it) and then a run of digits. After an 'x' marker the run
 * consists of hexadecimal digits, otherwise of decimal digits; digit values are not validated further here.
 * Hex letters used to be accepted only because asmc_digitval mapped them to 0-5; that is now explicit.
 */
ASMLN_INLINE int32_t asmt_numberlength(asmt_t* asmt) {
    if (!asmt_isnumberstart(asmt)) {
        return -1;
    }
    const char* s = asmt->input + asmt->index;
    int32_t avail = asmt->length - asmt->index;
    int32_t len = 1; // The sign, or the leading digit when there is no sign
    int base = 10;
    if ((s[0] == '-' || s[0] == '+') && len < avail) {
        len++; // The leading digit after the sign (checked by asmt_isnumberstart)
    }
    // Detect hex/binary format
    if (len + 1 < avail && (s[len] == 'x' || s[len] == 'b')) {
        if (s[len] == 'x') {
            base = 16;
        }
        len++;
    }
    while (len < avail && asmc_isdigit(s[len], base)) {
        len++;
    }
    return len;
}
/* Returns the length of the name (or label name, without the ':') at the cursor, or -1 if the cursor
 * is not at a character that can begin a name.
 */
ASMLN_INLINE int32_t asmt_namelength(asmt_t* asmt) {
    if (!asmt_isnameorlabelstart(asmt)) {
        return -1;
    }
    /* Scan forward from the character after the head until the name stops or the input ends. */
    int32_t end = asmt->index + 1;
    while (end < asmt->length && asmc_isvalidlabeltail(asmt->input[end])) {
        end++;
    }
    return end - asmt->index;
}
/* Returns the length of the quoted string token at the cursor, including both quotes.
 * Returns -1 if the cursor is not at a quote or no closing quote is found. Escapes are not interpreted:
 * the token ends at the next '"' character.
 */
ASMLN_INLINE int32_t asmt_stringlength(asmt_t* asmt) {
    if (!asmt_isstringstart(asmt)) {
        return -1;
    }
    int32_t pos = asmt->index + 1;
    while (pos < asmt->length && asmt->input[pos] != '\"') {
        pos++;
    }
    if (pos >= asmt->length) {
        return -1; // Ran off the end without finding the closing quote
    }
    return (pos - asmt->index) + 1;
}
/* Returns the length of the comment at the cursor, running up to (not including) the next '\r' or '\n'
 * or to the end of input. Returns -1 if the cursor is not at a comment marker.
 */
ASMLN_INLINE int32_t asmt_commentlength(asmt_t* asmt) {
    if (!asmt_iscommentstart(asmt)) {
        return -1;
    }
    int32_t pos = asmt->index + 1;
    while (pos < asmt->length) {
        char c = asmt->input[pos];
        if (c == '\r' || c == '\n') {
            break;
        }
        pos++;
    }
    return pos - asmt->index;
}
/* Returns true if the cursor is at a name that is immediately followed by ':' (i.e. a label). */
ASMLN_INLINE bool asmt_islabelstart(asmt_t* asmt) {
    int32_t namelen = asmt_namelength(asmt);
    if (namelen <= 0) {
        return false;
    }
    int32_t after = asmt->index + namelen;
    return after < asmt->length && asmt->input[after] == ':';
}
/* Returns true if the cursor is at a name that is not a label (no ':' directly after it). */
ASMLN_INLINE bool asmt_isnamestart(asmt_t* asmt) {
    if (!asmt_isnameorlabelstart(asmt)) {
        return false;
    }
    return !asmt_islabelstart(asmt);
}
/* Token types returned by asmt_tokentype. */
#define ASMT_TOKENTYPE_ERROR -1 // Character that starts no known token
#define ASMT_TOKENTYPE_END 0 // End of input
#define ASMT_TOKENTYPE_LABEL 1 // Name immediately followed by ':'
#define ASMT_TOKENTYPE_NAME 2 // Name not followed by ':' (instructions, symbols and operators)
#define ASMT_TOKENTYPE_NUMBER 3
#define ASMT_TOKENTYPE_STRING 4 // Double-quoted string
#define ASMT_TOKENTYPE_COMMA 5
#define ASMT_TOKENTYPE_COMMENT 6 // Starts with ';' or '#'
#define ASMT_TOKENTYPE_OPENBR 7 // '('; also used as the type of a parsed sub-expression
#define ASMT_TOKENTYPE_CLOSEBR 8 // ')'
/* Skips leading whitespace and classifies the token at the cursor as one of the ASMT_TOKENTYPE_* values.
 * The order of the checks matters: '-' followed by a digit is a number even though '-' can also begin a
 * name, and names/labels are tested before plain numbers.
 */
ASMLN_INLINE int32_t asmt_tokentype(asmt_t* asmt) {
    asmt_skipspaces(asmt);
    if (asmt_isend(asmt)) {
        return ASMT_TOKENTYPE_END;
    }
    const char head = asmt->input[asmt->index];
    if (head == '-' && asmc_isdec(asmt->input[asmt->index+1])) {
        return ASMT_TOKENTYPE_NUMBER;
    }
    if (asmt_isnamestart(asmt)) {
        return ASMT_TOKENTYPE_NAME;
    }
    if (asmt_islabelstart(asmt)) {
        return ASMT_TOKENTYPE_LABEL;
    }
    if (asmt_isnumberstart(asmt)) {
        return ASMT_TOKENTYPE_NUMBER;
    }
    if (asmt_isstringstart(asmt)) {
        return ASMT_TOKENTYPE_STRING;
    }
    if (asmt_iscommentstart(asmt)) {
        return ASMT_TOKENTYPE_COMMENT;
    }
    /* Only the single-character punctuation tokens remain. */
    switch (head) {
    case ',':
        return ASMT_TOKENTYPE_COMMA;
    case '(':
        return ASMT_TOKENTYPE_OPENBR;
    case ')':
        return ASMT_TOKENTYPE_CLOSEBR;
    default:
        return ASMT_TOKENTYPE_ERROR;
    }
}
/* Returns the length in characters of the token at the cursor (after skipping whitespace), or -1 if
 * there is no token (end of input or an unrecognised character). A label's length includes its ':'.
 */
ASMLN_INLINE int32_t asmt_tokenlength(asmt_t* asmt) {
    const int32_t type = asmt_tokentype(asmt);
    if (type == ASMT_TOKENTYPE_NAME) {
        return asmt_namelength(asmt);
    }
    if (type == ASMT_TOKENTYPE_LABEL) {
        return asmt_namelength(asmt) + 1;
    }
    if (type == ASMT_TOKENTYPE_NUMBER) {
        return asmt_numberlength(asmt);
    }
    if (type == ASMT_TOKENTYPE_STRING) {
        return asmt_stringlength(asmt);
    }
    if (type == ASMT_TOKENTYPE_COMMENT) {
        return asmt_commentlength(asmt);
    }
    if (type == ASMT_TOKENTYPE_COMMA || type == ASMT_TOKENTYPE_OPENBR || type == ASMT_TOKENTYPE_CLOSEBR) {
        return 1;
    }
    /* ASMT_TOKENTYPE_ERROR, ASMT_TOKENTYPE_END and anything unexpected. */
    return -1;
}
/* Advances the cursor past the next token (after skipping whitespace).
 * Returns false, leaving the cursor at the token position, if the input has ended or no valid token is there.
 */
ASMLN_INLINE bool asmt_skiptoken(asmt_t* asmt) {
    asmt_skipspaces(asmt); // Just in case of edge cases...
    if (asmt_isend(asmt)) {
        return false;
    }
    int32_t advance = asmt_tokenlength(asmt);
    if (advance <= 0) {
        return false;
    }
    asmt->index += advance;
    return true;
}
/* Skips the comment token at the cursor, repeating for as long as the cursor is directly at a comment marker. */
ASMLN_INLINE void asmt_skipcomments(asmt_t* asmt) {
    for (;;) {
        if (!asmt_iscommentstart(asmt)) {
            break;
        }
        asmt_skiptoken(asmt);
    }
}
/* Main API: Deals with one line of assembly code at a time, in format like [labelname:] [instrname param1 , param2 , ... , paramN] [; comment] */
/* Set a reasonably high maximum, assuming data strings can go on for a while... */
#define ASMLN_MAXPARAMS 100
typedef struct asmln asmln_t;
typedef struct asmlnx asmlnx_t;
/* One parsed source line. Every char* member is an allocated copy released by asmln_delete; a component that
 * is absent from the line is NULL.
 */
struct asmln {
char* labelcopy; // Label name without its trailing ':'
char* instrcopy; // Instruction or directive name
int32_t nparams; // Number of entries used in the three param arrays below
int32_t paramtype[ASMLN_MAXPARAMS]; // ASMT_TOKENTYPE_* of each parameter (ASMT_TOKENTYPE_OPENBR for an expression)
char* paramcopy[ASMLN_MAXPARAMS]; // Text of each parameter (may be NULL for expressions)
asmlnx_t* paramx[ASMLN_MAXPARAMS]; // Expression tree of each parameter, or NULL for a plain token
char* commentcopy; // Trailing comment, including its marker character
char* errorcopy; // Error message if the line failed to parse
};
/* Subexpressions, e.g. (1 + (2 * 3)) need to be split into a tree structure. Expressions are given the type ASMT_TOKENTYPE_OPENBR (like their first token).
* These expressions could be optimised/reduced by the assembler in the future, but otherwise are encoded directly in the output, allowing the linker/loader to
* resolve complex expressions involving linker symbols.
*/
struct asmlnx {
int32_t lhstype; // ASMT_TOKENTYPE_* of the left operand
char* lhscopy; // Text of the left operand (allocated copy)
asmlnx_t* lhsx; // Nested expression for the left operand, or NULL for a plain token
char* opcopy; // Operator name (allocated copy); "OFF" for the GNU-style offset form value(inner)
int32_t rhstype; // ASMT_TOKENTYPE_* of the right operand
char* rhscopy; // Text of the right operand (allocated copy)
asmlnx_t* rhsx; // Nested expression for the right operand, or NULL for a plain token
};
/* Parses one source line into a new structure. Returns NULL only on allocation failure; parse errors are reported in errorcopy. */
asmln_t* asmln_new(const char* sourceline);
/* Releases a parsed line and everything it owns. Accepts NULL and always returns NULL. */
void* asmln_delete(asmln_t* asmln);
/* Parses one operand (plain token or bracketed expression) into the given output slots, incrementing *incr_var per operand stored.
 * Returns true only if a comma followed, i.e. another operand may follow.
 */
bool asmlnx_parse_subexpression(asmln_t* asmln, asmt_t* asmt, int32_t* type_var, char** copy_var, asmlnx_t** x_var, int32_t* incr_var);
/* Releases an expression tree recursively. Always returns NULL. */
void* asmlnx_delete(asmlnx_t* asmlnx);
/*
ASMLN_INLINE int32_t asmln_nextnonspace(const char* l, int32_t n, int32_t i) {
while (i < n) {
if (!asmc_isspace(l[i])) {
return i;
}
i++;
}
return -1;
}
ASMLN_INLINE int32_t asmln_nextinstance(const char* l, int32_t n, int32_t i, char c) {
while (i < n) {
if (l[i] == c) {
return i;
}
i++;
}
return -1;
}*/
//ASMLN_INLINE int32_t asmln_lab
/* Convenience functions: */
/*
ASMLN_INLINE int32_t asml_len(const char* l) {
if (l == NULL) {
return 0;
}
int32_t i = 0;
while (l[i] != 0) {
i++;
}
return i;
}*/
/* From ifndef at top of file: */
#endif

421
asmpp.c Normal file
View File

@ -0,0 +1,421 @@
#include "asmpp.h"
#include <stdlib.h>
#include <stdio.h>
/* Entry callback handed to asmdata_map_new for the preprocessor's maps. It intentionally does nothing,
 * so the values stored in those maps are not released through it.
 */
static void asmpp_mapdelete(asmdata_map_t* map, char* name, void* value) {
    (void)map;
    (void)name;
    (void)value;
}
/* Creates a preprocessor with empty definition and macro maps and no active context.
 * outputf and udata are stored as given. Returns NULL if any allocation fails, releasing whatever
 * had already been allocated.
 */
asmpp_t* asmpp_new(asmpp_systemf_t outputf, void* udata) {
    asmpp_t* pp = malloc(sizeof(asmpp_t));
    if (pp == NULL) {
        return NULL;
    }
    pp->outputf = outputf;
    pp->udata = udata;
    pp->context = NULL;
    pp->macros = NULL;
    pp->defs = asmdata_map_new(543, &asmpp_mapdelete);
    if (pp->defs == NULL) {
        free(pp);
        return NULL;
    }
    pp->macros = asmdata_map_new(543, &asmpp_mapdelete);
    if (pp->macros == NULL) {
        /* The definitions map already exists at this point and must not be leaked. */
        asmdata_map_delete(pp->defs);
        free(pp);
        return NULL;
    }
    return pp;
}
/* Destroys a preprocessor: deletes its definition and macro maps and frees the structure. Accepts NULL.
 * Contexts still on the context stack are not released here.
 */
void asmpp_delete(asmpp_t* pp) {
    if (pp != NULL) {
        asmdata_map_delete(pp->defs);
        asmdata_map_delete(pp->macros);
        free(pp);
    }
}
/* Returns a printable name for an ASMPP_CONTEXT_* value, or "Invalid" for any other value.
 * The pp parameter is not used.
 */
const char* asmpp_contexttypename(asmpp_t* pp, int type) {
    (void)pp;
    switch (type) {
    case ASMPP_CONTEXT_OUTER: return "OUTER";
    case ASMPP_CONTEXT_MACRO_EXPAND: return "MACRO_EXPAND";
    case ASMPP_CONTEXT_MACRO_COLLECT: return "MACRO_COLLECT";
    case ASMPP_CONTEXT_IF_EXPAND: return "IF_EXPAND";
    case ASMPP_CONTEXT_IF_PARSEONLY: return "IF_PARSEONLY";
    /* The default previously evaluated the string without returning it, so the function fell off its end. */
    default: return "Invalid";
    }
}
/* Pushes a new context of the given type onto the preprocessor's context stack and returns it.
 * The new context has no locals and an ifvalue of 0; macro may be NULL for non-macro contexts.
 * Returns NULL, leaving the stack unchanged, if allocation fails.
 */
asmpp_context_t* asmpp_enter(asmpp_t* pp, int type, asmpp_macro_t* macro) {
    asmpp_context_t* fresh = malloc(sizeof(asmpp_context_t));
    if (fresh != NULL) {
        fresh->type = type;
        fresh->macro = macro;
        fresh->locals = NULL;
        fresh->ifvalue = 0;
        /* Link on top of the current stack. */
        fresh->next = pp->context;
        pp->context = fresh;
    }
    return fresh;
}
/* Prints a one-line description of a context (type, macro name if any, and address) to stdout, for debugging.
 * The pp parameter is not used.
 */
void asmpp_dumpctx(asmpp_t* pp, asmpp_context_t* ctx) {
    (void)pp;
    /* %p requires a void* argument, so the context pointer is cast explicitly. */
    printf("Context %i %s @%p\n", ctx->type, ctx->macro == NULL ? "(not a macro)" : ctx->macro->proto->instrcopy, (void*)ctx);
}
/* Prints every context on the stack, innermost first, using asmpp_dumpctx. */
void asmpp_dumpctxs(asmpp_t* pp) {
    asmpp_context_t* cursor;
    for (cursor = pp->context; cursor != NULL; cursor = cursor->next) {
        asmpp_dumpctx(pp, cursor);
    }
}
/* Pops context off the context stack and frees it. Only the innermost context may be exited; otherwise a
 * warning and the stack are printed and nothing changes. A NULL context (e.g. a closing directive with no
 * open context) is also reported and ignored.
 * The context's locals map is deleted only if this context owns it: conditional contexts borrow the locals
 * pointer of their parent, and deleting a borrowed map would leave the parent with a dangling pointer.
 */
void asmpp_exit(asmpp_t* pp, asmpp_context_t* context) {
    if (context == NULL) {
        printf("WARNING: Trying to exit a context when none is active\n");
        return;
    }
    if (context != pp->context) {
        printf("WARNING: Trying to exit from wrong context\n");
        asmpp_dumpctx(pp, context);
        printf("--- STACK: ---\n");
        asmpp_dumpctxs(pp);
        return;
    }
    pp->context = context->next;
    bool borrowed = context->next != NULL && context->next->locals == context->locals;
    if (context->locals != NULL && !borrowed) {
        asmdata_map_delete(context->locals);
    }
    free(context);
}
/* Registers a macro under the instruction name of proto and returns it (NULL on allocation failure).
 * The new macro starts with no body lines. Any macro already registered under the same name is kept and
 * chained behind the new one through next, so several macros can share one name.
 * proto is stored as given, not copied.
 */
asmpp_macro_t* asmpp_quickmacro(asmpp_t* pp, asmln_t* proto, asmpp_systemf_t systemf) {
    asmpp_macro_t* fresh = malloc(sizeof(asmpp_macro_t));
    if (fresh != NULL) {
        fresh->proto = proto;
        fresh->systemf = systemf;
        fresh->lines = NULL;
        /* The previous head of the chain (possibly NULL) becomes the successor. */
        fresh->next = asmdata_map_get(pp->macros, proto->instrcopy);
        asmdata_map_set(pp->macros, proto->instrcopy, fresh);
    }
    return fresh;
}
/* Allocates a definition record holding a token type, a value string and an optional expression.
 * str and x are stored as given (not copied). Returns NULL on allocation failure. The pp parameter is not used.
 */
asmpp_def_t* asmpp_allocdef(asmpp_t* pp, int t, char* str, asmlnx_t* x) {
    asmpp_def_t* def = malloc(sizeof(asmpp_def_t));
    if (def != NULL) {
        def->t = t;
        def->value = str;
        def->x = x;
    }
    return def;
}
/* Looks up a definition by name. The locals of the innermost context (if it has any) take precedence over
 * the global definitions. Returns NULL if the name is not defined in either place.
 */
asmpp_def_t* asmpp_finddef(asmpp_t* pp, char* name) {
    asmpp_context_t* top = pp->context;
    asmpp_def_t* found = NULL;
    if (top != NULL && top->locals != NULL) {
        found = asmdata_map_get(top->locals, name);
    }
    if (found == NULL) {
        found = asmdata_map_get(pp->defs, name);
    }
    return found;
}
/* Finds the macro registered under name whose prototype takes exactly nparams parameters.
 * Walks the chain of same-named macros and returns the first match, or NULL if there is none.
 */
asmpp_macro_t* asmpp_findmacro(asmpp_t* pp, char* name, int nparams) {
    asmpp_macro_t* candidate;
    for (candidate = asmdata_map_get(pp->macros, name); candidate != NULL; candidate = candidate->next) {
        if (candidate->proto->nparams == nparams) {
            return candidate;
        }
    }
    return NULL;
}
char* asmdata_strdup_(const char* str);
/* Duplicates str with asmdata_strdup_, except that a NULL input yields NULL instead of being passed on. */
char* asmpp_strdupnull_(char* str) {
    if (str == NULL) {
        return NULL;
    }
    return asmdata_strdup_(str);
}
/*int asmpp_copyxln_param(asmpp_t* pp, int* paramtp, char** strp, asmlnx_t** xp, bool expandparams) {
int t = *paramtp;
if (t == ASMT_TOKENTYPE_NAME) {
asmpp_def_t* def = asmpp_finddef(pp, *strp);
if (def != NULL) {
*paramtp = def->t;
*strp = asmpp_strdupnull_(def->value);
}
}
*strp = asmpp_strdupnull_(*strp); // NOTE: This must handle NULLs!
}*/
/* Copies one operand (type, text, expression) from the src* arguments into the dst* lvalues.
 * If xp is true and the source operand is a name for which asmpp_finddef finds a definition, the definition's
 * type, value and expression are copied instead of the operand itself. Strings are duplicated with
 * asmpp_strdupnull_ and expressions with asmpp_copyxln_x, so the destination owns its contents.
 * The macro relies on a variable named pp being in scope at the point of use, and it evaluates its
 * arguments more than once, so they must be free of side effects.
 */
#define ASMPP_COPYXLN_EXPAND(dstt,dstc,dstx,srct,srcc,srcx,xp) \
do { \
bool _done = false; \
if ((xp) && (srct) == ASMT_TOKENTYPE_NAME) { \
asmpp_def_t* _def = asmpp_finddef(pp, srcc); \
if (_def != NULL) { \
_done = true; \
dstt = _def->t; \
dstc = asmpp_strdupnull_(_def->value); \
dstx = asmpp_copyxln_x(pp,_def->x,xp); \
} \
} \
if (!_done) { \
dstt = srct; \
dstc = asmpp_strdupnull_(srcc); \
dstx = asmpp_copyxln_x(pp,srcx,xp); \
} \
} while (0)
asmlnx_t* asmpp_copyxln_x(asmpp_t* pp, asmlnx_t* x, bool expandparams);
/* Returns a deep copy of an expression tree, or NULL if x is NULL or allocation fails.
 * When expandparams is true, operands that are names with a definition are replaced by that definition
 * while copying (see ASMPP_COPYXLN_EXPAND).
 */
asmlnx_t* asmpp_copyxln_x(asmpp_t* pp, asmlnx_t* x, bool expandparams) {
    asmlnx_t* copy = (x == NULL) ? NULL : calloc(sizeof(asmlnx_t),1);
    if (copy != NULL) {
        ASMPP_COPYXLN_EXPAND(copy->lhstype,copy->lhscopy,copy->lhsx,x->lhstype,x->lhscopy,x->lhsx,expandparams);
        copy->opcopy = asmpp_strdupnull_(x->opcopy);
        ASMPP_COPYXLN_EXPAND(copy->rhstype,copy->rhscopy,copy->rhsx,x->rhstype,x->rhscopy,x->rhsx,expandparams);
    }
    return copy;
}
/* Returns a deep copy of a parsed line, or NULL if ln is NULL or allocation fails.
 * When expandparams is true, parameters that are names with a definition are replaced by that definition
 * while copying (see ASMPP_COPYXLN_EXPAND). The copy is zero-initialised first, so unused parameter
 * slots hold NULL/0 rather than indeterminate values.
 */
asmln_t* asmpp_copyxln(asmpp_t* pp, asmln_t* ln, bool expandparams) {
    if (ln == NULL) {
        return NULL;
    }
    asmln_t* xln = calloc(1, sizeof(asmln_t));
    if (xln == NULL) {
        return NULL;
    }
    xln->labelcopy = asmpp_strdupnull_(ln->labelcopy);
    xln->instrcopy = asmpp_strdupnull_(ln->instrcopy);
    xln->nparams = ln->nparams;
    xln->commentcopy = asmpp_strdupnull_(ln->commentcopy);
    xln->errorcopy = asmpp_strdupnull_(ln->errorcopy);
    int i;
    for (i = 0; i < xln->nparams; i++) {
        ASMPP_COPYXLN_EXPAND(xln->paramtype[i],xln->paramcopy[i],xln->paramx[i],ln->paramtype[i],ln->paramcopy[i],ln->paramx[i],expandparams);
    }
    return xln;
}
bool asmdata_streq_(const char* a, const char* b);
/* Returns true unless compilation is disabled by an #if or equivalent context. */
/* Compilation counts as disabled if any context on the stack is an IF_EXPAND context whose ifvalue is 0. */
bool asmpp_ifcompiling(asmpp_t* pp) {
    asmpp_context_t* cursor;
    for (cursor = pp->context; cursor != NULL; cursor = cursor->next) {
        if (cursor->type == ASMPP_CONTEXT_IF_EXPAND && cursor->ifvalue == 0) {
            return false;
        }
    }
    return true;
}
/*
 * Runs one parsed line through the preprocessor.
 *
 * In order, the line is either:
 *   - appended (unexpanded) to the macro being collected, until %endmacro;
 *   - consumed as %endif, or skipped because an enclosing %if is false;
 *   - consumed as a "%macro" / "%def" definition (the directive is the
 *     line's label, the defined name is its instruction field);
 *   - consumed as %if / %elseif / %else after parameter expansion;
 *   - expanded as a macro call (built-in handler or stored body lines); or
 *   - handed to pp->outputf.
 *
 * Returns the line count reported by the callbacks, 0 for directives, or a
 * negative value on failure. The caller keeps ownership of ln.
 */
int asmpp_expand(asmpp_t* pp, asmln_t* ln) {
    if (pp->context != NULL && pp->context->type == ASMPP_CONTEXT_MACRO_COLLECT) {
        if (asmdata_streq_(ln->instrcopy, "%endmacro")) {
            asmpp_exit(pp, pp->context);
            return 0;
        }
        asmpp_lines_t* nl = malloc(sizeof(asmpp_lines_t));
        if (nl == NULL) {
            return -1;
        }
        /* Body lines are stored unexpanded; expansion happens per call. */
        nl->line = asmpp_copyxln(pp, ln, false);
        nl->next = NULL;
        if (nl->line == NULL) {
            free(nl);
            return -1;
        }
        /* Append at the tail so the body replays in source order. */
        asmpp_lines_t* ll = pp->context->macro->lines;
        while (ll != NULL && ll->next != NULL) {
            ll = ll->next;
        }
        if (ll != NULL) {
            ll->next = nl;
        } else {
            pp->context->macro->lines = nl;
        }
        return 0;
    }
    if (asmdata_streq_(ln->instrcopy, "%endif")) {
        asmpp_exit(pp, pp->context);
        return 0;
    } else if (!asmpp_ifcompiling(pp) && !asmdata_streq_(ln->instrcopy, "%else") && !asmdata_streq_(ln->instrcopy, "%elseif")) {
        /* Inside a disabled region: only track nested %if so that its
           %endif pops the right context. */
        if (asmdata_streq_(ln->instrcopy, "%if")) {
            asmpp_enter(pp, ASMPP_CONTEXT_IF_PARSEONLY, NULL);
        }
        return 0;
    } else if (asmdata_streq_(ln->labelcopy, "%macro")) {
        asmpp_macro_t* mac = asmpp_quickmacro(pp, asmpp_copyxln(pp, ln, false), NULL);
        asmpp_enter(pp, ASMPP_CONTEXT_MACRO_COLLECT, mac);
        return 0;
    } else if (asmdata_streq_(ln->labelcopy, "%def")) {
        asmln_t* defln = asmpp_copyxln(pp, ln, true);
        if (defln == NULL) {
            return -1;
        }
        asmpp_def_t* def = asmpp_allocdef(pp, defln->paramtype[0], defln->paramcopy[0], defln->paramx[0]);
        asmdata_map_set(pp->defs, defln->instrcopy, def);
        /* NOTE(review): defln is not released here because the map key and
           the def may still point into it; confirm whether asmpp_allocdef
           and asmdata_map_set take copies. */
        return 0;
    }
    asmln_t* xln = asmpp_copyxln(pp, ln, true);
    if (xln == NULL) {
        return -1;
    }
    int nexpanded = 0;
    if (asmdata_streq_(xln->instrcopy, "%if")) {
        uint64_t val = 0;
        char* err = NULL;
        int sig = 0;
        if (!asmpp_calc(pp, xln->paramtype[0], xln->paramcopy[0], xln->paramx[0], &val, &err, &sig)) {
            printf("Calculation of the if factor failed: %s\n", err == NULL ? "(error string was not set)" : err);
        }
        asmpp_enter(pp, ASMPP_CONTEXT_IF_EXPAND, NULL);
        pp->context->ifvalue = val;
        /* The %if context shares the locals of the context it is nested in. */
        if (pp->context->next != NULL) {
            pp->context->locals = pp->context->next->locals;
        }
    } else if (asmdata_streq_(xln->instrcopy, "%elseif")) {
        if (pp->context == NULL) {
            /* %elseif with no open context at all. */
            nexpanded = -1;
        } else if (pp->context->ifvalue == 0) {
            uint64_t val = 0;
            char* err = NULL;
            int sig = 0;
            if (!asmpp_calc(pp, xln->paramtype[0], xln->paramcopy[0], xln->paramx[0], &val, &err, &sig)) {
                printf("Calculation of the if factor failed: %s\n", err == NULL ? "(error string was not set)" : err);
            }
            pp->context->ifvalue = val;
        }
        /* NOTE(review): when the preceding branch was taken, ifvalue stays
           non-zero, so asmpp_ifcompiling keeps answering true for the lines
           after this %elseif. That looks unintended, but fixing it needs
           extra per-context state. */
    } else if (asmdata_streq_(xln->instrcopy, "%else")) {
        if (pp->context == NULL) {
            /* %else with no open context at all. */
            nexpanded = -1;
        } else {
            pp->context->ifvalue = (pp->context->ifvalue == 0) ? 1 : 0;
        }
    } else {
        asmpp_macro_t* m = (xln->instrcopy == NULL) ? NULL : asmpp_findmacro(pp, xln->instrcopy, xln->nparams);
        if (m != NULL) {
            asmpp_context_t* ctx = asmpp_enter(pp, ASMPP_CONTEXT_MACRO_EXPAND, m);
            if (m->systemf != NULL) {
                /* Built-in macro: the handler does all the work. */
                asmpp_systemf_t sysf = m->systemf;
                nexpanded = sysf(pp, xln);
            } else {
                bool failed = false;
                ctx->locals = asmdata_map_new(23, &asmpp_mapdelete);
                int argi;
                for (argi = 0; argi < m->proto->nparams; argi++) {
                    if (m->proto->paramtype[argi] == ASMT_TOKENTYPE_OPENBR) {
                        /* Bracketed formal such as off(src): bind its left
                           side, operator and right side separately. */
                        asmlnx_t* formal = m->proto->paramx[argi];
                        asmlnx_t* actual = xln->paramx[argi];
                        if (formal == NULL || actual == NULL) {
                            /* The actual argument is not a bracketed expression. */
                            failed = true;
                            break;
                        }
                        asmpp_def_t* deflhs = asmpp_allocdef(pp, actual->lhstype, actual->lhscopy, actual->lhsx);
                        asmpp_def_t* defop = asmpp_allocdef(pp, ASMT_TOKENTYPE_NAME, actual->opcopy, NULL);
                        asmpp_def_t* defrhs = asmpp_allocdef(pp, actual->rhstype, actual->rhscopy, actual->rhsx);
                        asmdata_map_set(ctx->locals, formal->lhscopy, deflhs);
                        asmdata_map_set(ctx->locals, formal->opcopy, defop);
                        asmdata_map_set(ctx->locals, formal->rhscopy, defrhs);
                    } else {
                        asmpp_def_t* def = asmpp_allocdef(pp, xln->paramtype[argi], xln->paramcopy[argi], xln->paramx[argi]);
                        asmdata_map_set(ctx->locals, m->proto->paramcopy[argi] == NULL ? "?" : m->proto->paramcopy[argi], def);
                    }
                }
                asmpp_lines_t* l = m->lines;
                while (l != NULL && !failed) {
                    int n = asmpp_expand(pp, l->line);
                    if (n < 0) {
                        /* Stop at the first failing body line instead of
                           folding the error code into the line count. */
                        failed = true;
                    } else {
                        nexpanded += n;
                    }
                    l = l->next;
                }
                if (failed) {
                    nexpanded = -1;
                }
            }
            asmpp_exit(pp, ctx);
        } else {
            asmpp_systemf_t outputf = pp->outputf;
            nexpanded = outputf(pp, xln);
            if (nexpanded < 0) {
                nexpanded = -1;
            }
        }
    }
    asmln_delete(xln);
    return nexpanded;
}
/*
 * Applies a binary operator to two already-evaluated operands.
 * Only the logical operators "&&" and "||" are implemented.
 *
 * On success the value (1 or 0) is stored in *resultp, *signp is set to 0
 * and true is returned. For any other operator *errp receives a static
 * message and false is returned. pp, lhssig and rhssig are currently unused.
 */
bool asmpp_binop(asmpp_t* pp, uint64_t lhsresult, int lhssig, char* op, uint64_t rhsresult, int rhssig, uint64_t* resultp, char** errp, int* signp) {
    uint64_t value;
    if (asmdata_streq_(op, "&&")) {
        value = (lhsresult != 0 && rhsresult != 0) ? 1 : 0;
    } else if (asmdata_streq_(op, "||")) {
        value = (lhsresult != 0 || rhsresult != 0) ? 1 : 0;
    } else {
        if (errp != NULL) {
            *errp = "Unknown binary operator";
        }
        return false;
    }
    /* The return value only signals success; the logical result travels
       through resultp so that a false expression is not reported as a
       failed calculation. */
    if (resultp != NULL) {
        *resultp = value;
    }
    if (signp != NULL) {
        *signp = 0;
    }
    return true;
}
long long asmdata_atoll_(const char* a);
/*
 * Evaluates a preprocessor expression.
 *   t    token type of the expression: a NUMBER is converted from str, an
 *        OPENBR expression is evaluated recursively from x (lhs, op, rhs);
 *        every other type is reported as not calculated.
 *   str  text of the token (used for numbers).
 *   x    expression node (used for bracketed expressions).
 * resultp, errp and sigp may each be NULL; when non-NULL they always
 * receive a value (0 / NULL / 0 unless the evaluation set them).
 * Returns true when a value was calculated.
 */
bool asmpp_calc(asmpp_t* pp, int t, char* str, asmlnx_t* x, uint64_t* resultp, char** errp, int* sigp) {
    char* err = NULL;
    int sig = 0;
    uint64_t result = 0;
    bool calculated = false;
    switch (t) {
    case ASMT_TOKENTYPE_NUMBER: {
        result = (uint64_t)asmdata_atoll_(str); // This handles hex/binary, TODO: floats
        /* Trace output; the arguments are cast so they match the
           conversion specifiers. */
        printf("Number '%s' -> %lld (0x%llx)\n", str, (long long)result, (unsigned long long)result);
        calculated = true;
        break;
    }
    case ASMT_TOKENTYPE_OPENBR: {
        /* Braces give the declarations below a proper block; a declaration
           directly after a case label is not valid before C23. */
        if (x == NULL) {
            err = "Missing sub-expression";
            break;
        }
        uint64_t lhsresult = 0;
        char* lhserror = NULL;
        int lhssig = 0;
        if (!asmpp_calc(pp, x->lhstype, x->lhscopy, x->lhsx, &lhsresult, &lhserror, &lhssig)) {
            err = lhserror;
            break;
        }
        uint64_t rhsresult = 0;
        char* rhserror = NULL;
        int rhssig = 0;
        if (!asmpp_calc(pp, x->rhstype, x->rhscopy, x->rhsx, &rhsresult, &rhserror, &rhssig)) {
            err = rhserror;
            break;
        }
        calculated = asmpp_binop(pp, lhsresult, lhssig, x->opcopy, rhsresult, rhssig, &result, &err, &sig);
        break;
    }
    default:
        calculated = false;
        break;
    }
    if (resultp != NULL) {
        *resultp = result;
    }
    if (errp != NULL) {
        *errp = err;
    }
    if (sigp != NULL) {
        *sigp = sig;
    }
    return calculated;
}

76
asmpp.h Normal file
View File

@ -0,0 +1,76 @@
#ifndef ASMPP_H
#define ASMPP_H
#include "asmln.h"
#include "asmdata.h"
#include <stdint.h>
#include <stdbool.h>
// Forward declarations so the structures below can refer to one another.
typedef struct asmpp_def asmpp_def_t;
typedef struct asmpp_lines asmpp_lines_t;
typedef struct asmpp_macro asmpp_macro_t;
typedef struct asmpp_context asmpp_context_t;
typedef struct asmpp asmpp_t;
// Either outputs a line or handles some builtin macro.
// The same signature is used for the preprocessor's output callback
// (asmpp.outputf) and for the handler of a built-in macro (asmpp_macro.systemf).
// asmpp_expand passes the int result back to its caller and treats a negative
// value from the output callback as failure.
typedef int (*asmpp_systemf_t)(asmpp_t* pp, asmln_t* ln);
// One definition: a global one created by "%def", or a macro argument bound
// for the duration of a macro call.
struct asmpp_def {
// Token type of the definition's value (an ASMT_TOKENTYPE_* constant).
int t;
// Text of the value. Presumably owned by the definition; confirm in asmpp_allocdef.
char* value;
// Expression node for bracketed values; callers pass NULL when there is none.
asmlnx_t* x;
};
// Singly linked list node holding one line of a macro body.
// asmpp_expand appends nodes at the tail while a macro is being collected.
struct asmpp_lines {
// Unexpanded copy of the source line.
asmln_t* line;
// Next body line, or NULL at the end of the body.
asmpp_lines_t* next;
};
// A macro: either built-in (systemf set) or user-defined (body in lines).
struct asmpp_macro {
// Prototype line; its parameters are the macro's formal parameters.
asmln_t* proto;
// Handler for a built-in macro; NULL for macros defined with %macro.
asmpp_systemf_t systemf;
// Body lines of a user-defined macro, in source order.
asmpp_lines_t* lines;
// Link to another macro. NOTE(review): presumably chains macros in the lookup structure; confirm in asmpp.c.
asmpp_macro_t* next;
};
// Context types (values of asmpp_context.type).
// OUTER is not referenced by the code visible in asmpp_expand.
#define ASMPP_CONTEXT_OUTER 0
// A macro call is being expanded.
#define ASMPP_CONTEXT_MACRO_EXPAND 1
// Lines are being stored into a macro body until %endmacro.
#define ASMPP_CONTEXT_MACRO_COLLECT 2
// Inside an %if whose condition was evaluated; ifvalue decides whether lines compile.
#define ASMPP_CONTEXT_IF_EXPAND 3
// Inside an %if found in an already-disabled region; tracked only so its %endif balances.
#define ASMPP_CONTEXT_IF_PARSEONLY 4
// One entry of the preprocessor's context stack.
struct asmpp_context {
//bool collecting;
// One of the ASMPP_CONTEXT_* constants above.
int type;
// Value of the %if condition; zero disables compilation (see asmpp_ifcompiling).
uint64_t ifvalue;
// Macro being expanded or collected, or NULL for %if contexts.
asmpp_macro_t* macro;
// Local definitions (macro arguments); an %if context shares its parent's map.
asmdata_map_t* locals;
// Enclosing context, or NULL for the outermost one.
asmpp_context_t* next;
};
// Preprocessor state.
struct asmpp {
// Opaque user pointer; assemble.c stores the assembler here and reads it back in its output callback.
void* udata;
// Callback that receives every fully expanded line that is not a macro call.
asmpp_systemf_t outputf;
// Global definitions created by "%def", keyed by name.
asmdata_map_t* defs;
// Known macros. NOTE(review): presumably keyed by name; confirm in asmpp.c.
asmdata_map_t* macros;
// Innermost active context, or NULL when none is open.
asmpp_context_t* context;
};
// Creates a preprocessor that sends expanded lines to outputf; udata is kept for the callbacks.
asmpp_t* asmpp_new(asmpp_systemf_t outputf, void* udata);
// Destroys a preprocessor created with asmpp_new.
void asmpp_delete(asmpp_t* pp);
// Opens a new context of the given ASMPP_CONTEXT_* type and returns it; callers then reach it through pp->context.
asmpp_context_t* asmpp_enter(asmpp_t* pp, int type, asmpp_macro_t* macro);
// Closes the given context.
void asmpp_exit(asmpp_t* pp, asmpp_context_t* context);
// Registers a macro from a prototype line; systemf is the built-in handler, or NULL for a macro whose body is collected later.
asmpp_macro_t* asmpp_quickmacro(asmpp_t* pp, asmln_t* proto, asmpp_systemf_t systemf);
// Looks up a definition by name.
asmpp_def_t* asmpp_finddef(asmpp_t* pp, char* name);
// Looks up a macro by name and parameter count.
asmpp_macro_t* asmpp_findmacro(asmpp_t* pp, char* name, int nparams);
// Preprocesses one parsed line; returns the number of lines emitted or a negative value on failure.
int asmpp_expand(asmpp_t* pp, asmln_t* ln);
// Applies the binary operator op to two evaluated operands.
bool asmpp_binop(asmpp_t* pp, uint64_t lhsresult, int lhssig, char* op, uint64_t rhsresult, int rhssig, uint64_t* resultp, char** errp, int* signp);
// Evaluates an expression of token type t (text in str, expression node in x); returns true when a value was calculated.
bool asmpp_calc(asmpp_t* pp, int t, char* str, asmlnx_t* x, uint64_t* resultp, char** errp, int* signp);
// Closes the #ifndef ASMPP_H include guard opened at the top of the file:
#endif

478
assemble.c Normal file
View File

@ -0,0 +1,478 @@
#include "asmln.h"
#include "asmdata.h"
#include "asmpp.h"
#ifdef OLD_CODE
#include "asmgeneric_old.h"
#include "asmgen1.h"
#endif
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "asmln.c"
#include "asmdata.c"
#include "asmpp.c"
/* Prints the usage text to stderr. Currently a placeholder message only;
   none of the arguments are used. */
void usage(int argc, char** argv, int argi) {
    (void)argc;
    (void)argv;
    (void)argi;
    fputs("TODO: USAGE\n", stderr);
}
/* Placeholder: accepts every line and ignores both arguments. */
bool assemble_line(void* assembler, const char* line) {
    (void)assembler;
    (void)line;
    return true;
}
/* Writes indentString to output `indent` times; nothing is written when
   indent is zero or negative. */
void dump_indent(FILE* output, const char* indentString, int indent) {
    for (int level = 0; level < indent; level++) {
        fprintf(output, "%s", indentString);
    }
}
/*
 * Prints a bracketed expression as an indented tree: left operand, operator,
 * right operand, recursing one level deeper into each operand.
 * Does nothing unless type is ASMT_TOKENTYPE_OPENBR and x is non-NULL.
 */
void dump_x(FILE* output, int32_t type, asmlnx_t* x, const char* indentString, int indent);
void dump_x(FILE* output, int32_t type, asmlnx_t* x, const char* indentString, int indent) {
    if (type == ASMT_TOKENTYPE_OPENBR && x != NULL) {
        const int deeper = indent + 1;

        dump_indent(output, indentString, indent);
        fprintf(output, "[LHS]\t[type %d]\t'%s'\n", x->lhstype, x->lhscopy);
        dump_x(output, x->lhstype, x->lhsx, indentString, deeper);

        dump_indent(output, indentString, indent);
        fprintf(output, "[OP]\t'%s'\n", x->opcopy);

        dump_indent(output, indentString, indent);
        fprintf(output, "[RHS]\t[type %d]\t'%s'\n", x->rhstype, x->rhscopy);
        dump_x(output, x->rhstype, x->rhsx, indentString, deeper);
    }
}
/*
 * Prints a human-readable dump of a parsed line: its error (if any), then
 * label, instruction, parameter count and comment, then one entry per
 * parameter followed by the expression tree of bracketed parameters.
 * A NULL line is reported and nothing else is printed.
 */
void dump_asmln(FILE* output, asmln_t* asmln) {
    if (asmln == NULL) {
        fprintf(output, "ERROR: NULL asmln\n");
        /* Nothing more can be printed without a line. */
        return;
    }
    if (asmln->errorcopy != NULL) {
        fprintf(output, "ERROR: %s\n", asmln->errorcopy);
    }
    fprintf(output, "%s:\t%s\t[%d params]\t; %s\n",
        asmln->labelcopy == NULL ? "[no label]" : asmln->labelcopy,
        asmln->instrcopy == NULL ? "[no instr]" : asmln->instrcopy,
        asmln->nparams,
        asmln->commentcopy == NULL ? "[no comment]" : asmln->commentcopy);
    int32_t i;
    for (i = 0; i < asmln->nparams; i++) {
        /* A parameter's text can be NULL (asmpp_expand checks for that on
           macro prototypes), and NULL must not be passed to %s. */
        const char* text = asmln->paramcopy[i] == NULL ? "(null)" : asmln->paramcopy[i];
        fprintf(output, "\t[param %d]\t[type %d]\t'%s'\n", i, asmln->paramtype[i], text);
        dump_x(output, asmln->paramtype[i], asmln->paramx[i], "\t", 2);
    }
}
/* Built-in "warn" macro: dumps the invoking line to stderr and reports that
   no lines were emitted. */
int assemble_warn(asmpp_t* pp, asmln_t* asmln) {
    (void)pp;
    dump_asmln(stderr, asmln);
    return 0;
}
/*
 * Built-in "include" macro: preprocesses every line of the file named by the
 * first parameter of asmln.
 *
 * Returns the total number of lines emitted, or a negative value when the
 * file cannot be opened, memory runs out, a line fails to parse, or
 * asmpp_expand reports a failure. The file and the line buffer are released
 * on every path.
 */
int assemble_include(asmpp_t* pp, asmln_t* asmln) {
    const char* filename = (asmln == NULL || asmln->nparams < 1) ? NULL : asmln->paramcopy[0];
    if (filename == NULL) {
        fprintf(stderr, "Bad filename?");
        return -1;
    }
    FILE* input = fopen(filename, "r");
    if (input == NULL) {
        fprintf(stderr, "Bad filename?");
        return -1;
    }
    int result = -1;
    int xnum = 0;
    int32_t buffermax = 1024 * 1024;
    char* buffer = calloc(buffermax, 1);
    if (buffer == NULL) {
        fprintf(stderr, "Out of memory?");
        goto done;
    }
    while (fgets(buffer, buffermax, input) != NULL) {
        /* First parse the line.*/
        asmln_t* parsed = asmln_new(buffer);
        /* Now check for parse errors. */
        if (parsed == NULL) {
            fprintf(stderr, "asmln_new failed entirely!");
            goto done;
        }
        if (parsed->errorcopy != NULL) {
            dump_asmln(stdout, parsed);
            asmln_delete(parsed);
            /* Reported as a failure (result stays negative); returning 0
               here would look like a successful, empty include. */
            goto done;
        }
        int n = asmpp_expand(pp, parsed);
        asmln_delete(parsed);
        if (n < 0) {
            result = n;
            goto done;
        }
        xnum += n;
    }
    result = xnum;
done:
    free(buffer);
    fclose(input);
    return result;
}
/*
 * Output callback of the preprocessor: assembles one fully expanded line
 * with the assembler stored in pp->udata.
 * Returns 1 when the line was assembled and -1 on any failure (NULL line,
 * line carrying a non-empty error, unknown instruction, or assembly error).
 */
int assemble_ppout(asmpp_t* pp, asmln_t* asmln) {
    if (asmln == NULL) {
        fprintf(stderr, "preprocessor failed entirely!");
        return -1;
    }
    const bool hasError = (asmln->errorcopy != NULL) && (asmln->errorcopy[0] != 0);
    if (hasError) {
        dump_asmln(stdout, asmln);
        return -1;
    }
    /* Pick the complaint, if any; a plain data line is assembled directly. */
    const char* complaint = NULL;
    if (!asmdata_isvalidasmln(pp->udata, asmln)) {
        complaint = "This instruction can't be interpreted:\n";
    } else if (!asmdata_asmln(pp->udata, asmln)) {
        complaint = "Failed to interpret plain data line:\n";
    }
    if (complaint == NULL) {
        return 1;
    }
    fprintf(stderr, "%s", complaint);
    dump_asmln(stderr, asmln);
    return -1;
}
/* Assembly modes accepted by assemble_input (main currently always uses MODE_PP). */
/* Not referenced by name below; in every mode other than MODE_PP, plain data lines are assembled directly. */
#define MODE_DATA 0
/* Only handled when OLD_CODE is defined: lines go to asmgen1_asmln. */
#define MODE_GEN1 1
/* Only handled when OLD_CODE is defined: treated the same as MODE_GEN1. */
#define MODE_GEN1X 2
/* Only handled when OLD_CODE is defined: lines go to asmgeneric_asmln. */
#define MODE_GENERIC 3
/* Every line is run through the preprocessor (asmpp_expand). */
#define MODE_PP 4
/*
 * Assembles every line read from input.
 *   assembler  the asmdata instance that receives the output.
 *   mode       one of the MODE_* constants; MODE_PP runs each line through
 *              the preprocessor, the other modes assemble lines directly.
 *   modestr    optional name; when non-NULL, "<modestr>.inc" is included
 *              before the input is read.
 * A preprocessor with the built-in "include" and "warn" macros is created
 * for the duration of the call.
 * Returns true on success. On failure a message is printed and false is
 * returned; the buffer, the preprocessor and the current line are released
 * on every path.
 */
bool assemble_input(void* assembler, FILE* input, int32_t mode, const char* modestr) {
    bool ok = false;
    int32_t lnum = 1;
    int32_t buffermax = 1024 * 1024;
    asmpp_t* pp = NULL;
    asmln_t* asmln = NULL;
    char* buffer = calloc(buffermax, 1);
    if (buffer == NULL) {
        fprintf(stderr, "Out of memory?");
        return false;
    }
    pp = asmpp_new(&assemble_ppout, assembler);
    if (pp == NULL) {
        fprintf(stderr, "Out of memory?");
        goto done;
    }
    asmpp_quickmacro(pp, asmln_new("include x"), &assemble_include);
    asmpp_quickmacro(pp, asmln_new("warn x"), &assemble_warn);
    if (modestr != NULL) {
        /* Build and run the line "include <modestr>.inc". */
        size_t modemax = 20 + strlen(modestr);
        char* modebuf = calloc(modemax, 1);
        if (modebuf == NULL) {
            fprintf(stderr, "Out of memory?");
            goto done;
        }
        snprintf(modebuf, modemax, "include %s.inc", modestr);
        asmln_t* modeln = asmln_new(modebuf);
        int loaded = -1;
        if (modeln != NULL) {
            loaded = assemble_include(pp, modeln);
            asmln_delete(modeln);
        }
        free(modebuf);
        if (loaded < 0) {
            /* Without its definitions the requested mode cannot work. */
            fprintf(stderr, "Failed to load mode definitions '%s.inc'\n", modestr);
            goto done;
        }
    }
    while (fgets(buffer, buffermax, input) != NULL) {
        /* First parse the line.*/
        asmln = asmln_new(buffer);
        /* Now check for parse errors. */
        if (asmln == NULL) {
            fprintf(stderr, "asmln_new failed entirely!");
            goto done;
        }
        if (asmln->errorcopy != NULL) {
            dump_asmln(stdout, asmln);
            goto done;
        }
        if (mode == MODE_PP) {
            int n = asmpp_expand(pp, asmln);
            if (n < 0) {
                fprintf(stderr, "Failed to interpret preprocessed data around line %d:\t", lnum);
                goto done;
            }
        }
        /* If it's a plain data line, just assemble it: */
        else if (asmdata_isvalidasmln(assembler, asmln)) {
            bool tmp = asmdata_asmln(assembler, asmln);
            if (!tmp) {
                fprintf(stderr, "Failed to interpret plain data line:\n");
                fprintf(stderr, "Around line %d:\t", lnum);
                dump_asmln(stderr, asmln);
                goto done;
            }
        }
#ifdef OLD_CODE
        else if (mode == MODE_GEN1 || mode == MODE_GEN1X) {
            bool tmp = asmgen1_asmln(assembler, asmln);
            if (!tmp) {
                fprintf(stderr, "Failed to interpret generic instruction line:\n");
                fprintf(stderr, "Around line %d:\t", lnum);
                dump_asmln(stderr, asmln);
                goto done;
            }
        }
        else if (mode == MODE_GENERIC) {
            bool tmp = asmgeneric_asmln(assembler, asmln);
            if (!tmp) {
                fprintf(stderr, "Failed to interpret generic instruction line:\n");
                fprintf(stderr, "Around line %d:\t", lnum);
                dump_asmln(stderr, asmln);
                goto done;
            }
        }
#endif
        else {
            fprintf(stderr, "This instruction can't be interpreted:\n");
            fprintf(stderr, "Around line %d:\t", lnum);
            dump_asmln(stderr, asmln);
            goto done;
        }
        asmln_delete(asmln);
        asmln = NULL;
        memset(buffer, 0, buffermax);
        lnum++;
    }
    ok = true;
done:
    /* Single exit: release whatever is still held. */
    if (asmln != NULL) {
        asmln_delete(asmln);
    }
    if (pp != NULL) {
        asmpp_delete(pp);
    }
    free(buffer);
    return ok;
}
/*
 * Opens filename for reading and assembles it with assemble_input.
 * Returns false when the file cannot be opened or assembly fails; the file
 * is closed in either case once it has been opened.
 */
bool assemble(void* assembler, const char* filename, int32_t mode, const char* modestr) {
    FILE* input = fopen(filename, "r");
    if (input == NULL) {
        fprintf(stderr, "Bad filename?");
        return false;
    }
    bool assembled = assemble_input(assembler, input, mode, modestr) ? true : false;
    fclose(input);
    return assembled;
}
/*
 * Writes a readable hex dump of one section: a summary line with the
 * section's offsets and sizes, then the filled part of its buffer, sixteen
 * bytes per row, each row prefixed with the section number and byte offset.
 * Nothing is written when readable is false. Always returns true.
 */
bool produce_section(void* assembler, int sectionnum, FILE* output, bool readable) {
    asmdata_t* data = assembler;
    asmdata_section_t* sec = data->sections[sectionnum];
    if (!readable) {
        return true;
    }
    fprintf(output, "\t[base=0x%016llX reserved=0x%016llX filled=0x%08llX buffered=0x%08llX]\n", (long long)(sec->virtualoffset), (long long)(sec->reservedsize), (long long)(sec->bufferfilled), (long long)(sec->buffersize));
    for (int32_t pos = 0; pos < sec->bufferfilled; pos++) {
        if (pos % 16 == 0) {
            /* Start of a row: end the previous one first, if there was one. */
            if (pos > 0) {
                fprintf(output, "\n");
            }
            fprintf(output, "\tS%04d+0x%016x:\t", sectionnum, pos);
        }
        fprintf(output, "%02x ", sec->buffer[pos]);
    }
    fprintf(output, "\n\n");
    return true;
}
/* Writes zero bytes until *offset is a multiple of pagesize, advancing
   *offset. A pagesize of 0 means no alignment. Returns false on a write error. */
static bool produce_padding_(FILE* output, int32_t* offset, int32_t pagesize) {
    const char zero = 0;
    if (pagesize == 0) {
        /* Avoids the undefined `offset % 0`. */
        return true;
    }
    while ((*offset % pagesize) != 0) {
        if (fwrite(&zero, 1, 1, output) != 1) {
            return false;
        }
        (*offset)++;
    }
    return true;
}

/*
 * Writes the assembled result to output.
 *   readable && header   lists and hex-dumps all sections, then lists
 *                        references and symbols.
 *   readable && !header  lists references and symbols only.
 *   !readable && header  writes the "asmdata.fileheader" section followed by
 *                        every section's filled bytes, each padded with
 *                        zeros to a multiple of pagesize.
 *   !readable && !header writes nothing.
 * Returns false when the header section is missing or a write fails.
 */
bool produce_output(void* assembler, FILE* output, bool readable, bool header, int32_t pagesize) {
    asmdata_t* asmdata = assembler;
    int32_t sectionnum;
    int32_t offset = 0;
    if (header && readable) {
        fprintf(output, "SECTIONS (%d):\n", asmdata->nsections);
        for (sectionnum = 0; sectionnum < asmdata->nsections; sectionnum++) {
            fprintf(output, "\tSECTION %04d: '%s'\n", sectionnum, asmdata->sections[sectionnum]->namecopy);
        }
        for (sectionnum = 0; sectionnum < asmdata->nsections; sectionnum++) {
            fprintf(output, "[SECTION %04d: '%s']\n", sectionnum, asmdata->sections[sectionnum]->namecopy);
            if (!produce_section(assembler, sectionnum, output, readable)) {
                return false;
            }
        }
        fprintf(output, "\n");
    }
    else if (header) {
        asmdata_section_t* hdrsec = asmdata_findsection(asmdata, "asmdata.fileheader", false);
        if (hdrsec == NULL || hdrsec->buffer == NULL) {
            return false;
        }
        if (fwrite(hdrsec->buffer, 1, hdrsec->bufferfilled, output) != hdrsec->bufferfilled) {
            return false;
        }
        offset += hdrsec->bufferfilled;
        if (!produce_padding_(output, &offset, pagesize)) {
            return false;
        }
        for (sectionnum = 0; sectionnum < asmdata->nsections; sectionnum++) {
            asmdata_section_t* sec = asmdata->sections[sectionnum];
            fprintf(stderr, "Writing section '%s' (#%d) at file offset %d\n", sec->namecopy, sectionnum, offset);
            if (sec->bufferfilled > 0 && fwrite(sec->buffer, 1, sec->bufferfilled, output) != sec->bufferfilled) {
                return false;
            }
            offset += sec->bufferfilled;
            if (!produce_padding_(output, &offset, pagesize)) {
                return false;
            }
        }
        // We're done now
        return true;
    }
    if (readable) {
        fprintf(output, "REFERENCES (%d):\n", asmdata->nreferences);
        int32_t refnum;
        for (refnum = 0; refnum < asmdata->nreferences; refnum++) {
            fprintf(output, "\tREFERENCE %04d -> SYMBOL %04d\n", refnum, asmdata->references[refnum].symbolindex);
        }
        fprintf(output, "\n");
        fprintf(output, "SYMBOLS (%d):\n", asmdata->nsymbols);
        int32_t symnum;
        for (symnum = 0; symnum < asmdata->nsymbols; symnum++) {
            fprintf(output, "\tSYMBOL %04d -> '%s'\n", symnum, asmdata->symbols[symnum].namecopy);
        }
        fprintf(output, "\n");
    }
    return true;
}
/*
 * Command-line entry point. Arguments are processed strictly in order, so
 * options that affect assembly (--mode) must precede the inputs they apply to.
 *   --usage            print usage and exit
 *   --mode NAME        include NAME.inc before each following input
 *   --output FILE, -o FILE, -oFILE   open the output file
 *   --stdin            assemble standard input
 *   --stdout           write the result to standard output
 *   --readable         produce the readable dump instead of the binary image
 *   --nofinalise       skip asmdata_finalise
 *   --noheader         do not produce/write the file header
 *   anything else      is assembled as an input file
 * Returns 0 on success and -1 on any error.
 */
int main(int argc, char** argv) {
    int argi = 1;
    FILE* output = NULL;
    void* assembler = asmdata_new();
    bool somethingtodo = false;
    bool readable = false;
    bool finalise = true;
    bool produceheader = true;
    int32_t pagesize = 1024;
    const char* hint1 = "gen1";
    const char* hint2 = NULL;
    int32_t inthint = 0;
    int32_t mode = MODE_PP;
    const char* modestr = NULL;
    if (assembler == NULL) {
        fprintf(stderr, "ERROR: Failed to create assembler\n");
        return -1;
    }
    while (argi < argc) {
        if (!strcmp(argv[argi], "--usage")) {
            usage(argc, argv, argi);
            return 0;
        }
        else if (!strcmp(argv[argi], "--mode")) {
            /* Only the missing-argument case is an error here; whether an
               output has already been chosen is irrelevant to --mode. */
            if (argi + 1 >= argc) {
                usage(argc, argv, argi);
                return -1;
            }
            argi++;
            modestr = argv[argi];
        }
        else if (!strcmp(argv[argi], "--output") || (argv[argi][0] == '-' && argv[argi][1] == 'o' && argv[argi][2] == 0)) {
            if (output != NULL || argi + 1 >= argc) {
                usage(argc, argv, argi);
                return -1;
            }
            argi++;
            output = fopen(argv[argi], "w+");
            if (output == NULL) {
                fprintf(stderr, "ERROR: Failed to open output file '%s'\n", argv[argi]);
                return -1;
            }
        }
        else if (argv[argi][0] == '-' && argv[argi][1] == 'o') {
            /* -oFILE: the file name is attached to the option. */
            if (output != NULL) {
                usage(argc, argv, argi);
                return -1;
            }
            output = fopen(argv[argi]+2, "w+");
            if (output == NULL) {
                fprintf(stderr, "ERROR: Failed to open output file '%s'\n", argv[argi]+2);
                return -1;
            }
        }
        else if (!strcmp(argv[argi], "--stdin")) {
            fprintf(stderr, "NOTE: Assembling from standard input\n");
            if (!assemble_input(assembler, stdin, mode, modestr)) {
                fprintf(stderr, "ERROR: Failed to assemble from standard input\n");
                return -1;
            }
            somethingtodo = true;
        }
        else if (!strcmp(argv[argi], "--stdout")) {
            fprintf(stderr, "NOTE: Dumping to standard output (use with --readable to easily inspect output)\n");
            output = stdout;
        }
        else if (!strcmp(argv[argi], "--readable")) {
            fprintf(stderr, "NOTE: Will attempt to produce 'readable' output\n");
            readable = true;
        }
        else if (!strcmp(argv[argi], "--nofinalise")) {
            fprintf(stderr, "NOTE: Output will not be finalised\n");
            finalise = false;
        }
        else if (!strcmp(argv[argi], "--noheader")) {
            fprintf(stderr, "NOTE: Output will not include file header\n");
            produceheader = false;
        }
        else {
            if (!assemble(assembler, argv[argi], mode, modestr)) {
                fprintf(stderr, "ERROR: Failed to assemble '%s'\n", argv[argi]);
                return -1;
            }
            somethingtodo = true;
        }
        argi++;
    }
    if (!somethingtodo || output == NULL) {
        fprintf(stderr, "ERROR: Nothing to do? Please define an input and an output!\n");
        usage(argc, argv, 0);
        return -1;
    }
    if (finalise && !asmdata_finalise(assembler)) {
        fprintf(stderr, "ERROR: Finalisation failed.\n");
        return -1;
    }
    if (produceheader && !asmdata_produceheader(assembler, pagesize, hint1, hint2, inthint)) {
        /* Not fatal at this point: produce_output is still attempted. */
        fprintf(stderr, "ERROR: Failed to produce header\n");
    }
    if (!produce_output(assembler, output, readable, produceheader, pagesize)) {
        fprintf(stderr, "ERROR: Failed to produce output\n");
        return -1;
    }
    if (output != stdout) {
        fclose(output);
    }
    output = NULL;
    fprintf(stderr, "FINISHED.\n");
    return 0;
}

276
rv64.inc Normal file
View File

@ -0,0 +1,276 @@
# Integer registers: each xN name is defined as its register number N.
%def: x0 0
%def: x1 1
%def: x2 2
%def: x3 3
%def: x4 4
%def: x5 5
%def: x6 6
%def: x7 7
%def: x8 8
%def: x9 9
%def: x10 10
%def: x11 11
%def: x12 12
%def: x13 13
%def: x14 14
%def: x15 15
%def: x16 16
%def: x17 17
%def: x18 18
%def: x19 19
%def: x20 20
%def: x21 21
%def: x22 22
%def: x23 23
%def: x24 24
%def: x25 25
%def: x26 26
%def: x27 27
%def: x28 28
%def: x29 29
%def: x30 30
%def: x31 31
# ABI register aliases, defined in terms of the xN names above.
# Only zero, ra, sp, gp, tp, t0-t2, s0-s1, a0-a7 and fp are provided here.
%def: zero x0
%def: ra x1
%def: sp x2
%def: gp x3
%def: tp x4
%def: t0 x5
%def: t1 x6
%def: t2 x7
%def: s0 x8
%def: s1 x9
%def: a0 x10
%def: a1 x11
%def: a2 x12
%def: a3 x13
%def: a4 x14
%def: a5 x15
%def: a6 x16
%def: a7 x17
%def: fp s0
# R-type word: op | dst<<7 | fn3<<12 | src1<<15 | src2<<20 | fn7<<25, emitted with data32.
%macro: rv.rtype op, dst, fn3, src1, src2, fn7
data32 (((((op | (dst << 7)) | (fn3 << 12)) | (src1 << 15)) | (src2 << 20)) | (fn7 << 25))
%endmacro
# I-type word: op | dst<<7 | fn3<<12 | src1<<15 | imm<<20.
# imm is shifted in unmasked. NOTE(review): confirm that data32 discards bits above bit 31 for negative immediates.
%macro: rv.itype op, dst, fn3, src1, imm
data32 ((((op | (dst << 7)) | (fn3 << 12)) | (src1 << 15)) | (imm << 20))
%endmacro
# S-type word from pre-split immediate fields: imm04 goes to bits 11:7, imm511 to bits 31:25.
%macro: rv.stype op, imm04, fn3, src1, src2, imm511
data32 (((((op | (imm04 << 7)) | (fn3 << 12)) | (src1 << 15)) | (src2 << 20)) | (imm511 << 25))
%endmacro
# Five-parameter form: splits a 12-bit immediate and forwards to the six-parameter form above.
# The upper part (imm >> 5) is not masked. NOTE(review): confirm the behaviour for negative offsets.
%macro: rv.stype op, imm, fn3, src1, src2
rv.stype op, (imm & 0x1F), fn3, src1, src2, (imm >> 5)
%endmacro
# U-type word: op | dst<<7 | imm<<12 (imm is the 20-bit upper immediate).
%macro: rv.utype op, dst, imm
data32 ((op | (dst << 7)) | (imm << 12))
%endmacro
# B-type word, built by rearranging the branch offset into S-type layout:
# S bit 11 = offset bit 12, S bits 10:1 = offset bits 10:1, S bit 0 = offset bit 11.
# The mask 0x7FE keeps offset bit 10 (0x3FE dropped it, which broke backward
# branches), and offset bit 12 is masked to a single bit.
%macro: rv.btype op, imm, fn3, src1, src2
rv.stype op, (((((imm >> 12) & 1) << 11) | (imm & 0x7FE)) | ((imm >> 11) & 1)), fn3, src1, src2
%endmacro
# J-type word, built by rearranging the jump offset into the 20-bit U-type field:
# field bit 19 = offset bit 20, field bits 18:9 = offset bits 10:1,
# field bit 8 = offset bit 11, field bits 7:0 = offset bits 19:12.
# (Offset bit 20 and bits 10:1 were previously placed one position too high.)
%macro: rv.jtype op, dst, imm
rv.utype op, dst, ((((((imm >> 20) & 1) << 19) | (((imm >> 1) & 0x3FF) << 9)) | (((imm >> 11) & 1) << 8)) | ((imm >> 12) & 0xFF))
%endmacro
# Major opcodes for register-immediate (OP_IMM) and register-register (OP_ALU) arithmetic.
%def: OP_IMM 0b0010011
%def: OP_ALU 0b0110011
# funct3 values shared by the OP_IMM and OP_ALU instructions below.
%def: FN3_ADD 0b000
%def: FN3_SLL 0b001
%def: FN3_SLT 0b010
%def: FN3_SLTU 0b011
%def: FN3_XOR 0b100
%def: FN3_SRL 0b101
%def: FN3_OR 0b110
%def: FN3_AND 0b111
# Register-immediate ALU instructions: dst = src <op> n, all encoded as I-type with OP_IMM.
# No srai macro is defined in this group.
%macro: addi dst, src, n
rv.itype OP_IMM, dst, FN3_ADD, src, n
%endmacro
%macro: slti dst, src, n
rv.itype OP_IMM, dst, FN3_SLT, src, n
%endmacro
%macro: sltiu dst, src, n
rv.itype OP_IMM, dst, FN3_SLTU, src, n
%endmacro
%macro: xori dst, src, n
rv.itype OP_IMM, dst, FN3_XOR, src, n
%endmacro
%macro: ori dst, src, n
rv.itype OP_IMM, dst, FN3_OR, src, n
%endmacro
%macro: andi dst, src, n
rv.itype OP_IMM, dst, FN3_AND, src, n
%endmacro
%macro: slli dst, src, n
rv.itype OP_IMM, dst, FN3_SLL, src, n
%endmacro
%macro: srli dst, src, n
rv.itype OP_IMM, dst, FN3_SRL, src, n
%endmacro
# Register-register ALU instructions (R-type, OP_ALU). The last argument is
# funct7, which selects the sub and sra variants.
# These use rv.rtype, whose expansion is identical to the six-parameter
# rv.stype they previously went through.
%macro: add dst, src1, src2
rv.rtype OP_ALU, dst, FN3_ADD, src1, src2, 0b0000000
%endmacro
%macro: sub dst, src1, src2
rv.rtype OP_ALU, dst, FN3_ADD, src1, src2, 0b0100000
%endmacro
%macro: sll dst, src1, src2
rv.rtype OP_ALU, dst, FN3_SLL, src1, src2, 0b0000000
%endmacro
%macro: srl dst, src1, src2
rv.rtype OP_ALU, dst, FN3_SRL, src1, src2, 0b0000000
%endmacro
%macro: sra dst, src1, src2
rv.rtype OP_ALU, dst, FN3_SRL, src1, src2, 0b0100000
%endmacro
%macro: slt dst, src1, src2
rv.rtype OP_ALU, dst, FN3_SLT, src1, src2, 0b0000000
%endmacro
%macro: sltu dst, src1, src2
rv.rtype OP_ALU, dst, FN3_SLTU, src1, src2, 0b0000000
%endmacro
%macro: or dst, src1, src2
rv.rtype OP_ALU, dst, FN3_OR, src1, src2, 0b0000000
%endmacro
%macro: and dst, src1, src2
rv.rtype OP_ALU, dst, FN3_AND, src1, src2, 0b0000000
%endmacro
%macro: xor dst, src1, src2
rv.rtype OP_ALU, dst, FN3_XOR, src1, src2, 0b0000000
%endmacro
%macro: snez dst, src2
sltu dst, zero, src2
%endmacro
# Loads: dst = memory[src + off], I-type with opcode 0b0000011; funct3 selects width and signedness.
# The bracketed parameter off(src) binds both names from the call's bracketed argument.
%macro: lb dst, off(src)
rv.itype 0b0000011, dst, 0b000, src, off
%endmacro
%macro: lbu dst, off(src)
rv.itype 0b0000011, dst, 0b100, src, off
%endmacro
%macro: lh dst, off(src)
rv.itype 0b0000011, dst, 0b001, src, off
%endmacro
%macro: lhu dst, off(src)
rv.itype 0b0000011, dst, 0b101, src, off
%endmacro
%macro: lw dst, off(src)
rv.itype 0b0000011, dst, 0b010, src, off
%endmacro
%macro: ld dst, off(src)
rv.itype 0b0000011, dst, 0b011, src, off
%endmacro
# Stores: memory[src2 + off] = src1, S-type with opcode 0b0100011.
# The base register src2 is passed in the encoder's src1 position and the stored value in its src2 position.
%macro: sb src1, off(src2)
rv.stype 0b0100011, off, 0b000, src2, src1
%endmacro
%macro: sh src1, off(src2)
rv.stype 0b0100011, off, 0b001, src2, src1
%endmacro
%macro: sw src1, off(src2)
rv.stype 0b0100011, off, 0b010, src2, src1
%endmacro
%macro: sd src1, off(src2)
rv.stype 0b0100011, off, 0b011, src2, src1
%endmacro
# Jumps. jal is J-type; jalr is I-type with opcode 0b1100111.
# The one-parameter jalr links through x1, and j discards the return address by linking to zero.
%macro: jal dst, imm
rv.jtype 0b1101111, dst, imm
%endmacro
%macro: jalr dst, src, imm
rv.itype 0b1100111, dst, 0b000, src, imm
%endmacro
%macro: jalr src
jalr x1, src, 0
%endmacro
%macro: j imm
jal zero, imm
%endmacro
# Conditional branches (B-type, opcode 0b1100011). Only beq, bne and beqz are defined here.
%macro: beq src1, src2, imm
rv.btype 0b1100011, imm, 0b000, src1, src2
%endmacro
%macro: bne src1, src2, imm
rv.btype 0b1100011, imm, 0b001, src1, src2
%endmacro
%macro: beqz src, imm
beq src, zero, imm
%endmacro
# sltiu is already defined earlier in this file with the other OP_IMM
# macros (same opcode and funct3), so the second definition is dropped.
%macro: seqz dst, src
sltiu dst, src, 1
%endmacro
# lui loads a 20-bit upper immediate (U-type, opcode 0b0110111).
%macro: lui dst, imm
rv.utype 0b0110111, dst, imm
%endmacro
# li and la build a value as lui (upper bits) followed by addi (low 12 bits); both have the same body.
# NOTE(review): addi sign-extends its 12-bit immediate, so a value with bit 11 set would need the
# upper part rounded up ((imm + 0x800) >> 12). Confirm whether the expression evaluator supports '+'.
%macro: li dst, imm
lui dst, (imm >> 12)
addi dst, dst, (imm & 0xFFF)
%endmacro
%macro: la dst, imm
lui dst, (imm >> 12)
addi dst, dst, (imm & 0xFFF)
%endmacro
# Pseudo-instructions: call links through x1, ret jumps to x1 without linking, mv copies via addi 0.
%macro: call imm
jal x1, imm
%endmacro
%macro: ret
jalr x0, x1, 0
%endmacro
%macro: mv dst, src
addi dst, src, 0
%endmacro
# RV32M
# Multiply/divide instructions: R-type, opcode 0b0110011, funct7 0b0000001; funct3 selects the operation.
# Only mul, div, divu, rem and remu are defined here.
%macro: mul dst, src1, src2
rv.rtype 0b0110011, dst, 0b000, src1, src2, 0b0000001
%endmacro
%macro: div dst, src1, src2
rv.rtype 0b0110011, dst, 0b100, src1, src2, 0b0000001
%endmacro
%macro: divu dst, src1, src2
rv.rtype 0b0110011, dst, 0b101, src1, src2, 0b0000001
%endmacro
%macro: rem dst, src1, src2
rv.rtype 0b0110011, dst, 0b110, src1, src2, 0b0000001
%endmacro
%macro: remu dst, src1, src2
rv.rtype 0b0110011, dst, 0b111, src1, src2, 0b0000001
%endmacro
# RV64I
# 32-bit add-immediate (opcode 0b0011011) and the sext.w pseudo-instruction built on it.
%macro: addiw dst, src, imm
rv.itype 0b0011011, dst, FN3_ADD, src, imm
%endmacro
%macro: sext.w dst, src
addiw dst, src, 0
%endmacro
# Disabled test invocations of the macros above:
;rv.itype OP_IMM, 8, FN3_ADD, 8, 8
;addi 8, 8, 8
;ld dst, 8(8)
# .global forwards the symbol name to data.symbol.
# NOTE(review): the meaning of the constant 4096 is not visible in this file; confirm against data.symbol.
%macro: .global n
data.symbol n, 4096
%endmacro