From 7f74463109e9c2ffe858b1c9287a65b81c3b711a Mon Sep 17 00:00:00 2001 From: Zak Yani Star Fenton Date: Wed, 4 Jun 2025 03:45:05 +1000 Subject: [PATCH] Initial commit of current C compiler sources, partly rebranded but not yet properly cleaned up! --- cc.c | 317 ++ ccb.h | 6843 +++++++++++++++++++++++++++++++++++++++ ccbgeneric.h | 4256 ++++++++++++++++++++++++ cpp.h | 5368 ++++++++++++++++++++++++++++++ fakelibc/README | 6 + fakelibc/assert.h | 7 + fakelibc/ctype.h | 74 + fakelibc/dirent.h | 0 fakelibc/errno.h | 20 + fakelibc/fcntl.h | 0 fakelibc/float.h | 7 + fakelibc/inttypes.h | 7 + fakelibc/limits.h | 8 + fakelibc/math.h | 0 fakelibc/memory.h | 5 + fakelibc/pwd.h | 0 fakelibc/setjmp.h | 18 + fakelibc/signal.h | 0 fakelibc/stdarg.h | 27 + fakelibc/stdbool.h | 10 + fakelibc/stddef.h | 13 + fakelibc/stdint.h | 26 + fakelibc/stdio.h | 58 + fakelibc/stdlib.h | 26 + fakelibc/string.h | 47 + fakelibc/sys/resource.h | 0 fakelibc/sys/stat.h | 50 + fakelibc/sys/time.h | 0 fakelibc/sys/types.h | 0 fakelibc/time.h | 29 + fakelibc/unistd.h | 0 fakelibc/utime.h | 0 frontend.c | 913 ++++++ 33 files changed, 18135 insertions(+) create mode 100644 cc.c create mode 100644 ccb.h create mode 100644 ccbgeneric.h create mode 100644 cpp.h create mode 100644 fakelibc/README create mode 100644 fakelibc/assert.h create mode 100644 fakelibc/ctype.h create mode 100644 fakelibc/dirent.h create mode 100644 fakelibc/errno.h create mode 100644 fakelibc/fcntl.h create mode 100644 fakelibc/float.h create mode 100644 fakelibc/inttypes.h create mode 100644 fakelibc/limits.h create mode 100644 fakelibc/math.h create mode 100644 fakelibc/memory.h create mode 100644 fakelibc/pwd.h create mode 100644 fakelibc/setjmp.h create mode 100644 fakelibc/signal.h create mode 100644 fakelibc/stdarg.h create mode 100644 fakelibc/stdbool.h create mode 100644 fakelibc/stddef.h create mode 100644 fakelibc/stdint.h create mode 100644 fakelibc/stdio.h create mode 100644 fakelibc/stdlib.h create mode 100644 
fakelibc/string.h create mode 100644 fakelibc/sys/resource.h create mode 100644 fakelibc/sys/stat.h create mode 100644 fakelibc/sys/time.h create mode 100644 fakelibc/sys/types.h create mode 100644 fakelibc/time.h create mode 100644 fakelibc/unistd.h create mode 100644 fakelibc/utime.h create mode 100644 frontend.c diff --git a/cc.c b/cc.c new file mode 100644 index 0000000..0f99c84 --- /dev/null +++ b/cc.c @@ -0,0 +1,317 @@ +#define TOOL_CPP + +#define _CRT_SECURE_NO_WARNINGS + + +#ifdef _WIN32 +#include +#endif +#include +#include +#include +#include + +//#include "compile.h" +#define CCB_IMPLEMENTATION +#include "ccb.h" +#define CCBGENERIC_IMPLEMENTATION +#include "ccbgeneric.h" +#ifdef TOOL_CPP +#define CPP_IMPLEMENTATION +#include "cpp.h" +#endif +#ifdef TOOL_MK +#define MK_IMPLEMENTATION +#include "mk.h" +#endif + +int sh_main(int argc, char** argv); + +void ccb_compile_error_impl(ccb_t* ccb/*, const char* fmt, ...*/) { + fprintf(stderr, "^ Around line %d, column %d of '%s'.\n", ccb->pos.uline, ccb->pos.ucol, ccb->pos.ufile); + fprintf(stderr, " [Around line %d, column %d of preprocessed input]\n", ccb->pos.line, ccb->pos.col); + /*#ifdef _ZCC + fprintf(stderr, "ERROR '%s'\n(TODO: Better formatting!)\n", fmt); + ((char*)NULL)[0] = 0; // Trigger debugger + #else + va_list a; + va_start(a, fmt); + vfprintf(stderr, fmt, a); + fprintf(stderr, "\n"); + va_end(a); + #endif*/ + + exit(EXIT_FAILURE); +} + +void ccb_compile_warn_impl(ccb_t* ccb/*, const char* fmt, ...*/) { + fprintf(stderr, "^ Around line %d, column %d of '%s'.\n", ccb->pos.uline, ccb->pos.ucol, ccb->pos.ufile); + fprintf(stderr, " [Around line %d, column %d of preprocessed input]\n", ccb->pos.line, ccb->pos.col); + /*#ifdef _ZCC + fprintf(stderr, "WARNING '%s'\n(TODO: Better formatting!)\n", fmt); + #else + va_list a; + va_start(a, fmt); + vfprintf(stderr, fmt, a); + fprintf(stderr, "\n"); + va_end(a); + #endif*/ +} + +static int startcompile(ccb_t* ccb) { + ccb_util_init(ccb); + 
ccb_target_init(ccb); + ccb_ast_init(ccb); + + ccb_list_t* block = ccb_parse_run(ccb); + if (!ccb->dump_ast) { + if (ccb->include_data) { + ccb_target_gen_data_section(ccb); + } + } + /* First run is required to gather externs. TODO: This should be done for any extern data elements too! */ + for (ccb_list_iterator_t* it = ccb_list_iterator(block); !ccb_list_iterator_end(it); ) { + if (!ccb->dump_ast) { + if (ccb->include_code) { + ccb_target_gen_declfunction(ccb, ccb_list_iterator_next(it)); + } + } + else { + printf("%s", ccb_ast_string(ccb, ccb_list_iterator_next(it))); + } + } + if (ccb->knownexterns != NULL) { + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb->knownexterns); !ccb_list_iterator_end(it); ) { + if (!ccb->dump_ast) { + if (ccb->include_code) { + ccb_target_gen_declextern(ccb, ccb_list_iterator_next(it)); + } + } + else { + printf("%s", ccb_ast_string(ccb, ccb_list_iterator_next(it))); + } + } + } + for (ccb_list_iterator_t* it = ccb_list_iterator(block); !ccb_list_iterator_end(it); ) { + if (!ccb->dump_ast) { + if (ccb->include_code) { + ccb_target_gen_function(ccb, ccb_list_iterator_next(it)); + } + } + else { + printf("%s", ccb_ast_string(ccb, ccb_list_iterator_next(it))); + } + } + return true; +} + +/* Original main program from LICE: +int main(int argc, char **argv) { + argc--; + argv++; + return startcompile(!!(argc && !strcmp(*argv, "--dump-ast"))) + ? 
EXIT_SUCCESS + : EXIT_FAILURE; +} +*/ + +static void usage(int argc, char** argv, int arge) { + //fprintf(stderr, "TODO: Usage!\n"); + char* n = argv[0]; + + fprintf(stderr, "USAGE:\n\n"); + fprintf(stderr, " %s [--silent] [--ast-only|--code-only|--data-only|--usage] [TODO --asmfmt fasm|gas] [TODO --binfmt elf|flat] [--input ] [--output|--append ]\n\n", n); + fprintf(stderr, "(This is a simple core compiler designed to be invoked from a more user-friendly frontend.\nArguments must be provided in the above order, defaults to using stdin/stdout.)"); +} + +int preprocessormain(int argc, char** argv); // Extra definition of the C preprocessor main program +//void mk_main(int argc, char** argv); // Extra definition of the "make" tool main program + +/* Zak's new new main program: (frontend) */ +#include "frontend.c" +/* Zak's new main program: (backend) */ +int backend_main(int argc, char** argv) { + /*int x; + for(x=0;x argi && (!strcmp(argv[argi], "--silent") || !strcmp(argv[argi], "--usage") || !strcmp(argv[argi], "-P") || !strcmp(argv[argi], "-B"))) { // Also skip added notices if using --usage + if (!strcmp(argv[argi], "--silent")) { + argi++; + } + ccb.silent = true; + } + else { + fprintf(stderr, "SecureLang C Compiler backend (CCb), early version\n"); + fprintf(stderr, "NOTE: This program generally reads program code from standard input (until EOF) and writes assembly code to standard output.\n"); + fprintf(stderr, " It would usually be used from a compiler frontend (preprocessing, assembling and linking must be done separately).\n"); + fprintf(stderr, " Use --silent as the first argument to disable these notices or --usage to learn more.\n\n"); + } + + /* Ignore -B, used to invoke backend. 
*/ + if (argc > argi && !strcmp(argv[argi], "-B")) { + argi++; + } + + if (argc > argi && !strcmp(argv[argi], "--ast-only")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Will dump AST to output instead of assembly code.\n"); + } + ccb.dump_ast = true; + argi++; + } + else if (argc > argi && !strcmp(argv[argi], "--data-only")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Will dump data section only.\n"); + } + ccb.include_code = false; + ccb.include_data = true; + argi++; + } + else if (argc > argi && !strcmp(argv[argi], "--code-only")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Will dump code section only.\n"); + } + ccb.include_code = true; + ccb.include_data = false; + argi++; + } +#ifdef TOOL_CPP + else if (argc > argi && !strcmp(argv[argi], "-P")) { /* Invoke the built-in preprocessor instead of the backend. */ + if (!ccb.silent) { + fprintf(stderr, "NOTE: Invoking the experimental preprocessor.\n"); + } + // NOTE: This will only work for argv == 1, and because cppmain ignores the -P flag! + return preprocessormain(argc, argv); + } +#endif +#ifdef TOOL_MK + else if (argc > argi && !strcmp(argv[argi], "-M")) { /* Invoke the built-in make tool instead of the backend. */ + if (!ccb.silent) { + fprintf(stderr, "NOTE: Invoking the experimental make tool.\n"); + } + // NOTE: This will only work for argv == 1, and because cppmain ignores the -P flag! + void* tmp = NULL; + mk_main(argc, argv, &tmp); + return -1; // TODO: mk_main returns void but also has some exit calls, this behaviour needs to be checked to return error codes properly + } +#endif +#ifdef TOOL_SH + else if (argc > argi && !strcmp(argv[argi], "-S")) { /* Invoke the built-in make tool instead of the backend. */ + if (!ccb.silent) { + fprintf(stderr, "NOTE: Invoking the experimental shell tool.\n"); + } + // NOTE: This will only work for argv == 1, and because cppmain ignores the -P flag! 
+ void* tmp = NULL; + return sh_main(argc, argv); + } +#endif + + if (argc > argi && !strcmp(argv[argi], "--usage")) { + printf("USAGE?\n"); + usage(argc, argv, -1); + return EXIT_SUCCESS; + } + + if (argc > argi && !strcmp(argv[argi], "--101")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Using calling convention 101 (__classic_call) by default.\n", argv[argi + 1]); + } + ccb.default_callconv = 101; + argi++; + } + + if (argc > argi + 1 && !strcmp(argv[argi], "--mod")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Using module name '%s'.\n", argv[argi + 1]); + } + ccb.mod_name = strdup(argv[argi + 1]); + argi += 2; + } + + if (argc > argi + 1 && !strcmp(argv[argi], "--prefix")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Using symbol prefix '%s'.\n", argv[argi + 1]); + } + ccb.sym_prefix = strdup(argv[argi + 1]); + argi += 2; + } + + if (argc > argi + 1 && !strcmp(argv[argi], "--input")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Using input from '%s'.\n", argv[argi + 1]); + } + ccb.input = fopen(argv[argi + 1], "r"); + ccb.pos.ufile = strdup(argv[argi + 1]); + argi += 2; + } + + if (argc > argi + 1 && !strcmp(argv[argi], "--output")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Writing output to '%s'.\n", argv[argi + 1]); + } + ccb.output = fopen(argv[argi + 1], "w"); + argi += 2; + } + else if (argc > argi + 1 && !strcmp(argv[argi], "--append")) { + if (!ccb.silent) { + fprintf(stderr, "NOTE: Appending output from '%s'.\n", argv[argi]); + } + ccb.output = fopen(argv[argi + 1], "a"); + argi += 2; + } + + if (ccb.input == NULL) { + fprintf(stderr, "ERROR: Failed to open input file.\n"); + } + + if (ccb.output == NULL) { + fprintf(stderr, "ERROR: Failed to open output file.\n"); + } + + if (argi != argc) { + usage(argc, argv, argi); + return EXIT_FAILURE; + } + + int result = startcompile(&ccb); + + if (ccb.input != stdin) { + fclose(ccb.input); + } + if (ccb.output != stdout) { + fclose(ccb.output); + } + + if (result) { + return EXIT_SUCCESS; + } 
+ else { + return EXIT_FAILURE; + } +} diff --git a/ccb.h b/ccb.h new file mode 100644 index 0000000..830eaeb --- /dev/null +++ b/ccb.h @@ -0,0 +1,6843 @@ +#ifndef CCB_H +#define CCB_H + +#ifdef _ZCC +//#define __func__ "TODO: function name" +#endif + +#define CCB_X_OBJC +#define CCB_X_OBJX + +#include +#include +#include +#include + +typedef struct ccb ccb_t; + +typedef struct ccb_pos ccb_pos_t; + +/* + * Type: ccb_list_t + * A standard double-linked list (NOTE: Moved to the top since it's needed in ccb_t) + */ +typedef struct ccb_list_s ccb_list_t; + +struct ccb_pos { + int line, col; // These record the line of compiler input + int uline, ucol; // "User" line/column (within the file that the preprocessed input claims to be from) + char* ufile; // "User" filename (the file that the preprocessed input claims to be from) +}; + +struct ccb { + FILE* input; + FILE* output; + bool dump_ast; + bool silent; + bool include_code; + bool include_data; + ccb_pos_t pos; + //int line, col; // These record the line of compiler input + //int uline, ucol; // "User" line/column (within the file that the preprocessed input claims to be from) + //char* ufile; // "User" filename (the file that the preprocessed input claims to be from) + int iline, icol; // Previous increment of line/column (subtracted on an ungetc equivalent!) + const char* func_name; // This is used for the __func__ builtin + int func_callconv; + int default_callconv; + const char* mod_name; // Module name, used for determining name of initialisation function or other module-specific symbols + ccb_list_t* mod_initstmts; // If used, the list of init statements in the module initialisation function to add to + ccb_list_t* declarednames; // If used, the list of predeclared names + ccb_list_t* knownexterns; // If used, the list of known extern symbols + long bsscount; // Can be used by the backend if it needs to manually track & generate BSS padding/alignment (which is seemingly the case for NASM...) 
+ const char* oop_classname; + bool oop_ismeta; + const char* sym_prefix; +}; + +/* + * Type: ccb_string_t + * A type capable of representing a self-resizing string with + * O(1) strlen. + */ +typedef struct ccb_string_s ccb_string_t; + +/* + * Function: ccb_string_create + * Create a string object + */ +ccb_string_t* ccb_string_create(void); + +/* + * Function: ccb_string_buffer + * Return the raw buffer of a string object + */ +char* ccb_string_buffer(ccb_string_t* string); + +/* + * Function: ccb_string_cat + * Append a character to a string object + */ +void ccb_string_cat(ccb_string_t* string, char ch); + +void ccb_string_catcstr(ccb_string_t* string, const char* str); +void ccb_string_catint(ccb_string_t* string, long long i); + +/* + * Function: ccb_string_catf + * Append a formatted string to a string object + */ +void ccb_string_catf(ccb_string_t* string, const char* fmt, ...); + +/* + * Function: ccb_string_quote + * Escape a string's quotes + */ +char* ccb_string_quote(char* p); // C/GNU-style +char* ccb_string_quote_fasm(char* p, char quotechar); // FASM/Intel-style (closes and reopens quotes, adding special chars by number) + +/* + * Macro: CCB_SENTINEL_LIST + * Initialize an empty list in place + */ +/*#define CCB_SENTINEL_LIST ((ccb_list_t) { \ + .length = 0, \ + .head = NULL, \ + .tail = NULL \ +})*/ + + /* + * Type: ccb_list_iterator_t + * A type capable of representing an itrator for a + */ +typedef struct ccb_list_iterator_s ccb_list_iterator_t; + +/* + * Function: ccb_list_create + * Creates a new list + */ +ccb_list_t* ccb_list_create(void); + +/* + * Function: ccb_list_push + * Push an element onto a list + */ +void ccb_list_push(ccb_list_t* list, void* element); + +/* + * Function: ccb_list_pop + * Pop an element from a list + */ +void* ccb_list_pop(ccb_list_t* list); + +/* + * Function: ccb_list_length + * Used to retrieve length of a list object + */ +int ccb_list_length(ccb_list_t* list); + +/* + * Function: ccb_list_shift + * Like a 
ccb_list_pop but shift from the head (instead of the tail) + */ +void* ccb_list_shift(ccb_list_t* list); + +/* + * Function: ccb_list_reverse + * Reverse the contents of a list + */ +ccb_list_t* ccb_list_reverse(ccb_list_t* list); + +/* + * Function: ccb_list_iterator + * Create an iterator for a given list object + */ +ccb_list_iterator_t* ccb_list_iterator(ccb_list_t* list); + +/* + * Function: ccb_list_iterator_next + * Increment the list iterator while returning the given element + */ +void* ccb_list_iterator_next(ccb_list_iterator_t* iter); + +/* + * Function: ccb_list_iterator_end + * Test if the iterator is at the end of the list + */ +bool ccb_list_iterator_end(ccb_list_iterator_t* iter); + +/* + * Function: ccb_list_tail + * Get the last element in a list + */ +void* ccb_list_tail(ccb_list_t* list); + +typedef struct ccb_list_node_s ccb_list_node_t; + +struct ccb_list_s { + int length; + ccb_list_node_t* head; + ccb_list_node_t* tail; +}; + +/* + * Type: ccb_table_t + * A key value associative table + */ +typedef struct ccb_table_s ccb_table_t; + +struct ccb_table_s { + ccb_list_t* list; + ccb_table_t* parent; +}; + +/* + * Function: ccb_table_create + * Creates a ccb_table_t object + */ +void* ccb_table_create(void* parent); + +/* + * Funciton: ccb_table_find + * Searches for a given value in the table based on the + * key associated with it. + */ +void* ccb_table_find(ccb_table_t* table, const char* key); + +/* + * Function: ccb_table_insert + * Inserts a value for the given key as an entry in the + * table. + */ +void ccb_table_insert(ccb_table_t* table, char* key, void* value); + +/* + * Function: ccb_table_parent + * Returns the parent opaque object for the given table to + * be used as the argument to a new table. + */ +void* ccb_table_parent(ccb_table_t* table); + +/* + * Function: ccb_table_values + * Generates a list of all the values in the table, useful for + * iterating over the values. 
+ */ +ccb_list_t* ccb_table_values(ccb_table_t* table); + +/* + * Function: ccb_table_keys + * Generate a list of all the keys in the table, useful for + * iteration over the keys. + */ +ccb_list_t* ccb_table_keys(ccb_table_t* table); + +/* + * Macro: CCB_SENTINEL_TABLE + * Initialize an empty table in place + */ +/*#define CCB_SENTINEL_TABLE ((ccb_table_t) { \ + .list = ccb_list_empty(ccb), //&CCB_SENTINEL_LIST \ + .parent = NULL \ +})*/ +ccb_table_t* ccb_table_empty(); + +#define CCB_MIN(A, B) (((A) < (B)) ? (A) : (B)) +#define CCB_MAX(A, B) (((A) > (B)) ? (A) : (B)) + + /* + * Function: ccb_memory_allocate + * Allocate some memory + */ +void* ccb_memory_allocate(size_t bytes); + +int ccb_strcasecmp(const char* s1, const char* s2); +int ccb_strncasecmp(const char* s1, const char* s2, size_t n); + +/* + * Type: ccb_lexer_token_type_t + * Type to describe a tokens type. + * + * Remarks: + * Implemented as a typedef of an enumeration, ccb_lexer_token_t + * is used to describe the current lexer token. The following + * tokens exist (as constants). 
+ * + * Tokens: + * CCB_LEXER_TOKEN_IDENTIFIER - Identifier + * CCB_LEXER_TOKEN_PUNCT - Language punctuation + * CCB_LEXER_TOKEN_CHAR - Character literal + * CCB_LEXER_TOKEN_STRING - String literal + * CCB_LEXER_TOKEN_NUMBER - Number (of any type) + * CCB_LEXER_TOKEN_EQUAL - Equal + * CCB_LEXER_TOKEN_LEQUAL - Lesser-or-equal + * CCB_LEXER_TOKEN_GEQUAL - Greater-or-equal + * CCB_LEXER_TOKEN_NEQUAL - Not-equal + * CCB_LEXER_TOKEN_INCREMENT - Pre/post increment + * CCB_LEXER_TOKEN_DECREMENT - Pre/post decrement + * CCB_LEXER_TOKEN_ARROW - Pointer arrow `->` + * CCB_LEXER_TOKEN_LSHIFT - Left shift + * CCB_LEXER_TOKEN_RSHIFT - Right shift + * CCB_LEXER_TOKEN_COMPOUND_ADD - Compound-assignment addition + * CCB_LEXER_TOKEN_COMPOUND_SUB - Compound-assignment subtraction + * CCB_LEXER_TOKEN_COMPOUND_MUL - Compound-assignment multiplication + * CCB_LEXER_TOKEN_COMPOUND_DIV - Compound-assignment division + * CCB_LEXER_TOKEN_COMPOUND_MOD - Compound-assignment moduluas + * CCB_LEXER_TOKEN_COMPOUND_OR - Compound-assignment bit-or + * CCB_LEXER_TOKEN_COMPOUND_XOR - Compound-assignment bit-xor + * CCB_LEXER_TOKEN_COMPOUND_LSHIFT - Compound-assignment left-shift + * LEXER_TOKEN_COMPOUND_RSHIFt - Compound-assignment right-shift + * CCB_LEXER_TOKEN_AND - Logical and + * CCB_LEXER_TOKEN_OR - Logical or + */ +enum ccb_lexer_token_enum { + CCB_LEXER_TOKEN_IDENTIFIER, + CCB_LEXER_TOKEN_PUNCT, + CCB_LEXER_TOKEN_CHAR, + CCB_LEXER_TOKEN_STRING, + CCB_LEXER_TOKEN_NUMBER, + CCB_LEXER_TOKEN_EQUAL = 0x200, + CCB_LEXER_TOKEN_LEQUAL, + CCB_LEXER_TOKEN_GEQUAL, + CCB_LEXER_TOKEN_NEQUAL, + CCB_LEXER_TOKEN_INCREMENT, + CCB_LEXER_TOKEN_DECREMENT, + CCB_LEXER_TOKEN_ARROW, + CCB_LEXER_TOKEN_LSHIFT, + CCB_LEXER_TOKEN_RSHIFT, + CCB_LEXER_TOKEN_COMPOUND_ADD, + CCB_LEXER_TOKEN_COMPOUND_SUB, + CCB_LEXER_TOKEN_COMPOUND_MUL, + CCB_LEXER_TOKEN_COMPOUND_DIV, + CCB_LEXER_TOKEN_COMPOUND_MOD, + CCB_LEXER_TOKEN_COMPOUND_AND, + CCB_LEXER_TOKEN_COMPOUND_OR, + CCB_LEXER_TOKEN_COMPOUND_XOR, + 
CCB_LEXER_TOKEN_COMPOUND_LSHIFT, + CCB_LEXER_TOKEN_COMPOUND_RSHIFT, + CCB_LEXER_TOKEN_AND, + CCB_LEXER_TOKEN_OR +}; +typedef enum ccb_lexer_token_enum ccb_lexer_token_type_t; + +/* + * Class: ccb_lexer_token_t + * Describes a token in the token stream + */ +typedef struct { + /* + * Variable: type + * The token type + */ + ccb_lexer_token_type_t type; + + union { + long integer; + int punct; + char* string; + char character; + }; +} ccb_lexer_token_t; + +/* + * Function: ccb_lexer_islong + * Checks for a given string if it's a long-integer-literal. + * + * Parameters: + * string - The string to check + * + * Remarks: + * Returns `true` if the string is a long-literal, + * `false` otherwise. + */ +bool ccb_lexer_islong(ccb_t* ccb, char* string); + +/* + * Function: ccb_lexer_isint + * Checks for a given string if it's a int-integer-literal. + * + * Parameters: + * string - The string to check + * + * Remarks: + * Returns `true` if the string is a int-literal, + * `false` otherwise. + */ +bool ccb_lexer_isint(ccb_t* ccb, char* string); + +/* + * Function: ccb_lexer_isfloat + * Checks for a given string if it's a floating-point-literal. + * + * Parameters: + * string - The string to check + * + * Remarks: + * Returns `true` if the string is floating-point-literal, + * `false` otherwise. + */ +bool ccb_lexer_isfloat(ccb_t* ccb, char* string); + +/* + * Function: ccb_lexer_ispunct + * Checks if a given token is language punctuation and matches. + * + * Parameters: + * token - The token to test + * c - The punction to test if matches + * + * Remarks: + * Returns `true` if the given token is language punctuation and + * matches *c*. + */ +bool ccb_lexer_ispunct(ccb_t* ccb, ccb_lexer_token_t* token, int c); + +/* + * Function: ccb_lexer_unget + * Undo the given token in the token stream. 
+ * + * Parameters: + * token - The token to unget + */ +void ccb_lexer_unget(ccb_t* ccb, ccb_lexer_token_t* token); + +/* + * Function: ccb_lexer_next + * Get the next token in the token stream. + * + * Returns: + * The next token in the token stream or NULL + * on failure or EOF. + */ +ccb_lexer_token_t* ccb_lexer_next(ccb_t* ccb); + +/* + * Function: ccb_lexer_peek + * Look at the next token without advancing the stream. + * + * Returns: + * The next token without advancing the token stream or NULL on failure + * or EOF. + * + * Remarks: + * The function will peek ahead to see the next token in the stream + * without advancing the lexer state. + */ +ccb_lexer_token_t* ccb_lexer_peek(ccb_t* ccb); + +/* + * Function: ccb_lexer_tokenstr + * Convert a token to a human-readable representation + * + * Parameters: + * token - The token to convert + * + * Returns: + * A string representation of the token or NULL on failure. + */ +char* ccb_lexer_tokenstr(ccb_t* ccb, ccb_lexer_token_t* token); + +typedef struct ccb_ast_s ccb_ast_t; + +/* + * Type: ccb_ast_type_t + * The type of ast node + * + * Constants: + * + * CCB_AST_TYPE_LITERAL - Literal + * CCB_AST_TYPE_STRING - String literal + * CCB_AST_TYPE_VAR_LOCAL - Local variable + * CCB_AST_TYPE_VAR_GLOBAL - Global variable + * CCB_AST_TYPE_CALL - Direct function call + * CCB_AST_TYPE_PTRCALL - Indirect function call + * CCB_AST_TYPE_FUNCTION - Function + * CCB_AST_TYPE_PROTOTYPE - Prototype + * CCB_AST_TYPE_DECLARATION - Declaration + * CCB_AST_TYPE_INITIALIZER - Initializer + * CCB_AST_TYPE_STRUCT - Structure + * CCB_AST_TYPE_ADDRESS - Address of operation + * CCB_AST_TYPE_DEREFERENCE - Pointer dereference + * CCB_AST_TYPE_EXPRESSION_TERNARY - Ternary expression + * CCB_AST_TYPE_EXPRESSION_CAST - Type cast expression + * CCB_AST_TYPE_STATEMENT_IF - If statement + * CCB_AST_TYPE_STATEMENT_FOR - For statement + * CCB_AST_TYPE_STATEMENT_WHILE - While statement + * CCB_AST_TYPE_STATEMENT_DO - Do statement + * 
CCB_AST_TYPE_STATEMENT_SWITCH - Switch statement + * CCB_AST_TYPE_STATEMENT_CASE - Switch statement case + * CCB_AST_TYPE_STATEMENT_DEFAULT - Switch statement default case + * CCB_AST_TYPE_STATEMENT_RETURN - Return statement + * CCB_AST_TYPE_STATEMENT_BREAK - Break statement + * CCB_AST_TYPE_STATEMENT_CONTINUE - Continue statement + * CCB_AST_TYPE_STATEMENT_COMPOUND - Compound statement + * CCB_AST_TYPE_STATEMENT_GOTO - Goto statement + * CCB_AST_TYPE_STATEMENT_LABEL - Goto statement label + * CCB_AST_TYPE_POST_INCREMENT - Post increment operation + * CCB_AST_TYPE_POST_DECREMENT - Post decrement operation + * CCB_AST_TYPE_PRE_INCREMENT - Pre increment operation + * CCB_AST_TYPE_PRE_DECREMENT - Pre decrement operation + * CCB_AST_TYPE_LSHIFT - Left shift operation + * CCB_AST_TYPE_RSHIFT - Right shift operation + * CCB_AST_TYPE_EQUAL - Equality condition + * CCB_AST_TYPE_GEQUAL - Greater-or-equal condition + * CCB_AST_TYPE_LEQUAL - Less-or-equal condition + * CCB_AST_TYPE_NEQUAL - Not-equal condition + * CCB_AST_TYPE_AND - Logical-and operation + * CCB_AST_TYPE_OR - Logical-or operation + */ +typedef enum { + CCB_AST_TYPE_LITERAL = 0x100, + CCB_AST_TYPE_STRING, + CCB_AST_TYPE_VAR_LOCAL, + CCB_AST_TYPE_VAR_GLOBAL, + CCB_AST_TYPE_CALL, + CCB_AST_TYPE_PTRCALL, + CCB_AST_TYPE_FUNCTION, + CCB_AST_TYPE_PROTOTYPE, + CCB_AST_TYPE_DECLARATION, + CCB_AST_TYPE_INITIALIZER, + CCB_AST_TYPE_STRUCT, + CCB_AST_TYPE_ADDRESS, + CCB_AST_TYPE_DEREFERENCE, + CCB_AST_TYPE_EXPRESSION_TERNARY, + CCB_AST_TYPE_EXPRESSION_CAST, + CCB_AST_TYPE_EXPRESSION_COMMA, + CCB_AST_TYPE_STATEMENT_IF, + CCB_AST_TYPE_STATEMENT_FOR, + CCB_AST_TYPE_STATEMENT_WHILE, + CCB_AST_TYPE_STATEMENT_DO, + CCB_AST_TYPE_STATEMENT_SWITCH, + CCB_AST_TYPE_STATEMENT_CASE, + CCB_AST_TYPE_STATEMENT_DEFAULT, + CCB_AST_TYPE_STATEMENT_RETURN, + CCB_AST_TYPE_STATEMENT_BREAK, + CCB_AST_TYPE_STATEMENT_CONTINUE, + CCB_AST_TYPE_STATEMENT_COMPOUND, + CCB_AST_TYPE_STATEMENT_GOTO, + CCB_AST_TYPE_STATEMENT_LABEL, + 
CCB_AST_TYPE_STATEMENT_ASM,
+    CCB_AST_TYPE_POST_INCREMENT,
+    CCB_AST_TYPE_POST_DECREMENT,
+    CCB_AST_TYPE_PRE_INCREMENT,
+    CCB_AST_TYPE_PRE_DECREMENT,
+    CCB_AST_TYPE_LSHIFT,
+    CCB_AST_TYPE_RSHIFT,
+    CCB_AST_TYPE_EQUAL,
+    CCB_AST_TYPE_GEQUAL,
+    CCB_AST_TYPE_LEQUAL,
+    CCB_AST_TYPE_NEQUAL,
+    CCB_AST_TYPE_AND,
+    CCB_AST_TYPE_OR
+} ccb_ast_type_t;
+
+/*
+ * Type: ccb_type_t
+ *  Type describing the ast type.
+ *
+ *  Constants:
+ *
+ *  CCB_TYPE_VOID       - void
+ *  CCB_TYPE_CHAR       - char
+ *  CCB_TYPE_SHORT      - short
+ *  CCB_TYPE_INT        - int
+ *  CCB_TYPE_LONG       - long
+ *  CCB_TYPE_LLONG      - long long
+ *  CCB_TYPE_FLOAT      - float
+ *  CCB_TYPE_DOUBLE     - double
+ *  CCB_TYPE_LDOUBLE    - long double
+ *  CCB_TYPE_ARRAY      - array (also contains a ccb_type_t for base type)
+ *  CCB_TYPE_POINTER    - pointer (also contains a ccb_type_t for base type)
+ *  CCB_TYPE_STRUCTURE  - structure (user defined)
+ *  CCB_TYPE_FUNCTION   - function (user defined)
+ *  CCB_TYPE_CDECL      - used by the parser for dealing with declarations
+ *  CCB_TYPE_ID         - only present when CCB_X_OBJC is defined
+ *                        (presumably the Objective-C `id` type - TODO confirm)
+ */
+typedef enum {
+    CCB_TYPE_VOID,
+    CCB_TYPE_CHAR,
+    CCB_TYPE_SHORT,
+    CCB_TYPE_INT,
+    CCB_TYPE_LONG,
+    CCB_TYPE_LLONG,
+    CCB_TYPE_FLOAT,
+    CCB_TYPE_DOUBLE,
+    CCB_TYPE_LDOUBLE,
+    CCB_TYPE_ARRAY,
+    CCB_TYPE_POINTER,
+    CCB_TYPE_STRUCTURE,
+    CCB_TYPE_FUNCTION,
+    CCB_TYPE_CDECL
+#ifdef CCB_X_OBJC
+    , CCB_TYPE_ID
+#endif
+} ccb_type_t;
+
+/*
+ * Type: ccb_ast_data_type_t
+ *  Type describing the index into `ast_data_table`
+ *
+ *  Constants:
+ *
+ *  CCB_AST_DATA_VOID       - void
+ *  CCB_AST_DATA_LONG       - long
+ *  CCB_AST_DATA_LLONG      - long long
+ *  CCB_AST_DATA_INT        - int
+ *  CCB_AST_DATA_SHORT      - short
+ *  CCB_AST_DATA_CHAR       - char
+ *  CCB_AST_DATA_FLOAT      - float
+ *  CCB_AST_DATA_DOUBLE     - double
+ *  CCB_AST_DATA_LDOUBLE    - long double
+ *  CCB_AST_DATA_UINT       - unsigned int
+ *  CCB_AST_DATA_ULONG      - unsigned long
+ *  CCB_AST_DATA_ULLONG     - unsigned long long
+ *  CCB_AST_DATA_FUNCTION   - function (current)
+ *  CCB_AST_DATA_ID         - only present when CCB_X_OBJC is defined
+ *  CCB_AST_DATA_COUNT      - last enumerator; its value is the number of
+ *                            entries listed before it
+ */
+typedef enum {
+    CCB_AST_DATA_VOID,
+    CCB_AST_DATA_LONG,
+    CCB_AST_DATA_LLONG,
+    CCB_AST_DATA_INT,
+    CCB_AST_DATA_SHORT,
+
CCB_AST_DATA_CHAR,
+    CCB_AST_DATA_FLOAT,
+    CCB_AST_DATA_DOUBLE,
+    CCB_AST_DATA_LDOUBLE,
+    CCB_AST_DATA_UINT,
+    CCB_AST_DATA_ULONG,
+    CCB_AST_DATA_ULLONG,
+    CCB_AST_DATA_FUNCTION,
+#ifdef CCB_X_OBJC
+    CCB_AST_DATA_ID,
+#endif
+    CCB_AST_DATA_COUNT
+} ccb_ast_data_type_t;
+
+/*
+ * Type: ccb_cdecl_t
+ *  Describes type of declarations
+ *
+ *  Constants:
+ *
+ *  CCB_CDECL_BODY          - function body
+ *  CCB_CDECL_PARAMETER     - parameters (with name)
+ *  CCB_CDECL_OBJCPARAMETER - objc parameter (type in brackets then name);
+ *                            only present when CCB_X_OBJC is defined
+ *  CCB_CDECL_TYPEONLY      - parameters (without name)
+ *  CCB_CDECL_CAST          - cast
+ */
+typedef enum {
+    CCB_CDECL_BODY = 1,
+    CCB_CDECL_PARAMETER,
+#ifdef CCB_X_OBJC
+    CCB_CDECL_OBJCPARAMETER,
+#endif
+    CCB_CDECL_TYPEONLY,
+    CCB_CDECL_CAST
+} ccb_cdecl_t;
+
+/*
+ * Type: ccb_storage_t
+ *  Describes the storage class for a given variable
+ *
+ *  Constants:
+ *
+ *  CCB_STORAGE_TYPEDEF     - typedef to another type
+ *  CCB_STORAGE_EXTERN      - external linkage
+ *  CCB_STORAGE_STATIC      - static storage
+ *  CCB_STORAGE_AUTO        - automatic storage (implicit)
+ *  CCB_STORAGE_REGISTER    - make use of register for storage
+ */
+typedef enum {
+    CCB_STORAGE_TYPEDEF = 1,
+    CCB_STORAGE_EXTERN,
+    CCB_STORAGE_STATIC,
+    CCB_STORAGE_AUTO,
+    CCB_STORAGE_REGISTER
+} ccb_storage_t;
+
+/*
+ * Struct: ccb_data_type_t
+ *  A structure that describes a data type.
+ */
+typedef struct ccb_data_type_s ccb_data_type_t;
+struct ccb_data_type_s {
+    /*
+     * Variable: type
+     *  The type of the data type.
+     *
+     *  See the ccb_type_t constants for a list of
+     *  valid values.
+     */
+    ccb_type_t type;
+
+    /*
+     * Variable: size
+     *  The size of the given data type
+     */
+    int size;
+
+    /*
+     * Variable: sign
+     *  Describes if the type is signed or unsigned.
+     *
+     *  Contains `true` when signed, otherwise `false`.
+     */
+    bool sign;
+
+    /*
+     * Variable: isstatic
+     *  True when static (global only)
+     */
+    bool isstatic;
+
+    /*
+     * Variable: length
+     *  Instances of the data type.
+ * + * When used as a base-type, i.e not an array; this will be + * 1, otherwise it will be the length of the array, or -1 + * if the size of the array is unknown. + */ + int length; + + /* + * Variable: pointer + * Pointer to pointer type if pointer + * + * When the variable is a pointer type, this will point to another + * data type that describes the base type of the pointer, NULL other- + * wise. + */ + ccb_data_type_t* pointer; + + /* structure */ + struct { + /* Variable: classname + * Name of the declared class, or null (NOTE: This may be dynamically resolved). + */ + const char* classname; + + /* Variable: supername + * Name of the declared superclass (NOTE: This may be dynamically resolved). + */ + const char* supername; + + /* Variable: supertype + * Pointer to the superclass type, if defined. + */ + //ccb_data_type_t* supertype; + + /* Variable: imethods + * Table of instance methods, if defined. + */ + ccb_table_t* imethods; + + /* Variable: cmethods + * Table of class methods, if defined. + */ + ccb_table_t* cmethods; + + /* + * Variable: fields + * Pointer to a table of fields (if structure) + */ + ccb_table_t* fields; + + /* + * Variable: offset + * Offset of the given field in a structure (if a structure base type) + */ + int offset; + + /* + * Variable: isstruct + * If we're dealing with a structure this will be true, false + * otherwise. + */ + bool isstruct; + }; + + /* function */ + struct { + /* + * Variable: returntype + * Pointer to a data type which describes the return type + * of the function (if a function) + */ + ccb_data_type_t* returntype; + + /* + * Variable: parameters + * Pointer to a list of parameters for a function. + */ + ccb_list_t* parameters; + + /* + * Variable: hasdots + * Describes if the given function is variable-argument. + * + * Contains the value `true` when the function has + * three dots `...` in it's prototype, otherwise `false`. 
+ */ + bool hasdots; + + int callconv; + bool isnaked; + }; +}; + +/* + * Struct: ccb_ast_string_t + * The *CCB_AST_TYPE_STRING* ast node. + */ +typedef struct { + /* + * Variable: data + * String contents + */ + char* data; + + /* + * Variable: label + * Name of the label associated with the string. + */ + char* label; +} ccb_ast_string_t; + +/* + * Struct: ccb_ast_variable_t + * The *CCB_AST_TYPE_VAR_LOCAL* and *CCB_AST_TYPE_VAR_GLOBAL* ast node. + */ +typedef struct { + /* + * Variable: name + * Name of the variable + */ + char* name; + + /* + * Variable: off + * Offset of the variable on the stack. + */ + int off; + + /* + * Variable: label + * Name of the label associated with the variable. + */ + char* label; + + /* + * Variable: init + * Compound literal list for initialization + */ + ccb_list_t* init; + + + /* + * Variable: isclassobj + * This is set to true for global variables representing class objects, so that initialisation can be done at send-time. + */ + bool isclassobj; +} ccb_ast_variable_t; + +/* + * Struct ccb_ast_function_call_t + * Function call + * + * Remarks: + * Not associated with any node. Instead describes the + * data associated with a function call for *ccb_ast_function_t* + */ +typedef struct { + /* + * Variable: args + * Pointer to a list of arguments for a function call + */ + ccb_list_t* args; + + /* + * Variable: paramtypes + * Pointer to a list of parameter types for the function call. + */ + ccb_list_t* paramtypes; + + /* + * Variable: callable + * Expression resulting in callable value (function pointer) for a PTRCALL. + */ + ccb_ast_t* callable; +} ccb_ast_function_call_t; + +/* + * Struct: ccb_ast_function_t + * The *CCB_AST_TYPE_FUNCTION* ast node. 
+ */ +typedef struct { + /* + * Variable: name + * The function name + */ + char* name; + + /* + * Variable: callconv + * Records any non-default calling convention for a call + */ + int callconv; + + /* + * Variable: isnaked + * Set to true if the function prologue is not to be emitted (only used for + * functions implemented primarily in assembly language). + */ + //bool isnaked; + + /* + * Variable: call + * Data associated with a function call. + */ + ccb_ast_function_call_t call; + + /* + * Variable: params + * Pointer to a list of parameters. + */ + ccb_list_t* params; + + /* + * Variable: locals + * Pointer to a list of locals. + */ + ccb_list_t* locals; + + /* + * Variable: body + * Pointer to an ast node which describes the body. + * + * Remarks: + * A body is usually composed of a serise of ast nodes, + * typically a compound expression, but could also contain + * nested compound expressions. Think of this as a pointer + * to the head of the beginning of a serise of basic-blocks + * which are the forming of the function body. + */ + ccb_ast_t* body; +} ccb_ast_function_t; + +/* + * Struct: ccb_ast_unary_t + * Represents a unary operation in the AST tree + */ +typedef struct { + /* + * Variable: operand + * Pointer to the operand the unary operation is to + * be performed on. + */ + ccb_ast_t* operand; +} ccb_ast_unary_t; + +/* + * Struct: ccb_ast_decl_t + * Represents a declaration in the AST tree + */ +typedef struct { + /* + * Variable: var + * Pointer to the variable node associated with the + * declaration. + */ + ccb_ast_t* var; + + /* + * Variable: init + * When the declaration includes an initialization this points + * to a initlization list. + */ + ccb_list_t* init; +} ccb_ast_decl_t; + +/* + * Struct: ccb_ast_ifthen_t + * Represents a if-then node in the AST tree. + * + * Remarks: + * Describes a two-branch gaurded by conditional test node in the AST + * tree for implementing ternary expressions and if statements. 
+ */ +typedef struct { + /* + * Variable: cond + * The condition node + */ + ccb_ast_t* cond; + + /* + * Variable: then + * Basic block for truth path in branch + */ + ccb_ast_t* then; + + /* + * Variable: last + * Basic block for false path in branch + */ + ccb_ast_t* last; +} ccb_ast_ifthen_t; + +/* + * Struct: ccb_ast_for_t + * Represents a for-loop node in the AST tree. + * + * Remarks: + * Standard for loop with precondition / initilization expression, + * conditionally testsed, and post step / expression, ergo + * for(init; cond; step) body; + */ +typedef struct { + /* Variable: init */ + ccb_ast_t* init; + /* Variable: cond */ + ccb_ast_t* cond; + /* Variable: step */ + ccb_ast_t* step; + /* Variable: body */ + ccb_ast_t* body; +} ccb_ast_for_t; + + +/* + * Struct: ccb_ast_init_t + * Represents an initializer in the AST tree. + * + * Remarks: + * Represents array initializer lists, as well as aggregate initializer + * lists for structure, enum and union. Also represents a designated + * initializer for a structure. + */ +typedef struct { + /* Variable: value */ + ccb_ast_t* value; + + /* Variable: offset */ + int offset; + + /* Variable: type */ + ccb_data_type_t* type; +} ccb_ast_init_t; + +/* + * Struct: ccb_ast_switch_t + * Represents a switch statement in the AST tree. + */ +typedef struct { + /* Variable: expr */ + ccb_ast_t* expr; + /* Variable: body */ + ccb_ast_t* body; +} ccb_ast_switch_t; + +/* + * Struct: ccb_ast_goto_t + * Represents a goto statement (or label) in the AST tree. + */ +typedef struct { + /* + * Variable: label + * When not used as a goto statement, describes the name of a label + * that may be 'gone to' with 'goto' + */ + char* label; + + /* + * Variable: where + * Where to go (label wise) for a goto statement. + */ + char* where; +} ccb_ast_goto_t; + +/* + * Struct: ccb_ast_asm_t + * Represents an asm statement in the AST tree. + */ +typedef struct { + /* + * Variable: code + * Assembler code to output. 
+ */ + ccb_list_t* code; +} ccb_ast_asm_t; + +/* + * Struct: ccb_ast_t + * The monolthic ast tree and node at the same time. + * + * Remarks: + * The ast tree is just a doubly-linked list of ast nodes which are + * capable of being all the possible ast nodes at once. This is + * acomplished with a rather large union of all ast nodes. The only + * thing that declares what a node actually is, is the nodes type + * member. This is beneficial to keeping the complexity of the AST + * tree down, while also keeping memory footprint low. One more + * interesting aspect of this is the ability to have the AST tree + * nodes (next, prev), which make up the doubly-linked list part + * of the same union, giving us a free way to terminate the tree + * without using additional space to determine it. + */ +struct ccb_ast_s { + int type; + ccb_data_type_t* ctype; + int return_callconv; // A bit of a hack, used for checking the calling convention for a return statement + ccb_pos_t pos; + + union { + int casevalue; + long long integer; + char character; + ccb_ast_string_t string; + ccb_ast_variable_t variable; + ccb_ast_function_t function; + ccb_ast_unary_t unary; + ccb_ast_decl_t decl; + ccb_ast_ifthen_t ifstmt; + ccb_ast_for_t forstmt; + ccb_ast_switch_t switchstmt; + ccb_ast_t* returnstmt; + ccb_list_t* compound; + ccb_ast_init_t init; + ccb_ast_goto_t gotostmt; + ccb_ast_asm_t asmstmt; + + struct { + ccb_ast_t* left; + ccb_ast_t* right; + }; + + struct { + ccb_ast_t* structure; + char* field; + ccb_data_type_t* fieldtype; + }; + + struct { + double value; + char* label; + } floating; + + }; +}; + +extern ccb_data_type_t* ccb_ast_data_table[CCB_AST_DATA_COUNT]; + +extern ccb_list_t* ccb_ast_floats; +extern ccb_list_t* ccb_ast_strings; +extern ccb_list_t* ccb_ast_locals; +extern ccb_list_t* ccb_ast_gotos; +extern ccb_table_t* ccb_ast_globalenv; +extern ccb_table_t* ccb_ast_localenv; +extern ccb_table_t* ccb_ast_structures; +extern ccb_table_t* ccb_ast_unions; +extern ccb_table_t* 
ccb_ast_labels; + +/* + * Function: ccb_util_init + * Initialises any structures required for lists or other utilities. + */ +void ccb_util_init(ccb_t* ccb); + +/* + * Function: ccb_ast_init + * Initialises the AST's internal structures according to arch_xxx results. + */ +void ccb_ast_init(ccb_t* ccb); + +/* + * Function: ccb_ast_structure_reference + * Creates an structure reference of a given type for a given field + * + * Parameters: + * type - The type of the field for reference + * structure - The structure that contains said field to be referenced + * name - The name of the field in that structure to reference + * + * Returns: + * An ast node referencing that field in that paticular structure on + * success, otherwise NULL. + */ +ccb_ast_t* ccb_ast_structure_reference(ccb_t* ccb, ccb_data_type_t* type, ccb_ast_t* structure, char* name); + +/* + * Function: ccb_ast_structure_field + * Copies a given field data type and changes it offset + * + * Parameters: + * type - Pointer to the structure field data type + * offset - The offset of the copied data type in the structure + * + * Returns: + * A copy of the structure fields data type with the supplied + * offset on success, NULL otherwise. + */ +ccb_data_type_t* ccb_ast_structure_field(ccb_t* ccb, ccb_data_type_t* type, int offset); + +/* + * Function: ccb_ast_structure_new + * Creates a structure data type + * + * Parameters; + * field - A table of ccb_data_type_t fields for the structure + * size - The size of the structure + * isstruct - true if structure, false if structure-like + * + * Returns: + * A new structure data type with the specified fields and size on + * success, NULL otherwise. 
+ */ +ccb_data_type_t* ccb_ast_structure_new(ccb_t* ccb, ccb_table_t* fields, int size, bool isstruct); + + +ccb_ast_t* ccb_ast_new_unary(ccb_t* ccb, int type, ccb_data_type_t* data, ccb_ast_t* operand); +ccb_ast_t* ccb_ast_new_binary(ccb_t* ccb, int type, ccb_ast_t* left, ccb_ast_t* right); +ccb_ast_t* ccb_ast_new_comma(ccb_t* ccb, int type, ccb_ast_t* left, ccb_ast_t* right); +ccb_ast_t* ccb_ast_new_integer(ccb_t* ccb, ccb_data_type_t* type, long long int value); +ccb_ast_t* ccb_ast_new_floating(ccb_t* ccb, ccb_data_type_t*, double value); +ccb_ast_t* ccb_ast_new_char(ccb_t* ccb, char value); +ccb_ast_t* ccb_ast_new_string(ccb_t* ccb, char* value); +ccb_ast_t* ccb_ast_new_label(ccb_t* ccb, char*); + +char* ccb_ast_label(ccb_t* ccb); + +ccb_ast_t* ccb_ast_declaration(ccb_t* ccb, ccb_ast_t* var, ccb_list_t* init); +ccb_ast_t* ccb_ast_variable_local(ccb_t* ccb, ccb_data_type_t* type, char* name); +ccb_ast_t* ccb_ast_variable_global(ccb_t* ccb, ccb_data_type_t* type, char* name); +ccb_ast_t* ccb_ast_call(ccb_t* ccb, ccb_data_type_t* type, char* name, ccb_list_t* args, ccb_list_t* paramtypes, int callconv); +ccb_ast_t* ccb_ast_ptrcall(ccb_t* ccb, ccb_data_type_t* type, char* name, ccb_ast_t* callable, ccb_list_t* args, ccb_list_t* paramtypes, int callconv); +ccb_ast_t* ccb_ast_function(ccb_t* ccb, ccb_data_type_t* type, char* name, ccb_list_t* params, ccb_ast_t* body, ccb_list_t* locals); +ccb_ast_t* ccb_ast_initializer(ccb_t* ccb, ccb_ast_t*, ccb_data_type_t*, int); +ccb_ast_t* ccb_ast_if(ccb_t* ccb, ccb_ast_t* cond, ccb_ast_t* then, ccb_ast_t* last); +ccb_ast_t* ccb_ast_for(ccb_t* ccb, ccb_ast_t* init, ccb_ast_t* cond, ccb_ast_t* step, ccb_ast_t* body); +ccb_ast_t* ccb_ast_while(ccb_t* ccb, ccb_ast_t* cond, ccb_ast_t* body); +ccb_ast_t* ccb_ast_do(ccb_t* ccb, ccb_ast_t* cond, ccb_ast_t* body); +ccb_ast_t* ccb_ast_return(ccb_t* ccb, int callconv, ccb_data_type_t* returntype, ccb_ast_t* val); +ccb_ast_t* ccb_ast_compound(ccb_t* ccb, ccb_list_t* statements); 
+ccb_ast_t* ccb_ast_ternary(ccb_t* ccb, ccb_data_type_t* type, ccb_ast_t* cond, ccb_ast_t* then, ccb_ast_t* last); +ccb_ast_t* ccb_ast_switch(ccb_t* ccb, ccb_ast_t* expr, ccb_ast_t* body); +ccb_ast_t* ccb_ast_case(ccb_t* ccb, int value); +ccb_ast_t* ccb_ast_goto(ccb_t* ccb, char*); +ccb_ast_t* ccb_ast_asm(ccb_t* ccb, ccb_list_t*); +ccb_ast_t* ccb_ast_make(ccb_t* ccb, int type); + +ccb_data_type_t* ccb_ast_prototype(ccb_t* ccb, ccb_data_type_t* returntype, ccb_list_t* paramtypes, bool dots); +ccb_data_type_t* ccb_ast_pointer(ccb_t* ccb, ccb_data_type_t* type); +ccb_data_type_t* ccb_ast_array(ccb_t* ccb, ccb_data_type_t* type, int size); +ccb_data_type_t* ccb_ast_array_convert(ccb_t* ccb, ccb_data_type_t* ast); +ccb_data_type_t* ccb_ast_result_type(ccb_t* ccb, int op, ccb_data_type_t* a, ccb_data_type_t* b); + +const char* ccb_ast_type_string(ccb_t* ccb, ccb_data_type_t* type); +bool ccb_ast_type_integer(ccb_t* ccb, ccb_data_type_t* type); +bool ccb_ast_type_floating(ccb_t* ccb, ccb_data_type_t* type); +ccb_data_type_t* ccb_ast_type_copy(ccb_t* ccb, ccb_data_type_t* type); +ccb_data_type_t* ccb_ast_type_copy_incomplete(ccb_t* ccb, ccb_data_type_t* type); +ccb_data_type_t* ccb_ast_type_create(ccb_t* ccb, ccb_type_t type, bool sign); +ccb_data_type_t* ccb_ast_type_stub(ccb_t* ccb); + + +char* ccb_ast_string(ccb_t* ccb, ccb_ast_t* ast); + +/* NOTE: The ccb_target interface will probably be reworked eventually to allow multiple targets + * to be managed easier. This will probably involve simplifying or generalising some features + * and having the code generators operate via callback functions. Right now all targets are + * implemented in the same backend, which has various options. 
+ */ + +void ccb_target_init(ccb_t* ccb); + +// Mostly/fully implemented but only partly tested, 64-bit mode only +#define CCB_ARCH_FAMILY_X86 1 +// Not implemented yet +#define CCB_ARCH_FAMILY_ARM 2 +// Partly implemented +#define CCB_ARCH_FAMILY_RISCV 3 +// Mostly implemented but mostly untested, just generates a virtual instruction set +#define CCB_ARCH_FAMILY_GENERIC 4 +// Partly implemented, aimed at working with an earlier version of my CPU core +#define CCB_ARCH_FAMILY_GEN1 5 +int ccb_target_family(ccb_t* ccb); + +size_t ccb_target_type_size_char(ccb_t* ccb); +size_t ccb_target_type_size_short(ccb_t* ccb); +size_t ccb_target_type_size_int(ccb_t* ccb); +size_t ccb_target_type_size_long(ccb_t* ccb); +size_t ccb_target_type_size_llong(ccb_t* ccb); +size_t ccb_target_type_size_float(ccb_t* ccb); +size_t ccb_target_type_size_double(ccb_t* ccb); +size_t ccb_target_type_size_ldouble(ccb_t* ccb); +size_t ccb_target_type_size_pointer(ccb_t* ccb); + +/* + * Function: ccb_target_alignment() + * The default alignment of structure elements (padding) for the given + * architecture / ABI + */ +size_t ccb_target_alignment(ccb_t* ccb); + +/* + * Function: ccb_target_callregisters() + * The maximum number of registers to place a direct or indirect + * function call for the given architecture / ABI, after which stack + * space will be used. + */ +int ccb_target_callregisters(ccb_t* ccb); + +/* + * Function: ccb_target_callregister() + * The name of the register used to store the direct argument at the given index (which must be >= 0 and < ccb_target_callregisters()). 
+ */ +const char* ccb_target_callregister(ccb_t* ccb, int idx); + +const char* ccb_target_r0(ccb_t* ccb); + +const char* ccb_target_r1(ccb_t* ccb); + +const char* ccb_target_sp(ccb_t* ccb); + +const char* ccb_target_bp(ccb_t* ccb); + +#define CCB_TARGET_ASMFMT_GAS 1 +#define CCB_TARGET_ASMFMT_FASM 2 +#define CCB_TARGET_ASMFMT_NASM 3 +#define CCB_TARGET_ASMFMT_RAW 4 + +int ccb_target_asmfmt(ccb_t* ccb); + +#define CCB_TARGET_BINFMT_ELF 1 +#define CCB_TARGET_BINFMT_FLAT 2 + +int ccb_target_binfmt(ccb_t* ccb); + +#define CCB_TARGET_CALLCONV_STANDARD 1 +#define CCB_TARGET_CALLCONV_WINDOWS 2 + +int ccb_target_callconv(ccb_t* ccb); + +/* + * Function: ccb_compile_error + * Write compiler error diagnostic to stderr, formatted + * + * Parameters: + * fmt - Standard format specification string + * ... - Additional variable arguments + * + * Remarks: + * This function does not return, it kills execution via + * exit(1); + */ +void ccb_compile_error_impl(ccb_t* ccb);//, const char* fmt, ...); +#define ccb_compile_error(ccb,...) \ + do{fprintf(stderr, "ERROR: "); fprintf(stderr,__VA_ARGS__);fprintf(stderr,"\n");ccb_compile_error_impl(ccb);}while(0); + +/* + * Function: ccb_compile_warn + * Same as ccb_compile_error but doesn't exit + */ +void ccb_compile_warn_impl(ccb_t* ccb);//, const char* fmt, ...); +#define ccb_compile_warn(ccb,...) 
\ + do{fprintf(stderr, "WARNING: "); fprintf(stderr,__VA_ARGS__);fprintf(stderr,"\n");ccb_compile_warn_impl(ccb);}while(0); + + +/* TODO: eliminate */ +ccb_list_t* ccb_parse_run(ccb_t* ccb); +int ccb_parse_evaluate(ccb_t* ccb, ccb_ast_t* ast); +void ccb_target_gen_data_section(ccb_t* ccb); +void ccb_target_gen_function(ccb_t* ccb, ccb_ast_t* function); + +#ifdef CCB_IMPLEMENTATION + +#include +#include +#include +#include +#include +#include +#include + +#define CCB_MEMORY (1024ULL*1024*1024*2) +/*0x800000*/ + +static unsigned char* ccb_memory_pool = NULL; +static size_t ccb_memory_next; + +static void ccb_memory_cleanup(void) { + free(ccb_memory_pool); +} + +void* ccb_memory_allocate(size_t bytes) { + void* value; + //fprintf(stderr, "ccb_memory_allocate: %ld\n", bytes); + + while ((bytes % 16) != 0) { + bytes++; + } + + if (!ccb_memory_pool) { + //fprintf(stderr, "ccb_memory_allocate: B %ld\n", CCB_MEMORY); + ccb_memory_pool = calloc(CCB_MEMORY,1); + ccb_memory_next = 0; + //fprintf(stderr, "ccb_memory_create: C %lx\n", ccb_memory_pool); + //TODO: atexit(ccb_memory_cleanup); + } + + if (ccb_memory_next + bytes >= CCB_MEMORY) { + fprintf(stderr, "FATAL: Out of memory, need to increase compiler reserved memory size!\n"); + ((char*)NULL)[0] = 0; // Trigger debugger + exit(-1); + } + + /* TODO: This doesn't self-compile properly yet (it's accepted but results in zero/error) + value = &(ccb_memory_pool[ccb_memory_next]); + */ + value = ccb_memory_pool + ccb_memory_next; + ccb_memory_next += bytes; + //fprintf(stderr, "ccb_memory_allocate: %lx\n", value); + + return value; +} + +void ccb_util_init(ccb_t* ccb) { + +} + +struct ccb_string_s { + char* buffer; + int allocated; + int length; +}; + +static void ccb_string_reallocate(ccb_string_t* string) { + int size = string->allocated * 2; + char* buffer = ccb_memory_allocate(size); + + strcpy(buffer, string->buffer); + string->buffer = buffer; + string->allocated = size; +} + +void ccb_string_catcstr(ccb_string_t* string, 
const char* str) { + size_t i = 0; + while (str[i] != 0) { + ccb_string_cat(string, str[i]); + i++; + } +} + +/* +** LTOA.C +** +** Converts a long integer to a string. +** +** Copyright 1988-90 by Robert B. Stout dba MicroFirm +** +** Released to public domain, 1991 +** +** Parameters: 1 - number to be converted +** 2 - buffer in which to build the converted string +** 3 - number base to use for conversion +** +** Returns: A character pointer to the converted string if +** successful, a NULL pointer if the number base specified +** is out of range. +*/ + +#include +#include + +#define BUFSIZE (sizeof(long) * 8 + 1) + +char *ltoa(long N, char *str, int base) +{ + /* register */ int i = 2; + long uarg; + char *tail; + char *head = str; + char buf[BUFSIZE]; + if (36 < base || 2 > base) + base = 10; /* can only use 0-9, A-Z */ + tail = buf + (BUFSIZE - 1); /* last character position */ + *tail-- = '\0'; + + if (10 == base && N < 0L) + { + *head++ = '-'; + uarg = -N; + } + else uarg = N; + if (uarg) + { + for (i = 1; uarg; ++i) + { + ///*register*/ ldiv_t r; + + int rrem = uarg % base;//ldiv(uarg, base); + *tail-- = (char)(rrem + ((9L < rrem) ? + ('A' - 10L) : '0')); + uarg = uarg / base; + } + } + else *tail-- = '0'; + memcpy(head, ++tail, i); + return str; +} + +void ccb_string_catint(ccb_string_t* string, long long i) { + const char* x = calloc(100, 1); + ccb_string_catcstr(string, ltoa(i, x, 10)); + free(x); +} + +void +#ifdef _ZCC +__classic_call +#endif +ccb_string_catf(ccb_string_t* string, const char* fmt, ...) 
{ + //fprintf(stderr, "Catting f '%s'...\n", fmt); + va_list va; + for (;;) { + int left = string->allocated - string->length; + int write; + + va_start(va, fmt); + write = vsnprintf((string->buffer) + (string->length), left, fmt, va); + va_end(va); + + if (left <= write) { + ccb_string_reallocate(string); + continue; + } + string->length += write; + //fprintf(stderr, "Done catting f...\n"); + return; + } +} + +ccb_string_t* ccb_string_create(void) { + ccb_string_t* string = ccb_memory_allocate(sizeof(ccb_string_t)); + string->buffer = ccb_memory_allocate(8); + string->allocated = 8; + string->length = 0; + string->buffer[0] = '\0'; + return string; +} + +char* ccb_string_buffer(ccb_string_t* string) { + return string->buffer; +} + +void ccb_string_cat(ccb_string_t* string, char ch) { + if (string->allocated == (string->length + 1)) + ccb_string_reallocate(string); + string->buffer[string->length++] = ch; + string->buffer[string->length] = '\0'; +} + +char* ccb_string_quote(char* p) { + //printf("Trying to handle quotes of '%s'\n", p); + ccb_string_t* string = ccb_string_create(); + while (*p) { + if (*p == '\"' || *p == '\\') { + //ccb_string_catf(string, "\\%c", *p); + ccb_string_cat(string, '\\'); + ccb_string_cat(string, *p); + } else if (*p == '\n') { + //ccb_string_catf(string, "\\n"); + ccb_string_catcstr(string, "\\n"); + } else { + ccb_string_cat(string, *p); + } + p++; + } + return string->buffer; +} + +char* ccb_string_quote_fasm(char* p, char quotechar) { + ccb_string_t* string = ccb_string_create(); + while (*p) { + if (*p == '\"' || *p == '\'' || *p == '\\' || *p == '\n') { + //ccb_string_catf(string, "%c, %d, %c", quotechar, (int)*p, quotechar); + ccb_string_cat(string, quotechar); + ccb_string_catcstr(string, ", "); + ccb_string_catint(string, (int)*p); + ccb_string_catcstr(string, ", "); + ccb_string_cat(string, quotechar); + } else { + ccb_string_cat(string, *p); + } + p++; + } + return string->buffer; +} + +struct ccb_list_node_s { + void* 
element; + ccb_list_node_t* next; + ccb_list_node_t* prev; +}; + +struct ccb_list_iterator_s { + ccb_list_node_t* pointer; +}; + +ccb_list_t* ccb_list_create(void) { + ccb_list_t* list = ccb_memory_allocate(sizeof(ccb_list_t)); + list->length = 0; + list->head = NULL; + list->tail = NULL; + + return list; +} + +void* ccb_list_node_create(void* element) { + ccb_list_node_t* node = ccb_memory_allocate(sizeof(ccb_list_node_t)); + node->element = element; + node->next = NULL; + node->prev = NULL; + return node; +} + +void ccb_list_push(ccb_list_t* list, void* element) { + ccb_list_node_t* node = ccb_list_node_create(element); + if (!list->head) + list->head = node; + else { + list->tail->next = node; + node->prev = list->tail; + } + list->tail = node; + list->length++; +} + +void* ccb_list_pop(ccb_list_t* list) { + if (!list->head) + return NULL; + void* element = list->tail->element; + list->tail = list->tail->prev; + if (list->tail) + list->tail->next = NULL; + else + list->head = NULL; + list->length--; + return element; +} + +void* ccb_list_shift(ccb_list_t* list) { + if (!list->head) + return NULL; + void* element = list->head->element; + list->head = list->head->next; + if (list->head) + list->head->prev = NULL; + else + list->tail = NULL; + list->length--; + return element; +} + +int ccb_list_length(ccb_list_t* list) { + return list->length; +} + +ccb_list_iterator_t* ccb_list_iterator(ccb_list_t* list) { + ccb_list_iterator_t* iter = ccb_memory_allocate(sizeof(ccb_list_iterator_t)); + if (iter == NULL) { + fprintf(stderr, "ICE: %s (out of memory?)", __func__); + exit(-1); + } + if (list == NULL) { // TODO: Added by Zak but not sure if a call with NULL indicates an error? 
+ iter->pointer = NULL; + } else { + iter->pointer = list->head; + } + return iter; +} + +void* ccb_list_iterator_next(ccb_list_iterator_t* iter) { + void* ret; + + if (!iter->pointer) + return NULL; + + ret = iter->pointer->element; + iter->pointer = iter->pointer->next; + + return ret; +} + +bool ccb_list_iterator_end(ccb_list_iterator_t* iter) { + return !iter->pointer; +} + +static void ccb_list_shiftify(ccb_list_t* list, void* element) { + ccb_list_node_t* node = ccb_list_node_create(element); + node->next = list->head; + if (list->head) + list->head->prev = node; + list->head = node; + if (!list->tail) + list->tail = node; + list->length++; +} + +ccb_list_t* ccb_list_reverse(ccb_list_t* list) { + ccb_list_t* ret = ccb_list_create(); + for (ccb_list_iterator_t* it = ccb_list_iterator(list); !ccb_list_iterator_end(it); ) + ccb_list_shiftify(ret, ccb_list_iterator_next(it)); + return ret; +} + +void* ccb_list_tail(ccb_list_t* list) { + if (!list->head) + return NULL; + + ccb_list_node_t* node = list->head; + while (node->next) + node = node->next; + + return node->element; +} + +typedef struct { + char* key; + void* value; +} ccb_table_entry_t; + +void* ccb_table_create(void* parent) { + ccb_table_t* table = ccb_memory_allocate(sizeof(ccb_table_t)); + table->list = ccb_list_create(); + table->parent = parent; + + return table; +} + +ccb_table_t* ccb_table_empty() { + return ccb_table_create(NULL); +} + +void* ccb_table_find(ccb_table_t* table, const char* key) { + for (; table; table = table->parent) { + for (ccb_list_iterator_t* it = ccb_list_iterator(table->list); !ccb_list_iterator_end(it); ) { + ccb_table_entry_t* entry = ccb_list_iterator_next(it); + if (!strcmp(key, entry->key)) + return entry->value; + } + } + return NULL; +} + +void ccb_table_insert(ccb_table_t* table, char* key, void* value) { + ccb_table_entry_t* entry = ccb_memory_allocate(sizeof(ccb_table_entry_t)); + entry->key = key; + entry->value = value; + + ccb_list_push(table->list, entry); +} 
+ +void* ccb_table_parent(ccb_table_t* table) { + return table->parent; +} + +ccb_list_t* ccb_table_values(ccb_table_t* table) { + ccb_list_t* list = ccb_list_create(); + for (; table; table = table->parent) + for (ccb_list_iterator_t* it = ccb_list_iterator(table->list); !ccb_list_iterator_end(it); ) + ccb_list_push(list, ((ccb_table_entry_t*)ccb_list_iterator_next(it))->value); + return list; +} + +ccb_list_t* ccb_table_keys(ccb_table_t* table) { + ccb_list_t* list = ccb_list_create(); + for (; table; table = table->parent) + for (ccb_list_iterator_t* it = ccb_list_iterator(table->list); !ccb_list_iterator_end(it); ) + ccb_list_push(list, ((ccb_table_entry_t*)ccb_list_iterator_next(it))->key); + return list; +} + +int ccb_strcasecmp(const char* s1, const char* s2) { + const unsigned char* u1 = (const unsigned char*)s1; + const unsigned char* u2 = (const unsigned char*)s2; + + while (tolower(*u1) == tolower(*u2++)) + if (*u1++ == '\0') + return 0; + return tolower(*u1) - tolower(*--u2); +} + +int ccb_strncasecmp(const char* s1, const char* s2, size_t n) { + const unsigned char* u1 = (const unsigned char*)s1; + const unsigned char* u2 = (const unsigned char*)s2; + + if (!n) + return 0; + + do { + if (tolower(*u1) != tolower(*u2++)) + return tolower(*u1) - tolower(*--u2); + if (*u1++ == '\0') + break; + } while (--n != 0); + + return 0; +} + +static int ccb_input_getc(ccb_t* ccb) { + int result = getc(ccb->input); + switch (result) { + case '\n': + ccb->pos.line++; + ccb->pos.uline++; + //fprintf(stderr, "@line %d\n", ccb->line); + ccb->icol = ccb->pos.ucol; + ccb->pos.col = 1; + ccb->pos.ucol = 1; + break; + default: + ccb->pos.col++; + ccb->pos.ucol++; + } + return result; +} + +static void ccb_input_ungetc(ccb_t* ccb, int charcode) { + switch (charcode) { + case '\n': + ccb->pos.line--; + ccb->pos.uline--; + ccb->pos.col = ccb->icol; + ccb->pos.ucol = ccb->icol; // TODO... 
+ break; + default: + ccb->pos.col--; // XXX: I should've tested, this seemed to be going in the wrong direction! + ccb->pos.ucol--; // XXX: I should've tested, this seemed to be going in the wrong direction! + } + ungetc(charcode, ccb->input); +} + +static ccb_list_t* ccb_lexer_buffer = NULL; ///*&CCB_SENTINEL_LIST*/ccb_list_create(); + +static ccb_lexer_token_t* ccb_lexer_token_copy(ccb_t* ccb, ccb_lexer_token_t* token) { + return memcpy(calloc(1,sizeof(ccb_lexer_token_t)), token, sizeof(ccb_lexer_token_t)); +} + +static ccb_lexer_token_t* ccb_lexer_identifier(ccb_t* ccb, ccb_string_t* str) { + ccb_lexer_token_t result = {}; + result.type = CCB_LEXER_TOKEN_IDENTIFIER; + result.string = ccb_string_buffer(str); + return ccb_lexer_token_copy(ccb, &result); + + /*return ccb_lexer_token_copy(ccb, &(ccb_lexer_token_t){ + .type = CCB_LEXER_TOKEN_IDENTIFIER, + .string = ccb_string_buffer(str) + });*/ +} +static ccb_lexer_token_t* ccb_lexer_strtok(ccb_t* ccb, ccb_string_t* str) { + ccb_lexer_token_t result= {}; + result.type = CCB_LEXER_TOKEN_STRING; + result.string = ccb_string_buffer(str); + return ccb_lexer_token_copy(ccb, &result); + /*return ccb_lexer_token_copy(ccb, &(ccb_lexer_token_t){ + .type = CCB_LEXER_TOKEN_STRING, + .string = ccb_string_buffer(str) + });*/ +} +static ccb_lexer_token_t* ccb_lexer_punct(ccb_t* ccb, int punct) { + ccb_lexer_token_t tmp = {}; + tmp.type = CCB_LEXER_TOKEN_PUNCT; + tmp.punct = punct; + return ccb_lexer_token_copy(ccb, &tmp); +} +static ccb_lexer_token_t* ccb_lexer_number(ccb_t* ccb, char* string) { + ccb_lexer_token_t tmp = {}; + tmp.type = CCB_LEXER_TOKEN_NUMBER; + tmp.string = string; + return ccb_lexer_token_copy(ccb, &tmp); +} +static ccb_lexer_token_t* ccb_lexer_char(ccb_t* ccb, char value) { + ccb_lexer_token_t tmp = {}; + tmp.type = CCB_LEXER_TOKEN_CHAR; + tmp.character = value; + return ccb_lexer_token_copy(ccb,&tmp); + + /*return ccb_lexer_token_copy(ccb, &(ccb_lexer_token_t){ + .type = CCB_LEXER_TOKEN_CHAR, + .character = 
value + });*/ +} + +static void ccb_lexer_skip_comment_line(ccb_t* ccb) { + for (;;) { + int c = ccb_input_getc(ccb); + //fputc(c, stderr); + if (c == '\n' || c == EOF) + return; + } +} + +static char* ccb_lexer_consume_line(ccb_t* ccb, int* mode) { + char* buffer = calloc(100,1); // TODO: Use compiler MM + if (mode != NULL) *mode = 0; + int n = 0; + int onlywhitespace = 1; + for (;;) { + int c = ccb_input_getc(ccb); + //fprintf(stderr, "Got char '%c' ows=%d mdptr=%p\n", c, onlywhitespace, mode); + if (c == '\n' || c == EOF) { + goto done; + } else if (onlywhitespace && mode != NULL && c == '{') { + //fprintf(stderr, "Setting mode -1\n"); + *mode = 1; + onlywhitespace = 0; + continue; + } else if (mode != NULL && c == '}') { + if (*mode == 1) { + *mode = 0; // single-line with braces treated as single line + } else { + *mode = -1; // Signal last line of section + } + goto done; + } + if (!isspace(c) && c != '\r') { + onlywhitespace = 0; + } + buffer[n] = (char) c; + n++; + if (n >= 99) { + ccb_compile_error(ccb, "Assembler line too long (allows at most 99 chars)"); + } + } + done: + if (onlywhitespace) { + free(buffer); + return ""; + } + // TODO: strdup and free from a large buffer? 
+ return buffer; +} + +static void ccb_lexer_skip_comment_block(ccb_t* ccb) { + enum { + comment_outside, + comment_astrick + } state = comment_outside; + + for (;;) { + int c = ccb_input_getc(ccb); + if (c == '*') + state = comment_astrick; + else if (state == comment_astrick && c == '/') + return; + else + state = comment_outside; + } +} + +static int ccb_lexer_skip(ccb_t* ccb) { + int c; + while ((c = ccb_input_getc(ccb)) != EOF) { + if (isspace(c) || c == '\n' || c == '\r') { + //fprintf(stderr, "Yo got space/newline\n"); + continue; + } + //fprintf(stderr, "Not a space/newline: '%c' 0x%x", c, c); + ccb_input_ungetc(ccb, c); + return c; + } + return EOF; +} + +static ccb_lexer_token_t* ccb_lexer_read_number(ccb_t* ccb, int c) { + ccb_string_t* string = ccb_string_create(); + ccb_string_cat(string, c); + for (;;) { + int p = ccb_input_getc(ccb); + if (!isdigit(p) && !isalpha(p) && p != '.') { + ccb_input_ungetc(ccb, p); + return ccb_lexer_number(ccb, ccb_string_buffer(string)); + } + ccb_string_cat(string, p); + } + return NULL; +} + +static bool ccb_lexer_read_character_octal_brace(ccb_t* ccb, int c, int* r) { + if ('0' <= c && c <= '7') { + *r = (*r << 3) | (c - '0'); + return true; + } + return false; +} + +static int ccb_lexer_read_character_octal(ccb_t* ccb, int c) { + int r = c - '0'; + if (ccb_lexer_read_character_octal_brace(ccb, (c = ccb_input_getc(ccb)), &r)) { + if (!ccb_lexer_read_character_octal_brace(ccb, (c = ccb_input_getc(ccb)), &r)) + ccb_input_ungetc(ccb, c); + } + else + ccb_input_ungetc(ccb, c); + return r; +} + +static int ccb_lexer_read_character_hexadecimal(ccb_t* ccb) { + int c = ccb_input_getc(ccb); + int r = 0; + + if (!isxdigit(c)) + ccb_compile_error(ccb, "malformatted hexadecimal character"); + + for (;; c = ccb_input_getc(ccb)) { + switch (c) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': r = (r << 4) | (c - '0'); continue; + case 'a': case 'b': case 'c': case 'd': case 'e': 
case 'f': r = (r << 4) | (c - 'a' + 10); continue; + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': r = (r << 4) | (c - 'A' + 10); continue;// Fix by Zak - was minusing 'f'!? + + default: + ccb_input_ungetc(ccb, c); + return r; + } + } + return -1; +} + +static int ccb_lexer_read_character_escaped(ccb_t* ccb) { + int c = ccb_input_getc(ccb); + + switch (c) { + case '\'': return '\''; + case '"': return '"'; + case '?': return '?'; + case '\\': return '\\'; + case 'a': return '\a'; + case 'b': return '\b'; + case 'f': return '\f'; + case 'n': return '\n'; + case 'r': return '\r'; + case 't': return '\t'; + case 'v': return '\v'; + case 'e': return '\033'; + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': return ccb_lexer_read_character_octal(ccb, c); + case 'x': return ccb_lexer_read_character_hexadecimal(ccb); + case EOF: + ccb_compile_error(ccb, "malformatted escape sequence"); + + default: + return c; + } +} + +static ccb_lexer_token_t* ccb_lexer_read_character(ccb_t* ccb) { + int c = ccb_input_getc(ccb); + int r = (c == '\\') ? ccb_lexer_read_character_escaped(ccb) : c; + + if (ccb_input_getc(ccb) != '\'') + ccb_compile_error(ccb, "unterminated character"); + + return ccb_lexer_char(ccb, (char)r); +} + +static ccb_lexer_token_t* ccb_lexer_read_string(ccb_t* ccb, int allowcontinued) { + ccb_string_t* string = ccb_string_create(); + for (;;) { + int c = ccb_input_getc(ccb); + if (c == EOF) + ccb_compile_error(ccb, "Expected termination for string literal"); + + if (c == '"') { + /* This used to break the loop as soon as a '"' is found, but now it checks in + * case it's followed by another string part. In that case the processing just + * continues as though there was no break inbetween the strings. 
+ * + * The new behaviour still has to be disabled in some cases, like if we're reading + * a string on a #pragma or line-number line, in which case we don't want to go + * skipping whitespace at the end or it'll read past the intended line and cause + * havoc! + */ + if (!allowcontinued) { + break; + } + int d = ccb_lexer_skip(ccb); + //int d = ccb_input_getc(ccb); + if (d != '"') { + /* If it isn't followed by a string, then we unget the other token's character + * we just read and break the loop knowing our string is completely ended! + * NOTE: We don't actually have to getc/ungetc since the skip function does it for us. + */ + //ccb_input_ungetc(ccb, d); + break; + } else { + /* If the skip function did find a '"' then we read/discard it and continue the loop, + * WITHOUT processing the '"' character as part of the string contents! + */ + ccb_input_getc(ccb); + continue; + } + } + if (c == '\\') + c = ccb_lexer_read_character_escaped(ccb); + ccb_string_cat(string, c); + } + return ccb_lexer_strtok(ccb, string); +} + +static ccb_lexer_token_t* ccb_lexer_read_identifier(ccb_t* ccb, int c1) { + ccb_string_t* string = ccb_string_create(); + ccb_string_cat(string, (char)c1); + + for (;;) { + int c2 = ccb_input_getc(ccb); + if (isalnum(c2) || c2 == '_' || c2 == '$') { + ccb_string_cat(string, c2); + } + else { + ccb_input_ungetc(ccb, c2); + return ccb_lexer_identifier(ccb, string); + } + } + return NULL; +} + +static ccb_lexer_token_t* ccb_lexer_read_reclassify_one(ccb_t* ccb, int expect1, int a, int e) { + int c = ccb_input_getc(ccb); + if (c == expect1) return ccb_lexer_punct(ccb, a); + ccb_input_ungetc(ccb, c); + return ccb_lexer_punct(ccb, e); +} +static ccb_lexer_token_t* ccb_lexer_read_reclassify_two(ccb_t* ccb, int expect1, int a, int expect2, int b, int e) { + int c = ccb_input_getc(ccb); + if (c == expect1) return ccb_lexer_punct(ccb, a); + if (c == expect2) return ccb_lexer_punct(ccb, b); + ccb_input_ungetc(ccb, c); + return ccb_lexer_punct(ccb, e); +} + 
+static ccb_lexer_token_t* ccb_lexer_read_token(ccb_t* ccb); // TODO: Predeclaration required for self-compile +static ccb_lexer_token_t* ccb_lexer_read_token_impl(ccb_t* ccb) { + int c; + if (ccb_lexer_skip(ccb) == EOF) { + //printf("ccb_lexer_read_token_impl: Got EOF\n"); + return NULL; + } + + c = ccb_input_getc(ccb); + + //printf("ccb_lexer_read_token_impl: Got '%c'\n", c); + + switch (c) { + case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': return ccb_lexer_read_number(ccb, c); + case '"': return ccb_lexer_read_string(ccb, 1); + case '\'': return ccb_lexer_read_character(ccb); + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': case 'g': case 'h': case 'i': case 'j': case 'k': + case 'l': case 'm': case 'n': case 'o': case 'p': case 'q': case 'r': case 's': case 't': case 'u': case 'v': + case 'w': case 'x': case 'y': case 'z': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': case 'G': case 'H': case 'I': case 'J': case 'K': + /*not L*/case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R': case 'S': case 'T': case 'U': case 'V': + case 'W': case 'X': case 'Y': case 'Z': + case '$': + case '_': + return ccb_lexer_read_identifier(ccb, c); + + case 'L': + switch ((c = ccb_input_getc(ccb))) { + case '"': return ccb_lexer_read_string(ccb, 1); + case '\'': return ccb_lexer_read_character(ccb); + } + ccb_input_ungetc(ccb, c); + return ccb_lexer_read_identifier(ccb, 'L'); + + case '#': + //fprintf(stderr, "WARNING: Skipping #-line (at input %d)\n", ccb->line); // TODO: Track line numbers reported by preprocessor + c = ccb_input_getc(ccb); + if (c == ' ') { + c = ccb_input_getc(ccb); + if (c >= '0' && c <= '9') { + ccb_lexer_token_t* n = ccb_lexer_read_number(ccb, c); + //fprintf(stderr, "REPORTING LINE %d\n", atoi(n->string)); + ccb->pos.uline = atoi(n->string) - 1; // Apparently it's the number of the NEXT line + c = ccb_input_getc(ccb); + if (c == ' ') { + c = ccb_input_getc(ccb); + if (c 
== '"') { + ccb_lexer_token_t* f = ccb_lexer_read_string(ccb, 0); + //fprintf(stderr, "REPORTING FILE '%s'\n", f->string); + ccb->pos.ufile = f->string; + } else { + ccb_input_ungetc(ccb, c); + } + } else { + ccb_input_ungetc(ccb, c); + } + } else { + ccb_input_ungetc(ccb, c); + } + } else { + ccb_input_ungetc(ccb, c); + } + ccb_lexer_skip_comment_line(ccb); + //ccb->ucol = 1; + return ccb_lexer_read_token(ccb); + + case '/': + switch ((c = ccb_input_getc(ccb))) { + case '/': + ccb_lexer_skip_comment_line(ccb); + return ccb_lexer_read_token(ccb); + case '*': + ccb_lexer_skip_comment_block(ccb); + return ccb_lexer_read_token(ccb); + } + if (c == '=') + return ccb_lexer_punct(ccb, CCB_LEXER_TOKEN_COMPOUND_DIV); + ccb_input_ungetc(ccb, c); + return ccb_lexer_punct(ccb, '/'); + + case '(': case ')': + case ',': case ';': + case '[': case ']': + case '{': case '}': + case '?': case ':': + case '~': +#ifdef CCB_X_OBJC + case '@': +#endif + return ccb_lexer_punct(ccb, c); + + case '+': return ccb_lexer_read_reclassify_two(ccb, '+', CCB_LEXER_TOKEN_INCREMENT, '=', CCB_LEXER_TOKEN_COMPOUND_ADD, '+'); + case '&': return ccb_lexer_read_reclassify_two(ccb, '&', CCB_LEXER_TOKEN_AND, '=', CCB_LEXER_TOKEN_COMPOUND_AND, '&'); + case '|': return ccb_lexer_read_reclassify_two(ccb, '|', CCB_LEXER_TOKEN_OR, '=', CCB_LEXER_TOKEN_COMPOUND_OR, '|'); + case '*': return ccb_lexer_read_reclassify_one(ccb, '=', CCB_LEXER_TOKEN_COMPOUND_MUL, '*'); + case '%': return ccb_lexer_read_reclassify_one(ccb, '=', CCB_LEXER_TOKEN_COMPOUND_MOD, '%'); + case '=': return ccb_lexer_read_reclassify_one(ccb, '=', CCB_LEXER_TOKEN_EQUAL, '='); + case '!': return ccb_lexer_read_reclassify_one(ccb, '=', CCB_LEXER_TOKEN_NEQUAL, '!'); + case '^': return ccb_lexer_read_reclassify_one(ccb, '=', CCB_LEXER_TOKEN_COMPOUND_XOR, '^'); + + case '-': + switch ((c = ccb_input_getc(ccb))) { + case '-': return ccb_lexer_punct(ccb, CCB_LEXER_TOKEN_DECREMENT); + case '>': return ccb_lexer_punct(ccb, CCB_LEXER_TOKEN_ARROW); + 
case '=': return ccb_lexer_punct(ccb, CCB_LEXER_TOKEN_COMPOUND_SUB); + default: + break; + } + ccb_input_ungetc(ccb, c); + return ccb_lexer_punct(ccb, '-'); + + case '<': + if ((c = ccb_input_getc(ccb)) == '=') + return ccb_lexer_punct(ccb, CCB_LEXER_TOKEN_LEQUAL); + if (c == '<') + return ccb_lexer_read_reclassify_one(ccb, '=', CCB_LEXER_TOKEN_COMPOUND_LSHIFT, CCB_LEXER_TOKEN_LSHIFT); + ccb_input_ungetc(ccb, c); + return ccb_lexer_punct(ccb, '<'); + case '>': + if ((c = ccb_input_getc(ccb)) == '=') + return ccb_lexer_punct(ccb, CCB_LEXER_TOKEN_GEQUAL); + if (c == '>') + return ccb_lexer_read_reclassify_one(ccb, '=', CCB_LEXER_TOKEN_COMPOUND_RSHIFT, CCB_LEXER_TOKEN_RSHIFT); + ccb_input_ungetc(ccb, c); + return ccb_lexer_punct(ccb, '>'); + + case '.': + c = ccb_input_getc(ccb); + if (c == '.') { + ccb_string_t* str = ccb_string_create(); + //ccb_string_catf(str, "..%c", ccb_input_getc(ccb)); + ccb_string_catcstr(str, ".."); + ccb_string_cat(str, ccb_input_getc(ccb)); + return ccb_lexer_identifier(ccb, str); + } + ccb_input_ungetc(ccb, c); + return ccb_lexer_punct(ccb, '.'); + + case EOF: + return NULL; + + default: + ccb_compile_error(ccb, "Unexpected character: `%c` (0x%x)", c, c); + } + return NULL; +} + +static ccb_lexer_token_t* ccb_lexer_read_token(ccb_t* ccb) { + ccb_lexer_token_t* result = ccb_lexer_read_token_impl(ccb); + //printf("Got token type %d\n", (result == NULL ? -1 : result->type)); + return result; +} + +bool ccb_lexer_ispunct(ccb_t* ccb, ccb_lexer_token_t* token, int c) { + //printf("Checking '%c' against '%c'\n", c, token->punct); + bool result = (token != NULL) && (token->type == CCB_LEXER_TOKEN_PUNCT) && (token->punct == c); + //printf("Got %s\n", result ? 
"true" : "false"); + return result; +} + +void ccb_lexer_unget(ccb_t* ccb, ccb_lexer_token_t* token) { + if (!token) + return; + ccb_list_push(ccb_lexer_buffer, token); +} + +ccb_lexer_token_t* ccb_lexer_next(ccb_t* ccb) { + if (ccb_list_length(ccb_lexer_buffer) > 0) + return ccb_list_pop(ccb_lexer_buffer); + return ccb_lexer_read_token(ccb); +} + +ccb_lexer_token_t* ccb_lexer_peek(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + ccb_lexer_unget(ccb, token); + return token; +} + +char* ccb_lexer_tokenstr(ccb_t* ccb, ccb_lexer_token_t* token) { + ccb_string_t* string = ccb_string_create(); + if (!token) + return "(null)"; + switch (token->type) { + case CCB_LEXER_TOKEN_PUNCT: + if (token->punct == CCB_LEXER_TOKEN_EQUAL) { + ccb_string_catf(string, "=="); + return ccb_string_buffer(string); + } + case CCB_LEXER_TOKEN_CHAR: + ccb_string_cat(string, token->character); + return ccb_string_buffer(string); + case CCB_LEXER_TOKEN_NUMBER: + //ccb_string_catf(string, "%d", token->integer); + ccb_string_catint(string, token->integer); + return ccb_string_buffer(string); + case CCB_LEXER_TOKEN_STRING: + ccb_string_catf(string, "\"%s\"", token->string); + return ccb_string_buffer(string); + case CCB_LEXER_TOKEN_IDENTIFIER: + return token->string; + default: + break; + } + ccb_compile_error(ccb, "Internal error: unexpected token"); + return NULL; +} + +ccb_data_type_t* ccb_ast_data_table[CCB_AST_DATA_COUNT];// = { + //&(ccb_data_type_t) { CCB_TYPE_VOID, 0, true }, /* void */ + //&(ccb_data_type_t) { CCB_TYPE_LONG, -1, true }, /* long */ + //&(ccb_data_type_t) { CCB_TYPE_LLONG, -1, true }, /* long long */ + //&(ccb_data_type_t) { CCB_TYPE_INT, -1, true }, /* int */ + //&(ccb_data_type_t) { CCB_TYPE_SHORT, -1, true }, /* short */ + //&(ccb_data_type_t) { CCB_TYPE_CHAR, -1, true }, /* char */ + //&(ccb_data_type_t) { CCB_TYPE_FLOAT, -1, true }, /* float */ + //&(ccb_data_type_t) { CCB_TYPE_DOUBLE, -1, true }, /* double */ + //&(ccb_data_type_t) { CCB_TYPE_LDOUBLE, 
-1, true }, /* long double */ + //&(ccb_data_type_t) { CCB_TYPE_LONG, -1, false }, /* unsigned long */ + //&(ccb_data_type_t) { CCB_TYPE_LLONG, -1, false }, /* unsigned long long */ + //#ifdef CCB_X_OBJC // TODO: Fix or remove this? + //& (ccb_data_type_t) { CCB_TYPE_ID, -1, false }, /* objc identifier */ + //#endif +//}; + +ccb_data_type_t* ccb_ast_data_function = NULL; + +ccb_list_t* ccb_ast_locals = NULL; +ccb_list_t* ccb_ast_gotos = NULL; +ccb_list_t* ccb_ast_floats = NULL; ///*&CCB_SENTINEL_LIST*/ccb_list_create(); +ccb_list_t* ccb_ast_strings = NULL; ///*&CCB_SENTINEL_LIST*/ccb_list_create(); + +ccb_table_t* ccb_ast_labels = NULL; +ccb_table_t* ccb_ast_globalenv = NULL; ///*&CCB_SENTINEL_TABLE*/ccb_table_empty(); +ccb_table_t* ccb_ast_localenv = NULL; ///*&CCB_SENTINEL_TABLE*/ccb_table_empty(); +ccb_table_t* ccb_ast_structures = NULL; ///*&CCB_SENTINEL_TABLE*/ccb_table_empty(); +ccb_table_t* ccb_ast_unions = NULL;// /*&CCB_SENTINEL_TABLE*/ccb_table_empty(); + +ccb_table_t* ccb_parse_typedefs = NULL; ///*&CCB_SENTINEL_TABLE*/ccb_table_empty(); + +void ccb_ast_init_t_(ccb_t* ccb, int idx, int typ, int siz, bool sig) { + ccb_ast_data_table[idx] = ccb_memory_allocate(sizeof(ccb_data_type_t)); + ccb_ast_data_table[idx]->type = typ; + ccb_ast_data_table[idx]->size = siz; + ccb_ast_data_table[idx]->sign = sig; +} + +void ccb_ast_init(ccb_t* ccb) { + //printf("Doint AST init...\n"); + + ccb_ast_init_t_(ccb, CCB_AST_DATA_VOID, CCB_TYPE_VOID, 0, true ); /* void */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_LONG, CCB_TYPE_LONG, -1, true ); /* long */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_LLONG, CCB_TYPE_LLONG, -1, true ); /* long long */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_INT, CCB_TYPE_INT, -1, true ); /* int */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_SHORT, CCB_TYPE_SHORT, -1, true ); /* short */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_CHAR, CCB_TYPE_CHAR, -1, true ); /* char */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_FLOAT, CCB_TYPE_FLOAT, -1, true ); /* float */ + ccb_ast_init_t_(ccb, 
CCB_AST_DATA_DOUBLE, CCB_TYPE_DOUBLE, -1, true ); /* double */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_LDOUBLE, CCB_TYPE_LDOUBLE, -1, true ); /* long double */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_UINT, CCB_TYPE_INT, -1, false ); /* unsigned int */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_ULONG, CCB_TYPE_LONG, -1, false ); /* unsigned long */ + ccb_ast_init_t_(ccb, CCB_AST_DATA_ULLONG, CCB_TYPE_LLONG, -1, false ); /* unsigned long long */ + //printf("Doint AST init...\n"); + //#ifdef CCB_X_OBJC // TODO: Fix or remove this? + //ccb_ast_init_t_(ccb, 11, CCB_TYPE_ID, -1, false ); /* objc identifier */ + + //ccb_ast_init_t_(ccb, 0, CCB_TYPE_VOID, 0, true ); /* void */ + //ccb_ast_init_t_(ccb, 1, CCB_TYPE_LONG, -1, true ); /* long */ + //ccb_ast_init_t_(ccb, 2, CCB_TYPE_LLONG, -1, true ); /* long long */ + //ccb_ast_init_t_(ccb, 3, CCB_TYPE_INT, -1, true ); /* int */ + //ccb_ast_init_t_(ccb, 4, CCB_TYPE_SHORT, -1, true ); /* short */ + //ccb_ast_init_t_(ccb, 5, CCB_TYPE_CHAR, -1, true ); /* char */ + //ccb_ast_init_t_(ccb, 6, CCB_TYPE_FLOAT, -1, true ); /* float */ + //ccb_ast_init_t_(ccb, 7, CCB_TYPE_DOUBLE, -1, true ); /* double */ + //ccb_ast_init_t_(ccb, 8, CCB_TYPE_LDOUBLE, -1, true ); /* long double */ + //ccb_ast_init_t_(ccb, 9, CCB_TYPE_LONG, -1, false ); /* unsigned long */ + //ccb_ast_init_t_(ccb, 10, CCB_TYPE_LLONG, -1, false ); /* unsigned long long */ + //printf("Doint AST init...\n"); + //#ifdef CCB_X_OBJC // TODO: Fix or remove this? 
+ //ccb_ast_init_t_(ccb, 11, CCB_TYPE_ID, -1, false ); /* objc identifier */ + + //#endif + //printf("Doint AST init...\n"); + + ccb_lexer_buffer = /*&CCB_SENTINEL_LIST*/ccb_list_create(); + ccb_ast_floats = /*&CCB_SENTINEL_LIST*/ccb_list_create(); + ccb_ast_strings = /*&CCB_SENTINEL_LIST*/ccb_list_create(); + ccb_ast_globalenv = /*&CCB_SENTINEL_TABLE*/ccb_table_empty(); + ccb_ast_structures = /*&CCB_SENTINEL_TABLE*/ccb_table_empty(); + ccb_ast_unions = /*&CCB_SENTINEL_TABLE*/ccb_table_empty(); + ccb_parse_typedefs = /*&CCB_SENTINEL_TABLE*/ccb_table_empty(); + ccb_ast_data_table[CCB_AST_DATA_VOID]->size = 0; + ccb_ast_data_table[CCB_AST_DATA_CHAR]->size = ccb_target_type_size_char(ccb); + ccb_ast_data_table[CCB_AST_DATA_SHORT]->size = ccb_target_type_size_short(ccb); + ccb_ast_data_table[CCB_AST_DATA_INT]->size = ccb_target_type_size_int(ccb); + ccb_ast_data_table[CCB_AST_DATA_LONG]->size = ccb_target_type_size_long(ccb); + ccb_ast_data_table[CCB_AST_DATA_LLONG]->size = ccb_target_type_size_llong(ccb); + ccb_ast_data_table[CCB_AST_DATA_UINT]->size = ccb_target_type_size_int(ccb); + ccb_ast_data_table[CCB_AST_DATA_ULONG]->size = ccb_target_type_size_long(ccb); + ccb_ast_data_table[CCB_AST_DATA_ULLONG]->size = ccb_target_type_size_llong(ccb); + ccb_ast_data_table[CCB_AST_DATA_FLOAT]->size = ccb_target_type_size_float(ccb); + ccb_ast_data_table[CCB_AST_DATA_DOUBLE]->size = ccb_target_type_size_double(ccb); + ccb_ast_data_table[CCB_AST_DATA_LDOUBLE]->size = ccb_target_type_size_ldouble(ccb); + //ccb_ast_data_table[CCB_AST_DATA_ID]->size = ccb_target_type_size_pointer(ccb); + //printf("Doint AST init...\n"); + + ccb_ast_data_table[CCB_AST_DATA_ID] = ccb_ast_pointer(ccb, ccb_ast_data_table[CCB_TYPE_VOID]); + //printf("Done?"); +} + +static ccb_data_type_t* ccb_ast_result_type_impl(ccb_t* ccb, jmp_buf* jmpbuf, char op, ccb_data_type_t* a, ccb_data_type_t* b); // TODO: Predeclaration required for self-compile +static ccb_data_type_t* ccb_ast_result_type_impl(ccb_t* ccb, 
jmp_buf* jmpbuf, char op, ccb_data_type_t* a, ccb_data_type_t* b) { + + if (a->type > b->type) { + ccb_data_type_t* t = a; + a = b; + b = t; + } + + //printf("Getting result type of %d and %d...\n", a->type, b->type); + + if (b->type == CCB_TYPE_POINTER) { + if (op == '=') + return a; + if (op != '+' && op != '-') + goto error; + if (a->type == CCB_TYPE_POINTER) { + return ccb_ast_data_table[CCB_AST_DATA_LONG]; + } + if (!ccb_ast_type_integer(ccb, a)) + goto error; + + return b; + } + + switch (a->type) { + case CCB_TYPE_VOID: + goto error; + case CCB_TYPE_CHAR: + case CCB_TYPE_SHORT: + case CCB_TYPE_INT: + switch (b->type) { + case CCB_TYPE_CHAR: + case CCB_TYPE_SHORT: + case CCB_TYPE_INT: + return ccb_ast_data_table[CCB_AST_DATA_INT]; + case CCB_TYPE_LONG: + case CCB_TYPE_LLONG: + return ccb_ast_data_table[CCB_AST_DATA_LONG]; + case CCB_TYPE_FLOAT: + case CCB_TYPE_DOUBLE: + case CCB_TYPE_LDOUBLE: + return ccb_ast_data_table[CCB_AST_DATA_DOUBLE]; + case CCB_TYPE_ARRAY: + case CCB_TYPE_POINTER: + case CCB_TYPE_FUNCTION: + return b; + default: + break; + } + ccb_compile_error(ccb, "Internal error: ast_result_type %d", b->type); + + case CCB_TYPE_LONG: + case CCB_TYPE_LLONG: + switch (b->type) { + case CCB_TYPE_LONG: + case CCB_TYPE_LLONG: + return ccb_ast_data_table[CCB_AST_DATA_LONG]; + case CCB_TYPE_FLOAT: + case CCB_TYPE_DOUBLE: + case CCB_TYPE_LDOUBLE: + return ccb_ast_data_table[CCB_AST_DATA_DOUBLE]; + case CCB_TYPE_ARRAY: + case CCB_TYPE_POINTER: + return b; + default: + break; + } + ccb_compile_error(ccb, "Internal error: ast_result_type (3)"); + + case CCB_TYPE_FLOAT: + if (b->type == CCB_TYPE_FLOAT || b->type == CCB_TYPE_DOUBLE || b->type == CCB_TYPE_LDOUBLE) + return ccb_ast_data_table[CCB_AST_DATA_DOUBLE]; + goto error; + + case CCB_TYPE_DOUBLE: + case CCB_TYPE_LDOUBLE: + if (b->type == CCB_TYPE_DOUBLE || b->type == CCB_TYPE_LDOUBLE) + return ccb_ast_data_table[CCB_AST_DATA_DOUBLE]; + goto error; + + //case CCB_TYPE_POINTER: + case CCB_TYPE_ARRAY: + if 
(b->type != CCB_TYPE_ARRAY) + goto error; + return ccb_ast_result_type_impl(ccb, jmpbuf, op, a->pointer, b->pointer); + + case CCB_TYPE_STRUCTURE: + if (b->type != CCB_TYPE_STRUCTURE || b->size != a->size) { + goto error; + } + return a; + + default: + ccb_compile_error(ccb, "ICE ast_result_type_impl %d", a->type); + } + +error: + longjmp(*jmpbuf, 1); + return NULL; +} + +ccb_data_type_t* ccb_ast_result_type(ccb_t* ccb, int op, ccb_data_type_t* a, ccb_data_type_t* b) { + + switch (op) { + case '!': + case '~': + case '<': + case '>': + case '&': + case '%': + case CCB_AST_TYPE_EQUAL: + case CCB_AST_TYPE_GEQUAL: + case CCB_AST_TYPE_LEQUAL: + case CCB_AST_TYPE_NEQUAL: + case CCB_AST_TYPE_AND: + case CCB_AST_TYPE_OR: + return ccb_ast_data_table[CCB_AST_DATA_INT]; + case CCB_AST_TYPE_LSHIFT: + case CCB_AST_TYPE_RSHIFT: + return a; + } + + jmp_buf jmpbuf; + if (setjmp(jmpbuf) == 0) { + return ccb_ast_result_type_impl(ccb, + &jmpbuf, + op, + ccb_ast_array_convert(ccb, a), + ccb_ast_array_convert(ccb, b) + ); + } + + ccb_compile_error(ccb, + "incompatible operands `%s' and `%s' in `%c` operation", + &(ccb_ast_type_string(ccb, a)[0]), + &(ccb_ast_type_string(ccb, b)[0]), + op + ); + + return NULL; +} + +ccb_ast_t* ccb_ast_copy(ccb_t* ccb, ccb_ast_t* ast) { + ccb_ast_t* copy = ccb_memory_allocate(sizeof(ccb_ast_t)); + if (copy == NULL) { + fprintf(stderr, "FATAL ERROR: Out of memory!\n"); + exit(-1); + } + //*copy = *ast; + memcpy(copy, ast, sizeof(ccb_ast_t)); + return copy; +} + +ccb_ast_t* ccb_ast_structure_reference(ccb_t* ccb, ccb_data_type_t* type, ccb_ast_t* structure, char* name) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STRUCT; + tmp.ctype = type; + tmp.structure = structure; + tmp.field = name; + return ccb_ast_copy(ccb, &tmp); +} + +ccb_ast_t* ccb_ast_new_unary(ccb_t* ccb, int type, ccb_data_type_t* data, ccb_ast_t* operand) { + ccb_ast_t tmp = {}; + tmp.type = type; + tmp.ctype = data; + tmp.unary.operand = operand; + if (operand == NULL) { + 
((char*)NULL)[0] = 0; // Trigger debugger + } + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to the nested ".unary.operand" + return ccb_ast_copy(ccb, &(ccb_ast_t) { + .type = type, + .ctype = data, + .unary.operand = operand + }); + */ +} + +ccb_ast_t* ccb_ast_new_binary(ccb_t* ccb, int type, ccb_ast_t* left, ccb_ast_t* right) { + ccb_ast_t tmp = {}; + tmp.type = type; + /* TODO? I'll disable this until I understand the intended "function pointer" syntax. + if (right->type == CCB_AST_TYPE_FUNCTION) { // Special support for assigning a function pointer without the & + right = ccb_ast_new_unary(ccb, CCB_AST_TYPE_ADDRESS, ccb_ast_pointer(ccb, right->ctype), right); + }*/ + tmp.ctype = ccb_ast_result_type(ccb, type, left->ctype, right->ctype); + ccb_ast_t* ast = ccb_ast_copy(ccb, &tmp); + if (type != '=' + && ccb_ast_array_convert(ccb, left->ctype)->type != CCB_TYPE_POINTER + && ccb_ast_array_convert(ccb, right->ctype)->type == CCB_TYPE_POINTER) { + + ast->left = right; + ast->right = left; + } + else if ((type == '+' || type == '-') + && ccb_ast_array_convert(ccb, left->ctype)->type == CCB_TYPE_POINTER + && ccb_ast_array_convert(ccb, right->ctype)->type == CCB_TYPE_POINTER) { + ast->left = left; + ast->right = right; + return ccb_ast_new_binary(ccb, '/', ast, ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], left->ctype->pointer->size)); + } else { + ast->left = left; + ast->right = right; + } + return ast; +} + +/* Conceptually like a binary operator, except a bit simpler. Note that comma expressions + * have nothing to do with the commas used as separators elsewhere in the language. 
+ */ +ccb_ast_t* ccb_ast_new_comma(ccb_t* ccb, int type, ccb_ast_t* left, ccb_ast_t* right) { + ccb_ast_t tmp = {}; + tmp.type = type; + tmp.left = left; + tmp.right = right; + tmp.ctype = right->ctype; + ccb_ast_t* ast = ccb_ast_copy(ccb, &tmp); + return ast; +} + +ccb_ast_t* ccb_ast_new_integer(ccb_t* ccb, ccb_data_type_t* type, long long int value) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_LITERAL; + tmp.ctype = type; + tmp.integer = value; + return ccb_ast_copy(ccb, &tmp); +} + +ccb_ast_t* ccb_ast_new_floating(ccb_t* ccb, ccb_data_type_t* type, double value) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_LITERAL; + tmp.ctype = type; + tmp.floating.value = value; + ccb_ast_t* ast = ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't compile yet due to nested .floating.value... + ccb_ast_t* ast = ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_LITERAL, + .ctype = type, + .floating.value = value + }); + */ + ccb_list_push(ccb_ast_floats, ast); + return ast; +} + +ccb_ast_t* ccb_ast_new_string(ccb_t* ccb, char* value) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STRING; + tmp.ctype = ccb_ast_array(ccb, ccb_ast_data_table[CCB_AST_DATA_CHAR], strlen(value) + 1); + tmp.string.data = value; + tmp.string.label = ccb_ast_label(ccb); + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields .string.data/.string.label + return ccb_ast_copy(ccb, &(ccb_ast_t) { + .type = CCB_AST_TYPE_STRING, + .ctype = ccb_ast_array(ccb, ccb_ast_data_table[CCB_AST_DATA_CHAR], strlen(value) + 1), + .string.data = value, + .string.label = ccb_ast_label(ccb) + }); + */ +} + +ccb_ast_t* ccb_ast_variable_local(ccb_t* ccb, ccb_data_type_t* type, char* name) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_VAR_LOCAL; + tmp.ctype = type; + tmp.variable.name = name; + ccb_ast_t* ast = ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile due to nested .variable.name... 
+ ccb_ast_t* ast = ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_VAR_LOCAL, + .ctype = type, + .variable.name = name + }); + */ + if (ccb_ast_localenv) + ccb_table_insert(ccb_ast_localenv, name, ast); + if (ccb_ast_locals) + ccb_list_push(ccb_ast_locals, ast); + return ast; +} + +/* Applies prefix to standard variable names. */ +static void ccb_ast_fixglobal(ccb_t* ccb, ccb_ast_t* ast) { + if (strlen(ccb->sym_prefix) != 0) { + char* tmp = calloc(strlen(ccb->sym_prefix) + strlen(ast->variable.label) + 1, 1); + strcat(tmp, ccb->sym_prefix); + strcat(tmp, ast->variable.label); + ast->variable.label = tmp; + } +} + +ccb_ast_t* ccb_ast_variable_global(ccb_t* ccb, ccb_data_type_t* type, char* name) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_VAR_GLOBAL; + tmp.ctype = type; + tmp.variable.name = name; + tmp.variable.label = name; + ccb_ast_fixglobal(ccb, &tmp); + ccb_ast_t* ast = ccb_ast_copy(ccb, &tmp); + /* TODO: Doesn't compile due to nested .variable.name + ccb_ast_t* ast = ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_VAR_GLOBAL, + .ctype = type, + .variable.name = name, + .variable.label = name + }); + */ + ccb_table_insert(ccb_ast_globalenv, name, ast); + return ast; +} + +/* Applies prefix to standard function names. 
*/ +static void ccb_ast_fixfunction(ccb_t* ccb, ccb_ast_t* ast) { + if (ast->function.callconv == 0 /*&& strlen(ccb->sym_prefix) != 0*/) { + char* tmp = calloc(strlen(ccb->sym_prefix) + strlen(ast->function.name) + 1, 1); + strcat(tmp, ccb->sym_prefix); + strcat(tmp, ast->function.name); + ast->function.name = tmp; + } +} + +ccb_ast_t* ccb_ast_call(ccb_t* ccb, ccb_data_type_t* type, char* name, ccb_list_t* arguments, ccb_list_t* parametertypes, int callconv) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_CALL; + tmp.ctype = type; + tmp.function.call.paramtypes = parametertypes; + tmp.function.call.args = arguments; + tmp.function.name = name; + tmp.function.callconv = callconv; + ccb_ast_fixfunction(ccb, &tmp); + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t) { + .type = CCB_AST_TYPE_CALL, + .ctype = type, + .function.call.paramtypes = parametertypes, + .function.call.args = arguments, + .function.name = name + });*/ +} + +ccb_ast_t* ccb_ast_ptrcall(ccb_t* ccb, ccb_data_type_t* type, char* name, ccb_ast_t* callable, ccb_list_t* arguments, ccb_list_t* parametertypes, int callconv) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_PTRCALL; + tmp.ctype = type; + tmp.function.call.paramtypes = parametertypes; + tmp.function.call.args = arguments; + tmp.function.name = name; + tmp.function.call.callable = callable; + tmp.function.callconv = callconv; + ccb_ast_fixfunction(ccb, &tmp); + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t) { + .type = CCB_AST_TYPE_PTRCALL, + .ctype = type, + .function.call.paramtypes = parametertypes, + .function.call.args = arguments, + .function.name = name, + .function.call.callable = callable + });*/ +} + +void ccb_ast_initpos(ccb_t* ccb, ccb_ast_t* ast) { + ast->pos.line = ccb->pos.line; + ast->pos.col = ccb->pos.col; + ast->pos.uline = ccb->pos.uline; + 
ast->pos.ucol = ccb->pos.ucol; + ast->pos.ufile = ccb->pos.ufile; +} + +void ccb_ast_setpos(ccb_t* ccb, ccb_ast_t* ast) { + ccb->pos.line = ast->pos.line; + ccb->pos.col = ast->pos.col; + ccb->pos.uline = ast->pos.uline; + ccb->pos.ucol = ast->pos.ucol; + ccb->pos.ufile = ast->pos.ufile; +} + +ccb_ast_t* ccb_ast_function(ccb_t* ccb, ccb_data_type_t* ret, char* name, ccb_list_t* params, ccb_ast_t* body, ccb_list_t* locals) { + ccb_ast_t tmp = {}; + ccb_ast_initpos(ccb, &tmp); + tmp.type = CCB_AST_TYPE_FUNCTION; + tmp.ctype = ret; + tmp.function.params = params; + tmp.function.locals = locals; + tmp.function.name = name; + tmp.function.body = body; + ccb_ast_fixfunction(ccb, &tmp); + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t) { + .type = CCB_AST_TYPE_FUNCTION, + .ctype = ret, + .function.name = name, + .function.params = params, + .function.locals = locals, + .function.body = body + });*/ +} + +ccb_ast_t* ccb_ast_declaration(ccb_t* ccb, ccb_ast_t* var, ccb_list_t* init) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_DECLARATION; + tmp.ctype = NULL; + tmp.decl.var = var; + tmp.decl.init = init; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t) { + .type = CCB_AST_TYPE_DECLARATION, + .ctype = NULL, + .decl.var = var, + .decl.init = init, + });*/ +} + +ccb_ast_t* ccb_ast_initializer(ccb_t* ccb, ccb_ast_t* value, ccb_data_type_t* to, int offset) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_INITIALIZER; + tmp.init.value = value; + tmp.init.offset = offset; + tmp.init.type = to; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_INITIALIZER, + .init.value = value, + .init.offset = offset, + .init.type = to + });*/ +} + +ccb_ast_t* ccb_ast_ternary(ccb_t* ccb, ccb_data_type_t* type, 
ccb_ast_t* cond, ccb_ast_t* then, ccb_ast_t* last) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_EXPRESSION_TERNARY; + tmp.ctype = type; + tmp.ifstmt.cond = cond; + tmp.ifstmt.then = then; + tmp.ifstmt.last = last; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_EXPRESSION_TERNARY, + .ctype = type, + .ifstmt.cond = cond, + .ifstmt.then = then, + .ifstmt.last = last + });*/ +} + +static ccb_ast_t* ccb_ast_for_intermediate(ccb_t* ccb, int type, ccb_ast_t* init, ccb_ast_t* cond, ccb_ast_t* step, ccb_ast_t* body) { + ccb_ast_t tmp = {}; + tmp.type = type; + tmp.ctype = NULL; + tmp.forstmt.init = init; + tmp.forstmt.cond = cond; + tmp.forstmt.step = step; + tmp.forstmt.body = body; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = type, + .ctype = NULL, + .forstmt.init = init, + .forstmt.cond = cond, + .forstmt.step = step, + .forstmt.body = body + });*/ +} + +ccb_ast_t* ccb_ast_switch(ccb_t* ccb, ccb_ast_t* expr, ccb_ast_t* body) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_SWITCH; + tmp.switchstmt.expr = expr; + tmp.switchstmt.body = body; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_STATEMENT_SWITCH, + .switchstmt.expr = expr, + .switchstmt.body = body + });*/ +} + +ccb_ast_t* ccb_ast_case(ccb_t* ccb, int value) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_CASE; + tmp.casevalue = value; + return ccb_ast_copy(ccb, &tmp); +} + +ccb_ast_t* ccb_ast_make(ccb_t* ccb, int type) { + ccb_ast_t tmp = {}; + tmp.type = type; + return ccb_ast_copy(ccb, &tmp); +} + +ccb_ast_t* ccb_ast_if(ccb_t* ccb, ccb_ast_t* cond, ccb_ast_t* then, ccb_ast_t* last) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_IF; + tmp.ctype = 
NULL; + tmp.ifstmt.cond = cond; + tmp.ifstmt.then = then; + tmp.ifstmt.last = last; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_STATEMENT_IF, + .ctype = NULL, + .ifstmt.cond = cond, + .ifstmt.then = then, + .ifstmt.last = last + });*/ +} + +ccb_ast_t* ccb_ast_for(ccb_t* ccb, ccb_ast_t* init, ccb_ast_t* cond, ccb_ast_t* step, ccb_ast_t* body) { + return ccb_ast_for_intermediate(ccb, CCB_AST_TYPE_STATEMENT_FOR, init, cond, step, body); +} +ccb_ast_t* ccb_ast_while(ccb_t* ccb, ccb_ast_t* cond, ccb_ast_t* body) { + return ccb_ast_for_intermediate(ccb, CCB_AST_TYPE_STATEMENT_WHILE, NULL, cond, NULL, body); +} +ccb_ast_t* ccb_ast_do(ccb_t* ccb, ccb_ast_t* cond, ccb_ast_t* body) { + return ccb_ast_for_intermediate(ccb, CCB_AST_TYPE_STATEMENT_DO, NULL, cond, NULL, body); +} + +ccb_ast_t* ccb_ast_goto(ccb_t* ccb, char* label) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_GOTO; + tmp.gotostmt.label = label; + tmp.gotostmt.where = NULL; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_STATEMENT_GOTO, + .gotostmt.label = label, + .gotostmt.where = NULL + });*/ +} + +ccb_ast_t* ccb_ast_asm(ccb_t* ccb, ccb_list_t* code) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_ASM; + tmp.asmstmt.code = code; + return ccb_ast_copy(ccb, &tmp); +} + +ccb_ast_t* ccb_ast_new_label(ccb_t* ccb, char* label) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_LABEL; + tmp.gotostmt.label = label; + tmp.gotostmt.where = NULL; + return ccb_ast_copy(ccb, &tmp); + /* TODO: This doesn't self-compile yet due to nested fields + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_STATEMENT_LABEL, + .gotostmt.label = label, + .gotostmt.where = NULL + });*/ +} + +ccb_ast_t* ccb_ast_return(ccb_t* ccb, int callconv, ccb_data_type_t* 
returntype, ccb_ast_t* value) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_RETURN; + tmp.ctype = returntype; + tmp.returnstmt = value; + tmp.return_callconv = callconv; + return ccb_ast_copy(ccb, &tmp); + /* + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_STATEMENT_RETURN, + .ctype = returntype, + .returnstmt = value + });*/ +} + +ccb_ast_t* ccb_ast_compound(ccb_t* ccb, ccb_list_t* statements) { + ccb_ast_t tmp = {}; + tmp.type = CCB_AST_TYPE_STATEMENT_COMPOUND; + tmp.ctype = NULL; + tmp.compound = statements; + return ccb_ast_copy(ccb, &tmp); + /* + return ccb_ast_copy(ccb, &(ccb_ast_t){ + .type = CCB_AST_TYPE_STATEMENT_COMPOUND, + .ctype = NULL, + .compound = statements + });*/ +} + +ccb_data_type_t* ccb_ast_structure_field(ccb_t* ccb, ccb_data_type_t* type, int offset) { + ccb_data_type_t* field = ccb_ast_type_copy(ccb, type); + field->offset = offset; + return field; +} + +ccb_data_type_t* ccb_ast_structure_new(ccb_t* ccb, ccb_table_t* fields, int size, bool isstruct) { + ccb_data_type_t tmp = {}; + tmp.type = CCB_TYPE_STRUCTURE; + tmp.size = size; + tmp.fields = fields; + tmp.isstruct = isstruct; + return ccb_ast_type_copy(ccb, &tmp/*(ccb_data_type_t) { + .type = CCB_TYPE_STRUCTURE, + .size = size, + .fields = fields, + .isstruct = isstruct + }*/); +} + +#ifdef _WIN32 +#define getpid _getpid +#endif + +int ccb_ast_label_index = 0; +char* ccb_ast_label(ccb_t* ccb) { + //static int index = 0; + ccb_string_t* string = ccb_string_create(); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 + || (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_string_catf(string, "L%d_%d", getpid(), ccb_ast_label_index++); // TODO: Fix this and/or the FASM code above to use a better unique base at least + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86) { + if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + if 
(ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + //ccb_string_catf(string, "L%d_%d", getpid(), ccb_ast_label_index++); // NOTE: getpid() isn't a good way to get a random number, but is enough to be unlikely to mix up with another run's local data + ccb_string_catcstr(string, "L"); + ccb_string_catint(string, getpid()); + ccb_string_catcstr(string, "_"); + ccb_string_catint(string, ccb_ast_label_index++); + } + else { + //ccb_string_catf(string, "L%d", ccb_ast_label_index++); + ccb_string_catcstr(string, "L"); + ccb_string_catint(string, ccb_ast_label_index++); + } + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + //ccb_string_catf(string, "L%d_%d", getpid(), ccb_ast_label_index++); // NOTE: getpid() isn't a good way to get a random number, but is enough to be unlikely to mix up with another run's local data + ccb_string_catcstr(string, "L"); + ccb_string_catint(string, getpid()); + ccb_string_catcstr(string, "_"); + ccb_string_catint(string, ccb_ast_label_index++); + } + else { + //ccb_string_catf(string, "L%d", ccb_ast_label_index++); + ccb_string_catcstr(string, "L"); + ccb_string_catint(string, ccb_ast_label_index++); + } + } + else { + //ccb_string_catf(string, ".L%d", ccb_ast_label_index++); + ccb_string_catcstr(string, ".L"); + ccb_string_catint(string, ccb_ast_label_index++); + } + } + else { + //ccb_string_catf(string, ".L%d", ccb_ast_label_index++); + ccb_string_catcstr(string, ".L"); + ccb_string_catint(string, ccb_ast_label_index++); + } + return ccb_string_buffer(string); +} + +bool ccb_ast_type_integer(ccb_t* ccb, ccb_data_type_t* type) { + return type->type == CCB_TYPE_CHAR + || type->type == CCB_TYPE_SHORT + || type->type == CCB_TYPE_INT + || type->type == CCB_TYPE_LONG + || type->type == CCB_TYPE_LLONG; +} + +bool ccb_ast_type_floating(ccb_t* ccb, ccb_data_type_t* type) { + return type->type == CCB_TYPE_FLOAT + || type->type == CCB_TYPE_DOUBLE + || type->type == 
CCB_TYPE_LDOUBLE; +} + +ccb_data_type_t* ccb_ast_type_copy(ccb_t* ccb, ccb_data_type_t* type) { + return memcpy(ccb_memory_allocate(sizeof(ccb_data_type_t)), type, sizeof(ccb_data_type_t)); +} + +ccb_data_type_t* ccb_ast_type_copy_incomplete(ccb_t* ccb, ccb_data_type_t* type) { + if (!type) + return NULL; + return (type->length == -1) + ? ccb_ast_type_copy(ccb, type) + : type; +} + +ccb_data_type_t* ccb_ast_type_create(ccb_t* ccb, ccb_type_t type, bool sign) { + + ccb_data_type_t* t = ccb_memory_allocate(sizeof(ccb_data_type_t)); + + t->type = type; + t->sign = sign; + + switch (type) { + case CCB_TYPE_VOID: t->size = 0; break; + case CCB_TYPE_CHAR: t->size = ccb_target_type_size_char(ccb); break; + case CCB_TYPE_SHORT: t->size = ccb_target_type_size_short(ccb); break; + case CCB_TYPE_INT: t->size = ccb_target_type_size_int(ccb); break; + case CCB_TYPE_LONG: t->size = ccb_target_type_size_long(ccb); break; + case CCB_TYPE_LLONG: t->size = ccb_target_type_size_llong(ccb); break; + case CCB_TYPE_FLOAT: t->size = ccb_target_type_size_float(ccb); break; + case CCB_TYPE_DOUBLE: t->size = ccb_target_type_size_double(ccb); break; + case CCB_TYPE_LDOUBLE: t->size = ccb_target_type_size_ldouble(ccb); break; + default: + ccb_compile_error(ccb, "ICE"); + } + + return t; +} + +ccb_data_type_t* ccb_ast_type_stub(ccb_t* ccb) { + ccb_data_type_t tmp = {}; + tmp.type = CCB_TYPE_CDECL; + tmp.size = 0; + return ccb_ast_copy(ccb, &tmp); + /*return ccb_ast_type_copy(ccb, &(ccb_data_type_t) { + .type = CCB_TYPE_CDECL, + .size = 0 + });*/ +} + +ccb_data_type_t* ccb_ast_prototype(ccb_t* ccb, ccb_data_type_t* returntype, ccb_list_t* paramtypes, bool dots) { + ccb_data_type_t tmp = {}; + tmp.type = CCB_TYPE_FUNCTION; + tmp.returntype = returntype; + tmp.parameters = paramtypes; + tmp.hasdots = dots; + return ccb_ast_copy(ccb, &tmp); + /*return ccb_ast_type_copy(ccb, &(ccb_data_type_t){ + .type = CCB_TYPE_FUNCTION, + .returntype = returntype, + .parameters = paramtypes, + .hasdots = dots + 
});*/ +} + +ccb_data_type_t* ccb_ast_array(ccb_t* ccb, ccb_data_type_t* type, int length) { + ccb_data_type_t tmp = {}; + tmp.type = CCB_TYPE_ARRAY; + tmp.pointer = type; + tmp.size = ((length < 0) ? -1 : (type->size * length)); /* TODO: This requires more brackets on self-compilation */ + tmp.length = length; + return ccb_ast_type_copy(ccb, &tmp/*(ccb_data_type_t){ + .type = CCB_TYPE_ARRAY, + .pointer = type, + .size = ((length < 0) ? -1 : type->size * length), + .length = length + }*/); +} + +ccb_data_type_t* ccb_ast_array_convert(ccb_t* ccb, ccb_data_type_t* type) { + if (type->type != CCB_TYPE_ARRAY) + return type; + return ccb_ast_pointer(ccb, type->pointer); +} + +ccb_data_type_t* ccb_ast_pointer(ccb_t* ccb, ccb_data_type_t* type) { + ccb_data_type_t tmp = {}; + tmp.type = CCB_TYPE_POINTER; + tmp.pointer = type; + tmp.size = ccb_target_type_size_pointer(ccb); + return ccb_ast_type_copy(ccb, &tmp/*(ccb_data_type_t){ + .type = CCB_TYPE_POINTER, + .pointer = type, + .size = ccb_target_type_size_pointer(ccb) + }*/); +} + +const char* ccb_ast_type_string(ccb_t* ccb, ccb_data_type_t* type) { + ccb_string_t* string; + + switch (type->type) { + case CCB_TYPE_VOID: return "void"; + case CCB_TYPE_INT: return "int"; + case CCB_TYPE_CHAR: return "char"; + case CCB_TYPE_LONG: return "long"; + case CCB_TYPE_LLONG: return "long long"; + //TODO... 
case CCB_TYPE_ULLONG: return "unsigned long long"; + case CCB_TYPE_SHORT: return "short"; + case CCB_TYPE_FLOAT: return "float"; + case CCB_TYPE_DOUBLE: return "double"; + case CCB_TYPE_LDOUBLE: return "long double"; +#ifdef CCB_X_OBJC + case CCB_TYPE_ID: return "id"; +#endif + + case CCB_TYPE_FUNCTION: + string = ccb_string_create(); + ccb_string_cat(string, '('); + for (ccb_list_iterator_t* it = ccb_list_iterator(type->parameters); !ccb_list_iterator_end(it); ) { + ccb_data_type_t* next = ccb_list_iterator_next(it); + ccb_string_catf(string, "%s", ccb_ast_type_string(ccb, next)); + if (!ccb_list_iterator_end(it)) + ccb_string_cat(string, ','); + } + ccb_string_catf(string, ") -> %s", ccb_ast_type_string(ccb, type->returntype)); + return ccb_string_buffer(string); + + case CCB_TYPE_POINTER: + string = ccb_string_create(); + ccb_string_catf(string, "%s*", ccb_ast_type_string(ccb, type->pointer)); + return ccb_string_buffer(string); + + case CCB_TYPE_ARRAY: + string = ccb_string_create(); + /*ccb_string_catf( + string, + "%s[%d]", + ccb_ast_type_string(ccb, type->pointer), + type->length + );*/ + ccb_string_catcstr(string, ccb_ast_type_string(ccb, type->pointer)); + ccb_string_cat(string, '['); + ccb_string_catint(string, type->length); + ccb_string_cat(string, ']'); + return ccb_string_buffer(string); + + case CCB_TYPE_STRUCTURE: + string = ccb_string_create(); + ccb_string_catcstr(string, "(struct..."); + //for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_table_values(type->fields)); !ccb_list_iterator_end(it); ) + // ccb_string_catf(string, " (%s)", ccb_ast_type_string(ccb, ccb_list_iterator_next(it))); + ccb_string_cat(string, ')'); + return ccb_string_buffer(string); + + default: + break; + } + return NULL; +} + +static void ccb_ast_string_unary(ccb_t* ccb, ccb_string_t* string, const char* op, ccb_ast_t* ast) { + ccb_string_catf(string, "(%s %s)", op, ccb_ast_string(ccb, ast->unary.operand)); +} + +static void ccb_ast_string_binary(ccb_t* ccb, 
ccb_string_t* string, const char* op, ccb_ast_t* ast) { + ccb_string_catf(string, "(%s %s %s)", op, ccb_ast_string(ccb, ast->left), ccb_ast_string(ccb, ast->right)); +} + +static void ccb_ast_string_initialization_declaration(ccb_t* ccb, ccb_string_t* string, ccb_list_t* initlist) { + for (ccb_list_iterator_t* it = ccb_list_iterator(initlist); !ccb_list_iterator_end(it); ) { + ccb_ast_t* init = ccb_list_iterator_next(it); + ccb_string_catf(string, "%s", ccb_ast_string(ccb, init)); + if (!ccb_list_iterator_end(it)) + ccb_string_cat(string, ' '); + } +} + +static void ccb_ast_string_impl(ccb_t* ccb, ccb_string_t* string, ccb_ast_t* ast); // TODO: Predeclaration required for self-compile! +static void ccb_ast_string_impl(ccb_t* ccb, ccb_string_t* string, ccb_ast_t* ast) { + char* left; + char* right; + + if (!ast) { + ccb_string_catf(string, "(null)"); + return; + } + + switch (ast->type) { + case CCB_AST_TYPE_LITERAL: + switch (ast->ctype->type) { + case CCB_TYPE_INT: + case CCB_TYPE_SHORT: + //ccb_string_catf(string, "%d", ast->integer); + ccb_string_catint(string, ast->integer); + break; + + case CCB_TYPE_FLOAT: + case CCB_TYPE_DOUBLE: + ccb_string_catf(string, "%f", ast->floating.value); + break; + + case CCB_TYPE_LONG: + ccb_string_catf(string, "%ldL", ast->integer); + break; + + case CCB_TYPE_CHAR: + if (ast->integer == '\n') + ccb_string_catf(string, "'\n'"); + else if (ast->integer == '\\') + ccb_string_catf(string, "'\\\\'"); + else if (ast->integer == '\0') + ccb_string_catf(string, "'\\0'"); + else + ccb_string_catf(string, "'%c'", ast->integer); + break; + + default: + ccb_compile_error(ccb, "Internal error: ast_string_impl"); + break; + } + break; + + case CCB_AST_TYPE_STRING: + ccb_string_catf(string, "\"%s\"", ccb_string_quote(ast->string.data)); + break; + + case CCB_AST_TYPE_VAR_LOCAL: + ccb_string_catf(string, "%s", ast->variable.name); + if (ast->variable.init) { + ccb_string_cat(string, '('); + ccb_ast_string_initialization_declaration(ccb, string, 
ast->variable.init); + ccb_string_cat(string, ')'); + } + break; + + case CCB_AST_TYPE_VAR_GLOBAL: + ccb_string_catf(string, "%s", ast->variable.name); + break; + + case CCB_AST_TYPE_CALL: + ccb_string_catf(string, "(%s)%s(", ccb_ast_type_string(ccb, ast->ctype), ast->function.name); + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->function.call.args); !ccb_list_iterator_end(it); ) { + ccb_string_catf(string, "%s", ccb_ast_string(ccb, ccb_list_iterator_next(it))); + if (!ccb_list_iterator_end(it)) + ccb_string_cat(string, ','); + } + ccb_string_cat(string, ')'); + break; + + case CCB_AST_TYPE_FUNCTION: + ccb_string_catf(string, "(%s)%s(", ccb_ast_type_string(ccb, ast->ctype), ast->function.name); + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->function.params); !ccb_list_iterator_end(it); ) { + ccb_ast_t* param = ccb_list_iterator_next(it); + ccb_string_catf(string, "%s %s", ccb_ast_type_string(ccb, param->ctype), ccb_ast_string(ccb, param)); + if (!ccb_list_iterator_end(it)) + ccb_string_cat(string, ','); + } + ccb_string_cat(string, ')'); + ccb_ast_string_impl(ccb, string, ast->function.body); + break; + + case CCB_AST_TYPE_DECLARATION: + ccb_string_catf(string, "(decl %s %s", + ccb_ast_type_string(ccb, ast->decl.var->ctype), + ast->decl.var->variable.name + ); + ccb_ast_string_initialization_declaration(ccb, string, ast->decl.init); + ccb_string_cat(string, ')'); + break; + + case CCB_AST_TYPE_INITIALIZER: + ccb_string_catf(string, "%s@%d", ccb_ast_string(ccb, ast->init.value), ast->init.offset); + break; + + case CCB_AST_TYPE_STATEMENT_COMPOUND: + ccb_string_cat(string, '{'); + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->compound); !ccb_list_iterator_end(it); ) { + ccb_ast_string_impl(ccb, string, ccb_list_iterator_next(it)); + ccb_string_cat(string, ';'); + } + ccb_string_cat(string, '}'); + break; + + case CCB_AST_TYPE_STRUCT: + ccb_ast_string_impl(ccb, string, ast->structure); + ccb_string_cat(string, '.'); + ccb_string_catf(string, 
ast->field); + break; + + case CCB_AST_TYPE_EXPRESSION_TERNARY: + ccb_string_catf(string, "(? %s %s %s)", + ccb_ast_string(ccb, ast->ifstmt.cond), + ccb_ast_string(ccb, ast->ifstmt.then), + ccb_ast_string(ccb, ast->ifstmt.last) + ); + break; + + case CCB_AST_TYPE_STATEMENT_IF: + ccb_string_catf(string, "(if %s %s", ccb_ast_string(ccb, ast->ifstmt.cond), ccb_ast_string(ccb, ast->ifstmt.then)); + if (ast->ifstmt.last) + ccb_string_catf(string, " %s", ccb_ast_string(ccb, ast->ifstmt.last)); + ccb_string_cat(string, ')'); + break; + + case CCB_AST_TYPE_STATEMENT_FOR: + ccb_string_catf(string, "(for %s %s %s %s)", + ccb_ast_string(ccb, ast->forstmt.init), + ccb_ast_string(ccb, ast->forstmt.cond), + ccb_ast_string(ccb, ast->forstmt.step), + ccb_ast_string(ccb, ast->forstmt.body) + ); + break; + + case CCB_AST_TYPE_STATEMENT_WHILE: + ccb_string_catf(string, "(while %s %s)", + ccb_ast_string(ccb, ast->forstmt.cond), + ccb_ast_string(ccb, ast->forstmt.body) + ); + break; + + case CCB_AST_TYPE_STATEMENT_DO: + ccb_string_catf(string, "(do %s %s)", + ccb_ast_string(ccb, ast->forstmt.cond), + ccb_ast_string(ccb, ast->forstmt.body) + ); + break; + + case CCB_AST_TYPE_STATEMENT_RETURN: + ccb_string_catf(string, "(return %s)", ccb_ast_string(ccb, ast->returnstmt)); + break; + + case CCB_AST_TYPE_ADDRESS: ccb_ast_string_unary(ccb, string, "&", ast); break; + case CCB_AST_TYPE_DEREFERENCE: ccb_ast_string_unary(ccb, string, "*", ast); break; + case CCB_LEXER_TOKEN_INCREMENT: ccb_ast_string_unary(ccb, string, "++", ast); break; + case CCB_LEXER_TOKEN_DECREMENT: ccb_ast_string_unary(ccb, string, "--", ast); break; + case '!': ccb_ast_string_unary(ccb, string, "!", ast); break; + case '&': ccb_ast_string_binary(ccb, string, "&", ast); break; + case '|': ccb_ast_string_binary(ccb, string, "|", ast); break; + case CCB_LEXER_TOKEN_AND: ccb_ast_string_binary(ccb, string, "&&", ast); break; + case CCB_LEXER_TOKEN_OR: ccb_ast_string_binary(ccb, string, "||", ast); break; + case 
CCB_LEXER_TOKEN_GEQUAL: ccb_ast_string_binary(ccb, string, ">=", ast); break; + case CCB_LEXER_TOKEN_LEQUAL: ccb_ast_string_binary(ccb, string, "<=", ast); break; + case CCB_LEXER_TOKEN_NEQUAL: ccb_ast_string_binary(ccb, string, "!=", ast); break; + + case CCB_AST_TYPE_EXPRESSION_CAST: + ccb_string_catf(string, "((%s) -> (%s) %s)", + ccb_ast_type_string(ccb, ast->unary.operand->ctype), + ccb_ast_type_string(ccb, ast->ctype), + ccb_ast_string(ccb, ast->unary.operand) + ); + break; + + case CCB_AST_TYPE_STATEMENT_CASE: + ccb_string_catcstr(string, "(todo: case)"); + break; + + default: + fprintf(stderr, "Unknown node type %d (0x100+%d)\n", ast->type, ast->type - 0x100); + left = ccb_ast_string(ccb, ast->left); + right = ccb_ast_string(ccb, ast->right); + if (ast->type == CCB_LEXER_TOKEN_EQUAL) + ccb_string_catf(string, "(== %s %s)", left, right); + else + ccb_string_catf(string, "(%c %s %s)", ast->type, left, right); + break; + } +} + +char* ccb_ast_string(ccb_t* ccb, ccb_ast_t* ast) { + if (ast == NULL) { + return "?NULL?"; + } + ccb_string_t* string = ccb_string_create(); + ccb_ast_string_impl(ccb, string, ast); + return ccb_string_buffer(string); +} + +static ccb_ast_t* ccb_parse_expression(ccb_t* ccb); +static ccb_ast_t* ccb_parse_expression_withcomma(ccb_t* ccb); +static ccb_ast_t* ccb_parse_expression_unary(ccb_t* ccb); +static ccb_ast_t* ccb_parse_expression_intermediate(ccb_t* ccb, int); + +static ccb_ast_t* ccb_parse_statement_compound(ccb_t* ccb); +static void ccb_parse_statement_declaration(ccb_t* ccb, ccb_list_t*); +static ccb_ast_t* ccb_parse_statement(ccb_t* ccb); + +static ccb_data_type_t* ccb_parse_declaration_specification(ccb_t* ccb, ccb_storage_t*); +static ccb_list_t* ccb_parse_initializer_declaration(ccb_t* ccb, ccb_data_type_t* type); +static ccb_data_type_t* ccb_parse_declarator(ccb_t* ccb, char**, ccb_data_type_t*, ccb_list_t*, ccb_cdecl_t); +static void ccb_parse_declaration(ccb_t* ccb, ccb_list_t*, ccb_ast_t* (*)(ccb_t*, ccb_data_type_t*, 
char*)); + +static void ccb_parse_function_parameter(ccb_t* ccb, ccb_data_type_t**, char**, bool); +static ccb_data_type_t* ccb_parse_function_parameters(ccb_t* ccb, ccb_list_t*, ccb_data_type_t*); + +static bool ccb_parse_type_check(ccb_t* ccb, ccb_lexer_token_t* token); + +static void ccb_parse_semantic_lvalue(ccb_t* ccb, ccb_ast_t* ast, bool allowfunc) { + switch (ast->type) { + case CCB_AST_TYPE_FUNCTION: + if (allowfunc) { + return; + } + break; + case CCB_AST_TYPE_VAR_LOCAL: + case CCB_AST_TYPE_VAR_GLOBAL: + case CCB_AST_TYPE_DEREFERENCE: + case CCB_AST_TYPE_STRUCT: + return; + } + ccb_compile_error(ccb, "expected lvalue, `%s' isn't a valid lvalue", ccb_ast_string(ccb, ast)); +} + +static void ccb_parse_semantic_notvoid(ccb_t* ccb, ccb_data_type_t* type) { + if (type->type == CCB_TYPE_VOID) + ccb_compile_error(ccb, "void not allowed in expression"); +} + +static void ccb_parse_semantic_integer(ccb_t* ccb, ccb_ast_t* node) { + if (!ccb_ast_type_integer(ccb, node->ctype)) + ccb_compile_error(ccb, "expected integer type, `%s' isn't a valid integer type", ccb_ast_string(ccb, node)); +} + +static bool ccb_parse_semantic_rightassoc(ccb_t* ccb, ccb_lexer_token_t* token) { + return (token->punct == '='); +} + +static void ccb_parse_expect(ccb_t* ccb, char punct) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (!ccb_lexer_ispunct(ccb, token, punct)) + ccb_compile_error(ccb, "expected `%c`, got %s instead", punct, ccb_lexer_tokenstr(ccb, token)); +} + +static bool ccb_parse_identifier_check(ccb_t* ccb, ccb_lexer_token_t* token, const char* identifier) { + return token->type == CCB_LEXER_TOKEN_IDENTIFIER && !strcmp(token->string, identifier); +} + +int ccb_parse_evaluate(ccb_t* ccb, ccb_ast_t* ast) { + switch (ast->type) { + case CCB_AST_TYPE_LITERAL: + if (ccb_ast_type_integer(ccb, ast->ctype)) + return ast->integer; // TODO: Should this return a 64-bit value?? 
+ //fprintf(stderr, "NOTE: Type->type is %d\n", ast->ctype->type); + ccb_compile_error(ccb, "not a valid integer constant expression `%s'", ccb_ast_string(ccb, ast)); + break; + + case '+': return ccb_parse_evaluate(ccb, ast->left) + ccb_parse_evaluate(ccb, ast->right); + case '-': return ccb_parse_evaluate(ccb, ast->left) - ccb_parse_evaluate(ccb, ast->right); + case '*': return ccb_parse_evaluate(ccb, ast->left) * ccb_parse_evaluate(ccb, ast->right); + case '/': return ccb_parse_evaluate(ccb, ast->left) / ccb_parse_evaluate(ccb, ast->right); + case '<': return ccb_parse_evaluate(ccb, ast->left) < ccb_parse_evaluate(ccb, ast->right); + case '>': return ccb_parse_evaluate(ccb, ast->left) > ccb_parse_evaluate(ccb, ast->right); + case '^': return ccb_parse_evaluate(ccb, ast->left) ^ ccb_parse_evaluate(ccb, ast->right); + case '%': return ccb_parse_evaluate(ccb, ast->left) % ccb_parse_evaluate(ccb, ast->right); + case CCB_LEXER_TOKEN_AND: return ccb_parse_evaluate(ccb, ast->left) && ccb_parse_evaluate(ccb, ast->right); + case CCB_LEXER_TOKEN_OR: return ccb_parse_evaluate(ccb, ast->left) || ccb_parse_evaluate(ccb, ast->right); + case CCB_AST_TYPE_EQUAL /*CCB_LEXER_TOKEN_EQUAL*/: return ccb_parse_evaluate(ccb, ast->left) == ccb_parse_evaluate(ccb, ast->right); + case CCB_LEXER_TOKEN_LEQUAL: return ccb_parse_evaluate(ccb, ast->left) <= ccb_parse_evaluate(ccb, ast->right); + case CCB_LEXER_TOKEN_GEQUAL: return ccb_parse_evaluate(ccb, ast->left) >= ccb_parse_evaluate(ccb, ast->right); + case CCB_LEXER_TOKEN_NEQUAL: return ccb_parse_evaluate(ccb, ast->left) != ccb_parse_evaluate(ccb, ast->right); + case CCB_AST_TYPE_LSHIFT /* CCB_LEXER_TOKEN_LSHIFT*/: return ccb_parse_evaluate(ccb, ast->left) << ccb_parse_evaluate(ccb, ast->right); + case CCB_LEXER_TOKEN_RSHIFT: return ccb_parse_evaluate(ccb, ast->left) >> ccb_parse_evaluate(ccb, ast->right); // TODO sign differences + case CCB_AST_TYPE_EXPRESSION_TERNARY: + if(ccb_parse_evaluate(ccb, ast->ifstmt.cond) != 0) { + return 
ccb_parse_evaluate(ccb, ast->ifstmt.then); + } else { + return ccb_parse_evaluate(ccb, ast->ifstmt.last); + } + + /* Deal with unary operations differently */ + case '!': return !ccb_parse_evaluate(ccb, ast->unary.operand); + case '~': return ~ccb_parse_evaluate(ccb, ast->unary.operand); + case CCB_AST_TYPE_EXPRESSION_CAST: return ccb_parse_evaluate(ccb, ast->unary.operand); + + default: + ccb_compile_error(ccb, "not a valid integer constant expression `%s'", ccb_ast_string(ccb, ast)); + } + return -1; +} + +static int ccb_parse_operator_priority(ccb_t* ccb, ccb_lexer_token_t* token) { + switch (token->punct) { + case '[': + case '.': + case CCB_LEXER_TOKEN_ARROW: + return 1; + case CCB_LEXER_TOKEN_INCREMENT: + case CCB_LEXER_TOKEN_DECREMENT: + return 2; + case '*': + case '/': + case '%': + return 3; + case '+': + case '-': + return 4; + case CCB_LEXER_TOKEN_LSHIFT: + case CCB_LEXER_TOKEN_RSHIFT: + return 5; + case '<': + case '>': + return 6; + case CCB_LEXER_TOKEN_EQUAL: + case CCB_LEXER_TOKEN_GEQUAL: + case CCB_LEXER_TOKEN_LEQUAL: + case CCB_LEXER_TOKEN_NEQUAL: + return 7; + case '&': + return 8; + case '^': + return 9; + case '|': + return 10; + case CCB_LEXER_TOKEN_AND: + return 11; + case CCB_LEXER_TOKEN_OR: + return 12; + case '?': + return 13; + case '=': + case CCB_LEXER_TOKEN_COMPOUND_ADD: + case CCB_LEXER_TOKEN_COMPOUND_AND: + case CCB_LEXER_TOKEN_COMPOUND_DIV: + case CCB_LEXER_TOKEN_COMPOUND_LSHIFT: + case CCB_LEXER_TOKEN_COMPOUND_MOD: + case CCB_LEXER_TOKEN_COMPOUND_MUL: + case CCB_LEXER_TOKEN_COMPOUND_OR: + case CCB_LEXER_TOKEN_COMPOUND_RSHIFT: + case CCB_LEXER_TOKEN_COMPOUND_SUB: + case CCB_LEXER_TOKEN_COMPOUND_XOR: + return 14; + } + return -1; +} + +static ccb_list_t* ccb_parse_parameter_types(ccb_t* ccb, ccb_list_t* parameters) { + ccb_list_t* list = ccb_list_create(); + for (ccb_list_iterator_t* it = ccb_list_iterator(parameters); !ccb_list_iterator_end(it); ) + ccb_list_push(list, ((ccb_ast_t*)ccb_list_iterator_next(it))->ctype); + return list; 
+} + +static void ccb_parse_function_typecheck(ccb_t* ccb, const char* name, ccb_list_t* parameters, ccb_list_t* arguments) { + fprintf(stderr, "Parsing function '%s'\n", name); + if (ccb_list_length(arguments) < ccb_list_length(parameters)) + ccb_compile_error(ccb, "too few arguments for function `%s'", name); + ccb_list_iterator_t *jt = ccb_list_iterator(arguments); // TODO: Secondary variables not handled properly in "for" + for (ccb_list_iterator_t* it = ccb_list_iterator(parameters); !ccb_list_iterator_end(jt); ) + { + ccb_data_type_t* parameter = ccb_list_iterator_next(it); + ccb_data_type_t* argument = ccb_list_iterator_next(jt); + + if (parameter) { + //printf("Comparing formal params\n"); + ccb_ast_result_type(ccb, '=', parameter, argument); + } else { + //printf("Comparing to default int\n"); + ccb_ast_result_type(ccb, '=', argument, ccb_ast_data_table[CCB_AST_DATA_INT]); + } + } +} + +static ccb_ast_t* ccb_parse_finish_call(ccb_t* ccb, char* name, ccb_list_t* args) { + //fprintf(stderr, "TESTING: '%s'\n", name); + ccb_ast_t* func = ccb_table_find(ccb_ast_localenv, name); + bool isptr = false; + if (func) { + ccb_data_type_t* declaration = func->ctype; + if (declaration->type == CCB_TYPE_POINTER) { + if (declaration->callconv != 0) { + fprintf(stderr, "NOTE: Ignoring non-default calling convention %d in indirect call\n", declaration->callconv); + } + declaration = declaration->pointer; + isptr = true; + } + if (declaration->type != CCB_TYPE_FUNCTION) + ccb_compile_error(ccb, "expected a function name, `%s' isn't a function", name); + ccb_parse_function_typecheck(ccb, name, declaration->parameters, ccb_parse_parameter_types(ccb, args)); + if (declaration->callconv != 0) { + fprintf(stderr, "NOTE: Ignoring non-default calling convention %d in normal call\n", declaration->callconv); + } + if (isptr) { + return ccb_ast_ptrcall(ccb, declaration->returntype, name, func, args, declaration->parameters, func->ctype->callconv); + } + else { + return 
ccb_ast_call(ccb, declaration->returntype, name, args, declaration->parameters, func->ctype->callconv); + } + } + if (strlen(name) >= 6 && !strcmp(name+(strlen(name)-6), "__init")) { + // Lazily ignore warnings for undeclared __init functions (these won't take any parameters anyway...) + } else { + ccb_compile_warn(ccb, "function `%s' is not declared, integer return type implied", name); + } + return ccb_ast_call(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], name, args, ccb_list_create(), 0 /* default callconv */); +} + +static char* ccb_parse_runtime_fname(ccb_t* ccb, char* name) { + /* In the future this function might convert internal names to library names + * according to user preferences (e.g. for calling into an obj-c runtime for + * @"strings" instead of calling the default function). + */ + return name; +} + +static ccb_ast_t* ccb_parse_runtime_call(ccb_t* ccb, char* name, ccb_list_t* list) { + return ccb_parse_finish_call(ccb, ccb_parse_runtime_fname(ccb, name), list); +} + +static ccb_ast_t* ccb_parse_runtime_call_0(ccb_t* ccb, char* name) { + ccb_list_t* list = ccb_list_create(); + return ccb_parse_runtime_call(ccb, name, list); +} + +static ccb_ast_t* ccb_parse_runtime_call_1(ccb_t* ccb, char* name, ccb_ast_t* arg1) { + ccb_list_t* list = ccb_list_create(); + ccb_list_push(list, arg1); + return ccb_parse_runtime_call(ccb, name, list); +} + +static ccb_ast_t* ccb_parse_runtime_call_2(ccb_t* ccb, char* name, ccb_ast_t* arg1, ccb_ast_t* arg2) { + ccb_list_t* list = ccb_list_create(); + ccb_list_push(list, arg1); + ccb_list_push(list, arg2); + return ccb_parse_runtime_call(ccb, name, list); +} + +static ccb_ast_t* ccb_parse_runtime_call_3(ccb_t* ccb, char* name, ccb_ast_t* arg1, ccb_ast_t* arg2, ccb_ast_t* arg3) { + ccb_list_t* list = ccb_list_create(); + ccb_list_push(list, arg1); + ccb_list_push(list, arg2); + ccb_list_push(list, arg3); + return ccb_parse_runtime_call(ccb, name, list); +} + +static ccb_ast_t* ccb_parse_runtime_call_4(ccb_t* ccb, char* name, 
ccb_ast_t* arg1, ccb_ast_t* arg2, ccb_ast_t* arg3, ccb_ast_t* arg4) { + ccb_list_t* list = ccb_list_create(); + ccb_list_push(list, arg1); + ccb_list_push(list, arg2); + ccb_list_push(list, arg3); + ccb_list_push(list, arg4); + return ccb_parse_runtime_call(ccb, name, list); +} +/* TODO... +static ccb_ast_t* ccb_parse_runtime_call_5(ccb_t* ccb, char* name, ccb_ast_t* arg1, ccb_ast_t* arg2, ccb_ast_t* arg3, ccb_ast_t* arg4, ccb_ast_t* arg5) { + ccb_list_t* list = ccb_list_create(); + ccb_list_push(list, arg1); + ccb_list_push(list, arg2); + ccb_list_push(list, arg3); + ccb_list_push(list, arg4); + ccb_list_push(list, arg5); + return ccb_parse_runtime_call(ccb, name, list); +} + +static ccb_ast_t* ccb_parse_runtime_call_6(ccb_t* ccb, char* name, ccb_ast_t* arg1, ccb_ast_t* arg2, ccb_ast_t* arg3, ccb_ast_t* arg4, ccb_ast_t* arg5, ccb_ast_t* arg6) { + ccb_list_t* list = ccb_list_create(); + ccb_list_push(list, arg1); + ccb_list_push(list, arg2); + ccb_list_push(list, arg3); + ccb_list_push(list, arg4); + ccb_list_push(list, arg5); + ccb_list_push(list, arg6); + return ccb_parse_runtime_call(ccb, name, list); +} +*/ + +static ccb_ast_t* ccb_parse_function_call(ccb_t* ccb, char* name) { + ccb_list_t* list = ccb_list_create(); + for (;;) { + + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ')')) + break; + ccb_lexer_unget(ccb, token); + ccb_list_push(list, ccb_parse_expression(ccb)); + + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ')')) + break; + if (!ccb_lexer_ispunct(ccb, token, ',')) + ccb_compile_error(ccb, "unexpected token `%s'", ccb_lexer_tokenstr(ccb, token)); + } + + return ccb_parse_finish_call(ccb, name, list); +} + +static int ccb_parse_checkstart(ccb_t* ccb, const char* start, char* name) { + int i; + for (i = 0; start[i] != 0; i++) { + if (name[i] != start[i]) { + return 0; + } + } + return 1; +} + +static const char* ccb_argbuiltin_buffername(ccb_t* ccb); + +static ccb_ast_t* 
ccb_parse_generic(ccb_t* ccb, char* name) { + ccb_ast_t* var = NULL; + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + + if (ccb_lexer_ispunct(ccb, token, '(')) + return ccb_parse_function_call(ccb, name); + + ccb_lexer_unget(ccb, token); + + if (!(var = ccb_table_find(ccb_ast_localenv, name))) { + /*char* gname = calloc(strlen(ccb->sym_prefix) + strlen(name) + 1, 1); + strcat(gname, ccb->sym_prefix); + strcat(gname, name);*/ + if (!(var = ccb_table_find(ccb_ast_globalenv, name))) { // TODO: Check if this causes any unwanted side-effects (added by Zak to handle enums in some situations) + /* __func__ (to get the function name as a string constant) and some similar builtins have been added + * as simple fallbacks to variable resolution. + */ + if (!strcmp("__func__", name)) { + var = ccb_ast_new_string(ccb, ccb->func_name == NULL ? "(NULL)" : ccb->func_name); + ccb_list_push(ccb_ast_strings, var); + return var; + } else if (!strcmp("__builtin_func_callconv", name)) { + /* NOTE: At time of writing these values will not be reliable except (hopefully) for checking whether + * the __classic_call convention (#101) is used. This can be helpful to implement type checks when defining + * vararg macros. + */ + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb->func_callconv); + } else if (!strcmp("__builtin_default_callconv", name)) { + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb->default_callconv); + } + /* Undefined __builtin_argX aliases can still be used, but are replaced with references to a special variable to indicate that they're undefined. 
*/ + if (ccb_parse_checkstart(ccb, "__builtin_arg", name) && ccb_argbuiltin_buffername(ccb) != NULL) { + return ccb_parse_generic(ccb, ccb_argbuiltin_buffername(ccb)); + } + ccb_compile_error(ccb, "undefined variable `%s'", name); + } + } + + return var; +} + +#ifdef CCB_X_OBJC + +ccb_data_type_t* ccb_parse_objx_methodtype(ccb_t* ccb, ccb_data_type_t* cltype, int isclass, const char* sel) { + if (isclass && cltype != NULL) { + return ccb_table_find(cltype->cmethods, sel); + } else if (cltype != NULL) { + return ccb_table_find(cltype->imethods, sel); + } else { + return NULL; + } +} + +ccb_ast_t* ccb_parse_objc_useinitstub(ccb_t* ccb, ccb_ast_t* expr); +static ccb_ast_t* ccb_parse_objc_message(ccb_t* ccb) { + int isclass = 0; + const char* clname = NULL; + ccb_data_type_t* cltype = NULL; + /* On entry, the '[' token has already been read (but not the target expression). */ + ccb_ast_t* target; + ccb_lexer_token_t* token = NULL; + bool issuper = false; + token = ccb_lexer_next(ccb); + if (token->type == CCB_LEXER_TOKEN_IDENTIFIER && !strcmp("super", token->string)) { + target = ccb_parse_generic(ccb, "self"); + issuper = true; + if (ccb->oop_ismeta) { + cltype = ccb_table_find(ccb_parse_typedefs, ccb->oop_classname); + isclass = 1; + } + } else { + ccb_lexer_unget(ccb, token); + target = ccb_parse_expression(ccb); + } + //printf("[] Got AST type 0x100+%d\n", target->type - 0x100); + if (target->type == CCB_AST_TYPE_VAR_GLOBAL && ccb_table_find(ccb_parse_typedefs, target->variable.name) != NULL) { // TODO: Actually check if it's a class or not... 
+ if (target->variable.isclassobj) { + //printf("Sending to a class!\n"); + clname = target->variable.name; + cltype = ccb_table_find(ccb_parse_typedefs, clname); + target = ccb_parse_objc_useinitstub(ccb, target); + isclass = 1; + } + } else if (target->type == CCB_AST_TYPE_VAR_LOCAL && !strcmp("self", target->variable.name)) { + //printf("Got self call...\n"); + if (ccb->oop_ismeta) { + //printf("Using type '%s'\n", ccb->oop_classname); + cltype = ccb_table_find(ccb_parse_typedefs, ccb->oop_classname); + isclass = 1; + } + } + + + char* selector = NULL; + ccb_ast_t* selast = NULL; + ccb_ast_t* lookupast = NULL; + ccb_data_type_t* rtype = ccb_ast_data_table[CCB_AST_DATA_ID]; + + token = ccb_lexer_next(ccb); + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER) { + ccb_compile_error(ccb, "unexpected token `%s'", ccb_lexer_tokenstr(ccb, token)); + } + + selector = token->string; + + int maxargs = 4; + int nargs = 0; + ccb_ast_t* args[4]; + ccb_list_t* list = ccb_list_create(); + ccb_list_t* tlist = ccb_list_create(); + ccb_list_push(list, target); + ccb_list_push(tlist, ccb_ast_data_table[CCB_AST_DATA_ID]); + + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ':')) { + char* oldsel = selector; + selector = calloc(strlen(selector) + 150, 1); + if (selector == NULL) { + ccb_compile_error(ccb, "Out of memory?"); + return NULL; // Unreachable + } + strcat(selector, oldsel); + strcat(selector, ":"); + + bool hasargs = true; + + while (hasargs) { + /* Append argument */ + if (nargs >= maxargs) { + ccb_compile_error(ccb, "Too many arguments", NULL); + } + ccb_ast_t* a = ccb_parse_expression(ccb); + args[nargs] = a; + nargs++; + ccb_list_push(list, a); + ccb_list_push(tlist, a->ctype); + /* Check if followed by anotherPart: */ + token = ccb_lexer_next(ccb); + if (token->type == CCB_LEXER_TOKEN_IDENTIFIER) { + strcat(selector, token->string); + ccb_parse_expect(ccb, ':'); + strcat(selector, ":"); + } + else { + ccb_lexer_unget(ccb, token); + token = NULL; + 
hasargs = false; + } + } + //ccb_compile_error(ccb, "messages with arguments are not yet supported", NULL); + } + else { + ccb_lexer_unget(ccb, token); + token = NULL; + } + + ccb_parse_expect(ccb, ']'); + + selast = ccb_ast_new_string(ccb, selector); + ccb_list_push(ccb_ast_strings, selast); + selast = ccb_parse_runtime_call_2(ccb, "__oop_literal_selector", selast, ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], strlen(selector))); + + if (issuper) { + lookupast = ccb_parse_runtime_call_3(ccb, "__oop_superfunc", ccb_parse_generic(ccb, ccb->oop_classname), ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], ccb->oop_ismeta?1:0), selast); + } else { + lookupast = ccb_parse_runtime_call_2(ccb, "__oop_methodfunc", target, selast); + } + + ccb_data_type_t* mtype = NULL; + + if (!isclass) { + if (target->ctype->type == CCB_TYPE_POINTER && target->ctype->pointer->imethods != NULL) { + cltype = target->ctype->pointer; + } + } + + mtype = ccb_parse_objx_methodtype(ccb, cltype, isclass, selector); + + if (mtype == NULL) { + ccb_compile_warn(ccb, "No declaration can be found for %s method '%s', assuming 'id' return type", isclass ? "class" : "instance", selector); + return ccb_ast_ptrcall(ccb, rtype, "[...]", lookupast, list, tlist, 0);//101); + } else { + return ccb_ast_ptrcall(ccb, mtype->returntype, "[...]", lookupast, list, mtype->parameters, 0);//101); + } + + /*switch (nargs) { + case 0: + return ccb_parse_runtime_call_2(ccb, "__oop_send0", target, selast); + case 1: + return ccb_parse_runtime_call_3(ccb, "__oop_send1", target, selast, args[0]); + case 2: + return ccb_parse_runtime_call_4(ccb, "__oop_send2", target, selast, args[0], args[1]); + / * TODO... 
+ case 3: + return ccb_parse_runtime_call_5(ccb, "__oop_send3", target, selast, args[0], args[1], args[2]); + case 4: + return ccb_parse_runtime_call_6(ccb, "__oop_send4", target, selast, args[0], args[1], args[2], args[3]); + * / + default: + ccb_compile_error(ccb, "Language Extension Error: Unsupported number of parameters to message send", NULL); + }*/ +} +#endif + +#ifdef CCB_X_OBJC +static ccb_ast_t* ccb_parse_objc_selector(ccb_t* ccb) { + /* On entry, the '@' and 'selector' have been read already (but not the '('). */ + ccb_lexer_token_t* token = NULL; + char* selector = NULL; + char* tmpstr = NULL; + ccb_ast_t* selast = NULL; + + ccb_parse_expect(ccb, '('); + + token = ccb_lexer_next(ccb); + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER) { + ccb_compile_error(ccb, "unexpected token `%s'", ccb_lexer_tokenstr(ccb, token)); + } + + selector = calloc(strlen(token->string) + 100, 1); + strcat(selector, token->string); + + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ':')) { + tmpstr = selector; + selector = strcat(tmpstr, ":"); + //free(tmpstr); XXX Wrong place? 
+ while ((token = ccb_lexer_next(ccb))->type == CCB_LEXER_TOKEN_IDENTIFIER) { + tmpstr = selector; + selector = strcat(tmpstr, token->string); + //free(tmpstr); + ccb_parse_expect(ccb, ':'); + tmpstr = selector; + selector = strcat(tmpstr, ":"); + //free(tmpstr); + } + } + ccb_lexer_unget(ccb, token); + token = NULL; + + ccb_parse_expect(ccb, ')'); + + selast = ccb_ast_new_string(ccb, selector); + ccb_list_push(ccb_ast_strings, selast); + + return ccb_parse_runtime_call_2(ccb, "__oop_literal_selector", selast, ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], strlen(selector))); +} +#endif + +static ccb_ast_t* ccb_parse_number_integer(ccb_t* ccb, const char* string) { + const char* p = string; + int base = 10; + + if (!ccb_strncasecmp(string, "0x", 2)) { + base = 16; + p++; + p++; + } + else if (string[0] == '0' && string[1] != '\0') { + base = 8; + p++; + } + + while (isxdigit(*p)) { + if (base == 10 && isalpha(*p)) + ccb_compile_error(ccb, "invalid character in decimal literal"); + if (base == 8 && !('0' <= *p && *p <= '7')) + ccb_compile_error(ccb, "invalid character in octal literal"); + p++; + } + + if (!ccb_strcasecmp(p, "u")) + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_UINT], strtol(string, NULL, base)); + if (!ccb_strcasecmp(p, "l")) + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], strtol(string, NULL, base)); + if (!ccb_strcasecmp(p, "ul") || !ccb_strcasecmp(p, "lu")) + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_ULONG], strtoul(string, NULL, base)); + if (!ccb_strcasecmp(p, "ll")) + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LLONG], strtoll(string, NULL, base)); + if (!ccb_strcasecmp(p, "ull") || !ccb_strcasecmp(p, "llu")) + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_ULLONG], strtoull(string, NULL, base)); + if (*p != '\0') + ccb_compile_error(ccb, "invalid suffix for literal"); + + // TODO: Was initially using "long long"/strtoll but 
decided to opt for long just in case that was causing issues + // I don't think that was the problem (long long always == long?) so I changed it back, but left a todo just in case. + long long value = strtoll(string, NULL, base); + return (/*(value & ~(long)UINT_MAX) != 0*/value != (long long) (int) value) + ? ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], value) + : ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], value); +} + +static ccb_ast_t* ccb_parse_number_floating(ccb_t* ccb, const char* string) { + const char* p = string; + char* end; + + while (p[1]) + p++; + + ccb_ast_t* ast; + if (*p == 'l' || *p == 'L') + ast = ccb_ast_new_floating(ccb, ccb_ast_data_table[CCB_AST_DATA_LDOUBLE], strtold(string, &end)); + else if (*p == 'f' || *p == 'F') + ast = ccb_ast_new_floating(ccb, ccb_ast_data_table[CCB_AST_DATA_FLOAT], strtof(string, &end)); + else { + ast = ccb_ast_new_floating(ccb, ccb_ast_data_table[CCB_AST_DATA_DOUBLE], strtod(string, &end)); + p++; + } + + if (end != p) + ccb_compile_error(ccb, "malformatted float literal"); + + return ast; +} + +static ccb_ast_t* ccb_parse_number(ccb_t* ccb, const char* string) { + /* Fix to detect hex before checking if the number has an "e". */ + if (string[0] == '0' && string[1] == 'x') { + return ccb_parse_number_integer(ccb, string); + } + return strpbrk(string, ".pe") + ? 
ccb_parse_number_floating(ccb, string) + : ccb_parse_number_integer(ccb, string); +} + +static ccb_data_type_t* ccb_parse_sizeoflike_type(ccb_t* ccb, const char* debugname, bool typename, bool allowvoid); + +#ifdef CCB_X_OBJX +static ccb_ast_t* ccb_parse_objx_typeof(ccb_t* ccb, ccb_data_type_t* t); // TODO: Predeclaration required for self-compile +static ccb_ast_t* ccb_parse_objx_typeof(ccb_t* ccb, ccb_data_type_t* t) { + char* name = NULL; + bool isflt = false; + + switch (t->type) { + case CCB_TYPE_VOID: + return ccb_parse_runtime_call_0(ccb, "__oop_typeof_void"); + case CCB_TYPE_CHAR: + name = "char"; + break; + case CCB_TYPE_SHORT: + name = "short"; + break; + case CCB_TYPE_INT: + name = "int"; + break; + case CCB_TYPE_LONG: + name = "long"; + break; + case CCB_TYPE_LLONG: + name = "long long"; + break; + case CCB_TYPE_FLOAT: + name = "float"; + isflt = true; + break; + case CCB_TYPE_DOUBLE: + name = "double"; + isflt = true; + break; + case CCB_TYPE_LDOUBLE: + name = "long double"; + isflt = true; + break; + case CCB_TYPE_ARRAY: + return ccb_parse_runtime_call_2(ccb, "__oop_typeof_array", ccb_parse_objx_typeof(ccb, t->pointer), ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], t->length)); + case CCB_TYPE_POINTER: + return ccb_parse_runtime_call_1(ccb, "__oop_typeof_pointer", ccb_parse_objx_typeof(ccb, t->pointer)); + case CCB_TYPE_STRUCTURE: + if (t->classname != NULL) { + ccb_ast_t* strast = ccb_ast_new_string(ccb, t->classname); + ccb_list_push(ccb_ast_strings, strast); + return ccb_parse_runtime_call_1(ccb, "__oop_typeof_dynamic", strast); + } + ccb_compile_error(ccb, "can't get @typeof structures yet"); + case CCB_TYPE_FUNCTION: + { + //ccb_compile_error(ccb, "can't get @typeof functions yet"); + ccb_ast_t* params; + + if (t->parameters == NULL || ccb_list_length(t->parameters) == 0) { + params = ccb_parse_runtime_call_1(ccb, "__oop_typeof_params_create", ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], 0)); + } + else { + 
params = ccb_parse_runtime_call_1(ccb, "__oop_typeof_params_create", ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb_list_length(t->parameters))); + ccb_list_iterator_t* iter = ccb_list_iterator(t->parameters); + + while (!ccb_list_iterator_end(iter)) { + params = ccb_parse_runtime_call_2(ccb, "__oop_typeof_params_with", params, ccb_parse_objx_typeof(ccb, ccb_list_iterator_next(iter))); + } + } + + return ccb_parse_runtime_call_3(ccb, "__oop_typeof_function", + ccb_parse_objx_typeof(ccb, t->returntype), + params, + ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], t->hasdots ? true : t->sign)); + } + default: + ccb_compile_error(ccb, "bad type code (%d)", t->type); + } + + // If the switch fell through to here then it must be a simple integer or float type + // The name and isflt variables should have been set during or before the switch statement. + + ccb_ast_t* strast = ccb_ast_new_string(ccb, name); + ccb_list_push(ccb_ast_strings, strast); + return ccb_parse_runtime_call_4(ccb, isflt ? "__oop_typeof_float" : "__oop_typeof_integer", + ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], t->size * 8), + ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], isflt ? 
true : t->sign),
+        strast,
+        ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], strlen(name)));
+}
+#endif
+
+/*
+ * Wraps the value expression `v` in a runtime boxing call.
+ *
+ * A type descriptor for v->ctype is built with ccb_parse_objx_typeof and passed
+ * as the first argument. The callee depends on the data type code of `v`:
+ * __oop_box_void for void, __oop_box_i for integer types, __oop_box_f for
+ * floating types and __oop_box_p for arrays, pointers and functions.
+ * Structures and unknown type codes are compile errors.
+ */
+static ccb_ast_t* ccb_parse_pedanticbox(ccb_t* ccb, ccb_ast_t* v) {
+    ccb_ast_t* t = ccb_parse_objx_typeof(ccb, v->ctype);
+    switch (v->ctype->type) {
+    case CCB_TYPE_VOID:
+        return ccb_parse_runtime_call_1(ccb, "__oop_box_void", t);
+    case CCB_TYPE_CHAR:
+    case CCB_TYPE_SHORT:
+    case CCB_TYPE_INT:
+    case CCB_TYPE_LONG:
+    case CCB_TYPE_LLONG:
+        return ccb_parse_runtime_call_2(ccb, "__oop_box_i", t, v);
+    case CCB_TYPE_FLOAT:
+    case CCB_TYPE_DOUBLE:
+    case CCB_TYPE_LDOUBLE:
+        return ccb_parse_runtime_call_2(ccb, "__oop_box_f", t, v);
+    case CCB_TYPE_ARRAY:
+    case CCB_TYPE_POINTER:
+    case CCB_TYPE_FUNCTION:
+        return ccb_parse_runtime_call_2(ccb, "__oop_box_p", t, v);
+    case CCB_TYPE_STRUCTURE:
+        ccb_compile_error(ccb, "can't fully box structures yet");
+        return NULL; // Unreachable
+    default:
+        /* Report the data type code that was switched on; `t` is the typeof AST node. */
+        ccb_compile_error(ccb, "bad type code (%d)", v->ctype->type);
+        return NULL; // Unreachable
+    }
+}
+
+/*
+ * Parses a primary expression: an identifier, a numeric/character/string
+ * literal or, with CCB_X_OBJC, an '@'-prefixed literal or a '[' message send.
+ * Returns NULL when the input ends or the next token cannot start a primary
+ * expression; in the latter case the consumed tokens are handed back to the lexer.
+ */
+static ccb_ast_t* ccb_parse_expression_primary(ccb_t* ccb) {
+    ccb_lexer_token_t* token;
+    ccb_ast_t* ast;
+    //fprintf(stderr, "TESTING\n");
+
+    if (!(token = ccb_lexer_next(ccb)))
+        return NULL;
+
+#ifdef CCB_X_OBJC
+    bool isobjc = false;
+    ccb_lexer_token_t* attoken = NULL;
+    if (token->type == CCB_LEXER_TOKEN_PUNCT && token->character == '@') {
+        isobjc = true;
+        attoken = token;
+        if (!(token = ccb_lexer_next(ccb))) {
+            /* Input ended right after '@': hand it back and report "no expression"
+               rather than dereferencing the NULL token in the switch below. */
+            ccb_lexer_unget(ccb, attoken);
+            return NULL;
+        }
+        //ccb_compile_error(ccb, "Got objc literal");
+    }
+#endif
+
+    switch (token->type) {
+    case CCB_LEXER_TOKEN_IDENTIFIER:
+#ifdef CCB_X_OBJC
+        if (isobjc) {
+            if (strcmp(token->string, "selector") == 0) {
+                return ccb_parse_objc_selector(ccb);
+#ifdef CCB_X_OBJX
+            }
+            else if (strcmp(token->string, "typeof") == 0) {
+                return ccb_parse_objx_typeof(ccb, ccb_parse_sizeoflike_type(ccb, "@typeof", false, true));
+#endif
+            }
+            else {
+                ccb_lexer_unget(ccb, attoken);
+                ccb_lexer_unget(ccb, token);
+                return NULL;
+            }
+        }
+#endif
+        return ccb_parse_generic(ccb, token->string);
+    case CCB_LEXER_TOKEN_NUMBER:
+#ifdef CCB_X_OBJC
+        if (isobjc) {
+            ast = ccb_parse_number(ccb, token->string);
+            /* The data type code lives in ast->ctype->type; ast->type is the AST node kind. */
+            if (ast->ctype->type == CCB_TYPE_FLOAT || ast->ctype->type == CCB_TYPE_DOUBLE || ast->ctype->type == CCB_TYPE_LDOUBLE) {
+                return ccb_parse_runtime_call_1(ccb, "__oop_literal_float", ast);
+            }
+            else {
+                return ccb_parse_runtime_call_1(ccb, "__oop_literal_integer", ast);
+            }
+        }
+#endif
+        return ccb_parse_number(ccb, token->string);
+    case CCB_LEXER_TOKEN_CHAR:
+#ifdef CCB_X_OBJC
+        if (isobjc) {
+            return ccb_parse_runtime_call_1(ccb, "__oop_literal_character", ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_CHAR], token->character));
+        }
+#endif
+        return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_CHAR], token->character);
+    case CCB_LEXER_TOKEN_STRING:
+        /* Every string literal is recorded in ccb_ast_strings. */
+        ast = ccb_ast_new_string(ccb, token->string);
+        ccb_list_push(ccb_ast_strings, ast);
+#ifdef CCB_X_OBJC
+        if (isobjc) {
+            return ccb_parse_runtime_call_2(ccb, "__oop_literal_stringn", ast, ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], strlen(token->string)));
+        }
+#endif
+        return ast;
+    case CCB_LEXER_TOKEN_PUNCT:
+#ifdef CCB_X_OBJC
+        if (token->character == '[') {
+            if (isobjc) {
+                /* @[a, b, ...]: start from an empty list and append each boxed element. */
+                ccb_ast_t* prevlist = ccb_parse_runtime_call_0(ccb, "__oop_literal_emptylist");
+                ccb_ast_t* tmp;
+                while ((tmp = ccb_parse_expression(ccb)) != NULL) {
+                    tmp = ccb_parse_pedanticbox(ccb, tmp);
+                    prevlist = ccb_parse_runtime_call_2(ccb, "__oop_literal_listwith", prevlist, tmp);
+                    if (ccb_lexer_ispunct(ccb, ccb_lexer_peek(ccb), ',')) {
+                        ccb_lexer_next(ccb); // Discard the comma
+                    }
+                }
+                ccb_parse_expect(ccb, ']');
+                return prevlist;
+            } else {
+                return ccb_parse_objc_message(ccb);
+            }
+        }
+        else {
+            ccb_lexer_unget(ccb, attoken);
+            ccb_lexer_unget(ccb, token);
+            return NULL;
+        }
+#else
+        ccb_lexer_unget(ccb, token);
+        return NULL;
+#endif
+    default:
+        break;
+    }
+
+    ccb_compile_error(ccb, "Internal error: parse_expression_primary");
+    return NULL;
+}
+
+static ccb_ast_t* 
ccb_parse_expression_subscript(ccb_t* ccb, ccb_ast_t* ast) { + ccb_ast_t* subscript = ccb_parse_expression(ccb); + ccb_parse_expect(ccb, ']'); + ccb_ast_t* node = ccb_ast_new_binary(ccb, '+', ast, subscript); + return ccb_ast_new_unary(ccb, CCB_AST_TYPE_DEREFERENCE, node->ctype->pointer, node); +} + +static ccb_data_type_t* ccb_parse_sizeoflike_type(ccb_t* ccb, const char* debugname, bool typename, bool allowvoid) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (typename && ccb_parse_type_check(ccb, token)) { + ccb_lexer_unget(ccb, token); + ccb_data_type_t* type; + ccb_parse_function_parameter(ccb, &type, NULL, true); + return type; + } + + if (ccb_lexer_ispunct(ccb, token, '(')) { + ccb_data_type_t* next = ccb_parse_sizeoflike_type(ccb, debugname, true, allowvoid); + ccb_parse_expect(ccb, ')'); + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '{')) { + ccb_parse_initializer_declaration(ccb, next); + ccb_parse_expect(ccb, '}'); + } + else { + ccb_lexer_unget(ccb, token); + } + return next; + } + + ccb_lexer_unget(ccb, token); + ccb_ast_t* expression = ccb_parse_expression_unary(ccb); + if (!allowvoid && expression->ctype->size == 0) + ccb_compile_error(ccb, "%s(void) illegal", debugname); + return expression->ctype; +} + +static ccb_ast_t* ccb_parse_expression_compound_literal(ccb_t* ccb, ccb_data_type_t* type) { + char* name = ccb_ast_label(ccb); + ccb_list_t* list = ccb_parse_initializer_declaration(ccb, type); + ccb_parse_expect(ccb, '}'); + + ccb_ast_t* node = ccb_ast_variable_local(ccb, type, name); + node->variable.init = list; + return node; +} + +static ccb_ast_t* ccb_parse_expression_unary_cast(ccb_t* ccb) { + ccb_data_type_t* basetype = ccb_parse_declaration_specification(ccb, NULL); + ccb_data_type_t* casttype = ccb_parse_declarator(ccb, NULL, basetype, NULL, CCB_CDECL_CAST); + + ccb_parse_expect(ccb, ')'); + + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '{')) + return 
ccb_parse_expression_compound_literal(ccb, casttype); + ccb_lexer_unget(ccb, token); + + ccb_ast_t* expression = ccb_parse_expression_intermediate(ccb, 2); //ccb_parse_expression_unary(ccb); + return ccb_ast_new_unary(ccb, CCB_AST_TYPE_EXPRESSION_CAST, casttype, expression); +} + +static int ccb_parse_expression_unary_innerisfloat(ccb_t* ccb, ccb_data_type_t* t) { + switch (t->type) { + case CCB_TYPE_FLOAT: + case CCB_TYPE_DOUBLE: + case CCB_TYPE_LDOUBLE: + return 1; + default: + return 0; + } +} + +static int ccb_parse_expression_unary_innerispointer(ccb_t* ccb, ccb_data_type_t* t) { + switch (t->type) { + case CCB_TYPE_POINTER: + case CCB_TYPE_FUNCTION: + case CCB_TYPE_ID: + return 1; + default: + return 0; + } +} + +static int ccb_parse_expression_unary_innerisint(ccb_t* ccb, ccb_data_type_t* t) { + switch (t->type) { + case CCB_TYPE_CHAR: + case CCB_TYPE_SHORT: + case CCB_TYPE_INT: + case CCB_TYPE_LONG: + case CCB_TYPE_LLONG: + return 1; + default: + return 0; + } +} + +static int ccb_parse_expression_unary_innerissigned(ccb_t* ccb, ccb_data_type_t* t) { + switch (t->type) { + case CCB_TYPE_FLOAT: + case CCB_TYPE_DOUBLE: + case CCB_TYPE_LDOUBLE: + return 1; + case CCB_TYPE_CHAR: + case CCB_TYPE_SHORT: + case CCB_TYPE_INT: + case CCB_TYPE_LONG: + case CCB_TYPE_LLONG: + return t->sign ? 1 : 0; + default: + return 0; + } +} + +static ccb_ast_t* ccb_parse_expression_unary(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + + if (!token) + ccb_compile_error(ccb, "unexpected end of input"); + + if (ccb_parse_identifier_check(ccb, token, "sizeof")) { + return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb_parse_sizeoflike_type(ccb, "sizeof", false, false)->size); + } + /* These builtins can be thought of as periphery versions of "sizeof", which instead of size just give basic information for debugging macros (). 
*/
+    if (ccb_parse_identifier_check(ccb, token, "__builtin_isfloat")) {
+        return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb_parse_expression_unary_innerisfloat(ccb, ccb_parse_sizeoflike_type(ccb, "__builtin_isfloat", false, false)));
+    }
+    if (ccb_parse_identifier_check(ccb, token, "__builtin_isint")) {
+        return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb_parse_expression_unary_innerisint(ccb, ccb_parse_sizeoflike_type(ccb, "__builtin_isint", false, false)));
+    }
+    if (ccb_parse_identifier_check(ccb, token, "__builtin_ispointer")) {
+        return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb_parse_expression_unary_innerispointer(ccb, ccb_parse_sizeoflike_type(ccb, "__builtin_ispointer", false, false)));
+    }
+    if (ccb_parse_identifier_check(ccb, token, "__builtin_issigned")) {
+        /* Must use the signedness predicate; the integer predicate would answer 1 for unsigned types too. */
+        return ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LONG], ccb_parse_expression_unary_innerissigned(ccb, ccb_parse_sizeoflike_type(ccb, "__builtin_issigned", false, false)));
+    }
+
+    /* '(' starts either a cast (when a type name follows) or a parenthesised expression. */
+    if (ccb_lexer_ispunct(ccb, token, '(')) {
+        if (ccb_parse_type_check(ccb, ccb_lexer_peek(ccb)))
+            return ccb_parse_expression_unary_cast(ccb);
+        ccb_ast_t* next = ccb_parse_expression(ccb);
+        ccb_parse_expect(ccb, ')');
+        // TODO: Check for complex function PTRCALL here or somewhere else?
+ return next; + } + if (ccb_lexer_ispunct(ccb, token, '&')) { + ccb_ast_t* operand = ccb_parse_expression_intermediate(ccb, 3); + if (operand->type == CCB_AST_TYPE_DEREFERENCE) { + return operand->unary.operand; + } + ccb_parse_semantic_lvalue(ccb, operand, true); + return ccb_ast_new_unary(ccb, CCB_AST_TYPE_ADDRESS, ccb_ast_pointer(ccb, operand->ctype), operand); + } + if (ccb_lexer_ispunct(ccb, token, '!')) { + ccb_ast_t* operand = ccb_parse_expression_intermediate(ccb, 3); + return ccb_ast_new_unary(ccb, '!', ccb_ast_data_table[CCB_AST_DATA_INT], operand); + } + if (ccb_lexer_ispunct(ccb, token, '-')) { + ccb_ast_t* ast = ccb_parse_expression_intermediate(ccb, 3); + return ccb_ast_new_binary(ccb, '-', ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], 0), ast); + } + if (ccb_lexer_ispunct(ccb, token, '~')) { + ccb_ast_t* ast = ccb_parse_expression_intermediate(ccb, 3); + if (!ccb_ast_type_integer(ccb, ast->ctype)) + ccb_compile_error(ccb, "invalid expression `%s'", ccb_ast_string(ccb, ast)); + return ccb_ast_new_unary(ccb, '~', ast->ctype, ast); + } + if (ccb_lexer_ispunct(ccb, token, '*')) { + ccb_ast_t* operand = ccb_parse_expression_intermediate(ccb, 3); + switch (operand->type) { + case CCB_AST_TYPE_VAR_LOCAL: + case CCB_AST_TYPE_VAR_GLOBAL: + default: + //operand = ccb_ast_new_binary(ccb, '+', operand, ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], 0)); + //default: + break; //ccb_compile_error(ccb, "invalid expression `%s'", ccb_ast_string(ccb, operand)); + } + ccb_data_type_t* type = ccb_ast_array_convert(ccb, operand->ctype); + + if (type->type != CCB_TYPE_POINTER) + ccb_compile_error(ccb, "expected pointer type, `%s' isn't pointer type", ccb_ast_string(ccb, operand)); + + return ccb_ast_new_unary(ccb, CCB_AST_TYPE_DEREFERENCE, operand->ctype->pointer, operand); + } + if (ccb_lexer_ispunct(ccb, token, CCB_LEXER_TOKEN_INCREMENT) + || ccb_lexer_ispunct(ccb, token, CCB_LEXER_TOKEN_DECREMENT)) + { + ccb_ast_t* next = 
ccb_parse_expression_intermediate(ccb, 3); + ccb_parse_semantic_lvalue(ccb, next, false); + int operand = ccb_lexer_ispunct(ccb, token, CCB_LEXER_TOKEN_INCREMENT) + ? CCB_AST_TYPE_PRE_INCREMENT + : CCB_AST_TYPE_PRE_DECREMENT; + return ccb_ast_new_unary(ccb, operand, next->ctype, next); + } +#ifdef CCB_X_OBJC + // @expressions may be supported in the future. Only raw literals (@"foo", @123, etc.) are supported right now + //if (ccb_lexer_ispunct(ccb, token, '@')) { + // ccb_ast_t *ast = ccb_parse_expression_intermediate(3); + // return ccb_ast_new_unary('@', ast->ctype, ast); + //} +#endif + + ccb_lexer_unget(ccb, token); + return ccb_parse_expression_primary(ccb); +} + +static ccb_ast_t* ccb_parse_expression_condition(ccb_t* ccb, ccb_ast_t* condition) { + ccb_ast_t* then = ccb_parse_expression(ccb); + ccb_parse_expect(ccb, ':'); + ccb_ast_t* last = ccb_parse_expression(ccb); + // TODO: This should probably use a similar mechanism as with binops to check result type + return ccb_ast_ternary(ccb, then->ctype, condition, then, last); +} + +static ccb_ast_t* ccb_parse_structure_field(ccb_t* ccb, ccb_ast_t* structure) { + if (structure->ctype->type != CCB_TYPE_STRUCTURE) + ccb_compile_error(ccb, "expected structure type, `%s' isn't structure type", ccb_ast_string(ccb, structure)); + ccb_lexer_token_t* name = ccb_lexer_next(ccb); + if (name->type != CCB_LEXER_TOKEN_IDENTIFIER) + ccb_compile_error(ccb, "expected field name, got `%s' instead", ccb_lexer_tokenstr(ccb, name)); + + ccb_data_type_t* field = ccb_table_find(structure->ctype->fields, name->string); + if (!field) + ccb_compile_error(ccb, "structure has no such field `%s'", ccb_lexer_tokenstr(ccb, name)); + return ccb_ast_structure_reference(ccb, field, structure, name->string); +} + +static int ccb_parse_operation_compound_operator(ccb_t* ccb, ccb_lexer_token_t* token) { + if (token->type != CCB_LEXER_TOKEN_PUNCT) + return 0; + + switch (token->punct) { + case CCB_LEXER_TOKEN_COMPOUND_RSHIFT: return 
CCB_LEXER_TOKEN_RSHIFT;
+    case CCB_LEXER_TOKEN_COMPOUND_LSHIFT: return CCB_LEXER_TOKEN_LSHIFT;
+    case CCB_LEXER_TOKEN_COMPOUND_ADD: return '+';
+    case CCB_LEXER_TOKEN_COMPOUND_AND: return '&';
+    case CCB_LEXER_TOKEN_COMPOUND_DIV: return '/';
+    case CCB_LEXER_TOKEN_COMPOUND_MOD: return '%';
+    case CCB_LEXER_TOKEN_COMPOUND_MUL: return '*';
+    case CCB_LEXER_TOKEN_COMPOUND_OR: return '|';
+    case CCB_LEXER_TOKEN_COMPOUND_SUB: return '-';
+    case CCB_LEXER_TOKEN_COMPOUND_XOR: return '^';
+    default:
+        return 0;
+    }
+
+    return -1;
+}
+
+/*
+ * Maps a lexer punctuation code to the AST node type used for it.
+ * Shifts, comparisons and the logical and/or operators have dedicated AST
+ * types; every other code is returned unchanged.
+ */
+static int ccb_parse_operation_reclassify(ccb_t* ccb, int punct) {
+    switch (punct) {
+    case CCB_LEXER_TOKEN_LSHIFT: return CCB_AST_TYPE_LSHIFT;
+    case CCB_LEXER_TOKEN_RSHIFT: return CCB_AST_TYPE_RSHIFT;
+    case CCB_LEXER_TOKEN_EQUAL: return CCB_AST_TYPE_EQUAL;
+    case CCB_LEXER_TOKEN_GEQUAL: return CCB_AST_TYPE_GEQUAL;
+    case CCB_LEXER_TOKEN_LEQUAL: return CCB_AST_TYPE_LEQUAL;
+    case CCB_LEXER_TOKEN_NEQUAL: return CCB_AST_TYPE_NEQUAL;
+    case CCB_LEXER_TOKEN_AND: return CCB_AST_TYPE_AND;
+    case CCB_LEXER_TOKEN_OR: return CCB_AST_TYPE_OR;
+    default:
+        break;
+    }
+    return punct;
+}
+
+/*
+ * Returns true for the operators checked here as integer-only: '^', '%' and
+ * the two shifts. The shifts are recognised in both their lexer and their AST
+ * spelling, because the caller passes the operator after it has been through
+ * ccb_parse_operation_reclassify.
+ */
+static bool ccb_parse_operation_integer_check(ccb_t* ccb, int operation) {
+    return operation == '^'
+        || operation == '%'
+        || operation == CCB_LEXER_TOKEN_LSHIFT
+        || operation == CCB_LEXER_TOKEN_RSHIFT
+        || operation == CCB_AST_TYPE_LSHIFT
+        || operation == CCB_AST_TYPE_RSHIFT;
+}
+
+static ccb_ast_t* ccb_parse_expression_intermediate(ccb_t* ccb, int precision) {
+    ccb_ast_t* ast;
+    ccb_ast_t* next;
+
+    if (!(ast = ccb_parse_expression_unary(ccb)))
+        return NULL;
+
+    for (;;) {
+        ccb_lexer_token_t* token = ccb_lexer_next(ccb);
+        /* End of input: nothing can follow, so the operand parsed so far is the result. */
+        if (token == NULL)
+            return ast;
+        if (token->type != CCB_LEXER_TOKEN_PUNCT) {
+            ccb_lexer_unget(ccb, token);
+            return ast;
+        }
+
+        /* Stop at operators this level does not handle (pri < 0 or precision <= pri). */
+        int pri = ccb_parse_operator_priority(ccb, token);
+        if (pri < 0 || precision <= pri) {
+            ccb_lexer_unget(ccb, token);
+            return ast;
+        }
+
+        if (ccb_lexer_ispunct(ccb, token, '?')) {
+            ast = ccb_parse_expression_condition(ccb, ast);
+            continue;
+        }
+
+        if (ccb_lexer_ispunct(ccb, token, '.')) {
+            ast = 
ccb_parse_structure_field(ccb, ast); + continue; + } + + if (ccb_lexer_ispunct(ccb, token, CCB_LEXER_TOKEN_ARROW)) { + if (ast->ctype->type != CCB_TYPE_POINTER) + ccb_compile_error(ccb, "Not a valid pointer type: %s", ccb_ast_string(ccb, ast)); + ast = ccb_ast_new_unary(ccb, CCB_AST_TYPE_DEREFERENCE, ast->ctype->pointer, ast); + ast = ccb_parse_structure_field(ccb, ast); + continue; + } + + if (ccb_lexer_ispunct(ccb, token, '[')) { + ast = ccb_parse_expression_subscript(ccb, ast); + continue; + } + + if (ccb_lexer_ispunct(ccb, token, CCB_LEXER_TOKEN_INCREMENT) || + ccb_lexer_ispunct(ccb, token, CCB_LEXER_TOKEN_DECREMENT)) { + + ccb_parse_semantic_lvalue(ccb, ast, false); + int operand = ccb_lexer_ispunct(ccb, token, CCB_LEXER_TOKEN_INCREMENT) + ? CCB_AST_TYPE_POST_INCREMENT + : CCB_AST_TYPE_POST_DECREMENT; + + ast = ccb_ast_new_unary(ccb, operand, ast->ctype, ast); + continue; + } + + int compound = ccb_parse_operation_compound_operator(ccb, token); + if (ccb_lexer_ispunct(ccb, token, '=') || compound) + ccb_parse_semantic_lvalue(ccb, ast, false); + + next = ccb_parse_expression_intermediate(ccb, pri + !!ccb_parse_semantic_rightassoc(ccb, token)); + if (!next) + ccb_compile_error(ccb, "Internal error: parse_expression_intermediate (next)"); + int operation = compound ? 
compound : token->punct; + int op = ccb_parse_operation_reclassify(ccb, operation); + + if (ccb_parse_operation_integer_check(ccb, op)) { + ccb_parse_semantic_integer(ccb, ast); + ccb_parse_semantic_integer(ccb, next); + } + + if (compound) + ast = ccb_ast_new_binary(ccb, '=', ast, ccb_ast_new_binary(ccb, op, ast, next)); + else + ast = ccb_ast_new_binary(ccb, op, ast, next); + } + return NULL; +} + +static ccb_ast_t* ccb_parse_expression(ccb_t* ccb) { + return ccb_parse_expression_intermediate(ccb, 16); +} + +static ccb_ast_t* ccb_parse_expression_withcomma(ccb_t* ccb) { + ccb_ast_t* lhs = ccb_parse_expression(ccb); + if (lhs == NULL) { + return NULL; + } + ccb_lexer_token_t* t; + /* Might as well use the comma operator to implement itself just for demonstration! */ + while (t = ccb_lexer_peek(ccb), ccb_lexer_ispunct(ccb, t, ',')) { + ccb_lexer_next(ccb); // Discard the comma + ccb_ast_t* rhs = ccb_parse_expression(ccb); + if (rhs == NULL) { + ccb_compile_error(ccb, "Expected expression following comma"); + return NULL; + } + lhs = ccb_ast_new_comma(ccb, CCB_AST_TYPE_EXPRESSION_COMMA, lhs, rhs); + } + return lhs; +} + +static bool ccb_parse_type_check(ccb_t* ccb, ccb_lexer_token_t* token) { + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER) + return false; + + static const char* keywords[] = { + "char", "short", "int", "long", "float", "double", + "struct", "union", "signed", "unsigned", "enum", "void", + "typedef", "extern", "static", "auto", "register", "const", + "volatile", "inline", "restrict"/*, "__asm", "__naked"*/ + #ifdef CCB_X_OBJC + , "id" + #endif + }; + + for (int i = 0; i < sizeof(keywords) / sizeof(const char* /* NOTE: keywords[0] doesn't self-compile */); i++) + if (!strcmp(keywords[i], token->string)) + return true; + + if (ccb_table_find(ccb_parse_typedefs, token->string)) + return true; + + return false; +} + +/* struct / union */ +static char* ccb_parse_memory_tag(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (token->type 
== CCB_LEXER_TOKEN_IDENTIFIER)
+        return token->string;
+    ccb_lexer_unget(ccb, token);
+    return NULL;
+}
+
+/*
+ * Returns the number of padding bytes to insert at `offset` so that a field of
+ * `size` bytes starts on a multiple of its alignment. The alignment used is
+ * `size` capped at ccb_target_alignment(). Fields with a non-positive size need
+ * no padding.
+ */
+static int ccb_parse_memory_fields_padding(ccb_t* ccb, int offset, int size) {
+    size = CCB_MIN(size, (int)ccb_target_alignment(ccb));
+    /* A zero size would make the modulo below divide by zero. */
+    if (size <= 0)
+        return 0;
+    return (offset % size == 0) ? 0 : size - offset % size;
+}
+
+/*
+ * Copies every field of the anonymous aggregate `unnamed` into `table`, with
+ * each copy's offset shifted by `offset`.
+ */
+static void ccb_parse_memory_fields_squash(ccb_t* ccb, ccb_table_t* table, ccb_data_type_t* unnamed, int offset) {
+    for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_table_keys(unnamed->fields)); !ccb_list_iterator_end(it); ) {
+        char* name = ccb_list_iterator_next(it);
+        ccb_data_type_t* type = ccb_ast_type_copy(ccb, ccb_table_find(unnamed->fields, name));
+        type->offset += offset;
+        ccb_table_insert(table, name, type);
+    }
+}
+
+static ccb_table_t* ccb_parse_memory_fields(ccb_t* ccb, int* rsize, bool isstruct) {
+    ccb_lexer_token_t* token = ccb_lexer_next(ccb);
+    if (!ccb_lexer_ispunct(ccb, token, '{')) {
+        ccb_lexer_unget(ccb, token);
+        return NULL;
+    }
+
+    int offset = *rsize;// TODO: Check that this is always initialised correctly
+    int maxsize = 0;
+    ccb_table_t* table = ccb_table_create(NULL);
+
+    for (;;) {
+        if (!ccb_parse_type_check(ccb, ccb_lexer_peek(ccb)))
+            break;
+
+        ccb_data_type_t* basetype = ccb_parse_declaration_specification(ccb, NULL);
+
+        if (basetype->type == CCB_TYPE_STRUCTURE && ccb_lexer_ispunct(ccb, ccb_lexer_peek(ccb), ';')) {
+            ccb_lexer_next(ccb); /* Skip */
+            ccb_parse_memory_fields_squash(ccb, table, basetype, offset);
+            if (isstruct)
+                offset += basetype->size;
+            else
+                maxsize = CCB_MAX(maxsize, basetype->size);
+            continue;
+        }
+
+        for (;;) {
+            char* name;
+            ccb_data_type_t* fieldtype = ccb_parse_declarator(ccb, &name, basetype, NULL, CCB_CDECL_PARAMETER);
+
+            ccb_parse_semantic_notvoid(ccb, fieldtype);
+
+            if (isstruct) {
+                //if (fieldtype->length < 1) {ccb_compile_error(ccb, "Field type length is not set (internal error)");}
+                //if (fieldtype->type == CCB_TYPE_ARRAY) {
+                //    fprintf(stderr, "### SPECIAL HANDLING OF FIELD '%s' 
size %d length %d ptrsize %d\n", name, fieldtype->size, fieldtype->length, fieldtype->pointer->size); + // offset += ccb_parse_memory_fields_padding(ccb, offset, fieldtype->size / (fieldtype->length < 1 ? 1 : fieldtype->length)); //, fieldtype->pointer->size); + //} else { + offset += ccb_parse_memory_fields_padding(ccb, offset, fieldtype->size /* / (fieldtype->length < 1 ? 1 : fieldtype->length)*/); + //} + fieldtype = ccb_ast_structure_field(ccb, fieldtype, offset); + offset += fieldtype->size; + } + else { + maxsize = CCB_MAX(maxsize, fieldtype->size); + fieldtype = ccb_ast_structure_field(ccb, fieldtype, 0); + } + ccb_table_insert(table, name, fieldtype); + + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ',')) + continue; + + ccb_lexer_unget(ccb, token); + ccb_parse_expect(ccb, ';'); + break; + } + } + ccb_parse_expect(ccb, '}'); + *rsize = isstruct ? offset : maxsize; + return table; +} + +static ccb_data_type_t* ccb_parse_tag_definition(ccb_t* ccb, ccb_table_t* table, bool isstruct, int initialsize, char* pretag) { + char* tag = ((pretag == NULL) ? ccb_parse_memory_tag(ccb) : pretag); + int size = initialsize; + ccb_table_t* fields = ((size < 0) ? 
NULL : ccb_parse_memory_fields(ccb, &size, isstruct)); + ccb_data_type_t* r; + + if (tag) { + if (!(r = ccb_table_find(table, tag))) { + r = ccb_ast_structure_new(ccb, NULL, 0, isstruct); + ccb_table_insert(table, tag, r); + } + } + else { + r = ccb_ast_structure_new(ccb, NULL, 0, isstruct); + if (tag) + ccb_table_insert(table, tag, r); + } + + if (r && !fields) + return r; + + if (r && fields) { + r->fields = fields; + r->size = size; + + /*while (isstruct && r->size % ccb_target_alignment(ccb) != 0) { + r->size++; + }*/ + return r; + } + + return r; +} + +/* enum */ +static ccb_data_type_t* ccb_parse_enumeration(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (token->type == CCB_LEXER_TOKEN_IDENTIFIER) + token = ccb_lexer_next(ccb); + if (!ccb_lexer_ispunct(ccb, token, '{')) { + ccb_lexer_unget(ccb, token); + return ccb_ast_data_table[CCB_AST_DATA_INT]; + } + int accumulate = 0; + for (;;) { + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '}')) + break; + + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER) + ccb_compile_error(ccb, "NOPE"); + + char* name = token->string; + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '=')) + accumulate = ccb_parse_evaluate(ccb, ccb_parse_expression(ccb)); + else + ccb_lexer_unget(ccb, token); + + ccb_ast_t* constval = ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], accumulate++); + ccb_table_insert(ccb_ast_localenv ? ccb_ast_localenv : ccb_ast_globalenv, name, constval); + //fprintf(stderr, "INSERTED '%s' into %s\n", name, ccb_ast_localenv ? 
"locals" : "globals"); + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ',')) + continue; + if (ccb_lexer_ispunct(ccb, token, '}')) + break; + + ccb_compile_error(ccb, "NOPE!"); + } + return ccb_ast_data_table[CCB_AST_DATA_INT]; +} + +/* initializer */ +static void ccb_parse_assign_string(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, char* p, int offset) { + if (type->length == -1) + type->length = type->size = strlen(p) + 1; + + int i = 0; + for (; i < type->length && *p; i++) { + ccb_list_push( + init, + ccb_ast_initializer(ccb, + ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_CHAR], *p++), + ccb_ast_data_table[CCB_AST_DATA_CHAR], + offset + i + ) + ); + } + + for (; i < type->length; i++) { + ccb_list_push( + init, + ccb_ast_initializer(ccb, + ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_CHAR], 0), + ccb_ast_data_table[CCB_AST_DATA_CHAR], + offset + i + ) + ); + } +} + +static bool ccb_parse_brace_maybe(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '{')) + return true; + ccb_lexer_unget(ccb, token); + return false; +} + +static void ccb_parse_commaskip_maybe(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (!ccb_lexer_ispunct(ccb, token, ',')) + ccb_lexer_unget(ccb, token); +} + +static void ccb_parse_brace_skipto(ccb_t* ccb) { + for (;;) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '}')) + return; + + if (ccb_lexer_ispunct(ccb, token, '.')) { + ccb_lexer_next(ccb); + ccb_parse_expect(ccb, '='); + } + else { + ccb_lexer_unget(ccb, token); + } + + ccb_ast_t* ignore = ccb_parse_expression_intermediate(ccb, 3); + if (!ignore) + return; + + /* TODO aggregate warning */ + token = ccb_lexer_next(ccb); + if (!ccb_lexer_ispunct(ccb, token, ',')) + ccb_lexer_unget(ccb, token); + } +} + +static ccb_ast_t* ccb_parse_zero(ccb_t* ccb, ccb_data_type_t* type) { + return ccb_ast_type_floating(ccb, type) + ? 
ccb_ast_new_floating(ccb, ccb_ast_data_table[CCB_AST_DATA_DOUBLE], 0.0) + : ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], 0); +} + +static void ccb_parse_initializer_list(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, int offset); +static void ccb_parse_initializer_element(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, int offset) { + if (type == NULL) { + ccb_compile_error(ccb, "ICE: %s (NULL type?)", __func__); + } + if (type->type == CCB_TYPE_ARRAY || type->type == CCB_TYPE_STRUCTURE) + ccb_parse_initializer_list(ccb, init, type, offset); + else { + ccb_ast_t* expression = ccb_parse_expression_intermediate(ccb, 3); + if (expression == NULL) { + ccb_compile_error(ccb, "ICE: %s (NULL expression?)", __func__); + } + ccb_ast_result_type(ccb, '=', type, expression->ctype); + ccb_list_push(init, ccb_ast_initializer(ccb, expression, type, offset)); + } +} + +static void ccb_parse_initializer_zero(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, int offset); // TODO: Predeclaration required for self-compile +static void ccb_parse_initializer_zero(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, int offset) { + if (type->type == CCB_TYPE_STRUCTURE) { + ccb_list_iterator_t* it = ccb_list_iterator(ccb_table_keys(type->fields)); + while (!ccb_list_iterator_end(it)) { + char* fieldname = ccb_list_iterator_next(it); + ccb_data_type_t* fieldtype = ccb_table_find(type->fields, fieldname); + + ccb_parse_initializer_zero(ccb, init, fieldtype, offset + fieldtype->offset); + + if (!type->isstruct) + return; + } + return; + } + + if (type->type == CCB_TYPE_ARRAY) { + for (int i = 0; i < type->length; i++) + ccb_parse_initializer_zero(ccb, init, type->pointer, offset + i * type->pointer->size); + return; + } + + ccb_list_push(init, ccb_ast_initializer(ccb, ccb_parse_zero(ccb, type), type, offset)); +} + +static void ccb_parse_initializer_structure(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, int offset) { + bool brace = 
ccb_parse_brace_maybe(ccb); + ccb_list_iterator_t* it = ccb_list_iterator(ccb_table_keys(type->fields)); + ccb_table_t* wrote = ccb_table_create(NULL); + + for (;;) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '}')) { + if (!brace) + ccb_lexer_unget(ccb, token); + goto complete; + } + + char* fieldname; + ccb_data_type_t* fieldtype; + + if (ccb_lexer_ispunct(ccb, token, '.')) { + if (!(token = ccb_lexer_next(ccb)) || token->type != CCB_LEXER_TOKEN_IDENTIFIER) + ccb_compile_error(ccb, "invalid designated initializer"); + fieldname = token->string; + if (!(fieldtype = ccb_table_find(type->fields, fieldname))) + ccb_compile_error(ccb, "field doesn't exist in designated initializer"); + + ccb_parse_expect(ccb, '='); + + it = ccb_list_iterator(ccb_table_keys(type->fields)); + while (!ccb_list_iterator_end(it)) + if (!strcmp(fieldname, ccb_list_iterator_next(it))) + break; + } + else { + ccb_lexer_unget(ccb, token); + if (ccb_list_iterator_end(it)) + break; + + fieldname = ccb_list_iterator_next(it); + fieldtype = ccb_table_find(type->fields, fieldname); + } + if (ccb_table_find(wrote, fieldname)) + ccb_compile_error(ccb, "field initialized twice in designated initializer"); + ccb_table_insert(wrote, fieldname, (void*)1); + ccb_parse_initializer_element(ccb, init, fieldtype, offset + fieldtype->offset); + ccb_parse_commaskip_maybe(ccb); + + if (!type->isstruct) + break; + } + if (brace) + ccb_parse_brace_skipto(ccb); + +complete: + it = ccb_list_iterator(ccb_table_keys(type->fields)); + while (!ccb_list_iterator_end(it)) { + char* fieldname = ccb_list_iterator_next(it); + if (ccb_table_find(wrote, fieldname)) + continue; + ccb_data_type_t* fieldtype = ccb_table_find(type->fields, fieldname); + ccb_parse_initializer_zero(ccb, init, fieldtype, offset + fieldtype->offset); + } +} + +static void ccb_parse_initializer_array(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, int offset) { + bool brace = ccb_parse_brace_maybe(ccb); + 
int size = type->pointer->size; + int i; + + for (i = 0; type->length == -1 || i < type->length; i++) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '}')) { + if (!brace) + ccb_lexer_unget(ccb, token); + goto complete; + } + ccb_lexer_unget(ccb, token); + ccb_parse_initializer_element(ccb, init, type->pointer, offset + size * i); + ccb_parse_commaskip_maybe(ccb); + } + if (brace) + ccb_parse_brace_skipto(ccb); + +complete: + if (type->length == -1) { + type->length = i; + type->size = size * i; + } + + int testn1 = -1; + + printf("Type reports length as %ld, -1 is %ld i is %d\n", type->length, -1, i); + + int itl = type->length; + /*if (((int) type->length) == -1) { + return; // TODO: This is required due to condition below failing in self-compilation! + }*/ + + /*for (; i < type->length; i++)*/ while(itl != (int)-1 && i < itl) { + //printf("Parsing zero %d\n", i); + ccb_parse_initializer_zero(ccb, init, type->pointer, offset + size * i); + i++; + } +} + +static void ccb_parse_initializer_list(ccb_t* ccb, ccb_list_t* init, ccb_data_type_t* type, int offset) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (type->type == CCB_TYPE_ARRAY && type->pointer->type == CCB_TYPE_CHAR) { + if (token->type == CCB_LEXER_TOKEN_STRING) { + ccb_parse_assign_string(ccb, init, type, token->string, offset); + return; + } + + if (ccb_lexer_ispunct(ccb, token, '{') && ccb_lexer_peek(ccb)->type == CCB_LEXER_TOKEN_STRING) { + ccb_parse_assign_string(ccb, init, type, token->string, offset); + ccb_parse_expect(ccb, '}'); + return; + } + } + ccb_lexer_unget(ccb, token); + + if (type->type == CCB_TYPE_ARRAY) + ccb_parse_initializer_array(ccb, init, type, offset); + else if (type->type == CCB_TYPE_STRUCTURE) + ccb_parse_initializer_structure(ccb, init, type, offset); + else + ccb_compile_error(ccb, "ICE"); +} + +static ccb_list_t* ccb_parse_initializer_declaration(ccb_t* ccb, ccb_data_type_t* type) { + ccb_list_t* list = ccb_list_create(); + if 
(type->type == CCB_TYPE_ARRAY || type->type == CCB_TYPE_STRUCTURE) + ccb_parse_initializer_list(ccb, list, type, 0); + else + ccb_list_push(list, ccb_ast_initializer(ccb, ccb_parse_expression(ccb), type, 0)); + return list; +} + +/* declarator */ +static ccb_data_type_t* ccb_parse_declaration_specification(ccb_t* ccb, ccb_storage_t* rstorage) { + ccb_storage_t storage = 0; + ccb_lexer_token_t* token = ccb_lexer_peek(ccb); + if (!token || token->type != CCB_LEXER_TOKEN_IDENTIFIER) + ccb_compile_error(ccb, "internal error in declaration specification parsing"); + + /* + * large declaration specification state machine: + * There is six pieces of state to the following state machine + * for dealing with all the permutations of declaration + * specification. + * + * 1: The type, most common of course, this is the "base type" + * of the declaration. + * + * 2: The size, in C, types are also size specifiers on types, + * e.g short int, long int, long long int, act as 'sizes'. + * Short and long are not technically types, people who use + * them as is without a type associated with them (like unsigned) + * concludes implicit int. There is no situation where a size + * specifier would couple with anything but an int type. It + * should be noted that there has to be an "unsized" state for + * types on their own. + * + * 3: The Signness/signature, for knowing if the declaration is + * signed or unsigned. This isn't actually a boolean state because + * there needs to be an unsignness state since the char type is + * allowed to have it's signeness implementation-defined. + * + * 4: constantness + * self explanatory + * 5: vollatileness + * self explanatory + * 6: inlineness + * self explanatory + * + * 7: user (can include redundant partial specification), e.g + * typedef unsigned int foo; signed foo; <--- what to do? + * this also includes enums, unions, and structures. 
+ */ + // TODO: Get rid of these enums (unclear if they apply locally or globally) + enum { + kvoid = 1, + kchar, + kint, + kfloat, + kdouble +#ifdef CCB_X_OBJC + , kid +#endif + } type = 0; + + enum { + kunsize, + kshort, + klong, + kllong + } size = kunsize; + + enum { + ksigned = 1, + kunsigned + } signature = 0; + +#ifdef FUCC_USEUNUSED + bool __attribute__((unused)) kconst = false; + bool __attribute__((unused)) kvolatile = false; + bool __attribute__((unused)) kinline = false; +#else + bool kconst = false; + bool kvolatile = false; + bool kinline = false; +#endif + + ccb_data_type_t* user = NULL; + ccb_data_type_t* find = NULL; + +#define ccb_set_uncheck(STATE, VALUE) \ + do { \ + STATE = VALUE; \ + } while (0) + +#define ccb_set_check(STATE, VALUE) \ + do { \ + if (STATE != 0) { \ + goto state_machine_error; \ + } \ + ccb_set_uncheck(STATE, VALUE); \ + } while (0) + +#define ccb_set_state(STATE, VALUE) \ + do { \ + ccb_set_check(STATE, VALUE); \ + switch (size) { \ + case kshort: \ + if (type != 0 && type != kint) \ + goto state_machine_error; \ + break; \ + case klong: \ + if (type != 0 && type != kint && type != kdouble) \ + goto state_machine_error; \ + break; \ + default: \ + break; \ + } \ + if (signature != 0) { \ + switch (type) { \ + case kvoid: \ + case kfloat: \ + case kdouble: \ + goto state_machine_error; \ + break; \ + default: \ + break; \ + } \ + } \ + if (user && (type != 0 || size != 0 || signature != 0)) \ + goto state_machine_error; \ + } while (0) + +#define ccb_set_class(VALUE) \ + do { \ + ccb_set_check(storage, VALUE); \ + } while (0) + +#define ccb_state_machine_try(THING) \ + if (!strcmp(token->string, THING)) + + for (;;) { + token = ccb_lexer_next(ccb); + if (!token) + ccb_compile_error(ccb, "type specification with unexpected ending"); + + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER) { + ccb_lexer_unget(ccb, token); + break; + } + + ccb_state_machine_try("const") kconst = true; + else ccb_state_machine_try("volatile") kvolatile 
= true; + else ccb_state_machine_try("inline") kinline = true; + + else ccb_state_machine_try("typedef") ccb_set_class(CCB_STORAGE_TYPEDEF); + else ccb_state_machine_try("extern") ccb_set_class(CCB_STORAGE_EXTERN); + else ccb_state_machine_try("static") ccb_set_class(CCB_STORAGE_STATIC); + else ccb_state_machine_try("auto") ccb_set_class(CCB_STORAGE_AUTO); + else ccb_state_machine_try("register") ccb_set_class(CCB_STORAGE_REGISTER); + + else ccb_state_machine_try("void") ccb_set_state(type, kvoid); + else ccb_state_machine_try("char") ccb_set_state(type, kchar); + else ccb_state_machine_try("int") ccb_set_state(type, kint); + else ccb_state_machine_try("float") ccb_set_state(type, kfloat); + else ccb_state_machine_try("double") ccb_set_state(type, kdouble); +#ifdef CCB_X_OBJC + else ccb_state_machine_try("id") ccb_set_state(type, kid); +#endif + + else ccb_state_machine_try("signed") ccb_set_state(signature, ksigned); + else ccb_state_machine_try("unsigned") ccb_set_state(signature, kunsigned); + else ccb_state_machine_try("short") ccb_set_state(size, kshort); + + else ccb_state_machine_try("struct") ccb_set_state(user, ccb_parse_tag_definition(ccb, ccb_ast_structures, true, 0, NULL)); + else ccb_state_machine_try("union") ccb_set_state(user, ccb_parse_tag_definition(ccb, ccb_ast_unions, false, 0, NULL)); + else ccb_state_machine_try("enum") ccb_set_state(user, ccb_parse_enumeration(ccb)); + else ccb_state_machine_try("long") { + switch (size) { + case kunsize: + ccb_set_state(size, klong); + break; + case klong: + ccb_set_uncheck(size, kllong); + break; + default: + goto state_machine_error; + } + } + else if ((find = ccb_table_find(ccb_parse_typedefs, token->string))) { + ccb_set_state(user, find); + } + else { + ccb_lexer_unget(ccb, token); + break; + } + +#undef ccb_set_check +#undef ccb_set_class +#undef ccb_set_state +#undef ccb_set_uncheck + } + + if (rstorage) + *rstorage = storage; + + if (user) + return user; + + switch (type) { + case kvoid: + return 
ccb_ast_data_table[CCB_AST_DATA_VOID]; + case kchar: + return ccb_ast_type_create(ccb, CCB_TYPE_CHAR, signature != kunsigned); + case kfloat: + return ccb_ast_type_create(ccb, CCB_TYPE_FLOAT, false); + case kdouble: + return ccb_ast_type_create(ccb, + (size == klong) + ? CCB_TYPE_LDOUBLE + : CCB_TYPE_DOUBLE, + false + ); +#ifdef CCB_X_OBJC + case kid: + return ccb_ast_data_table[CCB_AST_DATA_ID]; +#endif + default: + break; + } + + switch (size) { + case kshort: + return ccb_ast_type_create(ccb, CCB_TYPE_SHORT, signature != kunsigned); + case klong: + return ccb_ast_type_create(ccb, CCB_TYPE_LONG, signature != kunsigned); + case kllong: + return ccb_ast_type_create(ccb, CCB_TYPE_LLONG, signature != kunsigned); + default: + /* + * You also need to deal with implicit int given the right + * conditions of the state machine. + */ + return ccb_ast_type_create(ccb, CCB_TYPE_INT, signature != kunsigned); + } + + ccb_compile_error(ccb, "State machine error (BAD)"); +state_machine_error: + ccb_compile_error(ccb, "State machine error (GOOD)"); + + return NULL; +} + +static ccb_data_type_t* ccb_parse_array_dimensions_intermediate(ccb_t* ccb, ccb_data_type_t* basetype); // TODO: Predeclaration required for self-compilation +static ccb_data_type_t* ccb_parse_array_dimensions_intermediate(ccb_t* ccb, ccb_data_type_t* basetype) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (!ccb_lexer_ispunct(ccb, token, '[')) { + ccb_lexer_unget(ccb, token); + return NULL; + } + + int dimension = -1; + if (!ccb_lexer_ispunct(ccb, ccb_lexer_peek(ccb), ']')) + dimension = ccb_parse_evaluate(ccb, ccb_parse_expression(ccb)); + + ccb_parse_expect(ccb, ']'); + ccb_data_type_t* next = ccb_parse_array_dimensions_intermediate(ccb, basetype); + if (next) { + if (next->length == -1 && dimension == -1) + ccb_compile_error(ccb, "Internal error: parse_array_dimensions_intermediate (2)"); + return ccb_ast_array(ccb, next, dimension); + } + return ccb_ast_array(ccb, basetype, dimension); +} + +static 
ccb_data_type_t* ccb_parse_array_dimensions(ccb_t* ccb, ccb_data_type_t* basetype) { + ccb_data_type_t* data = ccb_parse_array_dimensions_intermediate(ccb, basetype); + return (data) ? data : basetype; +} + +static void ccb_parse_function_parameter(ccb_t* ccb, ccb_data_type_t** rtype, char** name, bool next) { + ccb_data_type_t* basetype; + ccb_storage_t storage; + + basetype = ccb_parse_declaration_specification(ccb, &storage); + basetype = ccb_parse_declarator(ccb, name, basetype, NULL, next ? CCB_CDECL_TYPEONLY : CCB_CDECL_PARAMETER); + *rtype = ccb_parse_array_dimensions(ccb, basetype); +} + +static ccb_ast_t* ccb_parse_statement_if(ccb_t* ccb) { + ccb_lexer_token_t* token; + ccb_ast_t* cond; + ccb_ast_t* then; + ccb_ast_t* last; + + ccb_parse_expect(ccb, '('); + cond = ccb_parse_expression_withcomma(ccb); + ccb_parse_expect(ccb, ')'); + + + then = ccb_parse_statement(ccb); + token = ccb_lexer_next(ccb); + + if (!token || token->type != CCB_LEXER_TOKEN_IDENTIFIER || strcmp(token->string, "else")) { + ccb_lexer_unget(ccb, token); + return ccb_ast_if(ccb, cond, then, NULL); + } + + last = ccb_parse_statement(ccb); + return ccb_ast_if(ccb, cond, then, last); +} + +static ccb_ast_t* ccb_parse_statement_declaration_semicolon(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ';')) + return NULL; + ccb_lexer_unget(ccb, token); + ccb_list_t* list = ccb_list_create(); + ccb_parse_statement_declaration(ccb, list); + return ccb_list_shift(list); +} + +static ccb_ast_t* ccb_parse_expression_semicolon(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ';')) + return NULL; + ccb_lexer_unget(ccb, token); + ccb_ast_t* read = ccb_parse_expression_withcomma(ccb); + ccb_parse_expect(ccb, ';'); + return read; +} + +static ccb_ast_t* ccb_parse_statement_for(ccb_t* ccb) { + ccb_parse_expect(ccb, '('); + ccb_ast_localenv = ccb_table_create(ccb_ast_localenv); + ccb_ast_t* init = 
ccb_parse_statement_declaration_semicolon(ccb); + ccb_ast_t* cond = ccb_parse_expression_semicolon(ccb); + ccb_ast_t* step = ccb_lexer_ispunct(ccb, ccb_lexer_peek(ccb), ')') ? NULL : ccb_parse_expression_withcomma(ccb); + ccb_parse_expect(ccb, ')'); + ccb_ast_t* body = ccb_parse_statement(ccb); + ccb_ast_localenv = ccb_table_parent(ccb_ast_localenv); + return ccb_ast_for(ccb, init, cond, step, body); +} + +static ccb_ast_t* ccb_parse_statement_while(ccb_t* ccb) { + ccb_parse_expect(ccb, '('); + ccb_ast_t* cond = ccb_parse_expression_withcomma(ccb); + ccb_parse_expect(ccb, ')'); + ccb_ast_t* body = ccb_parse_statement(ccb); + return ccb_ast_while(ccb, cond, body); +} + +static ccb_ast_t* ccb_parse_statement_do(ccb_t* ccb) { + ccb_ast_t* body = ccb_parse_statement(ccb); + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + + if (!ccb_parse_identifier_check(ccb, token, "while")) + ccb_compile_error(ccb, "expected while for do"); + + ccb_parse_expect(ccb, '('); + ccb_ast_t* cond = ccb_parse_expression_withcomma(ccb); + ccb_parse_expect(ccb, ')'); + ccb_parse_expect(ccb, ';'); + + return ccb_ast_do(ccb, cond, body); +} + +static ccb_ast_t* ccb_parse_statement_break(ccb_t* ccb) { + ccb_parse_expect(ccb, ';'); + return ccb_ast_make(ccb, CCB_AST_TYPE_STATEMENT_BREAK); +} + +static ccb_ast_t* ccb_parse_statement_continue(ccb_t* ccb) { + ccb_parse_expect(ccb, ';'); + return ccb_ast_make(ccb, CCB_AST_TYPE_STATEMENT_CONTINUE); +} + +static ccb_ast_t* ccb_parse_statement_case(ccb_t* ccb) { + int value = ccb_parse_evaluate(ccb, ccb_parse_expression(ccb)); + ccb_parse_expect(ccb, ':'); + return ccb_ast_case(ccb, value); +} + +static ccb_ast_t* ccb_parse_statement_switch(ccb_t* ccb) { + ccb_parse_expect(ccb, '('); + ccb_ast_t* expression = ccb_parse_expression_withcomma(ccb); + + /* TODO lvalueness test propogate?*/ + + ccb_parse_expect(ccb, ')'); + + /* Some special handling is added for switch statements leading directly into some case/s with no {...} */ + ccb_lexer_token_t* token 
= ccb_lexer_next(ccb); + bool wascase = ccb_parse_identifier_check(ccb, token, "case"); + if (wascase) { + /* If we go straight into some case/s, then we basically create an implicit compound statement assembling + * those cases and the following statement. The main difference between the result and a normal switch + * using a compound statement is that we don't create a new scope for the statement. + */ + ccb_list_t* statements = ccb_list_create(); + while (wascase) { + ccb_ast_t* casestmt = ccb_parse_statement_case(ccb); + ccb_list_push(statements, casestmt); + + token = ccb_lexer_next(ccb); + wascase = ccb_parse_identifier_check(ccb, token, "case"); + + if (!wascase) { + ccb_lexer_unget(ccb, token); + ccb_ast_t* stmt = ccb_parse_statement(ccb); + ccb_list_push(statements, stmt); + + return ccb_ast_switch(ccb, expression, ccb_ast_compound(ccb, statements)); + } + } + } else { + /* Otherwise we just unget the (non-"case") token and parse a normal/compound statement. */ + ccb_lexer_unget(ccb, token); + ccb_ast_t* body = ccb_parse_statement(ccb); + return ccb_ast_switch(ccb, expression, body); + } +} + +static ccb_ast_t* ccb_parse_statement_default(ccb_t* ccb) { + ccb_parse_expect(ccb, ':'); + return ccb_ast_make(ccb, CCB_AST_TYPE_STATEMENT_DEFAULT); +} + +static ccb_ast_t* ccb_parse_statement_return(ccb_t* ccb) { + if (ccb->func_callconv != 0) { + ccb_compile_warn(ccb, "Returning from non-standard calling convention %d", ccb->func_callconv); + } + ccb_ast_t* val = ccb_parse_expression(ccb); + ccb_parse_expect(ccb, ';'); + return ccb_ast_return(ccb, ccb->func_callconv, ccb_ast_data_table[CCB_AST_DATA_FUNCTION]->returntype, val); +} + +static ccb_ast_t* ccb_parse_statement_goto(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (!token || token->type != CCB_LEXER_TOKEN_IDENTIFIER) + ccb_compile_error(ccb, "expected identifier in goto statement"); + ccb_parse_expect(ccb, ';'); + + ccb_ast_t* node = ccb_ast_goto(ccb, token->string); + 
ccb_list_push(ccb_ast_gotos, node); + + return node; +} + +static ccb_ast_t* ccb_parse_statement_asm(ccb_t* ccb) { + int mode = 0; + ccb_list_t* l = ccb_list_create(); + char* line = ccb_lexer_consume_line(ccb, &mode); + fprintf(stderr, "MODE %d CODE: %s\n", mode, line); + ccb_list_push(l, line); + if (mode == 0) { + // No action, just a single asm line + } else if (mode == 1) { // Line starts with '{' + // Runs until mode == -1 ('}' line) or prints error if not 0 or 1 + do { + line = ccb_lexer_consume_line(ccb, &mode); + fprintf(stderr, "MODE %d CODE: %s\n", mode, line); + ccb_list_push(l, line); + } while (mode == 0); + if (mode != -1) { + ccb_compile_error(ccb, "Invalid asm line (likely stray '{'?)"); + } + } else { + ccb_compile_error(ccb, "Invalid asm line (likely stray '}')?"); + } + + ccb_ast_t* node = ccb_ast_asm(ccb, l); + return node; +} + +static void ccb_parse_label_backfill(ccb_t* ccb) { + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_ast_gotos); !ccb_list_iterator_end(it); ) { + ccb_ast_t* source = ccb_list_iterator_next(it); + char* label = source->gotostmt.label; + ccb_ast_t* destination = ccb_table_find(ccb_ast_labels, label); + + if (!destination) + ccb_compile_error(ccb, "undefined label: %s", label); + if (destination->gotostmt.where) + source->gotostmt.where = destination->gotostmt.where; + else + source->gotostmt.where = destination->gotostmt.where = ccb_ast_label(ccb); + + //fprintf(stderr, "Gave goto '%s' the target label '%s'.\n", source->gotostmt.label, source->gotostmt.where); + } +} + +static ccb_ast_t* ccb_parse_label(ccb_t* ccb, ccb_lexer_token_t* token) { + ccb_parse_expect(ccb, ':'); + char* label = token->string; + ccb_ast_t* node = ccb_ast_new_label(ccb, label); + + if (ccb_table_find(ccb_ast_labels, label)) + ccb_compile_error(ccb, "duplicate label: %s", label); + ccb_table_insert(ccb_ast_labels, label, node); + + return node; +} + +#ifdef CCB_X_OBJC +static ccb_ast_t* ccb_parse_statement_objc_throw(ccb_t* ccb) { + 
ccb_ast_t* val = ccb_parse_expression(ccb); + ccb_parse_expect(ccb, ';'); + return ccb_parse_runtime_call_1(ccb, "__oop_throw", val); +} +#endif + +static ccb_ast_t* ccb_parse_statement(ccb_t* ccb) { + //__ccb_checkstack(); + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + ccb_ast_t* ast; + /*fputs("TESTA\n", stderr); + fprintf(stderr, "TESTING\n");*/ + + if (ccb_lexer_ispunct(ccb, token, '{')) return ccb_parse_statement_compound(ccb); +#ifdef CCB_X_OBJC + if (ccb_lexer_ispunct(ccb, token, '@')) { + ccb_lexer_token_t* attoken = token; + token = ccb_lexer_next(ccb); + if (ccb_parse_identifier_check(ccb, token, "throw")) return ccb_parse_statement_objc_throw(ccb); + ccb_lexer_unget(ccb, token); + } +#endif + if (ccb_parse_identifier_check(ccb, token, "if")) return ccb_parse_statement_if(ccb); + if (ccb_parse_identifier_check(ccb, token, "for")) return ccb_parse_statement_for(ccb); + if (ccb_parse_identifier_check(ccb, token, "while")) return ccb_parse_statement_while(ccb); + if (ccb_parse_identifier_check(ccb, token, "do")) return ccb_parse_statement_do(ccb); + if (ccb_parse_identifier_check(ccb, token, "return")) return ccb_parse_statement_return(ccb); + if (ccb_parse_identifier_check(ccb, token, "switch")) return ccb_parse_statement_switch(ccb); + if (ccb_parse_identifier_check(ccb, token, "case")) return ccb_parse_statement_case(ccb); + if (ccb_parse_identifier_check(ccb, token, "default")) return ccb_parse_statement_default(ccb); + if (ccb_parse_identifier_check(ccb, token, "break")) return ccb_parse_statement_break(ccb); + if (ccb_parse_identifier_check(ccb, token, "continue")) return ccb_parse_statement_continue(ccb); + if (ccb_parse_identifier_check(ccb, token, "goto")) return ccb_parse_statement_goto(ccb); + if (ccb_parse_identifier_check(ccb, token, "__asm")) return ccb_parse_statement_asm(ccb); + + if (token->type == CCB_LEXER_TOKEN_IDENTIFIER && ccb_lexer_ispunct(ccb, ccb_lexer_peek(ccb), ':')) + return ccb_parse_label(ccb, token); + + 
ccb_lexer_unget(ccb, token); + + ast = ccb_parse_expression_withcomma(ccb); + ccb_parse_expect(ccb, ';'); + + return ast; +} + +int __ccb_checkstack(){ + #ifdef _ZCC_X64 + int* a = NULL; /* Should be unaligned. */ + int* b = NULL; /* Should be aligned. */ + if ((((long long) &b) % 16) != 0) { + fputs("ERROR: Stack unaligned in caller!\n", stderr); + /* Should trigger an error or loop. */ + while (a[0]) { + // ... + } + } else { + fputs("Stack is ok\n", stderr); + } + #endif + return 0; +} +int __ccb_ignore(int x) { + return x; +} +int __ccb_ignore2(int x, int y) { + return x; +} + + +static void ccb_parse_statement_declaration(ccb_t* ccb, ccb_list_t* list) { + /*fprintf(stderr, "TEST2\n"); + __ccb_checkstack(); + __ccb_ignore(__ccb_checkstack()); + __ccb_ignore2(33, __ccb_checkstack()); + __ccb_ignore2(__ccb_checkstack(), 99); + fprintf(stderr, "TESTING\n");*/ + ccb_lexer_token_t* token = ccb_lexer_peek(ccb); + //int test = 1; + if (!token) + ccb_compile_error(ccb, "statement declaration with unexpected ending"); + if (ccb_parse_type_check(ccb, token)) + ccb_parse_declaration(ccb, list, &ccb_ast_variable_local); // TODO: Should compiler allow function pointers w/o the "&" like gcc does? + else + ccb_list_push(list, ccb_parse_statement(ccb)); +} + +static ccb_ast_t* ccb_parse_statement_compound(ccb_t* ccb) { + /*__ccb_checkstack(); + fprintf(stderr, "TESTING\n");*/ + ccb_ast_localenv = ccb_table_create(ccb_ast_localenv); + ccb_list_t* statements = ccb_list_create(); + for (;;) { + /* Check for end before checking for inner statements, in order to handle {} without any statements. 
*/ + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '}')) + break; + + ccb_lexer_unget(ccb, token); + + ccb_parse_statement_declaration(ccb, statements); + } + ccb_ast_localenv = ccb_table_parent(ccb_ast_localenv); + return ccb_ast_compound(ccb, statements); +} + +static int ccb_argbuiltin_max(ccb_t* ccb) { + return 16; // This might be configurable in the future. +} + +static const char* ccb_argbuiltin(ccb_t* ccb, int argnum) { + switch (argnum) { + case 0: + return "__builtin_arg0"; + case 1: + return "__builtin_arg1"; + case 2: + return "__builtin_arg2"; + case 3: + return "__builtin_arg3"; + case 4: + return "__builtin_arg4"; + case 5: + return "__builtin_arg5"; + case 6: + return "__builtin_arg6"; + case 7: + return "__builtin_arg7"; + case 8: + return "__builtin_arg8"; + case 9: + return "__builtin_arg9"; + case 10: + return "__builtin_arg10"; + case 11: + return "__builtin_arg11"; + case 12: + return "__builtin_arg12"; + case 13: + return "__builtin_arg13"; + case 14: + return "__builtin_arg14"; + case 15: + return "__builtin_arg15"; + default: + return NULL; + } +} + +/* Returns the name of the buffer referenced by undefined argbuiltins. + * If non-NULL, then this must be defined anywhere __builtin_arg0 and similar are used. + * If it's NULL, then undefined __builtin_args are simply never defined. 
+ */ +static const char* ccb_argbuiltin_buffername(ccb_t* ccb) { + return NULL; +} + +static ccb_data_type_t* ccb_parse_function_parameters(ccb_t* ccb, ccb_list_t* paramvars, ccb_data_type_t* returntype) { + bool typeonly = !paramvars; + ccb_list_t* paramtypes = ccb_list_create(); + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + ccb_lexer_token_t* next = ccb_lexer_next(ccb); + + if (ccb_parse_identifier_check(ccb, token, "void") && ccb_lexer_ispunct(ccb, next, ')')) + return ccb_ast_prototype(ccb, returntype, paramtypes, false); + ccb_lexer_unget(ccb, next); + if (ccb_lexer_ispunct(ccb, token, ')')) + return ccb_ast_prototype(ccb, returntype, paramtypes, true); + ccb_lexer_unget(ccb, token); + + int paramcount; + + for (paramcount = 0;; paramcount++) { + token = ccb_lexer_next(ccb); + if (ccb_parse_identifier_check(ccb, token, "...")) { + if (ccb_list_length(paramtypes) == 0) + ccb_compile_error(ccb, "ICE: %s (0)", __func__); + ccb_parse_expect(ccb, ')'); + return ccb_ast_prototype(ccb, returntype, paramtypes, true); + } + else { + ccb_lexer_unget(ccb, token); + } + + ccb_data_type_t* ptype; + char* name; + ccb_parse_function_parameter(ccb, &ptype, &name, typeonly); + ccb_parse_semantic_notvoid(ccb, ptype); + if (ptype->type == CCB_TYPE_ARRAY) + ptype = ccb_ast_pointer(ccb, ptype->pointer); + ccb_list_push(paramtypes, ptype); + + if (!typeonly) { + void* vardef = ccb_ast_variable_local(ccb, ptype, name); + ccb_list_push(paramvars, vardef); + const char* builtinalias = ccb_argbuiltin(ccb, paramcount); + if (builtinalias != NULL /*paramcount == 0*/) { + /* Create a __builtin alias for the first argument. + * This is useful for iterating the arguments via macros, i.e. for a va_start macro to see where the last argument is relative to the first argument. + * In this implementation, it will conveniently be undefined unless a first argument is actually defined! Undefined __builtin_arg variables can be automatically simulated at lookup. 
+ */ + ccb_table_insert(ccb_ast_localenv, builtinalias /*"__builtin_arg0"*/, vardef); + } + } + + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ')')) + return ccb_ast_prototype(ccb, returntype, paramtypes, false); + + if (!ccb_lexer_ispunct(ccb, token, ',')) + ccb_compile_error(ccb, "ICE: %s (2)", __func__); + } + + /* Add any additional builtins. These are now mostly handled at-lookup. */ + //const char* buffername = ccb_argbuiltin_buffername(ccb); + //if (buffername != NULL) { + // int aliascount; + // for (aliascount = paramcount; aliascount < ccb_argbuiltin_max(ccb); aliascount++) { + // const char* builtinalias = ccb_argbuiltin(ccb, aliascount); + // ccb_table_insert(ccb_ast_localenv, builtinalias /*"__builtin_arg0"*/, bufferdef); + // } + //} +} + +static ccb_ast_t* ccb_parse_function_definition(ccb_t* ccb, ccb_data_type_t* functype, char* name, ccb_list_t* parameters, int asmstmt) { + ccb_ast_localenv = ccb_table_create(ccb_ast_localenv); + ccb_ast_locals = ccb_list_create(); + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = functype; + + ccb_ast_t* body; + if (asmstmt) { + //ccb_lexer_next(ccb); + body = ccb_parse_statement_asm(ccb); + } else { + body = ccb_parse_statement_compound(ccb); + } + ccb_ast_t* r = ccb_ast_function(ccb, functype, name, parameters, body, ccb_ast_locals); + + ccb_table_insert(ccb_ast_globalenv, name, r); + + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = NULL; + ccb_ast_localenv = NULL; + ccb_ast_locals = NULL; + + return r; +} + +static bool ccb_parse_function_definition_check(ccb_t* ccb) { + ccb_list_t* buffer = ccb_list_create(); + int nests = 0; + bool paren = false; + bool ready = true; + + for (;;) { + + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + ccb_list_push(buffer, token); + + //printf("Checking token... %d %s %s\n", nests, paren? 
"paren":"no-paren", ready?"ready":"no-ready"); + + if (!token) + ccb_compile_error(ccb, "function definition with unexpected ending"); + + if (nests == 0 && paren && (ccb_lexer_ispunct(ccb, token, '{') || ccb_parse_identifier_check(ccb, token, "__asm"))) + break; + + if (nests == 0 && (ccb_lexer_ispunct(ccb, token, ';') + || ccb_lexer_ispunct(ccb, token, ',') + || ccb_lexer_ispunct(ccb, token, '='))) + { + ready = false; + break; + } + + if (ccb_lexer_ispunct(ccb, token, '(')) + nests++; + + if (ccb_lexer_ispunct(ccb, token, ')')) { + if (nests == 0) + ccb_compile_error(ccb, "unmatches parenthesis"); + paren = true; + nests--; + } + } + + while (ccb_list_length(buffer) > 0) + ccb_lexer_unget(ccb, ccb_list_pop(buffer)); + + return ready; +} + + + +static ccb_ast_t* ccb_parse_function_definition_intermediate(ccb_t* ccb) { + //fprintf(stderr,"TESTING\n"); + ccb_data_type_t* basetype; + char* name; + ccb_list_t* parameters = ccb_list_create(); + + basetype = ccb_parse_declaration_specification(ccb, NULL); + ccb_ast_localenv = ccb_table_create(ccb_ast_globalenv); + ccb_ast_labels = ccb_table_create(NULL); + ccb_ast_gotos = ccb_list_create(); + int oldcallconv = ccb->func_callconv; + ccb->func_callconv = ccb->default_callconv; + + ccb_data_type_t* functype = ccb_parse_declarator(ccb, &name, basetype, parameters, CCB_CDECL_BODY); + if (functype->isnaked) ccb_compile_warn(ccb, "Got naked function"); + /* The ccb->func_name field is used for the __func__ builtin. 
*/ + char* oldname = ccb->func_name; + ccb->func_name = name; + //ccb->func_callconv = functype->callconv; + int asmstmt; + if (ccb_parse_identifier_check(ccb, ccb_lexer_peek(ccb), "__asm")) { + ccb_lexer_next(ccb); + asmstmt = 1; + } else { + asmstmt = 0; + ccb_parse_expect(ccb, '{'); + } + ccb_ast_t* value = ccb_parse_function_definition(ccb, functype, name, parameters, asmstmt); + + ccb_parse_label_backfill(ccb); + + ccb_ast_localenv = NULL; + ccb->func_callconv = oldcallconv; + ccb->func_name = oldname; + return value; +} + +/* TODO: Predeclaration is required to self-compile recursive functions. */ +static ccb_data_type_t* ccb_parse_declarator_direct_restage(ccb_t* ccb, ccb_data_type_t* basetype, ccb_list_t* parameters); +static ccb_data_type_t* ccb_parse_declarator_direct_restage(ccb_t* ccb, ccb_data_type_t* basetype, ccb_list_t* parameters) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '[')) { + int length; + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, ']')) + length = -1; + else { + ccb_lexer_unget(ccb, token); + length = ccb_parse_evaluate(ccb, ccb_parse_expression(ccb)); + ccb_parse_expect(ccb, ']'); + } + + ccb_data_type_t* type = ccb_parse_declarator_direct_restage(ccb, basetype, parameters); + if (type->type == CCB_TYPE_FUNCTION) + ccb_compile_error(ccb, "array of functions"); + return ccb_ast_array(ccb, type, length); + } + if (ccb_lexer_ispunct(ccb, token, '(')) { + if (basetype->type == CCB_TYPE_FUNCTION) + ccb_compile_error(ccb, "function returning function"); + if (basetype->type == CCB_TYPE_ARRAY) + ccb_compile_error(ccb, "function returning array"); + return ccb_parse_function_parameters(ccb, parameters, basetype); + } + ccb_lexer_unget(ccb, token); + return basetype; +} + +static void ccb_parse_qualifiers_skip(ccb_t* ccb) { + for (;;) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + if (ccb_parse_identifier_check(ccb, token, "const") + || ccb_parse_identifier_check(ccb, token, 
"volatile") + || ccb_parse_identifier_check(ccb, token, "restrict")) { + continue; + } + ccb_lexer_unget(ccb, token); + return; + } +} + +// TODO: Predeclaration is required for recursive functions +static ccb_data_type_t* ccb_parse_declarator_direct(ccb_t* ccb, char** rname, ccb_data_type_t* basetype, ccb_list_t* parameters, ccb_cdecl_t context); +static ccb_data_type_t* ccb_parse_declarator_direct(ccb_t* ccb, char** rname, ccb_data_type_t* basetype, ccb_list_t* parameters, ccb_cdecl_t context) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + ccb_lexer_token_t* next = ccb_lexer_peek(ccb); + bool hascallconv = false; + bool isnaked = false; + int callconv = 0; + + if (ccb_lexer_ispunct(ccb, token, '(') && !ccb_parse_type_check(ccb, next) && !ccb_lexer_ispunct(ccb, next, ')')) { + if (ccb_parse_identifier_check(ccb, next, "__cdecl")) { + fprintf(stderr, "NOTE: Parsed a __cdecl but this may be ignored\n"); + // TODO.. + hascallconv = true; + callconv = 100; + next = ccb_lexer_next(ccb); + } else if (ccb_parse_identifier_check(ccb, next, "__classic_call")) { + fprintf(stderr, "NOTE: Parsed a __classic_call but this may be ignored\n"); + // TODO.. + hascallconv = true; + callconv = 101; + next = ccb_lexer_next(ccb); + } + if (ccb_parse_identifier_check(ccb, next, "__naked")) { + fprintf(stderr, "NOTE: Parsed a __naked\n"); + // TODO.. 
+ isnaked = true; + next = ccb_lexer_next(ccb); + } + ccb_data_type_t* stub = ccb_ast_type_stub(ccb); + ccb_data_type_t* type = ccb_parse_declarator_direct(ccb, rname, stub, parameters, context); + //fprintf(stderr, "AAA\n"); + ccb_parse_expect(ccb, ')'); + //fprintf(stderr, "AAB\n"); + + // TODO: This doesn't compile yet: *stub = *ccb_parse_declarator_direct_restage(ccb, basetype, parameters); + ccb_data_type_t* tmp = ccb_parse_declarator_direct_restage(ccb, basetype, parameters); + memcpy(stub, tmp, sizeof(ccb_data_type_t)); + if (hascallconv) { + type->callconv = callconv; + ccb->func_callconv = callconv; + ccb_compile_warn(ccb, "Set non-standard calling convention"); + } else { + type->callconv = ccb->default_callconv; + } + type->isnaked = isnaked; + return type; + } + + if (ccb_lexer_ispunct(ccb, token, '*')) { + ccb_parse_qualifiers_skip(ccb); + ccb_data_type_t* stub = ccb_ast_type_stub(ccb); + ccb_data_type_t* type = ccb_parse_declarator_direct(ccb, rname, stub, parameters, context); + + // TODO: This doesn't compile yet: *stub = *ccb_ast_pointer(ccb, basetype); + ccb_data_type_t* tmp = ccb_ast_pointer(ccb, basetype);; + memcpy(stub, tmp, sizeof(ccb_data_type_t)); + // TODO: Do I need to check callconv here too? + return type; + } + +#ifdef CCB_X_OBJC + if (context == CCB_CDECL_OBJCPARAMETER) { + if (ccb_lexer_ispunct(ccb, token, ')')) { + token = ccb_lexer_next(ccb); + } + else { + ccb_compile_error(ccb, "expected `)' to finish objc parameter type"); + } + } +#endif + + if (ccb_parse_identifier_check(ccb, token, "__cdecl")) { + fprintf(stderr, "NOTE: Parsed a __cdecl but this may be ignored\n"); + // TODO.. + hascallconv = true; + callconv = 100; + token = ccb_lexer_next(ccb); + } else if (ccb_parse_identifier_check(ccb, token, "__classic_call")) { + fprintf(stderr, "NOTE: Parsed a __classic_call but this may be ignored\n"); + // TODO.. 
+ hascallconv = true; + callconv = 101; + token = ccb_lexer_next(ccb); + } + if (ccb_parse_identifier_check(ccb, token, "__naked")) { + fprintf(stderr, "NOTE: Parsed a __naked\n"); + // TODO.. + isnaked = true; + token = ccb_lexer_next(ccb); + } + + if (token->type == CCB_LEXER_TOKEN_IDENTIFIER) { + if (context == CCB_CDECL_CAST || rname == NULL) + ccb_compile_error(ccb, "wasn't expecting identifier `%s'", ccb_lexer_tokenstr(ccb, token)); + *rname = token->string; + ccb_data_type_t* type = ccb_parse_declarator_direct_restage(ccb, basetype, parameters); + if (hascallconv) { + type->callconv = callconv; + ccb->func_callconv = callconv; + ccb_compile_warn(ccb, "Set non-standard calling convention"); + } else { + type->callconv = ccb->default_callconv; + } + type->isnaked = isnaked; + return type; + } + +#ifdef CCB_X_OBJC + if (context == CCB_CDECL_BODY || context == CCB_CDECL_PARAMETER || context == CCB_CDECL_OBJCPARAMETER) +#else + if (context == CCB_CDECL_BODY || context == CCB_CDECL_PARAMETER) +#endif + ccb_compile_error(ccb, "expected identifier, `(` or `*` for declarator"); + + ccb_lexer_unget(ccb, token); + + ccb_data_type_t* result = ccb_parse_declarator_direct_restage(ccb, basetype, parameters); + if (hascallconv) { + result->callconv = callconv; + ccb->func_callconv = callconv; + ccb_compile_warn(ccb, "Set non-standard calling convention"); + } else { + result->callconv = ccb->default_callconv; // If no calling convention is specified, use the default + } + result->isnaked = isnaked; + return result; +} + +static void ccb_parse_array_fix(ccb_t* ccb, ccb_data_type_t* type); // TODO: Predeclaration required for self-compile +static void ccb_parse_array_fix(ccb_t* ccb, ccb_data_type_t* type) { + if (type->type == CCB_TYPE_ARRAY) { + ccb_parse_array_fix(ccb, type->pointer); + type->size = type->length * type->pointer->size; + } + else if (type->type == CCB_TYPE_POINTER) { + ccb_parse_array_fix(ccb, type->pointer); + } + else if (type->type == CCB_TYPE_FUNCTION) { 
+ ccb_parse_array_fix(ccb, type->returntype); + } +} + +static ccb_data_type_t* ccb_parse_declarator(ccb_t* ccb, char** rname, ccb_data_type_t* basetype, ccb_list_t* parameters, ccb_cdecl_t context) { + ccb_data_type_t* type = ccb_parse_declarator_direct(ccb, rname, basetype, parameters, context); +#ifdef CCB_X_OBJC + if (context != CCB_CDECL_OBJCPARAMETER) +#endif + ccb_parse_array_fix(ccb, type); + return type; +} + +void ccb_extrastage_setpredeclared(ccb_t* ccb, const char* name); +void ccb_extrastage_setknownextern(ccb_t* ccb, const char* name); + +static void ccb_parse_declaration(ccb_t* ccb, ccb_list_t* list, ccb_ast_t* (*make)(ccb_t* ccb, ccb_data_type_t*, char*)) { + ccb_storage_t storage; + ccb_data_type_t* basetype = ccb_parse_declaration_specification(ccb, &storage); + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + + if (ccb_lexer_ispunct(ccb, token, ';')) + return; + + ccb_lexer_unget(ccb, token); + + for (;;) { + char* name = NULL; + ccb_data_type_t* type = ccb_parse_declarator(ccb, &name, ccb_ast_type_copy_incomplete(ccb, basetype), NULL, CCB_CDECL_BODY); + + if (storage == CCB_STORAGE_STATIC) + type->isstatic = true; + + token = ccb_lexer_next(ccb); + if (ccb_lexer_ispunct(ccb, token, '=')) { + if (storage == CCB_STORAGE_TYPEDEF) + ccb_compile_error(ccb, "invalid use of typedef"); + ccb_parse_semantic_notvoid(ccb, type); + ccb_ast_t* var = make(ccb, type, name); + ccb_list_push(list, ccb_ast_declaration(ccb, var, ccb_parse_initializer_declaration(ccb, var->ctype))); + token = ccb_lexer_next(ccb); + /* NOTE: This should override isknownextern, in the event that something is declared as extern and then declared again. 
*/ + ccb_extrastage_setpredeclared(ccb, name); + } + else if (storage == CCB_STORAGE_TYPEDEF) { + ccb_table_insert(ccb_parse_typedefs, name, type); + } + else if (type->type == CCB_TYPE_FUNCTION) { + make(ccb, type, name); + } + else { + ccb_ast_t* var = make(ccb, type, name); + if (storage == CCB_STORAGE_EXTERN) { + ccb_extrastage_setknownextern(ccb, name); + } else { + /* NOTE: This should override isknownextern, in the event that something is declared as extern and then declared again. */ + ccb_extrastage_setpredeclared(ccb, name); + ccb_list_push(list, ccb_ast_declaration(ccb, var, NULL)); + } + } + if (ccb_lexer_ispunct(ccb, token, ';')) + return; + if (!ccb_lexer_ispunct(ccb, token, ',')) + ccb_compile_error(ccb, "Confused!"); + } +} + +#ifdef CCB_X_OBJC +bool ccb_parse_objc_definition_check(ccb_t* ccb) { + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + + if (ccb_lexer_ispunct(ccb, token, '@')) { + token = ccb_lexer_next(ccb); + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER) { + ccb_compile_error(ccb, "unexpected token following @: `%s'", ccb_lexer_tokenstr(ccb, token)); + } + + if (strcmp(token->string, "interface") == 0) { + ccb_lexer_unget(ccb, token); + return true; + } + else if (strcmp(token->string, "implementation") == 0) { + ccb_lexer_unget(ccb, token); + return true; + } + else if (strcmp(token->string, "class") == 0) { + ccb_lexer_unget(ccb, token); + return true; + } + else if (strcmp(token->string, "protocol") == 0) { + ccb_compile_error(ccb, "protocols aren't supported yet"); + return false; // Unreachable + } + else { + ccb_compile_error(ccb, "unexpected identifier following @: `%s'", token->string); + return false; // Unreachable + } + } + else { + ccb_lexer_unget(ccb, token); + return false; + } +} +#endif + +#ifdef CCB_X_OBJC +/* Parses method (definition and/or implementation) and returns it or NULL when finished. 
*/ +ccb_ast_t* ccb_parse_objc_method(ccb_t* ccb, char* classname, bool isimpl, ccb_list_t* initstmts) { + //TODO: ccb->func_callconv = 101; + ccb_lexer_token_t* token = ccb_lexer_next(ccb); // Should be either + or - if this is a method + bool ismeta = false; + char* selector = NULL; + char* abiname = NULL; + char* tmp = NULL; + bool moreargs = false; + + if (ccb_lexer_ispunct(ccb, token, '+')) { + ismeta = true; + ccb->oop_ismeta = true; + } + else if (ccb_lexer_ispunct(ccb, token, '-')) { + ismeta = false; + ccb->oop_ismeta = false; + } + else { + ccb_lexer_unget(ccb, token); + return NULL; + } + + if (isimpl) { + + ccb_ast_localenv = ccb_table_create(ccb_ast_globalenv); + ccb_ast_labels = ccb_table_create(NULL); + ccb_ast_gotos = ccb_list_create(); + } + + token = ccb_lexer_next(ccb); + ccb_data_type_t* rettype = NULL; + if (ccb_lexer_ispunct(ccb, token, '(')) { + ccb_data_type_t* basetype = ccb_parse_declaration_specification(ccb, NULL); + rettype = ccb_parse_declarator(ccb, NULL, basetype, NULL, CCB_CDECL_TYPEONLY); + ccb_parse_expect(ccb, ')'); + token = ccb_lexer_next(ccb); + } + + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER) { + ccb_compile_error(ccb, "unexpected token following `%s': `%s'", ismeta ? "+" : "-", ccb_lexer_tokenstr(ccb, token)); + } + + selector = calloc(strlen(token->string) + 1000, 1); // TODO: A more appropriate limit + abiname = calloc(strlen(token->string) + 1000, 1); // TODO: A more appropriate limit + strcat(selector, token->string);// _strdup(token->string); + strcat(abiname, classname); + strcat(abiname, "__");// _strdup(token->string); + if (ismeta) { + strcat(abiname, "meta__"); + } else { + strcat(abiname, "instance__"); + } + strcat(abiname, token->string);// _strdup(token->string); + + ccb_list_t* paramtypes = ccb_list_create(); + ccb_list_t* paramvars = ccb_list_create(); + + ccb_list_push(paramtypes, ccb_ast_data_table[CCB_AST_DATA_ID]); + ccb_list_push(paramvars, ccb_ast_variable_local(ccb, ismeta ? 
ccb_ast_data_table[CCB_AST_DATA_ID] : ccb_ast_pointer(ccb, ccb_table_find(ccb_parse_typedefs, ccb->oop_classname)), "self")); + + if (ccb_lexer_ispunct(ccb, token = ccb_lexer_next(ccb), ':')) { + moreargs = true; + do { + tmp = selector; + selector = strcat(tmp, ":"); + strcat(abiname, "_"); + //free(tmp); XXX wrong place? + token = ccb_lexer_next(ccb); + char* argname; + if (ccb_lexer_ispunct(ccb, token, '(')) { + ccb_data_type_t* basetype = ccb_parse_declaration_specification(ccb, NULL); + ccb_data_type_t* casttype = ccb_parse_declarator(ccb, &argname, basetype, NULL, CCB_CDECL_OBJCPARAMETER); + ccb_list_push(paramtypes, casttype); + ccb_list_push(paramvars, ccb_ast_variable_local(ccb, casttype, argname)); + } + else { + ccb_lexer_unget(ccb, token); + token = NULL; + } + token = ccb_lexer_next(ccb); + if (token->type == CCB_LEXER_TOKEN_IDENTIFIER) { + tmp = selector; + selector = strcat(tmp, token->string); + strcat(abiname, token->string); + //free(tmp); + ccb_parse_expect(ccb, ':'); + moreargs = true; + } + else { + ccb_lexer_unget(ccb, token); + token = NULL; + moreargs = false; + } + } while (moreargs); + } + else { + ccb_lexer_unget(ccb, token); + } + + // ccb_data_type_t* ccb_ast_prototype(ccb_t* ccb, ccb_data_type_t* returntype, ccb_list_t* paramtypes, bool dots); + ccb_data_type_t* functype = ccb_ast_prototype(ccb, rettype, paramtypes, false); + //TODO: functype->callconv = 101; + + //if (isimpl) { + + ccb_ast_localenv = ccb_table_create(ccb_ast_localenv); + ccb_ast_locals = ccb_list_create(); + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = functype; + + /*if (!ccb_lexer_ispunct(ccb, token, '{')) { + ccb_compile_error(ccb, "Expected {...} for method implementation"); + }*/ + //ccb_lexer_next(ccb); + //fprintf(stderr, "Parsing body...\n"); + ccb_ast_t* body = ccb_parse_statement(ccb);//_compound(ccb); + //fprintf(stderr, "Done parsing body...\n"); + + ccb_ast_t* r = ccb_ast_function(ccb, functype, abiname, paramvars, body, ccb_ast_locals); + //TODO: 
r->function.callconv = 101; + + ccb_table_insert(ccb_ast_globalenv, abiname, r); + + ccb_ast_t* selstr = ccb_ast_new_string(ccb, selector); + ccb_list_push(ccb_ast_strings, selstr); + + if (isimpl) { + ccb_list_push(initstmts, + ccb_parse_runtime_call_3(ccb, ismeta ? "__oop_addmetamethod" : "__oop_addinstancemethod", + ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], classname), + selstr, + ccb_ast_new_unary(ccb, CCB_AST_TYPE_ADDRESS, ccb_ast_pointer(ccb, r->ctype), r)));//ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], 1234))); + } + + ccb_data_type_t* cltype = ccb_table_find(ccb_parse_typedefs, classname); + + if (cltype == NULL) { + ccb_compile_error(ccb, "Can't find class '%s' (is it defined?)", classname); + } + + if (ismeta) { + ccb_table_insert(cltype->cmethods, selector, functype); + } else { + ccb_table_insert(cltype->imethods, selector, functype); + } + + /* NOTE: This is a bit of a hack, but the objx_addinstancemethod/objx_addmetamethod call is + * added to the initialisation stub function BEFORE we leave the context of the function we're adding. + * This is so that objx_addinstancemethod/objx_addmetamethod gets looked up properly (otherwise, we'd + * have to re-enter the init stub context instead, but the effect would be the same). + */ + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = NULL; + ccb_ast_localenv = NULL; + ccb_ast_locals = NULL; + + ccb->func_callconv = ccb->default_callconv; + + ccb->oop_ismeta = false; + + if (!isimpl) { + return (void*)(size_t)1; // XXX hack... TODO wtf was I doing here?? + } + + return r; + //} + //else { + // ccb_parse_expect(ccb, ';'); + //} + + return (void*)(size_t)1; // XXX hack... 
+} + +ccb_ast_t* ccb_parse_objc_useinitstub(ccb_t* ccb, ccb_ast_t* expr) { + char* abiname = calloc(1000, 1); // TODO: A more appropriate limit + strcat(abiname, expr->variable.name); + strcat(abiname, "__init"); + ccb_ast_t* initfunc = ccb_table_find(ccb_ast_globalenv, abiname); + if (initfunc == NULL) { + ccb_compile_error(ccb, "Can't find initialisation function '%s'", abiname); + } + ccb_ast_t* funcptrexpr = ccb_ast_new_unary(ccb, CCB_AST_TYPE_ADDRESS, ccb_ast_pointer(ccb, initfunc->ctype), initfunc); + return ccb_parse_runtime_call_2(ccb, "__oop_getclass", + ccb_ast_new_unary(ccb, CCB_AST_TYPE_ADDRESS, + ccb_ast_pointer(ccb, ccb_ast_data_table[CCB_AST_DATA_ID]), + expr), + funcptrexpr); +} + +ccb_ast_t* ccb_parse_objc_mkinitstub(ccb_t* ccb, char* classname, ccb_list_t* stmts) { + //bool isimpl = true; + //ccb_lexer_token_t* token = ccb_lexer_next(ccb); // Should be either + or - if this is a method + //bool ismeta = false; + //char* selector = NULL; + char* abiname = NULL; + //char* tmp = NULL; + //bool moreargs = false; + + + ccb_data_type_t* typ = ccb_table_find(ccb_parse_typedefs, classname); + const char* supname = ""; + int siz = -1; + if (typ == NULL) { + ccb_compile_warn(ccb, "No corresponding @interface for '%s'", classname); + } else { + supname = typ->supername; + siz = typ->size; + //if (supname == NULL) { + //supname = ""; + //} + } + + ccb_ast_localenv = ccb_table_create(ccb_ast_globalenv); + ccb_ast_labels = ccb_table_create(NULL); + ccb_ast_gotos = ccb_list_create(); + + ccb_data_type_t* rettype = ccb_ast_data_table[CCB_AST_DATA_VOID]; + + abiname = calloc(1000, 1); // TODO: A more appropriate limit + strcat(abiname, classname); + strcat(abiname, "__init");// _strdup(token->string); + + ccb_list_t* paramtypes = ccb_list_create(); + ccb_list_t* paramvars = ccb_list_create(); + + //ccb_list_push(paramtypes, ccb_ast_data_table[CCB_AST_DATA_INT]); + //ccb_list_push(paramvars, ccb_ast_variable_local(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], 
"self")); + + // ccb_data_type_t* ccb_ast_prototype(ccb_t* ccb, ccb_data_type_t* returntype, ccb_list_t* paramtypes, bool dots); + ccb_data_type_t* functype = ccb_ast_prototype(ccb, rettype, paramtypes, false); + + ccb_ast_localenv = ccb_table_create(ccb_ast_localenv); + ccb_ast_locals = ccb_list_create(); + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = functype; + + ccb_ast_t* body = ccb_ast_compound(ccb, (stmts == NULL) ? ccb_list_create() : stmts); + + ccb_ast_t* namestr = ccb_ast_new_string(ccb, classname); + ccb_list_push(ccb_ast_strings, namestr); + //ccb_ast_t* superstr = ccb_ast_new_string(ccb, supname); + //ccb_list_push(ccb_ast_strings, superstr); + + if (stmts != NULL) { + ccb_list_push(stmts, + ccb_parse_runtime_call_4(ccb, "__oop_addclass", + ccb_ast_new_unary(ccb, CCB_AST_TYPE_ADDRESS, ccb_ast_pointer(ccb, ccb_ast_data_table[CCB_AST_DATA_ID]), ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], classname)), + namestr, + ((supname == NULL) ? ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LLONG], 0) : ccb_parse_objc_useinitstub(ccb, ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], supname))),//superstr, + ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], siz))); // Instance size + } + + ccb_ast_t* r = ccb_ast_function(ccb, functype, abiname, paramvars, body, ccb_ast_locals); + + ccb_table_insert(ccb_ast_globalenv, abiname, r); + + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = NULL; + ccb_ast_localenv = NULL; + ccb_ast_locals = NULL; + + /* Once we've got the ABI name, we can add a call to it within the module initialisation. 
*/ + if (stmts != NULL) { + if(ccb->mod_initstmts != NULL) { + ccb_list_push(ccb->mod_initstmts, ccb_parse_runtime_call_0(ccb, abiname)); + } else { + fprintf(stderr, "WARNING: Classes are implemented here but with no module name for initialisation!\n"); + } + } + + return r; +} + +ccb_ast_t* ccb_parse_objc_mkmodinitstub(ccb_t* ccb, char* modname, ccb_list_t* stmts) { + //bool isimpl = true; + //ccb_lexer_token_t* token = ccb_lexer_next(ccb); // Should be either + or - if this is a method + //bool ismeta = false; + //char* selector = NULL; + char* abiname = NULL; + //char* tmp = NULL; + //bool moreargs = false; + + ccb_ast_localenv = ccb_table_create(ccb_ast_globalenv); + ccb_ast_labels = ccb_table_create(NULL); + ccb_ast_gotos = ccb_list_create(); + + ccb_data_type_t* rettype = ccb_ast_data_table[CCB_AST_DATA_VOID]; + + abiname = calloc(1000, 1); // TODO: A more appropriate limit + strcat(abiname, "__module__"); + strcat(abiname, modname); + strcat(abiname, "__init");// _strdup(token->string); + + ccb_list_t* paramtypes = ccb_list_create(); + ccb_list_t* paramvars = ccb_list_create(); + + //ccb_list_push(paramtypes, ccb_ast_data_table[CCB_AST_DATA_INT]); + //ccb_list_push(paramvars, ccb_ast_variable_local(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], "self")); + + // ccb_data_type_t* ccb_ast_prototype(ccb_t* ccb, ccb_data_type_t* returntype, ccb_list_t* paramtypes, bool dots); + ccb_data_type_t* functype = ccb_ast_prototype(ccb, rettype, paramtypes, false); + + ccb_ast_localenv = ccb_table_create(ccb_ast_localenv); + ccb_ast_locals = ccb_list_create(); + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = functype; + + ccb_ast_t* body = ccb_ast_compound(ccb, (stmts == NULL) ? 
ccb_list_create() : stmts); + + //ccb_ast_t* namestr = ccb_ast_new_string(ccb, classname); + //ccb_list_push(ccb_ast_strings, namestr); + //ccb_ast_t* superstr = ccb_ast_new_string(ccb, supname); + //ccb_list_push(ccb_ast_strings, superstr); + + /* + if (stmts != NULL) { + ccb_list_push(stmts, + ccb_parse_runtime_call_4(ccb, "__oop_addclass", + ccb_ast_new_unary(ccb, CCB_AST_TYPE_ADDRESS, ccb_ast_pointer(ccb, ccb_ast_data_table[CCB_AST_DATA_ID]), ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], classname)), + namestr, + ((supname == NULL) ? ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_LLONG], 0) : ccb_parse_objc_useinitstub(ccb, ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], supname))),//superstr, + ccb_ast_new_integer(ccb, ccb_ast_data_table[CCB_AST_DATA_INT], siz))); // Instance size + } + */ + + ccb_ast_t* r = ccb_ast_function(ccb, functype, abiname, paramvars, body, ccb_ast_locals); + + ccb_table_insert(ccb_ast_globalenv, abiname, r); + + ccb_ast_data_table[CCB_AST_DATA_FUNCTION] = NULL; + ccb_ast_localenv = NULL; + ccb_ast_locals = NULL; + return r; +} +#endif + +#ifdef CCB_X_OBJC +void ccb_parse_objc_interface(ccb_t* ccb, ccb_list_t* list, char* name) { + ccb->oop_classname = name; + ccb_lexer_token_t* token = ccb_lexer_next(ccb); + char* supname = NULL; + ccb_data_type_t* suptype = NULL; + int supsize = 0; + ccb_ast_t* method = NULL; + + if (ccb_lexer_ispunct(ccb, token, ':')) { + token = ccb_lexer_next(ccb); + if (token->type == CCB_LEXER_TOKEN_IDENTIFIER) { + supname = token->string; + suptype = ccb_table_find(ccb_parse_typedefs, supname); + if (suptype == NULL) { + ccb_compile_error("Undefined base class '%s'", supname); + } else { + supsize = suptype->size; + while ((supsize % ccb_target_type_size_pointer(ccb)) != 0) { + supsize++; + } + } + } + else { + ccb_compile_error(ccb, "unexpected token following `:': `%s'", ccb_lexer_tokenstr(ccb, token)); + } + } + else { + ccb_lexer_unget(ccb, token); + token = 
NULL; + } + + /* Before we actually begin parsing the class, which initially happens by parsing the "struct"-like part, + * we first create a dummy typedef. + * That is, an "@interface Foo {...}" translates into a "typedef struct Foo Foo", followed by the "struct Foo {...}" + * (the main difference for the struct part being that a flag is set to say that it's a class). + */ + + /* The first call to ccb_parse_tag_definition(...) is given a -1 initial size, indicating that it isn't to + * attempt to read any fields yet. + */ + ccb_data_type_t* foo = ccb_parse_tag_definition(ccb, ccb_ast_structures, true, -1, name); + + foo->classname = name; + foo->supername = supname; + + ccb_table_insert(ccb_parse_typedefs, name, foo); + + foo = ccb_parse_tag_definition(ccb, ccb_ast_structures, true, supsize, name); + + foo->classname = name; + foo->supername = supname; + + if (suptype == NULL) { + foo->cmethods = ccb_table_create(NULL); + foo->imethods = ccb_table_create(NULL); + } else { + foo->cmethods = ccb_table_create(suptype->cmethods); + foo->imethods = ccb_table_create(suptype->imethods); + } + + ccb_table_insert(ccb_parse_typedefs, name, foo); //ccb_ast_data_table[CCB_AST_DATA_INT]); + + while ((method = ccb_parse_objc_method(ccb, name, false, NULL)) != NULL) { + //ccb_list_push(list, method); + //ccb_compile_warn(ccb, "ignoring @interface method (TODO)"); + } + + //ccb_compile_error(ccb, "TODO: @interface"); + + ccb_parse_expect(ccb, '@'); + token = ccb_lexer_next(ccb); + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER || strcmp(token->string, "end") != 0) { + ccb_compile_error(ccb, "unexpected token following `@': `%s'", ccb_lexer_tokenstr(ccb, token)); + } + ccb_ast_t* globvar = ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], name); + globvar->variable.isclassobj = true; + + //ccb_list_t* initstmts = ccb_list_create(); + ccb_parse_objc_mkinitstub(ccb, name, NULL); //initstmts); + + ccb->oop_classname = NULL; +} +#endif + +#ifdef CCB_X_OBJC +void 
ccb_parse_objc_implementation(ccb_t* ccb, ccb_list_t* list, char* name) { + ccb->oop_classname = name; + ccb_lexer_token_t* token = NULL; + ccb_ast_t* method = NULL; + + ccb_list_t* initstmts = ccb_list_create(); + ccb_ast_t* initstub = ccb_parse_objc_mkinitstub(ccb, name, initstmts); + + ccb_list_push(list, initstub); + + while ((method = ccb_parse_objc_method(ccb, name, true, initstmts)) != NULL) { + ccb_list_push(list, method); + //ccb_compile_warn(ccb, "ignoring @implementation method (TODO)"); + } + + ccb_parse_expect(ccb, '@'); + token = ccb_lexer_next(ccb); + if (token->type != CCB_LEXER_TOKEN_IDENTIFIER || strcmp(token->string, "end") != 0) { + ccb_compile_error(ccb, "unexpected token following `@': `%s'", ccb_lexer_tokenstr(ccb, token)); + } + + // Has already been declared like an extern in the @interface section, but the class object is declared as a global here + ccb_ast_t* globvar = ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], name); + globvar->variable.isclassobj = true; + ccb_list_push(list, ccb_ast_declaration(ccb, globvar, ccb_list_create())); + + ccb->oop_classname = NULL; + + //ccb_compile_error(ccb, "TODO: @implementation"); +} +#endif + +#ifdef CCB_X_OBJC +void ccb_parse_objc_forwardclass(ccb_t* ccb, ccb_list_t* list, char* name) { + ccb_parse_expect(ccb, ';'); + + ccb_table_insert(ccb_parse_typedefs, name, ccb_ast_data_table[CCB_AST_DATA_ID]); + + ccb_compile_warn(ccb, "TODO: Finish @class"); + + //ccb_list_push(list, ccb_ast_declaration(ccb, ccb_ast_variable_global(ccb, ccb_ast_data_table[CCB_AST_DATA_ID], name), ccb_list_create())); +} +#endif + +#ifdef CCB_X_OBJC +void ccb_parse_objc_definition(ccb_t* ccb, ccb_list_t* list) { + // The '@' token has already been read (but the 'interface'/'implementation'/... 
and name tokens haven't) + ccb_lexer_token_t* ttoken = ccb_lexer_next(ccb); + ccb_lexer_token_t* ntoken = NULL; + + if (ttoken->type == CCB_LEXER_TOKEN_IDENTIFIER) { + ntoken = ccb_lexer_next(ccb); + if (ntoken->type != CCB_LEXER_TOKEN_IDENTIFIER) { + ccb_compile_error(ccb, "unexpected token following @%s: `%s'", ttoken->string, ccb_lexer_tokenstr(ccb, ntoken)); + } + + if (strcmp(ttoken->string, "interface") == 0) { + ccb_parse_objc_interface(ccb, list, ntoken->string); + return; + } + else if (strcmp(ttoken->string, "implementation") == 0) { + ccb_parse_objc_implementation(ccb, list, ntoken->string); + return; + } + else if (strcmp(ttoken->string, "class") == 0) { + ccb_parse_objc_forwardclass(ccb, list, ntoken->string); + return; + } + ccb_lexer_unget(ccb, ntoken); + } + + ccb_compile_error(ccb, "TODO: @%s", ccb_lexer_tokenstr(ccb, ttoken)); +} +#endif + +int ccb_extrastage_ispredeclared(ccb_t* ccb, const char* name) { + if (ccb->declarednames != NULL) { + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb->declarednames); !ccb_list_iterator_end(it); ) { + const char* entry = ccb_list_iterator_next(it); + if (!strcmp(name, entry)) + return 1; + } + } + return 0; +} + +void ccb_extrastage_setpredeclared(ccb_t* ccb, const char* name) { + if (!ccb_extrastage_ispredeclared(ccb, name)) { + if (ccb->declarednames == NULL) { + ccb->declarednames = ccb_list_create(); + } + ccb_list_push(ccb->declarednames, name); + } +} + +int ccb_extrastage_isknownextern(ccb_t* ccb, const char* name) { + if (ccb->knownexterns != NULL) { + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb->knownexterns); !ccb_list_iterator_end(it); ) { + const char* entry = ccb_list_iterator_next(it); + if (!strcmp(name, entry)) + return 1; + } + } + return 0; +} + +void ccb_extrastage_setknownextern(ccb_t* ccb, const char* name) { + if (!ccb_extrastage_isknownextern(ccb, name)) { + if (ccb->knownexterns == NULL) { + ccb->knownexterns = ccb_list_create(); + } + ccb_list_push(ccb->knownexterns, 
name); + } +} + +ccb_list_t* ccb_parse_run(ccb_t* ccb) { + ccb_list_t* list = ccb_list_create(); + + /* If we're going to use OOP initialisations (to load classes etc.) then we need to create + * an initialisation function, so that each class init can be invoked systematically. The + * alternative (used by GCC?) is to use linker tricks to concatenate any initialisation stuff, + * but this is problematic for many reasons (firstly you're stuck with one linker, and secondly + * it's more difficult to debug than just using some normal functions). This way, each logical + * module (OOP source file) has a single initialisation function, which calls in defined order + * any class initialisations. + */ + if (ccb->mod_name != NULL) { + ccb_list_t* initstmts = ccb_list_create(); + ccb_ast_t* initstub = ccb_parse_objc_mkmodinitstub(ccb, ccb->mod_name, initstmts); + + ccb_list_push(list, initstub); + + ccb->mod_initstmts = initstmts; + } + for (;;) { + if (!ccb_lexer_peek(ccb)) + return list; +#ifdef CCB_X_OBJC + if (ccb_parse_objc_definition_check(ccb)) { + ccb_parse_objc_definition(ccb, list); + } + else +#endif + if (ccb_parse_function_definition_check(ccb)) + ccb_list_push(list, ccb_parse_function_definition_intermediate(ccb)); + else + ccb_parse_declaration(ccb, list, &ccb_ast_variable_global); + } + return NULL; +} + + +/* From ifdef of CCB_IMPLEMENTATION: */ +#endif + +/* From ifndef at top of file: */ +#endif diff --git a/ccbgeneric.h b/ccbgeneric.h new file mode 100644 index 0000000..70fed4a --- /dev/null +++ b/ccbgeneric.h @@ -0,0 +1,4256 @@ +#ifndef CCBGENERIC_H +#define CCBGENERIC_H + +// TODO: Make the parser header consistent with my git version +#include "ccb.h" + +#ifdef CCBGENERIC_IMPLEMENTATION + +//#define CCB_TARGET_ENVPREFIX "CCB_" + +static int ccb_target_fam;// = -1; +static int ccb_target_wordsize_val;// = -1; +static int ccb_target_callconv_val;// = -1; + +static void ccb_target_gen_rv_addi(ccb_t* ccb, char* destr, char* srcr, int offset); +static void 
/* Returns nonzero when `value` is exactly one of the two accepted spellings. */
static int ccb_target_init_is(const char* value, const char* lower, const char* upper) {
    return strcmp(value, lower) == 0 || strcmp(value, upper) == 0;
}

/**
 * Initialises the file-scope target configuration from the environment.
 *
 * Reads CCB_FAMILY, CCB_CALLCONV and CCB_WORDSIZE and stores the decoded
 * values in ccb_target_fam, ccb_target_callconv_val and
 * ccb_target_wordsize_val. Unset variables fall back to x86, the standard
 * calling convention and a 64-bit word. Any unrecognised or unsupported
 * value is reported through ccb_compile_error().
 */
void ccb_target_init(ccb_t* ccb) {
    const char* x;

    ccb_target_fam = -1;
    ccb_target_wordsize_val = -1;
    ccb_target_callconv_val = -1;

    /* Architecture family. */
    x = getenv("CCB_FAMILY");
    if (x == NULL) {
        /* TODO: Guess based on build environment or use compiler flag?
         * (Maybe use GENERIC by default in the future?) */
        ccb_target_fam = CCB_ARCH_FAMILY_X86;
    }
    else if (ccb_target_init_is(x, "x86", "X86")) {
        ccb_target_fam = CCB_ARCH_FAMILY_X86;
    }
    else if (ccb_target_init_is(x, "arm", "ARM")) {
        ccb_target_fam = CCB_ARCH_FAMILY_ARM;
    }
    else if (ccb_target_init_is(x, "risc-v", "RISC-V") || ccb_target_init_is(x, "riscv", "RISCV")) {
        ccb_target_fam = CCB_ARCH_FAMILY_RISCV;
    }
    else if (ccb_target_init_is(x, "generic", "GENERIC")) {
        ccb_target_fam = CCB_ARCH_FAMILY_GENERIC;
    }
    else if (ccb_target_init_is(x, "gen1", "GEN1")) {
        ccb_target_fam = CCB_ARCH_FAMILY_GEN1;
    }
    else {
        ccb_compile_error(ccb, "Invalid value for environment variable (%s=\"%s\")", "CCB_FAMILY", x);
    }

    /* Calling convention. */
    x = getenv("CCB_CALLCONV");
    if (x == NULL || ccb_target_init_is(x, "standard", "STANDARD")) {
        ccb_target_callconv_val = CCB_TARGET_CALLCONV_STANDARD;
    }
    else if (ccb_target_init_is(x, "windows", "WINDOWS")) {
        ccb_target_callconv_val = CCB_TARGET_CALLCONV_WINDOWS;
    }
    else {
        ccb_compile_error(ccb, "Invalid value for environment variable (%s=\"%s\")", "CCB_CALLCONV", x);
    }

    /* Word size; left at -1 when unset so the family can pick its default below. */
    x = getenv("CCB_WORDSIZE");
    if (x != NULL) {
        /* Diagnostics go to stderr so they can never mix with generated output. */
        fprintf(stderr, "Decoding word size value '%s'\n", x);
        if (strcmp(x, "16") == 0) {
            ccb_target_wordsize_val = 16;
        }
        else if (strcmp(x, "32") == 0) {
            ccb_target_wordsize_val = 32;
        }
        else if (strcmp(x, "64") == 0) {
            fprintf(stderr, "Is 64-bit\n");
            ccb_target_wordsize_val = 64;
        }
        else {
            ccb_compile_error(ccb, "Invalid value for environment variable (%s=\"%s\"): Only 16, 32 and 64-bit word sizes are available", "CCB_WORDSIZE", x);
        }
    }

    /* Apply the per-family default and reject unsupported combinations.
     * `x` still refers to CCB_WORDSIZE here; the error paths are only
     * reachable when it was set explicitly. */
    switch (ccb_target_fam) {
    case CCB_ARCH_FAMILY_X86:
    case CCB_ARCH_FAMILY_GENERIC:
        if (ccb_target_wordsize_val == -1) {
            ccb_target_wordsize_val = 64;
        }
        break;
    case CCB_ARCH_FAMILY_ARM:
    case CCB_ARCH_FAMILY_RISCV:
        if (ccb_target_wordsize_val == -1) {
            ccb_target_wordsize_val = 64;
        }
        if (ccb_target_wordsize_val < 32) {
            ccb_compile_error(ccb, "Invalid value for environment variable (%s=\"%s\"): Not available in the ARM or RISC-V targets", "CCB_WORDSIZE", x);
        }
        break;
    case CCB_ARCH_FAMILY_GEN1:
        if (ccb_target_wordsize_val == -1) {
            ccb_target_wordsize_val = 64;
        }
        if (ccb_target_wordsize_val != 64) {
            ccb_compile_error(ccb, "Invalid value for environment variable (%s=\"%s\"): Not available in the GEN1 target", "CCB_WORDSIZE", x);
        }
        break;
    default:
        ccb_compile_error(ccb, "Internal Error (missing case?)");
    }
}
4 : 6; // TODO: This only applies to 64-bit mode + case CCB_ARCH_FAMILY_ARM: + return 8; // TODO: ARM only has 4 on 32-bit targets + case CCB_ARCH_FAMILY_RISCV: + return 8; // Pretty sure this is the same for all RISC-V ISAs + case CCB_ARCH_FAMILY_GENERIC: + return 6; // Currently... + case CCB_ARCH_FAMILY_GEN1: + return 4; // With standard-ish ABI, might use about 8-ish in the future + default: + ccb_compile_error(ccb, "Target Error: arch_callregisters() got unknown arch"); + return -1; // Unreachable + } +} + +const char* ccb_target_callregister(ccb_t* ccb, int idx) { + if (idx < 0 || idx >= ccb_target_callregisters(ccb)) { + ccb_compile_error(ccb, "Target Error: arch_callregister(cc,%d) received bad argument", idx); + } + + switch (ccb_target_family(ccb)) { + case CCB_ARCH_FAMILY_X86: + if (ccb_target_callconv(ccb) == CCB_TARGET_CALLCONV_WINDOWS) { + switch (idx) { + case 0: + return "rcx"; + case 1: + return "rdx"; + case 2: + return "r8"; + case 3: + return "r9"; + } + } + else { + switch (idx) { + case 0: + return "rdi"; + case 1: + return "rsi"; + case 2: + return "rdx"; + case 3: + return "rcx"; + case 4: + return "r8"; + case 5: + return "r9"; + } + } + ccb_compile_error(ccb, "Target Error: Register lookup failed"); + case CCB_ARCH_FAMILY_ARM: + switch (idx) { + case 0: + return "x0"; + case 1: + return "x1"; + case 2: + return "x2"; + case 3: + return "x3"; + case 4: + return "x4"; + case 5: + return "x5"; + case 6: + return "x6"; + case 7: + return "x7"; + } + ccb_compile_error(ccb, "Target Error: Register lookup failed"); + case CCB_ARCH_FAMILY_RISCV: + switch (idx) { + case 0: + return "a0"; // Also known as 'x10' + case 1: + return "a1"; + case 2: + return "a2"; + case 3: + return "a3"; + case 4: + return "a4"; + case 5: + return "a5"; + case 6: + return "a6"; + case 7: + return "a7"; // .. 
'x17' + } + ccb_compile_error(ccb, "Target Error: Register lookup failed"); + case CCB_ARCH_FAMILY_GENERIC: + switch (idx) { + case 0: + return "r7"; + case 1: + return "r6"; + case 2: + return "r2"; + case 3: + return "r1"; + case 4: + return "r8"; + case 5: + return "r9"; + } + ccb_compile_error(ccb, "Target Error: Register lookup failed"); + case CCB_ARCH_FAMILY_GEN1: + switch (idx) { + case 0: + return "$r0"; + case 1: + return "$r1"; + case 2: + return "$r2"; + case 3: + return "$r3"; + } + ccb_compile_error(ccb, "Target Error: Register lookup failed"); + default: + ccb_compile_error(ccb, "Target Error: Register lookup failed"); + return "ERROR?"; // Unreachable + } +} + + +const char* ccb_target_r0(ccb_t* ccb) { + switch (ccb_target_family(ccb)) { + case CCB_ARCH_FAMILY_X86: + return "rax"; + case CCB_ARCH_FAMILY_GEN1: + return "$r0"; + case CCB_ARCH_FAMILY_GENERIC: + return "r0"; + case CCB_ARCH_FAMILY_RISCV: + return "a0"; + case CCB_ARCH_FAMILY_ARM: + return "x0"; + default: + return "todo"; + } +} + +const char* ccb_target_r1(ccb_t* ccb) { + switch (ccb_target_family(ccb)) { + case CCB_ARCH_FAMILY_X86: + return "rcx"; + case CCB_ARCH_FAMILY_GEN1: + return "$r1"; + case CCB_ARCH_FAMILY_GENERIC: + return "r1"; + case CCB_ARCH_FAMILY_RISCV: + return "a1"; + case CCB_ARCH_FAMILY_ARM: + return "x1"; + default: + return "todo"; + } +} + +const char* ccb_target_r15(ccb_t* ccb) { + switch (ccb_target_family(ccb)) { + case CCB_ARCH_FAMILY_X86: + return "r15"; + case CCB_ARCH_FAMILY_GEN1: + return "$rf"; + case CCB_ARCH_FAMILY_GENERIC: + return "r15"; + case CCB_ARCH_FAMILY_RISCV: + return "t0"; + default: + return "todo"; + } +} + +const char* ccb_target_sp(ccb_t* ccb) { + switch (ccb_target_family(ccb)) { + case CCB_ARCH_FAMILY_X86: + return "rsp"; + case CCB_ARCH_FAMILY_GEN1: + return "$rstack"; + case CCB_ARCH_FAMILY_RISCV: + return "sp"; // Also known as 'x2' + default: + return "r4"; + } +} + +const char* ccb_target_bp(ccb_t* ccb) { + switch 
(ccb_target_family(ccb)) { + case CCB_ARCH_FAMILY_X86: + return "rbp"; + case CCB_ARCH_FAMILY_GEN1: + return "$rbase"; + case CCB_ARCH_FAMILY_RISCV: + return "x8";//"fp"; // Also known as 's0' (or `x8`) + default: + return "r5"; + } +} + +static int ccb_target_gen_asmfmt_cached = 0; + +int ccb_target_asmfmt(ccb_t* ccb) { + if (ccb_target_gen_asmfmt_cached == 0) { + const char* val = getenv(/* CCB_TARGET_ENVPREFIX */ "CCB_ASMFMT"); + if (val == NULL) { + ccb_target_gen_asmfmt_cached = CCB_TARGET_ASMFMT_GAS; + } else if (strcmp(val, "") == 0 || strcmp(val, "gas") == 0) { + ccb_target_gen_asmfmt_cached = CCB_TARGET_ASMFMT_GAS; + } + else if (strcmp(val, "fasm") == 0) { + ccb_target_gen_asmfmt_cached = CCB_TARGET_ASMFMT_FASM; + } + else if (strcmp(val, "nasm") == 0) { + ccb_target_gen_asmfmt_cached = CCB_TARGET_ASMFMT_NASM; + } + else if (strcmp(val, "raw") == 0) { + ccb_target_gen_asmfmt_cached = CCB_TARGET_ASMFMT_RAW; + } + else { + fprintf(stderr, "Invalid value for CC_ASMFMT (\"%s\")\n", val); + return -1; + } + } + + return ccb_target_gen_asmfmt_cached; +} + +static int ccb_target_binfmt_cached = 0; + +int ccb_target_binfmt(ccb_t* ccb) { + if (ccb_target_binfmt_cached == 0) { + const char* val = getenv(/* CCB_TARGET_ENVPREFIX */ "CCB_BINFMT"); + if (val == NULL || strcmp(val, "") == 0 || strcmp(val, "elf") == 0) { + ccb_target_binfmt_cached = CCB_TARGET_BINFMT_ELF; + } + else if (strcmp(val, "flat") == 0) { + ccb_target_binfmt_cached = CCB_TARGET_BINFMT_FLAT; + } + else { + fprintf(stderr, "Invalid value for CC_BINFMT (\"%s\")\n", val); + return -1; + } + } + + return ccb_target_binfmt_cached; +} + +/* +static const char *registers[] = { + "rdi", "rsi", "rdx", + "rcx", "r8", "r9" +}; +*/ + +static void ccb_target_gen_expression(ccb_t* ccb, ccb_ast_t*); +/** Conceptually the same as ccb_target_gen_expression, except it will pop the result off the stack. 
*/ +static void ccb_target_gen_statement(ccb_t* ccb, ccb_ast_t*); +static void ccb_target_gen_declaration_initialization(ccb_t* ccb, ccb_list_t*, int); +#ifdef _ZCC +#define ccb_target_gen_emit(...) do{fprintf(ccb->output, __VA_ARGS__); fprintf(ccb->output, "\n"); fflush(ccb->output);}while(0) +#define ccb_target_gen_emit_inline(...) do{fprintf(ccb->output, __VA_ARGS__); fprintf(ccb->output, "\n"); fflush(ccb->output);}while(0) +/* +TODO: Better string handling... #define ccb_target_gen_emit(...) ccb_target_gen_emit_impl(ccb, __LINE__, "\t" __VA_ARGS__) +*/ +#else +#define ccb_target_gen_emit(...) ccb_target_gen_emit_impl(ccb, __LINE__, __VA_ARGS__) +#define ccb_target_gen_emit_inline(...) ccb_target_gen_emit_impl(ccb, __LINE__, __VA_ARGS__) +#endif +#define ccb_target_gen_push(X) ccb_target_gen_push_ (ccb, X, __LINE__) +#define ccb_target_gen_pop(X) ccb_target_gen_pop_ (ccb, X, __LINE__) +#define ccb_target_gen_drop() ccb_target_gen_drop_ (ccb, __LINE__) +#define ccb_target_gen_push_xmm(X) ccb_target_gen_push_xmm_(ccb, X, __LINE__) +#define ccb_target_gen_pop_xmm(X) ccb_target_gen_pop_xmm_ (ccb, X, __LINE__) + +static int ccb_target_gen_stack = 0; + +static char* ccb_target_gen_label_break = NULL; +static char* ccb_target_gen_label_continue = NULL; +//static char* ccb_target_gen_label_break_store = NULL; +//static char* ccb_target_gen_label_continue_store = NULL; +static char* ccb_target_gen_label_switch = NULL; +//static char* ccb_target_gen_label_switch_store = NULL; + +#ifdef _ZCC +#define ccb_target_gen_emit_impl(ccb,ln,...) do{fprintf(ccb->output, __VA_ARGS__); fprintf(ccb->output, "\n"); } while(0) +#else +void ccb_target_gen_emit_impl(ccb_t* ccb, int line, const char* fmt, ...) { + va_list args; + va_start(args, fmt); + int col = vfprintf(ccb->output, fmt, args); + va_end(args); + + for (const char* p = fmt; *p; p++) + if (*p == '\t') + col += 8 - 1; + + col = (40 - col) > 0 ? 
(40 - col) : 2; + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC || (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + fprintf(ccb->output, "%*c % 4d %d\n", col, ';', line, ccb_target_gen_stack); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + // 32-bit: fprintf(ccb->output, "%*c % 4d %d\n", col, '@', line, ccb_target_gen_stack); + fprintf(ccb->output, " // %d %d\n", line, ccb_target_gen_stack); + } + else { + fprintf(ccb->output, "%*c % 4d %d\n", col, '#', line, ccb_target_gen_stack); + } +} +#endif + +//static void ccb_target_gen_jump_save(ccb_t* ccb, char* lbreak, char* lcontinue) { +#define ccb_target_gen_jump_save(ccb,lbreak,lcontinue) char* ccb_target_gen_label_break_store = ccb_target_gen_label_break; char* ccb_target_gen_label_continue_store = ccb_target_gen_label_continue; ccb_target_gen_label_break = lbreak; ccb_target_gen_label_continue = lcontinue + +//static void ccb_target_gen_jump_restore(ccb_t* ccb) { +#define ccb_target_gen_jump_restore(ccb) do { ccb_target_gen_label_break = ccb_target_gen_label_break_store; ccb_target_gen_label_continue = ccb_target_gen_label_continue_store; } while(0); + +static int ccb_target_regcode(ccb_t* ccb, const char* reg) { + if (!strcmp(reg, "rax")) { + return 0; + } + else if (!strcmp(reg, "rcx")) { + return 1; + } + else if (!strcmp(reg, "rdx")) { + return 2; + } + else if (!strcmp(reg, "rbx")) { + return 3; + } + else if (!strcmp(reg, "rsi")) { + return 6; + } + else if (!strcmp(reg, "rdi")) { + return 7; + } + else { + //fprintf(stderr, "WARNING: Unimplemented regcode '%s'\n", reg); + return -1; + } +} + +static void ccb_target_gen_push_(ccb_t* ccb, const char* reg, int line) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit_impl(ccb, line, "\twrite32 $rsp, %s, -8", reg); // TODO: Fix this mess. 
+ ccb_target_gen_emit_impl(ccb, line, "\txor $rscratch, $rscratch, $rscratch"); + ccb_target_gen_emit_impl(ccb, line, "\taddimm $rscratch, $rscratch, 32"); + ccb_target_gen_emit_impl(ccb, line, "\tshrz $rscratch, %s, $rscratch", reg); + ccb_target_gen_emit_impl(ccb, line, "\twrite32 $rsp, $rscratch, -4"); + ccb_target_gen_emit_impl(ccb, line, "\taddimm $rsp, -8"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (ccb_target_wordsize(ccb) == 32) { + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, -4"); + ccb_target_gen_emit_impl(ccb, line, "\tsw %s, 0(sp)", reg); + } else { + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, -8"); + ccb_target_gen_emit_impl(ccb, line, "\tsd %s, 0(sp)", reg); + } + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + // TODO: Optimise ARM64 stack operations + // (for some horrible reason, you can't use SP as a simple word-sized stack :s) + // For details: https://community.arm.com/arm-community-blogs/b/architectures-and-processors-blog/posts/using-the-stack-in-aarch64-implementing-push-and-pop + ccb_target_gen_emit_impl(ccb, line, "\tstr %s, [sp, #-16]", reg); + ccb_target_gen_stack += (ccb_target_wordsize(ccb)/8); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit_impl(ccb, line, "\tpushr %s", reg); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit_impl(ccb, line, "\tpush %s", reg); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + int regcode = ccb_target_regcode(ccb, reg); + if (regcode < 0) { + ccb_target_gen_emit_impl(ccb, line, "\tdata8 0x?? 
; TODO: pop %s", reg); + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tdata8 0x%x ; push %s", 0x50 + regcode, reg); + } + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tpush %%%s", reg); + } + ccb_target_gen_stack += (ccb_target_wordsize(ccb)/8); +} +static void ccb_target_gen_pop_(ccb_t* ccb, const char* reg, int line) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit_impl(ccb, line, "\tread32 %s, $rsp, 4", reg); + ccb_target_gen_emit_impl(ccb, line, "\txor $rscratch, $rscratch, $rscratch"); + ccb_target_gen_emit_impl(ccb, line, "\taddimm $rscratch, $rscratch, 32"); + ccb_target_gen_emit_impl(ccb, line, "\tshl %s, %s, $rscratch", reg, reg); + ccb_target_gen_emit_impl(ccb, line, "\tread32 %rscratch, $rsp, 0"); + ccb_target_gen_emit_impl(ccb, line, "\tor %s, %s, %rscratch", reg, reg); + ccb_target_gen_emit_impl(ccb, line, "\taddimm $rsp, $rsp, 8"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (ccb_target_wordsize(ccb) == 32) { + ccb_target_gen_emit_impl(ccb, line, "\tlw %s, 0(sp)", reg); + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, 4"); + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tld %s, 0(sp)", reg); + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, 8"); + } + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + // TODO: Optimise ARM64 stack operations + // (for some horrible reason, you can't use SP as a simple word-sized stack :s) + ccb_target_gen_emit_impl(ccb, line, "\tldr %s, [sp], #16", reg); + ccb_target_gen_stack -= (ccb_target_wordsize(ccb)/8); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit_impl(ccb, line, "\tpopr %s", reg); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit_impl(ccb, line, "\tpop %s", reg); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + 
int regcode = ccb_target_regcode(ccb, reg); + if (regcode < 0) { + ccb_target_gen_emit_impl(ccb, line, "\tdata8 0x?? ; TODO: pop %s", reg); + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tdata8 0x%x ; pop %s", 0x58 + regcode, reg); + } + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tpop %%%s", reg); + } + ccb_target_gen_stack -= (ccb_target_wordsize(ccb)/8); +} +static void ccb_target_gen_drop_(ccb_t* ccb, int line) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit_impl(ccb, line, "\taddimm $rsp, $rsp, 8"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (ccb_target_wordsize(ccb) == 32) { + //ccb_target_gen_emit_impl(ccb, line, "\tlw %s, 0(sp)", reg); + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, 4"); + } + else { + //ccb_target_gen_emit_impl(ccb, line, "\tld %s, 0(sp)", reg); + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, 8"); + } + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + // TODO: Optimise ARM64 stack operations + // (for some horrible reason, you can't use SP as a simple word-sized stack :s) + //cb_target_gen_emit_impl(ccb, line, "\tldr %s, [sp], #16", reg); + //ccb_target_gen_stack -= (ccb_target_wordsize(ccb)/8); + ccb_compile_error(ccb, "TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + //ccb_target_gen_emit_impl(ccb, line, "\tpopr %s", reg); + ccb_compile_error(ccb, "TODO"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit_impl(ccb, line, "\tadd rsp, %d", ccb_target_wordbytes(ccb)); // TODO: Adapt push/pop/drop to 32 bit sp if needed.. + } + /*else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + int regcode = ccb_target_regcode(ccb, reg); + if (regcode < 0) { + ccb_target_gen_emit_impl(ccb, line, "\tdata8 0x?? 
; TODO: pop %s", reg); + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tdata8 0x%x ; pop %s", 0x58 + regcode, reg); + } + }*/ + else { + //ccb_target_gen_emit_impl(ccb, line, "\tpop %%%s", reg); + ccb_target_gen_emit_impl(ccb, line, "\taddq $%d, %%rsp", ccb_target_wordbytes(ccb)); + } + ccb_target_gen_stack -= (ccb_target_wordsize(ccb)/8); +} +static void ccb_target_gen_push_xmm_(ccb_t* ccb, int r, int line) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit_impl(ccb, line, "\tsubrc r4, 8"); + ccb_target_gen_emit_impl(ccb, line, "\tsetrmf r4, f%d", r); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, -8"); + ccb_target_gen_emit_impl(ccb, line, "\tfsd fa%d, 0(sp)", r); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit_impl(ccb, line, "\tsub rsp, 8"); + ccb_target_gen_emit_impl(ccb, line, "\tmovsd [rsp], xmm%d", r); + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tsub $8, %%rsp"); + ccb_target_gen_emit_impl(ccb, line, "\tmovsd %%xmm%d, (%%rsp)", r); + } + ccb_target_gen_stack += 8; +} +static void ccb_target_gen_pop_xmm_(ccb_t* ccb, int r, int line) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit_impl(ccb, line, "\tsetfrm f%d, r4", r); + ccb_target_gen_emit_impl(ccb, line, "\taddrc r4, 8"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit_impl(ccb, line, "\tfld fa%d, 0(sp)", r); + ccb_target_gen_emit_impl(ccb, line, "\taddi sp, sp, 8"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit_impl(ccb, line, "\tmovsd xmm%d, [rsp]", r); + ccb_target_gen_emit_impl(ccb, line, "\tadd rsp, 8"); + } + else { + ccb_target_gen_emit_impl(ccb, line, "\tmovsd (%%rsp), %%xmm%d", r); + ccb_target_gen_emit_impl(ccb, line, 
"\tadd $8, %%rsp"); + } + ccb_target_gen_stack -= 8; +} + +static const char* ccb_target_gen_register_integer(ccb_t* ccb, ccb_data_type_t* type, char r) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { // TODO... + switch (type->size) { + case 1: return (r == 'a') ? "r0x8" : "r1x8"; + case 2: return (r == 'a') ? "r0x16" : "r1x16"; + case 4: return (r == 'a') ? "v0" : "v1"; + case 8: return (r == 'a') ? "v0x64" : "v1x64"; + } + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + switch (type->size) { + case 1: return (r == 'a') ? "r0x8" : "r1x8"; + case 2: return (r == 'a') ? "r0x16" : "r1x16"; + case 4: return (r == 'a') ? "r0x32" : "r1x32"; + case 8: return (r == 'a') ? "r0" : "r1"; + } + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + return (r == 'a') ? "a0" : "a1"; + } + else { + switch (type->size) { + case 1: return (r == 'a') ? "al" : "cl"; + case 2: return (r == 'a') ? "ax" : "cx"; + case 4: return (r == 'a') ? "eax" : "ecx"; + case 8: return (r == 'a') ? "rax" : "rcx"; + } + } + ccb_compile_error(ccb, "Unexpected operand to ccb_target_gen_register_integer"); + return ""; // Unreachable? +} + +static const char* ccb_target_gen_loadstorespec(ccb_t* ccb, ccb_data_type_t* type, bool isstore) { + if (type->type == CCB_TYPE_FLOAT && !type->sign) { + printf("WARNING: float type has sign set to false, correcting this...\n"); + type->sign = true; + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + switch (type->size) { + case 1: return isstore ? "b" : (type->sign ? "b" : "bu"); + case 2: return isstore ? "h" : (type->sign ? "h" : "hu"); + case 4: return isstore ? "w" : (type->sign ? "w" : "wu"); + case 8: return "d"; // isstore ? "d" : (type->sign ? 
"b" : "bu"); + default: + if (type->type == CCB_TYPE_STRUCTURE) { + ccb_compile_error(ccb, "Unexpected operand to ccb_target_gen_loadstorespec (type STRUCTURE, size=%d) - perhaps you wanted a pointer?", type->size); + } else if (type->type == CCB_TYPE_FUNCTION) { + ccb_compile_error(ccb, "Unexpected operand to ccb_target_gen_loadstorespec (type FUNCTION, size=%d) - perhaps you wanted a function pointer?", type->size); + } else { + ccb_compile_error(ccb, "Unexpected operand to ccb_target_gen_loadstorespec (type %d, size=%d)\n", type->type, type->size); + } + return ""; // Unreachable? + } + } + else { + return ""; + } +} + +static void ccb_target_gen_load_global(ccb_t* ccb, ccb_data_type_t* type, char* label, int offset) { + if (type->type == CCB_TYPE_ARRAY) { + if (offset) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $r0, $r0, $r0"); + ccb_target_gen_emit("addimm $r0, (%s + %d)", label, offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi a0, zero, %s", label); // TODO: Use lui here? 
+ ccb_target_gen_emit("la a0, %s", label); + //ccb_target_gen_emit("addi a0, a0, %d", offset); + ccb_target_gen_rv_addi(ccb, "a0", "a0", offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrcpc r0, %s, %d", label, offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("lea rax, [%s + %d]", label, offset); + } + else { + ccb_target_gen_emit("lea %s+%d(%%rip), %%rax", label, offset); + } + } + else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $r0, $r0, $r0"); + ccb_target_gen_emit("addimm $r0, (%s + %d)", label); + //ccb_target_gen_emit("read32 $r0, $r1, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, %s", label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi a0, zero, %s", label); // TODO: Use lui here? + ccb_target_gen_emit("la a0, %s", label); // TODO: Use lui here? + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("lea rax, [%s]", label); + } + else { + ccb_target_gen_emit("lea %s(%%rip), %%rax", label); + } + } + return; + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $r1, $r1, $r1"); + ccb_target_gen_emit("addimm $r1, $r1, (%s + %d)", label, offset); + if (type->size <= 4) { + ccb_target_gen_emit("read32 $r0, $r1, 0"); + if (type->size < 4) { + int32_t mask = ((type->size == 1) ? 0xFF : 0xFFFF); + ccb_target_gen_emit("xor $r1, $r1, $r1"); + ccb_target_gen_emit("addimm $r1, %d", mask); // TODO: This might not work out so well for 16-bit, must check. 
+ ccb_target_gen_emit("and $r0, $r0, $r1"); + } + } + else { + ccb_target_gen_emit("read32 $r0, $r1, 4"); + ccb_target_gen_emit("shlz $r0, $r0, 32"); + ccb_target_gen_emit("read32 $r1, $r1, 0"); + ccb_target_gen_emit("or $r0, $r0, $r1"); // TODO: Sign bit may also interfere here. + } + } + else { + + if (type->size < 4) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi a0, zero, 0"); // TODO: Probably unneeded? + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov eax, 0"); + } + else { + ccb_target_gen_emit("mov $0, %%eax"); + } + } + + const char* reg = ccb_target_gen_register_integer(ccb, type, 'a'); + + if (offset) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrcpcm %s, %s, %d", reg, label, offset); + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, %s", label); // TODO: Use lui? Better temporary register? + ccb_target_gen_emit("la t0, %s", label); // TODO: Use lui? Better temporary register? 
+ //ccb_target_gen_emit("ld %s, t0, %d", reg, offset); + //ccb_target_gen_emit("ld %s, %d(t0)", reg, offset); + const char* spec = ccb_target_gen_loadstorespec(ccb, type, false); + ccb_target_gen_emit("l%s %s, %d(%s)", spec, reg, offset, "t0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, [%s + %d]", reg, label, offset); + } + else { + ccb_target_gen_emit("mov %s+%d(%%rip), %%%s", label, offset, reg); + } + } + else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrcm %s, %s", reg, label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, %s", label); // TODO: Use lui? Better temporary register? + ccb_target_gen_emit("la t0, %s", label); // TODO: Use lui? Better temporary register? + //ccb_target_gen_emit("ld %s, t0, 0", reg); + //ccb_target_gen_emit("ld %s, 0(t0)", reg); + const char* spec = ccb_target_gen_loadstorespec(ccb, type, false); + + if (type->type == CCB_TYPE_FLOAT || type->type == CCB_TYPE_DOUBLE || type->type == CCB_TYPE_LDOUBLE) { + ccb_target_gen_emit("fl%s %s, %d(%s)", spec, "fa0", 0, "t0"); + } else { + ccb_target_gen_emit("l%s %s, %d(%s)", spec, reg, 0, "t0"); + } + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, [%s]", reg, label); + } + else { + ccb_target_gen_emit("mov %s(%%rip), %%%s", label, reg); + } + } + } +} + +static void ccb_target_gen_cast_int(ccb_t* ccb, ccb_data_type_t* to, ccb_data_type_t* type) { + if (!ccb_ast_type_floating(ccb, type)) { + if (type->type == CCB_TYPE_INT) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("db 0x48"); + ccb_target_gen_emit("db 0x98"); // TODO: This is test only for now, 
needs to handle unsigned/other + } else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86) { + ccb_target_gen_emit("cltq"); // TODO: This is test only for now, needs to handle unsigned/other + } /* TODO: Fix this elsewhere? May not be an issue on RISC-V since I think it auto-extends (bonus TODO: Check that!) */ + if (to != NULL && to->sign && type->sign && type->size == 4) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addw a0, a0, zero"); // Sign extend by using a 32-bit addition + } + } + } + return; + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("intf r0, f0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fcvt.l.d a0, fa0, rtz"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cvttsd2si eax, xmm0"); + } + else { + ccb_target_gen_emit("cvttsd2si %%xmm0, %%eax"); + } +} + +static void ccb_target_gen_cast_float(ccb_t* ccb, ccb_data_type_t* type) { + if (ccb_ast_type_floating(ccb, type)) { + return; + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("floati f0, r0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fcvt.d.l fa0, a0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cvtsi2sd xmm0, eax"); + } + else { + ccb_target_gen_emit("cvtsi2sd %%eax, %%xmm0"); + } +} + +static int ccb_target_gen_r02nextwd(ccb_t* ccb, ccb_data_type_t* type, int* countvar) { // TODO... 
+ if (countvar[0] < type->size) { + if (type->size <= ccb_target_wordbytes(ccb)*2) { + if (countvar[0] == 0) { + // No action, first word is already in r0 + } else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi %s, %s, 0", ccb_target_r0(ccb), ccb_target_r1(ccb)); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, %s", ccb_target_r0(ccb), ccb_target_r1(ccb)); + } + else { + ccb_target_gen_emit("mov %%%s, %%%s", ccb_target_r1(ccb), ccb_target_r0(ccb)); + } + } + } else { + ccb_target_gen_pop(ccb_target_r0(ccb)); + } + countvar[0] += ccb_target_wordbytes(ccb); + return 1; + } else { + return 0; + } +} + +static void ccb_target_gen_load_local(ccb_t* ccb, ccb_data_type_t* var, const char* base, int offset) { + if (var->type == CCB_TYPE_ARRAY) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrrpc r0, %s, %d", base, offset); // Set-register-to-register-plus-constant (we only want the address of array variable) + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi a0, %s, %d", base, offset); // Set-register-to-register-plus-constant (we only want the address of array variable) + ccb_target_gen_rv_addi(ccb, "a0", base, offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("addimm $r0, %s, %d", base, offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("lea rax, [%s + %d]", base, offset); + } + else { + ccb_target_gen_emit("lea %d(%%%s), %%rax", offset, base); + } + } + else if (var->type == CCB_TYPE_FLOAT) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setfrpcmx32 f0, %s, %d", base, offset); // Set-float-to-register-plus-constant's-memory-x32bit + } + else if (ccb_target_family(ccb) 
== CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("flw ft0, %d(%s)", offset, base); + ccb_target_gen_emit("fcvt.d.s fa0, ft0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("todo"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cvtps2pd xmm0, [%s + %d]", base, offset); + } + else { + ccb_target_gen_emit("cvtps2pd %d(%%%s), %%xmm0", offset, base); + } + } + else if (var->type == CCB_TYPE_DOUBLE || var->type == CCB_TYPE_LDOUBLE) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setfrpcm f0, %s, %d", base, offset); // Set-float-to-register-plus-constant's-memory + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fld fa0, %d(%s)", offset, base); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("todo"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("movsd xmm0, [%s + %d]", base, offset); + } + else { + ccb_target_gen_emit("movsd %d(%%%s), %%xmm0", offset, base); + } + } + else if (var->size > ccb_target_wordbytes(ccb)) { + if (var->size <= ccb_target_wordbytes(ccb)*2) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("ld %s, %d(%s)", ccb_target_r0(ccb), offset, base); + ccb_target_gen_emit("ld %s, %d(%s)", ccb_target_r1(ccb), offset+ccb_target_wordbytes(ccb), base); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, [%s + %d]", ccb_target_r0(ccb), base, offset); + ccb_target_gen_emit("mov %s, [%s + %d]", ccb_target_r1(ccb), base, offset+ccb_target_wordbytes(ccb)); + } + else { + ccb_target_gen_emit("mov %d(%%%s), %%%s", offset, base, ccb_target_r0(ccb)); + ccb_target_gen_emit("mov %d(%%%s), %%%s", 
offset+ccb_target_wordbytes(ccb), base, ccb_target_r1(ccb)); + } + } else { + /* I could probably hard-code a more efficient way, but we can just read each word from the end to + * the start and push each word to the stack as it's read in order to get the correct layout on the + * stack. This way the stack accounting doesn't need to be manually adjusted. + */ + long count = var->size-1; + while ((count % ccb_target_wordbytes(ccb)) != 0) { + count--; + } + while (count >= 0) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("ld %s, %d(%s)", ccb_target_r0(ccb), offset+count, base); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, [%s + %d]", ccb_target_r0(ccb), base, offset+count); + } + else { + ccb_target_gen_emit("mov %d(%%%s), %%%s", offset+count, base, ccb_target_r0(ccb)); + } + ccb_target_gen_push(ccb_target_r0(ccb)); + count -= ccb_target_wordbytes(ccb); + } + //ccb_compile_error(ccb, "TODO: Load large structs..."); + } + } else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $r1, $r1, $r1"); + ccb_target_gen_emit("addimm $r1, %s, %d", base, offset); + if (var->size <= 4) { + ccb_target_gen_emit("read32 $r0, $r1, 0"); + if (var->size < 4) { + int32_t mask = ((var->size == 1) ? 0xFF : 0xFFFF); + ccb_target_gen_emit("xor $r1, $r1, $r1"); + ccb_target_gen_emit("addimm $r1, %d", mask); // TODO: This might not work out so well for 16-bit, must check. + ccb_target_gen_emit("and $r0, $r0, $r1"); + } + } + else { + ccb_target_gen_emit("read32 $r0, $r1, 4"); + ccb_target_gen_emit("shlz $r0, $r0, 32"); + ccb_target_gen_emit("read32 $r1, $r1, 0"); + ccb_target_gen_emit("or $r0, $r0, $r1"); // TODO: Sign bit may also interfere here. 
+ } + } + else { + const char* reg = ccb_target_gen_register_integer(ccb, var, 'c'); + const char* spec = ccb_target_gen_loadstorespec(ccb, var, false); + if (var->size < 4) { // TODO: Should this be 8 (or normal int/pointer size) ?? + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("li a1, 0");// No need to clear top bits, handled in load instruction (??) + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov ecx, 0"); + } + else { + ccb_target_gen_emit("mov $0, %%ecx"); + } + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("peek32 %s, %s, %d", reg, base, offset); + ccb_target_gen_emit("move v0, v1"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrrpcm %s, %s, %d", reg, base, offset); + ccb_target_gen_emit("setrr r0, r1"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("l%s %s, %d(%s)", spec, reg, offset, base); + ccb_target_gen_emit("addi a0, a1, 0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, [%s + %d]", reg, base, offset); + if (var->sign && var->size < 8) { + //ccb_target_gen_emit("movsx rax, %s", reg); + /* Same workaround as for YASM: */ + ccb_target_gen_emit("db 0x48"); + ccb_target_gen_emit("db 0x63"); + ccb_target_gen_emit("db 0xC1"); + }/* else if (var->size < 8) { + ccb_target_gen_emit("movzx rax, %s", reg); + }*/ else { + ccb_target_gen_emit("mov rax, %s", "rcx" /*reg*/); + } + //ccb_target_gen_emit("mov rax, rcx"); + } + else { + ccb_target_gen_emit("mov %d(%%%s), %%%s", offset, base, reg); + if (var->sign && var->size == 4) { + /* YASM seems to support most/all GNU syntax EXCEPT having a compatible 
movsxd variant of movsx. + * TODO: Check if there's some better way than this. */ + ccb_target_gen_emit(".byte 0x48"); + ccb_target_gen_emit(".byte 0x63"); + ccb_target_gen_emit(".byte 0xC1"); + /*if (getenv("USING_YASM") != NULL) { + ccb_target_gen_emit("movsxd dword %%%s, %%rax", reg); + } else { + ccb_target_gen_emit("movsxd %%%s, %%rax", reg); + }*/ + } else if (var->sign && var->size < 8) { + ccb_target_gen_emit("movsx %%%s, %%rax", reg); + } /*else if (var->size < 8) { + ccb_target_gen_emit("movzx %%%s, %%rax", reg); + }*/ else { + ccb_target_gen_emit("mov %%%s, %%rax", "rcx"/*reg*/); + } + //ccb_target_gen_emit("mov %%rcx, %%rax"); + } + } + } +} + +static void ccb_target_gen_rv_addi(ccb_t* ccb, char* destr, char* srcr, int offset) { + /*if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("addrc %s, %d", destr, offset); + }*/ + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + /* For RISC-V, the range of an addi instruction is limited to inbetween -2048 and 2047 (inclusive). + * The best approach would be to use a scratch register, but since this might be used in critical + * places where other register values are important (and because register usage isn't tracked entirely) + * a fallback of simply doing multiple additions is used. + * + * A warning is printed in these cases, so that you'll know if a structure access is outside of the + * comfortable range. 
+ */ + if (offset < -2048) { + ccb_compile_warn(ccb, "Hard-coding a long subtraction (addi of %d) for register %s (this generally means a structure or function stack frame is uncomfortably large)", offset, destr); + ccb_target_gen_rv_addi(ccb, destr, srcr, -2048); + ccb_target_gen_rv_addi(ccb, destr, destr, offset + 2048); + } else if (offset > 2047) { + ccb_compile_warn(ccb, "Hard-coding a long addition (addi of %d) for register %s (this generally means a structure or function stack frame is uncomfortably large)", offset, destr); + ccb_target_gen_rv_addi(ccb, destr, srcr, 2047); + ccb_target_gen_rv_addi(ccb, destr, destr, offset - 2047); + } else { + ccb_target_gen_emit("addi %s, %s, %d", destr, srcr, offset); + } + } else { + ccb_compile_error(ccb, "TODO: Internal error, no long _addi support for non-RISC-V targets"); + } + + /* + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("add %s, %d", destr, offset); + } + else { + ccb_target_gen_emit("add %d, %%%s", offset, destr); + }*/ +} + +static void ccb_target_gen_rv_loadstore(ccb_t* ccb, char* op, char* spec, char* r, char* baser, int offset, char* tmpr) { + /*if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("addrc %s, %d", destr, offset); + }*/ + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + /* For RISC-V, the range of an addi instruction is limited to inbetween -2048 and 2047 (inclusive). + * The best approach would be to use a scratch register, but since this might be used in critical + * places where other register values are important (and because register usage isn't tracked entirely) + * a fallback of simply doing multiple additions is used. + * + * A warning is printed in these cases, so that you'll know if a structure access is outside of the + * comfortable range. 
+ */ + if (offset < -2048 || offset > 2047) { + ccb_compile_warn(ccb, "Using temporary register %s for long load/store op", tmpr); + ccb_target_gen_rv_addi(ccb, tmpr, baser, offset); + ccb_target_gen_rv_loadstore(ccb, op, spec, r, tmpr, 0, NULL); + } else { + ccb_target_gen_emit("%s%s %s, %d(%s)", op, spec, r, offset, baser); + } + } else { + ccb_compile_error(ccb, "TODO: Internal error, no long _addi support for non-RISC-V targets"); + } + + /* + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("add %s, %d", destr, offset); + } + else { + ccb_target_gen_emit("add %d, %%%s", offset, destr); + }*/ +} + +static void ccb_target_gen_rv_load(ccb_t* ccb, char* spec, char* r, char* baser, int offset, char* tmpr) { + ccb_target_gen_rv_loadstore(ccb, "l", spec, r, baser, offset, tmpr); +} + +static void ccb_target_gen_rv_store(ccb_t* ccb, char* spec, char* r, char* baser, int offset, char* tmpr) { + ccb_target_gen_rv_loadstore(ccb, "s", spec, r, baser, offset, tmpr); +} + +static void ccb_target_gen_save_global(ccb_t* ccb, char* name, ccb_data_type_t* type, int offset) { + // TODO: Support for floating-point globals, or is this elsewhere? + const char* reg = ccb_target_gen_register_integer(ccb, type, 'a'); + if (offset) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmr %s, %d, %s", name, offset, reg); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, %s", name); // TODO: lui? 
+ ccb_target_gen_emit("la t0, %s", name); + const char* spec = ccb_target_gen_loadstorespec(ccb, type, true); + ccb_target_gen_emit("s%s %s, %d(%s)", spec, reg, offset, "t0"); + //ccb_target_gen_emit("sd %s, %d(t0)", reg, offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [%s + %d], %s", name, offset, reg); + } + else { + ccb_target_gen_emit("mov %%%s, %s+%d(%%rip)", reg, name, offset); + } + } + else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setcmr %s, %s", name, reg); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, %s", name); // TODO: lui? + ccb_target_gen_emit("la t0, %s", name); // TODO: lui? + const char* spec = ccb_target_gen_loadstorespec(ccb, type, true); + if (type->type == CCB_TYPE_FLOAT || type->type == CCB_TYPE_DOUBLE || type->type == CCB_TYPE_LDOUBLE) { + ccb_target_gen_emit("fs%s %s, %d(%s)", spec, "fa0", 0, "t0"); + } else { + ccb_target_gen_emit("s%s %s, %d(%s)", spec, reg, 0, "t0"); + //ccb_target_gen_emit("sd %s, 0(t0)", reg); + } + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [%s], %s", name, reg); + } + else { + ccb_target_gen_emit("mov %%%s, %s(%%rip)", reg, name); + } + } +} + +static void ccb_target_gen_save_inner(ccb_t* ccb, ccb_data_type_t* type, int offset, const char* treg, const char* areg) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpmr %s, %s, %d", treg, areg, offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + const char* spec = ccb_target_gen_loadstorespec(ccb, type, true); + //ccb_target_gen_emit("s%s %s, %d(%s)", spec, treg, offset, areg); + ccb_target_gen_rv_store(ccb, spec, treg, areg, offset, "t1"); + /*if (type->type == CCB_TYPE_INT) { + 
ccb_target_gen_emit("sw %s, %d(%s)", treg, offset, areg); + } else { + ccb_target_gen_emit("sd %s, %d(%s)", treg, offset, areg); + }*/ + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [%s + %d], %s", areg, offset, treg); + } + else { + ccb_target_gen_emit("mov %%%s, %d(%%%s)", treg, offset, areg); + } +} + +static int ccb_target_gen_nextwd2r0(ccb_t* ccb, ccb_data_type_t* type, int* countvar) { + if (countvar[0] < type->size) { + if (type->size <= ccb_target_wordbytes(ccb)*2) { + if (countvar[0] == 0) { + // No action, first word is already in r0 + } else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi %s, %s, 0", ccb_target_r0(ccb), ccb_target_r1(ccb)); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, %s", ccb_target_r0(ccb), ccb_target_r1(ccb)); + } + else { + ccb_target_gen_emit("mov %%%s, %%%s", ccb_target_r1(ccb), ccb_target_r0(ccb)); + } + } + } else { + ccb_target_gen_pop(ccb_target_r0(ccb)); + } + countvar[0] += ccb_target_wordbytes(ccb); + // TODO: Update this to return number of bytes to write instead? + return 1; + } else { + return 0; + } +} + +static void ccb_target_gen_save_dolongstore(ccb_t* ccb, ccb_data_type_t* type, const char* base, int offset, int strict) { + // TODO: This will overwrite some bytes past the end of non-word-aligned struct sizes, this probably needs to be fixed + // for some cases (i.e. if it's within another struct) but is less important for local/global variables and parameters + // (which are mostly treated as locals) since those should all be word-aligned anyway. 
If I was pedantic I'd check + // whether it's faster or slower to read a whole word versus 1-7 bytes, and my guess is that it'd depend on the + // number of bytes and how many instructions it would otherwise take to read/write them, but reading/writing a whole word + // is simpler in cases where it does work. + int countvar = 0; + while (ccb_target_gen_nextwd2r0(ccb, type, &countvar)) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmr %s, %d, %s", base, offset, ccb_target_gen_register_integer(ccb, type, 'a')); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("sd %s, %d(fp)", ccb_target_gen_register_integer(ccb, type, 'a'), offset); + ccb_target_gen_emit("sd %s, %d(%s)", ccb_target_r0(ccb), offset+countvar-ccb_target_wordbytes(ccb), base); + // TODO: Work out why it didn't work this way... + //ccb_target_gen_save_inner(ccb, ccb_ast_data_table[CCB_TYPE_LONG], offset+countvar-ccb_target_wordbytes(ccb), ccb_target_r0(ccb), "x8"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [%s + %d], %s", base, offset+countvar-ccb_target_wordbytes(ccb), ccb_target_r0(ccb)); + } + else { + ccb_target_gen_emit("mov %%%s, %d(%%%s)", ccb_target_r0(ccb), offset+countvar-ccb_target_wordbytes(ccb), base); + } + } +} + +static void ccb_target_gen_save_local(ccb_t* ccb, ccb_data_type_t* type, int offset) { + if (type->type == CCB_TYPE_FLOAT) { + ccb_target_gen_push_xmm(0); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmfx32 r5, %d, f0", offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fcvt.s.d ft0, fa0"); + //ccb_target_gen_emit("fsw ft0, %d(fp)", offset); + ccb_target_gen_emit("fsw ft0, %d(x8)", offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) 
{ + ccb_target_gen_emit("unpcklpd xmm0, xmm0"); + ccb_target_gen_emit("cvtpd2ps xmm0, xmm0"); + ccb_target_gen_emit("movss [rbp + %d], xmm0", offset); + } + else { + ccb_target_gen_emit("unpcklpd %%xmm0, %%xmm0"); + ccb_target_gen_emit("cvtpd2ps %%xmm0, %%xmm0"); + ccb_target_gen_emit("movss %%xmm0, %d(%%rbp)", offset); + } + ccb_target_gen_pop_xmm(0); + } + else if (type->type == CCB_TYPE_DOUBLE || type->type == CCB_TYPE_LDOUBLE) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmf r5, %d, f0", offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("fsd fa0, %d(fp)", offset); + ccb_target_gen_emit("fsd fa0, %d(x8)", offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("movsd [rbp + %d], xmm0", offset); + } + else { + ccb_target_gen_emit("movsd %%xmm0, %d(%%rbp)", offset); + } + } + /*else if (type->type == CCB_TYPE_INT) { // TODO: This needs to be cleaned up! 
+ if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmr r5, %d, %s", offset, ccb_target_gen_register_integer(ccb, type, 'a')); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("sd %s, %d(fp)", ccb_target_gen_register_integer(ccb, type, 'a'), offset); + ccb_target_gen_emit("sw %s, %d(x8)", ccb_target_gen_register_integer(ccb, type, 'a'), offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [rbp + %d], %s", offset, ccb_target_gen_register_integer(ccb, type, 'a')); + } + else { + ccb_target_gen_emit("mov %%%s, %d(%%rbp)", ccb_target_gen_register_integer(ccb, type, 'a'), offset); + } + }*/ + else if (type->size > ccb_target_wordbytes(ccb)) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_save_dolongstore(ccb, type, "x8", offset, 0); + } else { + ccb_target_gen_save_dolongstore(ccb, type, "rbp", offset, 0); + } + } else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmr r5, %d, %s", offset, ccb_target_gen_register_integer(ccb, type, 'a')); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("sd %s, %d(fp)", ccb_target_gen_register_integer(ccb, type, 'a'), offset); + //ccb_target_gen_emit("sd %s, %d(x8)", ccb_target_gen_register_integer(ccb, type, 'a'), offset); + ccb_target_gen_save_inner(ccb, type, offset, ccb_target_gen_register_integer(ccb, type, 'a'), "x8"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [rbp + %d], %s", offset, ccb_target_gen_register_integer(ccb, type, 'a')); + } + else { + ccb_target_gen_emit("mov %%%s, %d(%%rbp)", ccb_target_gen_register_integer(ccb, type, 'a'), offset); + } + } +} + +static void ccb_target_gen_assignment_dereference_intermediate(ccb_t* ccb, 
ccb_data_type_t* type, int offset) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrrm r1, r4"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("ld a1, 0(sp)"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rcx, [rsp]"); + } + else { + ccb_target_gen_emit("mov (%%rsp), %%rcx"); + } + + const char* reg = ccb_target_gen_register_integer(ccb, type, 'c'); + + if (offset) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmr r0, %d, %s", offset, reg); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + // TODO: This part (and again below) needs to be cleaned up... + /*if (type->type == CCB_TYPE_INT) { + ccb_target_gen_emit("sw %s, %d(a0)", reg, offset); + } else { + ccb_target_gen_emit("sd %s, %d(a0)", reg, offset); + }*/ + ccb_target_gen_save_inner(ccb, type, offset, reg, ccb_target_gen_register_integer(ccb, type, 'a')); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [rax + %d], %s", offset, reg); + } + else { + ccb_target_gen_emit("mov %%%s, %d(%%rax)", reg, offset); + } + } + else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrmr r0, %s", reg); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + /*if (type->type == CCB_TYPE_INT) { + ccb_target_gen_emit("sw %s, 0(a0)", reg); + } else { + ccb_target_gen_emit("sd %s, 0(a0)", reg); + }*/ + + ccb_target_gen_save_inner(ccb, type, 0, reg, ccb_target_gen_register_integer(ccb, type, 'a')); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov [rax], %s", reg); + } + else { + ccb_target_gen_emit("mov %%%s, (%%rax)", reg); + } + } + + 
ccb_target_gen_pop(ccb_target_r0(ccb)); +} + +static void ccb_target_gen_assignment_dereference(ccb_t* ccb, ccb_ast_t* var) { + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_expression(ccb, var->unary.operand); + ccb_target_gen_assignment_dereference_intermediate(ccb, var->unary.operand->ctype->pointer, 0); +} + +static void ccb_target_gen_ensure_lva(ccb_t* ccb, ccb_ast_t* ast) { + if (ast->variable.init) + ccb_target_gen_declaration_initialization(ccb, ast->variable.init, ast->variable.off); + ast->variable.init = NULL; +} + +static void ccb_target_gen_pointer_arithmetic(ccb_t* ccb, char op, ccb_ast_t* left, ccb_ast_t* right) { + ccb_target_gen_expression(ccb, left); + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_expression(ccb, right); + + int size = left->ctype->pointer->size; + if (size > 1) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("mulrc r0, %d", size); + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (size == 2 || size == 4 || size == 8) { + ccb_target_gen_emit("slli a0, a0, %d", (size == 2) ? 1 : ((size == 4) ? 
2 : 3)); + } + else { + //ccb_target_gen_emit("addi a1, zero, %d", size); + ccb_target_gen_emit("li a1, %d", size); + ccb_target_gen_emit("mul a0, a0, a1"); + } + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("imul rax, %d", size); + } + else { + ccb_target_gen_emit("imul $%d, %%rax", size); + } + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrr r1, r0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi a1, a0, 0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rcx, rax"); + } + else { + ccb_target_gen_emit("mov %%rax, %%rcx"); + } + + ccb_target_gen_pop(ccb_target_r0(ccb)); + + if (op == '-') { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("subrr r0, r1"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("sub a0, a0, a1"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("sub rax, rcx"); + } + else { + ccb_target_gen_emit("sub %%rcx, %%rax"); + } + } else if (op == '+') { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("addrr r0, r1"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("add a0, a0, a1"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("add rax, rcx"); + } + else { + ccb_target_gen_emit("add %%rcx, %%rax"); + } + } else { + ccb_compile_error(ccb, "Unhandled pointer op '%c'\n", op); + } +} + +static void ccb_target_gen_assignment_structure(ccb_t* ccb, ccb_ast_t* structure, ccb_data_type_t* field, int offset) { + switch (structure->type) { + 
case CCB_AST_TYPE_VAR_LOCAL: + ccb_target_gen_ensure_lva(ccb, structure); + ccb_target_gen_save_local(ccb, field, structure->variable.off + field->offset + offset); + break; + + case CCB_AST_TYPE_VAR_GLOBAL: + ccb_target_gen_save_global(ccb, structure->variable.label, field, field->offset + offset); + break; + + case CCB_AST_TYPE_STRUCT: + ccb_target_gen_assignment_structure(ccb, structure->structure, field, offset + structure->ctype->offset); + break; + + case CCB_AST_TYPE_DEREFERENCE: + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_expression(ccb, structure->unary.operand); + ccb_target_gen_assignment_dereference_intermediate(ccb, field, field->offset + offset); + break; + + default: + ccb_compile_error(ccb, "Internal error: gen_assignment_structure"); + break; + } +} + +static void ccb_target_gen_load_structure(ccb_t* ccb, ccb_ast_t* structure, ccb_data_type_t* field, int offset, int pointeronly) { + switch (structure->type) { + case CCB_AST_TYPE_VAR_LOCAL: + ccb_target_gen_ensure_lva(ccb, structure); + ccb_target_gen_load_local(ccb, field, ccb_target_bp(ccb), structure->variable.off + field->offset + offset); + break; + case CCB_AST_TYPE_VAR_GLOBAL: + ccb_target_gen_load_global(ccb, field, structure->variable.label, field->offset + offset); + break; + case CCB_AST_TYPE_STRUCT: + ccb_target_gen_load_structure(ccb, structure->structure, field, structure->ctype->offset + offset, pointeronly); + break; + case CCB_AST_TYPE_DEREFERENCE: + ccb_target_gen_expression(ccb, structure->unary.operand); + if (pointeronly) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("addrc %s, %d", ccb_target_r0(ccb), field->offset + offset); + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi %s, %s, %d", ccb_target_r0(ccb), ccb_target_r0(ccb), field->offset + offset); + ccb_target_gen_rv_addi(ccb, ccb_target_r0(ccb), ccb_target_r0(ccb), field->offset + offset); + } + else if ((ccb_target_asmfmt(ccb) == 
CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("add %s, %d", ccb_target_r0(ccb), field->offset + offset); + } + else { + ccb_target_gen_emit("add %d, %%%s", field->offset + offset, ccb_target_r0(ccb)); + } + } else { + ccb_target_gen_load_local(ccb, field, ccb_target_r0(ccb), field->offset + offset); + } + + break; + default: + ccb_compile_error(ccb, "Internal error: gen_assignment_structure"); + break; + } +} + +static void ccb_target_gen_assignment(ccb_t* ccb, ccb_ast_t* var) { + switch (var->type) { + case CCB_AST_TYPE_DEREFERENCE: + ccb_target_gen_assignment_dereference(ccb, var); + break; + case CCB_AST_TYPE_STRUCT: + ccb_target_gen_assignment_structure(ccb, var->structure, var->ctype, 0); + break; + case CCB_AST_TYPE_VAR_LOCAL: + ccb_target_gen_ensure_lva(ccb, var); + ccb_target_gen_save_local(ccb, var->ctype, var->variable.off); + break; + case CCB_AST_TYPE_VAR_GLOBAL: + ccb_target_gen_save_global(ccb, var->variable.label, var->ctype, 0); + break; + default: + ccb_compile_error(ccb, "Internal error: gen_assignment"); + } +} + +static void ccb_target_gen_comparision(ccb_t* ccb, char* operation, ccb_ast_t* ast) { + if (ccb_ast_type_floating(ccb, ast->left->ctype) || ccb_ast_type_floating(ccb, ast->right->ctype)) { + ccb_target_gen_expression(ccb, ast->left); + ccb_target_gen_cast_float(ccb, ast->left->ctype); + ccb_target_gen_push_xmm(0); + ccb_target_gen_expression(ccb, ast->right); + ccb_target_gen_cast_float(ccb, ast->right->ctype); + ccb_target_gen_pop_xmm(1); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("compff f1, f0 ; TODO: Right way around?"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (ccb_strcasecmp(operation, "setl") == 0) { + ccb_target_gen_emit("flt.d a0, fa1, fa0"); + } + else if (ccb_strcasecmp(operation, "setg") == 0) { + ccb_target_gen_emit("flt.d a0, fa0, fa1"); + } + else if (ccb_strcasecmp(operation, "setle") == 0) { + 
ccb_target_gen_emit("fle.d a0, fa1, fa0"); + } + else if (ccb_strcasecmp(operation, "setge") == 0) { + ccb_target_gen_emit("fle.d a0, fa0, fa1"); + } + else if (ccb_strcasecmp(operation, "sete") == 0) { + ccb_target_gen_emit("feq.d a0, fa0, fa1"); + } + else if (ccb_strcasecmp(operation, "setne") == 0) { + ccb_target_gen_emit("feq.d a0, fa0, fa1"); + ccb_target_gen_emit("xori a0, a0, 1"); + } + else { + ccb_compile_error(ccb, "Bad internal comparison operator"); + } + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("ucomisd xmm1, xmm0"); + } + else { + ccb_target_gen_emit("ucomisd %%xmm0, %%xmm1"); + } + } + else { + ccb_target_gen_expression(ccb, ast->left); + ccb_target_gen_cast_int(ccb, NULL, ast->left->ctype); + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_expression(ccb, ast->right); + ccb_target_gen_cast_int(ccb, NULL, ast->right->ctype); + ccb_target_gen_pop(ccb_target_r1(ccb)); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprr r1, r0 ; TODO: Right way around?"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (ccb_strcasecmp(operation, "setl") == 0) { + ccb_target_gen_emit("slt a0, a1, a0"); + } else if (ccb_strcasecmp(operation, "setg") == 0) { + ccb_target_gen_emit("slt a0, a0, a1"); + } else if (ccb_strcasecmp(operation, "setle") == 0) { + ccb_target_gen_emit("slt a0, a0, a1"); + ccb_target_gen_emit("xori a0, a0, 1"); // XXX TODO PRObLEMATIC... 
+ } else if (ccb_strcasecmp(operation, "setge") == 0) { + ccb_target_gen_emit("slt a0, a1, a0"); + ccb_target_gen_emit("xori a0, a0, 1"); + } else if (ccb_strcasecmp(operation, "sete") == 0) { + ccb_target_gen_emit("xor a0, a0, a1"); + ccb_target_gen_emit("seqz a0, a0"); + } else if (ccb_strcasecmp(operation, "setne") == 0) { + ccb_target_gen_emit("xor a0, a0, a1"); + ccb_target_gen_emit("snez a0, a0"); + } else { + ccb_compile_error(ccb, "Bad internal comparison operator"); + } + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cmp rcx, rax ; TODO: Right way around?"); + } + else { + ccb_target_gen_emit("cmp %%rax, %%rcx"); + } + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("%s r0", operation); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + // No cleanup needed + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("%s al", operation); + ccb_target_gen_emit("movzx eax, al ; TODO: Right instruction?"); + } + else { + ccb_target_gen_emit("%s %%al", operation); + ccb_target_gen_emit("movzx %%al, %%eax"); + //TODO: Check if movzx & movzb are the same: ccb_target_gen_emit("movzb %%al, %%eax"); + } +} + +static void ccb_target_gen_binary_arithmetic_integer(ccb_t* ccb, ccb_ast_t* ast) { + char* op; + switch (ast->type) { + case '+': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "addrr" : "add"); break; + case '-': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "subrr" : "sub"); break; + case '*': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "mulrr" : (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV ? "mul" : "imul")); break; + case '^': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? 
"xorrr" : "xor"); break; + case CCB_AST_TYPE_LSHIFT: op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "shlrr" : (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV ? /*"sla"*/ "sll" : "sal")); break; + case CCB_AST_TYPE_RSHIFT: op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "shrrr" : (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV ? (ast->ctype->sign ? "sra" : "srl") :"sar")); break; // TODO: Unsigned variant? + case '/': + case '%': + op = NULL; // Avoid uninitialised warning/error + break; + default: + ccb_compile_error(ccb, "Internal error: gen_binary"); + return; // Seems intended. -Zak + break; + } + + ccb_target_gen_expression(ccb, ast->left); + ccb_target_gen_cast_int(ccb, NULL, ast->left->ctype); + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_expression(ccb, ast->right); + ccb_target_gen_cast_int(ccb, NULL, ast->right->ctype); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrr r1, r0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("addimm $r1, $r0, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi a1, a0, 0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rcx, rax"); + } + else { + ccb_target_gen_emit("mov %%rax, %%rcx"); + } + ccb_target_gen_pop(ccb_target_r0(ccb)); + + if (ast->type == '/' || ast->type == '%') { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("signx64x32 r0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("sext.w a0, a0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cqo"); + } + else { + ccb_target_gen_emit("cqto"); + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("div r0, 
r2, r1"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("%s a0, a0, a1", ast->type == '%' ? "rem" : "div"); + //if (ast->type != '%'){ + ccb_target_gen_emit("sext.w a0, a0"); + //} + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("idiv rcx"); + } + else { + ccb_target_gen_emit("idiv %%rcx"); + } + if (ast->type == '%') { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrr r0, r2"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + // Handled above + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov eax, edx"); + } + else { + ccb_target_gen_emit("mov %%edx, %%eax"); + } + } + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && (ast->type == CCB_AST_TYPE_LSHIFT || ast->type == CCB_AST_TYPE_RSHIFT)) { + if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("%s rax, cl", op); + } + else { + ccb_target_gen_emit("%s %%cl, %%rax", op); + } + } + else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("%s r0, r1", op); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("%s $r0, $r0, $r1", op); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("%s a0, a0, a1", op); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("%s rax, rcx", op); + } + else { + ccb_target_gen_emit("%s %%rcx, %%rax", op); + } + } +} + +static void ccb_target_gen_binary_arithmetic_floating(ccb_t* ccb, ccb_ast_t* ast) { + char* op; + switch (ast->type) { + case '+': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? 
"addff" : (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV ? "fadd.d" : "addsd")); break; + case '-': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "subff" : (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV ? "fsub.d" : "subsd")); break; + case '*': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "mulff" : (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV ? "fmul.d" : "mulsd")); break; + case '/': op = (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "divff" : (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV ? "fdiv.d" : "divsd")); break; + default: + ccb_compile_error(ccb, "Internal error: gen_binary"); + return; // XXX Seems intended. -Zak + break; + } + + ccb_target_gen_expression(ccb, ast->left); + ccb_target_gen_cast_float(ccb, ast->left->ctype); + ccb_target_gen_push_xmm(0); + ccb_target_gen_expression(ccb, ast->right); + ccb_target_gen_cast_float(ccb, ast->right->ctype); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("movff fa1, fa0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fmv.d fa1, fa0"); // NOTE: It's very easy to get f0/f1 and fa0/fa1 mixed up - this previously prevented arithmetic from working! + } + else { + ccb_target_gen_emit("movsd %%xmm0, %%xmm1"); // TODO: Is this wrong order for FASM? + } + ccb_target_gen_pop_xmm(0); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("%s f0, f1", op); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("%s fa0, fa0, fa1", op); + } + else { + ccb_target_gen_emit("%s %%xmm1, %%xmm0", op); // TODO: Is this wrong order for FASM? (Or is it corrected by other reversals?) 
+ } +} + +static void ccb_target_gen_load(ccb_t* ccb, ccb_data_type_t* to, ccb_data_type_t* from) { + if (ccb_ast_type_floating(ccb, to)) + ccb_target_gen_cast_float(ccb, from); + else + ccb_target_gen_cast_int(ccb, to, from); +} + +static void ccb_target_gen_save(ccb_t* ccb, ccb_data_type_t* to, ccb_data_type_t* from) { + if (ccb_ast_type_integer(ccb, from) && to->type == CCB_TYPE_FLOAT) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("floatrx32 f0, r0"); // Float-of-register-x32bit + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fcvt.s.i fa0, a0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cvtsi2ss xmm0, eax"); + } + else { + ccb_target_gen_emit("cvtsi2ss %%eax, %%xmm0"); + } + } + else if (ccb_ast_type_floating(ccb, from) && to->type == CCB_TYPE_FLOAT) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("floatsfd f0, f0"); // Float-single-of-float-double + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fcvt.s.d fa0, fa0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cvtpd2ps xmm0, xmm0"); + } + else { + ccb_target_gen_emit("cvtpd2ps %%xmm0, %%xmm0"); + } + } + else if (ccb_ast_type_integer(ccb, from) && (to->type == CCB_TYPE_DOUBLE || to->type == CCB_TYPE_LDOUBLE)) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("floatr f0, r0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fcvt.d.l fa0, a0"); // NOTE: .i on 32-bit? 
+ } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cvtsi2sd xmm0, eax"); + } + else { + ccb_target_gen_emit("cvtsi2sd %%eax, %%xmm0"); + } + } + else if (!(ccb_ast_type_floating(ccb, from) && (to->type == CCB_TYPE_DOUBLE || to->type == CCB_TYPE_LDOUBLE))) { + ccb_target_gen_load(ccb, to, from); // TODO: I'm hoping this is a reasonable code-path as long as to/from ordering is known? + } +} + +static void ccb_target_gen_binary(ccb_t* ccb, ccb_ast_t* ast) { + if (ast->ctype->type == CCB_TYPE_POINTER) { + ccb_target_gen_pointer_arithmetic(ccb, ast->type, ast->left, ast->right); + return; + } + + switch (ast->type) { // TODO: Unsigned variants + case '<': ccb_target_gen_comparision(ccb, (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "setrflaglt" : "setl"), ast); return; + case '>': ccb_target_gen_comparision(ccb, (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "setrflaggt" : "setg"), ast); return; + case CCB_AST_TYPE_EQUAL: ccb_target_gen_comparision(ccb, (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "setrflaget" : "sete"), ast); return; + case CCB_AST_TYPE_GEQUAL: ccb_target_gen_comparision(ccb, (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "setrflagge" : "setge"), ast); return; + case CCB_AST_TYPE_LEQUAL: ccb_target_gen_comparision(ccb, (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? "setrflagle" : "setle"), ast); return; + case CCB_AST_TYPE_NEQUAL: ccb_target_gen_comparision(ccb, (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC ? 
"setrflagne" : "setne"), ast); return; + } + + if (ccb_ast_type_integer(ccb, ast->ctype)) + ccb_target_gen_binary_arithmetic_integer(ccb, ast); + else if (ccb_ast_type_floating(ccb, ast->ctype)) + ccb_target_gen_binary_arithmetic_floating(ccb, ast); + else + ccb_compile_error(ccb, "Internal error in ccb_target_gen_binary, type=%d", ast->ctype->type); +} + +static void ccb_target_gen_literal_save(ccb_t* ccb, ccb_ast_t* ast, ccb_data_type_t* type, int offset) { + switch (type->type) { + case CCB_TYPE_CHAR: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmcx8 r5, %d, %d", offset, ast->integer); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, %d", ast->integer); + ccb_target_gen_emit("li t0, %d", ast->integer); + //ccb_target_gen_emit("sb t0, %d(fp)", offset); + ccb_target_gen_emit("sb t0, %d(x8)", offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov byte [rbp + %d], %d", offset, ast->integer); + } + else { + ccb_target_gen_emit("movb $%d, %d(%%rbp)", ast->integer, offset); + } + break; + + case CCB_TYPE_SHORT: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrpcmcx16 r5, %d, %d", offset, ast->integer); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, %d", ast->integer); + ccb_target_gen_emit("li t0, %d", ast->integer); + //ccb_target_gen_emit("sh t0, %d(fp)", offset); + ccb_target_gen_emit("sh t0, %d(x8)", offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov word [rbp + %d], %d", offset, ast->integer); + } + else { + ccb_target_gen_emit("movw $%d, %d(%%rbp)", ast->integer, offset); + } + break; + + case CCB_TYPE_INT: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + 
ccb_target_gen_emit("setrpcmcx32 r5, %d, %d", offset, ast->integer); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, %d", ast->integer); + ccb_target_gen_emit("li t0, %d", ast->integer); + //ccb_target_gen_emit("sw t0, %d(fp)", offset); + ccb_target_gen_emit("sw t0, %d(x8)", offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov dword [rbp + %d], %d", offset, ast->integer); + } + else { + ccb_target_gen_emit("movl $%d, %d(%%rbp)", ast->integer, offset); + } + break; + + case CCB_TYPE_LONG: + case CCB_TYPE_LLONG: + case CCB_TYPE_POINTER: + ccb_target_gen_push(ccb_target_r0(ccb)); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + unsigned long long tmp = ast->integer; /* TODO: Casts in function calls? */ + ccb_target_gen_emit("setrc r0, %llu", tmp /*(unsigned long long)ast->integer*/); + ccb_target_gen_emit("setrpcmr r5, %d, r0", offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + unsigned long long tmp = ast->integer; /* TODO: Casts in function calls? */ + //ccb_target_gen_emit("addi t0, zero, %d", tmp); // TODO: Handle larger literals! + ccb_target_gen_emit("li t0, %d", tmp); // TODO: Handle larger literals! + //ccb_target_gen_emit("sd t0, %d(fp)", offset); + ccb_target_gen_emit("sd t0, %d(x8)", offset); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + unsigned long long tmp = ast->integer; /* TODO: Casts in function calls? */ + ccb_target_gen_emit("mov qword rax, %llu", tmp); + ccb_target_gen_emit("mov qword [rbp + %d], rax", offset); + } + else { + unsigned long long tmp = ast->integer; /* TODO: Casts in function calls? 
*/ + ccb_target_gen_emit("movq $%llu, %%rax", tmp); + ccb_target_gen_emit("movq %%rax, %d(%%rbp)", offset); + } + ccb_target_gen_pop(ccb_target_r0(ccb)); + break; + + case CCB_TYPE_FLOAT: + case CCB_TYPE_DOUBLE: + ccb_target_gen_push(ccb_target_r0(ccb)); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + double tmp = ast->floating.value; // TODO: Allow getting address of struct member + ccb_target_gen_emit("setrc rax, %llu", *((unsigned long long*) &tmp)); + ccb_target_gen_emit("setrpcmr r5, %d, r0", offset); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + double tmp = ast->floating.value; // TODO: Allow getting address of struct member + ccb_target_gen_emit("li a0, %llu", *((unsigned long long*) & tmp)); + ccb_target_gen_emit("sd a0,%d(x8)", offset); + //ccb_target_gen_emit("fmv.d.x fa0, a0"); + + //ccb_target_gen_emit("lui a0,%%hi(%s)", ast->floating.label); + //ccb_target_gen_emit("fld fa0,%%lo(%s)(a0)", ast->floating.label); + //ccb_target_gen_emit("fsd fa0,%d(x8)", offset); + /* + lui a5,%hi(.LC0) + fld fa5,%lo(.LC0)(a5) + fsd fa5,-24(s0) + */ + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + double tmp = ast->floating.value; // TODO: Allow getting address of struct member + ccb_target_gen_emit("mov rax, %llu", *((unsigned long long*) & tmp)); + ccb_target_gen_emit("mov [rbp + %d], rax", offset); + } + else { + double tmp = ast->floating.value; // TODO: Allow getting address of struct member + ccb_target_gen_emit("movq $%llu, %%rax", *((unsigned long long*) & tmp)); + ccb_target_gen_emit("movq %%rax, %d(%%rbp)", offset); + } + ccb_target_gen_pop(ccb_target_r0(ccb)); + break; + + default: + ccb_compile_error(ccb, "codegen internal error in %s", __func__); + } +} + +static void ccb_target_gen_declaration_initialization(ccb_t* ccb, ccb_list_t* init, int offset) { + for (ccb_list_iterator_t* it = ccb_list_iterator(init); !ccb_list_iterator_end(it); ) { + 
ccb_ast_t* node = ccb_list_iterator_next(it); + if (node->init.value->type == CCB_AST_TYPE_LITERAL) + ccb_target_gen_literal_save(ccb, node->init.value, node->init.type, node->init.offset + offset); + else { + ccb_target_gen_expression(ccb, node->init.value); + ccb_target_gen_save_local(ccb, node->init.type, node->init.offset + offset); + } + } +} + +/* Production of prefix/postfix ++ and -- operators requires a little care to find the correct size of the increment. + * If the target type is a pointer, then it should be incremented/decremented by the element size. + */ +static int ccb_target_incrsize(ccb_t* ccb, ccb_ast_t* ast) { + switch (ast->ctype->type) { + case CCB_TYPE_ARRAY: // TODO: Should this be here? + case CCB_TYPE_POINTER: + return ast->ctype->pointer->size; + default: + return 1; + } +} + +static void ccb_target_gen_emit_prefix(ccb_t* ccb, ccb_ast_t* ast, const char* op) { + // Note: The "right-hand-side" is put in "left". This might be changed later. + //fprintf(stderr, "NOTE: prefix left is type %d, incrsize is %d\n", ast->left->type, ccb_target_incrsize(ccb, ast->left)); + int size = ccb_target_incrsize(ccb, ast->left); //1; //, ast->right->ctype->size + ccb_target_gen_expression(ccb, ast->unary.operand); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("%src r0, %d", op, size); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (ccb_strcasecmp(op, "sub") == 0) { + //ccb_target_gen_emit("addi a0, a0, -%d", size); + ccb_target_gen_rv_addi(ccb, "a0", "a0", -size); + } + else { + //ccb_target_gen_emit("addi a0, a0, %d", size); + ccb_target_gen_rv_addi(ccb, "a0", "a0", size); + } + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("%s rax, %d", op, size); + } + else { + ccb_target_gen_emit("%s $%d, %%rax", op, size); + } + ccb_target_gen_assignment(ccb, ast->unary.operand); +} + +static void 
ccb_target_gen_emit_postfix(ccb_t* ccb, ccb_ast_t* ast, const char* op) { + //fprintf(stderr, "NOTE: postfix left is type %d, incrsize is %d\n", ast->left->type, ccb_target_incrsize(ccb, ast->left)); + int size = ccb_target_incrsize(ccb, ast->left); + ccb_target_gen_expression(ccb, ast->unary.operand); + ccb_target_gen_push(ccb_target_r0(ccb)); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("%src r0, %d", op, size); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + if (ccb_strcasecmp(op, "sub") == 0) { + //ccb_target_gen_emit("addi a0, a0, -%d", size); + ccb_target_gen_rv_addi(ccb, "a0", "a0", -size); + } + else { + //ccb_target_gen_emit("addi a0, a0, %d", size); + ccb_target_gen_rv_addi(ccb, "a0", "a0", size); + } + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("%s rax, %d", op, size); + } + else { + ccb_target_gen_emit("%s $%d, %%rax", op, size); + } + ccb_target_gen_assignment(ccb, ast->unary.operand); + ccb_target_gen_pop(ccb_target_r0(ccb)); +} + +static ccb_list_t* ccb_target_gen_function_argument_types(ccb_t* ccb, ccb_ast_t* ast) { + ccb_list_t* list = ccb_list_create(); + ccb_list_iterator_t* jt = ccb_list_iterator(ast->function.call.paramtypes); // TODO: Compiler doesn't support multiple initialisers in for statements properly! + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->function.call.args); !ccb_list_iterator_end(it); ) { + //fprintf(stderr, "Got list iterators @%lx, %lx\n", it, jt); + ccb_ast_t* value = ccb_list_iterator_next(it); + //fprintf(stderr, "Got value @%lx\n", value); + ccb_data_type_t* type = ccb_list_iterator_next(jt); + //fprintf(stderr, "Got type @%lx\n", type); + + ccb_list_push(list, type ? 
type : ccb_ast_result_type(ccb, '=', value->ctype, ccb_ast_data_table[CCB_AST_DATA_INT])); + } + return list; +} + +static void ccb_target_gen_je(ccb_t* ccb, const char* label) { + // TODO: This should be called ..gen_jz (jump-if-zero) + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprc r0, 0"); + ccb_target_gen_emit("jumpcifeq %s", label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi t0, zero, 0"); + ccb_target_gen_emit("beq a0, zero, %s", label); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("test rax, rax"); + ccb_target_gen_emit("je %s", label); + } + else { + ccb_target_gen_emit("test %%rax, %%rax"); + ccb_target_gen_emit("je %s", label); + } +} + +static void ccb_target_gen_label_impl(ccb_t* ccb, int ln, const char* label) { + ccb_target_gen_emit("%s: %s from ln%d", label, (ccb_target_asmfmt(ccb)==CCB_TARGET_ASMFMT_FASM||ccb_target_asmfmt(ccb)==CCB_TARGET_ASMFMT_NASM)?";":"#", ln); +} + +// TODO/FIXME: This breaks some self-tests at present +//#define ccb_target_gen_label(ccb,lbl) ccb_target_gen_label_impl(ccb,__LINE__,lbl) +#define ccb_target_gen_label(ccb,lbl) ccb_target_gen_label_impl(ccb,0,lbl) + +static void ccb_target_gen_jmp_impl(ccb_t* ccb, int line, const char* label) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("jumpc %s", label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("j %s", label); + } + else { + ccb_target_gen_emit("jmp %s %s from ln%d", label, (ccb_target_asmfmt(ccb)==CCB_TARGET_ASMFMT_FASM||ccb_target_asmfmt(ccb)==CCB_TARGET_ASMFMT_NASM)?";":"#", line); + } +} + +// TODO/FIXME: This breaks some self-tests at present +//#define ccb_target_gen_jmp(ccb,lbl) ccb_target_gen_jmp_impl(ccb,__LINE__,lbl) +#define ccb_target_gen_jmp(ccb,lbl) ccb_target_gen_jmp_impl(ccb,0,lbl) + +int 
tmpintctr;// TODO: Fails to self-compile: = 999; + +static long ccb_target_wordaligned(ccb_t* ccb, long s) { + while ((s%ccb_target_wordbytes(ccb)) != 0) { + s++; + } + return s; +} + +static void ccb_target_gen_statement(ccb_t* ccb, ccb_ast_t* ast) { + if (!ast) return; + ccb_target_gen_expression(ccb, ast); + + switch (ast->type) { + case CCB_AST_TYPE_STATEMENT_RETURN: + return; + default: break; + } + if (ast->ctype == NULL) { + return; + } else if (ast->ctype->size > ccb_target_wordbytes(ccb)*2) { + ccb_compile_warn(ccb, "Popping result..."); + long s = ast->ctype->size; + while (s > 0) { + //TODO... + ccb_target_gen_drop(); + s -= ccb_target_wordbytes(ccb); + } + } +} + +static void ccb_target_gen_expression(ccb_t* ccb, ccb_ast_t* ast) { + //printf("Processing expression type 0x100+%d\n", ast == NULL ? -1 : ast->type-0x100); + if (!ast) return; + + char* begin = NULL; + char* ne = NULL; + char* end = NULL; + char* step = NULL; + char* skip = NULL; + + int regi = 0, backi; + int regx = 0, backx; + + int nstackints = 0, istackints; + int nstackfloats = 0, istackfloats; + int nregints = 0, iregints; + int nregfloats = 0, iregfloats; + + ccb_list_t* argtypes; + int callconv = 0; + int largeresult = 0; + + switch (ast->type) { + case CCB_AST_TYPE_LITERAL: + switch (ast->ctype->type) { + case CCB_TYPE_CHAR: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, %d", ast->integer); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $r0, $r0, $r0"); + ccb_target_gen_emit("addimm $r0, $r0, %d", ast->integer); // TODO: Sizing + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi a0, zero, %d", ast->integer); // TODO: Sizing + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rax, %d", ast->integer); + } + else { + ccb_target_gen_emit("mov $%d, %%rax", 
ast->integer); + } + break; + + case CCB_TYPE_INT: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, %d", ast->integer); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $r0, $r0, $r0"); + ccb_target_gen_emit("addimm $r0, $r0, %d", ast->integer); // TODO: Sizing + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi a0, zero, %d", ast->integer); // TODO: Sizing + //ccb_target_gen_emit("lui a0, %%hi(%d)", ast->integer); // TODO: Sizing + //ccb_target_gen_emit("addi a0, a0, %%lo(%d)", ast->integer); // TODO: Sizing + ccb_target_gen_emit("li a0, %d", ast->integer); // TODO: Sizing + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("ldr x0, =%d", ast->integer); // TODO: Sizing + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rax, %d", ast->integer); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + tmpintctr++; + ccb_target_gen_emit("section data"); + ccb_target_gen_emit("tmpint%d: data64 %d", tmpintctr, ast->integer); + ccb_target_gen_emit("section code"); + ccb_target_gen_emit("data8 0x48, 0x89, 0x04, 0x25 ; mov rax, qword ds:..."); + ccb_target_gen_emit("data32 tmpint%d", tmpintctr); + } + else { + ccb_target_gen_emit("mov $%d, %%rax", ast->integer); + } + break; + + case CCB_TYPE_LONG: + case CCB_TYPE_LLONG: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + unsigned long long tmp = ast->integer; /* TODO: Casts in function calls? */ + ccb_target_gen_emit("setrc r0, %llu", tmp); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + unsigned long long tmp = ast->integer; /* TODO: Casts in function calls? 
*/ + ccb_target_gen_emit("xor $r0, $r0, $r0"); + ccb_target_gen_emit("addimm $r0, $r0, %llu", tmp); // TODO: Sizing + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + unsigned long long tmp = ast->integer; /* TODO: Casts in function calls? */ + ccb_target_gen_emit("li a0, 0x%llx", tmp); // TODO: Sizing + /*ccb_target_gen_emit("li t0, 0x%x", (tmp>>32)&0xFFFFFFFFL); // TODO: Sizing + ccb_target_gen_emit("slli t0, t0, 32"); // TODO: Sizing + ccb_target_gen_emit("li a0, 0x%x", tmp&0xFFFFFFFFL); // TODO: Sizing + ccb_target_gen_emit("slli a0, a0, 32"); // TODO: Sizing + ccb_target_gen_emit("srli a0, a0, 32"); // TODO: Sizing + ccb_target_gen_emit("or a0, a0, t0"); // TODO: Sizing*/ + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + long long tmp = ast->integer; /* TODO: Casts in function calls? */ + ccb_target_gen_emit("mov rax, %lld", tmp); + } + else { + long long tmp = ast->integer; /* TODO: Casts in function calls? 
*/ + ccb_target_gen_emit("mov $%lld, %%rax", tmp); + } + break; + + case CCB_TYPE_FLOAT: + case CCB_TYPE_DOUBLE: + case CCB_TYPE_LDOUBLE: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setfcm f0, %s", ast->floating.label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("todo"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("la t0, %s", ast->floating.label); + //ccb_target_gen_emit("lui t0, %%hi(%s)", ast->floating.label); + //ccb_target_gen_emit("addi t0, t0, %%lo(%s)", ast->floating.label); + ccb_target_gen_emit("fld fa0, 0(t0)"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("movsd xmm0, [%s]", ast->floating.label); + } + else { + ccb_target_gen_emit("movsd %s(%%rip), %%xmm0", ast->floating.label); + } + break; + + default: + ccb_compile_error(ccb, "Internal error: gen_expression"); + } + break; + + case CCB_AST_TYPE_STRING: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, %s", ast->string.label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("todo"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("lui a0, %%hi(%s)", ast->string.label); + //ccb_target_gen_emit("addi a0, a0, %%lo(%s)", ast->string.label); + ccb_target_gen_emit("la a0, %s", ast->string.label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("ldr x0, =%s", ast->string.label); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("lea rax, [%s] ; TODO: Offset from RIP explicitly?", ast->string.label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + 
ccb_target_gen_emit("data8 0x48, 0x8D, 0x04, 0x25 ; lea rax, [...]"); + ccb_target_gen_emit("data32 %s ; Pointer to string", ast->string.label); + } + else { + ccb_target_gen_emit("lea %s(%%rip), %%rax", ast->string.label); + } + break; + + case CCB_AST_TYPE_VAR_LOCAL: + ccb_target_gen_ensure_lva(ccb, ast); + ccb_target_gen_load_local(ccb, ast->ctype, ccb_target_bp(ccb), ast->variable.off); + break; + case CCB_AST_TYPE_VAR_GLOBAL: + ccb_target_gen_load_global(ccb, ast->ctype, ast->variable.label, 0); + break; + + case CCB_AST_TYPE_PTRCALL: + // TODO: Test this again... + ccb_target_gen_push(ccb_target_r15(ccb)); + //ccb_target_gen_expression(ccb, ast->function.call.callable); + // was ccb_target_gen_push("$rF"); + //ccb_target_gen_push(ccb_target_r0(ccb)); + //ccb_target_gen_pop(ccb_target_r15(ccb)); + //was ccb_target_gen_pop("$rF"); + case CCB_AST_TYPE_CALL: + if (ast->function.callconv != 0) { + ccb_compile_warn(ccb, "Invoking function with non-standard calling convention %d", ast->function.callconv); + callconv = ast->function.callconv; + } else { + callconv = 0; + } + /* At least for "__classic_call", if the return type won't naturally fit in the return registers then + * a pointer to an area reserved for the result will have been inserted beneath the first regular argument. + * So we just need to increment the spare arguments offset by one word in that case! + * TODO: Store the offset of that argument somewhere to make sure other code doesn't get it wrong? + */ + if (callconv == 101 && ast->ctype->size > ccb_target_wordbytes(ccb)*2) { + ccb_compile_warn(ccb, "Invoking function with experimental large return value support!"); + largeresult = 1; + } else { + largeresult = 0; + } + + int resultstack = 0; + if (largeresult) { + int x; + // TODO: Initialise the register before pushing so it's not initialising it with junk! 
+ if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi %s, zero, 0", ccb_target_r0(ccb)); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, 0", ccb_target_r0(ccb)); + } + else { + ccb_target_gen_emit("mov $0, %%%s", ccb_target_r0(ccb)); + } + for (x = 0; x < ast->ctype->size; x += ccb_target_wordbytes(ccb)) { + ccb_target_gen_push(ccb_target_r0(ccb)); + } + resultstack = ccb_target_gen_stack; + } + + argtypes = ccb_target_gen_function_argument_types(ccb, ast); + for (ccb_list_iterator_t* it = ccb_list_iterator(argtypes); !ccb_list_iterator_end(it); ) { + ccb_data_type_t* tmp = ccb_list_iterator_next(it); + if (ccb_ast_type_floating(ccb, tmp) && callconv != 101) { + if (regx > 0) ccb_target_gen_push_xmm(regx); + regx++; + nregfloats++; + } else if (ccb_ast_type_floating(ccb, tmp)) { + nstackints++; //nstackfloats++; NOTE: Seems to be easier to just track them as ints! + } else if (tmp->size > ccb_target_wordbytes(ccb)) { + long s = ccb_target_wordaligned(ccb, tmp->size); + nstackints += s/ccb_target_wordbytes(ccb); + } else { + if (callconv != 101 && regi < ccb_target_callregisters(ccb)) { + const char* re = ccb_target_callregister(ccb, regi++); + if (ccb_strcasecmp(re, ccb_target_r0(ccb)) == 0) { + // Avoid pushing/popping the result register! + } + else { + //XXX TODO: Something like this MIGHT be needed sometimes but otherwise complicates output: + //ccb_target_gen_push(re); + } + nregints++; + } else { + // This will go on the stack so it doesn't need a register! + nstackints++; + } + } + } + + // TODO: Consider nstackints/nstackfloats in stack alignment! 
+ + int old_stack = ccb_target_gen_stack; + if (((ccb_target_gen_stack + ((nstackints + nstackfloats + largeresult) * 8)) % 16) != 0) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("subrc r4, 8"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("addimm $rsp, $rsp, -8"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi sp, sp, -8"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("sub rsp, 8"); + } + else { + ccb_target_gen_emit("sub $8, %%rsp"); + } + ccb_target_gen_stack += 8; + } + + + istackints = nstackints; + istackfloats = nstackfloats; + //fprintf(stderr, "test1\n"); + + ccb_list_iterator_t* itt = ccb_list_iterator(ccb_list_reverse(argtypes)); + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_list_reverse(ast->function.call.args)); !ccb_list_iterator_end(it); ) { + ccb_ast_t* v = ccb_list_iterator_next(it); + ccb_data_type_t* t = ccb_list_iterator_next(itt); + /*if (ccb_ast_type_floating(ccb, v)) { + //ccb_target_gen_pop_xmm(--backx); + } + else {*/ + if (istackints > 0) { + ccb_target_gen_expression(ccb, v); + + ccb_target_gen_save(ccb, /*ptype*/ /*v->ctype*/ t, v->ctype); + //ccb_target_gen_push_xmm(0); + if (ccb_ast_type_floating(ccb, v->ctype)) { // TODO: I think I'm using this function wrong... + istackints--; + fprintf(stderr, "Pushing 1 float\n"); + //ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_push_xmm(0); + } else if (t->size > ccb_target_wordbytes(ccb)) { + if (t->size <= ccb_target_wordbytes(ccb)*2) { + istackints -= 2; + ccb_target_gen_push(ccb_target_r1(ccb)); + ccb_target_gen_push(ccb_target_r0(ccb)); + } else { + // Hopefully it has already been pushed! 
Just calculate the correct number of stack slots + long s = ccb_target_wordaligned(ccb, t->size); + istackints -= s/ccb_target_wordbytes(ccb); + } + } else { + istackints--; + //ccb_data_type_t* ptype = ccb_list_iterator_next(jt); + //ccb_target_gen_save(ccb, ptype, v->ctype); + ccb_target_gen_push(ccb_target_r0(ccb)); + } + } else { + //ccb_target_gen_pop(ccb_target_callregister(ccb, --backi)); + } + //} + } + + istackints = nstackints; + istackfloats = nstackfloats; + iregints = nregints; + iregfloats = nregfloats; + //fprintf(stderr, "test2\n"); + + ccb_list_iterator_t* jt = ccb_list_iterator(argtypes); + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->function.call.args); !ccb_list_iterator_end(it);) + { + ccb_ast_t* v = ccb_list_iterator_next(it); + ccb_data_type_t* ptype = ccb_list_iterator_next(jt); + if (callconv != 101 && ccb_ast_type_floating(ccb, ptype)) { + ccb_target_gen_expression(ccb, v); + ccb_target_gen_save(ccb, ptype, v->ctype); + ccb_target_gen_push_xmm(0); + } else { + /*if (istackints > 0) { + istackints--; + } else*/ if (nregints > 0) { + ccb_target_gen_expression(ccb, v); + ccb_target_gen_save(ccb, ptype, v->ctype); + ccb_target_gen_push(ccb_target_r0(ccb)); + nregints--; + } + } + } + + /* For pointer calls, calculate target address after pushing args but before moving them to registers: */ + if (ast->type == CCB_AST_TYPE_PTRCALL) { + ccb_target_gen_expression(ccb, ast->function.call.callable); + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_pop(ccb_target_r15(ccb)); + } + + backi = regi; + backx = regx; + + istackints = nstackints; + //fprintf(stderr, "test3\n"); + + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_list_reverse(argtypes)); !ccb_list_iterator_end(it); ) { + ccb_data_type_t* t = ccb_list_iterator_next(it); + if (ccb_ast_type_floating(ccb, t) && callconv != 101) { + ccb_target_gen_pop_xmm(--backx); + } + else { + if (istackints > 0) { + long s = ccb_target_wordaligned(ccb, t->size); + istackints-= 
s/ccb_target_wordbytes(ccb); + } else { + ccb_target_gen_pop(ccb_target_callregister(ccb, --backi)); + } + } + } + + if (largeresult) { + int diff = ccb_target_gen_stack - resultstack; + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi %s, sp, %d", ccb_target_r0(ccb), diff); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, rsp", ccb_target_r0(ccb)); + ccb_target_gen_emit("add %s, %d", ccb_target_r0(ccb), diff); + } + else { + ccb_target_gen_emit("mov %%rsp, %%%s", ccb_target_r0(ccb)); + ccb_target_gen_emit("add $%d, %%%s", diff, ccb_target_r0(ccb)); + } + ccb_target_gen_push(ccb_target_r0(ccb)); + } + + // TODO : I forget wtf was going on here... + /* + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, %d", regx); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $r0, $r0, $r0"); + ccb_target_gen_emit("addimm $r0, $r0, %d", regx); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("li a0, %d", regx); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + //ccb_target_gen_emit("addi a0, zero, %d", regx); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov eax, %d", regx); // TODO: Should this be rax?? 
+ } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("data8 0x48, 0xB8 ; mov rax, ..."); + ccb_target_gen_emit("data64 %d", regx); + } + else { + ccb_target_gen_emit("mov $%d, %%eax", regx); + }*/ + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_callconv(ccb) == CCB_TARGET_CALLCONV_WINDOWS) { + // TODO: Fix/test/update this stuff so that __classic_call works on Windows too. + ccb_target_gen_emit("push 0"); + ccb_target_gen_emit("push 0"); + ccb_target_gen_emit("push 0"); + ccb_target_gen_emit("push 0"); + } + + if (ast->type == CCB_AST_TYPE_PTRCALL) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("callr r15"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("be $rlink, $r0, $rF"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("jalr t0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("call r15"); + } + else { + ccb_target_gen_emit("call *%%r15 # GNU syntax is a fucking disaster"); + } + + } + else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("callc %s", ast->function.name); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("xor $rF, $rF, $rF"); + ccb_target_gen_emit("addimm $rF, $rF, %s", ast->function.name); + ccb_target_gen_emit("be $rlink, $r0, $rF"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("call %s", ast->function.name); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("bl %s", ast->function.name); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + tmpintctr++; + ccb_target_gen_emit("data8 0xE8 ; call ..."); + 
ccb_target_gen_emit("data32 (%s - callinstrend%i) ; instruction-relative address", ast->function.name, tmpintctr); + ccb_target_gen_emit("callinstrend%i:", tmpintctr); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + //NOTE: Redeclaring externs with each call does NOT work with FASM + if (!ccb_extrastage_ispredeclared(ccb, ast->function.name)) { + ccb_target_gen_emit("extrn %s", ast->function.name); + ccb_extrastage_setpredeclared(ccb, ast->function.name); + } + + ccb_target_gen_emit("call %s", ast->function.name); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (!ccb_extrastage_ispredeclared(ccb, ast->function.name)) { + ccb_target_gen_emit("extern %s", ast->function.name); + ccb_extrastage_setknownextern(ccb, ast->function.name); + ccb_extrastage_setpredeclared(ccb, ast->function.name); + } + /* Position-independent code now works with NASM */ + if (ccb_extrastage_ispredeclared(ccb, ast->function.name) && !ccb_extrastage_isknownextern(ccb, ast->function.name)) { + ccb_target_gen_emit("call %s", ast->function.name); + } else { + ccb_target_gen_emit("call %s wrt ..plt", ast->function.name); + } + //ccb_target_gen_emit("call [rel %s wrt ..got]", ast->function.name); + } + else { + ccb_target_gen_emit("call %s", ast->function.name); + } + } + + istackints = nstackints; + istackfloats = nstackfloats; + //fprintf(stderr, "test4\n"); + + if (largeresult) { + ccb_target_gen_drop(); + } + + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_list_reverse(argtypes)); !ccb_list_iterator_end(it); ) { + ccb_data_type_t* t = ccb_list_iterator_next(it); + if (ccb_ast_type_floating(ccb, t) && callconv != 101) { + /*if (regx != 1) + ccb_target_gen_pop_xmm(--regx);*/ + } + else { + if (istackints > 0) { + long s = ccb_target_wordaligned(ccb, t->size); + istackints -= s/ccb_target_wordbytes(ccb); + while (s > 0) { + //ccb_target_gen_pop(ccb_target_r1/*15*/(ccb)); // TODO: Add a "drop" equivalent + // BROKEN? 
+ ccb_target_gen_drop(); + s -= ccb_target_wordbytes(ccb); + } + } else { + /*const char* re = ccb_target_callregister(ccb, --regi); + if (ccb_strcasecmp(re, ccb_target_r0(ccb)) == 0) { + // Avoid pushing/popping the result register! + } + else { + ccb_target_gen_pop(re); + }*/ + } + } + } + + + if (((old_stack + ((nstackints + nstackfloats + largeresult) * 8)) % 16) != 0) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("addrc r4, 8"); + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi sp, sp, 8"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("add rsp, 8"); + } + else { + ccb_target_gen_emit("add $8, %%rsp"); + } + ccb_target_gen_stack -= 8; + } + + + + istackints = nstackints; + istackfloats = nstackfloats; + //fprintf(stderr, "test5\n"); + + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_list_reverse(argtypes)); !ccb_list_iterator_end(it); ) { + ccb_data_type_t* t = ccb_list_iterator_next(it); + if (ccb_ast_type_floating(ccb, t) && callconv != 101) { + if (regx != 1) + ccb_target_gen_pop_xmm(--regx); + } + else { + if (istackints > 0) { + long s = ccb_target_wordaligned(ccb, t->size); + istackints -= s/ccb_target_wordbytes(ccb); + //istackints--; + //ccb_target_gen_pop("r15"); // TODO: Add a "drop" equivalent + } else { + const char* re = ccb_target_callregister(ccb, --regi); + if (ccb_strcasecmp(re, ccb_target_r0(ccb)) == 0) { + // Avoid pushing/popping the result register! 
+ } + else { + //XXX TODO: Something like this MIGHT be needed sometimes but otherwise complicates output: + //ccb_target_gen_pop(re); + } + } + } + } + + /* Finally, decode the floating-point result for a __classic_call invocation */ + if (callconv == 101 && ccb_ast_type_floating(ccb, ast->ctype)) { + ccb_compile_warn(ccb, "Generating invocation of special floating point return hack for __classic_call"); + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_pop_xmm(0); + } + + if (ast->ctype->type == CCB_TYPE_FLOAT) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("addrr r0, r1"); // TODO + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("fcvt.d.s fa0, fa0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cvtps2pd xmm0, xmm0"); + } else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86) { + ccb_target_gen_emit("cvtps2pd %%xmm0, %%xmm0"); + } + else { + ccb_target_gen_emit("todo"); + } + } + + if (ast->type == CCB_AST_TYPE_PTRCALL) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_pop("t0"); + } else { + ccb_target_gen_pop("r15"); + } + } + + break; + + case CCB_AST_TYPE_DECLARATION: + if (ast->decl.init) + ccb_target_gen_declaration_initialization(ccb, ast->decl.init, ast->decl.var->variable.off); + break; + + case CCB_AST_TYPE_ADDRESS: + switch (ast->unary.operand->type) { + case CCB_AST_TYPE_FUNCTION: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, %s", ast->unary.operand->function.name); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("lui a0, %%hi(%s)", ast->unary.operand->function.name); + //ccb_target_gen_emit("addi a0, a0, %%lo(%s)", ast->unary.operand->function.name); + ccb_target_gen_emit("la a0, %s", 
ast->unary.operand->function.name); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("lea rax, [%s]", ast->unary.operand->function.name); + } + else { + ccb_target_gen_emit("lea %s(%%rip), %%rax", ast->unary.operand->function.name); + } + break; + case CCB_AST_TYPE_VAR_LOCAL: + ccb_target_gen_ensure_lva(ccb, ast->unary.operand); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrrpc r0, r5, %d", ast->unary.operand->variable.off); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("add a0, fp, %d", ast->unary.operand->variable.off); + //ccb_target_gen_emit("addi a0, x8, %d", ast->unary.operand->variable.off); + ccb_target_gen_rv_addi(ccb, "a0", "x8", ast->unary.operand->variable.off); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("lea rax, [rbp + %d]", ast->unary.operand->variable.off); + } + else { + ccb_target_gen_emit("lea %d(%%rbp), %%rax", ast->unary.operand->variable.off); + } + break; + + case CCB_AST_TYPE_VAR_GLOBAL: + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrc r0, %s", ast->unary.operand->variable.label); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("lea rax, [%s]", ast->unary.operand->variable.label); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("lui a0, %%hi(%s)", ast->unary.operand->variable.label); + //ccb_target_gen_emit("addi a0, a0, %%lo(%s)", ast->unary.operand->variable.label); + ccb_target_gen_emit("la a0, %s", ast->unary.operand->variable.label); + } + else { + ccb_target_gen_emit("lea %s(%%rip), %%rax", ast->unary.operand->variable.label); + } + break; + + case CCB_AST_TYPE_STRUCT: + 
ccb_target_gen_load_structure(ccb, ast->structure, ast->ctype, 0, 1); + break; + + case CCB_AST_TYPE_DEREFERENCE: + ccb_target_gen_expression(ccb, ast->unary.operand); + break; + + default: + ccb_compile_error(ccb, "Internal error in gen_expression, trying to get address of type 0x100+%d", ast->unary.operand->type-0x100); + break; + } + break; + + case CCB_AST_TYPE_DEREFERENCE: + ccb_target_gen_expression(ccb, ast->unary.operand); + ccb_target_gen_load_local(ccb, ast->unary.operand->ctype->pointer, ccb_target_r0(ccb), 0); + ccb_target_gen_load(ccb, ast->ctype, ast->unary.operand->ctype->pointer); + break; + + case CCB_AST_TYPE_STATEMENT_IF: + case CCB_AST_TYPE_EXPRESSION_TERNARY: + ccb_target_gen_expression(ccb, ast->ifstmt.cond); + ne = ccb_ast_label(ccb); + ccb_target_gen_je(ccb, ne); + ccb_target_gen_expression(ccb, ast->ifstmt.then); + if (ast->ifstmt.last) { + end = ccb_ast_label(ccb); + ccb_target_gen_jmp(ccb, end); + ccb_target_gen_label(ccb, ne); + //ccb_target_gen_emit("# TEST LINE C 1"); + ccb_target_gen_expression(ccb, ast->ifstmt.last); + //ccb_target_gen_emit("# TEST LINE C 2"); + ccb_target_gen_label(ccb, end); + } + else { + ccb_target_gen_label(ccb, ne); + } + break; + + case CCB_AST_TYPE_STATEMENT_FOR: { + if (ast->forstmt.init) + ccb_target_gen_expression(ccb, ast->forstmt.init); + begin = ccb_ast_label(ccb); + step = ccb_ast_label(ccb); + end = ccb_ast_label(ccb); + ccb_target_gen_jump_save(ccb, end, step); + ccb_target_gen_label(ccb, begin); + if (ast->forstmt.cond) { + ccb_target_gen_expression(ccb, ast->forstmt.cond); + ccb_target_gen_je(ccb, end); + } + ccb_target_gen_expression(ccb, ast->forstmt.body); + ccb_target_gen_label(ccb, step); + if (ast->forstmt.step) + ccb_target_gen_expression(ccb, ast->forstmt.step); + ccb_target_gen_jmp(ccb, begin); + ccb_target_gen_label(ccb, end); + ccb_target_gen_jump_restore(ccb); + } break; + + case CCB_AST_TYPE_STATEMENT_WHILE: { + begin = ccb_ast_label(ccb); + end = ccb_ast_label(ccb); + 
ccb_target_gen_jump_save(ccb, end, begin); + ccb_target_gen_label(ccb, begin); + ccb_target_gen_expression(ccb, ast->forstmt.cond); + ccb_target_gen_je(ccb, end); + ccb_target_gen_expression(ccb, ast->forstmt.body); + ccb_target_gen_jmp(ccb, begin); + ccb_target_gen_label(ccb, end); + ccb_target_gen_jump_restore(ccb); + } break; + + case CCB_AST_TYPE_STATEMENT_DO: { + begin = ccb_ast_label(ccb); + end = ccb_ast_label(ccb); + //fprintf(stderr, "generated begin '%s' end '%s'\n", begin, end); + ccb_target_gen_jump_save(ccb, end, begin); + ccb_target_gen_label(ccb, begin); + ccb_target_gen_expression(ccb, ast->forstmt.body); + ccb_target_gen_expression(ccb, ast->forstmt.cond); + //ccb_target_gen_emit("#TODO TEST LINE 1"); + ccb_target_gen_je(ccb, end); + ccb_target_gen_jmp(ccb, begin); + ccb_target_gen_label(ccb, end); + ccb_target_gen_jump_restore(ccb); + //ccb_target_gen_emit("#TODO TEST LINE 2"); + } break; + + case CCB_AST_TYPE_STATEMENT_BREAK: + if (!ccb_target_gen_label_break) + ccb_compile_error(ccb, "ICE"); + ccb_target_gen_jmp(ccb, ccb_target_gen_label_break); + break; + + case CCB_AST_TYPE_STATEMENT_CONTINUE: + if (!ccb_target_gen_label_continue) + ccb_compile_error(ccb, "ICE"); + ccb_target_gen_jmp(ccb, ccb_target_gen_label_continue); + break; + + case CCB_AST_TYPE_STATEMENT_SWITCH: { + char* ccb_target_gen_label_switch_store = ccb_target_gen_label_switch; + char* ccb_target_gen_label_break_store = ccb_target_gen_label_break; + ccb_target_gen_expression(ccb, ast->switchstmt.expr); + ccb_target_gen_label_switch = ccb_ast_label(ccb); + ccb_target_gen_label_break = ccb_ast_label(ccb); + //fprintf(stderr, "generated switch '%s' break '%s'\n", ccb_target_gen_label_switch, ccb_target_gen_label_break); + ccb_target_gen_jmp(ccb, ccb_target_gen_label_switch); + ccb_target_gen_expression(ccb, ast->switchstmt.body); + ccb_target_gen_label(ccb, ccb_target_gen_label_switch); + //ccb_target_gen_emit("#TODO TEST LINE B 1"); + ccb_target_gen_label(ccb, 
ccb_target_gen_label_break); + //ccb_target_gen_emit("# TODO TEST LINE B 2"); + ccb_target_gen_label_switch = ccb_target_gen_label_switch_store; + ccb_target_gen_label_break = ccb_target_gen_label_break_store; + } break; + + case CCB_AST_TYPE_STATEMENT_CASE: + if (!ccb_target_gen_label_switch) + ccb_compile_error(ccb, "ICE"); + skip = ccb_ast_label(ccb); + ccb_target_gen_jmp(ccb, skip); + ccb_target_gen_label(ccb, ccb_target_gen_label_switch); + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprr r0, %d", ast->casevalue); // TODO: Is this the right ordering?? + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi a1, zero, %d", ast->casevalue); + ccb_target_gen_emit("li a1, %d", ast->casevalue); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cmp eax, %d", ast->casevalue); + } + else { + ccb_target_gen_emit("cmp $%d, %%eax", ast->casevalue); + } + ccb_target_gen_label_switch = ccb_ast_label(ccb); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("jumpcifne %s", ccb_target_gen_label_switch); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("bne a0, a1, %s", ccb_target_gen_label_switch); + } + else { + ccb_target_gen_emit("jne %s", ccb_target_gen_label_switch); + } + ccb_target_gen_label(ccb, skip); + break; + + case CCB_AST_TYPE_STATEMENT_DEFAULT: + if (!ccb_target_gen_label_switch) + ccb_compile_error(ccb, "ICE"); + ccb_target_gen_label(ccb, ccb_target_gen_label_switch); + ccb_target_gen_label_switch = ccb_ast_label(ccb); + break; + + case CCB_AST_TYPE_STATEMENT_GOTO: + ccb_target_gen_jmp(ccb, ast->gotostmt.where); + break; + + case CCB_AST_TYPE_STATEMENT_LABEL: + if (ast->gotostmt.where) + ccb_target_gen_label(ccb, ast->gotostmt.where); + break; + /* + case CCB_AST_TYPE_STATEMENT_GOTO: + fprintf(stderr, "Generating goto 
'%s'\n", ast->gotostmt.label); + ccb_target_gen_jmp(ccb, ast->gotostmt.label); + break; + + case CCB_AST_TYPE_STATEMENT_LABEL: + fprintf(stderr, "Generating label '%s'\n", ast->gotostmt.label); + ccb_target_gen_label(ccb, ast->gotostmt.label); + break; + */ + case CCB_AST_TYPE_STATEMENT_RETURN: + if (ast->returnstmt) { + ccb_target_gen_expression(ccb, ast->returnstmt); + ccb_target_gen_save(ccb, ast->ctype, ast->returnstmt->ctype); + /* For __classic_call, we emulate "soft float" style conventions - always using integer registers for the result. */ + if (ast->return_callconv == 101 && ccb_ast_type_floating(ccb, ast->ctype)) { + ccb_compile_warn(ccb, "Generating experimental floating point return for __classic_call"); + ccb_target_gen_push_xmm(0); + ccb_target_gen_pop(ccb_target_r0(ccb)); + } else if (ast->return_callconv == 101 && ast->ctype->size > ccb_target_wordbytes(ccb)*2) { + ccb_compile_warn(ccb, "Generating experimental large value return for __classic_call"); + // TODO: This could all be cleaned up a lot (I somehow forgot indirection at first hence the longer code..), but it seems to WORK! 
so for now I'm keeping it + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi %s, x8, %d", ccb_target_r0(ccb), ccb_target_type_size_pointer(ccb)*2); + ccb_target_gen_emit("ld %s, 0(%s)", ccb_target_r1(ccb), ccb_target_r0(ccb)); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov %s, rbp", ccb_target_r0(ccb)); + ccb_target_gen_emit("add %s, %d", ccb_target_r0(ccb), ccb_target_type_size_pointer(ccb)*2); + ccb_target_gen_emit("movq %s, [%s]", ccb_target_r1(ccb), ccb_target_r0(ccb)); + } + else { + ccb_target_gen_emit("mov %%rbp, %%%s", ccb_target_r0(ccb)); + ccb_target_gen_emit("add $%d, %%%s", ccb_target_type_size_pointer(ccb)*2, ccb_target_r0(ccb)); + ccb_target_gen_emit("movq 0(%%%s), %%%s", ccb_target_r0(ccb), ccb_target_r1(ccb)); + } + + ccb_target_gen_save_dolongstore(ccb, ast->ctype, ccb_target_r1(ccb), 0, 0); + } + } else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_pop("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("li a0, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rax, 0"); + } + else { + ccb_target_gen_emit("mov $0, %%rax"); + } + } + if (ccb_target_family(ccb) == 
CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrr r4, r5"); + ccb_target_gen_emit("popr r5"); + ccb_target_gen_emit("return"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_pop("$rlink"); + ccb_target_gen_emit("bto $rlink, $rlink, $rlink"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi sp, x8, 0"); + ccb_target_gen_pop(ccb_target_bp(ccb)); + ccb_target_gen_pop("ra"); + ccb_target_gen_emit("ret"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("ldp x29, x30, [sp], #32"); + //ccb_target_gen_emit("add sp, sp, 16"); + ccb_target_gen_emit("ret"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("data8 0xC9 ; leave"); + ccb_target_gen_emit("data8 0xC3 ; ret"); + } + else { + ccb_target_gen_emit("leave"); + ccb_target_gen_emit("ret"); + } + break; + + case CCB_AST_TYPE_STATEMENT_COMPOUND: + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->compound); !ccb_list_iterator_end(it); ) { + ccb_target_gen_statement(ccb, ccb_list_iterator_next(it)); + } + break; + + case CCB_AST_TYPE_STATEMENT_ASM: + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->asmstmt.code); !ccb_list_iterator_end(it); ) { + ccb_target_gen_emit("%s", ccb_list_iterator_next(it)); + } + break; + + case CCB_AST_TYPE_STRUCT: + ccb_target_gen_load_structure(ccb, ast->structure, ast->ctype, 0, 0); + break; + + case '!': + ccb_target_gen_expression(ccb, ast->unary.operand); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprc r0, 0"); + ccb_target_gen_emit("setrflageq r0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("not a0, a0"); + //ccb_target_gen_emit("andi a0, a0, 1"); + ccb_target_gen_emit("seqz a0, a0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == 
CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("cmp rax, 0"); + ccb_target_gen_emit("sete al"); + ccb_target_gen_emit("movzx eax, al"); + } + else { + ccb_target_gen_emit("cmp $0, %%rax"); + ccb_target_gen_emit("sete %%al"); + ccb_target_gen_emit("movzx %%al, %%eax"); + //ccb_target_gen_emit("movzb %%al, %%eax"); + } + break; + + case CCB_AST_TYPE_AND: + end = ccb_ast_label(ccb); + ccb_target_gen_expression(ccb, ast->left); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprc r0, 0"); + ccb_target_gen_emit("setrc r0, 0"); + ccb_target_gen_emit("jumpcifeq %s", end); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("mv a1, a0"); + ccb_target_gen_emit("addi a2, zero, 0"); + ccb_target_gen_emit("addi a0, zero, 0"); + ccb_target_gen_emit("beq a1, a2, %s", end); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("test rax, rax"); + ccb_target_gen_emit("mov rax, 0"); + ccb_target_gen_emit("je %s", end); + } + else { + ccb_target_gen_emit("test %%rax, %%rax"); + ccb_target_gen_emit("mov $0, %%rax"); + ccb_target_gen_emit("je %s", end); + } + + ccb_target_gen_expression(ccb, ast->right); + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprc r0, 0"); + ccb_target_gen_emit("setrc r0, 0"); + ccb_target_gen_emit("jumpcifeq %s", end); + ccb_target_gen_emit("setrc r0, 1"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("mv a1, a0"); + ccb_target_gen_emit("addi a2, zero, 0"); + ccb_target_gen_emit("addi a0, zero, 0"); + ccb_target_gen_emit("beq a1, a2, %s", end); + ccb_target_gen_emit("addi a0, zero, 1"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("test rax, rax"); + ccb_target_gen_emit("mov rax, 0"); + ccb_target_gen_emit("je %s", end); + 
ccb_target_gen_emit("mov rax, 1"); + } + else { + ccb_target_gen_emit("test %%rax, %%rax"); + ccb_target_gen_emit("mov $0, %%rax"); + ccb_target_gen_emit("je %s", end); + ccb_target_gen_emit("mov $1, %%rax"); + } + ccb_target_gen_label(ccb, end); + break; + + case CCB_AST_TYPE_OR: + end = ccb_ast_label(ccb); + ccb_target_gen_expression(ccb, ast->left); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprc r0, 0"); + ccb_target_gen_emit("setrc r0, 1"); + ccb_target_gen_emit("jumpcifne %s", end); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("mv a1, a0"); + ccb_target_gen_emit("addi a2, zero, 0"); + ccb_target_gen_emit("addi a0, zero, 1"); + ccb_target_gen_emit("bne a1, a2, %s", end); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("test rax, rax"); + ccb_target_gen_emit("mov rax, 1"); + ccb_target_gen_emit("jne %s", end); + } + else { + ccb_target_gen_emit("test %%rax, %%rax"); + ccb_target_gen_emit("mov $1, %%rax"); + ccb_target_gen_emit("jne %s", end); + } + + ccb_target_gen_expression(ccb, ast->right); + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("comprc r0, 0"); + ccb_target_gen_emit("setrc r0, 1"); + ccb_target_gen_emit("jumpcifne %s", end); + ccb_target_gen_emit("setrc r0, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("mv a1, a0"); + ccb_target_gen_emit("addi a2, zero, 0"); + ccb_target_gen_emit("addi a0, zero, 1"); + ccb_target_gen_emit("bne a1, a2, %s", end); + ccb_target_gen_emit("addi a0, zero, 0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("test rax, rax"); + ccb_target_gen_emit("mov rax, 1"); + ccb_target_gen_emit("jne %s", end); + ccb_target_gen_emit("mov rax, 0"); // Fix, I think I mistranslated at 
first.. + } + else { + ccb_target_gen_emit("test %%rax, %%rax"); + ccb_target_gen_emit("mov $1, %%rax"); + ccb_target_gen_emit("jne %s", end); + ccb_target_gen_emit("mov $0, %%rax"); + } + ccb_target_gen_label(ccb, end); + break; + + case '&': + case '|': + ccb_target_gen_expression(ccb, ast->left); + ccb_target_gen_push(ccb_target_r0(ccb)); + ccb_target_gen_expression(ccb, ast->right); + ccb_target_gen_pop(ccb_target_r1(ccb)); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("%s r0, r1", (ast->type == '|') ? "orrr" : "andrr"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("%s a0, a0, a1", (ast->type == '|') ? "or" : "and"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("%s rax, rcx", (ast->type == '|') ? "or" : "and"); + } + else { + ccb_target_gen_emit("%s %%rcx, %%rax", (ast->type == '|') ? "or" : "and"); + } + break; + + case '~': + ccb_target_gen_expression(ccb, ast->left); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("notr r0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("not a0, a0"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + const char* r = ccb_target_gen_register_integer(ccb, ast->ctype, 'a'); + ccb_target_gen_emit("not %s", r); // TODO: The actual issue here may be with sign extension (at least one test suite failure...) + //ccb_target_gen_emit("xor rax, -1"); + } + else { + const char* r = ccb_target_gen_register_integer(ccb, ast->ctype, 'a'); + ccb_target_gen_emit("not %%%s", r); // TODO: The actual issue here may be with sign extension (at least one test suite failure...) 
+ //ccb_target_gen_emit("xor %%rax, -1"); + } + break; + + case CCB_AST_TYPE_POST_INCREMENT: ccb_target_gen_emit_postfix(ccb, ast, "add"); break; + case CCB_AST_TYPE_POST_DECREMENT: ccb_target_gen_emit_postfix(ccb, ast, "sub"); break; + case CCB_AST_TYPE_PRE_INCREMENT: ccb_target_gen_emit_prefix(ccb, ast, "add"); break; + case CCB_AST_TYPE_PRE_DECREMENT: ccb_target_gen_emit_prefix(ccb, ast, "sub"); break; + + case CCB_AST_TYPE_EXPRESSION_CAST: + ccb_target_gen_expression(ccb, ast->unary.operand); + if (ast->ctype->sign && ast->unary.operand->ctype->type == CCB_AST_DATA_INT && ast->unary.operand->ctype->sign && ast->unary.operand->ctype->size == 4) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addw a0, a0, zero"); // Sign extend by using a 32-bit addition + } + } + ccb_target_gen_load(ccb, ast->ctype, ast->unary.operand->ctype); + break; + + case '=': + ccb_target_gen_expression(ccb, ast->right); + ccb_target_gen_load(ccb, ast->ctype, ast->right->ctype); + ccb_target_gen_assignment(ccb, ast->left); + /* An assigment's result is expected to be the assigned value, so for structs we need to reconstruct a copy of the value */ + if (ast->right->ctype->size > ccb_target_wordbytes(ccb)) { + ccb_compile_warn(ccb, "Pushing/reconstructing result... of size %d", ast->right->ctype->size); + //TODO: Make this "dup" the value or something instead... + ccb_target_gen_expression(ccb, ast->left); + } + break; + + case CCB_AST_TYPE_EXPRESSION_COMMA: + ccb_target_gen_expression(ccb, ast->left); + /* TODO: Drop result of first expression from stack for values larger than a word? 
*/ + ccb_target_gen_expression(ccb, ast->right); + break; + + default: + ccb_target_gen_binary(ccb, ast); + } +} + +static void ccb_target_gen_data_initialization_intermediate(ccb_t* ccb, ccb_table_t* labels, char* data, ccb_table_t* literal, ccb_list_t* init, int offset); +static void ccb_target_gen_data_initialization_intermediate(ccb_t* ccb, ccb_table_t* labels, char* data, ccb_table_t* literal, ccb_list_t* init, int offset) { + for (ccb_list_iterator_t* it = ccb_list_iterator(init); !ccb_list_iterator_end(it); ) { + ccb_ast_t* node = ccb_list_iterator_next(it); + + //printf("Iterating over node at 0x%lx (init.value at 0x%lx)\n", node, node->init.value); + //printf("Unary operand at 0x%lx \n", node->init.value->unary.operand); + // TODO: Doesn't properly optimise away the "&&" here so we have to triple-if it: + if ((node->init.value->type == CCB_AST_TYPE_ADDRESS)) + if( (node->init.value->unary.operand->type == CCB_AST_TYPE_VAR_LOCAL)) + if ( (node->init.value->unary.operand->variable.init)) { + + + //printf("address\n"); + char* label = ccb_ast_label(ccb); + ccb_string_t* string = ccb_string_create(); + + //ccb_string_catf(string, "%d", node->init.offset + offset); + ccb_string_catint(string, node->init.offset + offset); + + ccb_table_insert(literal, label, node->init.value->unary.operand); + ccb_table_insert(labels, ccb_string_buffer(string), label); + continue; + } + + //printf("...\n"); + + if (node->init.value->type == CCB_AST_TYPE_VAR_LOCAL && node->init.value->variable.init) { + ccb_target_gen_data_initialization_intermediate(ccb, labels, data, literal, node->init.value->variable.init, node->init.offset + offset); + continue; + } + + //printf("...\n"); + //printf("Using %d\n", node->init.type->type); + + switch (node->init.type->type) { + case CCB_TYPE_FLOAT: + *(float*)(data + node->init.offset + offset) = node->init.value->floating.value; + break; + + case CCB_TYPE_DOUBLE: + *(double*)(data + node->init.offset + offset) = 
node->init.value->floating.value; + break; + + case CCB_TYPE_CHAR: + *(char*)(data + node->init.offset + offset) = ccb_parse_evaluate(ccb, node->init.value); + break; + + case CCB_TYPE_SHORT: + *(short*)(data + node->init.offset + offset) = ccb_parse_evaluate(ccb, node->init.value); + break; + + case CCB_TYPE_INT: + *(int*)(data + node->init.offset + offset) = ccb_parse_evaluate(ccb, node->init.value); + break; + + case CCB_TYPE_LONG: + *(long*)(data + node->init.offset + offset) = ccb_parse_evaluate(ccb, node->init.value); + break; + + case CCB_TYPE_LLONG: + *(long long*)(data + node->init.offset + offset) = ccb_parse_evaluate(ccb, node->init.value); + break; + + case CCB_TYPE_POINTER: + *(long*)(data + node->init.offset + offset) = ccb_parse_evaluate(ccb, node->init.value); + + default: + break; + } + } +} + +static void ccb_target_gen_data_initialization(ccb_t* ccb, ccb_table_t* table, ccb_list_t* list, int size) { + char* data = ccb_memory_allocate(size); + memset(data, 0, size); + + ccb_table_t* labels = ccb_table_create(NULL); + ccb_target_gen_data_initialization_intermediate(ccb, labels, data, table, list, 0); + + int i = 0; + for (; i <= size - 4; i += 4) { + ccb_string_t* string = ccb_string_create(); + //ccb_string_catf(string, "%d", i); + ccb_string_catint(string, i); + char* label = ccb_table_find(labels, ccb_string_buffer(string)); + if (label) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_target_gen_emit("data64 %s", label); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("dq %s", label); + } + else { + ccb_target_gen_emit(".quad %s", label); + } + i += 4; + } + else { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || 
(ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_target_gen_emit("data32 %d", data[i]); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("dd %d", data[i]); + } + else { + //ccb_target_gen_emit(".long %d", data[i]); + ccb_target_gen_emit(".byte %d", data[i]); + ccb_target_gen_emit(".byte %d", data[i+1]); + ccb_target_gen_emit(".byte %d", data[i+2]); + ccb_target_gen_emit(".byte %d", data[i+3]); + } + } + } + for (; i < size; i++) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_target_gen_emit("data8 %d", data[i]); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("db %d", data[i]); + } + else { + ccb_target_gen_emit(".byte %d", data[i]); + } + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_target_gen_emit("align 8"); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("align 8"); + } + else { + ccb_target_gen_emit(".align 8"); + } +} + +/* + * Recursive compliteral generation, emits data initialization + * until there is nothing to initialize left. 
+ */ +static void ccb_target_gen_data_literal(ccb_t* ccb, char* label, ccb_ast_t* ast) { + ccb_table_t* table = ccb_table_create(NULL); + ccb_target_gen_emit_inline("%s:", label); + ccb_target_gen_data_initialization(ccb, table, ast->variable.init, ast->ctype->size); + + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_table_keys(table)); !ccb_list_iterator_end(it); ) { + char* label = ccb_list_iterator_next(it); + ccb_ast_t* node = ccb_table_find(table, label); + + ccb_target_gen_data_literal(ccb, label, node); + } +} + +static void ccb_target_gen_localorglobal(ccb_t* ccb, ccb_ast_t* ast) { + if (!ast->decl.var->ctype->isstatic) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit_inline("symbol %s, public", ast->decl.var->variable.label); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [public %s]", ast->decl.var->variable.label); + } + else { + ccb_target_gen_emit_inline("public %s", ast->decl.var->variable.label); + } + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [public %s]", ast->decl.var->variable.label); + } + else { + ccb_target_gen_emit_inline("global %s", ast->decl.var->variable.label); + } + } + else { + ccb_target_gen_emit_inline(".global %s", ast->decl.var->variable.label); + } + } +} + +static void ccb_target_gen_data(ccb_t* ccb, ccb_ast_t* ast) { + ccb_table_t* table = ccb_table_create(NULL); + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit_inline("section data"); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + // ignored for flat binaries 
for now + } + else { + ccb_target_gen_emit_inline("section '.data' writable"); + } + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + // ignored for flat binaries for now + } + else { + ccb_target_gen_emit_inline("section .data"); + } + } + else { + ccb_target_gen_emit_inline(".data"); + } + + ccb_target_gen_localorglobal(ccb, ast); + + ccb_target_gen_emit_inline("%s:", ast->decl.var->variable.label); + ccb_target_gen_data_initialization(ccb, table, ast->decl.init, ast->decl.var->ctype->size); + + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_table_keys(table)); !ccb_list_iterator_end(it); ) { + char* label = ccb_list_iterator_next(it); + ccb_ast_t* node = ccb_table_find(table, label); + + ccb_target_gen_data_literal(ccb, label, node); + } +} + +static void ccb_target_gen_bss(ccb_t* ccb, ccb_ast_t* ast) { + ccb_target_gen_localorglobal(ccb, ast); + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit_inline("section zerodata"); + ccb_target_gen_emit("align %d", ast->decl.var->ctype->size < 8 ? ast->decl.var->ctype->size : 8); + ccb_target_gen_emit_inline("%s:", ast->decl.var->variable.label); + ccb_target_gen_emit("reserve %d", ast->decl.var->ctype->size); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [bss section]"); + } + else { + ccb_target_gen_emit_inline("section '.bss' writeable"); + } + ccb_target_gen_emit("align %d", ast->decl.var->ctype->size < 8 ? ast->decl.var->ctype->size : 8); + + /* I thought some errors in the FASM output were due to reserving rather than setting bytes explicitly, + * turns out that wasn't the case (I was missing the "section" directive above!). 
However, the + * explicitly-setting code is preserved here in case it's useful in the future (e.g. for targetting + * assemblers which can't reserve bytes like FASM). + + switch (ast->decl.var->ctype->size) { + case 1: + ccb_target_gen_emit("%s: db 0", ast->decl.var->variable.name); + break; + + case 2: + ccb_target_gen_emit("%s: dw 0", ast->decl.var->variable.name); + break; + + case 4: + ccb_target_gen_emit("%s: dd 0", ast->decl.var->variable.name); + break; + + case 8: + ccb_target_gen_emit("%s: dq 0", ast->decl.var->variable.name); + break; + + default: + ccb_target_gen_emit("%s:", ast->decl.var->variable.name); + int i; + for (i = 0; i < ast->decl.var->ctype->size; i++) { + ccb_target_gen_emit(" db 0"); + } + } + */ + // Simply reserve the required number of bytes + ccb_target_gen_emit("%s: rb %d", ast->decl.var->variable.label, ast->decl.var->ctype->size); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [bss section]"); + } + else { + ccb_target_gen_emit_inline("section .bss"); + } + long alignment = ast->decl.var->ctype->size < 8 ? ast->decl.var->ctype->size : 8; + while ((ccb->bsscount % alignment) != 0) { + ccb_target_gen_emit("resb 1 ; Manual padding for alignment"); // TODO: See if this actually makes a difference + ccb->bsscount++; + } + //ccb_target_gen_emit("align %d", ast->decl.var->ctype->size < 8 ? 
ast->decl.var->ctype->size : 8); + /* + switch (ast->decl.var->ctype->size) { + case 1: + ccb_target_gen_emit("%s: db 0", ast->decl.var->variable.name); + break; + + case 2: + ccb_target_gen_emit("%s: dw 0", ast->decl.var->variable.name); + break; + + case 4: + ccb_target_gen_emit("%s: dd 0", ast->decl.var->variable.name); + break; + + case 8: + ccb_target_gen_emit("%s: dq 0", ast->decl.var->variable.name); + break; + + default: + ccb_target_gen_emit("%s:", ast->decl.var->variable.name); + int i; + for (i = 0; i < ast->decl.var->ctype->size; i++) { + ccb_target_gen_emit(" db 0"); + } + } + */ + // TODO: Compiler seems to segfault instead of reporting error on a stray "*/" + // Simply reserve the required number of bytes + ccb_target_gen_emit("%s resb %d", ast->decl.var->variable.label, ast->decl.var->ctype->size); + ccb->bsscount += ast->decl.var->ctype->size; + } + else { + ccb_target_gen_emit(".lcomm %s, %d", ast->decl.var->variable.label, ast->decl.var->ctype->size); + } +} + +static void ccb_target_gen_global(ccb_t* ccb, ccb_ast_t* var) { + if (var->decl.init) { + ccb_target_gen_data(ccb, var); + } + else { + ccb_target_gen_bss(ccb, var); + } +} + +void ccb_target_gen_data_section(ccb_t* ccb) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_target_gen_emit_inline("section data"); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [data section]"); + } + else { + ccb_target_gen_emit_inline("format elf64"); + /*ccb_target_gen_emit("extrn printf ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn exit ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn strcmp ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn strlen ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn strcat ; XXX 
TODO Total hack"); + ccb_target_gen_emit("extrn calloc ; XXX TODO Total hack");*/ + ccb_target_gen_emit_inline("section '.data' writeable"); + } + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [data section]"); + } + else { + /*ccb_target_gen_emit_inline("format elf64"); + ccb_target_gen_emit("extrn printf ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn exit ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn strcmp ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn strlen ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn strcat ; XXX TODO Total hack"); + ccb_target_gen_emit("extrn calloc ; XXX TODO Total hack");*/ + /* Apparently it's necessary to address things using [rel ...] to get position-independent code on NASM. + * Adding "default rel" at the start automates this, and allows the NASM backend to pass more tests. + * However, I'm not sure if this is the correct solution, and doesn't fix all cases, so this remains a TODO. + * The compiler build seems more-broken with it disabled. 
+ */ + ccb_target_gen_emit_inline("default rel"); + ccb_target_gen_emit_inline("section .data"); + } + } + else { + ccb_target_gen_emit_inline(".data"); + } + + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_ast_strings); !ccb_list_iterator_end(it); ) { + ccb_ast_t* ast = ccb_list_iterator_next(it); + ccb_target_gen_emit_inline("%s: ", ast->string.label); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_target_gen_emit("data8 \"%s\", 0", ccb_string_quote_fasm(ast->string.data, '\"')); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + ccb_target_gen_emit("db '%s', 0", ccb_string_quote_fasm(ast->string.data, '\'')); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + ccb_target_gen_emit("db \"%s\", 0", ccb_string_quote_fasm(ast->string.data, '\"')); + } + else { + ccb_target_gen_emit(".string \"%s\"", ccb_string_quote(ast->string.data)); + } + } + + for (ccb_list_iterator_t* it = ccb_list_iterator(ccb_ast_floats); !ccb_list_iterator_end(it); ) { + ccb_ast_t* ast = ccb_list_iterator_next(it); + char* label = ccb_ast_label(ccb); + + ast->floating.label = label; + ccb_target_gen_emit_inline("%s:", label); + double tmp = ast->floating.value; + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("data32 %d", ((int*)&tmp)[0]); + ccb_target_gen_emit("data32 %d", ((int*)&tmp)[1]); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("dd %d", ((int*)&tmp)[0]); + ccb_target_gen_emit("dd %d", ((int*)&tmp)[1]); + } + else { + ccb_target_gen_emit(".long %d", ((int*)&tmp)[0]); + ccb_target_gen_emit(".long 
%d", ((int*)&tmp)[1]); + } + } +} + +static int ccb_target_gen_alignment(ccb_t* ccb, int n, int align) { + int remainder = n % align; + return (remainder == 0) + ? n + : n - remainder + align; +} + +static void ccb_target_gen_function_prologue(ccb_t* ccb, ccb_ast_t* ast) { + int callconv = 0; + if (ccb_list_length(ast->function.params) > ccb_target_callregisters(ccb)) { + // TODO: Track callconv if (ast->function.callconv != 0) { + // fprintf(stderr, "NOTE: This function has calling convention %d\n", ast->function.callconv); + //} + + //ccb_compile_error(ccb, "Too many params for function"); + ccb_compile_warn(ccb, "This function will use stack arguments, which are only partly tested"); + } + if (ast->ctype->callconv != 0) { + ccb_compile_warn(ccb, "Generating function with non-standard calling convention %d", ast->ctype->callconv); + callconv = ast->ctype->callconv; + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC + || (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW)) { + ccb_target_gen_emit_inline("section code"); + ccb_target_gen_emit_inline("symbol %s, public", ast->function.name); + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [text section]"); + ccb_target_gen_emit_inline("; [public %s]", ast->function.name); + } + else { + ccb_target_gen_emit_inline("section '.text' executable"); + // This is now handled by the predeclaration/extern-checking mechanism: + //ccb_target_gen_emit_inline("public %s", ast->function.name); + } + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [text section]"); + ccb_target_gen_emit_inline("; [public %s]", ast->function.name); + } + else { + ccb_target_gen_emit_inline("section .text"); + // This is now 
handled by the predeclaration/extern-checking mechanism: + //ccb_target_gen_emit_inline("global %s", ast->function.name); + } + } + else { + ccb_target_gen_emit_inline(".text"); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit_inline(".option norvc"); + } + ccb_target_gen_emit_inline(".global %s", ast->function.name); + } + + ccb_target_gen_emit_inline("%s:", ast->function.name); + if (ast->ctype->isnaked) { + ccb_compile_warn(ccb, "Producing naked function with no prologue code!"); + return; // Don't generate any more cruft if it's a naked function. + } + ccb_target_gen_stack += (ccb_target_wordsize(ccb)/8); // Remember to account for return instruction pointer as well as rbp + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_push("$rlink"); // On this architecture we need to store the return address explicitly + } else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_push("ra"); // On this architecture we need to store the return address explicitly + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("data8 0x55 ; push rbp"); + } else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + // We use an optimised approach for ARM (this should push both) + ccb_target_gen_emit("stp x29, x30, [sp, #-32]"); + //ccb_target_gen_emit("sub sp, sp, 32"); + ccb_target_gen_stack -= (ccb_target_wordsize(ccb)/8); + } else { + ccb_target_gen_push(ccb_target_bp(ccb)); + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("mov x29, sp"); + } else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi fp, sp, 0"); + ccb_target_gen_emit("addi x8, sp, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("addimm $rbase, $rstack, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrr r5, 
r4"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rbp, rsp"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("data8 0x48, 0x89, 0xEC ; mov rbp, rsp"); + } + else { + ccb_target_gen_emit("mov %%rsp, %%rbp"); + } + + int offset = 0; + int spareoffset = ccb_target_type_size_pointer(ccb)*2; + int regi = 0; + int regx = 0; + + /* At least for "__classic_call", if the return type won't naturally fit in the return registers then + * a pointer to an area reserved for the result will have been inserted beneath the first regular argument. + * So we just need to increment the spare arguments offset by one word in that case! + * TODO: Store the offset of that argument somewhere to make sure other code doesn't get it wrong? + */ + if (callconv == 101 && ast->ctype->returntype->size > ccb_target_wordbytes(ccb)*2) { + ccb_compile_warn(ccb, "Producing function with experimental large return value support!"); + spareoffset += ccb_target_wordbytes(ccb); + } + + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->function.params); !ccb_list_iterator_end(it); ) { + ccb_ast_t* value = ccb_list_iterator_next(it); + bool spare = false; + + if (callconv != 101 && value->ctype->type == CCB_TYPE_FLOAT) { + ccb_target_gen_push_xmm(regx++); + } + else if (callconv != 101 && (value->ctype->type == CCB_TYPE_DOUBLE || value->ctype->type == CCB_TYPE_LDOUBLE)) { + ccb_target_gen_push_xmm(regx++); + } + else { // TODO: Support for 64-bit values on 32-bit architectures + if (callconv != 101 && regi < ccb_target_callregisters(ccb)) { + ccb_target_gen_push(ccb_target_callregister(ccb, regi++)); + } else { + spare = true; + } + } + if (spare){ + value->variable.off = spareoffset; + spareoffset += ccb_target_gen_alignment(ccb, value->ctype->size, 8); + } else { + 
offset -= ccb_target_gen_alignment(ccb, value->ctype->size, 8); + value->variable.off = offset; + } + } + + int localdata = 0; + for (ccb_list_iterator_t* it = ccb_list_iterator(ast->function.locals); !ccb_list_iterator_end(it); ) { + ccb_ast_t* value = ccb_list_iterator_next(it); + int inneroffset = ccb_target_gen_alignment(ccb, value->ctype->size, 8); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC || (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("; var offset %d size %d <- offset %d ldata %d", inneroffset, value->ctype->size, offset, localdata); + } + else { + ccb_target_gen_emit("# var offset %d size %d <- offset %d ldata %d", inneroffset, value->ctype->size, offset, localdata); + } + offset -= inneroffset; + value->variable.off = offset; + localdata -= inneroffset; + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC || (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("; var offset %d size %d -> offset %d ldata %d", inneroffset, value->ctype->size, offset, localdata); + } + else { + ccb_target_gen_emit("# var offset %d size %d -> offset %d ldata %d", inneroffset, value->ctype->size, offset, localdata); + } + } + + /*while ((localdata % 16) != 0) { + localdata--; + }*/ + + if (localdata) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_emit("addiu sp, %d", localdata); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("subrc r4, %d", -localdata); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + //ccb_target_gen_emit("addi sp, sp, %d", localdata); + ccb_target_gen_rv_addi(ccb, "sp", "sp", localdata); + } + else if ((ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == 
CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("sub rsp, %d", -localdata); + } + else { + ccb_target_gen_emit("sub $%d, %%rsp", -localdata); + } + //ccb_target_gen_stack += -localdata; // XXX TODO: I don't know *why* this fixes most of the test cases, it just does. -Zak. + } + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC || (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("; stack before: %d", ccb_target_gen_stack); + } + else { + ccb_target_gen_emit("# stack before: %d", ccb_target_gen_stack); + } + ccb_target_gen_stack += -localdata;//-(offset - 8); + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1 || ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC || (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("; stack after: %d", ccb_target_gen_stack); + } + else { + ccb_target_gen_emit("# stack after: %d", ccb_target_gen_stack); + } +} + +static void ccb_target_gen_function_epilogue(ccb_t* ccb) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + ccb_target_gen_pop("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("li a0, 0"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("TODO"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + ccb_target_gen_emit("mov rax, 0"); + } + else { + ccb_target_gen_emit("mov $0, %%rax"); + } + + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GEN1) { + 
ccb_target_gen_pop("$rbase"); + ccb_target_gen_pop("$rlink"); + ccb_target_gen_emit("bto $rlink, $rlink, $rlink"); + /*ccb_target_gen_emit("addimm $sp, $fp, 8"); + ccb_target_gen_emit("move sp, fp"); + ccb_target_gen_emit("loadw fp, sp, 4"); + ccb_target_gen_emit("addiu sp, sp, 8"); + ccb_target_gen_emit("jr ra"); + ccb_target_gen_emit("nop");*/ + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC) { + ccb_target_gen_emit("setrr r4, r5"); + ccb_target_gen_emit("popr r5"); + ccb_target_gen_emit("return"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_RISCV) { + ccb_target_gen_emit("addi sp, x8, 0"); + ccb_target_gen_pop(ccb_target_bp(ccb)); + ccb_target_gen_pop("ra"); + ccb_target_gen_emit("ret"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_ARM) { + ccb_target_gen_emit("ldp x29, x30, [sp], #32"); + //ccb_target_gen_emit("add sp, sp, 16"); + ccb_target_gen_emit("ret"); + } + else if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_X86 && ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_RAW) { + ccb_target_gen_emit("data8 0xC9 ; leave"); + ccb_target_gen_emit("data8 0xC3 ; ret"); + } + else { + ccb_target_gen_emit("leave"); + ccb_target_gen_emit("ret"); + } +} + +/* This is called to predaclare any functions as globals if needed. The purpose is mostly for + * assemblers which require externs to be declared: If a symbol isn't declared then it should + * be externed. + * + * TODO: Extra care may be needed for extern variables, and for other specialised linkage cases (especially on Windows). 
+ */ +void ccb_target_gen_declfunction(ccb_t* ccb, ccb_ast_t* ast) { + ccb_ast_setpos(ccb, ast); + if (ast->type == CCB_AST_TYPE_FUNCTION) { + if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [text section]"); + ccb_target_gen_emit_inline("; [public %s]", ast->function.name); + } + else { + //ccb_target_gen_emit_inline("section '.text' executable"); + ccb_target_gen_emit_inline("public %s", ast->function.name); + } + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [text section]"); + ccb_target_gen_emit_inline("; [public %s]", ast->function.name); + } + else { + //ccb_target_gen_emit_inline("section .text"); + ccb_target_gen_emit_inline("global %s", ast->function.name); + } + } + + /* Set it as predeclared in the list. */ + ccb_extrastage_setpredeclared(ccb, ast->function.name); + } + else if (ast->type == CCB_AST_TYPE_DECLARATION) { + // Should we do any extra checks here?ccb_target_gen_global(ccb, ast); + } + else { + ccb_compile_error(ccb, "ICE"); + } +} +/* This is used to predeclare any explicit "extern" symbols. */ +void ccb_target_gen_declextern(ccb_t* ccb, const char* name) { + /* If something is declared as extern and then declared again, we skip the extern declaration. 
*/ + if (ccb_extrastage_ispredeclared(ccb, name)) { + return; + } + if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [text section]"); + ccb_target_gen_emit_inline("; [extrn %s]", name); + } + else { + ccb_target_gen_emit_inline("extrn %s", name); + } + } + else if (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM) { + if (ccb_target_binfmt(ccb) == CCB_TARGET_BINFMT_FLAT) { + ccb_target_gen_emit_inline("; [extern %s]", name); + } + else { + ccb_target_gen_emit_inline("extern %s", name); + } + } + + /* Set it as predeclared in the list. */ + ccb_extrastage_setpredeclared(ccb, name); +} + +void ccb_target_gen_function(ccb_t* ccb, ccb_ast_t* ast) { + ccb_ast_setpos(ccb, ast); + ccb_target_gen_stack = 0; + if (ast->type == CCB_AST_TYPE_FUNCTION) { + //printf("Producing function '%s'...\n", ast->function.name); + /*if (!ast->function.isnaked)*/ + ccb_target_gen_function_prologue(ccb, ast); + ccb_target_gen_expression(ccb, ast->function.body); + if (!ast->ctype->isnaked) ccb_target_gen_function_epilogue(ccb); + } + else if (ast->type == CCB_AST_TYPE_DECLARATION) { + ccb_target_gen_global(ccb, ast); + } + else { + ccb_compile_error(ccb, "ICE"); + } + if (ccb_target_gen_stack > 8) { + if (ccb_target_family(ccb) == CCB_ARCH_FAMILY_GENERIC || (ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_FASM || ccb_target_asmfmt(ccb) == CCB_TARGET_ASMFMT_NASM)) { + fprintf(ccb->output, ";; stack is misaligned by %d bytes [this warning may be outdated]\n", ccb_target_gen_stack); + } + else { + fprintf(ccb->output, "## stack is misaligned by %d bytes [this warning may be outdated]\n", ccb_target_gen_stack); + } + } +} + + +/* From ifdef of CCBGENERIC_IMPLEMENTATION: */ +#endif + +/* From ifndef at top of file: */ +#endif diff --git a/cpp.h b/cpp.h new file mode 100644 index 0000000..64a8cf5 --- /dev/null +++ b/cpp.h @@ -0,0 +1,5368 @@ +/* NOTE FROM ZAK: This is based on the "public domain" Decus C 
Preprocessor code + * The original code was downloaded from https://github.com/alexyangfox/public-domain/tree/decuscpp/decuscpp + * (however, it has been modified to fit, so any bugs may be my own!) + * + * RECENT CHANGES: + * - Made compilable/integrated with my new C compiler (work-in-progress, still a bit sketchy at time of writing) + * - Added support for vararg macros, e.g. #define LOG(...) fprintf(stderr, __VA_ARGS__) + * - Added support for ("C++ style?") line comments + * - Added support for DOS-style (CR+LF) line terminators + * - Changed IDMAX to 666 + */ +#define IDMAX 666 +/* + * I n t e r n a l D e f i n i t i o n s f o r C P P + * + * In general, definitions in this file should not be changed. + */ + + /* + * S y s t e m D e p e n d e n t + * D e f i n i t i o n s f o r C P P + * + * Definitions in this file may be edited to configure CPP for particular + * host operating systems and target configurations. + * + * NOTE: cpp assumes it is compiled by a compiler that supports macros + * with arguments. If this is not the case (as for Decus C), #define + * nomacarg -- and provide function equivalents for all macros. + * + * cpp also assumes the host and target implement the Ascii character set. + * If this is not the case, you will have to do some editing here and there. + */ + + /* + * This redundant definition of TRUE and FALSE works around + * a limitation of Decus C. + */ +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + + /* + * Define the HOST operating system. This is needed so that + * cpp can use appropriate filename conventions. 
+ */ +#define SYS_UNKNOWN 0 +#define SYS_UNIX 1 +#define SYS_VMS 2 +#define SYS_RSX 3 +#define SYS_RT11 4 +#define SYS_LATTICE 5 +#define SYS_ONYX 6 +#define SYS_68000 7 + +#ifndef HOST +#ifdef unix +#define HOST SYS_UNIX +#else +#ifdef vms +#define HOST SYS_VMS +#else +#ifdef rsx +#define HOST SYS_RSX +#else +#ifdef rt11 +#define HOST SYS_RT11 +#else +#define HOST SYS_UNIX +#endif +#endif +#endif +#endif +#endif + +#ifndef HOST +#define HOST SYS_UNKNOWN +#endif + + /* + * We assume that the target is the same as the host system + */ +#ifndef TARGET +#define TARGET HOST +#endif + + /* + * In order to predefine machine-dependent constants, + * several strings are defined here: + * + * MACHINE defines the target cpu (by name) + * SYSTEM defines the target operating system + * COMPILER defines the target compiler + * + * The above may be #defined as "" if they are not wanted. + * They should not be #defined as NULL. + * + * LINE_PREFIX defines the # output line prefix, if not "line" + * This should be defined as "" if cpp is to replace + * the "standard" C pre-processor. + * + * FILE_LOCAL marks functions which are referenced only in the + * file they reside. Some C compilers allow these + * to be marked "static" even though they are referenced + * by "extern" statements elsewhere. + * + * OK_DOLLAR Should be set TRUE if $ is a valid alphabetic character + * in identifiers (default), or zero if $ is invalid. + * Default is TRUE. + * + * OK_CONCAT Should be set TRUE if # may be used to concatenate + * tokens in macros (per the Ansi Draft Standard) or + * FALSE for old-style # processing (needed if cpp is + * to process assembler source code). + * + * OK_DATE Predefines the compilation date if set TRUE. + * Not permitted by the Nov. 12, 1984 Draft Standard. + * + * S_CHAR etc. Define the sizeof the basic TARGET machine word types. + * By default, sizes are set to the values for the HOST + * computer. 
If this is inappropriate, see the code in + * cpp3.c for details on what to change. Also, if you + * have a machine where sizeof (signed int) differs from + * sizeof (unsigned int), you will have to edit code and + * tables in cpp3.c (and extend the -S option definition.) + * + * CPP_LIBRARY May be defined if you have a site-specific include directory + * which is to be searched *before* the operating-system + * specific directories. + */ + +#if TARGET == SYS_LATTICE + /* + * We assume the operating system is pcdos for the IBM-PC. + * We also assume the small model (just like the PDP-11) + */ +#define MACHINE "i8086" +#define SYSTEM "pcdos" +#endif + +#if TARGET == SYS_ONYX +#define MACHINE "z8000" +#define SYSTEM "unix" +#endif + +#if TARGET == SYS_VMS +#define MACHINE "vax" +#define SYSTEM "vms" +#define COMPILER "vax11c" +#endif + +#if TARGET == SYS_RSX +#define MACHINE "pdp11" +#define SYSTEM "rsx" +#define COMPILER "decus" +#endif + +#if TARGET == SYS_RT11 +#define MACHINE "pdp11" +#define SYSTEM "rt11" +#define COMPILER "decus" +#endif + +#if TARGET == SYS_68000 || defined(M68000) || defined(m68000) || defined(m68k) + /* + * All three machine designators have been seen in various systems. + * Warning -- compilers differ as to sizeof (int). cpp3 assumes that + * sizeof (int) == 2 + */ +#define MACHINE "M68000", "m68000", "m68k" +#define SYSTEM "unix" +#endif + +#if TARGET == SYS_UNIX +#define SYSTEM "unix" +#ifdef pdp11 +#define MACHINE "pdp11" +#endif +#ifdef vax +#define MACHINE "vax" +#endif +#endif + + /* + * defaults + */ + +#ifndef MSG_PREFIX +#define MSG_PREFIX "cpp: " +#endif + +#ifndef LINE_PREFIX +#define LINE_PREFIX "" +#endif + + /* + * OLD_PREPROCESSOR forces the definition of OK_DOLLAR, OK_CONCAT, + * COMMENT_INVISIBLE, and STRING_FORMAL to values appropriate for + * an old-style preprocessor. 
+ */ + +#ifndef OLD_PREPROCESSOR +#define OLD_PREPROCESSOR FALSE +#endif + +#if OLD_PREPROCESSOR +#define OK_DOLLAR FALSE +#define OK_CONCAT TRUE +#define COMMENT_INVISIBLE TRUE +#define STRING_FORMAL TRUE +#define IDMAX 63 /* actually, seems to be unlimited */ +#endif + + /* + * RECURSION_LIMIT may be set to -1 to disable the macro recursion test. + */ +#ifndef RECURSION_LIMIT +#define RECURSION_LIMIT 1000 +#endif + + /* + * BITS_CHAR may be defined to set the number of bits per character. + * it is needed only for multi-byte character constants. + */ +#ifndef BITS_CHAR +#define BITS_CHAR 8 +#endif + + /* + * BIG_ENDIAN is set TRUE on machines (such as the IBM 360 series) + * where 'ab' stores 'a' in the high-bits and 'b' in the low-bits. + * It is set FALSE on machines (such as the PDP-11 and Vax-11) + * where 'ab' stores 'a' in the low-bits and 'b' in the high-bits. + * (Or is it the other way around?) -- Warning: BIG_ENDIAN code is untested. + */ +#ifndef BIG_ENDIAN +#define BIG_ENDIAN FALSE +#endif + + /* + * COMMENT_INVISIBLE may be defined to allow "old-style" comment + * processing, whereby the comment becomes a zero-length token + * delimiter. This permitted tokens to be concatenated in macro + * expansions. This was removed from the Draft Ansi Standard. + */ +#ifndef COMMENT_INVISIBLE +#define COMMENT_INVISIBLE FALSE +#endif + + /* + * STRING_FORMAL may be defined to allow recognition of macro parameters + * anywhere in replacement strings. This was removed from the Draft Ansi + * Standard and a limited recognition capability added. + */ +#ifndef STRING_FORMAL +#define STRING_FORMAL FALSE +#endif + + /* + * OK_DOLLAR enables use of $ as a valid "letter" in identifiers. + * This is a permitted extension to the Ansi Standard and is required + * for e.g., VMS, RSX-11M, etc. It should be set FALSE if cpp is + * used to preprocess assembler source on Unix systems. OLD_PREPROCESSOR + * sets OK_DOLLAR FALSE for that reason. 
+ */ +#ifndef OK_DOLLAR +#define OK_DOLLAR TRUE +#endif + + /* + * OK_CONCAT enables (one possible implementation of) token concatenation. + * If cpp is used to preprocess Unix assembler source, this should be + * set FALSE as the concatenation character, #, is used by the assembler. + */ +#ifndef OK_CONCAT +#define OK_CONCAT TRUE +#endif + + /* + * OK_DATE may be enabled to predefine today's date as a string + * at the start of each compilation. This is apparently not permitted + * by the Draft Ansi Standard. + */ +#ifndef OK_DATE +#define OK_DATE TRUE +#endif + + /* + * Some common definitions. + */ + +#ifndef DEBUG +#define DEBUG FALSE +#endif + + /* + * The following definitions are used to allocate memory for + * work buffers. In general, they should not be modified + * by implementors. + * + * PAR_MAC The maximum number of #define parameters (31 per Standard) + * Note: we need another one for strings. + * IDMAX The longest identifier, 31 per Ansi Standard (this file forces 666 at the top) + * NBUFF Input buffer size + * NWORK Work buffer size -- the longest macro + * must fit here after expansion. + * NEXP The nesting depth of #if expressions + * NINCLUDE The number of directories that may be specified + * on a per-system basis, or by the -I option. + * BLK_NEST The number of nested #if's permitted. + */ + +#ifndef IDMAX +#define IDMAX 31 +#endif +#define PAR_MAC (31 + 1) +#define NBUFF (4096*8) +#define NWORK (4096*8) +#define NEXP 128 +#define NINCLUDE 7 +#define NPARMWORK (NWORK * 2) +#define BLK_NEST 32 + + /* + * Some special constants. These may need to be changed if cpp + * is ported to a weird machine. + * + * NOTE: if cpp is run on a non-ascii machine, ALERT and VT may + * need to be changed. They are used to implement the proposed + * ANSI standard C control characters '\a' and '\v' only. + * DEL is used to tag macro tokens to prevent #define foo foo + * from looping. Note that we don't try to prevent more elaborate + * #define loops from occurring.
+ */ + +#ifndef ALERT +#define ALERT '\007' /* '\a' is "Bell" */ +#endif + +#ifndef VT +#define VT '\013' /* Vertical Tab CTRL/K */ +#endif + + +#ifndef FILE_LOCAL +#ifdef decus +#define FILE_LOCAL static +#else +#ifdef vax11c +#define FILE_LOCAL static +#else +#define FILE_LOCAL /* Others are global */ +#endif +#endif +#endif + + + +// ifndef TRUE; +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif +#ifndef EOS + /* + * This is predefined in Decus C + */ +#define EOS 0 //'\0' /* End of string */ +#endif +#define EOF_CHAR 0 /* Returned by get() on eof */ +#define NULLST ((char *) NULL) /* Pointer to nowhere (linted) */ +#define DEF_NOARGS (-1) /* #define foo vs #define foo() */ + + /* + * The following may need to change if the host system doesn't use ASCII. + */ +#define DEF_MAGIC 0x1D /* Magic for #defines */ +#define TOK_SEP 0x1E /* Token concatenation delim. */ +#define COM_SEP 0x1F /* Magic comment separator */ + + /* + * Note -- in Ascii, the following will map macro formals onto DEL + the + * C1 control character region (decimal 128 .. (128 + PAR_MAC)) which will + * be ok as long as PAR_MAC is less than 33). Note that the last PAR_MAC + * value is reserved for string substitution. + */ + +#define MAC_PARM 0x7F /* Macro formals start here */ +#if PAR_MAC >= 33 +assertion fails -- PAR_MAC isn't less than 33 +#endif +#define LASTPARM (PAR_MAC - 1) + +/* + * Character type codes. + */ + +#define INV 0 /* Invalid, must be zero */ +#define OP_EOE INV /* End of expression */ +#define DIG 1 /* Digit */ +#define LET 2 /* Identifier start */ +#define FIRST_BINOP OP_ADD +#define OP_ADD 3 +#define OP_SUB 4 +#define OP_MUL 5 +#define OP_DIV 6 +#define OP_MOD 7 +#define OP_ASL 8 +#define OP_ASR 9 +#define OP_AND 10 /* &, not && */ +#define OP_OR 11 /* |, not || */ +#define OP_XOR 12 +#define OP_EQ 13 +#define OP_NE 14 +#define OP_LT 15 +#define OP_LE 16 +#define OP_GE 17 +#define OP_GT 18 +#define OP_ANA 19 /* && */ +#define OP_ORO 20 /* || */ +#define OP_QUE 21 /* ? 
*/ +#define OP_COL 22 /* : */ +#define OP_CMA 23 /* , (relevant?) */ +#define LAST_BINOP OP_CMA /* Last binary operand */ + /* + * The following are unary. + */ +#define FIRST_UNOP OP_PLU /* First Unary operand */ +#define OP_PLU 24 /* + (draft ANSI standard) */ +#define OP_NEG 25 /* - */ +#define OP_COM 26 /* ~ */ +#define OP_NOT 27 /* ! */ +#define LAST_UNOP OP_NOT +#define OP_LPA 28 /* ( */ +#define OP_RPA 29 /* ) */ +#define OP_END 30 /* End of expression marker */ +#define OP_MAX (OP_END + 1) /* Number of operators */ +#define OP_FAIL (OP_END + 1) /* For error returns */ + + /* + * The following are for lexical scanning only. + */ + +#define QUO 65 /* Both flavors of quotation */ +#define DOT 66 /* . might start a number */ +#define SPA 67 /* Space and tab */ +#define BSH 68 /* Just a backslash */ +#define END 69 /* EOF */ + + /* + * These bits are set in ifstack[] + */ +#define WAS_COMPILING 1 /* TRUE if compile set at entry */ +#define ELSE_SEEN 2 /* TRUE when #else processed */ +#define TRUE_SEEN 4 /* TRUE when #if TRUE processed */ + + /* + * Define bits for the basic types and their adjectives + */ + +#define T_CHAR 1 +#define T_INT 2 +#define T_FLOAT 4 +#define T_DOUBLE 8 +#define T_SHORT 16 +#define T_LONG 32 +#define T_SIGNED 64 +#define T_UNSIGNED 128 +#define T_PTR 256 /* Pointer */ +#define T_FPTR 512 /* Pointer to functions */ + + /* + * The DEFBUF structure stores information about #defined + * macros. Note that the defbuf->repl information is always + * in malloc storage. + */ + + typedef struct defbuf { + struct defbuf* link; /* Next define in chain */ + char* repl; /* -> replacement */ + int hash; /* Symbol table hash */ + int nargs; /* For define(args) */ + char name[1]; /* #define name */ +} DEFBUF; + +/* + * The FILEINFO structure stores information about open files + * and macros being expanded. 
+ */ + +typedef struct fileinfo { + char* bptr; /* Buffer pointer */ + int line; /* for include or macro */ + FILE* fp; /* File if non-null */ + struct fileinfo* parent; /* Link to includer */ + char* filename; /* File/macro name */ + char* progname; /* From #line statement */ + unsigned int unrecur; /* For macro recursion */ + char buffer[1]; /* current input line */ +} FILEINFO; + +/* + * The SIZES structure is used to store the values for #if sizeof + */ + +typedef struct sizes { + short bits; /* If this bit is set, */ + short size; /* this is the datum size value */ + short psize; /* this is the pointer size */ +} SIZES; +/* + * nomacarg is a built-in #define on Decus C. + */ + +#ifdef nomacarg +#define cput output /* cput concatenates tokens */ +#else +#if COMMENT_INVISIBLE +#define cput(c) { if (c != TOK_SEP && c != COM_SEP) putchar(c); } +#else +#define cput(c) { if (c != TOK_SEP) putchar(c); } +#endif +#endif + +#ifndef nomacarg +#define streq(s1, s2) (strcmp(s1, s2) == 0) +#endif + + /* + * Error codes. VMS uses system definitions. + * Decus C codes are defined in stdio.h. + * Others are cooked to order. 
+ */ + +#if HOST == SYS_VMS +#include +#include +#define IO_NORMAL (SS$_NORMAL | STS$M_INHIB_MSG) +#define IO_ERROR SS$_ABORT +#endif + /* + * Note: IO_NORMAL and IO_ERROR are defined in the Decus C stdio.h file + */ +#ifndef IO_NORMAL +#define IO_NORMAL 0 +#endif +#ifndef IO_ERROR +#define IO_ERROR 1 +#endif + + /* + * Externs + */ + +extern int line; /* Current line number */ +extern int wrongline; /* Force #line to cc pass 1 */ +extern char type[]; /* Character classifier */ +extern char token[IDMAX + 1]; /* Current input token */ +extern int instring; /* TRUE if scanning string */ +extern int inmacro; /* TRUE if scanning #define */ +extern int errors; /* Error counter */ +extern int recursion; /* Macro depth counter */ +extern char ifstack[BLK_NEST]; /* #if information */ +#define compiling ifstack[0] +extern char* ifptr; /* -> current ifstack item */ +extern char* incdir[NINCLUDE]; /* -i directories */ +extern char** incend; /* -> active end of incdir */ +extern int cflag; /* -C option (keep comments) */ +extern int eflag; /* -E option (ignore errors) */ +extern int nflag; /* -N option (no pre-defines) */ +extern int rec_recover; /* unwind recursive macros */ +extern char* preset[]; /* Standard predefined symbols */ +extern char* magic[]; /* Magic predefined symbols */ +extern FILEINFO* infile; /* Current input file */ +extern char work[NWORK + 1]; /* #define scratch */ +extern char* workp; /* Free space in work */ +#if DEBUG +extern int debug; /* Debug level */ +#endif +extern int keepcomments; /* Don't remove comments if set */ +extern SIZES size_table[]; /* For #if sizeof sizes */ +extern char* getmem(); /* Get memory or die. */ +extern DEFBUF* lookid(); /* Look for a #define'd thing */ +extern DEFBUF* defendel(); /* Symbol table enter/delete */ +extern char* savestring(); /* Stuff string in malloc mem. */ +#include // Change from Zak, just include standard versions... 
+#include +#include +#include +//extern char* strcpy(); +//extern char* strcat(); +//extern char* strrchr(); +//extern char* strchr(); +//extern long time(); +//extern char* sprintf(); /* Lint needs this */ + +#ifdef CPP_IMPLEMENTATION +/* Originally from cpp1.c: */ +/* + * CPP main program. + * + * Edit history + * 21-May-84 MM "Field test" release + * 23-May-84 MM Some minor hacks. + * 30-May-84 ARF Didn't get enough memory for __DATE__ + * Added code to read stdin if no input + * files are provided. + * 29-Jun-84 MM Added ARF's suggestions, Unixifying cpp. + * 11-Jul-84 MM "Official" first release (that's what I thought!) + * 22-Jul-84 MM/ARF/SCK Fixed line number bugs, added cpp recognition + * of #line, fixed problems with #include. + * 23-Jul-84 MM More (minor) include hacking, some documentation. + * Also, redid cpp's #include files + * 25-Jul-84 MM #line filename isn't used for #include searchlist + * #line format is + * 25-Jul-84 ARF/MM Various bugs, mostly serious. Removed homemade doprint + * 01-Aug-84 MM Fixed recursion bug, remove extra newlines and + * leading whitespace from cpp output. + * 02-Aug-84 MM Hacked (i.e. optimized) out blank lines and unneeded + * whitespace in general. Cleaned up unget()'s. + * 03-Aug-84 Keie Several bug fixes from Ed Keizer, Vrije Universitet. + * -- corrected arg. count in -D and pre-defined + * macros. Also, allow \n inside macro actual parameter + * lists. + * 06-Aug-84 MM If debugging, dump the preset vector at startup. + * 12-Aug-84 MM/SCK Some small changes from Sam Kendall + * 15-Aug-84 Keie/MM cerror, cwarn, etc. take a single string arg. + * cierror, etc. take a single int. arg. + * changed LINE_PREFIX slightly so it can be + * changed in the makefile. + * 31-Aug-84 MM USENET net.sources release. 
+ * 7-Sep-84 SCH/ado Lint complaints + * 10-Sep-84 Keie Char's can't be signed in some implementations + * 11-Sep-84 ado Added -C flag, pathological line number fix + * 13-Sep-84 ado Added -E flag (does nothing) and "-" file for stdin. + * 14-Sep-84 MM Allow # 123 as a synonym for #line 123 + * 19-Sep-84 MM scanid always reads to token, make sure #line is + * written to a new line, even if -C switch given. + * Also, cpp - - reads stdin, writes stdout. + * 03-Oct-84 ado/MM Several changes to line counting and keepcomments + * stuff. Also a rewritten control() hasher -- much + * simpler and no less "perfect". Note also changes + * in cpp3.c to fix numeric scanning. + * 04-Oct-84 MM Added recognition of macro formal parameters if + * they are the only thing in a string, per the + * draft standard. + * 08-Oct-84 MM One more attack on scannumber + * 15-Oct-84 MM/ado Added -N to disable predefined symbols. Fixed + * linecount if COMMENT_INVISIBLE enabled. + * 22-Oct-84 MM Don't evaluate the #if/#ifdef argument if + * compilation is supressed. This prevents + * unnecessary error messages in sequences such as + * #ifdef FOO -- undefined + * #if FOO == 10 -- shouldn't print warning + * 25-Oct-84 MM Fixed bug in false ifdef supression. On vms, + * #include should open foo.h -- this duplicates + * the behavior of Vax-C + * 31-Oct-84 ado/MM Parametized $ in indentifiers. Added a better + * token concatenator and took out the trial + * concatenation code. Also improved #ifdef code + * and cleaned up the macro recursion tester. + * 2-Nov-84 MM/ado Some bug fixes in token concatenation, also + * a variety of minor (uninteresting) hacks. + * 6-Nov-84 MM Happy Birthday. Broke into 4 files and added + * #if sizeof (basic_types) + * 9-Nov-84 MM Added -S* for pointer type sizes + * 13-Nov-84 MM Split cpp1.c, added vms defaulting + * 23-Nov-84 MM/ado -E supresses error exit, added CPP_INCLUDE, + * fixed strncpy bug. 
+ * 3-Dec-84 ado/MM Added OLD_PREPROCESSOR + * 7-Dec-84 MM Stuff in Nov 12 Draft Standard + * 17-Dec-84 george Fixed problems with recursive macros + * 17-Dec-84 MM Yet another attack on #if's (f/t)level removed. + * 07-Jan-85 ado Init defines before doing command line options + * so -Uunix works. + */ + + /*)BUILD + $(PROGRAM) = cpp + $(FILES) = { cpp1 cpp2 cpp3 cpp4 cpp5 cpp6 } + $(INCLUDE) = { cppdef.h cpp.h } + $(STACK) = 2000 + $(TKBOPTIONS) = { + STACK = 2000 + } + */ + +#ifdef DOCUMENTATION + +title cpp C Pre-Processor +index C pre-processor + +synopsis +.s.nf +cpp [-options] [infile [outfile]] +.s.f +description + +CPP reads a C source file, expands macros and include +files, and writes an input file for the C compiler. +If no file arguments are given, CPP reads from stdin +and writes to stdout. If one file argument is given, +it will define the input file, while two file arguments +define both input and output files. The file name "-" +is a synonym for stdin or stdout as appropriate. + +The following options are supported. Options may +be given in either case. +.lm + 16 +.p - 16 +-C If set, source-file comments are written +to the output file. This allows the output of CPP to be +used as the input to a program, such as lint, that expects +commands embedded in specially-formatted comments. +.p - 16 +-Dname=value Define the name as if the programmer wrote + +#define name value + +at the start of the first file. If "=value" is not +given, a value of "1" will be used. + +On non-unix systems, all alphabetic text will be forced +to upper-case. +.p - 16 +-E Always return "success" to the operating +system, even if errors were detected. Note that some fatal +errors, such as a missing #include file, will terminate +CPP, returning "failure" even if the -E option is given. +.p - 16 +-Idirectory Add this directory to the list of +directories searched for #include "..."
and #include <...> +commands.Note that there is no space between the +"-I" and the directory string.More than one - I command +is permitted.On non - Unix systems "directory" is forced +to upper - case. +.p - 16 +- N CPP normally predefines some symbols defining +the target computer and operating system.If - N is specified, +no symbols will be predefined.If - N - N is specified, the +"always present" symbols, __LINE__, __FILE__, and __DATE__ +are not defined. +.p - 16 +- Stext CPP normally assumes that the size of +the target computer's basic variable types is the same as the size +of these types of the host computer. (This can be overridden + when CPP is compiled, however.) The - S option allows dynamic + respecification of these values. "text" is a string of + numbers, separated by commas, that specifies correct sizes. + The sizes must be specified in the exact order : + +char short int long float double + +If you specify the option as "-S*text", pointers to these +types will be specified. - S * takes one additional argument +for pointer to function(e.g. int (*)()) + +For example, to specify sizes appropriate for a PDP - 11, +you would write : + +c s i l f d func +- S1, 2, 2, 2, 4, 8, +-S * 2, 2, 2, 2, 2, 2, 2 + +Note that all values must be specified. +.p - 16 +- Uname Undefine the name as if + +#undef name + +were given.On non - Unix systems, "name" will be forced to +upper - case. +.p - 16 +- Xnumber Enable debugging code.If no value is +given, a value of 1 will be used. (For maintenence of + CPP only.) + .s.lm - 16 + + Pre - Defined Variables + + When CPP begins processing, the following variables will + have been defined(unless the - N option is specified) : + .s + Target computer(as appropriate) : + .s + pdp11, vax, M68000 m68000 m68k + .s + Target operating system(as appropriate) : + .s + rsx, rt11, vms, unix + .s + Target compiler(as appropriate) : + .s + decus, vax11c + .s + The implementor may add definitions to this list. 
+ The default definitions match the definition of the + host computer, operating system, and C compiler. + .s + The following are always available unless undefined(or + -N was specified twice) : + .lm + 16 + .p - 12 + __FILE__ The input(or #include) file being compiled + (as a quoted string). + .p - 12 + __LINE__ The line number being compiled. + .p - 12 + __DATE__ The date and time of compilation as + a Unix ctime quoted string(the trailing newline is removed). + Thus, + .s + printf("Bug at line %s,", __LINE__); +printf(" source file %s", __FILE__); +printf(" compiled on %s", __DATE__); +.s.lm - 16 + +Draft Proposed Ansi Standard Considerations + +The current version of the Draft Proposed Standard +explicitly states that "readers are requested not to specify +or claim conformance to this draft." Readers and users +of Decus CPP should not assume that Decus CPP conforms +to the standard, or that it will conform to the actual +C Language Standard. + +When CPP is itself compiled, many features of the Draft +Proposed Standard that are incompatible with existing +preprocessors may be disabled.See the comments in CPP's +source for details. + +The latest version of the Draft Proposed Standard(as reflected + in Decus CPP) is dated November 12, 1984. + + Comments are removed from the input text.The comment + is replaced by a single space character.The - C option + preserves comments, writing them to the output file. + + The '$' character is considered to be a letter.This is + a permitted extension. + + The following new features of C are processed by CPP : +.s.comment Note : significant spaces, not tabs, .br quotes #if, #elif +.br; ####_#elif expression(_#else _#if) +.br; ####'_\xNNN' (Hexadecimal constant) +.br; ####'_\a' (Ascii BELL) +.br; ####'_\v' (Ascii Vertical Tab) +.br; ####_#if defined NAME 1 if defined, 0 if not +.br; ####_#if defined(NAME) 1 if defined, 0 if not +.br; ####_#if sizeof(basic type) +.br; ####unary + +.br; ####123U, 123LU Unsigned ints and longs. 
+.br; ####12.3L Long double numbers +.br; ####token_#token Token concatenation +.br; ####_#include token Expands to filename + +The Draft Proposed Standard has extended C, adding a constant +string concatenation operator, where + +"foo" "bar" + +is regarded as the single string "foobar". (This does not + affect CPP's processing but does permit a limited form of + macro argument substitution into strings as will be discussed.) + + The Standard Committee plans to add token concatenation + to #define command lines. One suggested implementation + is as follows: the sequence "Token1#Token2" is treated + as if the programmer wrote "Token1Token2". This could + be used as follows: + +#line 123 +#define ATLINE foo#__LINE__ + +ATLINE would be defined as foo123. + +Note that "Token2" must either have the format of an +identifier or be a string of digits. Thus, the string + +#define ATLINE foo#1x3 + +generates two tokens: "foo1" and "x3". + +If the tokens T1 and T2 are concatenated into T3, +this implementation operates as follows: + +1. Expand T1 if it is a macro. +2. Expand T2 if it is a macro. +3. Join the tokens, forming T3. +4. Expand T3 if it is a macro. + +A macro formal parameter will be substituted into a string +or character constant if it is the only component of that +constant: + +#define VECSIZE 123 +#define vprint(name, size) \ + printf("name" "[" "size" "] = {\n") +... vprint(vector, VECSIZE); + +expands (effectively) to + +vprint("vector[123] = {\n"); + +Note that this will be useful if your C compiler supports +the new string concatenation operation noted above. +As implemented here, if you write + +#define string(arg) "arg" +... string("foo") ... + +This implementation generates "foo", rather than the strictly +correct ""foo"" (which will probably generate an error message). +This is, strictly speaking, an error in CPP and may be removed +from future releases.
+ +error messages + +Many.CPP prints warning or error messages if you try to +use multiple - byte character constants(non - transportable) +if you #undef a symbol that was not defined, or if your +program has potentially nested comments. + +author + +Martin Minow + +bugs + +The #if expression processor uses signed integers only. +I.e, #if 0xFFFFu < 0 may be TRUE. + +#endif + +#include +#include +//#include "cppdef.h" +//#include "cpp.h" + + /* + * Commonly used global variables: + * line is the current input line number. + * wrongline is set in many places when the actual output + * line is out of sync with the numbering, e.g, + * when expanding a macro with an embedded newline. + * + * token holds the last identifier scanned (which might + * be a candidate for macro expansion). + * errors is the running cpp error counter. + * infile is the head of a linked list of input files (extended by + * #include and macros being expanded). infile always points + * to the current file/macro. infile->parent to the includer, + * etc. infile->fd is NULL if this input stream is a macro. + */ + int line; /* Current line number */ +int wrongline; /* Force #line to compiler */ +char token[IDMAX + 1]; /* Current input token */ +int errors; /* cpp error counter */ +FILEINFO* infile = NULL; /* Current input file */ +#if DEBUG +int debug; /* TRUE if debugging now */ +#endif +/* + * This counter is incremented when a macro expansion is initiated. + * If it exceeds a built-in value, the expansion stops -- this tests + * for a runaway condition: + * #define X Y + * #define Y X + * X + * This can be disabled by falsifying rec_recover. (Nothing does this + * currently: it is a hook for an eventual invocation flag.) + */ +int recursion; /* Infinite recursion counter */ +int rec_recover = TRUE; /* Unwind recursive macros */ + +/* + * instring is set TRUE when a string is scanned. 
It modifies the + * behavior of the "get next character" routine, causing all characters + * to be passed to the caller (except <DEF_MAGIC>). Note especially that + * comments and \<newline> are not removed from the source. (This + * prevents cpp output lines from being arbitrarily long). + * + * inmacro is set by #define -- it absorbs comments and converts + * form-feed and vertical-tab to space, but returns \<newline> + * to the caller. Strictly speaking, this is a bug as \<newline> + * shouldn't delimit tokens, but we'll worry about that some other + * time -- it is more important to prevent infinitely long output lines. + * + * instring and inmacro are parameters to the get() routine which + * were made global for speed. + */ +int instring = FALSE; /* TRUE if scanning string */ +int inmacro = FALSE; /* TRUE if #defining a macro */ + +/* + * work[] and workp are used to store one piece of text in a temporary + * buffer. To initialize storage, set workp = work. To store one + * character, call save(c); (This will fatally exit if there isn't + * room.) To terminate the string, call save(EOS). Note that + * the work buffer is used by several subroutines -- be sure your + * data won't be overwritten. The extra byte in the allocation is + * needed for string formal replacement. + */ +char work[NWORK + 1]; /* Work buffer */ +char* workp; /* Work buffer pointer */ + +/* + * keepcomments is set TRUE by the -C option. If TRUE, comments + * are written directly to the output stream. This is needed if + * the output from cpp is to be passed to lint (which uses commands + * embedded in comments). cflag contains the permanent state of the + * -C flag. keepcomments is always falsified when processing #control + * commands and when compilation is suppressed by a false #if + * + * If eflag is set, CPP returns "success" even if non-fatal errors + * were detected. + * + * If nflag is non-zero, no symbols are predefined except __LINE__, + * __FILE__, and __DATE__. If nflag > 1, absolutely no symbols + * are predefined.
+ */ +int keepcomments = FALSE; /* Write out comments flag */ +int cflag = FALSE; /* -C option (keep comments) */ +int eflag = FALSE; /* -E option (never fail) */ +int nflag = 0; /* -N option (no predefines) */ + +/* + * ifstack[] holds information about nested #if's. It is always + * accessed via *ifptr. The information is as follows: + * WAS_COMPILING state of compiling flag at outer level. + * ELSE_SEEN set TRUE when #else seen to prevent 2nd #else. + * TRUE_SEEN set TRUE when #if or #elif succeeds + * ifstack[0] holds the compiling flag. It is TRUE if compilation + * is currently enabled. Note that this must be initialized TRUE. + */ +char ifstack[BLK_NEST] = { TRUE }; /* #if information */ +char* ifptr = NULL; /*ifstack;*/ /* -> current ifstack[] */ + +/* + * incdir[] stores the -i directories (and the system-specific + * #include <...> directories. + */ +char* incdir[NINCLUDE]; /* -i directories */ +char** incend = NULL; /*incdir;*/ /* -> free space in incdir[] */ + +/* + * This is the table used to predefine target machine and operating + * system designators. It may need hacking for specific circumstances. + * Note: it is not clear that this is part of the Ansi Standard. + * The -N option supresses preset definitions. + */ +char* preset[] = { /* names defined at cpp start */ +/* TODO: Fix this... -Zak +#ifdef MACHINE + MACHINE, +#endif +#ifdef SYSTEM + SYSTEM, +#endif +#ifdef COMPILER + COMPILER, +#endif +#if DEBUG + "decus_cpp",*/ /* Ourselves! */ +/*#endif*/ + NULL /* Must be last */ +}; + +/* + * The value of these predefined symbols must be recomputed whenever + * they are evaluated. The order must not be changed. + */ +char* magic[3];// = { /* Note: order is important */ +// NULL, /*"__LINE__",*/ +// NULL, /*"__FILE__",*/ +// NULL /* Must be last */ +//}; + +char* opname[31]; /* Moved here so it can be set up easily in the main function. 
*/ +int basic_types_init(); +/* Preprocessor entry point: initialises ifptr/incend and the magic[]/opname[] tables, registers stdin as the main input (addfile), runs cppmain(), and finishes by calling exit() rather than returning. Old K&R signature kept for reference: +preprocessormain(argc, argv) +int argc; +char* argv[]; +*/ +int preprocessormain(int argc, char **argv) +{ + register int i; + ifptr = ifstack; + incend = incdir; + + + //fprintf(stderr, "Hello from preprocessor\n"); + + magic[0] = "__LINE__"; + magic[1] = "__FILE__"; + magic[2] = NULL; /* magic[] has 3 slots, so the terminator is index 2 (was magic[3], one past the end) */ + opname[0] = "end of expression"; + opname[1] = "val"; + opname[2] = "id"; + opname[3] = "+"; + opname[4] = "-"; + opname[5] = "*"; + opname[6] = "/"; + opname[7] = "%"; + opname[8] = "<<"; + opname[9] = ">>"; + opname[10] = "&"; + opname[11] = "|"; + opname[12] = "^"; + opname[13] = "=="; + opname[14] = "!="; + opname[15] = "<"; + opname[16] = "<="; + opname[17] = ">="; + opname[18] = ">"; + opname[19] = "&&"; + opname[20] = "||"; + opname[21] = "?"; + opname[22] = ":"; + opname[23] = ","; + opname[24] = "unary +"; + opname[25] = "unary -"; + opname[26] = "~"; + opname[27] = "!"; + opname[28] = "("; + opname[29] = ")"; + opname[30] = "(none)"; + basic_types_init(); + //fprintf(stderr, "Hello from preprocessor\n"); + +#if HOST == SYS_VMS + argc = getredirection(argc, argv); /* vms >file and stdin */ + /* + * Open input file, "-" means use stdin. + */ + if (!streq(argv[1], "-")) { + if (freopen(argv[1], "r", stdin) == NULL) { + perror(argv[1]); + cerror("Can't open input file \"%s\"", argv[1]); + exit(IO_ERROR); + } + strcpy(work, argv[1]); /* Remember input filename */ + break; + } /* Else, just get stdin */ + case 0: /* No args? */ + case 1: /* No files, stdin -> stdout */ +#if HOST == SYS_UNIX + work[0] = EOS; /* Unix can't find stdin name */ +#else + fgetname(stdin, work); /* Vax-11C, Decus C know name */ +#endif + break; + + default: + exit(IO_ERROR); /* Can't happen */ + } + setincdirs(); /* Setup -I include directories */ + addfile(stdin, work); /* "open" main input file */ +#if DEBUG + if (debug > 0) + dumpdef("preset #define symbols"); +#endif + cppmain(); /* Process main file */ + // TODO: Hack to make comparison compile.
-Zak + long long tmpptr1 = (long long) ifptr; + long long tmpptr2 = (long long) ifstack+0; + if ((i = (tmpptr1 - tmpptr2)) != 0) { + //if ((i = (ifptr - &ifstack[0])) != 0) { +#if OLD_PREPROCESSOR + ciwarn("Inside #ifdef block at end of input, depth = %d", i); +#else + cierror("Inside #ifdef block at end of input, depth = %d", i); +#endif + } + fclose(stdout); + if (errors > 0) { + fprintf(stderr, (errors == 1) + ? "%d error in preprocessor\n" + : "%d errors in preprocessor\n", errors); + if (!eflag) + exit(IO_ERROR); + } + exit(IO_NORMAL); /* No errors or -E option set */ + } + +int output(int c); + +FILE_LOCAL +int cppmain() +/* + * Main process for cpp -- copies tokens from the current input + * stream (main file, include file, or a macro) to the output + * file. + */ +{ + register int c; /* Current character */ + register int counter; /* newlines and spaces */ + /*extern int output();*/ /* Output one character */ + + /* + * Explicitly output a #line at the start of cpp output so + * that lint (etc.) knows the name of the original source + * file. If we don't do this explicitly, we may get + * the name of the first #include file instead. + * We also seem to need a blank line following that first #line. + */ + sharp(); + putchar('\n'); + /* + * This loop is started "from the top" at the beginning of each line + * wrongline is set TRUE in many places if it is necessary to write + * a #line record. (But we don't write them when expanding macros.) + * + * The counter variable has two different uses: at + * the start of a line, it counts the number of blank lines that + * have been skipped over. These are then either output via + * #line records or by outputting explicit blank lines. + * When expanding tokens within a line, the counter remembers + * whether a blank/tab has been output. These are dropped + * at the end of the line, and replaced by a single blank + * within lines.
+ */ + for (;;) { + counter = 0; /* Count empty lines */ + for (;;) { /* For each line, ... */ + while (scantype(c = get()) == SPA) /* Skip leading blanks */ + ; /* in this line. */ + if (c == '\n') /* If line's all blank, */ + ++counter; /* Do nothing now */ + else if (c == '#') { /* Is 1st non-space '#' */ + keepcomments = FALSE; /* Don't pass comments */ + counter = control(counter); /* Yes, do a #command */ + keepcomments = (cflag && compiling); + } + else if (c == EOF_CHAR) /* At end of file? */ + break; + else if (!compiling) { /* #ifdef false? */ + skipnl(); /* Skip to newline */ + counter++; /* Count it, too. */ + } + else { + break; /* Actual token */ + } + } + if (c == EOF_CHAR) /* Exit process at */ + break; /* End of file */ + /* + * If the loop didn't terminate because of end of file, we + * know there is a token to compile. First, clean up after + * absorbing newlines. counter has the number we skipped. + */ + if ((wrongline && infile->fp != NULL) || counter > 4) + sharp(); /* Output # line number */ + else { /* If just a few, stuff */ + while (--counter >= 0) /* them out ourselves */ + putchar('\n'); + } + /* + * Process each token on this line. + */ + unget(); /* Reread the char. */ + for (;;) { /* For the whole line, */ + do { /* Token concat. 
loop */ + for (counter = 0; (scantype(c = get()) == SPA);) { +#if COMMENT_INVISIBLE + if (c != COM_SEP) + counter++; +#else + counter++; /* Skip over blanks */ +#endif + } + if (c == EOF_CHAR || c == '\n') + goto end_line; /* Exit line loop */ + else if (counter > 0) /* If we got any spaces */ + putchar(' '); /* Output one space */ + c = macroid(c); /* Grab the token */ + } while (scantype(c) == LET && catenate()); + if (c == EOF_CHAR || c == '\n') /* From macro exp error */ + goto end_line; /* Exit line loop */ + switch (scantype(c)) { + case LET: + fputs(token, stdout); /* Quite ordinary token */ + break; + + + case DIG: /* Output a number */ + case DOT: /* Dot may begin floats */ + scannumber(c, &output); + break; + + case QUO: /* char or string const */ + scanstring(c, &output); /* Copy it to output */ + break; + + default: /* Some other character */ + cput(c); /* Just output it */ + break; + } /* Switch ends */ + } /* Line for loop */ + end_line: if (c == '\n') { /* Compiling at EOL? */ + putchar('\n'); /* Output newline, if */ + if (infile->fp == NULL) /* Expanding a macro, */ + wrongline = TRUE; /* Output # line later */ + } + } /* Continue until EOF */ +} + +/* +output(c) +int c; +*/ +int output(int c) +/* + * Output one character to stdout -- output() is passed as an + * argument to scanstring() + */ +{ +#if COMMENT_INVISIBLE + if (c != TOK_SEP && c != COM_SEP) +#else + if (c != TOK_SEP) +#endif + putchar(c); +} + +static char* sharpfilename = NULL; + +FILE_LOCAL +sharp() +/* + * Output a line number line. + */ +{ + register char* name; + + if (keepcomments) /* Make sure # comes on */ + putchar('\n'); /* a fresh, new line. */ + printf("#%s %d", LINE_PREFIX, line); + if (infile->fp != NULL) { + name = (infile->progname != NULL) + ? 
infile->progname : infile->filename; + if (sharpfilename == NULL + || sharpfilename != NULL && !streq(name, sharpfilename)) { + if (sharpfilename != NULL) + free(sharpfilename); + sharpfilename = savestring(name); + printf(" \"%s\"", name); + } + } + putchar('\n'); + wrongline = FALSE; +} + +/* Originally from cpp2.c: */ +/* + * C P P 2 . C + * + * Process #control lines + * + * Edit history + * 13-Nov-84 MM Split from cpp1.c + */ + +#include +#include +//#include "cppdef.h" +//#include "cpp.h" +#if HOST == SYS_VMS + /* + * Include the rms stuff. (We can't just include rms.h as it uses the + * VaxC-specific library include syntax that Decus CPP doesn't support. + * By including things by hand, we can CPP ourself.) + */ +#include +#include +#include +#include +#endif + + /* + * Generate (by hand-inspection) a set of unique values for each control + * operator. Note that this is not guaranteed to work for non-Ascii + * machines. CPP won't compile if there are hash conflicts. + */ +// TODO/XXX: These values have been hand-simplified to numbers. -Zak +#define L_assert 327 /*('a' + ('s' << 1))*/ +#define L_define 304 /*('d' + ('f' << 1))*/ +#define L_elif 311 /*('e' + ('i' << 1))*/ +#define L_else 331 /*('e' + ('s' << 1))*/ +#define L_endif 301 /*('e' + ('d' << 1))*/ +#define L_if 105 /*('i' + (EOS << 1))*/ +#define L_ifdef 305 /*('i' + ('d' << 1))*/ +#define L_ifndef 325 /*('i' + ('n' << 1))*/ +#define L_import 329 /*('i' + ('p' << 1))*/ /* TODO. -Zak */ +#define L_include 303 /*('i' + ('c' << 1))*/ +#define L_line 328 /*('l' + ('n' << 1))*/ +#define L_nogood 0 /*(EOS + (EOS << 1))*/ /* To catch #i */ +#define L_pragma 306 /*('p' + ('a' << 1))*/ +#define L_undef 317 /*('u' + ('d' << 1))*/ +#define L_warning 347 /*('w' + ('r' << 1))*/ +#define L_error 345 /* we do a special check here because the hash otherwise conflicts with L_import. 
-Zak.*/ +#if DEBUG +#define L_debug 296 /*('d' + ('b' << 1))*/ /* #debug */ +#define L_nodebug 310 /*('n' + ('d' << 1))*/ /* #nodebug */ +#endif + +/* +int +control(counter) +int counter;*/ /* Pending newline counter */ +int control(int counter) +/* + * Process #control lines. Simple commands are processed inline, + * while complex commands have their own subroutines. + * + * The counter is used to force out a newline before #line, and + * #pragma commands. This prevents these commands from ending up at + * the end of the previous line if cpp is invoked with the -C option. + */ +{ + register int c; + register char* tp; + register int hash; + char* ep; + + c = skipws(); + if (c == '\n' || c == EOF_CHAR) + return (counter + 1); + if (!isdigit(c)) + scanid(c); /* Get #word to token[] */ + else { + unget(); /* Hack -- allow #123 as a */ + strcpy(token, "line"); /* synonym for #line 123 */ + } + hash = (token[1] == EOS) ? L_nogood : (token[0] + (token[2] << 1)); + switch (hash) { + case L_assert: tp = "assert"; break; + case L_define: tp = "define"; break; + case L_elif: tp = "elif"; break; + case L_else: tp = "else"; break; + case L_endif: tp = "endif"; break; + case L_if: tp = "if"; break; + case L_ifdef: tp = "ifdef"; break; + case L_ifndef: tp = "ifndef"; break; + case L_import: tp = "import"; break; + case L_include: tp = "include"; break; + case L_line: tp = "line"; break; + case L_pragma: tp = "pragma"; break; + case L_undef: tp = "undef"; break; + // This is checked separately since the hash would conflict with import: case L_error: tp = "error"; break; + case L_warning: tp = "warning"; break; +#if DEBUG + case L_debug: tp = "debug"; break; + case L_nodebug: tp = "nodebug"; break; +#endif + default: hash = L_nogood; + case L_nogood: tp = ""; break; + } + if (!streq(tp, token)) { + /* Since the hashes for #error and #import would conflict, we do a special + * extra check for whether an L_import hash is actually an L_error. 
+ */ + if (hash == L_import && streq("error", token)) { + hash = L_error; + } else { + hash = L_nogood; + } + } + /* + * hash is set to a unique value corresponding to the + * control keyword (or L_nogood if we think it's nonsense). + */ + if (infile->fp == NULL) + cwarn("Control line \"%s\" within macro expansion", token); + if (!compiling) { /* Not compiling now */ + switch (hash) { + case L_if: /* These can't turn */ + case L_ifdef: /* compilation on, but */ + case L_ifndef: /* we must nest #if's */ + //if (++ifptr >= &ifstack[BLK_NEST]) { + ifptr++; + if (ifptr >= ifstack+BLK_NEST /*&ifstack[BLK_NEST]*/) { + goto if_nest_err; + //cfatal("Too many nested #%s statements", token); + } + *ifptr = 0; /* !WAS_COMPILING */ + case L_line: /* Many */ + /* + * Are pragma's always processed? + */ + case L_pragma: /* options */ + case L_import: /* (this too?) */ + case L_error: /* (this too?) */ + case L_warning: /* (this too?) */ + case L_include: /* are uninteresting */ + case L_define: /* if we */ + case L_undef: /* aren't */ + case L_assert: /* compiling. */ + dump_line: skipnl(); /* Ignore rest of line */ + return (counter + 1); + } + } + /* + * Make sure that #line and #pragma are output on a fresh line. + */ + if (counter > 0 && (hash == L_line || hash == L_pragma)) { + putchar('\n'); + counter--; + } + switch (hash) { + case L_line: + /* + * Parse the line to update the line number and "progname" + * field and line number for the next input line. + * Set wrongline to force it out later. + */ + c = skipws(); + workp = work; /* Save name in work */ + while (c != '\n' && c != EOF_CHAR) { + save(c); + c = get(); + } + unget(); + save(EOS); + /* + * Split #line argument into and + * We subtract 1 as we want the number of the next line. 
+ */ + line = atoi(work) - 1; /* Reset line number */ + for (tp = work; isdigit(*tp) || scantype(*tp) == SPA; tp++) + ; /* Skip over digits */ + if (*tp != EOS) { /* Got a filename, so: */ + if (*tp == '"' && (ep = strrchr(tp + 1, '"')) != NULL) { + tp++; /* Skip over left quote */ + *ep = EOS; /* And ignore right one */ + } + if (infile->progname != NULL) /* Give up the old name */ + free(infile->progname); /* if it's allocated. */ + infile->progname = savestring(tp); + } + wrongline = TRUE; /* Force output later */ + break; + + case L_import: + doinclude(1); + break; + + case L_include: + doinclude(0); + break; + + case L_define: + dodefine(); + break; + + case L_undef: + doundef(); + break; + + case L_else: + if (ifptr == &ifstack[0]) + goto nest_err; + else if ((*ifptr & ELSE_SEEN) != 0) + goto else_seen_err; + *ifptr |= ELSE_SEEN; + if ((*ifptr & WAS_COMPILING) != 0) { + if (compiling || (*ifptr & TRUE_SEEN) != 0) + compiling = FALSE; + else { + compiling = TRUE; + } + } + break; + + case L_elif: + if (ifptr == &ifstack[0]) + goto nest_err; + else if ((*ifptr & ELSE_SEEN) != 0) { + else_seen_err: cerror("#%s may not follow #else", token); + goto dump_line; + } + if ((*ifptr & (WAS_COMPILING | TRUE_SEEN)) != WAS_COMPILING) { + compiling = FALSE; /* Done compiling stuff */ + goto dump_line; /* Skip this clause */ + } + doif(L_if); + break; + + case L_if: + case L_ifdef: + case L_ifndef: + ifptr++; + //if (++ifptr >= &ifstack[BLK_NEST]) + if (ifptr >= ifstack + BLK_NEST /*&ifstack[BLK_NEST]*/) { + if_nest_err : cfatal("Too many nested #%s statements", token); + } + *ifptr = WAS_COMPILING; + doif(hash); + break; + + case L_endif: + if (ifptr == &ifstack[0]) { + nest_err: cerror("#%s must be in an #if", token); + goto dump_line; + } + if (!compiling && (*ifptr & WAS_COMPILING) != 0) + wrongline = TRUE; + compiling = ((*ifptr & WAS_COMPILING) != 0); + --ifptr; + break; + + case L_assert: + if (eval() == 0) + cerror("Preprocessor assertion failure", NULLST); + break; 
+ + case L_pragma: + /* + * #pragma is provided to pass "options" to later + * passes of the compiler. cpp doesn't have any yet. + */ + printf("#pragma "); + while ((c = get()) != '\n' && c != EOF_CHAR) + cput(c); + unget(); + break; + + case L_warning: + cwarn("WARNING LINE REACHED (TODO: NICELY FORMAT)", NULL); + break; + + case L_error: + cerror("WARNING LINE REACHED (TODO: NICELY FORMAT)", NULL); + break; + +#if DEBUG + case L_debug: + if (debug == 0) + dumpdef("debug set on"); + debug++; + break; + + case L_nodebug: + debug--; + break; +#endif + + default: + /* + * Undefined #control keyword. + * Note: the correct behavior may be to warn and + * pass the line to a subsequent compiler pass. + * This would allow #asm or similar extensions. + */ + cerror("Illegal # command \"%s\"", token); + break; + } + if ((hash != L_include) && (hash != L_import)) { +#if OLD_PREPROCESSOR + /* + * Ignore the rest of the #control line so you can write + * #if foo + * #endif foo + */ + goto dump_line; /* Take common exit */ +#else + if (skipws() != '\n') { + cwarn("Unexpected text in #control line ignored", NULLST); + skipnl(); + } +#endif + } + return (counter + 1); +} + +/* +FILE_LOCAL +doif(hash) +int hash; +*/ +FILE_LOCAL int doif(int hash) +/* + * Process an #if, #ifdef, or #ifndef. The latter two are straightforward, + * while #if needs a subroutine of its own to evaluate the expression. + * + * doif() is called only if compiling is TRUE. If false, compilation + * is always supressed, so we don't need to evaluate anything. This + * supresses unnecessary warnings. 
+ */ +{ + register int c; + register int found; + + if ((c = skipws()) == '\n' || c == EOF_CHAR) { + unget(); + goto badif; + } + if (hash == L_if) { + //fprintf(stderr, "Doing #if...\n"); + unget(); + found = (eval() != 0); /* Evaluate expr, != 0 is TRUE */ + hash = L_ifdef; /* #if is now like #ifdef */ + } + else { + //fprintf(stderr, "Doing #something-else...\n"); + if (scantype(c) != LET) /* Next non-blank isn't letter */ + goto badif; /* ... is an error */ + found = (lookid(c) != NULL); /* Look for it in symbol table */ + } + if (found == (hash == L_ifdef)) { + compiling = TRUE; + *ifptr |= TRUE_SEEN; + } + else { + compiling = FALSE; + } + return; + +badif: cerror("#if, #ifdef, or #ifndef without an argument", NULLST); +#if !OLD_PREPROCESSOR + skipnl(); /* Prevent an extra */ + unget(); /* Error message */ +#endif + return; +} + + void addimport(char* name); + +FILE_LOCAL +int doinclude(int isimport) +/* + * Process the #include control line. + * There are three variations: + * #include "file" search somewhere relative to the + * current source file, if not found, + * treat as #include . + * #include Search in an implementation-dependent + * list of places. + * #include token Expand the token, it must be one of + * "file" or , process as such. + * + * Note: the November 12 draft forbids '>' in the #include format. + * This restriction is unnecessary and not implemented. + */ +{ + register int c; + register int delim; +#if HOST == SYS_VMS + char def_filename[NAM$C_MAXRSS + 1]; +#endif + + delim = macroid(skipws()); + + if (delim != '<' && delim != '"') + goto incerr; + if (delim == '<') + delim = '>'; + workp = work; + instring = TRUE; /* Accept all characters */ +#ifdef CONTROL_COMMENTS_NOT_ALLOWED + while ((c = get()) != '\n' && c != EOF_CHAR) + save(c); /* Put it away. */ + unget(); /* Force nl after includee */ + /* + * The draft is unclear if the following should be done. 
+ */ + while (--workp >= work && *workp == ' ') + ; /* Trim blanks from filename */ + if (*workp != delim) + goto incerr; +#else + while ((c = get()) != delim && c != EOF_CHAR) + save(c); +#endif + * workp = EOS; /* Terminate filename */ + instring = FALSE; + + if (isimport && delim != '>') { + cerror("#import only supports <...> syntax", ""); + return 0; + } + //fprintf(stderr, "NOTE: Include name is '%s'\n", work); + if (isimport) { + if (isimported(work)) { + //fprintf(stderr, "NOTE: Skipping import of '%s' (already imported)\n", work); + wrongline = TRUE; // Hopefully this will correct the missing line in output + return 0; + } + addimport(work); + } +#if HOST == SYS_VMS + /* + * Assume the default .h filetype. + */ + if (!vmsparse(work, ".H", def_filename)) { + perror(work); /* Oops. */ + goto incerr; + } + else if (openinclude(def_filename, (delim == '"'))) + return 0; +#else + if (openinclude(work, (delim == '"'))) + return 0; +#endif + /* + * No sense continuing if #include file isn't there. + */ + cfatal("Cannot open include file \"%s\"", work); + +incerr: cerror("#include syntax error", NULLST); + return 0; +} + +typedef struct imported imported_t; +struct imported { + char* name; + imported_t* next; +}; +imported_t* importedlist = NULL; + +int isimported(char* name) { + imported_t* l = importedlist; + while (l != NULL) { + if (!strcmp(l->name, name)) { + return 1; + } + l = l->next; + } + return 0; +} + +void addimport(char* name) { + imported_t* head = calloc(1, sizeof(imported_t)); + if (head == NULL) { + cfatal("Out of memory in addimport", ""); + return; + } + head->name = strdup(name); + if (head->name == NULL) { + cfatal("Out of memory in addimport", ""); + free(head); + return; + } + head->next = importedlist; + importedlist = head; +} + +FILE_LOCAL int openinclude(char* filename, int searchlocal) +/*char* filename;*/ /* Input file name */ +/*int searchlocal;*/ /* TRUE if #include "file" */ +/* + * Actually open an include file. 
This routine is only called from + * doinclude() above, but was written as a separate subroutine for + * programmer convenience. It searches the list of directories + * and actually opens the file, linking it into the list of + * active files. Returns TRUE if the file was opened, FALSE + * if openinclude() fails. No error message is printed. + */ +{ + register char** incptr; +#if HOST == SYS_VMS +#if NWORK < (NAM$C_MAXRSS + 1) + << error, NWORK isn't greater than NAM$C_MAXRSS >> +#endif +#endif + /*char tmpname[NWORK];*/ /* Filename work area */ + char* tmpname = calloc(NWORK,1); + if (tmpname == NULL) { + cfatal("Failed to allocate memory", "openinclude buffer area"); + return FALSE; + } + + if (searchlocal) { + /* + * Look in local directory first + */ +#if HOST == SYS_UNIX + /* + * Try to open filename relative to the directory of the current + * source file (as opposed to the current directory). (ARF, SCK). + */ + if (filename[0] != '/' + && hasdirectory(infile->filename, tmpname)) + strcat(tmpname, filename); + else { + strcpy(tmpname, filename); + } +#else + if (!hasdirectory(filename, tmpname) + && hasdirectory(infile->filename, tmpname)) + strcat(tmpname, filename); + else { + strcpy(tmpname, filename); + } +#endif + if (openfile(tmpname)) { + free(tmpname); + return (TRUE); + } + } + /* + * Look in any directories specified by -I command line + * arguments, then in the builtin search list. 
+ */ + for (incptr = incdir; incptr < incend; incptr = incptr + 1 /* TODO: Make ++ work: incptr++*/) { + if (strlen(incptr[0]) + strlen(filename) >= (NWORK - 1)) + cfatal("Filename work buffer overflow", NULLST); + else { +#if HOST == SYS_UNIX + if (filename[0] == '/') + strcpy(tmpname, filename); + else { + sprintf(tmpname, "%s/%s", *incptr, filename); + } +#else + if (!hasdirectory(filename, tmpname)) + sprintf(tmpname, "%s%s", *incptr, filename); +#endif + if (openfile(tmpname)) { + free(tmpname); + return (TRUE); + } + } + } + free(tmpname); + return (FALSE); +} + +FILE_LOCAL int +hasdirectory(char* source, char* result) +/*char* source;*/ /* Directory to examine */ +/*char* result;*/ /* Put directory stuff here */ +/* + * If a device or directory is found in the source filename string, the + * node/device/directory part of the string is copied to result and + * hasdirectory returns TRUE. Else, nothing is copied and it returns FALSE. + */ +{ +#if HOST == SYS_UNIX + register char* tp; + + if ((tp = strrchr(source, '/')) == NULL) + return (FALSE); + else { + // XXX TODO: This uses typecasting hack to allow compilation. -Zak + strncpy(result, source, ((long long)tp) - ((long long)source) + 1); + result[((long long)tp) - ((long long)source) + 1] = EOS; + return (TRUE); + } +#else +#if HOST == SYS_VMS + if (vmsparse(source, NULLST, result) + && result[0] != EOS) + return (TRUE); + else { + return (FALSE); + } +#else + /* + * Random DEC operating system (RSX, RT11, RSTS/E) + */ + register char* tp; + + if ((tp = strrchr(source, ']')) == NULL + && (tp = strrchr(source, ':')) == NULL) + return (FALSE); + else { + strncpy(result, source, tp - source + 1); + result[tp - source + 1] = EOS; + return (TRUE); + } +#endif +#endif +} + +#if HOST == SYS_VMS + +/* + * EXP_DEV is set if a device was specified, EXP_DIR if a directory + * is specified. 
(Both set indicate a file-logical, but EXP_DEV + * would be set by itself if you are reading, say, SYS$INPUT:) + */ +#define DEVDIR (NAM$M_EXP_DEV | NAM$M_EXP_DIR) + +FILE_LOCAL int +vmsparse(source, defstring, result) +char* source; +char* defstring; /* non-NULL -> default string. */ +char* result; /* Size is at least NAM$C_MAXRSS + 1 */ +/* + * Parse the source string, applying the default (properly, using + * the system parse routine), storing it in result. + * TRUE if it parsed, FALSE on error. + * + * If defstring is NULL, there are no defaults and result gets + * (just) the node::[directory] part of the string (possibly "") + */ +{ + struct FAB fab = cc$rms_fab; /* File access block */ + struct NAM nam = cc$rms_nam; /* File name block */ + char fullname[NAM$C_MAXRSS + 1]; + register char* rp; /* Result pointer */ + + fab.fab$l_nam = &nam; /* fab -> nam */ + fab.fab$l_fna = source; /* Source filename */ + fab.fab$b_fns = strlen(source); /* Size of source */ + fab.fab$l_dna = defstring; /* Default string */ + if (defstring != NULLST) + fab.fab$b_dns = strlen(defstring); /* Size of default */ + nam.nam$l_esa = fullname; /* Expanded filename */ + nam.nam$b_ess = NAM$C_MAXRSS; /* Expanded name size */ + if (sys$parse(&fab) == RMS$_NORMAL) { /* Parse away */ + fullname[nam.nam$b_esl] = EOS; /* Terminate string */ + result[0] = EOS; /* Just in case */ + rp = &result[0]; + /* + * Remove stuff added implicitly, accepting node names and + * dev:[directory] strings (but not process-permanent files). 
+ */ + if ((nam.nam$l_fnb & NAM$M_PPF) == 0) { + if ((nam.nam$l_fnb & NAM$M_NODE) != 0) { + strncpy(result, nam.nam$l_node, nam.nam$b_node); + rp += nam.nam$b_node; + *rp = EOS; + } + if ((nam.nam$l_fnb & DEVDIR) == DEVDIR) { + strncpy(rp, nam.nam$l_dev, nam.nam$b_dev + nam.nam$b_dir); + rp += nam.nam$b_dev + nam.nam$b_dir; + *rp = EOS; + } + } + if (defstring != NULLST) { + strncpy(rp, nam.nam$l_name, nam.nam$b_name + nam.nam$b_type); + rp += nam.nam$b_name + nam.nam$b_type; + *rp = EOS; + if ((nam.nam$l_fnb & NAM$M_EXP_VER) != 0) { + strncpy(rp, nam.nam$l_ver, nam.nam$b_ver); + rp[nam.nam$b_ver] = EOS; + } + } + return (TRUE); + } + return (FALSE); +} +#endif + + +/* Originally from cpp3.c: */ + +/* + * C P P 3 . C + * + * File open and command line options + * + * Edit history + * 13-Nov-84 MM Split from cpp1.c + */ + +#include +#include +//#include "cppdef.h" +//#include "cpp.h" +#if DEBUG && (HOST == SYS_VMS || HOST == SYS_UNIX) +#include +extern int abort(); /* For debugging */ +#endif + +int +openfile(char* filename) +/*char* filename;*/ +/* + * Open a file, add it to the linked list of open files. + * This is called only from openfile() above. + */ +{ + register FILE* fp; + + if ((fp = fopen(filename, "r")) == NULL) { +#if DEBUG + perror(filename); +#endif + return (FALSE); + } +#if DEBUG + if (debug) + fprintf(stderr, "Reading from \"%s\"\n", filename); +#endif + addfile(fp, filename); + return (TRUE); +} + +int addfile(FILE* fp, char* filename) +/*FILE* fp; */ /* Open file pointer */ +/*char* filename;*/ /* Name of the file */ +/* + * Initialize tables for this open file. This is called from openfile() + * above (for #include files), and from the entry to cpp to open the main + * input file. It calls a common routine, getfile() to build the FILEINFO + * structure which is used to read characters. (getfile() is also called + * to setup a macro replacement.) 
+ */ +{ + register FILEINFO* file; + extern FILEINFO* getfile(); + + file = getfile(NBUFF, filename); + file->fp = fp; /* Better remember FILE * */ + file->buffer[0] = EOS; /* Initialize for first read */ + line = 1; /* Working on line 1 now */ + wrongline = TRUE; /* Force out initial #line */ +} + +int setincdirs() +/* + * Append system-specific directories to the include directory list. + * Called only when cpp is started. + */ +{ + +#ifdef CPP_INCLUDE + * incend++ = CPP_INCLUDE; +#define IS_INCLUDE 1 +#else +#define IS_INCLUDE 0 +#endif + +#if HOST == SYS_UNIX + //incend[0] = "/usr/include"; + //incend = incend + 1; + //* incend++ = "/usr/include"; +#define MAXINCLUDE (NINCLUDE - 1 - IS_INCLUDE) +#endif + +#if HOST == SYS_VMS + extern char* getenv(); + + if (getenv("C$LIBRARY") != NULL) + *incend++ = "C$LIBRARY:"; + *incend++ = "SYS$LIBRARY:"; +#define MAXINCLUDE (NINCLUDE - 2 - IS_INCLUDE) +#endif + +#if HOST == SYS_RSX + extern int $$rsts; /* TRUE on RSTS/E */ + extern int $$pos; /* TRUE on PRO-350 P/OS */ + extern int $$vms; /* TRUE on VMS compat. */ + + if ($$pos) { /* P/OS? */ + *incend++ = "SY:[ZZDECUSC]"; /* C #includes */ + *incend++ = "LB:[1,5]"; /* RSX library */ + } + else if ($$rsts) { /* RSTS/E? */ + *incend++ = "SY:@"; /* User-defined account */ + *incend++ = "C:"; /* Decus-C library */ + *incend++ = "LB:[1,1]"; /* RSX library */ + } + else if ($$vms) { /* VMS compatibility? */ + *incend++ = "C:"; + } + else { /* Plain old RSX/IAS */ + *incend++ = "LB:[1,1]"; + } +#define MAXINCLUDE (NINCLUDE - 3 - IS_INCLUDE) +#endif + +#if HOST == SYS_RT11 + extern int $$rsts; /* RSTS/E emulation? */ + + if ($$rsts) + *incend++ = "SY:@"; /* User-defined account */ + *incend++ = "C:"; /* Decus-C library disk */ + *incend++ = "SY:"; /* System (boot) disk */ +#define MAXINCLUDE (NINCLUDE - 3 - IS_INCLUDE) +#endif +} + +int +dooptions(int argc, char** argv) +/*int argc; +char* argv[];*/ +/* + * dooptions is called to process command line arguments (-Detc). 
+ * It is called only at cpp startup. + */ +{ + register char* ap; + register DEFBUF* dp; + register int c; + int i, j; + char* arg; + SIZES* sizp; /* For -S */ + int size; /* For -S */ + int isdatum; /* FALSE for -S* */ + int endtest; /* For -S */ + + for (i = j = 1; i < argc; i++) { + arg = ap = argv[i]; + if (*ap++ != '-' || *ap == EOS) + argv[j++] = argv[i]; + else { + c = *ap++; /* Option byte */ + if (islower(c)) /* Normalize case */ + c = toupper(c); + switch (c) { /* Command character */ + case 'P': /* Special flag to invoke preprocessor, ignored if we're already invoked! -Zak. */ + break; + case 'C': /* Keep comments */ + cflag = TRUE; + keepcomments = TRUE; + break; + + case 'D': /* Define symbol */ +#if HOST != SYS_UNIX + zap_uc(ap); /* Force define to U.C. */ +#endif + /* + * If the option is just "-Dfoo", make it -Dfoo=1 + */ + while (*ap != EOS && *ap != '=') + ap++; + if (*ap == EOS) + ap = "1"; + else + *ap++ = EOS; + /* + * Now, save the word and its definition. + */ + dp = defendel(argv[i] + 2, FALSE); + dp->repl = savestring(ap); + dp->nargs = DEF_NOARGS; + break; + + case 'E': /* Ignore non-fatal */ + eflag = TRUE; /* errors. */ + break; + + case 'I': /* Include directory */ + if (incend >= incdir + MAXINCLUDE /*&incdir[MAXINCLUDE]*/) + cfatal("Too many include directories", NULLST); + if (strlen(arg) == 2) { // Check if parameter is in the next argument + if (i + 1 >= argc) { + cfatal("Expected parameter following -I option"); + } + i++; + arg = ap = argv[i]; + } + incend[0] = ap; + incend = incend + 1; + //*incend++ = ap; + break; + + case 'N': /* No predefineds */ + nflag++; /* Repeat to undefine */ + break; /* __LINE__, etc. 
*/ + + case 'S': + sizp = size_table; + if (isdatum = (*ap != '*')) /* If it's just -S, */ + endtest = T_FPTR; /* Stop here */ + else { /* But if it's -S* */ + ap++; /* Step over '*' */ + endtest = 0; /* Stop at end marker */ + } + while (sizp->bits != endtest && *ap != EOS) { + if (!isdigit(*ap)) { /* Skip to next digit */ + ap++; + continue; + } + size = 0; /* Compile the value */ + while (isdigit(*ap)) { + size *= 10; + size += (*ap++ - '0'); + } + if (isdatum) + sizp->size = size; /* Datum size */ + else + sizp->psize = size; /* Pointer size */ + sizp++; + } + if (sizp->bits != endtest) + cwarn("-S, too few values specified in %s", argv[i]); + else if (*ap != EOS) + cwarn("-S, too many values, \"%s\" unused", ap); + break; + + case 'U': /* Undefine symbol */ +#if HOST != SYS_UNIX + zap_uc(ap); +#endif + if (defendel(ap, TRUE) == NULL) + cwarn("\"%s\" wasn't defined", ap); + break; + +#if DEBUG + case 'X': /* Debug */ + debug = (isdigit(*ap)) ? atoi(ap) : 1; +#if (HOST == SYS_VMS || HOST == SYS_UNIX) + signal(SIGINT, abort); /* Trap "interrupt" */ +#endif + fprintf(stderr, "Debug set to %d\n", debug); + break; +#endif + + default: /* What is this one? */ + cwarn("Unknown option \"%s\"", arg); + fprintf(stderr, "The following options are valid:\n -C\t\t\tWrite source file comments to output\n -Dsymbol=value\tDefine a symbol with the given (optional) value\n -Idirectory\t\tAdd a directory to the #include search list\n -N\t\t\tDon't predefine target-specific names\n -Stext\t\tSpecify sizes for #if sizeof\n -Usymbol\t\tUndefine symbol\n"); +#if DEBUG + fprintf(stderr, " -Xvalue\t\tSet internal debug flag\n"); +#endif + break; + } /* Switch on all options */ + } /* If it's a -option */ + } /* For all arguments */ + if (j > 3) { + cerror( + "Too many file arguments. 
Usage: cpp [input [output]]", + NULLST); + } + return (j); /* Return new argc */ +} + +#if HOST != SYS_UNIX +FILE_LOCAL +zap_uc(ap) +register char* ap; +/* + * Dec operating systems mangle upper-lower case in command lines. + * This routine forces the -D and -U arguments to uppercase. + * It is called only on cpp startup by dooptions(). + */ +{ + while (*ap != EOS) { + /* + * Don't use islower() here so it works with Multinational + */ + if (*ap >= 'a' && *ap <= 'z') + *ap = toupper(*ap); + ap++; + } +} +#endif + +int initdefines() +/* + * Initialize the built-in #define's. There are two flavors: + * #define decus 1 (static definitions) + * #define __FILE__ ?? (dynamic, evaluated by magic) + * Called only on cpp startup. + * + * Note: the built-in static definitions are supressed by the -N option. + * __LINE__, __FILE__, and __DATE__ are always present. + */ +{ + register char** pp; + register char* tp; + register DEFBUF* dp; + int i; + long tvec; + extern char* ctime(); + + /* + * Predefine the built-in symbols. Allow the + * implementor to pre-define a symbol as "" to + * eliminate it. + */ + if (nflag == 0) { + for (pp = preset; *pp != NULL; pp = pp + 1) { + if (*pp[0] != EOS) { + dp = defendel(*pp, FALSE); + dp->repl = savestring("1"); + dp->nargs = DEF_NOARGS; + } + } + } + /* + * The magic pre-defines (__FILE__ and __LINE__ are + * initialized with negative argument counts. expand() + * notices this and calls the appropriate routine. + * DEF_NOARGS is one greater than the first "magic" definition. + */ + if (nflag < 2) { + pp = magic; /* TODO/XXX: Hack to allow compilation. -Zak */ + for (/*pp = magic,*/ i = DEF_NOARGS; *pp != NULL; pp = pp + 1) { + dp = defendel(*pp, FALSE); + dp->nargs = --i; + } +#if OK_DATE + /* + * Define __DATE__ as today's date. 
+ */ + dp = defendel("__DATE__", FALSE); + dp->repl = tp = getmem(27); + dp->nargs = DEF_NOARGS; + time(&tvec); + *tp++ = '"'; + strcpy(tp, ctime(&tvec)); + tp[24] = '"'; /* Overwrite newline */ +#endif + } +} + +#if HOST == SYS_VMS +/* + * getredirection() is intended to aid in porting C programs + * to VMS (Vax-11 C) which does not support '>' and '<' + * I/O redirection. With suitable modification, it may + * useful for other portability problems as well. + */ + +int +getredirection(int argc, char** argv) +/*int argc; +char** argv;*/ +/* + * Process vms redirection arg's. Exit if any error is seen. + * If getredirection() processes an argument, it is erased + * from the vector. getredirection() returns a new argc value. + * + * Warning: do not try to simplify the code for vms. The code + * presupposes that getredirection() is called before any data is + * read from stdin or written to stdout. + * + * Normal usage is as follows: + * + * main(argc, argv) + * int argc; + * char *argv[]; + * { + * argc = getredirection(argc, argv); + * } + */ +{ + register char* ap; /* Argument pointer */ + int i; /* argv[] index */ + int j; /* Output index */ + int file; /* File_descriptor */ + extern int errno; /* Last vms i/o error */ + + for (j = i = 1; i < argc; i++) { /* Do all arguments */ + switch (*(ap = argv[i])) { + case '<': /* ': /* >file or >>file */ + if (*++ap == '>') { /* >>file */ + /* + * If the file exists, and is writable by us, + * call freopen to append to the file (using the + * file's current attributes). Otherwise, create + * a new file with "vanilla" attributes as if the + * argument was given as ">filename". + * access(name, 2) returns zero if we can write on + * the specified file. 
+ */ + if (access(++ap, 2) == 0) { + if (freopen(ap, "a", stdout) != NULL) + break; /* Exit case statement */ + perror(ap); /* Error, can't append */ + exit(errno); /* After access test */ + } /* If file accessable */ + } + /* + * On vms, we want to create the file using "standard" + * record attributes. creat(...) creates the file + * using the caller's default protection mask and + * "variable length, implied carriage return" + * attributes. dup2() associates the file with stdout. + */ + if ((file = creat(ap, 0, "rat=cr", "rfm=var")) == -1 + || dup2(file, fileno(stdout)) == -1) { + perror(ap); /* Can't create file */ + exit(errno); /* is a fatal error */ + } /* If '>' creation */ + break; /* Exit case test */ + + default: + argv[j++] = ap; /* Not a redirector */ + break; /* Exit case test */ + } + } /* For all arguments */ + argv[j] = NULL; /* Terminate argv[] */ + return (j); /* Return new argc */ +} +#endif + + + + + +/* Originally from cpp4.c: */ +/* + * C P P 4 . C + * M a c r o D e f i n i t i o n s + * + * Edit History + * 31-Aug-84 MM USENET net.sources release + * 04-Oct-84 MM __LINE__ and __FILE__ must call ungetstring() + * so they work correctly with token concatenation. + * Added string formal recognition. + * 25-Oct-84 MM "Short-circuit" evaluate #if's so that we + * don't print unnecessary error messages for + * #if !defined(FOO) && FOO != 0 && 10 / FOO ... + * 31-Oct-84 ado/MM Added token concatenation + * 6-Nov-84 MM Split off eval stuff + */ + +#include +#include +//#include "cppdef.h" +//#include "cpp.h" + /* + * parm[], parmp, and parlist[] are used to store #define() argument + * lists. nargs contains the actual number of parameters stored. 
+ */ +static char parm[NPARMWORK + 1]; /* define param work buffer */ +static char* parmp; /* Free space in parm */ +static char* parlist[LASTPARM]; /* -> start of each parameter */ +static int nargs; /* Parameters for this macro */ + +int save(int c); +int dodefine() +/* + * Called from control when a #define is scanned. This module + * parses formal parameters and the replacement string. When + * the formal parameter name is encountered in the replacement + * string, it is replaced by a character in the range 128 to + * 128+NPARAM (this allows up to 32 parameters within the + * Dec Multinational range). If cpp is ported to an EBCDIC + * machine, you will have to make other arrangements. + * + * There is some special case code to distinguish + * #define foo bar + * from #define foo() bar + * + * Also, we make sure that + * #define foo foo + * expands to "foo" but doesn't put cpp into an infinite loop. + * + * A warning message is printed if you redefine a symbol to a + * different text. I.e, + * #define foo 123 + * #define foo 123 + * is ok, but + * #define foo 123 + * #define foo +123 + * is not. + * + * The following subroutines are called from define(): + * checkparm called when a token is scanned. It checks through the + * array of formal parameters. If a match is found, the + * token is replaced by a control byte which will be used + * to locate the parameter when the macro is expanded. + * textput puts a string in the macro work area (parm[]), updating + * parmp to point to the first free byte in parm[]. + * textput() tests for work buffer overflow. + * charput puts a single character in the macro work area (parm[]) + * in a manner analogous to textput(). 
+ */ +{ + register int c; + register DEFBUF* dp; /* -> new definition */ + int isredefine; /* TRUE if redefined */ + char* old; /* Remember redefined */ + /*extern int save();*/ /* Save char in work[] */ + + old = NULL; // NOTE: Fix from Zak (possibly uninitialised otherwise) + int varargsidx = -1; + + if (scantype(c = skipws()) != LET) + goto bad_define; + isredefine = FALSE; /* Set if redefining */ + if ((dp = lookid(c)) == NULL) /* If not known now */ + dp = defendel(token, FALSE); /* Save the name */ + else { /* It's known: */ + isredefine = TRUE; /* Remember this fact */ + old = dp->repl; /* Remember replacement */ + dp->repl = NULL; /* No replacement now */ + } + parlist[0] = parmp = parm; /* Setup parm buffer */ + if ((c = get()) == '(') { /* With arguments? */ + nargs = 0; /* Init formals counter */ + do { /* Collect formal parms */ + if (nargs >= LASTPARM) + cfatal("Too many arguments for macro", NULLST); + else if ((c = skipws()) == ')') + break; /* Got them all */ + else if (scantype(c) != LET && scantype(c) != DOT) /* Bad formal syntax */ + goto bad_define; + scanid(c); /* Get the formal param */ + if (streq(".", token)) { + /* A bit of a hack, we just read/check two more dot tokens instead of tokenising the "...". */ + c = get(); + scanid(c); + if (!streq(".", token)) { + goto bad_define; + } + c = get(); + scanid(c); + if (!streq(".", token)) { + goto bad_define; + } + varargsidx = nargs; + //cwarn("TODO: varargs", ""); + } + parlist[nargs++] = parmp; /* Save its start */ + textput(token); /* Save text in parm[] */ + } while ((c = skipws()) == ','); /* Get another argument */ + if (c != ')') /* Must end at ) */ + goto bad_define; + c = ' '; /* Will skip to body */ + } + else { + /* + * DEF_NOARGS is needed to distinguish between + * "#define foo" and "#define foo()". + */ + nargs = DEF_NOARGS; /* No () parameters */ + } + if (scantype(c) == SPA) /* At whitespace? */ + c = skipws(); /* Not any more. 
*/ + workp = work; /* Replacement put here */ + inmacro = TRUE; /* Keep \ now */ + while (c != EOF_CHAR && c != '\n') { /* Compile macro body */ +#if OK_CONCAT +#if COMMENT_INVISIBLE + if (c == COM_SEP) { /* Token concatenation? */ + save(TOK_SEP); /* Stuff a delimiter */ + c = get(); +#else + if (c == '#') { /* Token concatenation? */ + while (workp > work&& scantype(workp[-1]) == SPA) + --workp; /* Erase leading spaces */ + save(TOK_SEP); /* Stuff a delimiter */ + c = skipws(); /* Eat whitespace */ + if (c == '#') c = skipws(); /* Eat hash why not */ +#endif + if (scantype(c) == LET) /* Another token here? */ + ; /* Stuff it normally */ + else if (scantype(c) == DIG) { /* Digit string after? */ + while (scantype(c) == DIG) { /* Stuff the digits */ + save(c); + c = get(); + } + save(TOK_SEP); /* Delimit 2nd token */ + } + else { +#if ! COMMENT_INVISIBLE + ciwarn("Strange character after # (%d.)", c); +#endif + } + continue; + } +#endif + switch (scantype(c)) { + case LET: + checkparm(c, dp); /* Might be a formal */ + break; + + case DIG: /* Number in mac. body */ + case DOT: /* Maybe a float number */ + scannumber(c, &save); /* Scan it off */ + break; + + case QUO: /* String in mac. body */ +#if STRING_FORMAL + stparmscan(c, dp); /* Do string magic */ +#else + stparmscan(c); +#endif + break; + + case BSH: /* Backslash */ + save('\\'); + if ((c = get()) == '\n') + wrongline = TRUE; + else if (c == '\r') { + if ((c = get()) == '\n') + wrongline = TRUE; + } + save(c); + break; + + case SPA: /* Absorb whitespace */ + /* + * Note: the "end of comment" marker is passed on + * to allow comments to separate tokens. 
+ */ + if (workp[-1] == ' ') /* Absorb multiple */ + break; /* spaces */ + else if (c == '\t') + c = ' '; /* Normalize tabs */ + /* Fall through to store character */ + default: /* Other character */ + save(c); + break; + } + c = get(); + } + inmacro = FALSE; /* Stop newline hack */ + unget(); /* For control check */ + if (workp > work&& workp[-1] == ' ') /* Drop trailing blank */ + workp--; + *workp = EOS; /* Terminate work */ + dp->repl = savestring(work); /* Save the string */ + dp->nargs = nargs; /* Save arg count */ + /* Unless it's a varargs then use the special encoding... */ + if (varargsidx >= 0) { + dp->nargs = -64 - varargsidx; + } + //fprintf(stderr, "Argument count %d\n", dp->nargs); +#if DEBUG + if (debug) + dumpadef("macro definition", dp); +#endif + if (isredefine) { /* Error if redefined */ + if ((old != NULL && dp->repl != NULL && !streq(old, dp->repl)) + || (old == NULL && dp->repl != NULL) + || (old != NULL && dp->repl == NULL)) { +#ifdef STRICT_UNDEF + cerror("Redefining defined variable \"%s\"", dp->name); +#else + cwarn("Redefining defined variable \"%s\"", dp->name); +#endif + } + if (old != NULL) /* We don't need the */ + free(old); /* old definition now. */ + } + return; + +bad_define: + cerror("#define syntax error", NULLST); + inmacro = FALSE; /* Stop hack */ + } + +int checkparm(int c, DEFBUF* dp) +/*register int c; +DEFBUF* dp;*/ +/* + * Replace this param if it's defined. Note that the macro name is a + * possible replacement token. We stuff DEF_MAGIC in front of the token + * which is treated as a LETTER by the token scanner and eaten by + * the output routine. This prevents the macro expander from + * looping if someone writes "#define foo foo". 
+ */ +{ + register int i; + register char* cp; + + scanid(c); /* Get parm to token[] */ + for (i = 0; i < nargs; i++) { /* For each argument */ + if (streq(parlist[i], token)) { /* If it's known */ + save(i + MAC_PARM); /* Save a magic cookie */ + return; /* And exit the search */ + } + } + if (streq("__VA_ARGS__", token)) { /* Zaks varargs hack. */ + save(192); // Well use 192 as the varargs hack... + return; + } + /*if (streq("#", token)) {/* Zak's #name hack. * / + save(193); // Well use 193 as the #name hack... + return; + }*/ + if (streq(dp->name, token)) /* Macro name in body? */ + save(DEF_MAGIC); /* Save magic marker */ + for (cp = token; *cp != EOS;) /* And save */ + save(*cp++); /* The token itself */ +} + +#if STRING_FORMAL +int stparmscan(int delim, DEFBUF* dp) +/*int delim; +register DEFBUF* dp;*/ +/* + * Scan the string (starting with the given delimiter). + * The token is replaced if it is the only text in this string or + * character constant. The algorithm follows checkparm() above. + * Note that scanstring() has approved of the string. + */ +{ + register int c; + + /* + * Warning -- this code hasn't been tested for a while. + * It exists only to preserve compatibility with earlier + * implementations of cpp. It is not part of the Draft + * ANSI Standard C language. + */ + save(delim); + instring = TRUE; + while ((c = get()) != delim + && c != '\n' + && c != EOF_CHAR) { + if (scantype(c) == LET) /* Maybe formal parm */ + checkparm(c, dp); + else { + save(c); + if (c == '\\') + save(get()); + } + } + instring = FALSE; + if (c != delim) + cerror("Unterminated string in macro body", NULLST); + save(c); +} +#else +/* +stparmscan(delim) +int delim; +*/ +int save(int c); +int stparmscan(int delim) +/* + * Normal string parameter scan. 
+ */ +{ + register char* wp; + register int i; + /*extern int save();*/ + + wp = workp; /* Here's where it starts */ + if (!scanstring(delim, &save)) + return; /* Exit on scanstring error */ + workp[-1] = EOS; /* Erase trailing quote */ + wp++; /* -> first string content byte */ + for (i = 0; i < nargs; i++) { + if (streq(parlist[i], wp)) { + *wp++ = MAC_PARM + PAR_MAC; /* Stuff a magic marker */ + *wp++ = (i + MAC_PARM); /* Make a formal marker */ + *wp = wp[-3]; /* Add on closing quote */ + workp = wp + 1; /* Reset string end */ + return; + } + } + workp[-1] = wp[-1]; /* Nope, reset end quote. */ +} +#endif + +int doundef() +/* + * Remove the symbol from the defined list. + * Called from the #control processor. + */ +{ + register int c; + + if (scantype(c = skipws()) != LET) + cerror("Illegal #undef argument", NULLST); + else { + scanid(c); /* Get name to token[] */ + if (defendel(token, TRUE) == NULL) { +#ifdef STRICT_UNDEF + cwarn("Symbol \"%s\" not defined in #undef", token); +#endif + } + } +} + +int textput(char* text) +/*char* text;*/ +/* + * Put the string in the parm[] buffer. + */ +{ + register int size; + + size = strlen(text) + 1; + if ((parmp + size) >= parm + NPARMWORK /*&parm[NPARMWORK]*/) + cfatal("Macro work area overflow", NULLST); + else { + strcpy(parmp, text); + parmp += size; + } +} + +int charput(int c) +/*register int c;*/ +/* + * Put the byte in the parm[] buffer. + */ +{ + if (parmp >= parm + NPARMWORK /*&parm[NPARMWORK]*/) + cfatal("Macro work area overflow", NULLST); + else { + *parmp++ = c; + } +} + +/* + * M a c r o E x p a n s i o n + */ + +static DEFBUF* macro; /* Catches start of infinite macro */ + +/* +expand(tokenp) +register DEFBUF* tokenp; +*/ +int expand(DEFBUF* tokenp) +/* + * Expand a macro. Called from the cpp mainline routine (via subroutine + * macroid()) when a token is found in the symbol table. It calls + * expcollect() to parse actual parameters, checking for the correct number. 
+ * It then creates a "file" containing a single line containing the + * macro with actual parameters inserted appropriately. This is + * "pushed back" onto the input stream. (When the get() routine runs + * off the end of the macro line, it will dismiss the macro itself.) + */ +{ + register int c; + register FILEINFO* file; + extern FILEINFO* getfile(); + //fprintf(stderr, "Got token '%s'\n", tokenp->name); + +#if DEBUG + if (debug) + dumpadef("expand entry", tokenp); +#endif + /* + * If no macro is pending, save the name of this macro + * for an eventual error message. + */ + recursion++; + if (/*recursion++*/recursion-1 == 0) + macro = tokenp; + else if (recursion == RECURSION_LIMIT) { + cerror("Recursive macro definition of \"%s\"", tokenp->name); + fprintf(stderr, "(Defined by \"%s\")\n", macro->name); + if (rec_recover) { + do { + c = get(); + } while (infile != NULL && infile->fp == NULL); + unget(); + recursion = 0; + return; + } + } + /* + * Here's a macro to expand. + */ + nargs = 0; /* Formals counter */ + parmp = parm; /* Setup parm buffer */ + int varargidx = -1; + switch (tokenp->nargs) { + + case DEF_NOARGS: /* No parameters just stuffs */ + expstuff(tokenp,-1); /* Do actual parameters */ + break; + case (-2): /* __LINE__ */ + sprintf(work, "%d", line); + ungetstring(work); + break; + + case (-3): /* __FILE__ */ + for (file = infile; file != NULL; file = file->parent) { + if (file->fp != NULL) { + sprintf(work, "\"%s\"", (file->progname != NULL) + ? file->progname : file->filename); + ungetstring(work); + break; + } + } + break; + + default: + /* + * Nothing funny about this macro. + */ + if (tokenp->nargs <= -64) { + varargidx = (0 - tokenp->nargs) - 64; + } else if (tokenp->nargs < 0) + cfatal("Bug: Illegal __ macro \"%s\"", tokenp->name); + while ((c = skipws()) == '\n') /* Look for (, skipping */ + wrongline = TRUE; /* spaces and newlines */ + if (c != '(') { + /*Wrong numb + * If the programmer writes + * #define foo() ... + * ... 
+ * foo [no ()] + * just write foo to the output stream. + */ + unget(); + cwarn("Macro \"%s\" needs arguments", tokenp->name); + fputs(tokenp->name, stdout); + return; + } + else if (expcollect()) { /* Collect arguments */ + if (tokenp->nargs != nargs && !(varargidx >= 0 && nargs > varargidx)) { /* Should be an error? */ + cwarn("Wrong number of macro arguments for \"%s\"", + tokenp->name); + } +#if DEBUG + if (debug) + dumpparm("expand"); +#endif + } /* Collect arguments */ + case -999: /* No parameters just stuffs */ + expstuff(tokenp, varargidx); /* Do actual parameters */ + break; + } /* nargs switch */ +} + +int charput(int c); + +FILE_LOCAL int +expcollect() +/* + * Collect the actual parameters for this macro. TRUE if ok. + */ +{ + register int c; + register int paren; /* For embedded ()'s */ + /*extern int charput();*/ + + for (;;) { + paren = 0; /* Collect next arg. */ + while ((c = skipws()) == '\n') /* Skip over whitespace */ + wrongline = TRUE; /* and newlines. */ + if (c == ')') { /* At end of all args? */ + /* + * Note that there is a guard byte in parm[] + * so we don't have to check for overflow here. + */ + *parmp = EOS; /* Make sure terminated */ + break; /* Exit collection loop */ + } + else if (nargs >= LASTPARM) + cfatal("Too many arguments in macro expansion", NULLST); + parlist[nargs++] = parmp; /* At start of new arg */ + for (;; c = cget()) { /* Collect arg's bytes */ + if (c == EOF_CHAR) { + cerror("end of file within macro argument", NULLST); + return (FALSE); /* Sorry. */ + } + else if (c == '\\') { /* Quote next character */ + charput(c); /* Save the \ for later */ + charput(cget()); /* Save the next char. */ + continue; /* And go get another */ + } + else if (scantype(c) == QUO) { /* Start of string? 
*/ + scanstring(c, &charput); /* Scan it off */ + continue; /* Go get next char */ + } + else if (c == '(') /* Worry about balance */ + paren++; /* To know about commas */ + else if (c == ')') { /* Other side too */ + if (paren == 0) { /* At the end? */ + unget(); /* Look at it later */ + break; /* Exit arg getter. */ + } + paren--; /* More to come. */ + } + else if (c == ',' && paren == 0) /* Comma delimits args */ + break; + else if (c == '\n') /* Newline inside arg? */ + wrongline = TRUE; /* We'll need a #line */ + charput(c); /* Store this one */ + } /* Collect an argument */ + charput(EOS); /* Terminate argument */ +#if DEBUG + if (debug) + printf("parm[%d] = \"%s\"\n", nargs, parlist[nargs - 1]); +#endif + } /* Collect all args. */ + return (TRUE); /* Normal return */ +} + +FILE_LOCAL +int expstuff(DEFBUF* tokenp, int varargidx) +/*DEFBUF* tokenp;*/ /* Current macro being expanded */ +/* + * Stuff the macro body, replacing formal parameters by actual parameters. + */ +{ + register int c; /* Current character */ + register char* inp; /* -> repl string */ + register char* defp; /* -> macro output buff */ + int size; /* Actual parm. size */ + char* defend; /* -> output buff end */ + int string_magic; /* String formal hack */ + FILEINFO* file; /* Funny #include */ + extern FILEINFO* getfile(); + int namehacktivated = 0; /* For the #name syntax to produce a string, we'll set a temporary flag. */ + + file = getfile(NBUFF, tokenp->name); + inp = tokenp->repl; /* -> macro replacement */ + defp = file->buffer; /* -> output buffer */ + defend = defp + (NBUFF - 1); /* Note its end */ + if (inp != NULL) { + while ((c = (*inp++ & 0xFF)) != EOS) { + /* It seems macro args are encoded as special characters. See checkparm function. -Zak */ + if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC)) { + string_magic = (c == (MAC_PARM + PAR_MAC)); + if (string_magic) + c = (*inp++ & 0xFF); + /* + * Replace formal parameter by actual parameter string. 
+ */ + if ((c -= MAC_PARM) < nargs) { + if (namehacktivated) { + strcpy(defp, "\""); + defp++; + } + size = strlen(parlist[c]); + if ((defp + size) >= defend) + goto nospace; + /* + * Erase the extra set of quotes. + */ + if (string_magic && defp[-1] == parlist[c][0]) { + strcpy(defp - 1, parlist[c]); + defp += (size - 2); + } + else { + strcpy(defp, parlist[c]); + defp += size; + } + + if (namehacktivated) { + strcpy(defp, "\""); + defp++; + namehacktivated = 0; + } + } + } else if (c == 192 && varargidx >= 0) { /* Zak's varargs hack (see checkparm function). */ + int tmp; + for (tmp = varargidx; tmp < nargs; tmp++) { + if (tmp != varargidx) { /* For all but the first varargs param we need to add a comma. */ + if ((defp + 1) >= defend) { + goto nospace; + } + strcpy(defp, ","); + defp += 1; + } + size = strlen(parlist[tmp]); + if ((defp + size) >= defend) + goto nospace; + /* + * Erase the extra set of quotes. + */ + if (string_magic && defp[-1] == parlist[tmp][0]) { + strcpy(defp - 1, parlist[tmp]); + defp += (size - 2); + } + else { + strcpy(defp, parlist[tmp]); + defp += size; + } + } + // TODO... + //cfatal("TODO", "__VA_ARGS__"); + } else if (c == 193) { + namehacktivated = 1; + } + else if (defp >= defend) { + nospace: cfatal("Out of space in macro \"%s\" arg expansion", + tokenp->name); + } + else { + *defp++ = c; + } + } + } + *defp = EOS; +#if DEBUG + if (debug > 1) + printf("macroline: \"%s\"\n", file->buffer); +#endif +} + +#if DEBUG +dumpparm(why) +char* why; +/* + * Dump parameter list. + */ +{ + register int i; + + printf("dump of %d parameters (%d bytes total) %s\n", + nargs, parmp - parm, why); + for (i = 0; i < nargs; i++) { + printf("parm[%d] (%d) = \"%s\"\n", + i + 1, strlen(parlist[i]), parlist[i]); + } +} +#endif + +/* Originally from cpp5.c: */ +/* + * C P P 5 . 
C + * E x p r e s s i o n E v a l u a t i o n + * + * Edit History + * 31-Aug-84 MM USENET net.sources release + * 04-Oct-84 MM __LINE__ and __FILE__ must call ungetstring() + * so they work correctly with token concatenation. + * Added string formal recognition. + * 25-Oct-84 MM "Short-circuit" evaluate #if's so that we + * don't print unnecessary error messages for + * #if !defined(FOO) && FOO != 0 && 10 / FOO ... + * 31-Oct-84 ado/MM Added token concatenation + * 6-Nov-84 MM Split from #define stuff, added sizeof stuff + * 19-Nov-84 ado #if error returns TRUE for (sigh) compatibility + */ + +#include +#include +//#include "cppdef.h" +//#include "cpp.h" + + /* + * Evaluate an #if expression. + */ +/* NOTE: This has been moved to top. */ +/*static char* opname[31];*//* = {*/ /* For debug and error messages */ +/*"end of expression", "val", "id", + "+", "-", "*", "/", "%", + "<<", ">>", "&", "|", "^", + "==", "!=", "<", "<=", ">=", ">", + "&&", "||", "?", ":", ",", + "unary +", "unary -", "~", "!", "(", ")", "(none)", +};*/ + +/* + * opdope[] has the operator precedence: + * Bits + * 7 Unused (so the value is always positive) + * 6-2 Precedence (000x .. 017x) + * 1-0 Binary op. flags: + * 01 The binop flag should be set/cleared when this op is seen. + * 10 The new value of the binop flag. + * Note: Expected, New binop + * constant 0 1 Binop, end, or ) should follow constants + * End of line 1 0 End may not be preceded by an operator + * binary 1 0 Binary op follows a value, value follows. + * unary 0 0 Unary op doesn't follow a value, value follows + * ( 0 0 Doesn't follow value, value or unop follows + * ) 1 1 Follows value. Op follows. 
+ */ + +static char opdope[OP_MAX] = { + 0001, /* End of expression */ + 0002, /* Digit */ + 0000, /* Letter (identifier) */ + 0141, 0141, 0151, 0151, 0151, /* ADD, SUB, MUL, DIV, MOD */ + 0131, 0131, 0101, 0071, 0071, /* ASL, ASR, AND, OR, XOR */ + 0111, 0111, 0121, 0121, 0121, 0121, /* EQ, NE, LT, LE, GE, GT */ + 0061, 0051, 0041, 0041, 0031, /* ANA, ORO, QUE, COL, CMA */ +/* + * Unary op's follow + */ + 0160, 0160, 0160, 0160, /* NEG, PLU, COM, NOT */ + 0170, 0013, 0023, /* LPA, RPA, END */ +}; + +int getopdope(int i) { + switch(i) { + case 0: return 1; + case 1: return 2; + case 2: return 0; + + case 3: + case 4: + return 0141; + + case 5: + case 6: + case 7: + return 0151; + + case 8: + case 9: + return 0131; + + case 10: + return 0101; + + case 11: + case 12: + return 0071; + + case 13: + case 14: + return 0111; + + case 15: + case 16: + case 17: + case 18: + return 0121; + + case 19: + return 0061; + + case 20: + return 0051; + + case 21: + case 22: + return 0041; + + case 23: + return 0031; + + case 24: + case 25: + case 26: + case 27: + return 0160; + + case 28: + return 0170; + + case 29: + return 0013; + + case 30: + return 0023; + + default: + cfatal("Bad argument to getopdope()",""); + return -1; + } +} +/* + * OP_QUE and OP_RPA have alternate precedences: + */ +#define OP_RPA_PREC 0013 +#define OP_QUE_PREC 0034 + + /* + * S_ANDOR and S_QUEST signal "short-circuit" boolean evaluation, so that + * #if FOO != 0 && 10 / FOO ... + * doesn't generate an error message. They are stored in optab.skip. 
+ */ +#define S_ANDOR 2 +#define S_QUEST 1 + +typedef struct optab { + char op; /* Operator */ + char prec; /* Its precedence */ + char skip; /* Short-circuit: TRUE to skip */ + char _padding; +} OPTAB; +static int evalue; /* Current value from evallex() */ + +#ifdef nomacargs +FILE_LOCAL int +isbinary(op) +register int op; +{ + return (op >= FIRST_BINOP && op <= LAST_BINOP); +} + +FILE_LOCAL int +isunary(op) +register int op; +{ + return (op >= FIRST_UNOP && op <= LAST_UNOP); +} +#else +#define isbinary(op) (op >= FIRST_BINOP && op <= LAST_BINOP) +#define isunary(op) (op >= FIRST_UNOP && op <= LAST_UNOP) +#endif + +/* + * The following definitions are used to specify basic variable sizes. + */ + +#ifndef S_CHAR +#define S_CHAR (sizeof (char)) +#endif +#ifndef S_SINT +#define S_SINT (sizeof (short int)) +#endif +#ifndef S_INT +#define S_INT (sizeof (int)) +#endif +#ifndef S_LINT +#define S_LINT (sizeof (long int)) +#endif +#ifndef S_FLOAT +#define S_FLOAT (sizeof (float)) +#endif +#ifndef S_DOUBLE +#define S_DOUBLE (sizeof (double)) +#endif +#ifndef S_PCHAR +#define S_PCHAR (sizeof (char *)) +#endif +#ifndef S_PSINT +#define S_PSINT (sizeof (short int *)) +#endif +#ifndef S_PINT +#define S_PINT (sizeof (int *)) +#endif +#ifndef S_PLINT +#define S_PLINT (sizeof (long int *)) +#endif +#ifndef S_PFLOAT +#define S_PFLOAT (sizeof (float *)) +#endif +#ifndef S_PDOUBLE +#define S_PDOUBLE (sizeof (double *)) +#endif +#ifndef S_PFPTR +#define S_PFPTR (sizeof (int (*)())) +#endif + +typedef struct types { + short type; /* This is the bit if */ + char* name; /* this is the token word */ +} TYPES; + +static TYPES basic_types[] = { + { T_CHAR, NULL /*"char"*/, }, + { T_INT, NULL /*"int"*/, }, + { T_FLOAT, NULL /*"float"*/, }, + { T_DOUBLE, NULL /*"double"*/, }, + { T_SHORT, NULL /*"short"*/, }, + { T_LONG, NULL /*"long"*/, }, + { T_SIGNED, NULL /*"signed"*/, }, + { T_UNSIGNED, NULL /*"unsigned"*/, }, + { 0, NULL, }, /* Signal end */ +}; + +int basic_types_init() { + 
basic_types[0].name = "char"; + basic_types[1].name = "int"; + basic_types[2].name = "float"; + basic_types[3].name = "double"; + basic_types[4].name = "short"; + basic_types[5].name = "long"; + basic_types[6].name = "signed"; + basic_types[7].name = "unsigned"; +} + +/* + * Test_table[] is used to test for illegal combinations. + */ +static short test_table[] = { + // TODO/XXX: This is another hack to make it compile... -Zak + 60 /*T_FLOAT | T_DOUBLE | T_LONG | T_SHORT*/, + 15 /*T_FLOAT | T_DOUBLE | T_CHAR | T_INT*/, + 204 /*T_FLOAT | T_DOUBLE | T_SIGNED | T_UNSIGNED*/, + 49 /*T_LONG | T_SHORT | T_CHAR*/, + 0 /* end marker */ +}; + +/* + * The order of this table is important -- it is also referenced by + * the command line processor to allow run-time overriding of the + * built-in size values. The order must not be changed: + * char, short, int, long, float, double (func pointer) + */ +SIZES size_table[] = { + { T_CHAR, S_CHAR, S_PCHAR }, /* char */ + { T_SHORT, S_SINT, S_PSINT }, /* short int */ + { T_INT, S_INT, S_PINT }, /* int */ + { T_LONG, S_LINT, S_PLINT }, /* long */ + { T_FLOAT, S_FLOAT, S_PFLOAT }, /* float */ + { T_DOUBLE, S_DOUBLE, S_PDOUBLE }, /* double */ + { T_FPTR, 0, S_PFPTR }, /* int (*()) */ + { 0, 0, 0 }, /* End of table */ +}; + +FILE_LOCAL int* +evaleval(int* valp, int op, int skip); + +int +eval() +/* + * Evaluate an expression. Straight-forward operator precedence. + * This is called from control() on encountering an #if statement. + * It calls the following routines: + * evallex Lexical analyser -- returns the type and value of + * the next input token. + * evaleval Evaluate the current operator, given the values on + * the value stack. Returns a pointer to the (new) + * value stack. + * For compatiblity with older cpp's, this return returns 1 (TRUE) + * if a syntax error is detected. 
+ */ +{ + register int op=0; /* Current operator */ + register int* valp; /* -> value vector */ + register OPTAB* opp; /* Operator stack */ + int prec=0; /* Op precedence */ + int binop=0; /* Set if binary op. needed */ + int op1=0; /* Operand from stack */ + int skip=0; /* For short-circuit testing */ + int value[NEXP]; /* Value stack */ + OPTAB opstack[NEXP]; /* Operand stack */ + /*extern int* evaleval();*/ /* Does actual evaluation */ + for (int i = 0; i < NEXP; i++) { + value[i] = 0; + opstack[i].op = 0; + opstack[i].prec = 0; + opstack[i].skip = 0; + } + + valp = value+0; + opp = opstack+0; + opp->op = OP_END; /* Mark bottom of stack */ + opp->prec = getopdope(OP_END); //opdope[OP_END]; /* And its precedence */ + opp->skip = 0; /* Not skipping now */ + binop = 0; +again:; +#ifdef DEBUG_EVAL + printf("In #if at again: skip = %d, binop = %d, line is: %s", + opp->skip, binop, infile->bptr); +#endif + op = evallex(opp->skip); + //fprintf(stderr, "Got op id %d\n", op); + if (op /*(op = evallex(opp->skip))*/ == OP_SUB && binop == 0) + op = OP_NEG; /* Unary minus */ + else if (op == OP_ADD && binop == 0) + op = OP_PLU; /* Unary plus */ + else if (op == OP_FAIL) + return (1); /* Error in evallex */ +#ifdef DEBUG_EVAL + printf("op = %s, opdope = %03o, binop = %d, skip = %d\n", + opname[op], getopdope(op) /*opdope[op]*/, binop, opp->skip); +#endif + if (op == DIG) { /* Value? 
*/ + if (binop != 0) { + cerror("misplaced constant in #if", NULLST); + return (1); + } + else if (valp >= value+(NEXP-1)/*&value[NEXP - 1]*/) { + cerror("#if value stack overflow", NULLST); + return (1); + } + else { +#ifdef DEBUG_EVAL + printf("pushing %d onto value stack[%d]\n", + evalue, ((long long)valp) - ((long long)value)); +#endif + valp[0] = evalue; + valp = valp+1; + //* valp++ = evalue; + binop = 1; + } + goto again; + } + else if (op > OP_END) { + cerror("Illegal #if line", NULLST); + return (1); + } + prec = getopdope(op); //opdope[op]; + if (binop != (prec & 1)) { + fprintf(stderr, "binop = %d prec=%d prec&1=%d\n", binop, prec, prec&1); + cerror("Operator %s in incorrect context", opname[op]); + return (1); + } + binop = (prec & 2) >> 1; + for (;;) { +#ifdef DEBUG_EVAL + printf("op %s, prec %d., stacked op %s, prec %d, skip %d\n", + opname[op], prec, opname[opp->op], opp->prec, opp->skip); +#endif + if (prec > opp->prec) { + if (op == OP_LPA) { + prec = OP_RPA_PREC; + } else if (op == OP_QUE) { + prec = OP_QUE_PREC; + } + op1 = opp->skip; /* Save skip for test */ + /* + * Push operator onto op. stack. + */ + opp = opp + 1; + if (opp >= (opstack + NEXP) /*&opstack[NEXP]*/) { + cerror("expression stack overflow at op \"%s\"", + opname[op]); + return (1); + } + opp->op = op; + opp->prec = prec; + skip = (valp[-1] != 0)?1:0; /* Short-circuit tester */ + /* + * Do the short-circuit stuff here. Short-circuiting + * stops automagically when operators are evaluated. + */ + if ((op == OP_ANA && !skip) + || (op == OP_ORO && skip)) + opp->skip = S_ANDOR; /* And/or skip starts */ + else if (op == OP_QUE) /* Start of ?: operator */ + opp->skip = (op1 & S_ANDOR) | ((!skip) ? S_QUEST : 0); + else if (op == OP_COL) { /* : inverts S_QUEST */ + opp->skip = (op1 & S_ANDOR) + | (((op1 & S_QUEST) != 0) ? 0 : S_QUEST); + } + else { /* Other ops leave */ + opp->skip = op1; /* skipping unchanged. 
*/ + } +#ifdef DEBUG_EVAL + printf("stacking %s, valp[-1] == %d at %s", + opname[op], valp[-1], infile->bptr); + dumpstack(opstack, opp, value, valp); +#endif + goto again; + } + /* + * Pop operator from op. stack and evaluate it. + * End of stack and '(' are specials. + */ + skip = opp->skip; /* Remember skip value */ + op1 = opp->op; + switch (op1 /*(op1 = opp->op)*/) { /* Look at stacked op */ + case OP_END: /* Stack end marker */ + + if (op == OP_EOE) + return (valp[-1]); /* Finished ok. */ + else fprintf(stderr, "WARNING: Unusual end sequence?\n"); + goto again; /* Read another op. */ + + case OP_LPA: /* ( on stack */ + if (op != OP_RPA) { /* Matches ) on input */ + cerror("unbalanced paren's, op is \"%s\"", opname[op]); + return (1); + } + opp = opp + - 1; /* Unstack it */ + /* goto again; -- Fall through */ + + case OP_QUE: + goto again; /* Evaluate true expr. */ + + case OP_COL: /* : on stack. */ + opp = opp - 1; /* Unstack : */ + if (opp->op != OP_QUE) { /* Matches ? on stack? */ + cerror("Misplaced '?' or ':', previous operator is %s", + opname[opp->op]); + return (1); + } + /* + * Evaluate op1. + */ + default: /* Others: */ + opp = opp + - 1; /* Unstack the operator */ +#ifdef DEBUG_EVAL + printf("Stack before evaluation of %s\n", opname[op1]); + dumpstack(opstack, opp, value, valp); +#endif + valp = evaleval(valp, op1, skip); +#ifdef DEBUG_EVAL + printf("Stack after evaluation\n"); + dumpstack(opstack, opp, value, valp); +#endif + } /* op1 switch end */ + } /* Stack unwind loop */ +} + +FILE_LOCAL int +evallex(int skip) +/*int skip;*/ /* TRUE if short-circuit evaluation */ +/* + * Return next eval operator or value. Called from eval(). It + * calls a special-purpose routines for 'char' strings and + * numeric values: + * evalchar called to evaluate 'x' + * evalnum called to evaluate numbers. 
+ */ +{ + register int c, c1, t; + +again: do { /* Collect the token */ + c = skipws(); + if ((c = macroid(c)) == EOF_CHAR || c == '\n') { + unget(); + return (OP_EOE); /* End of expression */ + } +} while ((t = scantype(c)) == LET && catenate()); +if (t == INV) { /* Total nonsense */ + if (!skip) { + if (isascii(c) && isprint(c)) + cierror("illegal character '%c' in #if", c); + else + cierror("illegal character (%d decimal) in #if", c); + } + return (OP_FAIL); +} +else if (t == QUO) { /* ' or " */ + if (c == '\'') { /* Character constant */ + evalue = evalchar(skip); /* Somewhat messy */ +#ifdef DEBUG_EVAL + printf("evalchar returns %d.\n", evalue); +#endif + return (DIG); /* Return a value */ + } + cerror("Can't use a string in an #if", NULLST); + return (OP_FAIL); +} +else if (t == LET) { /* ID must be a macro */ + if (streq(token, "defined")) { /* Or defined name */ + c1 = c = skipws(); + if (c == '(') /* Allow defined(name) */ + c = skipws(); + if (scantype(c) == LET) { + evalue = (lookid(c) != NULL); + if (c1 != '(' /* Need to balance */ + || skipws() == ')') /* Did we balance? */ + return (DIG); /* Parsed ok */ + } + cerror("Bad #if ... defined() syntax", NULLST); + return (OP_FAIL); + } + else if (streq(token, "sizeof")) /* New sizeof hackery */ + return (dosizeof()); /* Gets own routine */ + /* + * The Draft ANSI C Standard says that an undefined symbol + * in an #if has the value zero. We are a bit pickier, + * warning except where the programmer was careful to write + * #if defined(foo) ? foo : 0 + */ +#ifdef STRICT_UNDEF + if (!skip) + cwarn("undefined symbol \"%s\" in #if, 0 used", token); +#endif + evalue = 0; + return (DIG); +} +else if (t == DIG) { /* Numbers are harder */ + evalue = evalnum(c); +#ifdef DEBUG_EVAL + printf("evalnum returns %d.\n", evalue); +#endif +} +else if (strchr("!=<>&|\\", c) != NULL) { + /* + * Process a possible multi-byte lexeme. 
+ */ + c1 = cget(); /* Peek at next char */ + switch (c) { + case '!': + if (c1 == '=') + return (OP_NE); + break; + + case '=': + if (c1 != '=') { /* Can't say a=b in #if */ + unget(); + cerror("= not allowed in #if", NULLST); + return (OP_FAIL); + } + return (OP_EQ); + + case '>': + case '<': + if (c1 == c) + return ((c == '<') ? OP_ASL : OP_ASR); + else if (c1 == '=') + return ((c == '<') ? OP_LE : OP_GE); + break; + + case '|': + case '&': + if (c1 == c) + return ((c == '|') ? OP_ORO : OP_ANA); + break; + + case '\\': + if (c1 == '\n') /* Multi-line if */ + goto again; + cerror("Unexpected \\ in #if", NULLST); + return (OP_FAIL); + } + unget(); +} +return (t); +} + +FILE_LOCAL int +dosizeof() +/* + * Process the sizeof (basic type) operation in an #if string. + * Sets evalue to the size and returns + * DIG success + * OP_FAIL bad parse or something. + */ +{ + register int c; + register TYPES* tp; + register SIZES* sizp; + register short* testp; + short typecode; + + if ((c = skipws()) != '(') + goto nogood; + /* + * Scan off the tokens. + */ + typecode = 0; + while ((c = skipws())) { + if ((c = macroid(c)) == EOF_CHAR || c == '\n') + goto nogood; /* End of line is a bug */ + else if (c == '(') { /* thing (*)() func ptr */ + if (skipws() == '*' + && skipws() == ')') { /* We found (*) */ + if (skipws() != '(') /* Let () be optional */ + unget(); + else if (skipws() != ')') + goto nogood; + typecode |= T_FPTR; /* Function pointer */ + } + else { /* Junk is a bug */ + goto nogood; + } + } + else if (scantype(c) != LET) /* Exit if not a type */ + break; + else if (!catenate()) { /* Maybe combine tokens */ + /* + * Look for this unexpandable token in basic_types. + * The code accepts "int long" as well as "long int" + * which is a minor bug as bugs go (and one shared with + * a lot of C compilers). 
+ */ + for (tp = basic_types; tp->name != NULLST; tp = tp+1 /*tp++*/) { + if (streq(token, tp->name)) + break; + } + if (tp->name == NULLST) { + cerror("#if sizeof, unknown type \"%s\"", token); + return (OP_FAIL); + } + typecode |= tp->type; /* Or in the type bit */ + } + } + /* + * We are at the end of the type scan. Chew off '*' if necessary. + */ + if (c == '*') { + typecode |= T_PTR; + c = skipws(); + } + if (c == ')') { /* Last syntax check */ + for (testp = test_table; *testp != 0; testp=testp+1/*testp++*/) { + if (!bittest(typecode & *testp)) { + cerror("#if ... sizeof: illegal type combination", NULLST); + return (OP_FAIL); + } + } + /* + * We assume that all function pointers are the same size: + * sizeof (int (*)()) == sizeof (float (*)()) + * We assume that signed and unsigned don't change the size: + * sizeof (signed int) == (sizeof unsigned int) + */ + if ((typecode & T_FPTR) != 0) /* Function pointer */ + typecode = T_FPTR | T_PTR; + else { /* Var or var * datum */ + typecode &= ~(T_SIGNED | T_UNSIGNED); + if ((typecode & (T_SHORT | T_LONG)) != 0) + typecode &= ~T_INT; + } + if ((typecode & ~T_PTR) == 0) { + cerror("#if sizeof() error, no type specified", NULLST); + return (OP_FAIL); + } + /* + * Exactly one bit (and possibly T_PTR) may be set. + */ + for (sizp = size_table; sizp->bits != 0; sizp=sizp+1/*sizp++*/) { + if ((typecode & ~T_PTR) == sizp->bits) { + evalue = ((typecode & T_PTR) != 0) + ? sizp->psize : sizp->size; + return (DIG); + } + } /* We shouldn't fail */ + cierror("#if ... sizeof: bug, unknown type code 0x%x", typecode); + return (OP_FAIL); + } + +nogood: unget(); + cerror("#if ... sizeof() syntax error", NULLST); + return (OP_FAIL); +} + +FILE_LOCAL int +bittest(int value) +/* + * TRUE if value is zero or exactly one bit is set in value. 
+ */ +{ +#if (4096 & ~(-4096)) == 0 + return ((value & ~(-value)) == 0); +#else + /* + * Do it the hard way (for non 2's complement machines) + */ + return (value == 0 || value ^ (value - 1) == (value * 2 - 1)); +#endif +} + +FILE_LOCAL int +evalnum(int c) +/*register int c;*/ +/* + * Expand number for #if lexical analysis. Note: evalnum recognizes + * the unsigned suffix, but only returns a signed int value. + */ +{ + register int value; + register int base; + register int c1; + + if (c != '0') + base = 10; + else if ((c = cget()) == 'x' || c == 'X') { + base = 16; + c = cget(); + } + else base = 8; + value = 0; + for (;;) { + c1 = c; + if (isascii(c) && isupper(c1)) + c1 = tolower(c1); + if (c1 >= 'a') + c1 -= ('a' - 10); + else c1 -= '0'; + if (c1 < 0 || c1 >= base) + break; + value *= base; + value += c1; + c = cget(); + } + if (c == 'u' || c == 'U') /* Unsigned nonsense */ + c = cget(); + unget(); + return (value); +} + +FILE_LOCAL int +evalchar(int skip) +/*int skip;*/ /* TRUE if short-circuit evaluation */ +/* + * Get a character constant + */ +{ + register int c; + register int value; + register int count; + + instring = TRUE; + if ((c = cget()) == '\\') { + switch ((c = cget())) { + case 'a': /* New in Standard */ +#if ('a' == '\a' || '\a' == ALERT) + value = ALERT; /* Use predefined value */ +#else + value = '\a'; /* Use compiler's value */ +#endif + break; + + case 'b': + value = '\b'; + break; + + case 'f': + value = '\f'; + break; + + case 'n': + value = '\n'; + break; + + case 'r': + value = '\r'; + break; + + case 't': + value = '\t'; + break; + + case 'v': /* New in Standard */ +#if ('v' == '\v' || '\v' == VT) + value = VT; /* Use predefined value */ +#else + value = '\v'; /* Use compiler's value */ +#endif + break; + + case 'x': /* '\xFF' */ + count = 3; + value = 0; + while ((((c = get()) >= '0' && c <= '9') + || (c >= 'a' && c <= 'f') + || (c >= 'A' && c <= 'F')) + && (--count >= 0)) { + value *= 16; + value += (c <= '9') ? 
(c - '0') : ((c & 0xF) + 9); + } + unget(); + break; + + default: + if (c >= '0' && c <= '7') { + count = 3; + value = 0; + while (c >= '0' && c <= '7' && --count >= 0) { + value *= 8; + value += (c - '0'); + c = get(); + } + unget(); + } + else value = c; + break; + } + } + else if (c == '\'') + value = 0; + else value = c; + /* + * We warn on multi-byte constants and try to hack + * (big|little)endian machines. + */ +#if BIG_ENDIAN + count = 0; +#endif + while ((c = get()) != '\'' && c != EOF_CHAR && c != '\n') { + if (!skip) + ciwarn("multi-byte constant '%c' isn't portable", c); +#if BIG_ENDIAN + count += BITS_CHAR; + value += (c << count); +#else + value <<= BITS_CHAR; + value += c; +#endif + } + instring = FALSE; + return (value); +} + +FILE_LOCAL int* +evaleval(int* valp, int op, int skip) +/*register int* valp; +int op; +int skip;*/ /* TRUE if short-circuit evaluation */ +/* + * Apply the argument operator to the data on the value stack. + * One or two values are popped from the value stack and the result + * is pushed onto the value stack. + * + * OP_COL is a special case. + * + * evaleval() returns the new pointer to the top of the value stack. + */ +{ + int v1, v2; + + if (isbinary(op)) { + valp = valp+-1; + v2 = valp[0]; + //v2 = *--valp; + } + valp = valp+-1; + v1 = valp[0]; + //v1 = *--valp; +#ifdef DEBUG_EVAL + printf("%s op %s", (isbinary(op)) ? "binary" : "unary", + opname[op]); + if (isbinary(op)) + printf(", v2 = %d.", v2); + printf(", v1 = %d.\n", v1); +#endif + switch (op) { + case OP_EOE: + break; + + case OP_ADD: + v1 += v2; + break; + + case OP_SUB: + v1 -= v2; + break; + + case OP_MUL: + v1 *= v2; + break; + + case OP_DIV: + case OP_MOD: + if (v2 == 0) { + if (!skip) { + cwarn("%s by zero in #if, zero result assumed", + (op == OP_DIV) ? 
"divide" : "mod"); + } + v1 = 0; + } + else if (op == OP_DIV) + v1 /= v2; + else + v1 %= v2; + break; + + case OP_ASL: + v1 <<= v2; + break; + + case OP_ASR: + v1 >>= v2; + break; + + case OP_AND: + v1 &= v2; + break; + + case OP_OR: + v1 |= v2; + break; + + case OP_XOR: + v1 ^= v2; + break; + + case OP_EQ: + v1 = (v1 == v2)?1:0; + break; + + case OP_NE: + v1 = (v1 != v2)?1:0; + break; + + case OP_LT: + v1 = (v1 < v2)?1:0; + break; + + case OP_LE: + v1 = (v1 <= v2)?1:0; + break; + + case OP_GE: + v1 = (v1 >= v2)?1:0; + break; + + case OP_GT: + v1 = (v1 > v2)?1:0; + break; + + case OP_ANA: + v1 = (v1 && v2)?1:0; + break; + + case OP_ORO: + v1 = (v1 || v2)?1:0; + break; + + case OP_COL: + /* + * v1 has the "true" value, v2 the "false" value. + * The top of the value stack has the test. + */ + valp = valp+-1; + v1 = valp[0] ? v1 : v2; + //v1 = (*--valp) ? v1 : v2; + break; + + case OP_NEG: + v1 = (-v1); + break; + + case OP_PLU: + break; + + case OP_COM: + v1 = ~v1; + break; + + case OP_NOT: + v1 = !v1; + break; + + default: + cierror("#if bug, operand = %d.", op); + v1 = 0; + } + valp[0] = v1; + valp = valp+1; + //*valp++ = v1; + return (valp); +} + +#ifdef DEBUG_EVAL +dumpstack(opstack, opp, value, valp) +OPTAB opstack[NEXP]; /* Operand stack */ +register OPTAB* opp; /* Operator stack */ +int value[NEXP]; /* Value stack */ +register int* valp; /* -> value vector */ +{ + printf("index op prec skip name -- op stack at %s", infile->bptr); + while (opp > opstack) { + printf(" [%2d] %2d %03o %d %s\n", opp - opstack, + opp->op, opp->prec, opp->skip, opname[opp->op]); + opp--; + } + while (--valp >= value) { + printf("value[%d] = %d\n", (valp - value), *valp); + } +} +#endif + + +/* Originally from cpp6.c: */ + +/* + * C P P 6 . C + * S u p p o r t R o u t i n e s + * + * Edit History + * 25-May-84 MM Added 8-bit support to type table. + * 30-May-84 ARF sharp() should output filename in quotes + * 02-Aug-84 MM Newline and #line hacking. 
sharp() now in cpp1.c + * 31-Aug-84 MM USENET net.sources release + * 11-Sep-84 ado/MM Keepcomments, also line number pathological + * 12-Sep-84 ado/MM bug if comment changes to space and we unget later. + * 03-Oct-84 gkr/MM Fixed scannumber bug for '.e' (as in struct.element). + * 04-Oct-84 MM Added ungetstring() for token concatenation + * 08-Oct-84 MM Yet another attack on number scanning + * 31-Oct-84 ado Parameterized $ in identifiers + * 2-Nov-84 MM Token concatenation is messier than I thought + * 6-Dec-84 MM \ is everywhere invisible. + */ + +#include +#include +//#include "cppdef.h" +//#include "cpp.h" + + /* + * skipnl() skips over input text to the end of the line. + * skipws() skips over "whitespace" (spaces or tabs), but + * not skip over the end of the line. It skips over + * TOK_SEP, however (though that shouldn't happen). + * scanid() reads the next token (C identifier) into token[]. + * The caller has already read the first character of + * the identifier. Unlike macroid(), the token is + * never expanded. + * macroid() reads the next token (C identifier) into token[]. + * If it is a #defined macro, it is expanded, and + * macroid() returns TRUE, otherwise, FALSE. + * catenate() Does the dirty work of token concatenation, TRUE if it did. + * scanstring() Reads a string from the input stream, calling + * a user-supplied function for each character. + * This function may be output() to write the + * string to the output file, or save() to save + * the string in the work buffer. + * scannumber() Reads a C numeric constant from the input stream, + * calling the user-supplied function for each + * character. (output() or save() as noted above.) + * save() Save one character in the work[] buffer. + * savestring() Saves a string in malloc() memory. + * getfile() Initialize a new FILEINFO structure, called when + * #include opens a new file, or a macro is to be + * expanded. + * getmem() Get a specified number of bytes from malloc memory. 
+ * output() Write one character to stdout (calling putchar) -- + * implemented as a function so its address may be + * passed to scanstring() and scannumber(). + * lookid() Scans the next token (identifier) from the input + * stream. Looks for it in the #defined symbol table. + * Returns a pointer to the definition, if found, or NULL + * if not present. The identifier is stored in token[]. + * defnedel() Define enter/delete subroutine. Updates the + * symbol table. + * get() Read the next byte from the current input stream, + * handling end of (macro/file) input and embedded + * comments appropriately. Note that the global + * instring is -- essentially -- a parameter to get(). + * cget() Like get(), but skip over TOK_SEP. + * unget() Push last gotten character back on the input stream. + * cerror(), cwarn(), cfatal(), cierror(), ciwarn() + * These routines format an print messages to the user. + * cerror & cwarn take a format and a single string argument. + * cierror & ciwarn take a format and a single int (char) argument. + * cfatal takes a format and a single string argument. + */ + + /* + * This table must be rewritten for a non-Ascii machine. + * + * Note that several "non-visible" characters have special meaning: + * Hex 1D DEF_MAGIC -- a flag to prevent #define recursion. + * Hex 1E TOK_SEP -- a delimiter for token concatenation + * Hex 1F COM_SEP -- a zero-width whitespace for comment concatenation + */ +#if TOK_SEP != 0x1E || COM_SEP != 0x1F || DEF_MAGIC != 0x1D +<< error type table isn't correct >> +#endif + +#if OK_DOLLAR +#define DOL LET +#else +#define DOL 000 +#endif + +char type[256] = { /* Character type codes Hex */ + END, 000, 000, 000, 000, 000, 000, 000, /* 00 */ + 000, SPA, 000, 000, 000, SPA, 000, 000, /* 08 */ /* NOTE from Zak - added carriage return as a space! 
*/ + 000, 000, 000, 000, 000, 000, 000, 000, /* 10 */ + 000, 000, 000, 000, 000, LET, 000, SPA, /* 18 */ + SPA,OP_NOT, QUO, 000, DOL,OP_MOD,OP_AND, QUO, /* 20 !"#$%&' */ +OP_LPA,OP_RPA,OP_MUL,OP_ADD, 000,OP_SUB, DOT,OP_DIV, /* 28 ()*+,-./ */ + DIG, DIG, DIG, DIG, DIG, DIG, DIG, DIG, /* 30 01234567 */ + DIG, DIG,OP_COL, 000, OP_LT, OP_EQ, OP_GT,OP_QUE, /* 38 89:;<=>? */ + 000, LET, LET, LET, LET, LET, LET, LET, /* 40 @ABCDEFG */ + LET, LET, LET, LET, LET, LET, LET, LET, /* 48 HIJKLMNO */ + LET, LET, LET, LET, LET, LET, LET, LET, /* 50 PQRSTUVW */ + LET, LET, LET, 000, BSH, 000,OP_XOR, LET, /* 58 XYZ[\]^_ */ + 000, LET, LET, LET, LET, LET, LET, LET, /* 60 `abcdefg */ + LET, LET, LET, LET, LET, LET, LET, LET, /* 68 hijklmno */ + LET, LET, LET, LET, LET, LET, LET, LET, /* 70 pqrstuvw */ + LET, LET, LET, 000, OP_OR, 000,OP_NOT, 000, /* 78 xyz{|}~ */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ + 000, 000, 000, 000, 000, 000, 000, 000, /* 80 .. FF */ +}; + +int scantype(int c) { + //return type[ c]; + char tmpstr[2]; + tmpstr[0] = (char)c; + tmpstr[1] = 0; + + if (scanisalpha(c)) { + return LET; + } else if (scanisnum(c)) { + return DIG; + } else { + switch (c) { + case 0: + return END; + case '+': + return OP_ADD; + case '-': + return OP_SUB; + case '*': + return OP_MUL; + case '/': + return OP_DIV; + case ':': + return OP_COL; + case '=': + return OP_EQ; + case '(': + return OP_LPA; + case ')': + return OP_RPA; + case '!': + case '~': // Should these have different definitions?? 
+ return OP_NOT; + case '?': + return OP_QUE; + case '%': + return OP_MOD; + case '|': + return OP_OR; + case '&': + return OP_AND; + case '^': + return OP_XOR; + case ' ': + case '\t': + case '\r': + return SPA; + case '\'': + case '"': + return QUO; + case '.': + return DOT; + case ',': + return 0; //??? + case '<': + return OP_LT; + case '>': + return OP_GT; + case '\\': + return BSH; + case 30: + return LET; + case '#': + //return SPA; + case ';': + case '{': + case '}': + case '[': + case ']': + case '@': + case '\n': + case 127: + return 0; + default: + fprintf(stderr, "NOTE: Char id is %d, old type is %d\n", c, type[c]); + cfatal("Unhandled character '%s'", tmpstr+0); + return -1; + //return type[c]; + //return 0; + } + } +} + +skipnl() +/* + * Skip to the end of the current input line. + */ +{ + register int c; + + do { /* Skip to newline */ + c = get(); + } while (c != '\n' && c != EOF_CHAR); +} + +int +skipws() +/* + * Skip over whitespace + */ +{ + register int c; + + do { /* Skip whitespace */ + c = get(); +#if COMMENT_INVISIBLE + } while (scantype(c) == SPA || c == COM_SEP); +#else +} while (scantype(c) == SPA); +#endif +return (c); +} + +int scanisalpha(int c) { + return ((c >= 'a') && (c <= 'z')) || ((c >= 'A') && (c <= 'Z')) || (c == '_'); +} + +int scanisnum(int c) { + return ((c >= '0') && (c <= '9')); +} + +int scanisalnum(int c) { + return scanisalpha(c) || scanisnum(c); +} + +int scanid(int c) +/*register int c;*/ /* First char of id */ +/* + * Get the next token (an id) into the token buffer. + * Note: this code is duplicated in lookid(). + * Change one, change both. + */ +{ + register char* bp; + + if (c == DEF_MAGIC) /* Eat the magic token */ + c = get(); /* undefiner. 
*/ + bp = token; + int first = 1; + while (first || scanisalnum(c) /*scantype(c) == LET || scantype(c) == DIG*/) { + first=0; + //do { + //fprintf(stderr, "Got character '%c' type %d\n", c, scantype(c)); + if (((unsigned long long)bp) < (((unsigned long long) token) + IDMAX))/*&token[IDMAX])*/ { /* token dim is IDMAX+1 */ + bp[0] = c; + bp++; + //*bp++ = c; + } + c = get(); + }// while (scantype(c) == LET || scantype(c) == DIG); + //fprintf(stderr, "Got character '%c' type %d\n", c, scantype(c)); + unget(); + bp[0] = EOS; + //fprintf(stderr, "scanid got token '%s'\n", token); +} + +int +macroid(int c) +/*register int c;*/ +/* + * If c is a letter, scan the id. if it's #defined, expand it and scan + * the next character and try again. + * + * Else, return the character. If scantype(c) is a LET, the token is in token. + */ +{ + register DEFBUF* dp; + + if (infile != NULL && infile->fp != NULL) + recursion = 0; + while (/*scantype(c) == LET*/scanisalpha(c) && (dp = lookid(c)) != NULL) { + expand(dp); + c = get(); + } + return (c); +} + +int +catenate() +/* + * A token was just read (via macroid). + * If the next character is TOK_SEP, concatenate the next token + * return TRUE -- which should recall macroid after refreshing + * macroid's argument. If it is not TOK_SEP, unget() the character + * and return FALSE. + */ +{ + register int c; + register char* token1; + +#if OK_CONCAT + if (get() != TOK_SEP) { /* Token concatenation */ + unget(); + return (FALSE); + } + else { + token1 = savestring(token); /* Save first token */ + c = macroid(get()); /* Scan next token */ + switch (scantype(c)) { /* What was it? */ + case LET: /* An identifier, ... 
*/ + if (strlen(token1) + strlen(token) >= NWORK) + cfatal("work buffer overflow doing %s #", token1); + sprintf(work, "%s%s", token1, token); + break; + + case DIG: /* A digit string */ + strcpy(work, token1); + workp = work + strlen(work); + do { + save(c); + } while ((c = get()) != TOK_SEP); + /* + * The trailing TOK_SEP is no longer needed. + */ + save(EOS); + break; + + default: /* An error, ... */ +#if ! COMMENT_INVISIBLE + if (isprint(c)) + cierror("Strange character '%c' after #", c); + else + cierror("Strange character (%d.) after #", c); +#endif + strcpy(work, token1); + unget(); + break; + } + /* + * work has the concatenated token and token1 has + * the first token (no longer needed). Unget the + * new (concatenated) token after freeing token1. + * Finally, setup to read the new token. + */ + free(token1); /* Free up memory */ + ungetstring(work); /* Unget the new thing, */ + return (TRUE); + } +#else + return (FALSE); /* Not supported */ +#endif +} + +int +scanstring(int delim, int (*outfun)()) +/*register int delim;*/ /* ' or " */ +/*int (*outfun)();*/ /* Output function */ +/* + * Scan off a string. Warning if terminated by newline or EOF. + * outfun() outputs the character -- to a buffer if in a macro. + * TRUE if ok, FALSE if error. 
+ */ +{ + register int c; + //fprintf(stderr, "scanstring()A\n"); + instring = TRUE; /* Don't strip comments */ + //fprintf(stderr, "scanstring()B\n"); + outfun(delim); + //fprintf(stderr, "scanstring()C\n"); + /*(*outfun)(delim);*/ + c = get(); + while (/*(c = get())*/c != delim + && c != '\n' + && c != EOF_CHAR) { + //fprintf(stderr, "scanstring()X\n"); + if (c != DEF_MAGIC) + outfun(c);/*(*outfun)(c);*/ + if (c == '\\') + outfun(get());/*(*outfun)(get());*/ + c = get(); + } + //fprintf(stderr, "scanstring()D\n"); + instring = FALSE; + //fprintf(stderr, "scanstring()E\n"); + if (c == delim) { + outfun(c); /*(*outfun)(c);*/ + //fprintf(stderr, "scanstring()F\n"); + return (TRUE); + } + else { + cerror("Unterminated string", NULLST); + unget(); + return (FALSE); + } +} + +int scannumber(int c, int (*outfun)()) +/*register int c;*/ /* First char of number */ +/*register int (*outfun)();*/ /* Output/store func */ +/* + * Process a number. We know that c is from 0 to 9 or dot. + * Algorithm from Dave Conroy's Decus C. + */ +{ + register int radix; /* 8, 10, or 16 */ + int expseen; /* 'e' seen in floater */ + int signseen; /* '+' or '-' seen */ + int octal89; /* For bad octal test */ + int dotflag; /* TRUE if '.' was seen */ + + expseen = FALSE; /* No exponent seen yet */ + signseen = TRUE; /* No +/- allowed yet */ + octal89 = FALSE; /* No bad octal yet */ + radix = 10; /* Assume decimal */ + if ((dotflag = (c == '.')) != FALSE) { /* . something? */ + outfun('.');/*(*outfun)('.');*/ /* Always out the dot */ + if (scantype(c = get()) != DIG) { /* If not a float numb, */ + unget(); /* Rescan strange char */ + return; /* All done for now */ + } + } /* End of float test */ + else if (c == '0') { /* Octal or hex? */ + outfun(c);/*(*outfun)(c);*/ /* Stuff initial zero */ + radix = 8; /* Assume it's octal */ + c = get(); /* Look for an 'x' */ + if (c == 'x' || c == 'X') { /* Did we get one? 
*/ + radix = 16; /* Remember new radix */ + outfun(c);/*(*outfun)(c);*/ /* Stuff the 'x' */ + c = get(); /* Get next character */ + } + } + for (;;) { /* Process curr. char. */ + /* + * Note that this algorithm accepts "012e4" and "03.4" + * as legitimate floating-point numbers. + */ + if (radix != 16 && (c == 'e' || c == 'E')) { + if (expseen) /* Already saw 'E'? */ + break; /* Exit loop, bad nbr. */ + expseen = TRUE; /* Set exponent seen */ + signseen = FALSE; /* We can read '+' now */ + radix = 10; /* Decimal exponent */ + } + else if (radix != 16 && c == '.') { + if (dotflag) /* Saw dot already? */ + break; /* Exit loop, two dots */ + dotflag = TRUE; /* Remember the dot */ + radix = 10; /* Decimal fraction */ + } + else if (c == '+' || c == '-') { /* 1.0e+10 */ + if (signseen) /* Sign in wrong place? */ + break; /* Exit loop, not nbr. */ + /* signseen = TRUE; */ /* Remember we saw it */ + } + else { /* Check the digit */ + switch (c) { + case '8': case '9': /* Sometimes wrong */ + octal89 = TRUE; /* Do check later */ + case '0': case '1': case '2': case '3': + case '4': case '5': case '6': case '7': + break; /* Always ok */ + + case 'a': case 'b': case 'c': case 'd': case 'e': case 'f': + case 'A': case 'B': case 'C': case 'D': case 'E': case 'F': + if (radix == 16) /* Alpha's are ok only */ + break; /* if reading hex. */ + default: /* At number end */ + goto done; /* Break from for loop */ + } /* End of switch */ + } /* End general case */ + outfun(c);/*(*outfun)(c);*/ /* Accept the character */ + signseen = TRUE; /* Don't read sign now */ + c = get(); /* Read another char */ + } /* End of scan loop */ + /* + * When we break out of the scan loop, c contains the first + * character (maybe) not in the number. If the number is an + * integer, allow a trailing 'L' for long and/or a trailing 'U' + * for unsigned. If not those, push the trailing character back + * on the input stream. Floating point numbers accept a trailing + * 'L' for "long double". 
+ */ +done: if (dotflag || expseen) { /* Floating point? */ + if (c == 'l' || c == 'L') { + outfun(c);/*(*outfun)(c);*/ + c = get(); /* Ungotten later */ + } +} +else { /* Else it's an integer */ + /* + * We know that dotflag and expseen are both zero, now: + * dotflag signals "saw 'L'", and + * expseen signals "saw 'U'". + */ + for (;;) { + switch (c) { + case 'l': + case 'L': + if (dotflag) + goto nomore; + dotflag = TRUE; + break; + + case 'u': + case 'U': + if (expseen) + goto nomore; + expseen = TRUE; + break; + + default: + goto nomore; + } + outfun(c);/*(*outfun)(c);*/ /* Got 'L' or 'U'. */ + c = get(); /* Look at next, too. */ + } +} +nomore: unget(); /* Not part of a number */ +if (octal89 && radix == 8) +cwarn("Illegal digit in octal number", NULLST); +} + +int save(int c) +/*register int c;*/ +{ + if (((unsigned long long) workp) >= ((unsigned long long) work + NWORK)) /*&work[NWORK]) */{ + work[NWORK - 1] = '\0'; + cfatal("Work buffer overflow: %s", work); + } + else { + workp[0] = c; + workp++; + //*workp++ = c; + } +} + +char* +savestring(char* text) +/*char* text;*/ +/* + * Store a string into free memory. + */ +{ + register char* result; + + result = getmem(strlen(text) + 1); + strcpy(result, text); + return (result); +} + +FILEINFO* +getfile(int bufsize, char* name) +/*int bufsize;*/ /* Line or define buffer size */ +/*char* name;*/ /* File or macro name string */ +/* + * Common FILEINFO buffer initialization for a new file or macro. 
+ */ +{ + register FILEINFO* file; + register int size; + + size = strlen(name); /* File/macro name */ + file = (FILEINFO*)getmem(sizeof(FILEINFO) + bufsize + size); + file->parent = infile; /* Chain files together */ + file->fp = NULL; /* No file yet */ + file->filename = savestring(name); /* Save file/macro name */ + file->progname = NULL; /* No #line seen yet */ + file->unrecur = 0; /* No macro fixup */ + file->bptr = file->buffer; /* Initialize line ptr */ + file->buffer[0] = EOS; /* Force first read */ + file->line = 0; /* (Not used just yet) */ + if (infile != NULL) /* If #include file */ + infile->line = line; /* Save current line */ + infile = file; /* New current file */ + line = 1; /* Note first line */ + return (file); /* All done. */ +} + +char* +getmem(int size) /* TODO: This doesn't look very 64-bit safe... -Zak */ +/*int size;*/ +/* + * Get a block of free memory. + */ +{ + register char* result; + //extern char* malloc(); + + if ((result = malloc((unsigned)size)) == NULL) + cfatal("Out of memory", NULLST); + return (result); +} + +/* + * C P P S y m b o l T a b l e s + */ + + /* + * SBSIZE defines the number of hash-table slots for the symbol table. + * It must be a power of 2. + */ +#ifndef SBSIZE +#define SBSIZE 64 +#endif +#define SBMASK (SBSIZE - 1) +#if (SBSIZE ^ SBMASK) != ((SBSIZE * 2) - 1) +<< error, SBSIZE must be a power of 2 >> +#endif + +static DEFBUF* symtab[SBSIZE]; /* Symbol table queue headers */ + +DEFBUF* +lookid(int c) +/*int c;*/ /* First character of token */ +/* + * Look for the next token in the symbol table. Returns token in "token". + * If found, returns the table pointer; Else returns NULL. 
+ */ +{ + register int nhash; + register DEFBUF* dp; + register char* np; + int temp; + int isrecurse; /* For #define foo foo */ + + np = token; + nhash = 0; + if ((isrecurse = (c == DEF_MAGIC))) /* If recursive macro */ + c = get(); /* hack, skip DEF_MAGIC */ + do { + if (((unsigned long long)np) < ((unsigned long long) token+IDMAX)/* &token[IDMAX]*/) { /* token dim is IDMAX+1 */ + np[0] = c; + np++;/**np++ = c;*/ /* Store token byte */ + nhash += c; /* Update hash value */ + } + c = get(); /* And get another byte */ + } while (scanisalnum(c) /*scantype(c) == LET || scantype(c) == DIG*/); + unget(); /* Rescan terminator */ + *np = EOS; /* Terminate token */ + if (isrecurse) /* Recursive definition */ + return (NULL); /* undefined just now */ + nhash += (((long long)np) - ((long long)token)); /* Fix hash value */ + dp = symtab[nhash & SBMASK]; /* Starting bucket */ + while (dp != (DEFBUF*)NULL) { /* Search symbol table */ + if (dp->hash == nhash /* Fast precheck */ + && (temp = strcmp(dp->name, token)) >= 0) + break; + dp = dp->link; /* Nope, try next one */ + } + return ((temp == 0) ? dp : NULL); +} + +DEFBUF* +defendel(char* name, int delete) +/*char* name; +int delete;*/ /* TRUE to delete a symbol */ +/* + * Enter this name in the lookup table (delete = FALSE) + * or delete this name (delete = TRUE). + * Returns a pointer to the define block (delete = FALSE) + * Returns NULL if the symbol wasn't defined (delete = TRUE). 
+ */ +{ + register DEFBUF* dp; + register DEFBUF** prevp; + register char* np; + int nhash; + int temp; + int size; + //fprintf(stderr, "Hello from defendel(..)A0x%lx\n",name); + + nhash = 0; + for (/*nhash = 0,*/ np = name; np[0] != EOS;np++) { + nhash += np[0]; //np++;//*np++; + } + //fprintf(stderr, "Hello from defendel(..)B\n"); + size = (((long long)np) - ((long long)name)); + nhash += size; + prevp = symtab + (nhash & SBMASK)/*&symtab[nhash & SBMASK]*/; + //fprintf(stderr, "Hello from defendel(..)C\n"); + while ((dp = *prevp) != (DEFBUF*)NULL) { + if (dp->hash == nhash + && (temp = strcmp(dp->name, name)) >= 0) { + if (temp > 0) + dp = NULL; /* Not found */ + else { + *prevp = dp->link; /* Found, unlink and */ + if (dp->repl != NULL) /* Free the replacement */ + free(dp->repl); /* if any, and then */ + free((char*)dp); /* Free the symbol */ + } + break; + } + prevp = dp; /* XXX Note from Zak: Was like this "prevp = &dp->link;" but that just gets a pointer to dp right? */ + } + if (!delete) { + dp = (DEFBUF*)getmem(sizeof(DEFBUF) + size); + dp->link = *prevp; + *prevp = dp; + dp->hash = nhash; + dp->repl = NULL; + dp->nargs = 0; + strcpy(dp->name, name); + } + return (dp); +} + +#if DEBUG + +dumpdef(why) +char* why; +{ + register DEFBUF* dp; + register DEFBUF** syp; + + printf("CPP symbol table dump %s\n", why); + for (syp = symtab; syp < &symtab[SBSIZE]; syp++) { + if ((dp = *syp) != (DEFBUF*)NULL) { + printf("symtab[%d]\n", (syp - symtab)); + do { + dumpadef((char*)NULL, dp); + } while ((dp = dp->link) != (DEFBUF*)NULL); + } + } +} + +dumpadef(why, dp) +char* why; /* Notation */ +register DEFBUF* dp; +{ + register char* cp; + register int c; + + printf(" \"%s\" [%d]", dp->name, dp->nargs); + if (why != NULL) + printf(" (%s)", why); + if (dp->repl != NULL) { + printf(" => "); + for (cp = dp->repl; (c = *cp++ & 0xFF) != EOS;) { + if (c >= MAC_PARM && c <= (MAC_PARM + PAR_MAC)) + printf("<%d>", c - MAC_PARM); + else if (isprint(c) || c == '\n' || c == '\t') + 
putchar(c); + else if (c < ' ') + printf("<^%c>", c + '@'); + else + printf("<\\0%o>", c); + } + } + else { + printf(", no replacement."); + } + putchar('\n'); +} +#endif + +/* + * G E T + */ + +int +get() +/* + * Return the next character from a macro or the current file. + * Handle end of file from #include files. + */ +{ + register int c; + register FILEINFO* file; + register int popped; /* Recursion fixup */ + //fprintf(stderr, "Hello from get()A\n"); + + popped = 0; +get_from_file: + if ((file = infile) == NULL) + return (EOF_CHAR); +newline: +#if 0 + printf("get(%s), recursion %d, line %d, bptr = %d, buffer \"%s\"\n", + file->filename, recursion, line, + file->bptr - file->buffer, file->buffer); +#endif + //fprintf(stderr, "Hello from get()B\n"); + /* + * Read a character from the current input line or macro. + * At EOS, either finish the current macro (freeing temp. + * storage) or read another line from the current input file. + * At EOF, exit the current file (#include) or, at EOF from + * the cpp input file, return EOF_CHAR to finish processing. + */ + c = file->bptr[0] & 0xFF; + file->bptr++; + if (c == EOS) { + //fprintf(stderr, "Hello from get()C\n"); + /* + * Nothing in current line or macro. Get next line (if + * input from a file), or do end of file/macro processing. + * In the latter case, jump back to restart from the top. 
+ */ + if (file->fp == NULL) { /* NULL if macro */ + //fprintf(stderr, "Hello from get()D\n"); + popped++; + recursion -= file->unrecur; + if (recursion < 0) + recursion = 0; + infile = file->parent; /* Unwind file chain */ + } + else { /* Else get from a file */ + //fprintf(stderr, "Hello from get()E\n"); + if ((file->bptr = fgets(file->buffer, NBUFF, file->fp)) + != NULL) { + //fprintf(stderr, "Hello from get()F\n"); +#if DEBUG + if (debug > 1) { /* Dump it to stdout */ + printf("\n#line %d (%s), %s", + line, file->filename, file->buffer); + } +#endif + goto newline; /* process the line */ + } + else { + //fprintf(stderr, "Hello from get()G\n"); + fclose(file->fp); /* Close finished file */ + if ((infile = file->parent) != NULL) { + /* + * There is an "ungotten" newline in the current + * infile buffer (set there by doinclude() in + * cpp1.c). Thus, we know that the mainline code + * is skipping over blank lines and will do a + * #line at its convenience. + */ + wrongline = TRUE; /* Need a #line now */ + } + } + } + //fprintf(stderr, "Hello from get()H\n"); + /* + * Free up space used by the (finished) file or macro and + * restart input from the parent file/macro, if any. + */ + free(file->filename); /* Free name and */ + if (file->progname != NULL) /* if a #line was seen, */ + free(file->progname); /* free it, too. */ + free((char*)file); /* Free file space */ + if (infile == NULL) /* If at end of file */ + return (EOF_CHAR); /* Return end of file */ + line = infile->line; /* Reset line number */ + goto get_from_file; /* Get from the top. */ + } + //fprintf(stderr, "Hello from get()X\n"); + /* + * Common processing for the new character. 
+ */ + if (c == DEF_MAGIC && file->fp != NULL) /* Don't allow delete */ + goto newline; /* from a file */ + if (file->parent != NULL) { /* Macro or #include */ + if (popped != 0) + file->parent->unrecur += popped; + else { + recursion -= file->parent->unrecur; + if (recursion < 0) + recursion = 0; + file->parent->unrecur = 0; + } + } + if (c == '\n') /* Maintain current */ + line = line + 1; /* line counter */ + if (instring) /* Strings just return */ + return (c); /* the character. */ + else if (c == '/') { /* Comment? */ + instring = TRUE; /* So get() won't loop */ + if ((c = get()) != '*' && c != '/') { /* Next byte '*'/'/'? */ + instring = FALSE; /* Nope, no comment */ + unget(); /* Push the char. back */ + return ('/'); /* Return the slash */ + } + int islinecomment = (c == '/'); + if (keepcomments) { /* If writing comments */ + putchar('/'); /* Write out the */ + putchar(c /*'*'*/); /* initializer */ + } + for (;;) { /* Eat a comment */ + c = get(); + test: if (keepcomments && c != EOF_CHAR) + cput(c); + switch (c) { + case EOF_CHAR: + cerror("EOF in comment", NULLST); + return (EOF_CHAR); + + case '/': + if ((c = get()) != '*') /* Don't let comments */ + goto test; /* Nest. */ +#ifdef STRICT_COMMENTS + cwarn("Nested comments", NULLST); +#endif + /* Fall into * stuff */ + case '*': + if ((c = get()) != '/' || islinecomment) /* If comment doesn't */ + goto test; /* end, look at next */ + instring = FALSE; /* End of comment, */ + if (keepcomments) { /* Put out the comment */ + cput(c); /* terminator, too */ + } + /* + * A comment is syntactically "whitespace" -- + * however, there are certain strange sequences + * such as + * #define foo(x) (something) + * foo|* comment *|(123) + * these are '/' ^ ^ + * where just returning space (or COM_SEP) will cause + * problems. This can be "fixed" by overwriting the + * '/' in the input line buffer with ' ' (or COM_SEP) + * but that may mess up an error message. 
+ * So, we peek ahead -- if the next character is + * "whitespace" we just get another character, if not, + * we modify the buffer. All in the name of purity. + */ + if (file->bptr[0] == '\n' + || scantype(file->bptr[0] & 0xFF) == SPA) + goto newline; +#if COMMENT_INVISIBLE + /* + * Return magic (old-fashioned) syntactic space. + */ + return ((file->bptr[-1] = COM_SEP)); +#else + return ((file->bptr[-1] = ' ')); +#endif + + case '\n': /* we'll need a #line */ + if (!keepcomments) + wrongline = TRUE; /* later... */ + if (islinecomment) { // Check line comment terminator + cput('\n'); + instring = FALSE; + return ((file->bptr[-1] = '\n')); + } + default: /* Anything else is */ + break; /* Just a character */ + } /* End switch */ + } /* End comment loop */ + } /* End if in comment */ + else if (!inmacro && c == '\\') { /* If backslash, peek */ + if ((c = get()) == '\n') { /* for a . If so, */ + wrongline = TRUE; + goto newline; + } + else { /* Backslash anything */ + unget(); /* Get it later */ + return ('\\'); /* Return the backslash */ + } + } + else if (c == '\f' || c == VT) /* Form Feed, Vertical */ + c = ' '; /* Tab are whitespace */ + return (c); /* Just return the char */ +} + +int unget() +/* + * Backup the pointer to reread the last character. Fatal error + * (code bug) if we backup too far. unget() may be called, + * without problems, at end of file. Only one character may + * be ungotten. If you need to unget more, call ungetstring(). + */ +{ + register FILEINFO* file; + + if ((file = infile) == NULL) + return; /* Unget after EOF */ + if (--file->bptr < file->buffer) + cfatal("Too much pushback", NULLST); + if (*file->bptr == '\n') /* Ungetting a newline? */ + --line; /* Unget the line number, too */ +} + +int ungetstring(char* text) +/*char* text;*/ +/* + * Push a string back on the input stream. This is done by treating + * the text as if it were a macro. 
+ */ +{ + register FILEINFO* file; + extern FILEINFO* getfile(); + + file = getfile(strlen(text) + 1, ""); + strcpy(file->buffer, text); +} + +int +cget() +/* + * Get one character, absorb "funny space" after comments or + * token concatenation + */ +{ + register int c; + + do { + c = get(); +#if COMMENT_INVISIBLE + } while (c == TOK_SEP || c == COM_SEP); +#else +} while (c == TOK_SEP); +#endif +return (c); +} + +/* + * Error messages and other hacks. The first byte of severity + * is 'S' for string arguments and 'I' for int arguments. This + * is needed for portability with machines that have int's that + * are shorter than char *'s. + */ + +static int +domsg(char* severity, char* format, char* arg) +/*char* severity;*/ /* "Error", "Warning", "Fatal" */ +/*char* format;*/ /* Format for the error message */ +/*char* arg;*/ /* Something for the message */ +/* + * Print filenames, macro names, and line numbers for error messages. + */ +{ + register char* tp; + register FILEINFO* file; + + fprintf(stderr, "%sline %d, %s: ", MSG_PREFIX, line, severity+1/*&severity[1]*/); + if (*severity == 'S') + fprintf(stderr, format, arg); + else + fprintf(stderr, format, (int)arg); + putc('\n', stderr); + if ((file = infile) == NULL) + return; /* At end of file */ + if (file->fp != NULL) { + tp = file->buffer; /* Print current file */ + fprintf(stderr, "%s", tp); /* name, making sure */ + if (tp[strlen(tp) - 1] != '\n') /* there's a newline */ + putc('\n', stderr); + } + while ((file = file->parent) != NULL) { /* Print #includes, too */ + if (file->fp == NULL) + fprintf(stderr, "from macro %s\n", file->filename); + else { + tp = file->buffer; + fprintf(stderr, "from file %s, line %d:\n%s", + (file->progname != NULL) + ? 
file->progname : file->filename, + file->line, tp); + if (tp[strlen(tp) - 1] != '\n') + putc('\n', stderr); + } + } +} + +int cerror(char* format, char* sarg) +/*char* format; +char* sarg;*/ /* Single string argument */ +/* + * Print a normal error message, string argument. + */ +{ + domsg("SError", format, sarg); + errors++; +} + +int cierror(char* format, int narg) +/*char* format; +int narg;*/ /* Single numeric argument */ +/* + * Print a normal error message, numeric argument. + */ +{ + domsg("IError", format, (char*)narg); + errors++; +} + +int cfatal(char* format, char* sarg) +/*char* format; +char* sarg; */ /* Single string argument */ +/* + * A real disaster + */ +{ + domsg("SFatal error", format, sarg); + exit(IO_ERROR); +} + +int cwarn(char* format, char* sarg) +/*char* format; +char* sarg; */ /* Single string argument */ +/* + * A non-fatal error, string argument. + */ +{ + domsg("SWarning", format, sarg); +} + +int ciwarn(char* format, int narg) +/*char* format; +int narg;*/ /* Single numeric argument */ +/* + * A non-fatal error, numeric argument. + */ +{ + domsg("IWarning", format, (char*)narg); +} + + + + + +/* From ifdef of CPP_IMPLEMENTATION: */ +#endif diff --git a/fakelibc/README b/fakelibc/README new file mode 100644 index 0000000..090a280 --- /dev/null +++ b/fakelibc/README @@ -0,0 +1,6 @@ +The fakelibc directory contains "fake" header files to allow the compiler to +self-host on Linux-compatible systems without needing complete C compatibility. + +It can also be used for other small tools built with the compiler, but is not +a replacement for a full libc (just a more-portable wrapper over GNU libc or +compatible). diff --git a/fakelibc/assert.h b/fakelibc/assert.h new file mode 100644 index 0000000..7b59e38 --- /dev/null +++ b/fakelibc/assert.h @@ -0,0 +1,7 @@ +#ifndef _FAKELIBC_ASSERT_H +#define _FAKELIBC_ASSERT_H + +#define assert(...) 
do {} while(0) + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/ctype.h b/fakelibc/ctype.h new file mode 100644 index 0000000..18f8e29 --- /dev/null +++ b/fakelibc/ctype.h @@ -0,0 +1,74 @@ +#ifndef _FAKELIBC_CTYPE_H +#define _FAKELIBC_CTYPE_H +/* +#define isspace fake_isspace +#define isdigit fake_isdigit +#define isxdigit fake_isxdigit +#define isalpha fake_isalpha +#define isalnum fake_isalnum +#define isprint fake_isprint +#define ispunct fake_ispunct + +static int fake_isspace(int ch) { + return (ch == ' ' || ch == '\t' || ch == '\r' || ch == '\n'); +} + +static int fake_isdigit(int ch) { + return (ch >= '0') && (ch <= '9'); +} + +static int fake_isxdigit(int ch) { + return isdigit(ch) || ((ch >= 'a') && (ch <= 'f')) || ((ch >= 'A') && (ch <= 'F')); +} + +static int fake_isalpha(int ch) { + return ((ch >= 'a') && (ch <= 'z')) || ((ch >= 'A') && (ch <= 'Z')); +} + +static int fake_isalnum(int ch) { + return isalpha(ch) || isdigit(ch); +} + +static int fake_isprint(int ch) { + return isalnum(ch) || isspace(ch) || ispunct(ch); +} + +static int fake_ispunct(int ch) { + switch (ch) { + case ',': + case '<': + case '.': + case '>': + case '/': + case '?': + case ';': + case ':': + case '\'': + case '\"': + case '[': + case ']': + case '{': + case '}': + case '`': + case '~': + case '@': + case '#': + case '$': + case '%': + case '^': + case '&': + case '*': + case '(': + case ')': + case '-': + case '_': + case '=': + case '+': + return 1; + default: + return 0; + } +} +*/ +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/dirent.h b/fakelibc/dirent.h new file mode 100644 index 0000000..e69de29 diff --git a/fakelibc/errno.h b/fakelibc/errno.h new file mode 100644 index 0000000..ec388fa --- /dev/null +++ b/fakelibc/errno.h @@ -0,0 +1,20 @@ +#ifndef _FAKELIBC_ERRNO_H +#define _FAKELIBC_ERRNO_H + +//extern int errno; + +/* On modern Linux platforms the errno is simulated. 
This is presumably so that each + * thread can have it's own errno without the ABI becoming a huge mess. + */ +#ifdef __MAC +int errno; +#else +int* __errno_location(); + +#define errno __errno_location()[0] +#endif + +#define ENOENT 2 + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/fcntl.h b/fakelibc/fcntl.h new file mode 100644 index 0000000..e69de29 diff --git a/fakelibc/float.h b/fakelibc/float.h new file mode 100644 index 0000000..f30d9c7 --- /dev/null +++ b/fakelibc/float.h @@ -0,0 +1,7 @@ +#ifndef _FAKELIBC_FLOAT_H +#define _FAKELIBC_FLOAT_H + +#define DBL_MAX_EXP 1024 +#define DBL_MANT_DIG 53 + +#endif diff --git a/fakelibc/inttypes.h b/fakelibc/inttypes.h new file mode 100644 index 0000000..677becb --- /dev/null +++ b/fakelibc/inttypes.h @@ -0,0 +1,7 @@ +#ifndef _FAKELIBC_INTTYPES_H +#define _FAKELIBC_INTTYPES_H + +#define PRIx32 "x" + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/limits.h b/fakelibc/limits.h new file mode 100644 index 0000000..0775541 --- /dev/null +++ b/fakelibc/limits.h @@ -0,0 +1,8 @@ +#ifndef _FAKELIBC_LIMITS_H +#define _FAKELIBC_LIMITS_H + +#define CHAR_BIT 8 +#define UINT_MAX 0xFFFFFFFFU + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/math.h b/fakelibc/math.h new file mode 100644 index 0000000..e69de29 diff --git a/fakelibc/memory.h b/fakelibc/memory.h new file mode 100644 index 0000000..ad6c943 --- /dev/null +++ b/fakelibc/memory.h @@ -0,0 +1,5 @@ +#ifndef _FAKELIBC_MEMORY_H +#define _FAKELIBC_MEMORY_H + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/pwd.h b/fakelibc/pwd.h new file mode 100644 index 0000000..e69de29 diff --git a/fakelibc/setjmp.h b/fakelibc/setjmp.h new file mode 100644 index 0000000..48fa7f3 --- /dev/null +++ b/fakelibc/setjmp.h @@ -0,0 +1,18 @@ +#ifndef _FAKELIBC_SETJMP_H +#define _FAKELIBC_SETJMP_H + +/*struct jmp_buf_struct { + int todo; +}; + +typedef struct jmp_buf_struct jmp_buf;*/ +typedef int jmp_buf; + +#define setjmp(x) \ + 0 +// 
(printf("WARNING: Unimplemented: setjmp\n") && 0) +#define longjmp(x,y) \ + printf("WARNING: Unimplemented: longjmp\n") + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/signal.h b/fakelibc/signal.h new file mode 100644 index 0000000..e69de29 diff --git a/fakelibc/stdarg.h b/fakelibc/stdarg.h new file mode 100644 index 0000000..315b543 --- /dev/null +++ b/fakelibc/stdarg.h @@ -0,0 +1,27 @@ +#ifndef _FAKELIBC_STDARG_H +#define _FAKELIBC_STDARG_H + +// TODO: This will basically not work except for the simplest printf-like cases + +typedef long long** va_list; + +#define _VA_CHECK() \ + if (__builtin_func_callconv != 101) {\ + printf("ERROR: Unpacking varargs currently only works with __classic_call (#101). Function %s uses convention %d instead.\n", __func__, __builtin_func_callconv);\ + } + +#define va_start(list,lastarg) \ + do {\ + _VA_CHECK();\ + list = &lastarg;\ + list++;\ + } while(0) + +#define va_arg(list,T) \ + (T)(list++) + +#define va_end(list) \ + do {list = (void*)0;} while(0) + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/stdbool.h b/fakelibc/stdbool.h new file mode 100644 index 0000000..cb30969 --- /dev/null +++ b/fakelibc/stdbool.h @@ -0,0 +1,10 @@ +#ifndef _FAKELIBC_STDBOOL_H +#define _FAKELIBC_STDBOOL_H + +typedef int bool; + +#define true 1 +#define false 0 + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/stddef.h b/fakelibc/stddef.h new file mode 100644 index 0000000..9b44bcd --- /dev/null +++ b/fakelibc/stddef.h @@ -0,0 +1,13 @@ +#ifndef _FAKELIBC_STDDEF_H +#define _FAKELIBC_STDDEF_H + +//#ifndef _FAKELIBC_STDLIB_H +typedef long size_t; +//#endif + +#ifndef NULL +#define NULL ((void*) 0) +#endif + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/stdint.h b/fakelibc/stdint.h new file mode 100644 index 0000000..19a551c --- /dev/null +++ b/fakelibc/stdint.h @@ -0,0 +1,26 @@ +#ifndef _FAKELIBC_STDINT_H +#define _FAKELIBC_STDINT_H + +typedef char int8_t; +typedef short int16_t; 
+typedef int int32_t; +typedef long long int64_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +typedef unsigned long long uint64_t; + +typedef int64_t intptr_t; +typedef uint64_t uintptr_t; + +typedef uint32_t uint_fast8_t; +typedef int32_t int_fast8_t; +typedef uint32_t uint_fast16_t; +typedef int32_t int_fast16_t; +typedef uint32_t uint_fast32_t; +typedef int32_t int_fast32_t; +typedef uint64_t uint_fast64_t; +typedef int64_t int_fast64_t; + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/stdio.h b/fakelibc/stdio.h new file mode 100644 index 0000000..1b38211 --- /dev/null +++ b/fakelibc/stdio.h @@ -0,0 +1,58 @@ +#ifndef _FAKELIBC_STDIO_H +#define _FAKELIBC_STDIO_H + +struct FILE_internals {}; + +typedef struct FILE_internals FILE; + +#define EXIT_FAILURE 1 +#define EXIT_SUCCESS 0 + +#ifdef __MAC +extern FILE* __stderrp; +extern FILE* __stdinp; +extern FILE* __stdoutp; +#define stderr __stderrp +#define stdin __stdinp +#define stdout __stdoutp +#endif + +extern FILE* stderr; +extern FILE* stdin; +extern FILE* stdout; + +#define EOF ((int)-1) + +int printf(const char* fmt, ...); +int sprintf(char *buf, const char* fmt, ...); +int fprintf(FILE* f, const char* fmt, ...); + +FILE* fopen(const char* name, const char* mode); +int fclose(FILE* f); +int fflush(FILE* f); + +long fread(void* buffer, long size, long count, FILE* f); +long fwrite(void* buffer, long size, long count, FILE* f); + +char* fgets(char*, int, FILE*); + +int fputs(const char*, FILE*); +int fputc(int, FILE*); + +void perror(const char*); + +int putc(int c, FILE* f); + +int putchar(int c); + +int fseek(FILE* f, long offset, int wh); +#define SEEK_SET 0 +#define SEEK_CUR 1 +#define SEEK_END 2 + +long ftell(FILE* f); + +long getline(char** linevar, long *nvar, FILE* f); + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/stdlib.h b/fakelibc/stdlib.h new file mode 100644 index 0000000..5a3a7b3 --- /dev/null +++ 
#ifndef _FAKELIBC_STRING_H
#define _FAKELIBC_STRING_H

/* NOTE(review): the include target was lost in extraction; <stddef.h> is
 * assumed because this header needs size_t and NULL -- confirm. */
#include <stddef.h>

/*
 * strlen replacement: counts the bytes before the terminating NUL.
 * Unlike the libc function it accepts NULL and reports length 0 for it.
 */
#define strlen fake_strlen
static size_t fake_strlen(const char* foo) {
    size_t n = 0;
    if (foo != NULL) {
        while (foo[n] != 0) {
            n++;
        }
    }
    return n;
}

char *strchr(const char* str, int chr);
char *strrchr(const char* str, int chr);
char* strcat(char* str, const char* cat);
char* strcpy(char* buffer, const char* str);
char* strncpy(char* buffer, const char* str, size_t n);
/* ISO C declares strpbrk as returning char*, not const char*. */
char* strpbrk(const char* str, const char* search);
int strcmp(const char* a, const char* b);
char* strdup(const char* str);
char* strndup(const char* str, size_t n);

/*
 * memcpy replacement: copies nbytes bytes from src to dst in ascending
 * address order, one byte at a time, and returns dst.
 *
 * TODO: This was only required because calling libc's version triggered
 * errors; it probably isn't an issue now that more bugs have been fixed.
 */
#define memcpy fake_memcpy
static void* fake_memcpy(void* dst, const void* src, size_t nbytes) {
    char* out = (char*) dst;
    const char* in = (const char*) src;     /* keep the const qualifier */
    size_t i;
    for (i = 0; i < nbytes; i++) {
        out[i] = in[i];
    }
    return dst;
}

void* memset(void* mem, int byt, size_t nbytes);

/* From ifndef at top of file: */
#endif
+ */ + +#define st_mtime st_mtim.tv_sec +#define st_atime st_atim.tv_sec +#define st_ctime st_ctim.tv_sec + +typedef unsigned int ino_t; +typedef unsigned int mode_t; +typedef unsigned int uid_t; +typedef unsigned int gid_t; +typedef unsigned long dev_t; +typedef unsigned long nlink_t; + +struct stat { + dev_t st_dev; + ino_t st_ino; + + nlink_t st_nlink; + mode_t st_mode; + + uid_t st_uid; + gid_t st_gid; + int __padding_A; + + dev_t st_rdev; + long st_size; + long st_blocksize; + long st_blocks; + + struct timespec st_atim; + struct timespec st_mtim; + struct timespec st_ctim; + + long __reserved_A; + long __reserved_B; + long __reserved_C; +}; + +/* From ifndef at top of file: */ +#endif diff --git a/fakelibc/sys/time.h b/fakelibc/sys/time.h new file mode 100644 index 0000000..e69de29 diff --git a/fakelibc/sys/types.h b/fakelibc/sys/types.h new file mode 100644 index 0000000..e69de29 diff --git a/fakelibc/time.h b/fakelibc/time.h new file mode 100644 index 0000000..e5e355d --- /dev/null +++ b/fakelibc/time.h @@ -0,0 +1,29 @@ +#ifndef _FAKELIBC_TIME_H +#define _FAKELIBC_TIME_H + +typedef long time_t; + +char* ctime(const time_t* timevar); + +/* TODO: Check if modern systems add any more fields. 
/*
 * Append str to the heap string held in buffer[0].
 *
 * buffer[0] may be NULL (treated as the empty string).  A new zero-filled
 * buffer is allocated, both parts are copied into it, the old buffer is
 * freed and buffer[0] is pointed at the new one.  A NULL str appends
 * nothing, which is what the fakelibc strlen (NULL -> 0) already gave.
 * Prints an error and exits with status 1 if allocation fails.
 */
void brutal_string_append(char** buffer, char* str) {
    size_t len1 = (buffer[0] == NULL) ? 0 : strlen(buffer[0]);
    size_t len2 = (str == NULL) ? 0 : strlen(str);
    char* nbuf = calloc(len1 + len2 + 1, 1);    /* +1: terminator, already zero */
    if (nbuf == NULL) {
        fprintf(stderr, "ERROR: Out of memory!\n");
        exit(1);
    }
    if (len1 != 0) {
        memcpy(nbuf, buffer[0], len1);
    }
    if (len2 != 0) {
        memcpy(nbuf + len1, str, len2);
    }
    free(buffer[0]);                            /* free(NULL) is a no-op */
    buffer[0] = nbuf;
}

/*
 * Number of entries in a NULL-terminated argument vector.
 * A NULL vector counts as empty.
 */
int brutal_args_count(char** argv) {
    int n = 0;
    if (argv == NULL) {
        return 0;
    }
    while (argv[n] != NULL) {
        n++;
    }
    return n;
}

/*
 * Append str to the NULL-terminated vector held in buffer[0].
 *
 * The vector is reallocated on every call: a new array with room for the
 * old entries, str and the terminating NULL replaces the old one, which is
 * freed.  Only the array is copied; the strings themselves are shared.
 * Prints an error and exits with status 1 if allocation fails.
 */
void brutal_args_append(char*** buffer, char* str) {
    int len = brutal_args_count(buffer[0]);
    char** nbuf = calloc(len + 2, sizeof *nbuf);    /* old + str + NULL */
    int i;
    if (nbuf == NULL) {
        fprintf(stderr, "ERROR: Out of memory!\n");
        exit(1);
    }
    for (i = 0; i < len; i++) {
        nbuf[i] = buffer[0][i];
    }
    nbuf[len] = str;
    free(buffer[0]);
    buffer[0] = nbuf;
}

/*
 * Append every entry of the NULL-terminated vector moreargs to buffer[0].
 * A NULL moreargs is an empty list, matching brutal_args_count().
 */
void brutal_args_append_all(char*** buffer, char** moreargs) {
    int i;
    if (moreargs == NULL) {
        return;
    }
    for (i = 0; moreargs[i] != NULL; i++) {
        brutal_args_append(buffer, moreargs[i]);
    }
}

/*
 * Returns 1 if arg contains a space character (and so has to be quoted
 * when a command line is assembled), 0 otherwise.
 */
int brutal_needs_quotes(char* arg) {
    return strchr(arg, ' ') != NULL;
}
/* Same prototype the file declares at its top; repeated so this group of
 * definitions does not depend on declaration order. */
char* strdup(const char* str);

/*
 * Returns 1 if str begins with the string start, 0 otherwise.
 * An empty start matches every string.
 */
int brutal_startswith(const char* start, char* str) {
    return strncmp(str, start, strlen(start)) == 0;
}

/*
 * Returns 1 if str ends with the string end, 0 otherwise.
 * An empty end matches every string.
 */
int brutal_endswith(char* str, const char* end) {
    size_t len1 = strlen(str);
    size_t len2 = strlen(end);
    if (len1 < len2) {
        return 0;
    }
    /* Both tails have the same length, so a plain strcmp decides it. */
    return strcmp(str + (len1 - len2), end) == 0;
}

typedef struct compilerjob compilerjob_t;

/*
 * Settings for one run of the compiler driver.  echocmd and fakecmd are
 * handed to brutal_exec() as its "echo" and "fake" arguments; tmpdir and
 * tmpfiles are used by brutal_tmp().
 */
struct compilerjob {
    char* selfcmd;
    char** inputs;
    char** incdirs;
    char** defines;
    int numinputs;
    int numincdirs;
    int numdefines;
    int skipcompiler;
    int skiplinker;
    int skipassembler;
    int echocmd;
    int fakecmd;
    int keeptmp;
    int useyasm;
    int usefasm;
    int usenasm;
    int usezasm;
    int useas;
    int nasmsyntax;
    int gc;
    int thread;
    int mm;
    int riscv;
    int linkstatic;
    int usemold;
    int use101;
    char* output;
    char* tmpdir;
    char** tmpfiles;
    char* modinitfile; // If not null, should be added as an input
    char* modinitfunc; // Name of function in modinit
    char* sym_prefix;
};

/*
 * Returns a pointer into path just past its last '/' or '\\' separator,
 * or path itself when it contains no separator.  The job argument is not
 * used.
 */
char* brutal_path_end(compilerjob_t* job, char* path) {
    char* lastpart = path;
    char* p;
    (void)job;
    for (p = path; *p != 0; p++) {
        if (*p == '/' || *p == '\\') {
            lastpart = p + 1;
        }
    }
    return lastpart;
}

/*
 * Returns a newly allocated copy of the last component of path in which
 * every byte other than an ASCII letter or digit is replaced by '_'.
 * The caller owns the result.  Prints an error and exits with status 1 if
 * the copy cannot be allocated (same policy as brutal_string_append()).
 */
char* brutal_path_sanitise(compilerjob_t* job, char* path) {
    char* copy = strdup(brutal_path_end(job, path));
    char* p;
    if (copy == NULL) {
        fprintf(stderr, "ERROR: Out of memory!\n");
        exit(1);
    }
    for (p = copy; *p != 0; p++) {
        char c = *p;
        int keep = ((c >= 'a') && (c <= 'z'))
                || ((c >= 'A') && (c <= 'Z'))
                || ((c >= '0') && (c <= '9'));
        if (!keep) {
            *p = '_';
        }
    }
    return copy;
}
/*
 * Reports whether fname can be opened for reading: returns 1 if fopen()
 * in "rb" mode succeeds (the stream is closed again at once), 0 if not.
 * A file that exists but is unreadable is therefore reported as absent.
 */
int brutal_exists(char* fname) {
    FILE* probe = fopen(fname, "rb");
    int found = (probe != NULL);
    if (found) {
        fclose(probe);
    }
    return found;
}
+    return result;
+}
+
+/*
+ * Copy `input` to `output` by running the external `cp` command.
+ *
+ * Returns the value of brutal_exec() for that command. The job's echo/fake
+ * flags are passed through to brutal_exec() unchanged.
+ */
+int brutal_exec_cp(compilerjob_t* job, char* input, char* output) {
+    char** args = NULL;
+
+    brutal_args_append(&args, "cp");
+    brutal_args_append(&args, input);
+    brutal_args_append(&args, output);
+
+    return brutal_exec(args, job->echocmd, job->fakecmd);
+}
+
+/*
+ * Remove the file `fname` by running the external `rm` command.
+ *
+ * Returns the value of brutal_exec() for that command.
+ */
+int brutal_exec_rm(compilerjob_t* job, char* fname) {
+    char** args = NULL;
+
+    brutal_args_append(&args, "rm");
+    brutal_args_append(&args, fname);
+
+    return brutal_exec(args, job->echocmd, job->fakecmd);
+}
+
+/*
+ * Re-invoke this executable (job->selfcmd) as the preprocessor ("-P").
+ *
+ * Include directories are passed as two arguments each ("-I", dir); defines
+ * are passed as a single joined "-D<def>" argument. When `isasm` is non-zero
+ * "-D__ASSEMBLER__" is added as well. The input and output file names are
+ * the last two arguments.
+ *
+ * Returns the value of brutal_exec() for the command.
+ */
+int brutal_exec_preprocessor(compilerjob_t* job, char* input, char* output, int isasm) {
+    char** args = NULL;
+    int i;
+
+    brutal_args_append(&args, job->selfcmd);
+    brutal_args_append(&args, "-P");
+
+    for (i = 0; i < job->numincdirs; i++) {
+        brutal_args_append(&args, "-I");
+        brutal_args_append(&args, job->incdirs[i]);
+    }
+    for (i = 0; i < job->numdefines; i++) {
+        char* tmp = NULL;
+        brutal_string_append(&tmp, "-D");
+        brutal_string_append(&tmp, job->defines[i]);
+        brutal_args_append(&args, tmp);
+    }
+
+    if (isasm) {
+        char* tmp = NULL;
+        brutal_string_append(&tmp, "-D");
+        brutal_string_append(&tmp, "__ASSEMBLER__");
+        brutal_args_append(&args, tmp);
+    }
+
+    brutal_args_append(&args, input);
+    brutal_args_append(&args, output);
+
+    return brutal_exec(args, job->echocmd, job->fakecmd);
+}
+
+/*
+ * Re-invoke this executable (job->selfcmd) as the compiler backend ("-B").
+ *
+ * Optional arguments are added only when set: "--101" (job->use101),
+ * "--mod <modname>" (modname != NULL) and "--prefix <sym_prefix>"
+ * (job->sym_prefix != NULL). "--input" and "--output" are always passed.
+ *
+ * Returns the value of brutal_exec() for the command.
+ */
+int brutal_exec_compiler(compilerjob_t* job, char* input, char* output, char* modname) {
+    char** args = NULL;
+
+    brutal_args_append(&args, job->selfcmd);
+    brutal_args_append(&args, "-B");
+
+    if (job->use101) {
+        brutal_args_append(&args, "--101");
+    }
+
+    if (modname != NULL) {
+        brutal_args_append(&args, "--mod");
+        brutal_args_append(&args, modname);
+    }
+
+    if (job->sym_prefix != NULL) {
+        brutal_args_append(&args, "--prefix");
+        brutal_args_append(&args, job->sym_prefix);
+    }
+
+    brutal_args_append(&args, "--input");
+    brutal_args_append(&args, input);
+
+    brutal_args_append(&args, "--output");
+    brutal_args_append(&args, output);
+
+    return brutal_exec(args, job->echocmd, job->fakecmd);
+}
+
+/*
+ * Assemble `input` into `output` with the assembler selected on the job.
+ *
+ * The selection flags are tested in a fixed order, so if several are set the
+ * first match wins: yasm, nasm, fasm, zasm, as, and finally the default
+ * "cc -c" (or the RISC-V cross gcc). Every tool except fasm receives the
+ * output name after "-o"; fasm receives it as a plain second file argument.
+ *
+ * Returns the value of brutal_exec() for the command.
+ */
+int brutal_exec_assembler(compilerjob_t* job, char* input, char* output) {
+    char** args = NULL;
+
+    if (job->useyasm) {
+        brutal_args_append(&args, "yasm");
+        if (!job->nasmsyntax) {
+            brutal_args_append(&args, "-p");
+            brutal_args_append(&args, "gnu");
+        }
+        brutal_args_append(&args, "-f");
+        brutal_args_append(&args, "elf64");
+    } else if (job->usenasm) {
+        brutal_args_append(&args, "nasm");
+        brutal_args_append(&args, "-f");
+        brutal_args_append(&args, "elf64");
+    } else if (job->usefasm) {
+        brutal_args_append(&args, "fasm");
+        brutal_args_append(&args, "-m");
+        brutal_args_append(&args, "500000"); // Set a memory limit a bit under 512mb
+    } else if (job->usezasm) {
+        brutal_args_append(&args, "zasm");
+        brutal_args_append(&args, "--mode");
+        brutal_args_append(&args, "rv64");
+    } else if (job->useas) {
+        if (job->riscv) {
+            /* A native RISC-V host uses plain "as"; other hosts use the cross tool. */
+#ifdef __riscv
+            brutal_args_append(&args, "as");
+#else
+            brutal_args_append(&args, "riscv64-linux-gnu-as");
+#endif
+            brutal_args_append(&args, "--traditional-format");
+            brutal_args_append(&args, "-fno-pic");
+            // NOTE: These settings need to be fine-tuned... floating-point ABI is not
+            // currently matching the default for Ubuntu cross-compilation.
+            // (Alternative that was tried: -march=rv64ifd -mabi=lp64d)
+            brutal_args_append(&args, "-march=rv64imafd");
+            brutal_args_append(&args, "-mabi=lp64d");
+        } else {
+            brutal_args_append(&args, "as");
+            brutal_args_append(&args, "--64");
+        }
+    } else {
+        if (job->riscv) {
+#ifdef __riscv
+            brutal_args_append(&args, "cc");
+#else
+            brutal_args_append(&args, "riscv64-linux-gnu-gcc");
+#endif
+        } else {
+            brutal_args_append(&args, "cc");
+        }
+        brutal_args_append(&args, "-c");
+    }
+
+    brutal_args_append(&args, input);
+
+    if (!job->usefasm) {
+        brutal_args_append(&args, "-o");
+    }
+    brutal_args_append(&args, output);
+
+    return brutal_exec(args, job->echocmd, job->fakecmd);
+}
+
+/*
+ * Link the files in `inputs` into `output`.
+ *
+ * `inputs` is forwarded as-is to brutal_args_append_all(). The linker is
+ * "mold" (with hard-coded x86-64 library paths) when job->usemold is set,
+ * otherwise the RISC-V gcc driver when job->riscv is set, otherwise "cc".
+ * "--static", "-pthread", "-lgc", "-lSDL2" and "-lcairo" are added according
+ * to the job's linkstatic/thread/gc/mm flags.
+ *
+ * Returns the value of brutal_exec() for the command.
+ */
+int brutal_exec_linker(compilerjob_t* job, char** inputs, char* output) {
+    char** args = NULL;
+
+    if (job->usemold) {
+        brutal_args_append(&args, "mold");
+        brutal_args_append(&args, "-L/usr/lib/x86_64-linux-gnu");
+        brutal_args_append(&args, "-L/usr/lib/gcc/x86_64-linux-gnu/11");
+        brutal_args_append(&args, "-lc");
+        brutal_args_append(&args, "-lgcc");
+    } else if (job->riscv) {
+#ifdef __riscv
+        brutal_args_append(&args, "cc");
+#else
+        brutal_args_append(&args, "riscv64-linux-gnu-gcc");
+#endif
+    } else {
+        brutal_args_append(&args, "cc");
+    }
+
+    if (job->linkstatic) {
+        brutal_args_append(&args, "--static");
+    }
+
+    if (job->thread) {
+        brutal_args_append(&args, "-pthread");
+    }
+
+    brutal_args_append_all(&args, inputs);
+
+    if (job->gc) {
+        brutal_args_append(&args, "-lgc");
+    }
+    if (job->mm) {
+        // TODO: It probably makes more sense to dynamically link SOME of this stuff (or, at least, to have more options)
+        // But in any case, it seemed to make sense to have a shortcut to link a multimedia-enabled program
+        brutal_args_append(&args, "-lSDL2");
+        brutal_args_append(&args, "-lcairo");
+    }
+
+    brutal_args_append(&args, "-o");
+    brutal_args_append(&args, output);
+
+    return brutal_exec(args, job->echocmd, job->fakecmd);
+}
+
+/*
+ * Generate the module-init C file named by job->modinitfile.
+ *
+ * The file declares one `__module__<name>__init` function per entry of the
+ * NULL-terminated `modnames` list and defines job->modinitfunc, which calls
+ * __oop_init_begin(), every module initialiser in list order, and then
+ * __oop_init_end(). A NULL `modnames` is treated as an empty list.
+ *
+ * In --fake mode nothing is written. Returns 0 on success (or in fake mode),
+ * 1 if the file could not be opened or written completely.
+ */
+int brutal_mkmodinit(compilerjob_t* job, char** modnames) {
+    FILE* o;
+    int failed;
+    int i;
+
+    if (job->fakecmd) {
+        return 0;
+    }
+
+    o = fopen(job->modinitfile, "w");
+    if (o == NULL) {
+        return 1;
+    }
+
+    for (i = 0; modnames != NULL && modnames[i] != NULL; i++) {
+        fprintf(o, "void __module__%s__init();\n", modnames[i]);
+    }
+
+    fprintf(o, "void __oop_init_begin();\n");
+    fprintf(o, "void __oop_init_end();\n");
+
+    fprintf(o, "\n");
+
+    fprintf(o, "void %s() {\n", job->modinitfunc);
+
+    fprintf(o, " __oop_init_begin();\n");
+    for (i = 0; modnames != NULL && modnames[i] != NULL; i++) {
+        fprintf(o, " __module__%s__init();\n", modnames[i]);
+    }
+    fprintf(o, " __oop_init_end();\n");
+
+    fprintf(o, "}\n");
+
+    /* Output is buffered, so a write error may only surface in the stream's
+     * error flag or when fclose() flushes; report either as a failure rather
+     * than handing a truncated file to the compiler. */
+    failed = ferror(o);
+    if (fclose(o) != 0) {
+        failed = 1;
+    }
+    return failed ? 1 : 0;
+}
+
+/*
+ * Run the whole job: preprocess, compile and assemble each input in order,
+ * then either link the results or, when job->skiplinker is set, copy the
+ * last produced file to job->output.
+ *
+ * Input handling by file name suffix:
+ *   .m / .M  - module source; its sanitised name is passed to the compiler
+ *              via --mod and recorded for the module-init file
+ *   .s       - assembly, neither preprocessed nor compiled
+ *   .S       - assembly, preprocessed but not compiled
+ * When an input equals job->modinitfile, that file is generated first from
+ * the module names recorded so far (the process exits if it already exists
+ * or cannot be created).
+ *
+ * Unless job->keeptmp is set, every entry of job->tmpfiles is removed before
+ * returning, on success and on failure alike.
+ *
+ * Returns 0 on success, otherwise the non-zero result of the first failing
+ * step. A failure to remove a temporary file only produces a warning.
+ */
+int brutal_run(compilerjob_t* job) {
+    char** tolink = NULL;
+    char** toinit = NULL;
+    int result = 0;
+    char* last = NULL;
+    int i;
+
+    for (i = 0; i < job->numinputs; i++) {
+        char* input = job->inputs[i];
+        char* modname = NULL;
+        int isasm = 0;
+        int shouldprep = 1;
+
+        if (brutal_endswith(input, ".m") || brutal_endswith(input, ".M")) {
+            modname = brutal_path_sanitise(job, input);
+            fprintf(stderr, "NOTE: Using module name '%s'\n", modname);
+            brutal_args_append(&toinit, modname);
+        } else if (brutal_endswith(input, ".s")) {
+            isasm = 1;
+            shouldprep = 0;
+        } else if (brutal_endswith(input, ".S")) {
+            isasm = 1;
+        }
+
+        /* If this is the modinit file, we need to create it before trying to compile it! */
+        if (job->modinitfile != NULL && !strcmp(input, job->modinitfile)) {
+            if (brutal_exists(job->modinitfile)) {
+                fprintf(stderr, "ERROR: Module init file already exists, will not overwrite existing file!\n");
+                exit(1);
+            }
+            if (brutal_mkmodinit(job, toinit) != 0) {
+                fprintf(stderr, "ERROR: Failed to create module init file!\n");
+                exit(1);
+            }
+        }
+
+        /* Each stage writes to a fresh temporary file, which then becomes
+         * the input of the next stage. */
+        if (shouldprep) {
+            char* tmp = brutal_tmp(job, input, isasm ? ".preprocessed.s" : ".preprocessed.c");
+            result = brutal_exec_preprocessor(job, input, tmp, isasm);
+            if (result != 0) goto cleanup;
+            input = tmp;
+        }
+        if (!isasm && !job->skipcompiler) {
+            char* tmp = brutal_tmp(job, input, ".S");
+            result = brutal_exec_compiler(job, input, tmp, modname);
+            if (result != 0) goto cleanup;
+            input = tmp;
+        }
+        if (!job->skipassembler) {
+            char* tmp = brutal_tmp(job, input, ".o");
+            result = brutal_exec_assembler(job, input, tmp);
+            if (result != 0) goto cleanup;
+            input = tmp;
+        }
+        brutal_args_append(&tolink, input);
+        last = input;
+    }
+
+    if (job->skiplinker) {
+        result = brutal_exec_cp(job, last, job->output);
+    } else {
+        result = brutal_exec_linker(job, tolink, job->output);
+    }
+
+cleanup:
+
+    if (job->tmpfiles != NULL && !job->keeptmp) {
+        for (i = 0; job->tmpfiles[i] != NULL; i++) {
+            /* Do not store the rm status in `result`: a successful rm must
+             * not hide a failed build step, and a failed rm is only worth a
+             * warning. */
+            if (brutal_exec_rm(job, job->tmpfiles[i]) != 0) {
+                fprintf(stderr, "WARNING: Failed to remove temporary file '%s'\n", job->tmpfiles[i]);
+            }
+        }
+    }
+
+    return result;
+}
+
+/*
+ * Print the usage text, followed by an error note when `problem` is set.
+ *
+ * With problem == NULL the text goes to stdout and 0 is returned. Otherwise
+ * the text plus "Around arg #<argi>: <problem>" goes to stderr and 1 is
+ * returned, so callers can `return brutal_usage(...)` straight out of main.
+ * argv[0] is used as the program name; argc is not used.
+ */
+int brutal_usage(int argc, char** argv, int argi, char* problem) {
+    FILE* o = (problem == NULL) ? stdout : stderr;
+    const char* n = argv[0];
+
+    fprintf(o, "USAGE:\n");
+    fprintf(o, " %s [options] input1.c [input2.c ...]\n", n);
+
+    fprintf(o, "\n");
+
+    fprintf(o, "COMMON OPTIONS:\n");
+    fprintf(o, " -I Add a preprocessor include directory (requires an argument)\n");
+    fprintf(o, " -D Add a preprocessor definition (requires an argument)\n");
+    fprintf(o, " -o Set the output filename (requires an argument)\n");
+    fprintf(o, " -c Skip linker (only produce assembled file, no executable)\n");
+    fprintf(o, " -S Skip assembler (only produce assembly code, no assembled file)\n");
+    fprintf(o, " -E Skip compiler (only produce preprocessed code, no compiled code)\n");
+
+    fprintf(o, "\n");
+
+    fprintf(o, "TARGET/TOOLCHAIN OPTIONS:\n");
+    fprintf(o, " --static Build with/for static linking\n");
+    fprintf(o, " --dynamic Build with/for dynamic linking\n");
+    fprintf(o, " --use-fasm Use FASM assembler (default will pass assembly files to the platform's 'cc')\n");
+    fprintf(o, " --use-nasm Use NASM assembler (default will pass assembly files to the platform's 'cc')\n");
+    fprintf(o, " --use-yasm Use YASM assembler (default will pass assembly files to the platform's 'cc')\n");
+    fprintf(o, " --use-zasm Use ZASM assembler (default will pass assembly files to the platform's 'cc')\n");
+    fprintf(o, " --use-as Use the 'as' assembler directly (this will usually be the same backend used in the default mode)\n");
+    fprintf(o, " --nasm-syntax Use NASM syntax in assembly output (this can be used with --use-yasm, otherwise GNU syntax will be default)\n");
+    fprintf(o, " --RV64 Use RISC-V target (this assumes a suitable GNU toolchain for assembly & linkage)\n");
+    fprintf(o, " --AMD64 Use AMD64/x86-64 target (this assumes a suitable GNU toolchain for assembly & linkage)\n");
+
+    fprintf(o, "\n");
+
+    fprintf(o, "SPECIAL CODE GENERATION OPTIONS:\n");
+    fprintf(o, " --prefix Add a prefix to standard C symbols (requires an argument)\n");
+    fprintf(o, " --101 Use __classic_call calling convention by default (only for 1337 h4x0r5)\n");
+
+    fprintf(o, "\n");
+
+    fprintf(o, "BACKEND INVOCATION OPTIONS:\n");
+    fprintf(o, " --echo Echo command invocations & environment settings to the shell\n");
+    fprintf(o, " --fake Fake command invocations (can be used in conjunction with --echo to just show which commands would be used to compile something)\n");
+    fprintf(o, " --keeptmp Keep temporary files\n");
+    fprintf(o, " --compiler Invoke the internal compiler directly (must be first option, passes all options to backend)\n");
+    fprintf(o, " --preprocessor Invoke the internal preprocessor directly (must be first option, passes all options to preprocessor)\n");
+
+    fprintf(o, "\n");
+
+    fprintf(o, "EXTENSION OPTIONS:\n");
+    fprintf(o, " --gc Enable garbage collection\n");
+    fprintf(o, " --thread Enable multithreading\n");
+    fprintf(o, " (aliases: -pthread, --pthread)\n");
+    fprintf(o, " --mm Enable multimedia (requires SDL2, cairo libraries)\n");
+
+    fprintf(o, "\n");
+
+    fprintf(o, "TOOLCHAIN INFORMATION:\n");
+    fprintf(o, " --usage Shows usage information\n");
+    fprintf(o, " (aliases: --help, -h, -u, -H, -U)\n");
+    fprintf(o, " --version Shows version information\n");
+
+    fprintf(o, "\n");
+
+    if (problem != NULL) {
+        fprintf(o, "ERROR:\n");
+        fprintf(o, " Around arg #%d: %s\n", argi, problem);
+    }
+
+    return (problem == NULL) ? 0 : 1;
+}
+
+/*
+ * Append the built-in preprocessor define `def` to the job's define list.
+ *
+ * The pointer is stored as given (no copy is made). If the list already
+ * holds BRUTAL_MAX_FILES entries an error is printed and the process exits
+ * with status 1.
+ */
+void brutal_xdefine(compilerjob_t* job, const char* def) {
+    if (job->numdefines < BRUTAL_MAX_FILES) {
+        job->defines[job->numdefines] = def;
+        job->numdefines++;
+    } else {
+        fprintf(stderr, "ERROR: Too many defines\n");
+        exit(1);
+    }
+}
+
+/*
+ * Front-end entry point.
+ *
+ * If the first argument selects a built-in tool (-B/--compiler/--backend,
+ * -P/--preprocessor, and -M/--make when built with TOOL_MK) control is handed
+ * to that tool with argv[1] normalised to the short form. Otherwise the
+ * command line is parsed into a compilerjob_t, defaults are filled in,
+ * backend environment variables and built-in defines are set, and the job is
+ * executed with brutal_run().
+ *
+ * Returns the tool's / brutal_run()'s result, 0 after --usage or --version,
+ * and 1 on a command-line or out-of-memory error.
+ */
+int main(int argc, char** argv, char** envp) {
+
+    compilerjob_t* job;
+
+    /* Shortcuts to invoke the backend features directly. */
+    if (argc > 1) {
+        if (!strcmp(argv[1], "-B") || !strcmp(argv[1], "--compiler") || !strcmp(argv[1], "--backend")) {
+            argv[1] = "-B";
+            return backend_main(argc, argv);
+        } else if (!strcmp(argv[1], "-P") || !strcmp(argv[1], "--preprocessor")) {
+            argv[1] = "-P";
+            return backend_main(argc, argv);
+        #ifdef TOOL_MK
+        } else if (!strcmp(argv[1], "-M") || !strcmp(argv[1], "--make")) {
+            argv[1] = "-M";
+            mk_main(argc, argv, envp);
+            return 0;
+        #endif
+        }
+    }
+
+    fprintf(stderr, "SecureLang C Compiler Frontend\n");
+
+    job = calloc(1, sizeof(compilerjob_t));
+    if (job == NULL) {
+        fprintf(stderr, "ERROR: Out of memory!\n");
+        return 1;
+    }
+    job->inputs = calloc(BRUTAL_MAX_FILES, sizeof(void*));
+    job->incdirs = calloc(BRUTAL_MAX_FILES, sizeof(void*));
+    job->defines = calloc(BRUTAL_MAX_FILES, sizeof(void*));
+    if (job->inputs == NULL || job->incdirs == NULL || job->defines == NULL) {
+        fprintf(stderr, "ERROR: Out of memory!\n");
+        return 1;
+    }
+    job->selfcmd = strdup(argv[0]);
+    if (job->selfcmd == NULL) {
+        /* selfcmd is the program name for every preprocessor/compiler re-invocation. */
+        fprintf(stderr, "ERROR: Out of memory!\n");
+        return 1;
+    }
+    job->linkstatic = 0;
+    job->use101 = 0;
+    job->sym_prefix = NULL;
+    /* Default to the host architecture; --RV.. / --AMD.. override this below. */
+#ifdef __riscv
+    job->riscv = 1;
+#else
+    job->riscv = 0;
+#endif
+    int specifiedlinkage = 0;
+    int needsmodinit = 0;
+
+    int argi = 1;
+    while (argi < argc) {
+        char* a = argv[argi];
+        if (brutal_startswith("-o", a)) {
+            /* Accept both "-ofile" and "-o file". */
+            if (strlen(a) > 2) {
+                job->output = strdup(a+2);
+            } else if (argi+1 < argc) {
+                argi++;
+                job->output = strdup(argv[argi]);
+            } else {
+                return brutal_usage(argc, argv, argi, "Expected output filename following -o");
+            }
+        } else if (brutal_startswith("--prefix", a)) {
+            if (argi+1 < argc) {
+                argi++;
+                job->sym_prefix = strdup(argv[argi]);
+            } else {
+                return brutal_usage(argc, argv, argi, "Expected symbol prefix following --prefix");
+            }
+        } else if (brutal_startswith("-I", a)) {
+            char* dir = NULL;
+            if (strlen(a) > 2) {
+                dir = strdup(a+2);
+            } else if (argi+1 < argc) {
+                argi++;
+                dir = strdup(argv[argi]);
+            } else {
+                return brutal_usage(argc, argv, argi, "Expected directory name following -I");
+            }
+            if (job->numincdirs < BRUTAL_MAX_FILES) {
+                job->incdirs[job->numincdirs] = dir;
+                job->numincdirs++;
+            } else {
+                return brutal_usage(argc, argv, argi, "Too many include directories");
+            }
+        } else if (brutal_startswith("-D", a)) {
+            char* def = NULL;
+            if (strlen(a) > 2) {
+                def = strdup(a+2);
+            } else if (argi+1 < argc) {
+                argi++;
+                def = strdup(argv[argi]);
+            } else {
+                return brutal_usage(argc, argv, argi, "Expected preprocessor definition following -D");
+            }
+            if (job->numdefines < BRUTAL_MAX_FILES) {
+                job->defines[job->numdefines] = def;
+                job->numdefines++;
+            } else {
+                return brutal_usage(argc, argv, argi, "Too many defines");
+            }
+        } else if (!strcmp("--static", a)) {
+            job->linkstatic = 1;
+            specifiedlinkage = 1;
+        } else if (!strcmp("--dynamic", a)) {
+            job->linkstatic = 0;
+            specifiedlinkage = 1;
+        } else if (brutal_startswith("--RV", a) || brutal_startswith("--rv", a)) {
+            fprintf(stderr, "WARNING: Current RISC-V backend is being tested for 64-bit with minimal floating-point support\n");
+            job->riscv = 1;
+        } else if (brutal_startswith("--AMD", a) || brutal_startswith("--amd", a)) {
+            job->riscv = 0;
+        } else if (!strcmp("--use-as", a)) {
+            job->useas = 1;
+        } else if (!strcmp("--use-yasm", a)) {
+            job->useyasm = 1;
+        } else if (!strcmp("--use-fasm", a)) {
+            job->usefasm = 1;
+        } else if (!strcmp("--use-nasm", a)) {
+            job->usenasm = 1;
+            job->nasmsyntax = 1;
+        } else if (!strcmp("--use-zasm", a)) {
+            job->usezasm = 1;
+        } else if (!strcmp("--nasm-syntax", a)) {
+            job->nasmsyntax = 1;
+        } else if (!strcmp("--use-mold", a)) {
+            job->usemold = 1;
+        } else if (!strcmp("--version", a)) {
+            fprintf(stderr, "Too early to tell.\n");
+            return 0;
+        } else if (!strcmp("--usage", a) || !strcmp("--help", a) || !strcmp("-h", a) || !strcmp("-u", a) || !strcmp("-H", a) || !strcmp("-U", a)) {
+            return brutal_usage(argc, argv, argi, NULL);
+        } else if (!strcmp("-E", a)) {
+            job->skipcompiler = 1;
+            job->skipassembler = 1;
+            job->skiplinker = 1;
+        } else if (!strcmp("-S", a)) {
+            job->skipassembler = 1;
+            job->skiplinker = 1;
+        } else if (!strcmp("-c", a)) {
+            job->skiplinker = 1;
+        } else if (!strcmp("--101", a)) {
+            job->use101 = 1;
+        } else if (!strcmp("--echo", a)) {
+            job->echocmd = 1;
+        } else if (!strcmp("--fake", a)) {
+            job->fakecmd = 1;
+        } else if (!strcmp("--keeptmp", a)) {
+            job->keeptmp = 1;
+        } else if (!strcmp("--gc", a)) {
+            job->gc = 1;
+        } else if (!strcmp("--thread", a) || !strcmp("-pthread", a) || !strcmp("--pthread", a)) {
+            job->thread = 1;
+        } else if (!strcmp("--mm", a)) {
+            job->mm = 1;
+        } else if (brutal_startswith("-", a)) {
+            return brutal_usage(argc, argv, argi, "Bad option");
+        } else {
+            /* Anything that is not an option is an input file. */
+            if (job->numinputs < BRUTAL_MAX_FILES) {
+                char* cp = strdup(a);
+                if (brutal_endswith(cp, ".m") || brutal_endswith(cp, ".M")) {
+                    needsmodinit = 1;
+                }
+                job->inputs[job->numinputs] = cp;
+                job->numinputs = job->numinputs + 1;
+            } else {
+                return brutal_usage(argc, argv, argi, "Too many input files");
+            }
+        }
+        argi++;
+    }
+
+    /* The generated module-init file is only added when the job goes all
+     * the way through the assembler and linker. */
+    if (needsmodinit && (job->skipassembler || job->skiplinker)) {
+        needsmodinit = 0;
+    }
+
+    if (job->numinputs < 1) {
+        return brutal_usage(argc, argv, argi, "Nothing to do (expected input file)");
+    }
+
+    if (job->output == NULL) {
+        if (job->skipcompiler) {
+            job->output = "./a.out.C";
+        } else if (job->skipassembler) {
+            job->output = "./a.out.S";
+        } else if (job->skiplinker) {
+            job->output = "./a.out.o";
+        } else {
+            job->output = "./a.out";
+        }
+    }
+
+    if (job->tmpdir == NULL) {
+        job->tmpdir = ".";
+    }
+
+    if (specifiedlinkage) {
+        if ((job->riscv || job->useyasm || job->usefasm || job->usezasm) && !job->linkstatic) {
+            fprintf(stderr, "WARNING: Dynamic linking using RISC-V or FASM/YASM targets is work-in-progress.\n");
+        }
+    } else {
+        if ((job->riscv || job->useyasm || job->usefasm)) {
+            fprintf(stderr, "NOTE: Using static linking as default on RISC-V or FASM/YASM targets.\n");
+            job->linkstatic = 1;
+        }
+    }
+
+    /* Set any environment variables for current backend.
+     * TODO: Eventually move the more-stable options into regular command-line arguments.
+     */
+    if (job->riscv) {
+        brutal_setenv("CCB_FAMILY", "risc-v", 1, job->echocmd, job->fakecmd);
+    }
+
+    if (job->usefasm) {
+        brutal_setenv("CCB_ASMFMT", "fasm", 1, job->echocmd, job->fakecmd);
+    }
+
+    /* Set after the fasm case, so "nasm" is the value set last when both flags are given. */
+    if (job->nasmsyntax) {
+        brutal_setenv("CCB_ASMFMT", "nasm", 1, job->echocmd, job->fakecmd);
+    }
+
+    if (needsmodinit) {
+        if (job->modinitfile == NULL) {
+            job->modinitfile = brutal_tmp(job, "modinit", ".c");
+        }
+        if (job->modinitfunc == NULL) {
+            job->modinitfunc = "__modinit";
+        }
+        /* Appended as the last input; brutal_run() generates the file when
+         * it reaches this entry, from the module names seen before it. */
+        if (job->numinputs < BRUTAL_MAX_FILES) {
+            job->inputs[job->numinputs] = job->modinitfile;
+            job->numinputs = job->numinputs + 1;
+        } else {
+            return brutal_usage(argc, argv, argi, "Too many input files");
+        }
+    }
+
+    /* Built-in defines describing the toolchain, enabled features and target. */
+    brutal_xdefine(job, "__BRUTAL");
+    brutal_xdefine(job, "_ZCC");
+    if (job->gc) {
+        brutal_xdefine(job, "__BRUTAL_FEATURE_GC");
+    }
+    if (job->mm) {
+        brutal_xdefine(job, "__BRUTAL_FEATURE_MM");
+    }
+    if (job->thread) {
+        brutal_xdefine(job, "__BRUTAL_FEATURE_THREAD");
+    }
+    if (job->riscv) {
+        brutal_xdefine(job, "__BRUTAL_CPU_RV64");
+        brutal_xdefine(job, "__riscv");
+    } else {
+        brutal_xdefine(job, "__BRUTAL_CPU_X64");
+        brutal_xdefine(job, "__x86_64__");
+    }
+    brutal_xdefine(job, "__BRUTAL_OS_LINUX"); // TODO: Better/more target flags
+
+    return brutal_run(job);
+}