/* A Simple memory manager. PUBLIC DOMAIN, NO COPYRIGHT, NO WARRANTY.
 * Peace, love & Buddhism for all. -Zak Fenton, MMXX.
 * (2025 update) This is now a real malloc, you may also use the license in my libc. -Zak.
 * FEATURES:
 * Minimal, portable and embeddable implementations of "malloc", "free", "calloc" and "realloc" functions.
 * Should be threadsafe if a locking function is provided before initialisation (the init function MUST be called
 * before attempting to allocate any memory! But only a "bigchunk" function is required for the initialisation to
 * work.) Fine-grained locking might be added in the future after trying some multithreaded tests.
 * Almost completely standalone, no standard library functions are required and the only headers required are "stdint" and
 * "stdbool" (which are both very easy to implement in the rare cases where standard versions don't work).
 * Compaction is provided and works separately to the regular malloc/free control flows. The intention is for malloc/free
 * to work fairly quickly and not bother sorting or merging lists, then an idle process or similar can regularly scan and
 * compact the heap. (Returning unused memory to the environment hasn't been implemented yet, but should basically consist
 * of a quick check-and-trim process after running the compaction function.)
 * Provides a fairly robust way of getting free/used statistics (this is designed for future extension so only minimal
 * changes are required to support new or system-specific metrics).
 * Works in both 32-bit and 64-bit builds (should also adapt to smaller or larger word sizes, should use appropriate types
 * and sizeof instead of making assumptions, but rounding values etc. may need to be adapted for more obscure cases).
 * Will attempt to grow the heap if it runs out of memory (and should fail gracefully if it can't).
 * Will attempt to release whatever memory it can spare if you call elmm_friendlycompact.
* NOT YET IMPLEMENTED:
 * Only tested under very simple and fake conditions (see testmain.c), TODO: Proper stress tests (might be easy to hook up
 * something like Lua which can take a memory manager callback and then just run some large Lua program, otherwise some
 * kind of benchmark program might be a good basis for a test).
 * Some debugging code has been left in for now (search "printf" to remove the debug lines, but some may be useful to
 * indicate TODOs or errors etc. A separate "warning" callback might be added in the future for debugging memory issues.)
 * Not very fast and may not have enough safeguards for some use cases, but hopefully will be enough for a starting point.
 * Locking should probably just be determined with #defines, the function pointer style MAY be helpful for debugging though.
 */
#ifndef ELMM_H
#define ELMM_H

#include <stddef.h>  /* NULL */
#include <stdint.h>
#include <stdbool.h>

#define ELMM_VERSION 0x0100
/**/
#define ELMM_STATIC static
#define ELMM_INLINE static inline

/* Internally, a smallchunk header is used for each allocated and unallocated piece (besides the header).
*/ #define ELMM_SMALLCHUNK_FREE 1234 #define ELMM_SMALLCHUNK_ALLOCATED 4321 typedef struct elmm_smallchunk elmm_smallchunk_t; struct elmm_smallchunk { uintptr_t check1; /* Either ELMM_SMALLCHUNK_FREE or ELMM_SMALLCHUNK_ALLOCATED.*/ uintptr_t size; elmm_smallchunk_t* next; uintptr_t check2; /* Either ELMM_SMALLCHUNK_FREE or ELMM_SMALLCHUNK_ALLOCATED.*/ }; /* Internal elements of a bigchunk*/ typedef struct elmm_bigchunk_internals elmm_bigchunk_internals_t; typedef struct elmm_bigchunk elmm_bigchunk_t; struct elmm_bigchunk_internals { uintptr_t allocatedTop; elmm_bigchunk_t* nextChunk; elmm_bigchunk_t* prevChunk; elmm_smallchunk_t* firstFree; elmm_smallchunk_t* firstAllocated; }; struct elmm_bigchunk { void* startAddress; uintptr_t headerSize; uintptr_t totalSize; uintptr_t maximumSize; const char* errorString; elmm_bigchunk_internals_t internals; }; /* The bigchunk allocator takes a size (or zero) and an "old" bigchunk address (or NULL). * If the oldchunk is NULL and a chunk with the given size can be allocated, the function should * allocate a chunk (of at least that size) and fill in a elmm_bigchunk_t structure at the start * of the chunk (the structure's address must match the start address). In this case, a NULL * should be returned if the chunk can't be allocated. * * Unused fields of the bigchunk structure * should be cleared to zero, but the rest of the memory doesn't necessarily need to be cleared * (it probably should be anyway for security reasons, but the allocator will clear it itself * anyway). The total size of a bigchunk is expected to always include the header (the header is * part of the chunk, and only needs to be separated by it's own headerSize field). * * If the oldchunk is non-null and the size is zero, the function should deallocate that chunk * entirely (or set it to it's minimum size, depending on implementation), and return NULL (if * there is an error, it should return the structure and set it's error string). 
* * Before returning a bigchunk, the function can set the maximumSize field to a value other * than zero, in which case the higher-level allocater may call the function again with * the same chunk and a different size value to attempt to resize the chunk. The allocator * will generally try to use memory from an existing bigchunk before attempting to get a new * one, which means an implementation of the bigchunk function can just work with a single * resizable bigchunk (as in the sbrk implementation). * * Between calls to the bigchunk function, the allocator may change, resize or reconstruct the * bigchunk header, but subsequent calls will always use the same header address and sensible values * (it must always be located at the very start of the chunk and the size fields should be unchanged * between calls, except for the headerSize which may be increased by the caller to account for it's own * chunk headers). */ typedef elmm_bigchunk_t* (*elmm_bigchunk_function_t)(uintptr_t size, elmm_bigchunk_t* oldchunk, void* udata); /* Internally, the memory manager works by allocating "big chunks" from somewhere else and then allocating the * "small chunks" requested by the user within that memory. On Unix-like systems and on embedded systems using * only one heap at a time, it may be more convenient to allocate from a single memory bank which is just * increased or decreased in size as required. The system call usually used to achieve this is called "sbrk", * so a wrapper around our usual "big chunk" allocation scheme which can be plugged directly into sbrk (by * ignoring the udata argument). */ /* The error code returned by sbrk should be "NULL minus one", but some casts may be required to get the value * right without compiler errors. 
*/ #define ELMM_SBRK_ERROR ((void*)(((char*)(((void*)NULL))) - 1)) typedef struct elmm_sbrk_data elmm_sbrk_data_t; typedef void* (*elmm_sbrk_function_t)(intptr_t incr, void* udata); struct elmm_sbrk_data { elmm_sbrk_function_t func; void* udata; /* This can either be set to the maximum heap size, or to zero (which will imply a default of 1GB). * This should generally be set as high as possible, but can be used as an easy way to set an effective * limit on memory usage. */ uintptr_t max; /* This should initially be set to NULL, but is otherwise used to store the single resizable chunk * which represents the resizable heap. */ elmm_bigchunk_t* onlyChunk; }; /* This function can be used as the bigchunk function if you need it to work on top of a "sbrk"-like API. * A elmm_sbrk_data_t should be given as the userdata, and it holds a pointer to an sbrk-like function. */ ELMM_STATIC elmm_bigchunk_t* elmm_bigchunk_sbrk(uintptr_t size, elmm_bigchunk_t* oldchunk, void* udata) { elmm_sbrk_data_t* data = (elmm_sbrk_data_t*)udata; elmm_sbrk_function_t sbrkf = data->func; //printf("elmm_bigchunk_sbrk(%d,%d,%d)\n", size, oldchunk, udata); if (size > 0 && oldchunk == NULL) { if (data->onlyChunk != NULL) { return NULL; } data->onlyChunk = (elmm_bigchunk_t*)sbrkf(size, data->udata); if (data->onlyChunk == ELMM_SBRK_ERROR) { return NULL; } data->onlyChunk->startAddress = (void*)(data->onlyChunk); // Header MUST be at the start address. data->onlyChunk->headerSize = sizeof(elmm_bigchunk_t); data->onlyChunk->totalSize = size; data->onlyChunk->maximumSize = (data->max == 0) ? 
1024 * 1024 * 1024 : data->max; return data->onlyChunk; } else if (size > 0 && oldchunk != NULL) { //printf("Attempting to resize...\n"); if (data->onlyChunk != oldchunk) { return NULL; } intptr_t diff = size - data->onlyChunk->totalSize; void* sbrkresult = sbrkf(diff, data->udata); // TODO: Should probably check that the sbrk function returned a pointer exactly where we expected if (sbrkresult == ELMM_SBRK_ERROR) { return NULL; } data->onlyChunk->totalSize = size; return data->onlyChunk; } else if (size == 0 && oldchunk != NULL) { //printf("Attempting to deallocate...\n"); if (data->onlyChunk != oldchunk) { return NULL; } //TODO... return NULL; } else { //printf("TODO!!!\n"); return NULL; } } /* A locking function can be provided to the memory manager so that it can be used in multithreaded * environments. If provided, the locking function should return true on success or false on error, * and needs to obey a few pretty universal commands that should be easy to implement for any * multithreaded environment. Use of a lock function ensures that calls to the bigchunk function * would not be made simultaneously on different threads, but other locks may be used internally * to maintain structures. * * The locking function is expected to either be very straightforward (using the variable address * to do some platform-specific atomic check-and-set operation) or to allocate/delete it's own * lock structures (storing the address of the corresponding structure in each lockVariable). */ #define ELMM_LOCK_NEW 1 #define ELMM_LOCK_DELETE 2 #define ELMM_LOCK_TRYLOCK 3 #define ELMM_LOCK_WAITLOCK 4 #define ELMM_LOCK_UNLOCK 5 #define ELMM_LOCK_GETTHREAD 6 typedef bool (*elmm_lock_function_t)(int command, void** lockVarible, void* udata); /* This function is used internally as the lock function if no other one is provided. * (The current implementation doesn't perform any checks, it just sets the lock to * NULL and reports success. 
Future versions might at least attempt to make sure * the commands are issued in the correct order.) */ ELMM_STATIC bool elmm_nolock(int command, void** lockVariable, void* udata) { *lockVariable = NULL; return true; } typedef struct elmm elmm_t; struct elmm { /* Some configuration options come first. These can be set to zero before initialisation, * but shouldn't be modified by the caller after initialising the memory mananger. */ uintptr_t bigchunkMinimum; uintptr_t bigchunkGranularity; /* Callback functions for the heap management and locking come next. A bigchunk function * is mandatory, but the lock function is optional (if it's NULL at initialisation, * it will be set to a locking function which has no effect or only works in single-threaded * implementations). */ elmm_bigchunk_function_t bigchunkFunction; void* bigchunkData; elmm_lock_function_t lockFunction; void* lockData; /* Internal pointers are stored last. */ void* mainLock; elmm_bigchunk_t* firstChunk; bool initialised; }; #define ELMM_STAT_VERSION 0 #define ELMM_STAT_ALLOCATED 1 #define ELMM_STAT_FREE 2 #define ELMM_STAT_TOTAL 3 #define ELMM_STAT_OVERHEADS 4 #define ELMM_STATTOP 5 /* Called internally by elmm_innerstat to add up the "size" elements of each smallchunk in the given list. */ ELMM_INLINE uintptr_t elmm_innerstatpart(elmm_t* mm, elmm_smallchunk_t* listHead) { uintptr_t result = 0; while (listHead != NULL) { result += listHead->size; listHead = listHead->next; } return result; } /* This function is only useful for getting statistics WHILE THE MEMORY MANAGER IS LOCKED OR NOT BEING USED! * The elmm_stat function is designed for more regular use (it will lock the memory manager and collect * all of the statistics consistently). 
*/ ELMM_STATIC uintptr_t elmm_innerstat(elmm_t* mm, uintptr_t statnum) { elmm_bigchunk_t* chunk = mm->firstChunk; uintptr_t result = 0; switch (statnum) { case ELMM_STAT_VERSION: return ELMM_VERSION; case ELMM_STAT_ALLOCATED: while (chunk != NULL) { result += elmm_innerstatpart(mm, chunk->internals.firstAllocated); chunk = chunk->internals.nextChunk; } break; case ELMM_STAT_FREE: while (chunk != NULL) { result += elmm_innerstatpart(mm, chunk->internals.firstFree); chunk = chunk->internals.nextChunk; } break; case ELMM_STAT_TOTAL: while (chunk != NULL) { result += chunk->totalSize; chunk = chunk->internals.nextChunk; } break; case ELMM_STAT_OVERHEADS: return elmm_innerstat(mm, ELMM_STAT_TOTAL) - (elmm_innerstat(mm, ELMM_STAT_ALLOCATED) + elmm_innerstat(mm, ELMM_STAT_FREE)); default: return 0 - 1; } return result; } ELMM_INLINE elmm_smallchunk_t* elmm_getheader(elmm_t* mm, void* allocatedMemory) { if (allocatedMemory == NULL) { return NULL; } else { return ((elmm_smallchunk_t*)allocatedMemory) - 1; } } ELMM_INLINE uintptr_t elmm_sizeof(elmm_t* mm, void* allocatedMemory) { if (allocatedMemory == NULL) { return 0; } else { return elmm_getheader(mm, allocatedMemory)->size; } } ELMM_STATIC elmm_bigchunk_t* elmm_allocinner(elmm_t* mm, uintptr_t minsize) { /* Header size plus the size of some number of smallchunk structures is added to the minimum size * to account for any overheads which might be required to allocate (at least) a structure of * the given size size. 
*/ minsize += sizeof(elmm_bigchunk_t) + (sizeof(elmm_smallchunk_t) * 10); // This should more than cover the minimum overheads if (minsize < mm->bigchunkMinimum) { minsize = mm->bigchunkMinimum; } while ((minsize % mm->bigchunkGranularity) != 0) { minsize++; } elmm_bigchunk_function_t bigchf = mm->bigchunkFunction; elmm_bigchunk_t* result = bigchf(minsize, NULL, mm->bigchunkData); if (result == NULL) { printf("BIGCHUNK FUNCTION GAVE US NULL!\n"); return NULL; } if (result->headerSize > sizeof(elmm_bigchunk_t)) { return NULL; } if (result->startAddress != (void*)result) { return NULL; } result->internals.nextChunk = NULL; result->internals.prevChunk = NULL; /* When allocated by the bigchunk function, the header size only needs to represent the size of the header * known to (and cleared or filled in by) the bigchunk function. It's extended here to ensure that it fits * all of our interna fields (which could be ignored by the bigchunk implementation) and also aligns to a * reasonable boundary for the purposes of allocating within it. */ result->headerSize = sizeof(elmm_bigchunk_t); while ((result->headerSize % 16) != 0) { result->headerSize++; } uint8_t* bytes = (uint8_t*)(result->startAddress); result->internals.firstAllocated = NULL; result->internals.firstFree = (elmm_smallchunk_t*)(bytes + result->headerSize); result->internals.firstFree->check1 = ELMM_SMALLCHUNK_FREE; result->internals.firstFree->size = result->totalSize - (result->headerSize + sizeof(elmm_smallchunk_t)); result->internals.firstFree->next = NULL; result->internals.firstFree->check2 = ELMM_SMALLCHUNK_FREE; return result; } bool elmm_dolock(elmm_t* mm, int command, void** lockVariable, void* udata) { elmm_lock_function_t lockf = mm->lockFunction; return lockf(command, lockVariable, udata); } /* Must be called before any other functions to initialise a memory manager. 
* The memory manager structure itself is provided by the caller, and at a minimum it should have it's * bigchunkFunction set to an appropriate value (all unused fields should be cleared to zero/NULL before * initialisation). * * NOTE: The reason the init function needs to be called explicitly (rather than automatically e.g. in the * first call to "elmm_malloc") is just because of edge-cases involving multithreaded programs: If the * memory manager isn't used before creating a second thread, then calling elmm_malloc at the same time from any * two threads could lead to both threads trying to initialise the structure at the same time (which is * critical because the initialisation function needs to initialise any locks which would normally safely * synchronise multithreaded access). This shouldn't be an issue in many cases, so in a wrapper function * you could just check the "initialised" field and initialise whenever necessary (but there would probably * be a better place to put the initialisation call in most cases anyway). */ ELMM_STATIC bool elmm_init(elmm_t* mm) { if (mm == NULL) { return false; } if (mm->initialised) { return false; } if (mm->bigchunkFunction == NULL) { return false; } if (mm->bigchunkMinimum == 0) { mm->bigchunkMinimum = 1024; } if (mm->bigchunkGranularity == 0) { mm->bigchunkGranularity = 1024; } if (mm->lockFunction == NULL) { mm->lockFunction = &elmm_nolock; mm->lockData = NULL; } if (!elmm_dolock(mm, ELMM_LOCK_NEW, &mm->mainLock, mm->lockData)) { return false; } /* The lock is then obtained to ensure that the structure is locked until * AFTER it's set to initialised. 
*/ if (!elmm_dolock(mm, ELMM_LOCK_WAITLOCK, &mm->mainLock, mm->lockData)) { return false; } mm->firstChunk = elmm_allocinner(mm, mm->bigchunkMinimum); if (mm->firstChunk == NULL) { //printf("ALLOC FAILED\n"); return false; } mm->initialised = true; if (!elmm_dolock(mm, ELMM_LOCK_UNLOCK, &mm->mainLock, mm->lockData)) { return false; } return true; } ELMM_INLINE bool elmm_checkinit(elmm_t* mm) { if (mm != NULL && mm->initialised) { return true; } else { return false; /* Initially the plan was to automatically call elmm_init(mm), but then I realised this wouldn't be threadsafe. * So now each memory manager structure needs to be initialised explicitly before using it (and functions should * just fail otherwise). The initialisation function (and, if called, the cleanup function) should only be invoked * from a single thread (but if a lock function is provided, normal memory management functions can be called from * any thread once it's initialised). */ } } /* The inverse of elmm_init. Should be called when the heap is completely finished to deallocate any remaining chunks. In * practice, if only one heap is used for the entire duration of a program then a program doesn't really need to clean it up * (all of the program's memory would normally be reclaimed when the program ends anyway), but if multiple heaps are used * it may become necessary to deallocate some of them individually. 
*/ ELMM_INLINE bool elmm_cleanup(elmm_t* mm) { if (mm == NULL || mm->bigchunkFunction == NULL || mm->lockFunction == NULL) { return false; } elmm_dolock(mm, ELMM_LOCK_WAITLOCK, &mm->mainLock, mm->lockData); elmm_bigchunk_t* chunk = mm->firstChunk; mm->firstChunk = NULL; elmm_bigchunk_function_t bigchf = mm->bigchunkFunction; while (chunk != NULL) { elmm_bigchunk_t* deadChunk = chunk; chunk = deadChunk->internals.nextChunk; bigchf(0, deadChunk, mm->bigchunkData); } elmm_dolock(mm, ELMM_LOCK_UNLOCK, &mm->mainLock, mm->lockData); /* Finally, delete the lock.*/ elmm_dolock(mm, ELMM_LOCK_DELETE, &mm->mainLock, mm->lockData); return true; } /* This function is called by elmm_growheap in order to resize a chunk. One important responsibility is to create a new * smallchunk representing the allocated space. */ ELMM_INLINE bool elmm_growinner(elmm_t* mm, elmm_bigchunk_t* bigchunk, uintptr_t increment) { increment += sizeof(elmm_bigchunk_t) + (sizeof(elmm_smallchunk_t) * 10); // This should more than cover the minimum overheads if (increment < mm->bigchunkMinimum) { increment = mm->bigchunkMinimum; } while ((increment % mm->bigchunkGranularity) != 0) { increment++; } uintptr_t oldSize = bigchunk->totalSize; uintptr_t newSize = oldSize + increment; elmm_bigchunk_function_t bigchf = mm->bigchunkFunction; if (bigchf(newSize, bigchunk, mm->bigchunkData) != bigchunk) { //printf("The bigchunk function failed!\n"); return false; } /* It might be higher than our requested size, but we should indicate failure if it's lower. */ if (bigchunk->totalSize < newSize) { //printf("The bigchunk function reported success but didn't allocate enough space\n"); return false; } /* Now we just need to calculate the address and size of the newChunk, initialise it's fields * and add it to the free list. 
*/ elmm_smallchunk_t* newChunk = (elmm_smallchunk_t*)(((uint8_t*)(bigchunk->startAddress)) + oldSize); //printf("The new chunk starts at %d\n", newChunk); newChunk->check1 = ELMM_SMALLCHUNK_FREE; newChunk->next = bigchunk->internals.firstFree; bigchunk->internals.firstFree = newChunk; newChunk->size = (bigchunk->totalSize - oldSize) - sizeof(elmm_smallchunk_t); newChunk->check2 = ELMM_SMALLCHUNK_FREE; //printf("Smallchunk size: %d\n", newChunk->size); return true; } /* If not enough FREE smallchunks exist within the heap, this function will be * called to attempt to allocate more. It should return true if successful and * false otherwise. */ ELMM_STATIC bool elmm_growheap(elmm_t* mm, uintptr_t increment) { //printf("elmm_growheap(%d, %d)\n", mm, increment); if (!elmm_checkinit(mm)) { return false; } if (increment > 1024 * 1024 * 1024) { return false; // TODO: Check against some better maximum incr? } while ((increment % mm->bigchunkGranularity) != 0) { increment++; } /* First we need to check existing chunks, if any can be resized to fit the * new data we should attempt that. */ elmm_bigchunk_t* chunk = mm->firstChunk; elmm_bigchunk_t* lastValidChunk = chunk; while (chunk != NULL) { if (chunk->maximumSize >= (chunk->totalSize + increment)) { //printf("Looks like we can resize this chunk!\n"); if (elmm_growinner(mm, chunk, increment)) { // Only if it works should we return now! return true; } /* Even if the maximum size indicates otherwise, there might be some reason the chunk * can't be resized, so if growinner failed we need to continue and try to allocate * somewhere else. 
*/ } lastValidChunk = chunk; chunk = chunk->internals.nextChunk; } //printf("Attempting to allocate a new chunk...\n"); elmm_bigchunk_t* newChunk = elmm_allocinner(mm, increment); if (newChunk == NULL) { printf("Internal allocation failed\n"); return false; } if (mm->firstChunk == NULL) { // Wouldn't normally happen, but maybe if initialisation changes mm->firstChunk = newChunk; } else { lastValidChunk->internals.nextChunk = newChunk; newChunk->internals.prevChunk = lastValidChunk; } return true; } /* Locks the memory manager while collecting statistics into the given array (up to at most arrayLength elements). * The array indices will match the ELMM_STAT_ values and the number of elements filled will be returned (zero upon * complete failure). */ ELMM_STATIC uintptr_t elmm_stat(elmm_t* mm, uintptr_t* statArray, uintptr_t arrayLength) { if (!elmm_checkinit(mm)) { return 0; } if (!elmm_dolock(mm, ELMM_LOCK_WAITLOCK, &mm->mainLock, mm->lockData)) { return 0; } void* result = NULL; uintptr_t i; for (i = 0; i < arrayLength && i < ELMM_STATTOP; i++) { statArray[i] = elmm_innerstat(mm, i); } if (!elmm_dolock(mm, ELMM_LOCK_UNLOCK, &mm->mainLock, mm->lockData)) { return 0; } return i; } /* The chunkymalloc function attempts to allocate a smaller chunk from the free list within a given bigchunk. 
*/ ELMM_INLINE void* elmm_chunkymalloc(elmm_t* mm, elmm_bigchunk_t* bigchunk, uintptr_t size) { while ((size % sizeof(elmm_smallchunk_t)) != 0) { size++; } elmm_smallchunk_t** chunkptr = &bigchunk->internals.firstFree; elmm_smallchunk_t* chunk = bigchunk->internals.firstFree; while (chunk != NULL) { if (chunk->size >= size) { //printf("Can fit\n"); if (chunk->size >= size + (2 * sizeof(elmm_smallchunk_t))) { //printf("Will split\n"); uint8_t* rawbytes = (uint8_t*) chunk; elmm_smallchunk_t* upperchunk = (elmm_smallchunk_t*)(rawbytes + sizeof(elmm_smallchunk_t) + size); while ((((uintptr_t)upperchunk) % sizeof(elmm_smallchunk_t)) != 0) { upperchunk = (elmm_smallchunk_t*) (((uintptr_t)upperchunk) + 1); } upperchunk->check1 = ELMM_SMALLCHUNK_FREE; uintptr_t oldsize = chunk->size; chunk->size = (((uintptr_t)upperchunk) - ((uintptr_t)chunk)) - sizeof(elmm_smallchunk_t); upperchunk->size = oldsize - (((uintptr_t)upperchunk) - ((uintptr_t)chunk)); upperchunk->next = chunk->next; upperchunk->check2 = ELMM_SMALLCHUNK_FREE; chunk->next = upperchunk; //printf("Splitted one chunk of %d into two chunks of %d and %d\n", oldsize, chunk->size, upperchunk->size); } *chunkptr = chunk->next; chunk->check1 = ELMM_SMALLCHUNK_ALLOCATED; chunk->next = bigchunk->internals.firstAllocated; bigchunk->internals.firstAllocated = chunk; chunk->check2 = ELMM_SMALLCHUNK_ALLOCATED; return (void*)(chunk + 1); } chunkptr = &chunk->next; chunk = chunk->next; } //printf("No fit in chunk %d\n", bigchunk); /* If no chunk was found, just return NULL. */ return NULL; } /* The inner malloc function attempts to allocate from any free chunks. This performs * most of the job of malloc, but leaves the edge cases (i.e. when we need to obtain more memory) * as well as locking for the outer elmm_malloc function (and leaves the hard bits to elmm_chunkymalloc). 
*/ ELMM_INLINE void* elmm_innermalloc(elmm_t* mm, uintptr_t size) { elmm_bigchunk_t* chunk = mm->firstChunk; while (chunk != NULL) { void* result = elmm_chunkymalloc(mm, chunk, size); if (result != NULL) { return result; // Success! } chunk = chunk->internals.nextChunk; } /* If we got to the end without any being able to allocate, then we didn't find * any free memory in the heap. */ return NULL; } /* The main allocation function, equivalent to malloc. */ ELMM_INLINE void* elmm_malloc(elmm_t* mm, uintptr_t size) { if (!elmm_checkinit(mm)) { return NULL; } if (!elmm_dolock(mm, ELMM_LOCK_WAITLOCK, &mm->mainLock, mm->lockData)) { return NULL; } void* result = NULL; /* First try innermalloc once, for the usual case where memory is already available. */ result = elmm_innermalloc(mm, size); /* Only if the first allocation failed do we need to expand the heap and try again. */ if (result == NULL) { //printf("innermalloc failed, trying growheap...\n"); if (elmm_growheap(mm, size)) { /* And only if the heap has actually be grown should we try again. */ //printf("growheap worked, trying malloc again...\n"); result = elmm_innermalloc(mm, size); if (result == NULL) { //printf("That failed :(\n"); } else { //printf("That worked :D\n"); } } else { //printf("growheap failed\n"); } } if (!elmm_dolock(mm, ELMM_LOCK_UNLOCK, &mm->mainLock, mm->lockData)) { return NULL; } return result; } /* Only used internally by elmm_free to find the bigchunk which the data at the given pointer would reside in. */ ELMM_INLINE elmm_bigchunk_t* elmm_innerchunk(elmm_t* mm, void* pointer) { elmm_bigchunk_t* chunk = mm->firstChunk; while (chunk != NULL) { uintptr_t iptr = (uintptr_t)pointer; uintptr_t cptr = (uintptr_t)(chunk->startAddress); if (iptr >= cptr && iptr < cptr + chunk->totalSize) { return chunk; } chunk = chunk->internals.nextChunk; } return NULL; } /* Only used internally by elmm_free to unlink a smallchunk element from a list. 
*/
ELMM_INLINE bool elmm_innerunlink(elmm_t* mm, elmm_smallchunk_t** listVariable, elmm_smallchunk_t* element) {
    (void)mm;
    if (listVariable == NULL || element == NULL) {
        return false;
    }
    /* Walk the links rather than the nodes: this handles the head, the middle and an EMPTY list
     * uniformly (an empty list simply finds nothing instead of dereferencing NULL).
     */
    elmm_smallchunk_t** link;
    for (link = listVariable; *link != NULL; link = &(*link)->next) {
        if (*link == element) {
            *link = element->next;
            element->next = NULL;
            return true;
        }
    }
    return false; /* The element is not a member of this list. */
}

/* The main deallocation function, equivalent to free (except it should always return true, otherwise
 * it has e.g. been fed a NULL value or worse). A pointer is only released if it lies inside one of the
 * manager's bigchunks, its header carries the "allocated" markers and the header is found in that
 * bigchunk's allocated list.
 */
ELMM_INLINE bool elmm_free(elmm_t* mm, void* pointer) {
    if (!elmm_checkinit(mm)) {
        return false;
    }
    if (!elmm_dolock(mm, ELMM_LOCK_WAITLOCK, &mm->mainLock, mm->lockData)) {
        return false;
    }

    bool success = false;

    /* Locate the owning bigchunk BEFORE touching the header, so that a pointer which doesn't
     * belong to this heap never causes memory in front of it to be read.
     */
    elmm_bigchunk_t* bigchunk = (pointer == NULL) ? NULL : elmm_innerchunk(mm, pointer);
    if (bigchunk != NULL) {
        elmm_smallchunk_t* header = elmm_getheader(mm, pointer);
        uintptr_t firstHeader = (uintptr_t)(bigchunk->startAddress) + bigchunk->headerSize;

        if ((uintptr_t)header >= firstHeader
                && header->check1 == ELMM_SMALLCHUNK_ALLOCATED
                && header->check2 == ELMM_SMALLCHUNK_ALLOCATED
                && elmm_innerunlink(mm, &bigchunk->internals.firstAllocated, header)) {
            /* Push the chunk onto the front of the free list; merging is left to compaction. */
            header->check1 = ELMM_SMALLCHUNK_FREE;
            header->next = bigchunk->internals.firstFree;
            header->check2 = ELMM_SMALLCHUNK_FREE;
            bigchunk->internals.firstFree = header;
            success = true;
        }
    }

    if (!elmm_dolock(mm, ELMM_LOCK_UNLOCK, &mm->mainLock, mm->lockData)) {
        return false;
    }

    return success;
}

/* The equivalent of the calloc function, which is built on top of the malloc implementation.
*/ ELMM_INLINE void* elmm_calloc(elmm_t* mm, uintptr_t count, uintptr_t elementSize) { if (elementSize == 3) { elementSize = 4; } else if (elementSize > 4 && elementSize < 8) { elementSize = 8; } else if (elementSize > 8 && elementSize < 16) { elementSize = 16; } else if (elementSize > 16) { while ((elementSize % 16) != 0) { elementSize++; } } uintptr_t size = count * elementSize; void* result = elmm_malloc(mm, size); if (result == NULL) { return NULL; } else { uint8_t* bytes = (uint8_t*)result; uintptr_t i; for (i = 0; i < size; i++) { bytes[i] = 0; } return result; } } /* The equivalent of the realloc function, which is built on top of the calloc and free implementations but also * uses an internal API to get the original pointer's size. */ ELMM_INLINE void* elmm_realloc(elmm_t* mm, void* pointer, uintptr_t newSize) { if (pointer == NULL) { if (newSize == 0) { return NULL; } else { return elmm_calloc(mm, newSize, 1); } } else if (newSize == 0) { elmm_free(mm, pointer); return NULL; } else { uintptr_t oldSize = elmm_sizeof(mm, pointer); void* newPointer = elmm_calloc(mm, newSize, 1); if (newPointer == NULL) { return NULL; } uint8_t* oldBytes = (uint8_t*)pointer; uint8_t* newBytes = (uint8_t*)newPointer; uintptr_t commonSize = (oldSize < newSize) ? oldSize : newSize; uintptr_t i; for (i = 0; i < commonSize; i++) { newBytes[i] = oldBytes[i]; } elmm_free(mm, pointer); return newPointer; } } ELMM_INLINE intptr_t elmm_innercompact(elmm_t* mm, elmm_bigchunk_t* bigchunk) { intptr_t result = 0; elmm_smallchunk_t** chunkvar = &bigchunk->internals.firstFree; elmm_smallchunk_t* chunk = *chunkvar; while (chunk != NULL) { uintptr_t addr1 = (uintptr_t)chunk; uintptr_t addr2 = (uintptr_t)(chunk->next); if (chunk->next != NULL && addr1 > addr2) { /* Sorting is required. 
*/ //printf("I'm going to sort chunks %d and %d into correct order...\n", addr1, addr2); *chunkvar = chunk->next; chunk->next = (*chunkvar)->next; (*chunkvar)->next = chunk; result++; } else if (addr2 == (addr1 + sizeof(elmm_smallchunk_t) + chunk->size)) { //printf("I'm going to compact chunks %d and %d into one chunk...\n", addr1, addr2); chunk->size += sizeof(elmm_smallchunk_t) + chunk->next->size; chunk->next = chunk->next->next; result++; } chunkvar = &chunk->next; chunk = chunk->next; } return result; } /* Performs one-or-more cycles of compaction, returning the total number of sorting or compaction operations performed, and stopping * early if it stops finding sortable or compactable entries. Returns -1 on error. */ ELMM_INLINE intptr_t elmm_compact(elmm_t* mm/*, uintptr_t maxCycles*/) { if (!elmm_checkinit(mm)) { return -1; } if (!elmm_dolock(mm, ELMM_LOCK_WAITLOCK, &mm->mainLock, mm->lockData)) { return -1; } intptr_t result = 0; elmm_bigchunk_t* chunk = mm->firstChunk; while (chunk != NULL) { intptr_t nchanges = elmm_innercompact(mm, chunk); result += nchanges; chunk = chunk->internals.nextChunk; } if (!elmm_dolock(mm, ELMM_LOCK_UNLOCK, &mm->mainLock, mm->lockData)) { return -1; } return result; } /* Just calls elmm_compact until either there's nothing more to compact or an error occurs. * NOTE: This should be threadsafe, in that the internal cycles perform locking, but if you * continue allocating the whole time it's running it'll just keep compacting forever. This * might be the desired behaviour in some scenarios (particularly if you have a dedicated * compaction thread - you'd just call this and then run friendlycompact and sleep for a * while if/when it returns - or if you need to perform compaction only at specific times * this function might also do the trick) but for more general usage you'd probably just want * to call elmm_friendlycompact regularly. 
*/ ELMM_INLINE intptr_t elmm_fullcompact(elmm_t* mm) { intptr_t result = 0; intptr_t tmpresult; while ((tmpresult = elmm_compact(mm)) > 0) { result += tmpresult; } if (tmpresult < 0) { return tmpresult; } else { return result; } } ELMM_INLINE intptr_t elmm_innertrim(elmm_t* mm, elmm_bigchunk_t* bigchunk) { uintptr_t nalloc = elmm_innerstatpart(mm, bigchunk->internals.firstAllocated); uintptr_t nfree = elmm_innerstatpart(mm, bigchunk->internals.firstFree); /* This is the minimum size of a smallchunk worth returning to the system, equal to * whatever's set as the bigchunk granularity except taking it's smallchunk header size * into account. */ uintptr_t mindealloc = mm->bigchunkGranularity - sizeof(elmm_smallchunk_t); elmm_bigchunk_function_t bigchf = mm->bigchunkFunction; if (nalloc == 0) { //printf("That's it, I'm deleting the whole chunk at %d\n", bigchunk); if (bigchunk == mm->firstChunk) { mm->firstChunk = bigchunk->internals.nextChunk; } if (bigchunk->internals.nextChunk != NULL) { bigchunk->internals.prevChunk = bigchunk->internals.prevChunk; } if (bigchf(0, bigchunk, mm->bigchunkData) != NULL) { return -1; } } else if (bigchunk->maximumSize > 0 && nfree >= mindealloc) { /* If the chunk is resizable and there's enough free memory that it'd be worth deallocating, then * try to find a large chunk at the end of this bigchunk and resize. If the free space is too fragmented * or not at the end of the chunk then we'll just have to leave it for later. */ //printf("I'm looking for a chunk to deallocate inside %d\n", bigchunk); elmm_smallchunk_t** smallchunkVar = &bigchunk->internals.firstFree; elmm_smallchunk_t* smallchunk = *smallchunkVar; while (smallchunk != NULL) { if (smallchunk->size >= mindealloc) { uintptr_t startaddr = (uintptr_t)(bigchunk->startAddress); uintptr_t addr = (uintptr_t)smallchunk; /* If this smallchunk ends right at the end of the bigchunk or close enough that another chunk header wouldn't fit, then * let's trim it! 
*/ if (addr + sizeof(elmm_smallchunk_t) + smallchunk->size >= startaddr + bigchunk->totalSize - sizeof(elmm_bigchunk_t)) { //printf("Okay I'm gonna trim some out of the smallchunk at %d with size %d\n", addr, smallchunk->size); } intptr_t actualDealloc = mm->bigchunkGranularity; intptr_t newsz = smallchunk->size - actualDealloc; if (newsz < 0) { /* Set the pointer to this chunk to a pointer to the next chunk (or NULL) instead. We don't have to worry about the * next iteration either, since we can return as soon as we yield some memory. */ *smallchunkVar = smallchunk->next; //printf("I'm deleting that smallchunk entirely.\n"); } else { while (newsz >= mm->bigchunkGranularity && (bigchunk->totalSize - actualDealloc >= mm->bigchunkMinimum)) { newsz -= mm->bigchunkGranularity; actualDealloc += mm->bigchunkGranularity; } /* If we're only trimming part of this smallchunk we can leave the rest in place and just modify it's size. */ smallchunk->size = newsz; //printf("I left it with a size of %d\n", smallchunk->size); } uintptr_t oldtotal = bigchunk->totalSize; if (bigchf(bigchunk->totalSize - actualDealloc, bigchunk, mm->bigchunkData) != bigchunk) { //printf("The bigchunk function failed.\n"); return -1; } /* I guess it worked! But we'll return the actual size calculation in case it ended up zero or erroneous. */ return oldtotal - bigchunk->totalSize; } smallchunkVar = &smallchunk->next; smallchunk = *smallchunkVar; } } else { return 0; } } /* Attempts partial compaction and then tries to yield any spare memory to the operating system (or at least * back to the bigchunk allocator, whatever it wants to do with it). Returns the amount of memory yielded * (zero if none can be yielded) or a negative value on error. Note that this doesn't attempt full compaction * (to avoid locking the memory manager for too long - and also since it wouldn't be required if enough memory * is free), it just runs one compaction cycle and then tries to trim the heap opportunistically. 
*/ ELMM_INLINE intptr_t elmm_friendlycompact(elmm_t* mm) { if (elmm_compact(mm) < 0) { return -1; } if (!elmm_dolock(mm, ELMM_LOCK_WAITLOCK, &mm->mainLock, mm->lockData)) { return -1; } intptr_t result = 0; elmm_bigchunk_t* chunk = mm->firstChunk; while (chunk != NULL) { /* Get the pointer to the next chunk BEFORE trimming, in case this chunk gets yielded entirely. */ elmm_bigchunk_t* next = chunk->internals.nextChunk; intptr_t nreleased = elmm_innertrim(mm, chunk); if (nreleased < 0) { return -1; } result += nreleased; chunk = next; } if (!elmm_dolock(mm, ELMM_LOCK_UNLOCK, &mm->mainLock, mm->lockData)) { return -1; } return result; } /* From ifndef at top of file: */ #endif