slkern/fsinstance.c

1122 lines
42 KiB
C
Raw Permalink Normal View History

// NEW CODE
#include "fsinstance.h"
#include "types.h"
#include "param.h"
#include "memlayout.h"
#include "riscv.h"
#include "sched.h"
#include "defs.h"
#include "fs.h"
#include "stat.h"
#include "proc.h"
#include "kprintf.h"
#include "../mkfs/fsformat.h"
void* memcpy(void*, void*, long);
fsinstance_t* fsinstance_alloc() {
fsinstance_t* result = kalloc();
if (result) {
result->superblock = kalloc();
if (!result->superblock) {
kfree(result);
return NULL;
}
result->itbl = kalloc();
if (!result->itbl) {
kfree(result->superblock);
kfree(result);
return NULL;
}
initlock(&result->itbl_spin, "itbl_spin");
for (int i = 0; i < NINODE; i++) {
result->itbl[i] = kalloc();
if (!result->itbl[i]) {
printf("Failed to preallocate inode\n");
while (--i > 0) {
kfree(result->itbl[i]);
}
kfree(result->superblock);
kfree(result->itbl);
kfree(result);
return NULL;
}
memset(result->itbl[i], 0, 4096);
initsleeplock(&(result->itbl[i]->lock), "inode");
}
memset(result->superblock, 0, sizeof(fsformat_superblock_t));
return result;
} else {
return NULL;
}
}
void fsinstance_loadsuperblock(fsinstance_t* instance, unsigned int device) {
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, 1);
memcpy(instance->superblock, buffer->data, sizeof(fsformat_superblock_t));
diskio_buffer_release(buffer);
}
void* fsinstance_init(fsinstance_t* instance, unsigned int device) {
FSINSTANCE_CHECK(instance);
fsinstance_loadsuperblock(instance, device);
if (instance->superblock->magic == FSFORMAT_MAGIC_OLD) {
// If the magic number is the old one that means fs is "version 0".
instance->fsversion = 0;
fsinstance_inittransactions(instance, device, BSIZE); // TODO: Make block size fully configurable.
// TODO: Add magic number for new filesystem and code to track version etc.
} else if (instance->superblock->magic == FSFORMAT_MAGIC_NEW) {
// If the magic number is the new one that means the filesystem's
// version number is >= 1 and some extended fields of the superblock
// are available.
instance->fsversion = instance->superblock->extd_versionflags & 0xFF;
// TODO: Check block size etc.
fsinstance_inittransactions(instance, device, instance->superblock->extd_blocksize); // TODO: Make block size fully configurable.
// TODO: Add magic number for new filesystem and code to track version etc.
} else {
panic("fsinstance_init: Bad filesystem (superblock magic number doesn't match any known options)");
}
return instance->superblock;
}
void fsinstance_resetblocktozero(fsinstance_t* instance, unsigned int device, unsigned int blocknumber) {
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, blocknumber);
memset(buffer->data, 0, BSIZE); // TODO: Configurable block size
fsinstance_writelogged(instance, buffer);
diskio_buffer_release(buffer);
}
#define FSINSTANCE_BITMAPBLOCK(n) \
(instance->superblock->firstbitmapblock + ((n)/bitsperblock))
// Attemptes to allocate a block, clearing the block to zeroes and
// returning it's block number on success or printing a warning and
// returning 0 if out of space.
unsigned int fsinstance_allocdatablock(fsinstance_t* instance, unsigned int device) {
FSINSTANCE_CHECK(instance);
int bitsperblock = BSIZE*8; // TODO: Configurable block size
for (int blocknumber = 0; blocknumber < instance->superblock->totalblocks; blocknumber += bitsperblock) {
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber));
for (int bitindex = 0; bitindex < bitsperblock && (blocknumber + bitindex) < instance->superblock->totalblocks; bitindex++) {
int mask = 1 << (bitindex & 7); // bitindex % 8
int byteindex = bitindex >> 3; // bitindex / 8
if ((buffer->data[byteindex] & mask) == 0) {
// The block is marked as free so we should mark it as used.
buffer->data[byteindex] |= mask;
fsinstance_writelogged(instance, buffer);
// Then we can release the buffer since we'll be returning soon.
diskio_buffer_release(buffer);
// Now we have the final block number, zero it and return.
unsigned int finalnumber = blocknumber + bitindex;
fsinstance_resetblocktozero(instance, device, finalnumber);
return finalnumber;
}
}
diskio_buffer_release(buffer);
}
printf("fsinstance_allocdatablock: No more free blocks, filesystem is full!\n");
return 0;
}
// Counts the free blocks (NOTE: This should be run inside a transaction
// for consistency, although it doesn't modify any blocks). Returns the
// number of blocks marked free for data or directory allocation.
unsigned int fsinstance_countfreeblocks(fsinstance_t* instance, unsigned int device) {
FSINSTANCE_CHECK(instance);
int bitsperblock = BSIZE*8; // TODO: Configurable block size
unsigned int count = 0;
for (int blocknumber = 0; blocknumber < instance->superblock->totalblocks; blocknumber += bitsperblock) {
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber));
for (int bitindex = 0; bitindex < bitsperblock && (blocknumber + bitindex) < instance->superblock->totalblocks; bitindex++) {
int mask = 1 << (bitindex & 7); // bitindex % 8
int byteindex = bitindex >> 3; // bitindex / 8
if ((buffer->data[byteindex] & mask) == 0) {
// The block is marked as free so we simply imcrement the count
count++;
}
}
diskio_buffer_release(buffer);
}
return count;
}
// Marks a data block as free in the used/free bitmap (this assumes that
// any reference to it will have been reset separately).
void fsinstance_freedatablock(fsinstance_t* instance, unsigned int device, unsigned int blocknumber) {
FSINSTANCE_CHECK(instance);
int bitsperblock = BSIZE*8; // TODO: Configurable block size
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber));
unsigned int bitindex = blocknumber % bitsperblock;
unsigned int byteindex = bitindex >> 3; // bitindex / 8
int mask = 1 << (bitindex & 7); // bitindex % 8
if ((buffer->data[byteindex] & mask) == 0) {
panic("fsinstance_freedatablock: Caller attempted to free a block which is already marked free!");
}
buffer->data[byteindex] &= ~mask; // And it with every bit EXCEPT the one we want to clear
fsinstance_writelogged(instance, buffer);
diskio_buffer_release(buffer);
}
// TODO: Configurable block size
#define FSINSTANCE_INODEBLOCK(n) \
(((n) / IPB) + instance->superblock->firstinodeblock)
// Looks up the inode number on the filesystem and returns the in-memory
// inode structure with reference count adjusted for the caller, without
// locking or loading the inode.
fsinstance_inode_t* fsinstance_getinode(fsinstance_t* instance, unsigned int device, unsigned int inodenumber) {
FSINSTANCE_CHECK(instance);
acquire(&instance->itbl_spin);
fsinstance_inode_t* emptyinode = NULL;
for (int inodeindex = 0; inodeindex < NINODE; inodeindex++) { // TODO: Flexible size of in-memory inode table
fsinstance_inode_t* inode = instance->itbl[inodeindex];
if (inode->referencecount > 0 && inode->inodenumber == inodenumber && inode->device == device) { // NOTE: Checks for device are probably unnecessary unless backends are combined for some reason
// This inode already has an in-memory reference, so just return it
inode->referencecount = inode->referencecount + 1;
release(&instance->itbl_spin); // After unlocking the inode table.
return inode;
}
if (inode->referencecount == 0 && emptyinode == NULL) {
// Save the empty inode until we've checked them all for a match.
emptyinode = inode;
}
}
// If no match was found, we need to (hopefully) fill an empty inode.
if (emptyinode == NULL) {
panic("fsinstance_getinode: the in-memory inode table is exhausted, future versions should have better handling for this case!\n"); // TODO: Better handling!
}
emptyinode->instance = instance;
emptyinode->device = device;
emptyinode->inodenumber = inodenumber;
emptyinode->referencecount = 1;
emptyinode->valid = 0; // Mark this as having not been loaded yet!
// Be sure to unlock the inode table before returning.
release(&instance->itbl_spin);
return emptyinode;
}
// Allocates an inode, marking it with the given type. Returns the
// allocated inode (without locking it) on success or prints a warning
// and returns NULL on failure.
fsinstance_inode_t* fsinstance_allocinode(fsinstance_t* instance, unsigned int device, short inodetype) {
FSINSTANCE_CHECK(instance);
for (int inodenumber = 1; inodenumber < instance->superblock->inodelimit; inodenumber++) {
// First load the block containing the inode
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_INODEBLOCK(inodenumber));
// Then get the inode offset within the block.
fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inodenumber % IPB); // TODO: Configurable block size
if (diskinode->type == 0) {
// This inode is marked unused, so mark it with the type & save it
memset(diskinode, 0, sizeof(fsformat_inode_t)); // Be sure to zero
diskinode->type = inodetype;
fsinstance_writelogged(instance, buffer);
// Now we can release the buffer and return the new inode.
diskio_buffer_release(buffer);
return fsinstance_getinode(instance, device, inodenumber);
}
diskio_buffer_release(buffer);
}
printf("fsinstance_allocinode: No more inodes, filesystem inode table is full!\n");
return NULL;
}
// Increases the reference count of an inode, locking on the inode table
// and returning the inode pointer to simulate a copy operation.
fsinstance_inode_t* fsinstance_inode_copyref(fsinstance_inode_t* inode) {
acquire(&inode->instance->itbl_spin);
inode->referencecount = inode->referencecount + 1;
release(&inode->instance->itbl_spin);
return inode;
}
// Saves an inode to disk. This should be called after modifying any
// inode field that's saved to disk, and must be called while the caller
// holds the inode's lock.
void fsinstance_inode_save(fsinstance_inode_t* inode) {
fsinstance_t* instance = inode->instance;
FSINSTANCE_CHECK(instance);
// Begin by loading the block with the on-disk inode
diskio_buffer_t* buffer = diskio_buffer_read(inode->instance->cache, inode->device, FSINSTANCE_INODEBLOCK(inode->inodenumber));
// Get the inode offset within the block.
fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inode->inodenumber % IPB); // TODO: Configurable block size
// Update the fields of the on-disk inode structure
diskinode->type = inode->type;
diskinode->device_major = inode->major;
diskinode->device_minor = inode->minor;
diskinode->linkcount = inode->nlink;
diskinode->totalbytes = inode->size;
// Copy the direct+indirect addresses
memcpy(diskinode->addrs, inode->addrs, sizeof(unsigned int)*(NDIRECT + 1));
// Save it to disk (or at least to the transaction log!)
fsinstance_writelogged(inode->instance, buffer);
// Finally release the buffer.
diskio_buffer_release(buffer);
}
// Unlocks an inode but does not perform any other actions like saving.
void fsinstance_inode_unlock(fsinstance_inode_t* inode) {
if (inode == NULL) {
panic("fsinstance_inode_unlock: inode argument is NULL");
} else if (!holdingsleep(&inode->lock)) {
panic("fsinstance_inode_unlock: inode argument is not locked");
} else if (inode->referencecount < 1) {
panic("fsinstance_inode_unlock: inode argument reference count is below 1");
}
releasesleep(&inode->lock);
}
// Locks the inode, and reads it into memory for the first time if it
// hasn't been loaded yet.
void fsinstance_inode_lockandload(fsinstance_inode_t* inode) {
fsinstance_t* instance = inode->instance;
FSINSTANCE_CHECK(instance);
if (inode == NULL) {
panic("fsinstance_inode_lockandload: inode argument is NULL");
} else if (inode->referencecount < 1) {
panic("fsinstance_inode_lockandload: inode argument reference count is below 1");
}
acquiresleep(&inode->lock);
// Load the inode from disk but only if it hasn't already been loaded.
if (!inode->valid) {
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, FSINSTANCE_INODEBLOCK(inode->inodenumber));
// Get the inode offset within the block.
fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inode->inodenumber % IPB); // TODO: Configurable block size
inode->type = diskinode->type;
inode->major = diskinode->device_major;
inode->minor = diskinode->device_minor;
inode->nlink = diskinode->linkcount;
inode->size = diskinode->totalbytes;
memcpy(inode->addrs, diskinode->addrs, sizeof(unsigned int) * (NDIRECT + 1));
diskio_buffer_release(buffer);
inode->valid = 1;
if (inode->type == 0) {
panic("fsinstance_inode_lockandload: inode type is zero, filesystem is corrupt or caller is buggy");
}
}
}
void fsinstance_deleteindirect(fsinstance_t* instance, unsigned int blocknumber) {
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, blocknumber);
unsigned int* addrs = (void*) buffer->data;
for (int indirectindex = 0; indirectindex < NINDIRECT; indirectindex++) { // TODO: Make block size configurable
unsigned int innerblock = addrs[indirectindex];
if (innerblock != 0) {
fsinstance_freedatablock(instance, instance->fslog_device, innerblock);
}
// NOTE: addrs[indirectindex] doesn't need to be reset because
// we'll be freeing the whole table!
}
diskio_buffer_release(buffer);
// Free the page holding the list of indirect pages:
fsinstance_freedatablock(instance, instance->fslog_device, blocknumber);
}
// Deletes the entire contents of an inode but does not perform any
// other action such as deleting the inode (it's size will become 0).
// The inode must be locked by the caller.
void fsinstance_inode_deletecontents(fsinstance_inode_t* inode) {
fsinstance_t* instance = inode->instance;
FSINSTANCE_CHECK(instance);
// We can begin by setting the size to zero, since we won't be relying
// on that to check which pages to free
inode->size = 0;
// Do the hard part first, check for indirect blocks. These are stored
// as a block full of block addresses, with it's own address stored at
// the end of the list of "direct" block addresses.
unsigned int indirectpage = inode->addrs[NDIRECT]; // Value at end of array
if (indirectpage != 0) {
// Reset the reference from the inode because we'll free the table.
inode->addrs[NDIRECT] = 0;
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, indirectpage);
unsigned int* addrs = (void*) buffer->data;
for (int indirectindex = 0; indirectindex < NINDIRECT; indirectindex++) { // TODO: Make block size configurable
unsigned int blocknumber = addrs[indirectindex];
if (blocknumber != 0) {
if (instance->fsversion == 0) {
// Simply delete the referenced block
fsinstance_freedatablock(instance, inode->device, blocknumber);
} else {
// Do a deep delete of a block-of-blockrefs
fsinstance_deleteindirect(instance, blocknumber);
}
}
// NOTE: addrs[indirectindex] doesn't need to be reset because
// we'll be freeing the whole table!
}
diskio_buffer_release(buffer);
// Free the page holding the list of indirect pages:
fsinstance_freedatablock(instance, inode->device, indirectpage);
}
// Delete the "direct" blocks, these are simple block addresses.
for (int directindex = 0; directindex < NDIRECT; directindex++) {
if (inode->addrs[directindex]) {
fsinstance_freedatablock(instance, inode->device, inode->addrs[directindex]);
inode->addrs[directindex] = 0;
}
}
// Finally save the inode.
fsinstance_inode_save(inode);
}
// The inverse of a get operation, and is also responsible for
// deleting inodes which have been "unlinked". This must be called from
// within a transaction.
void fsinstance_inode_unget(fsinstance_inode_t* inode) {
fsinstance_t* instance = inode->instance;
acquire(&instance->itbl_spin);
if (inode->valid && inode->nlink == 0 && inode->referencecount == 1) {
// Since referencecount is 1 we are the only user of the inode so
// this lock should instantly belong to us.
acquiresleep(&inode->lock);
// Release the itable lock while deleting.
release(&instance->itbl_spin);
// Delete any data (or dirent) blocks associated with this inode
// before deleting the rest of the structure.
fsinstance_inode_deletecontents(inode);
inode->type = 0;
fsinstance_inode_save(inode);
inode->valid = 0; // Mark it as not being a loaded inode
releasesleep(&inode->lock);
acquire(&instance->itbl_spin);
}
// Finally decrement the reference count and unlock the inode table.
inode->referencecount = inode->referencecount - 1;
release(&instance->itbl_spin);
}
void fsinstance_inode_unlockandunget(fsinstance_inode_t* inode) {
fsinstance_inode_unlock(inode);
fsinstance_inode_unget(inode);
}
// Copy information for a "stat" system call to a (kernel-mode or
// filesystem-mode) buffer.
void fsinstance_inode_getstatinfo(fsinstance_inode_t* inode, struct stat* output) {
output->type = inode->type;
output->size = inode->size;
output->dev = inode->device;
output->ino = inode->inodenumber;
output->nlink = inode->nlink;
}
// Returns the actual block number of the given logical block of a file,
// allocating an associated block if it doesn't exist.
unsigned int fsinstance_inode_getactualblock(fsinstance_inode_t* inode, unsigned int blocknumber) {
fsinstance_t* instance = inode->instance;
if (blocknumber < NDIRECT) {
unsigned int a = inode->addrs[blocknumber];
if (a == 0) {
a = fsinstance_allocdatablock(instance, inode->device);
inode->addrs[blocknumber] = a;
}
return a;
}
unsigned int indirectindex = blocknumber - NDIRECT;
unsigned int tableblock;
if (instance->fsversion == 0) {
tableblock = inode->addrs[NDIRECT];
if (tableblock == 0) {
tableblock = fsinstance_allocdatablock(instance, inode->device);
inode->addrs[NDIRECT] = tableblock;
if (tableblock == 0) {
return 0;
}
}
} else {
unsigned int tableotables = inode->addrs[instance->superblock->extd_directblocks];
if (tableotables == 0) {
tableotables = fsinstance_allocdatablock(instance, inode->device);
inode->addrs[instance->superblock->extd_directblocks] = tableotables;
if (tableotables == 0) {
return 0;
}
}
//printf("using tableotables at %d\n", tableotables);
diskio_buffer_t* ttbuffer = diskio_buffer_read(instance->cache, inode->device, tableotables);
unsigned int* tt = (void*) ttbuffer->data;
tableblock = tt[indirectindex / (instance->superblock->extd_blocksize / 4)];
if (tableblock == 0) {
tableblock = fsinstance_allocdatablock(instance, inode->device);
tt[indirectindex / (instance->superblock->extd_blocksize / 4)] = tableblock;
if (tableblock == 0) {
return 0;
}
fsinstance_writelogged(instance, ttbuffer);
}
diskio_buffer_release(ttbuffer);
indirectindex %= (instance->superblock->extd_blocksize / 4);
}
//printf("using tableblock at %d\n", tableblock);
if (indirectindex >= NINDIRECT) {
panic("fsinstance_inode_getactualblock: Block index is out of range");
}
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, tableblock);
unsigned int* table = (void*) buffer->data;
unsigned int finaladdr = table[indirectindex];
if (finaladdr == 0) {
finaladdr = fsinstance_allocdatablock(instance, inode->device);
table[indirectindex] = finaladdr;
fsinstance_writelogged(instance, buffer);
}
diskio_buffer_release(buffer);
//printf("using finaladdr at %d\n", finaladdr);
return finaladdr;
}
#define FSINSTANCE_MIN(x,y) \
(((x) >= (y)) ? (y) : (x))
#define FSINSTANCE_MAX(x,y) \
(((x) >= (y)) ? (x) : (y))
// Reads from an inode's data into memory. The memory can be either in
// user-land or kernel-land, with isuserland set to 1 in the case of
// reading to a user-land buffer. The caller is expected to have locked
// the inode.
int fsinstance_inode_read(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size) {
fsinstance_t* instance = inode->instance;
if (offset > inode->size || offset + size < offset) { // Check for overflows as well as reading from beyond the end of file
return 0; // No bytes read.
}
unsigned int realsize;
if (offset + size > inode->size) {
realsize = inode->size - offset;
} else {
realsize = size;
}
unsigned int i = 0;
while (i < realsize) {
unsigned int blocknumber = fsinstance_inode_getactualblock(inode, offset / BSIZE); // TODO: Make block size configurable
if (!blocknumber) {
return -1;
}
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, blocknumber);
unsigned int nread = FSINSTANCE_MIN(realsize - i, BSIZE - offset%BSIZE);
if (either_copyout(isuserland, address, buffer->data + offset % BSIZE, nread) == -1) {
diskio_buffer_release(buffer);
return -1;
}
diskio_buffer_release(buffer);
i += nread;
address += nread;
offset += nread;
}
return i;
}
// Writes to an inode's data from memory. The address can be either
// in user-land or kernel-land, with isuserland set to 1 in the case of
// reading to a user-land buffer. The caller is expected to have locked
// the inode.
int fsinstance_inode_write(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size) {
fsinstance_t* instance = inode->instance;
if (offset > inode->size || offset + size < offset) {
return -1;
}
if (offset + size > MAXFILE * BSIZE) { // TODO: Adjustable limits.
return -1;
}
unsigned int i = 0;
while (i < size) {
unsigned int blocknumber = fsinstance_inode_getactualblock(inode, offset / BSIZE); // TODO: Make block size configurable
if (blocknumber == 0) {
goto savetodisk;
}
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, blocknumber);
unsigned int nwritten = FSINSTANCE_MIN(size - i, BSIZE - offset % BSIZE);
if (either_copyin(buffer->data + offset % BSIZE, isuserland, address, nwritten) == -1) {
diskio_buffer_release(buffer);
goto savetodisk;
}
fsinstance_writelogged(inode->instance, buffer);
diskio_buffer_release(buffer);
i += nwritten;
address += nwritten;
offset += nwritten;
}
savetodisk:
inode->size = FSINSTANCE_MAX(offset, inode->size);
// Save the inode information to disk, especially as size and block
// addresses may have changed (NOTE: the data itself will have already
// been written to the associated blocks but they won't be linked in
// to the file properly on disk until it's saved).
fsinstance_inode_save(inode);
return i;
}
int fsinstance_nameseq(fsinstance_t* instance, const char* namea, const char* nameb) {
return (strncmp(namea, nameb, instance->fsversion == 0 ? FSFORMAT_NAMESIZE_OLD : FSFORMAT_NAMESIZE_NEW) == 0) ? 1 : 0; // TODO: Compare longer names...
}
fsinstance_inode_t* fsinstance_inode_lookup(fsinstance_inode_t* directory, char* filename, unsigned int* entryoffsetvar) {
// Begin by checking that the directory inode and filesystem instance
// are valid, as these checks may be especially essential when
// searching through the filesystem!
fsinstance_t* instance = directory->instance;
FSINSTANCE_CHECK(instance);
if (directory->type != T_DIR) { // TODO: Replace these constants
panic("fsinstance_inode_lookup: Bad inode type, expecting directory but inode is not a directory");
}
// Convenience variables for directory size & entry size calculations.
// dirsize divided by entrysize should give the number of entries.
unsigned int dirsize = directory->size;
unsigned int entrysize = (unsigned int) (instance->fsversion == 0 ? sizeof(fsformat_dirent_v0_t) : sizeof(fsformat_dirent_v1_t));
// Then traverse through, reading 1 entry at a time until either
// finding a match or getting to the end of the directory.
for (unsigned int offset = 0; offset < dirsize; offset += entrysize) {
if (instance->fsversion == 0) {
fsformat_dirent_v0_t entry;
int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize);
if (r != entrysize) {
// TODO: Replace panics within the filesystem with some kind of
// unmount-when-corrupted system.
panic("fsinstance_inode_lookup: Read failed internally");
}
// If found set the offset variable (if it isn't NULL), get the
// file's inode and return it.
if (fsinstance_nameseq(instance, filename, entry.filename)) {
if (entryoffsetvar != NULL) {
*entryoffsetvar = offset;
}
return fsinstance_getinode(instance, directory->device, entry.inodenumber);
}
} else {
fsformat_dirent_v1_t entry;
int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize);
if (r != entrysize) {
// TODO: Replace panics within the filesystem with some kind of
// unmount-when-corrupted system.
panic("fsinstance_inode_lookup: Read failed internally");
}
// If found set the offset variable (if it isn't NULL), get the
// file's inode and return it.
if (fsinstance_nameseq(instance, filename, entry.filename)) {
if (entryoffsetvar != NULL) {
*entryoffsetvar = offset;
}
return fsinstance_getinode(instance, directory->device, entry.datainode);
}
}
}
// If the search finished without returning that means there was no
// match, so just return NULL.
return NULL;
}
// Checks if a directory has a given filename in it, returning 1 if
// there is a match and 0 otherwise.
int fsinstance_inode_hasentry(fsinstance_inode_t* directory, char* filename) {
fsinstance_inode_t* inode = fsinstance_inode_lookup(directory, filename, NULL);
if (inode == NULL) {
return 0;
} else {
fsinstance_inode_unget(inode);
return 1;
}
}
// Attempts to insert a new directory entry into the given directory,
// returning 0 if successful or -1 otherwise. This function
int fsinstance_inode_insert(fsinstance_inode_t* directory, char* filename, unsigned int inodenumber) {
fsinstance_t* instance = directory->instance;
FSINSTANCE_CHECK(instance);
if (fsinstance_inode_hasentry(directory, filename)) {
return -1;
}
// Convenience variables for directory size & entry size calculations.
// dirsize divided by entrysize should give the number of entries.
unsigned int dirsize = directory->size;
unsigned int entrysize = (unsigned int) (instance->fsversion == 0 ? sizeof(fsformat_dirent_v0_t) : sizeof(fsformat_dirent_v1_t));
unsigned int offset = 0;
while (offset < dirsize) {
if (instance->fsversion == 0) {
fsformat_dirent_v0_t entry;
int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize);
if (r != entrysize) {
// TODO: Replace panics within the filesystem with some kind of
// unmount-when-corrupted system.
panic("fsinstance_inode_insert: Read failed internally");
}
if (!entry.inodenumber) {
goto foundentry;
}
} else {
fsformat_dirent_v1_t entry;
int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize);
if (r != entrysize) {
// TODO: Replace panics within the filesystem with some kind of
// unmount-when-corrupted system.
panic("fsinstance_inode_insert: Read failed internally");
}
if (!entry.datainode) {
goto foundentry;
}
}
offset += entrysize;
}
// When the code reaches foundentry:, it will have either found a free
// entry or searched to the end of the directory. Either way we can
// now simply write the directory entry to that offset and it will be
// added!
foundentry:
if (instance->fsversion == 0) {
fsformat_dirent_v0_t newentry;
strncpy(newentry.filename, filename, FSFORMAT_NAMESIZE_OLD); // TODO: Support for longer names
newentry.inodenumber = inodenumber;
if (fsinstance_inode_write(directory, 0, (unsigned long long) &newentry, offset, entrysize) != entrysize) {
return -1;
}
} else {
fsformat_dirent_v1_t newentry;
strncpy(newentry.filename, filename, FSFORMAT_NAMESIZE_NEW); // TODO: Support for longer names
newentry.datainode = inodenumber;
newentry.metainode = 0xFFFFFFFF;
if (fsinstance_inode_write(directory, 0, (unsigned long long) &newentry, offset, entrysize) != entrysize) {
return -1;
}
}
// If we got to the end then return 0 to indicate success.
return 0;
}
#define FSINSTANCE_SKIPSLASHES(iter) \
while (*iter == '/' || *iter == '\\') { \
iter++; \
}
#define FSINSTANCE_SKIPFILENAME(iter) \
while (*iter != 0 && *iter != '/' && *iter != '\\') { \
iter++; \
}
// Scan to the next part of the path, reading the first part into
// filenamevar (which must have FSFORMAT_NAMESIZE reserved bytes).
// Returns the path iterator at the start of the next filename/dirname
// or NULL to indicate end of string.
char* fsinstance_scanfilename(fsinstance_t* instance, char* pathiterator, char* filenamevar) {
FSINSTANCE_SKIPSLASHES(pathiterator);
char* namestart = pathiterator;
if (*namestart == 0) {
return 0; // End of path
}
FSINSTANCE_SKIPFILENAME(pathiterator);
int namelen = pathiterator - namestart;
memcpy(filenamevar, namestart, FSINSTANCE_MIN(namelen, FSFORMAT_NAMESIZE_OLD)); // TODO: Support for longer filenames
if (namelen < FSFORMAT_NAMESIZE_OLD) {
filenamevar[namelen] = 0;
}
FSINSTANCE_SKIPSLASHES(pathiterator); // This is probably unnecessary?
return pathiterator;
}
// The internal path lookup function (used by the simple lookups). The
// filenamevar must point to an array of FSINSTANCE_NAMESIZE bytes which
// will be used as a scratch variable as well as to look up the filename
// within the parent directory (if getparent is non-zero). Any file
// lookup needs to happen within a transaction.
fsinstance_inode_t* fsinstance_lookuppath(fsinstance_t* instance, char* path, int getparent, char* filenamevar) {
FSINSTANCE_CHECK(instance);
fsinstance_inode_t* inode;
if (*path == '/' || *path == '\\') {
inode = fsinstance_getinode(instance, instance->fslog_device, ROOTINO); // TODO: Make all this configurable
} else {
inode = fsinstance_inode_copyref(myproc()->cwd); // TODO: This shouldn't be here... It should be passed in
}
char* pathiterator = path;
do {
pathiterator = fsinstance_scanfilename(instance, pathiterator, filenamevar);
if (!pathiterator) {
goto finished;
}
fsinstance_inode_lockandload(inode);
if (inode->type != T_DIR) {
fsinstance_inode_unlockandunget(inode);
return NULL;
}
// If searching for the file's parent directory, stop when there is
// no more path left without looking up the filename.
if (*pathiterator == 0 && getparent) {
fsinstance_inode_unlock(inode);
return inode;
}
fsinstance_inode_t* subnode = fsinstance_inode_lookup(inode, filenamevar, NULL);
if (subnode == NULL) {
fsinstance_inode_unlockandunget(inode);
return NULL;
}
fsinstance_inode_unlockandunget(inode);
inode = subnode;
} while(1);
finished:
// If we reached here and we're looking for a parent directory then
// there mustn't be one.
if (getparent) {
fsinstance_inode_unget(inode);
return NULL;
}
// Otherwise we're just looking for whatever is at the end of the path
return inode;
}
// Simple path lookup of a path's filename within a parent directory.
// The filename variable needs to point to at least FSFORMAT_NAMESIZE
// bytes of reserved memory. Any file lookup needs to happen within a
// transaction.
fsinstance_inode_t* fsinstance_lookupparent(fsinstance_t* instance, char* path, char* filenamevar) {
return fsinstance_lookuppath(instance, path, 1, filenamevar);
}
// Simple path lookup, returning the inode matching the path or NULL if
// not found. Any file lookup needs to happen within a transaction.
fsinstance_inode_t* fsinstance_lookup(fsinstance_t* instance, char* path) {
char scratchvar[FSFORMAT_NAMESIZE_NEW]; // Uses the new size as this is larger
return fsinstance_lookuppath(instance, path, 0, scratchvar);
}
// Saves the log header to disk, this is used internally to set the disk
// into a state where the transaction is finishable.
void fsinstance_savetransaction(fsinstance_t* instance) {
// First read in the structure.
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start);
fsinstance_logheader_t* header = (void*) buffer->data;
// Then change the values to our in-memory copy
int total = instance->fslog_header.number;
int count = 0;
while (count < total) {
header->blocks[count] = instance->fslog_header.blocks[count];
count++;
}
header->number = total;
// Then save the copy to disk
diskio_buffer_write(buffer);
diskio_buffer_release(buffer);
}
// Loads the log header from disk, this is used internally to reload the
// structure at startup.
void fsinstance_loadtransaction(fsinstance_t* instance) {
// First read in the log header
diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start);
fsinstance_logheader_t* header = (void*) buffer->data;
// Then copy the structure values into our in-memory copy
int total = header->number;
int count = 0;
while (count < total) {
instance->fslog_header.blocks[count] = header->blocks[count];
count++;
}
instance->fslog_header.number = total;
// Then release the buffer.
diskio_buffer_release(buffer);
printf("fsinstance_loadtransaction: got %d block references\n", total);
}
// Called internally to save the cached blocks included in this
// transaction.
void fsinstance_savecache(fsinstance_t* instance) {
int total = instance->fslog_header.number;
int count = 0;
while (count < total) {
// Get the block we want logged.
diskio_buffer_t* src = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_header.blocks[count]);
// Then get the block in the log we'll be storing it to (note, the
diskio_buffer_t* dst = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start + 1 + count);
memcpy(dst->data, src->data, BSIZE); // TODO: Make block size configurable
// Save the block data into the log.
diskio_buffer_write(dst);
diskio_buffer_release(src);
diskio_buffer_release(dst);
count++;
}
}
// Called internally to copy the logged blocks to their final locations.
// This is mostly an inverse of fsinstance_savecache.
void fsinstance_applytransactionblocks(fsinstance_t* instance, int rebootmode) {
int total = instance->fslog_header.number;
int count = 0;
while (count < total) {
// Get the block we want logged.
diskio_buffer_t* src = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start + 1 + count);
// Then get the block in the log we'll be storing it to (note, the
diskio_buffer_t* dst = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_header.blocks[count]);
memcpy(dst->data, src->data, BSIZE); // TODO: Make block size configurable
// Save the block data into the log.
diskio_buffer_write(dst);
if (!rebootmode) {
diskio_buffer_dereference(dst);
}
diskio_buffer_release(src);
diskio_buffer_release(dst);
count++;
}
}
// Called internally to reset the on-disk transaction to a "zero blocks"
// state.
void fsinstance_resettransaction(fsinstance_t* instance) {
// Just reset the log count and save the transaction header.
instance->fslog_header.number = 0;
fsinstance_savetransaction(instance);
}
// Called at startup time or whenever else the disk is mounted to check
// and apply any partially-applied writes, i.e. an "after-reboot check".
void fsinstance_rebootcheck(fsinstance_t* instance) {
// This will load and perform any transaction left on the device,
// which will only be left in the event that a transaction was partly
// completed, therefore applying the whole transaction's blocks will
// complete the transaction and return the filesystem to a consistent
// state.
// In the typical case (when the filesystem is already consistent),
// there will be nothing in the log to commit.
fsinstance_loadtransaction(instance);
fsinstance_applytransactionblocks(instance, 1); // 1 == reboot check
// Reset the log's block count to zero and save.
fsinstance_resettransaction(instance);
}
// Called internally to perform a "commit" of the current transaction
// using the other internal functions. This will have no effect in cases
// where no actual data has been modified in this transaction.
void fsinstance_innercommit(fsinstance_t* instance) {
if (instance->fslog_header.number == 0) {
return;
}
// First the block data of the transaction is written to the reserved
// log blocks.
fsinstance_savecache(instance);
// Then the transaction header itself is written, meaning that as long
// as this part is either written or not written the filesystem will
// be left in a state before or after the transaction was committed.
fsinstance_savetransaction(instance);
// Then attempt to copy the blocks to their intended final locations,
// which may fail and be retried by the reboot-check if the system
// loses power or reboots during copying.
fsinstance_applytransactionblocks(instance, 0); // 0 = this call isn't the reboot check
// Reset the log's block count to zero and save.
fsinstance_resettransaction(instance);
}
// This is like the diskio_buffer_write function except that it just
// adds the block to the log to be committed later.
void fsinstance_writelogged(fsinstance_t* instance, diskio_buffer_t* buffer) {
acquire(&instance->fslog_lock);
if (instance->fslog_header.number >= instance->fslog_size || instance->fslog_header.number >= FSINSTANCE_MAXLOGBLOCKS) {
panic("fsinstance_writelogged: The filesystem is not configured for transactions this large");
}
if (instance->fslog_outstanding <= 0) {
panic("fsinstance_writelogged: Caller attempted to write without being inside of a transaction's begin/end sequence");
}
int blockindex = 0;
while (blockindex < instance->fslog_header.number) {
if (instance->fslog_header.blocks[blockindex] == buffer->blocknumber) {
goto logged;
}
blockindex++;
}
instance->fslog_header.blocks[blockindex] = buffer->blocknumber;
if (blockindex == instance->fslog_header.number) {
diskio_buffer_reference(buffer); // It's now "referenced" by the log
instance->fslog_header.number++;
}
logged:
release(&instance->fslog_lock);
}
// Called at the beginning of a filesystem transaction, waits until the
// filesystem is ready to accept more transactions and then returns with
// a new transaction marked as being in progress.
void fsinstance_begin(fsinstance_t* instance) {
acquire(&instance->fslog_lock);
do {
if (instance->fslog_committing || instance->fslog_header.number + ((instance->fslog_outstanding + 1) * FSINSTANCE_MAXBLOCKSPEROP) > instance->fslog_size) {
// Adding another operation might overflow the log, so sleep until
// it's committed. This is also the case if another transaction
// set is already committing.
sleep(&instance->fslog_sleepvariable, &instance->fslog_lock);
} else {
instance->fslog_outstanding++;
release(&instance->fslog_lock);
goto ready;
}
} while(1);
ready:
return;
}
// Called at the end of a filesystem transaction. Commits the whole set
// of transactions if this is the last one in progress.
void fsinstance_end(fsinstance_t* instance) {
acquire(&instance->fslog_lock);
if (instance->fslog_outstanding < 1) {
panic("fsinstance_end: No transaction is in progress, need to call fsinstance_begin first!");
} else if (instance->fslog_committing) {
panic("fsinstance_end: This transaction set is already being committed, likely a multiprocessing bug");
}
instance->fslog_outstanding--;
int reallycommit = 0;
if (instance->fslog_outstanding == 0) {
reallycommit = 1;
instance->fslog_committing = 1;
} else {
// Wake up any processes waiting in fsinstance_begin.
sched_wake(&instance->fslog_sleepvariable);
}
// Release the log before any commit may be attempted, since that may
// otherwise cause us to sleep with a lock held!
release(&instance->fslog_lock);
if (reallycommit) {
fsinstance_innercommit(instance);
acquire(&instance->fslog_lock);
instance->fslog_committing = 0;
sched_wake(&instance->fslog_sleepvariable);
release(&instance->fslog_lock);
}
}
// This is called internally to initialise/"reboot" the filesystem
// transaction logging system. This will initialise the logging-related
// variables and call fsinstance_rebootcheck to perform any unfinished
// transactions to leave the filesystem structures in a consistent state
void fsinstance_inittransactions(fsinstance_t* instance, unsigned int device, int blocksize) {
if (blocksize <= sizeof(fsinstance_logheader_t)) {
panic("fsinstance_inittransactions: Block size given by the caller is too small for the log header");
}
initlock(&instance->fslog_lock, "fslog_lock");
instance->fslog_start = instance->superblock->firstlogblock;
instance->fslog_size = instance->superblock->logblocks;
instance->fslog_device = device;
// This does most of the actual work of "rebooting" the filesystem, by
// finishing any partly-completed transactions to restore filesystem
// structures to a consistent state.
fsinstance_rebootcheck(instance);
}