// NEW CODE #include "fsinstance.h" #include "types.h" #include "param.h" #include "memlayout.h" #include "riscv.h" #include "sched.h" #include "defs.h" #include "fs.h" #include "stat.h" #include "proc.h" #include "kprintf.h" #include "../mkfs/fsformat.h" void* memcpy(void*, void*, long); fsinstance_t* fsinstance_alloc() { fsinstance_t* result = kalloc(); if (result) { result->superblock = kalloc(); if (!result->superblock) { kfree(result); return NULL; } result->itbl = kalloc(); if (!result->itbl) { kfree(result->superblock); kfree(result); return NULL; } initlock(&result->itbl_spin, "itbl_spin"); for (int i = 0; i < NINODE; i++) { result->itbl[i] = kalloc(); if (!result->itbl[i]) { printf("Failed to preallocate inode\n"); while (--i > 0) { kfree(result->itbl[i]); } kfree(result->superblock); kfree(result->itbl); kfree(result); return NULL; } memset(result->itbl[i], 0, 4096); initsleeplock(&(result->itbl[i]->lock), "inode"); } memset(result->superblock, 0, sizeof(fsformat_superblock_t)); return result; } else { return NULL; } } void fsinstance_loadsuperblock(fsinstance_t* instance, unsigned int device) { diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, 1); memcpy(instance->superblock, buffer->data, sizeof(fsformat_superblock_t)); diskio_buffer_release(buffer); } void* fsinstance_init(fsinstance_t* instance, unsigned int device) { FSINSTANCE_CHECK(instance); fsinstance_loadsuperblock(instance, device); if (instance->superblock->magic == FSFORMAT_MAGIC_OLD) { // If the magic number is the old one that means fs is "version 0". instance->fsversion = 0; fsinstance_inittransactions(instance, device, BSIZE); // TODO: Make block size fully configurable. // TODO: Add magic number for new filesystem and code to track version etc. } else if (instance->superblock->magic == FSFORMAT_MAGIC_NEW) { // If the magic number is the new one that means the filesystem's // version number is >= 1 and some extended fields of the superblock // are available. instance->fsversion = instance->superblock->extd_versionflags & 0xFF; // TODO: Check block size etc. fsinstance_inittransactions(instance, device, instance->superblock->extd_blocksize); // TODO: Make block size fully configurable. // TODO: Add magic number for new filesystem and code to track version etc. } else { panic("fsinstance_init: Bad filesystem (superblock magic number doesn't match any known options)"); } return instance->superblock; } void fsinstance_resetblocktozero(fsinstance_t* instance, unsigned int device, unsigned int blocknumber) { diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, blocknumber); memset(buffer->data, 0, BSIZE); // TODO: Configurable block size fsinstance_writelogged(instance, buffer); diskio_buffer_release(buffer); } #define FSINSTANCE_BITMAPBLOCK(n) \ (instance->superblock->firstbitmapblock + ((n)/bitsperblock)) // Attemptes to allocate a block, clearing the block to zeroes and // returning it's block number on success or printing a warning and // returning 0 if out of space. unsigned int fsinstance_allocdatablock(fsinstance_t* instance, unsigned int device) { FSINSTANCE_CHECK(instance); int bitsperblock = BSIZE*8; // TODO: Configurable block size for (int blocknumber = 0; blocknumber < instance->superblock->totalblocks; blocknumber += bitsperblock) { diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber)); for (int bitindex = 0; bitindex < bitsperblock && (blocknumber + bitindex) < instance->superblock->totalblocks; bitindex++) { int mask = 1 << (bitindex & 7); // bitindex % 8 int byteindex = bitindex >> 3; // bitindex / 8 if ((buffer->data[byteindex] & mask) == 0) { // The block is marked as free so we should mark it as used. buffer->data[byteindex] |= mask; fsinstance_writelogged(instance, buffer); // Then we can release the buffer since we'll be returning soon. diskio_buffer_release(buffer); // Now we have the final block number, zero it and return. unsigned int finalnumber = blocknumber + bitindex; fsinstance_resetblocktozero(instance, device, finalnumber); return finalnumber; } } diskio_buffer_release(buffer); } printf("fsinstance_allocdatablock: No more free blocks, filesystem is full!\n"); return 0; } // Counts the free blocks (NOTE: This should be run inside a transaction // for consistency, although it doesn't modify any blocks). Returns the // number of blocks marked free for data or directory allocation. unsigned int fsinstance_countfreeblocks(fsinstance_t* instance, unsigned int device) { FSINSTANCE_CHECK(instance); int bitsperblock = BSIZE*8; // TODO: Configurable block size unsigned int count = 0; for (int blocknumber = 0; blocknumber < instance->superblock->totalblocks; blocknumber += bitsperblock) { diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber)); for (int bitindex = 0; bitindex < bitsperblock && (blocknumber + bitindex) < instance->superblock->totalblocks; bitindex++) { int mask = 1 << (bitindex & 7); // bitindex % 8 int byteindex = bitindex >> 3; // bitindex / 8 if ((buffer->data[byteindex] & mask) == 0) { // The block is marked as free so we simply imcrement the count count++; } } diskio_buffer_release(buffer); } return count; } // Marks a data block as free in the used/free bitmap (this assumes that // any reference to it will have been reset separately). void fsinstance_freedatablock(fsinstance_t* instance, unsigned int device, unsigned int blocknumber) { FSINSTANCE_CHECK(instance); int bitsperblock = BSIZE*8; // TODO: Configurable block size diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber)); unsigned int bitindex = blocknumber % bitsperblock; unsigned int byteindex = bitindex >> 3; // bitindex / 8 int mask = 1 << (bitindex & 7); // bitindex % 8 if ((buffer->data[byteindex] & mask) == 0) { panic("fsinstance_freedatablock: Caller attempted to free a block which is already marked free!"); } buffer->data[byteindex] &= ~mask; // And it with every bit EXCEPT the one we want to clear fsinstance_writelogged(instance, buffer); diskio_buffer_release(buffer); } // TODO: Configurable block size #define FSINSTANCE_INODEBLOCK(n) \ (((n) / IPB) + instance->superblock->firstinodeblock) // Looks up the inode number on the filesystem and returns the in-memory // inode structure with reference count adjusted for the caller, without // locking or loading the inode. fsinstance_inode_t* fsinstance_getinode(fsinstance_t* instance, unsigned int device, unsigned int inodenumber) { FSINSTANCE_CHECK(instance); acquire(&instance->itbl_spin); fsinstance_inode_t* emptyinode = NULL; for (int inodeindex = 0; inodeindex < NINODE; inodeindex++) { // TODO: Flexible size of in-memory inode table fsinstance_inode_t* inode = instance->itbl[inodeindex]; if (inode->referencecount > 0 && inode->inodenumber == inodenumber && inode->device == device) { // NOTE: Checks for device are probably unnecessary unless backends are combined for some reason // This inode already has an in-memory reference, so just return it inode->referencecount = inode->referencecount + 1; release(&instance->itbl_spin); // After unlocking the inode table. return inode; } if (inode->referencecount == 0 && emptyinode == NULL) { // Save the empty inode until we've checked them all for a match. emptyinode = inode; } } // If no match was found, we need to (hopefully) fill an empty inode. if (emptyinode == NULL) { panic("fsinstance_getinode: the in-memory inode table is exhausted, future versions should have better handling for this case!\n"); // TODO: Better handling! } emptyinode->instance = instance; emptyinode->device = device; emptyinode->inodenumber = inodenumber; emptyinode->referencecount = 1; emptyinode->valid = 0; // Mark this as having not been loaded yet! // Be sure to unlock the inode table before returning. release(&instance->itbl_spin); return emptyinode; } // Allocates an inode, marking it with the given type. Returns the // allocated inode (without locking it) on success or prints a warning // and returns NULL on failure. fsinstance_inode_t* fsinstance_allocinode(fsinstance_t* instance, unsigned int device, short inodetype) { FSINSTANCE_CHECK(instance); for (int inodenumber = 1; inodenumber < instance->superblock->inodelimit; inodenumber++) { // First load the block containing the inode diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_INODEBLOCK(inodenumber)); // Then get the inode offset within the block. fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inodenumber % IPB); // TODO: Configurable block size if (diskinode->type == 0) { // This inode is marked unused, so mark it with the type & save it memset(diskinode, 0, sizeof(fsformat_inode_t)); // Be sure to zero diskinode->type = inodetype; fsinstance_writelogged(instance, buffer); // Now we can release the buffer and return the new inode. diskio_buffer_release(buffer); return fsinstance_getinode(instance, device, inodenumber); } diskio_buffer_release(buffer); } printf("fsinstance_allocinode: No more inodes, filesystem inode table is full!\n"); return NULL; } // Increases the reference count of an inode, locking on the inode table // and returning the inode pointer to simulate a copy operation. fsinstance_inode_t* fsinstance_inode_copyref(fsinstance_inode_t* inode) { acquire(&inode->instance->itbl_spin); inode->referencecount = inode->referencecount + 1; release(&inode->instance->itbl_spin); return inode; } // Saves an inode to disk. This should be called after modifying any // inode field that's saved to disk, and must be called while the caller // holds the inode's lock. void fsinstance_inode_save(fsinstance_inode_t* inode) { fsinstance_t* instance = inode->instance; FSINSTANCE_CHECK(instance); // Begin by loading the block with the on-disk inode diskio_buffer_t* buffer = diskio_buffer_read(inode->instance->cache, inode->device, FSINSTANCE_INODEBLOCK(inode->inodenumber)); // Get the inode offset within the block. fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inode->inodenumber % IPB); // TODO: Configurable block size // Update the fields of the on-disk inode structure diskinode->type = inode->type; diskinode->device_major = inode->major; diskinode->device_minor = inode->minor; diskinode->linkcount = inode->nlink; diskinode->totalbytes = inode->size; // Copy the direct+indirect addresses memcpy(diskinode->addrs, inode->addrs, sizeof(unsigned int)*(NDIRECT + 1)); // Save it to disk (or at least to the transaction log!) fsinstance_writelogged(inode->instance, buffer); // Finally release the buffer. diskio_buffer_release(buffer); } // Unlocks an inode but does not perform any other actions like saving. void fsinstance_inode_unlock(fsinstance_inode_t* inode) { if (inode == NULL) { panic("fsinstance_inode_unlock: inode argument is NULL"); } else if (!holdingsleep(&inode->lock)) { panic("fsinstance_inode_unlock: inode argument is not locked"); } else if (inode->referencecount < 1) { panic("fsinstance_inode_unlock: inode argument reference count is below 1"); } releasesleep(&inode->lock); } // Locks the inode, and reads it into memory for the first time if it // hasn't been loaded yet. void fsinstance_inode_lockandload(fsinstance_inode_t* inode) { fsinstance_t* instance = inode->instance; FSINSTANCE_CHECK(instance); if (inode == NULL) { panic("fsinstance_inode_lockandload: inode argument is NULL"); } else if (inode->referencecount < 1) { panic("fsinstance_inode_lockandload: inode argument reference count is below 1"); } acquiresleep(&inode->lock); // Load the inode from disk but only if it hasn't already been loaded. if (!inode->valid) { diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, FSINSTANCE_INODEBLOCK(inode->inodenumber)); // Get the inode offset within the block. fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inode->inodenumber % IPB); // TODO: Configurable block size inode->type = diskinode->type; inode->major = diskinode->device_major; inode->minor = diskinode->device_minor; inode->nlink = diskinode->linkcount; inode->size = diskinode->totalbytes; memcpy(inode->addrs, diskinode->addrs, sizeof(unsigned int) * (NDIRECT + 1)); diskio_buffer_release(buffer); inode->valid = 1; if (inode->type == 0) { panic("fsinstance_inode_lockandload: inode type is zero, filesystem is corrupt or caller is buggy"); } } } void fsinstance_deleteindirect(fsinstance_t* instance, unsigned int blocknumber) { diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, blocknumber); unsigned int* addrs = (void*) buffer->data; for (int indirectindex = 0; indirectindex < NINDIRECT; indirectindex++) { // TODO: Make block size configurable unsigned int innerblock = addrs[indirectindex]; if (innerblock != 0) { fsinstance_freedatablock(instance, instance->fslog_device, innerblock); } // NOTE: addrs[indirectindex] doesn't need to be reset because // we'll be freeing the whole table! } diskio_buffer_release(buffer); // Free the page holding the list of indirect pages: fsinstance_freedatablock(instance, instance->fslog_device, blocknumber); } // Deletes the entire contents of an inode but does not perform any // other action such as deleting the inode (it's size will become 0). // The inode must be locked by the caller. void fsinstance_inode_deletecontents(fsinstance_inode_t* inode) { fsinstance_t* instance = inode->instance; FSINSTANCE_CHECK(instance); // We can begin by setting the size to zero, since we won't be relying // on that to check which pages to free inode->size = 0; // Do the hard part first, check for indirect blocks. These are stored // as a block full of block addresses, with it's own address stored at // the end of the list of "direct" block addresses. unsigned int indirectpage = inode->addrs[NDIRECT]; // Value at end of array if (indirectpage != 0) { // Reset the reference from the inode because we'll free the table. inode->addrs[NDIRECT] = 0; diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, indirectpage); unsigned int* addrs = (void*) buffer->data; for (int indirectindex = 0; indirectindex < NINDIRECT; indirectindex++) { // TODO: Make block size configurable unsigned int blocknumber = addrs[indirectindex]; if (blocknumber != 0) { if (instance->fsversion == 0) { // Simply delete the referenced block fsinstance_freedatablock(instance, inode->device, blocknumber); } else { // Do a deep delete of a block-of-blockrefs fsinstance_deleteindirect(instance, blocknumber); } } // NOTE: addrs[indirectindex] doesn't need to be reset because // we'll be freeing the whole table! } diskio_buffer_release(buffer); // Free the page holding the list of indirect pages: fsinstance_freedatablock(instance, inode->device, indirectpage); } // Delete the "direct" blocks, these are simple block addresses. for (int directindex = 0; directindex < NDIRECT; directindex++) { if (inode->addrs[directindex]) { fsinstance_freedatablock(instance, inode->device, inode->addrs[directindex]); inode->addrs[directindex] = 0; } } // Finally save the inode. fsinstance_inode_save(inode); } // The inverse of a get operation, and is also responsible for // deleting inodes which have been "unlinked". This must be called from // within a transaction. void fsinstance_inode_unget(fsinstance_inode_t* inode) { fsinstance_t* instance = inode->instance; acquire(&instance->itbl_spin); if (inode->valid && inode->nlink == 0 && inode->referencecount == 1) { // Since referencecount is 1 we are the only user of the inode so // this lock should instantly belong to us. acquiresleep(&inode->lock); // Release the itable lock while deleting. release(&instance->itbl_spin); // Delete any data (or dirent) blocks associated with this inode // before deleting the rest of the structure. fsinstance_inode_deletecontents(inode); inode->type = 0; fsinstance_inode_save(inode); inode->valid = 0; // Mark it as not being a loaded inode releasesleep(&inode->lock); acquire(&instance->itbl_spin); } // Finally decrement the reference count and unlock the inode table. inode->referencecount = inode->referencecount - 1; release(&instance->itbl_spin); } void fsinstance_inode_unlockandunget(fsinstance_inode_t* inode) { fsinstance_inode_unlock(inode); fsinstance_inode_unget(inode); } // Copy information for a "stat" system call to a (kernel-mode or // filesystem-mode) buffer. void fsinstance_inode_getstatinfo(fsinstance_inode_t* inode, struct stat* output) { output->type = inode->type; output->size = inode->size; output->dev = inode->device; output->ino = inode->inodenumber; output->nlink = inode->nlink; } // Returns the actual block number of the given logical block of a file, // allocating an associated block if it doesn't exist. unsigned int fsinstance_inode_getactualblock(fsinstance_inode_t* inode, unsigned int blocknumber) { fsinstance_t* instance = inode->instance; if (blocknumber < NDIRECT) { unsigned int a = inode->addrs[blocknumber]; if (a == 0) { a = fsinstance_allocdatablock(instance, inode->device); inode->addrs[blocknumber] = a; } return a; } unsigned int indirectindex = blocknumber - NDIRECT; unsigned int tableblock; if (instance->fsversion == 0) { tableblock = inode->addrs[NDIRECT]; if (tableblock == 0) { tableblock = fsinstance_allocdatablock(instance, inode->device); inode->addrs[NDIRECT] = tableblock; if (tableblock == 0) { return 0; } } } else { unsigned int tableotables = inode->addrs[instance->superblock->extd_directblocks]; if (tableotables == 0) { tableotables = fsinstance_allocdatablock(instance, inode->device); inode->addrs[instance->superblock->extd_directblocks] = tableotables; if (tableotables == 0) { return 0; } } //printf("using tableotables at %d\n", tableotables); diskio_buffer_t* ttbuffer = diskio_buffer_read(instance->cache, inode->device, tableotables); unsigned int* tt = (void*) ttbuffer->data; tableblock = tt[indirectindex / (instance->superblock->extd_blocksize / 4)]; if (tableblock == 0) { tableblock = fsinstance_allocdatablock(instance, inode->device); tt[indirectindex / (instance->superblock->extd_blocksize / 4)] = tableblock; if (tableblock == 0) { return 0; } fsinstance_writelogged(instance, ttbuffer); } diskio_buffer_release(ttbuffer); indirectindex %= (instance->superblock->extd_blocksize / 4); } //printf("using tableblock at %d\n", tableblock); if (indirectindex >= NINDIRECT) { panic("fsinstance_inode_getactualblock: Block index is out of range"); } diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, tableblock); unsigned int* table = (void*) buffer->data; unsigned int finaladdr = table[indirectindex]; if (finaladdr == 0) { finaladdr = fsinstance_allocdatablock(instance, inode->device); table[indirectindex] = finaladdr; fsinstance_writelogged(instance, buffer); } diskio_buffer_release(buffer); //printf("using finaladdr at %d\n", finaladdr); return finaladdr; } #define FSINSTANCE_MIN(x,y) \ (((x) >= (y)) ? (y) : (x)) #define FSINSTANCE_MAX(x,y) \ (((x) >= (y)) ? (x) : (y)) // Reads from an inode's data into memory. The memory can be either in // user-land or kernel-land, with isuserland set to 1 in the case of // reading to a user-land buffer. The caller is expected to have locked // the inode. int fsinstance_inode_read(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size) { fsinstance_t* instance = inode->instance; if (offset > inode->size || offset + size < offset) { // Check for overflows as well as reading from beyond the end of file return 0; // No bytes read. } unsigned int realsize; if (offset + size > inode->size) { realsize = inode->size - offset; } else { realsize = size; } unsigned int i = 0; while (i < realsize) { unsigned int blocknumber = fsinstance_inode_getactualblock(inode, offset / BSIZE); // TODO: Make block size configurable if (!blocknumber) { return -1; } diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, blocknumber); unsigned int nread = FSINSTANCE_MIN(realsize - i, BSIZE - offset%BSIZE); if (either_copyout(isuserland, address, buffer->data + offset % BSIZE, nread) == -1) { diskio_buffer_release(buffer); return -1; } diskio_buffer_release(buffer); i += nread; address += nread; offset += nread; } return i; } // Writes to an inode's data from memory. The address can be either // in user-land or kernel-land, with isuserland set to 1 in the case of // reading to a user-land buffer. The caller is expected to have locked // the inode. int fsinstance_inode_write(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size) { fsinstance_t* instance = inode->instance; if (offset > inode->size || offset + size < offset) { return -1; } if (offset + size > MAXFILE * BSIZE) { // TODO: Adjustable limits. return -1; } unsigned int i = 0; while (i < size) { unsigned int blocknumber = fsinstance_inode_getactualblock(inode, offset / BSIZE); // TODO: Make block size configurable if (blocknumber == 0) { goto savetodisk; } diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, blocknumber); unsigned int nwritten = FSINSTANCE_MIN(size - i, BSIZE - offset % BSIZE); if (either_copyin(buffer->data + offset % BSIZE, isuserland, address, nwritten) == -1) { diskio_buffer_release(buffer); goto savetodisk; } fsinstance_writelogged(inode->instance, buffer); diskio_buffer_release(buffer); i += nwritten; address += nwritten; offset += nwritten; } savetodisk: inode->size = FSINSTANCE_MAX(offset, inode->size); // Save the inode information to disk, especially as size and block // addresses may have changed (NOTE: the data itself will have already // been written to the associated blocks but they won't be linked in // to the file properly on disk until it's saved). fsinstance_inode_save(inode); return i; } int fsinstance_nameseq(fsinstance_t* instance, const char* namea, const char* nameb) { return (strncmp(namea, nameb, instance->fsversion == 0 ? FSFORMAT_NAMESIZE_OLD : FSFORMAT_NAMESIZE_NEW) == 0) ? 1 : 0; // TODO: Compare longer names... } fsinstance_inode_t* fsinstance_inode_lookup(fsinstance_inode_t* directory, char* filename, unsigned int* entryoffsetvar) { // Begin by checking that the directory inode and filesystem instance // are valid, as these checks may be especially essential when // searching through the filesystem! fsinstance_t* instance = directory->instance; FSINSTANCE_CHECK(instance); if (directory->type != T_DIR) { // TODO: Replace these constants panic("fsinstance_inode_lookup: Bad inode type, expecting directory but inode is not a directory"); } // Convenience variables for directory size & entry size calculations. // dirsize divided by entrysize should give the number of entries. unsigned int dirsize = directory->size; unsigned int entrysize = (unsigned int) (instance->fsversion == 0 ? sizeof(fsformat_dirent_v0_t) : sizeof(fsformat_dirent_v1_t)); // Then traverse through, reading 1 entry at a time until either // finding a match or getting to the end of the directory. for (unsigned int offset = 0; offset < dirsize; offset += entrysize) { if (instance->fsversion == 0) { fsformat_dirent_v0_t entry; int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); if (r != entrysize) { // TODO: Replace panics within the filesystem with some kind of // unmount-when-corrupted system. panic("fsinstance_inode_lookup: Read failed internally"); } // If found set the offset variable (if it isn't NULL), get the // file's inode and return it. if (fsinstance_nameseq(instance, filename, entry.filename)) { if (entryoffsetvar != NULL) { *entryoffsetvar = offset; } return fsinstance_getinode(instance, directory->device, entry.inodenumber); } } else { fsformat_dirent_v1_t entry; int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); if (r != entrysize) { // TODO: Replace panics within the filesystem with some kind of // unmount-when-corrupted system. panic("fsinstance_inode_lookup: Read failed internally"); } // If found set the offset variable (if it isn't NULL), get the // file's inode and return it. if (fsinstance_nameseq(instance, filename, entry.filename)) { if (entryoffsetvar != NULL) { *entryoffsetvar = offset; } return fsinstance_getinode(instance, directory->device, entry.datainode); } } } // If the search finished without returning that means there was no // match, so just return NULL. return NULL; } // Checks if a directory has a given filename in it, returning 1 if // there is a match and 0 otherwise. int fsinstance_inode_hasentry(fsinstance_inode_t* directory, char* filename) { fsinstance_inode_t* inode = fsinstance_inode_lookup(directory, filename, NULL); if (inode == NULL) { return 0; } else { fsinstance_inode_unget(inode); return 1; } } // Attempts to insert a new directory entry into the given directory, // returning 0 if successful or -1 otherwise. This function int fsinstance_inode_insert(fsinstance_inode_t* directory, char* filename, unsigned int inodenumber) { fsinstance_t* instance = directory->instance; FSINSTANCE_CHECK(instance); if (fsinstance_inode_hasentry(directory, filename)) { return -1; } // Convenience variables for directory size & entry size calculations. // dirsize divided by entrysize should give the number of entries. unsigned int dirsize = directory->size; unsigned int entrysize = (unsigned int) (instance->fsversion == 0 ? sizeof(fsformat_dirent_v0_t) : sizeof(fsformat_dirent_v1_t)); unsigned int offset = 0; while (offset < dirsize) { if (instance->fsversion == 0) { fsformat_dirent_v0_t entry; int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); if (r != entrysize) { // TODO: Replace panics within the filesystem with some kind of // unmount-when-corrupted system. panic("fsinstance_inode_insert: Read failed internally"); } if (!entry.inodenumber) { goto foundentry; } } else { fsformat_dirent_v1_t entry; int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); if (r != entrysize) { // TODO: Replace panics within the filesystem with some kind of // unmount-when-corrupted system. panic("fsinstance_inode_insert: Read failed internally"); } if (!entry.datainode) { goto foundentry; } } offset += entrysize; } // When the code reaches foundentry:, it will have either found a free // entry or searched to the end of the directory. Either way we can // now simply write the directory entry to that offset and it will be // added! foundentry: if (instance->fsversion == 0) { fsformat_dirent_v0_t newentry; strncpy(newentry.filename, filename, FSFORMAT_NAMESIZE_OLD); // TODO: Support for longer names newentry.inodenumber = inodenumber; if (fsinstance_inode_write(directory, 0, (unsigned long long) &newentry, offset, entrysize) != entrysize) { return -1; } } else { fsformat_dirent_v1_t newentry; strncpy(newentry.filename, filename, FSFORMAT_NAMESIZE_NEW); // TODO: Support for longer names newentry.datainode = inodenumber; newentry.metainode = 0xFFFFFFFF; if (fsinstance_inode_write(directory, 0, (unsigned long long) &newentry, offset, entrysize) != entrysize) { return -1; } } // If we got to the end then return 0 to indicate success. return 0; } #define FSINSTANCE_SKIPSLASHES(iter) \ while (*iter == '/' || *iter == '\\') { \ iter++; \ } #define FSINSTANCE_SKIPFILENAME(iter) \ while (*iter != 0 && *iter != '/' && *iter != '\\') { \ iter++; \ } // Scan to the next part of the path, reading the first part into // filenamevar (which must have FSFORMAT_NAMESIZE reserved bytes). // Returns the path iterator at the start of the next filename/dirname // or NULL to indicate end of string. char* fsinstance_scanfilename(fsinstance_t* instance, char* pathiterator, char* filenamevar) { FSINSTANCE_SKIPSLASHES(pathiterator); char* namestart = pathiterator; if (*namestart == 0) { return 0; // End of path } FSINSTANCE_SKIPFILENAME(pathiterator); int namelen = pathiterator - namestart; memcpy(filenamevar, namestart, FSINSTANCE_MIN(namelen, FSFORMAT_NAMESIZE_OLD)); // TODO: Support for longer filenames if (namelen < FSFORMAT_NAMESIZE_OLD) { filenamevar[namelen] = 0; } FSINSTANCE_SKIPSLASHES(pathiterator); // This is probably unnecessary? return pathiterator; } // The internal path lookup function (used by the simple lookups). The // filenamevar must point to an array of FSINSTANCE_NAMESIZE bytes which // will be used as a scratch variable as well as to look up the filename // within the parent directory (if getparent is non-zero). Any file // lookup needs to happen within a transaction. fsinstance_inode_t* fsinstance_lookuppath(fsinstance_t* instance, char* path, int getparent, char* filenamevar) { FSINSTANCE_CHECK(instance); fsinstance_inode_t* inode; if (*path == '/' || *path == '\\') { inode = fsinstance_getinode(instance, instance->fslog_device, ROOTINO); // TODO: Make all this configurable } else { inode = fsinstance_inode_copyref(myproc()->cwd); // TODO: This shouldn't be here... It should be passed in } char* pathiterator = path; do { pathiterator = fsinstance_scanfilename(instance, pathiterator, filenamevar); if (!pathiterator) { goto finished; } fsinstance_inode_lockandload(inode); if (inode->type != T_DIR) { fsinstance_inode_unlockandunget(inode); return NULL; } // If searching for the file's parent directory, stop when there is // no more path left without looking up the filename. if (*pathiterator == 0 && getparent) { fsinstance_inode_unlock(inode); return inode; } fsinstance_inode_t* subnode = fsinstance_inode_lookup(inode, filenamevar, NULL); if (subnode == NULL) { fsinstance_inode_unlockandunget(inode); return NULL; } fsinstance_inode_unlockandunget(inode); inode = subnode; } while(1); finished: // If we reached here and we're looking for a parent directory then // there mustn't be one. if (getparent) { fsinstance_inode_unget(inode); return NULL; } // Otherwise we're just looking for whatever is at the end of the path return inode; } // Simple path lookup of a path's filename within a parent directory. // The filename variable needs to point to at least FSFORMAT_NAMESIZE // bytes of reserved memory. Any file lookup needs to happen within a // transaction. fsinstance_inode_t* fsinstance_lookupparent(fsinstance_t* instance, char* path, char* filenamevar) { return fsinstance_lookuppath(instance, path, 1, filenamevar); } // Simple path lookup, returning the inode matching the path or NULL if // not found. Any file lookup needs to happen within a transaction. fsinstance_inode_t* fsinstance_lookup(fsinstance_t* instance, char* path) { char scratchvar[FSFORMAT_NAMESIZE_NEW]; // Uses the new size as this is larger return fsinstance_lookuppath(instance, path, 0, scratchvar); } // Saves the log header to disk, this is used internally to set the disk // into a state where the transaction is finishable. void fsinstance_savetransaction(fsinstance_t* instance) { // First read in the structure. diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start); fsinstance_logheader_t* header = (void*) buffer->data; // Then change the values to our in-memory copy int total = instance->fslog_header.number; int count = 0; while (count < total) { header->blocks[count] = instance->fslog_header.blocks[count]; count++; } header->number = total; // Then save the copy to disk diskio_buffer_write(buffer); diskio_buffer_release(buffer); } // Loads the log header from disk, this is used internally to reload the // structure at startup. void fsinstance_loadtransaction(fsinstance_t* instance) { // First read in the log header diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start); fsinstance_logheader_t* header = (void*) buffer->data; // Then copy the structure values into our in-memory copy int total = header->number; int count = 0; while (count < total) { instance->fslog_header.blocks[count] = header->blocks[count]; count++; } instance->fslog_header.number = total; // Then release the buffer. diskio_buffer_release(buffer); printf("fsinstance_loadtransaction: got %d block references\n", total); } // Called internally to save the cached blocks included in this // transaction. void fsinstance_savecache(fsinstance_t* instance) { int total = instance->fslog_header.number; int count = 0; while (count < total) { // Get the block we want logged. diskio_buffer_t* src = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_header.blocks[count]); // Then get the block in the log we'll be storing it to (note, the diskio_buffer_t* dst = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start + 1 + count); memcpy(dst->data, src->data, BSIZE); // TODO: Make block size configurable // Save the block data into the log. diskio_buffer_write(dst); diskio_buffer_release(src); diskio_buffer_release(dst); count++; } } // Called internally to copy the logged blocks to their final locations. // This is mostly an inverse of fsinstance_savecache. void fsinstance_applytransactionblocks(fsinstance_t* instance, int rebootmode) { int total = instance->fslog_header.number; int count = 0; while (count < total) { // Get the block we want logged. diskio_buffer_t* src = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start + 1 + count); // Then get the block in the log we'll be storing it to (note, the diskio_buffer_t* dst = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_header.blocks[count]); memcpy(dst->data, src->data, BSIZE); // TODO: Make block size configurable // Save the block data into the log. diskio_buffer_write(dst); if (!rebootmode) { diskio_buffer_dereference(dst); } diskio_buffer_release(src); diskio_buffer_release(dst); count++; } } // Called internally to reset the on-disk transaction to a "zero blocks" // state. void fsinstance_resettransaction(fsinstance_t* instance) { // Just reset the log count and save the transaction header. instance->fslog_header.number = 0; fsinstance_savetransaction(instance); } // Called at startup time or whenever else the disk is mounted to check // and apply any partially-applied writes, i.e. an "after-reboot check". void fsinstance_rebootcheck(fsinstance_t* instance) { // This will load and perform any transaction left on the device, // which will only be left in the event that a transaction was partly // completed, therefore applying the whole transaction's blocks will // complete the transaction and return the filesystem to a consistent // state. // In the typical case (when the filesystem is already consistent), // there will be nothing in the log to commit. fsinstance_loadtransaction(instance); fsinstance_applytransactionblocks(instance, 1); // 1 == reboot check // Reset the log's block count to zero and save. fsinstance_resettransaction(instance); } // Called internally to perform a "commit" of the current transaction // using the other internal functions. This will have no effect in cases // where no actual data has been modified in this transaction. void fsinstance_innercommit(fsinstance_t* instance) { if (instance->fslog_header.number == 0) { return; } // First the block data of the transaction is written to the reserved // log blocks. fsinstance_savecache(instance); // Then the transaction header itself is written, meaning that as long // as this part is either written or not written the filesystem will // be left in a state before or after the transaction was committed. fsinstance_savetransaction(instance); // Then attempt to copy the blocks to their intended final locations, // which may fail and be retried by the reboot-check if the system // loses power or reboots during copying. fsinstance_applytransactionblocks(instance, 0); // 0 = this call isn't the reboot check // Reset the log's block count to zero and save. fsinstance_resettransaction(instance); } // This is like the diskio_buffer_write function except that it just // adds the block to the log to be committed later. void fsinstance_writelogged(fsinstance_t* instance, diskio_buffer_t* buffer) { acquire(&instance->fslog_lock); if (instance->fslog_header.number >= instance->fslog_size || instance->fslog_header.number >= FSINSTANCE_MAXLOGBLOCKS) { panic("fsinstance_writelogged: The filesystem is not configured for transactions this large"); } if (instance->fslog_outstanding <= 0) { panic("fsinstance_writelogged: Caller attempted to write without being inside of a transaction's begin/end sequence"); } int blockindex = 0; while (blockindex < instance->fslog_header.number) { if (instance->fslog_header.blocks[blockindex] == buffer->blocknumber) { goto logged; } blockindex++; } instance->fslog_header.blocks[blockindex] = buffer->blocknumber; if (blockindex == instance->fslog_header.number) { diskio_buffer_reference(buffer); // It's now "referenced" by the log instance->fslog_header.number++; } logged: release(&instance->fslog_lock); } // Called at the beginning of a filesystem transaction, waits until the // filesystem is ready to accept more transactions and then returns with // a new transaction marked as being in progress. void fsinstance_begin(fsinstance_t* instance) { acquire(&instance->fslog_lock); do { if (instance->fslog_committing || instance->fslog_header.number + ((instance->fslog_outstanding + 1) * FSINSTANCE_MAXBLOCKSPEROP) > instance->fslog_size) { // Adding another operation might overflow the log, so sleep until // it's committed. This is also the case if another transaction // set is already committing. sleep(&instance->fslog_sleepvariable, &instance->fslog_lock); } else { instance->fslog_outstanding++; release(&instance->fslog_lock); goto ready; } } while(1); ready: return; } // Called at the end of a filesystem transaction. Commits the whole set // of transactions if this is the last one in progress. void fsinstance_end(fsinstance_t* instance) { acquire(&instance->fslog_lock); if (instance->fslog_outstanding < 1) { panic("fsinstance_end: No transaction is in progress, need to call fsinstance_begin first!"); } else if (instance->fslog_committing) { panic("fsinstance_end: This transaction set is already being committed, likely a multiprocessing bug"); } instance->fslog_outstanding--; int reallycommit = 0; if (instance->fslog_outstanding == 0) { reallycommit = 1; instance->fslog_committing = 1; } else { // Wake up any processes waiting in fsinstance_begin. sched_wake(&instance->fslog_sleepvariable); } // Release the log before any commit may be attempted, since that may // otherwise cause us to sleep with a lock held! release(&instance->fslog_lock); if (reallycommit) { fsinstance_innercommit(instance); acquire(&instance->fslog_lock); instance->fslog_committing = 0; sched_wake(&instance->fslog_sleepvariable); release(&instance->fslog_lock); } } // This is called internally to initialise/"reboot" the filesystem // transaction logging system. This will initialise the logging-related // variables and call fsinstance_rebootcheck to perform any unfinished // transactions to leave the filesystem structures in a consistent state void fsinstance_inittransactions(fsinstance_t* instance, unsigned int device, int blocksize) { if (blocksize <= sizeof(fsinstance_logheader_t)) { panic("fsinstance_inittransactions: Block size given by the caller is too small for the log header"); } initlock(&instance->fslog_lock, "fslog_lock"); instance->fslog_start = instance->superblock->firstlogblock; instance->fslog_size = instance->superblock->logblocks; instance->fslog_device = device; // This does most of the actual work of "rebooting" the filesystem, by // finishing any partly-completed transactions to restore filesystem // structures to a consistent state. fsinstance_rebootcheck(instance); }