diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..5593f74
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,25 @@
+Files marked NEW CODE can be attributed just to Zak's copyright with the same license, other files will either have their own information or are to be taken under the full license including the xv6 copyright statement:
+
+Copyright (c) 2024, 2025 Zak Yani Star Fenton
+Copyright (c) 2006-2024 Frans Kaashoek, Robert Morris, Russ Cox,
+                        Massachusetts Institute of Technology
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
diff --git a/asmnew.S b/asmnew.S
new file mode 100644
index 0000000..c025e0c
--- /dev/null
+++ b/asmnew.S
@@ -0,0 +1,79 @@
+// This is NEW CODE for saving/restoring FPU registers and possibly other activities
+
+.text
+.option norvc
+
+.globl fpu_save
+fpu_save:
+    fsd f0, 8(a0)
+    fsd f1, 16(a0)
+    fsd f2, 24(a0)
+    fsd f3, 32(a0)
+    fsd f4, 40(a0)
+    fsd f5, 48(a0)
+    fsd f6, 56(a0)
+    fsd f7, 64(a0)
+    fsd f8, 72(a0)
+    fsd f9, 80(a0)
+    fsd f10, 88(a0)
+    fsd f11, 96(a0)
+    fsd f12, 104(a0)
+    fsd f13, 112(a0)
+    fsd f14, 120(a0)
+    fsd f15, 128(a0)
+    fsd f16, 136(a0)
+    fsd f17, 144(a0)
+    fsd f18, 152(a0)
+    fsd f19, 160(a0)
+    fsd f20, 168(a0)
+    fsd f21, 176(a0)
+    fsd f22, 184(a0)
+    fsd f23, 192(a0)
+    fsd f24, 200(a0)
+    fsd f25, 208(a0)
+    fsd f26, 216(a0)
+    fsd f27, 224(a0)
+    fsd f28, 232(a0)
+    fsd f29, 240(a0)
+    fsd f30, 248(a0)
+    fsd f31, 256(a0)
+    frcsr t0 // Get float control register
+    sd t0, 0(a0) // Save the control register bits
+    ret
+
+.globl fpu_restore
+fpu_restore:
+    fld f0, 8(a0)
+    fld f1, 16(a0)
+    fld f2, 24(a0)
+    fld f3, 32(a0)
+    fld f4, 40(a0)
+    fld f5, 48(a0)
+    fld f6, 56(a0)
+    fld f7, 64(a0)
+    fld f8, 72(a0)
+    fld f9, 80(a0)
+    fld f10, 88(a0)
+    fld f11, 96(a0)
+    fld f12, 104(a0)
+    fld f13, 112(a0)
+    fld f14, 120(a0)
+    fld f15, 128(a0)
+    fld f16, 136(a0)
+    fld f17, 144(a0)
+    fld f18, 152(a0)
+    fld f19, 160(a0)
+    fld f20, 168(a0)
+    fld f21, 176(a0)
+    fld f22, 184(a0)
+    fld f23, 192(a0)
+    fld f24, 200(a0)
+    fld f25, 208(a0)
+    fld f26, 216(a0)
+    fld f27, 224(a0)
+    fld f28, 232(a0)
+    fld f29, 240(a0)
+    fld f30, 248(a0)
+    fld f31, 256(a0)
+    ld t0, 0(a0) // Load the float control register bits
+    fscsr t0 // Set float control register
+    ret
+
diff --git a/bitarray.c b/bitarray.c
new file mode 100644
index 0000000..bdf7434
--- /dev/null
+++ b/bitarray.c
@@ -0,0 +1,216 @@
+/* NEW CODE for optimised bit array.
+ * Copyright (C) 2024 Zak Fenton
+ * NO WARRANTY USE AT YOUR OWN RISK etc.
under terms of UNLICENSE or MIT license + */ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "riscv.h" +#include "proc.h" +#include "bitarray.h" +#include "defs.h" +#include "kprintf.h" + +struct bitarray* bitarrayalloc(int nbits) { + int i; + if (nbits > 64*64) { + panic("argument to bitarrayalloc is too large"); + } + if (sizeof(struct bitarray) >= PGSIZE) { + panic("bitarray structure too large to fit in a page"); + } + struct bitarray* result = kalloc(); + result->size = nbits; + if (result == (void*)0ULL) { + panic("failed to allocate bitarray structure"); + } + initlock(&(result->lock), "bitarray"); + for (i = 0; i < 64; i++) { + result->data[i] = 0ULL; + } + return result; +} + +int bitarray_get(struct bitarray* a, int index) { + if (index < 0 || index >= a->size) { + return 0; + } + acquire(&(a->lock)); + uint64 x = a->data[index>>6]; + if ((x & (1ULL << (index & 63))) != 0) { + //printf("Bit at %p # %d is ON\n", a, index); + release(&(a->lock)); + return 1; + } + //printf("Bit at %p # %d is OFF\n", a, index); + release(&(a->lock)); + return 0; +} + +int bitarray_getnolock(struct bitarray* a, int index) { + if (index < 0 || index >= a->size) { + return 0; + } + uint64 x = a->data[index>>6]; + if ((x & (1ULL << (index & 63))) != 0) { + //printf("Bit at %p # %d is ON\n", a, index); + return 1; + } + //printf("Bit at %p # %d is OFF\n", a, index); + return 0; +} + +/*#ifdef _ZCC +int __naked bitarraylsb(uint64 x) __asm { + ctz a1, a1 + ret +// addi a2, zero, 0 +// addi a4, zero, 1 +// .lsbloop: +// and a3, a1, a4 +// bne a3, zero, .lsbend +// srli a1, a1, 1 +// addi a2, a2, 1 +// j .lsbloop +// .lsbend: +// addi a1, a2, 0 +// ret +} +int __naked bitarraylsb2(uint64 i, uint64 x) __asm { + srl a3, a2, a1 + ctz a4, a3 + add a1, a1, a4 + ret + addi a4, zero, 1 + addi a6, zero, 64 + .lsb2loop: + srl a3, a2, a1 + and a5, a4, a1 + bne a5, zero, .lsb2end + addi a1, a1, 1 + beq a1, a6, .lsb2nope + j .lsb2loop + .lsb2nope: + addi a1, zero, -1 + .lsb2end: + ret +} +#else*/ +int bitarraylsb(uint64 x) { + int i; + for (i = 0; i < 64; i++) { + if (x & (1ULL << i)) { + return i; + } + } + return -1; +} +int bitarraylsb2(int starti, uint64 x) { + for (; starti < 64; starti++) { + if (x & (1ULL << starti)) { + return starti; + } + } + return -1; +} +//#endif + +int bitarraynextnzw(uint64* w, int start, int lim) { + int i; + for (i = start; i < lim; i++) { + if (w[i]) { + return i; + } + } + return -1; +} + +void bitarray_setnolock(struct bitarray* a, int index, int val) { + //printf("Setting %p # %d to %s\n", a, index, val ? 
"ON" : "OFF"); + if (index < 0 || index >= a->size) { + panic("invalid index argument to bitarray_setnolock"); + } + uint64 x = a->data[index>>6]; + if (val) { + a->data[index>>6] = x | (1ULL << (index & 63)); + } else { + a->data[index>>6] = x & ~(1ULL << (index & 63)); + } +} + +int bitarray_findlowest(struct bitarray* a, int startidx) { + //printf("Finding lowest set bit in %p from # %d\n", a, startidx); + if (startidx < 0 || startidx >= a->size) { + return -1; + //startidx = 0; + } + acquire(&(a->lock)); + int i; + //uartputc_sync('a'); + i = bitarraylsb2(startidx & 63, a->data[startidx>>6]); + if (i < 0) { + i = ((startidx >> 6) + 1) << 6; + //for (i = startidx; (i & 63) != 0 && i < a->size; i++) { + /*if (bitarray_getnolock(a, i)) { + release(&(a->lock)); + return i; + }*/ + //} + } else { + //uartputc_sync('A'); + release(&(a->lock)); + //printf("Got %d (path 1)\n", i + ((startidx >> 6) << 6)); + return i + ((startidx >> 6) << 6); + } + //uartputc_sync('b'); + for (; i < a->size; i+=64) { + uint64 x = a->data[i>>6]; + if (x) { + release(&(a->lock)); + //printf("Got %d (path 2)\n", i+bitarraylsb(x)); + return i+bitarraylsb(x); + } + } + //uartputc_sync('c'); + release(&(a->lock)); + return -1; +} + +int bitarray_poplowest(struct bitarray* a, int startidx) { + if (startidx < 0 || startidx >= a->size) { + startidx = 0; + } + acquire(&(a->lock)); + int i; + for (i = startidx; i < a->size; i++) { + if ((i & 63) == 0) { + uint64 x = a->data[i>>6]; + if (x == 0ULL) { + i += 63; + } + } + if (bitarray_getnolock(a, i)) { + bitarray_setnolock(a, i, 0); + release(&(a->lock)); + return i; + } + } + release(&(a->lock)); + return -1; +} + +void bitarray_set(struct bitarray* a, int index, int val) { + //printf("Setting %p # %d to %s\n", a, index, val ? "ON" : "OFF"); + if (index < 0 || index >= a->size) { + panic("invalid index argument to bitarray_set"); + } + acquire(&(a->lock)); + uint64 x = a->data[index>>6]; + if (val) { + a->data[index>>6] = x | (1ULL << (index & 63)); + } else { + a->data[index>>6] = x & ~(1ULL << (index & 63)); + } + release(&(a->lock)); +} diff --git a/bitarray.h b/bitarray.h new file mode 100644 index 0000000..f277f96 --- /dev/null +++ b/bitarray.h @@ -0,0 +1,27 @@ +/* NEW CODE for optimised bit array. + * Copyright (C) 2024 Zak Fenton + * NO WARRANTY USE AT YOUR OWN RISK etc. under terms of UNLICENSE or MIT license + */ + +#ifndef _BITARRAY_H +#define _BITARRAY_H + +struct bitarray { + sched_spinlock_t lock; + int size; + int pad; + uint64 data[64]; +}; + +struct bitarray* bitarrayalloc(int nbits); + +int bitarray_get(struct bitarray* a, int index); +int bitarray_getnolock(struct bitarray* a, int index); +int bitarray_findlowest(struct bitarray* a, int startidx); +int bitarray_poplowest(struct bitarray* a, int startidx); +void bitarray_set(struct bitarray* a, int index, int val); + + +/* From ifndef at top of file: */ +#endif + diff --git a/cc_stdarg.h b/cc_stdarg.h new file mode 100644 index 0000000..1e20c53 --- /dev/null +++ b/cc_stdarg.h @@ -0,0 +1,37 @@ +// This is NEW CODE for auditing purposes, but should actually be replaced with libc/include/stdarg.h +#ifndef _FAKELIBC_STDARG_H +#define _FAKELIBC_STDARG_H + +// TODO: This will basically not work except for the simplest printf-like cases + +struct va_list_struct {long long** ptr;}; +typedef struct va_list_struct va_list; +//typedef int va_list; + +#define _VA_CHECK() \ + if (__builtin_func_callconv != 101) {\ + printf("ERROR: Unpacking varargs currently only works with __classic_call (#101). 
Function %s uses convention %d instead.\n", __func__, __builtin_func_callconv);\ + } + +#define va_start(list,lastarg) \ + do {\ + _VA_CHECK();\ + list.ptr = &lastarg;\ + list.ptr++;\ + } while(0) + +#define va_arg(list,T) \ + (T)(*list.ptr++) + +#define va_end(list) \ + do {list.ptr = (void*)0;} while(0) + + +/* +#define va_start(...) printf("WARNING: Unimplemented: va_start in %s\n", __func__) +#define va_arg(...) printf("WARNING: Unimplemented: va_start in %s\n", __func__) +#define va_end(...) printf("WARNING: Unimplemented: va_end in %s\n", __func__) +*/ + +/* From ifndef at top of file: */ +#endif diff --git a/console.c b/console.c new file mode 100644 index 0000000..b9d5224 --- /dev/null +++ b/console.c @@ -0,0 +1,187 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// +// Console input and output, to the uart. +// Reads are line at a time. +// Implements special input characters: +// newline -- end of line +// control-h -- backspace +// control-u -- kill line +// control-d -- end of file +// control-p -- print process list +// + +#include "types.h" +#include "param.h" +#include "sched.h" +#include "fs.h" +#include "file.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "proc.h" + +#define BACKSPACE 0x100 +#define C(x) ((x)-'@') // Control-x + +// +// send one character to the uart. +// called by printf(), and to echo input characters, +// but not from write(). +// +void +consputc(int c) +{ + if(c == BACKSPACE){ + // if the user typed backspace, overwrite with a space. + uartputc_sync('\b'); uartputc_sync(' '); uartputc_sync('\b'); + } else { + uartputc_sync(c); + } +} + +sched_spinlock_t conslock; + + // input +#define INPUT_BUF_SIZE 128 + char consbuf[INPUT_BUF_SIZE]; + uint consr; // Read index + uint consw; // Write index + uint conse; // Edit index + +// +// user write()s to the console go here. +// +int +consolewrite(int user_src, uint64 src, int n) +{ + int i; + + for(i = 0; i < n; i++){ + char c; + if(either_copyin(&c, user_src, src+i, 1) == -1) + break; + uartputc(c); + } + + return i; +} + +// +// user read()s from the console go here. +// copy (up to) a whole input line to dst. +// user_dist indicates whether dst is a user +// or kernel address. +// +int +consoleread(int user_dst, uint64 dst, int n) +{ + uint target; + int c; + char cbuf; + + target = n; + acquire(&conslock); + while(n > 0){ + // wait until interrupt handler has put some + // input into cons.buffer. + while(consr == consw){ + if(killed(myproc())){ + release(&conslock); + return -1; + } + sleep(&consr, &conslock); + } + + c = consbuf[consr++ % INPUT_BUF_SIZE]; + + if(c == C('D')){ // end-of-file + if(n < target){ + // Save ^D for next time, to make sure + // caller gets a 0-byte result. + consr--; + } + break; + } + + // copy the input byte to the user-space buffer. + cbuf = c; + if(either_copyout(user_dst, dst, &cbuf, 1) == -1) + break; + + dst++; + --n; + + if(c == '\n'){ + // a whole line has arrived, return to + // the user-level read(). + break; + } + } + release(&conslock); + + return target - n; +} + +// +// the console input interrupt handler. +// uartintr() calls this for input character. +// do erase/kill processing, append to cons.buf, +// wake up consoleread() if a whole line has arrived. +// +void +consoleintr(int c) +{ + acquire(&conslock); + + switch(c){ + case C('P'): // Print process list. + sched_dumpstatus(); + break; + case C('U'): // Kill line. 
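+    // Walk the edit index back towards the last committed character (or the
+    // previous newline), echoing BACKSPACE for each erased character so the
+    // terminal display stays in step with the buffer.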
+ while(conse != consw && + consbuf[(conse-1) % INPUT_BUF_SIZE] != '\n'){ + conse--; + consputc(BACKSPACE); + } + break; + case C('H'): // Backspace + case '\x7f': // Delete key + if(conse != consw){ + conse--; + consputc(BACKSPACE); + } + break; + default: + if(c != 0 && conse-consr < INPUT_BUF_SIZE){ + c = (c == '\r') ? '\n' : c; + + // echo back to the user. + consputc(c); + + // store for consumption by consoleread(). + consbuf[conse++ % INPUT_BUF_SIZE] = c; + + if(c == '\n' || c == C('D') || conse-consr == INPUT_BUF_SIZE){ + // wake up consoleread() if a whole line (or end-of-file) + // has arrived. + consw = conse; + sched_wake(&consr); + } + } + break; + } + + release(&conslock); +} +void +consoleinit(void) +{ + initlock(&conslock, "cons"); + + uartinit(); + + // connect read and write system calls + // to consoleread and consolewrite. + devsw[CONSOLE].read = &consoleread; + devsw[CONSOLE].write = &consolewrite; +} diff --git a/defs.h b/defs.h new file mode 100644 index 0000000..b14534f --- /dev/null +++ b/defs.h @@ -0,0 +1,190 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "sched.h" +#include "fsinstance.h" +#include "mkfs/fsformat.h" +#include "diskio.h" +struct context; +struct file; +struct pipe; +struct proc; +struct stat; +struct fsinstance; + +#ifdef _ZCC +void __sync_synchronize(); +#endif + +void _entry(); // Only invoked at boot, indicates start of kernel image + +// bio.c +void binit(void); +diskio_buffer_t* bread(uint, uint); +void diskio_buffer_release(diskio_buffer_t*); +void diskio_buffer_write(diskio_buffer_t*); +void diskio_buffer_reference(diskio_buffer_t*); +void diskio_buffer_dereference(diskio_buffer_t*); + +// console.c +void consoleinit(void); +void consoleintr(int); +void consputc(int); + +// exec.c +int execve(char*, char**, char**); + +// file.c +struct file* filealloc(void); +void fileclose(struct file*); +struct file* filedup(struct file*); +void fileinit(void); +int fileread(struct file*, uint64, int n); +int filestat(struct file*, uint64 addr); +int filewrite(struct file*, uint64, int n); + +// fs.c +void* fsinit(struct fsinstance*, unsigned int); +int namecmp(const char*, const char*); + +// ramdisk.c +void ramdiskinit(void); +void ramdiskintr(void); +void ramdiskrw(diskio_buffer_t*); + +// physpg.c +#define PHYSPG_FREERAM 1001 +#define PHYSPG_METADATA 1002 +#define PHYSPG_KALLOC 1003 +void physpg_initbegin(); +void physpg_initend(); +uint64 physpg_freeram(); +uint64 physpg_totalram(); +void* physpg_alloc1(int mode); +void physpg_free1(int mode, void* physpg); +void physpg_setrange(int mode, void* start, void* end); +#define kalloc() physpg_alloc1(PHYSPG_KALLOC) +#define kfree(p) physpg_free1(PHYSPG_KALLOC,p) + +// log.c +void initlog(struct fsinstance*, int, fsformat_superblock_t*); + +// pipe.c +int pipealloc(struct file**, struct file**); +void pipeclose(struct pipe*, int); +int piperead(struct pipe*, uint64, int); +int pipewrite(struct pipe*, uint64, int); + +// proc.c +int cpuid(void); +void exit(int); +int fork(void); +int thrd(uint64 fnc, uint64 stk, uint64 arg); +int affin(uint64 mask); +int growproc(int); +void proc_mapstacks(pagetable_t); +pagetable_t proc_pagetable(struct proc *); +void proc_freepagetable(pagetable_t, uint64, int); +int kill(int); +int killed(struct proc*); +void setkilled(struct proc*); +struct proc* myproc(); +void procinit(void); +#ifdef _ZCC +void scheduler(void); +#else +void scheduler(void) __attribute__((noreturn)); +#endif +void sched(void); +void sleep(void*, sched_spinlock_t*); 
+void userinit(void); +int wait(uint64); +void yield(void); +int either_copyout(int user_dst, uint64 dst, void *src, uint64 len); +int either_copyin(void *dst, int user_src, uint64 src, uint64 len); +void procdump(void); + +// swtch.S +void swtch(struct context*, struct context*); + +// spinlock.c +void acquire(sched_spinlock_t*); +int holding(sched_spinlock_t*); +void initlock(sched_spinlock_t*, char*); +void release(sched_spinlock_t*); +void push_off(void); +void pop_off(void); + +// sleeplock.c +void acquiresleep(sched_sleeplock_t*); +void releasesleep(sched_sleeplock_t*); +int holdingsleep(sched_sleeplock_t*); +void initsleeplock(sched_sleeplock_t*, char*); + +// string.c +int memcmp(const void*, const void*, uint); +void* memmove(void*, const void*, uint); +void* memset(void*, int, unsigned long long); +char* safestrcpy(char*, const char*, int); +int strlen(const char*); +int strncmp(const char*, const char*, uint); +char* strncpy(char*, const char*, int); + +// syscall.c +void argint(int, int*); +int argstr(int, char*, int); +void argaddr(int, uint64 *); +int fetchstr(uint64, char*, int); +int fetchaddr(uint64, uint64*); +void syscall(); + +// trap.c +extern uint ticks; +void trapinit(void); +void trapinithart(void); +extern sched_spinlock_t tickslock; +void usertrapret(void); + +// uart.c +void uartinit(void); +void uartintr(void); +void uartputc(int); +void uartputc_sync(int); +int uartgetc(void); + +// vm.c +void kvminit(void); +void kvminithart(void); +void kvmmap(pagetable_t, uint64, uint64, uint64, int); +int mappages(pagetable_t, uint64, uint64, uint64, int); +pagetable_t uvmcreate(void); +void uvmfirst(pagetable_t, uchar *, uint); +uint64 uvmalloc(pagetable_t, uint64, uint64, int); +uint64 uvmdealloc(pagetable_t, uint64, uint64); +int uvmcopy(pagetable_t, pagetable_t, uint64); +int uvmcopyshallow(pagetable_t, pagetable_t, uint64); +void uvmfree(pagetable_t, uint64, int); +void uvmunmap(pagetable_t, uint64, uint64, int); +void uvmclear(pagetable_t, uint64); +pte_t * walk(pagetable_t, uint64, int); +uint64 walkaddr(pagetable_t, uint64); +int copyout(pagetable_t, uint64, char *, uint64); +int copyin(pagetable_t, char *, uint64, uint64); +int copyinstr(pagetable_t, char *, uint64, uint64); + +// plic.c +void plicinit(void); +void plicinithart(void); +int plic_claim(void); +void plic_complete(int); + +// virtio_disk.c +void virtio_disk_init(void); +void virtio_disk_rw(diskio_buffer_t *, int); +void virtio_disk_intr(void); + +// number of elements in fixed-size array +#define NELEM(x) (sizeof(x)/sizeof((x)[0])) + +extern char end[]; // first address after kernel. + // defined by kernel.ld. 
+
+#define MB (1024ULL*1024ULL)
diff --git a/diskio.c b/diskio.c
new file mode 100644
index 0000000..287371a
--- /dev/null
+++ b/diskio.c
@@ -0,0 +1,309 @@
+// NEW CODE replacing old buffer management code with a more flexible system
+
+#include "diskio.h"
+#include "types.h"
+#include "param.h"
+#include "memlayout.h"
+#include "riscv.h"
+#include "defs.h"
+#include "drives.h"
+#include "fsinstance.h"
+#include "vmrd.h"
+#include "kprintf.h"
+
+int strcmp(const char*, const char*);
+void* memcpy(void*, void*, long);
+
+diskio_buffer_t* diskio_buffer_alloc(diskio_cache_t* owner, unsigned long long blocksize) {
+    if (blocksize > 4096ULL) { // TODO: Standardise page/block limits
+        panic("diskio_buffer_alloc: Bad block size!");
+    }
+    diskio_buffer_t* result = kalloc();
+    if (result) {
+        if (blocksize == 0ULL) {
+            result->data = NULL;
+        } else {
+            result->data = kalloc();
+            if (!result->data) {
+                kfree(result);
+                return NULL;
+            }
+        }
+
+        result->owner = owner;
+        result->referencecount = 0;
+        result->padding = 0; // unused
+        result->isvalid = 0;
+        result->isdisk = 0;
+        result->device = 0;
+        result->blocknumber = 0;
+
+        result->previous = NULL;
+        result->next = NULL;
+
+        initsleeplock(&result->lock, "diskio_buffer_t");
+    }
+    return result;
+}
+
+void diskio_buffer_free(diskio_buffer_t* buffer) {
+    if (buffer) {
+        if (buffer->data) {
+            kfree(buffer->data);
+        }
+        kfree(buffer);
+    }
+}
+
+void diskio_buffer_reference(diskio_buffer_t* buffer) {
+    acquire(&buffer->owner->spin);
+    buffer->referencecount = buffer->referencecount + 1;
+    release(&buffer->owner->spin);
+}
+
+void diskio_buffer_dereference(diskio_buffer_t* buffer) {
+    acquire(&buffer->owner->spin);
+    buffer->referencecount = buffer->referencecount - 1;
+    release(&buffer->owner->spin);
+}
+
+// Checks that the buffer is already locked and writes it to the disk.
+void diskio_buffer_write(diskio_buffer_t* buffer) {
+    if (!holdingsleep(&buffer->lock)) {
+        panic("diskio_buffer_write: Calling code is not holding the appropriate lock!");
+    }
+
+    diskio_performwrite(buffer);
+}
+
+// Performs a logical read of a buffer, returning it in locked form with the given block.
+diskio_buffer_t* diskio_buffer_read(diskio_cache_t* cache, unsigned int device, unsigned int blocknumber) {
+    diskio_buffer_t* buffer = diskio_buffer_get_noread(cache, device, blocknumber);
+
+    if (!buffer->isvalid) {
+        diskio_performread(buffer);
+        buffer->isvalid = 1;
+    }
+
+    return buffer;
+}
+
+diskio_buffer_t* diskio_buffer_get_noread(diskio_cache_t* cache, unsigned int device, unsigned int blocknumber) {
+    acquire(&cache->spin);
+
+    diskio_buffer_t* list = cache->leastrecentlist;
+
+    // Look for a matching cached entry if one exists and lock it.
+    for (diskio_buffer_t* buffer = list->next; buffer != list; buffer = buffer->next) {
+        if (buffer->device == device && buffer->blocknumber == blocknumber) {
+            buffer->referencecount = buffer->referencecount + 1;
+            release(&cache->spin);
+            acquiresleep(&buffer->lock);
+            return buffer;
+        }
+    }
+
+    // Otherwise find an empty buffer to re-allocate.
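+    // The scan below runs backwards from the least-recently-used end of the
+    // list and repurposes the first buffer that has no active references.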
+    for (diskio_buffer_t* buffer = list->previous; buffer != list; buffer = buffer->previous) {
+        if (buffer->referencecount == 0) {
+            buffer->referencecount = 1;
+            buffer->isvalid = 0;
+            buffer->device = device;
+            buffer->blocknumber = blocknumber;
+            //printf("Set device to 0x%x (0x%x)\n", buffer->device, device);
+
+            release(&cache->spin);
+
+            acquiresleep(&buffer->lock);
+
+            return buffer;
+        }
+    }
+
+    panic("diskio_buffer_get_noread: TODO: Better handling when running out of buffers!");
+
+    return NULL;
+}
+
+void diskio_buffer_release(diskio_buffer_t* buffer) {
+    diskio_cache_t* cache = buffer->owner;
+
+    if (!holdingsleep(&buffer->lock)) {
+        panic("diskio_buffer_release: Calling code is not holding the appropriate lock!");
+    }
+
+    releasesleep(&buffer->lock);
+
+    acquire(&cache->spin);
+
+    diskio_buffer_t* list = cache->leastrecentlist;
+
+    buffer->referencecount = buffer->referencecount - 1;
+
+    if (buffer->referencecount == 0) {
+        buffer->next->previous = buffer->previous;
+        buffer->previous->next = buffer->next;
+
+        buffer->next = list->next;
+        buffer->previous = list;
+
+        list->next->previous = buffer;
+        list->next = buffer;
+    }
+
+    release(&cache->spin);
+}
+
+diskio_cache_t* diskio_cache_alloc(unsigned long long buffercount, unsigned long long blocksize) {
+    if (buffercount > DISKIO_CACHE_MAXBUFFERS) {
+        panic("diskio_cache_alloc: buffercount is out of range!");
+    }
+    diskio_cache_t* result = kalloc();
+    if (result) {
+        initlock(&result->spin, "diskio_cache_t");
+
+        result->buffercount = buffercount;
+
+        diskio_buffer_t* listhead = diskio_buffer_alloc(result, 0);
+        if (listhead == NULL) {
+            kfree(result);
+            return NULL;
+        }
+        listhead->previous = listhead;
+        listhead->next = listhead;
+        result->leastrecentlist = listhead;
+
+        for (int bufnum = 0; bufnum < buffercount; bufnum++) {
+            diskio_buffer_t* buffer = diskio_buffer_alloc(result, blocksize);
+            if (buffer == NULL) {
+                // Unwind: free the buffers allocated so far, then the list
+                // head and the cache structure itself.
+                while (--bufnum >= 0) {
+                    diskio_buffer_free(result->buffers[bufnum]);
+                }
+                diskio_buffer_free(listhead);
+                kfree(result);
+                return NULL;
+            }
+
+            buffer->next = listhead->next;
+            buffer->previous = listhead;
+            listhead->next->previous = buffer;
+            listhead->next = buffer;
+
+            result->buffers[bufnum] = buffer;
+        }
+
+        result->blocksize = blocksize;
+    }
+    return result;
+}
+
+void diskio_cache_free(diskio_cache_t* cache) {
+    if (cache) {
+        // TODO: Free block cache structure
+    }
+}
+
+void diskio_performread(diskio_buffer_t* buffer) {
+    if ((buffer->device & 0xFF00) == 0x0900) {
+        diskio_ramdisk_rw(buffer, 0);
+    } else {
+        if (vmrd_present()) {
+            vmrd_rw(buffer, 0);
+        } else {
+            virtio_disk_rw(buffer, 0);
+        }
+    }
+}
+
+void diskio_performwrite(diskio_buffer_t* buffer) {
+    if ((buffer->device & 0xFF00) == 0x0900) {
+        diskio_ramdisk_rw(buffer, 1);
+    } else {
+        if (vmrd_present()) {
+            vmrd_rw(buffer, 1);
+        } else {
+            virtio_disk_rw(buffer, 1);
+        }
+    }
+}
+
+#define RESOURCES_MAX 256
+
+typedef struct resource resource_t;
+struct resource {
+    const char* type;
+    const char* name;
+    unsigned long size;
+    unsigned char* data;
+};
+
+resource_t resources[RESOURCES_MAX];
+
+int nresources = -1;
+
+void resources_checkinit() {
+    if (nresources == -1) {
+        printf("Initialising resources...\n");
+        nresources = 0;
+    }
+}
+
+void resources_register(const char* type, const char* name, unsigned char* data, unsigned long size) {
+    resources_checkinit();
+    if (strcmp(type, "RAMDISK") == 0) {
+        resources[nresources].type = type;
+        resources[nresources].name = name;
+        resources[nresources].data = data;
+        resources[nresources].size =
size; + nresources++; + } else { + printf("TODO: resources_register(\"%s\", \"%s\", %p, %d);\n", type, name, data, (int) size); + } +} + +void diskio_ramdisk_rw(diskio_buffer_t* buffer, int wr) { + resource_t* ramdisk = &resources[buffer->device & 0xFF]; + if ((buffer->blocknumber + 1)*DISKIO_BLOCK_SIZE > ramdisk->size) { + panic("diskio_ramdisk_rw: block number out of range"); + } + unsigned long offset = buffer->blocknumber * DISKIO_BLOCK_SIZE; + unsigned char* ramdiskptr = ramdisk->data + offset; + if (wr) { + memcpy(ramdiskptr, buffer->data, DISKIO_BLOCK_SIZE); + } else { + memcpy(buffer->data, ramdiskptr, DISKIO_BLOCK_SIZE); + } +} + +void diskio_mountramdisk(struct drives* drives, const char* mountname, unsigned int device) { + printf("TODO: mount ramdisk '%s' on device %x\n", mountname, device); + + printf(" diskio_cache_alloc()...\n"); + diskio_cache_t* cache = diskio_cache_alloc(NBUF, DISKIO_BLOCK_SIZE); + printf(" diskio_cache_alloc() returned %p\n", cache); + + printf(" fsinstance_alloc()...\n"); + fsinstance_t* instance = fsinstance_alloc(); + printf(" fsinstance_alloc() returned %p\n", instance); + + instance->cache = cache; + + printf(" drives_setup()...\n"); + int dn = drives_setup(drives, DRIVES_HANDLER_FS, instance, mountname); + printf(" drives_setup() returned %d\n", dn); + + printf(" fsinstance_init()...\n"); + void * fsp = fsinstance_init(instance, device); + printf(" fsinstance_init() returned %p\n", fsp); + + printf("device=%x\n", (int)((long)instance->fslog_device)); +} + +void diskio_mountallramdisks(struct drives* drives) { + for (int i = 0; i < nresources; i++) { + resource_t* r = &resources[i]; + if (!strcmp(r->type, "RAMDISK")) { + diskio_mountramdisk(drives, r->name, 0x0900 + i); + } + } +} diff --git a/diskio.h b/diskio.h new file mode 100644 index 0000000..b4c0332 --- /dev/null +++ b/diskio.h @@ -0,0 +1,83 @@ +// NEW CODE implementing a simple disk i/o multiplexing layer +#ifndef _DISKIO_H +#define _DISKIO_H + +#include "sched.h" + +// TODO: Replace BSIZE references in old code with this and/or make it flexible +#define DISKIO_BLOCK_SIZE 4096 + +typedef struct diskio_buffer diskio_buffer_t; + +typedef struct diskio_cache diskio_cache_t; + +// NOTE: This still resembles the old structure but will probably be changed +// as different options/backends/etc. are added. +struct diskio_buffer { + diskio_cache_t* owner; + unsigned int referencecount; + int padding; + int isvalid; + int isdisk; // Is this a buffer owned by the disk (rather than the fs code??) + unsigned int device; + unsigned int blocknumber; + sched_sleeplock_t lock; + // Least-recently-used list: + diskio_buffer_t* previous; + diskio_buffer_t* next; + // Block data + unsigned char* data; // TODO: Flexible buffers +}; + +// The diskio_cache structure should fit in a page, so it must be limited in +// to a few hundred buffer references for now: +#define DISKIO_CACHE_MAXBUFFERS 500 + +struct diskio_cache { + sched_spinlock_t spin; + diskio_buffer_t* leastrecentlist; + unsigned long long buffercount; + unsigned long long blocksize; + diskio_buffer_t* buffers[DISKIO_CACHE_MAXBUFFERS]; +}; + +// These are the internal allocation functions, a cache pre-allocates a number +// of buffer structures (each representing 1 disk block) then re-allocates them +// on demand from it's internal list. 
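+//
+// Illustrative usage sketch (not taken from the original sources; it assumes
+// only the declarations below and omits error handling):
+//
+//   diskio_cache_t* cache = diskio_cache_alloc(NBUF, DISKIO_BLOCK_SIZE);
+//   diskio_buffer_t* b = diskio_buffer_read(cache, dev, blockno); // returned locked
+//   /* ...inspect or modify b->data... */
+//   diskio_buffer_write(b);   // only if b->data was modified
+//   diskio_buffer_release(b); // unlock and return to the most-recent end of the LRU list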
+diskio_buffer_t* diskio_buffer_alloc(diskio_cache_t* owner, unsigned long long blocksize); +void diskio_buffer_free(diskio_buffer_t* buffer); +diskio_cache_t* diskio_cache_alloc(unsigned long long buffercount, unsigned long long blocksize); +void diskio_cache_free(diskio_cache_t* cache); + +// These are the internal read/write functions which link to any I/O multiplexing +void diskio_performread(diskio_buffer_t* buffer); +void diskio_performwrite(diskio_buffer_t* buffer); + +// Performs a logical write of a buffered block through it's associated cache, +// checks that the buffer is already locked and writes it to the disk. +void diskio_buffer_write(diskio_buffer_t* buffer); + +// Performs a logical read of a buffered block through a cache, returning it in +// locked form with the given block. +diskio_buffer_t* diskio_buffer_read(diskio_cache_t* cache, unsigned int device, unsigned int blocknumber); + +// (Re-)allocates a buffer from the cache for a given device and block number +// combination, but does NOT attempt to read the block from disk. Returns an +// existing or newly repurposed buffer from the cache in a locked state. +diskio_buffer_t* diskio_buffer_get_noread(diskio_cache_t* cache, unsigned int device, unsigned int blocknumber); + +void diskio_buffer_release(diskio_buffer_t* buffer); + +void diskio_buffer_reference(diskio_buffer_t* buffer); +void diskio_buffer_dereference(diskio_buffer_t* buffer); + +// Used internally for reads/writes to ramdisk devices +void diskio_ramdisk_rw(diskio_buffer_t* buffer, int wr); + +struct drives; // Defined properly in drives.h +// Called to mount any available ramdisk images on the given drives structure. +void diskio_mountallramdisks(struct drives* drives); + +// From ifndef at top of file: +#endif + diff --git a/drives.c b/drives.c new file mode 100644 index 0000000..ef43a41 --- /dev/null +++ b/drives.c @@ -0,0 +1,334 @@ +/* NEW CODE implementing a simple system like "drive letters" but longer names, + * Copyright (C) 2024 Zak Fenton + * NO WARRANTY USE AT YOUR OWN RISK etc. under terms of UNLICENSE or MIT license + */ +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "riscv.h" +#include "proc.h" +#include "drives.h" +#include "defs.h" +#include "fs.h" +#include "kprintf.h" + +#ifndef NULL +#define NULL ((void*)0ULL) +#endif + +char* strcat(char*, const char*); +void* memcpy(void*, void*, long); + + +struct drives* drives_alloc() { + int i; + if (sizeof(struct drives) >= PGSIZE) { + panic("drives structure too large to fit in a page"); + } + struct drives* result = kalloc(); + if (result == NULL) { + panic("failed to allocate drives structure"); + } + initlock(&(result->lock), "drives"); + for (i = 0; i < DRIVES_MAX; i++) { + result->entries[i].handler = DRIVES_HANDLER_NONE; + } + return result; +} + +/* Returns the length of the drive name in the given string, allowing for + * strings with more path information to be used in lookup (rather than copying + * just the drive name to a new string!). + */ +static int drives_namelen(const char* n) { + int i = 0; + if (n == NULL) { + return -1; + } + while (*n != 0 && *n != ':') { + if (i >= DRIVES_NAMEMAX || *n == '/' || *n == '\\') { + return -1; + } + n++; + i++; + } + return i; +} + +/* Like drives_namelen but checks that a drive name ending in ':' is actually + * present (returning -1 otherwise). 
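+ * For example, drives_namelencheck("home:/bin/sh") returns 4 (the length of
+ * "home"), while a path with no ':' such as "bin/sh" returns -1.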
+ */ +static int drives_namelencheck(const char* n) { + int i = 0; + if (n == (void*)0ULL) { + return -1; + } + while (*n != 0) { + if (*n == ':') { + return i; + } + if (i >= DRIVES_NAMEMAX || *n == '/' || *n == '\\') { + return -1; + } + n++; + i++; + } + return -1; +} + +/* Returns 1 if the name matches the given drive number or 0 otherwise. */ +static int drives_namesmatch(struct drives* drives, int driveid, const char* name, int namelen) { + int i; + for (i = 0; i < namelen; i++) { + if (drives->entries[driveid].name[i] != name[i]) { + return 0; // Not a match, character differs + } + } + if (i == DRIVES_NAMEMAX || drives->entries[driveid].name[i] == 0) { + return 1; // Is a match, we checked until the end + } else { + return 0; // Not a match, drive name has more characters + } +} + +/* Scans the drives structure for a matching drive. Can be run without locking, + * then the structure can be locked once a candidate is found. + */ +static int drives_find(struct drives* drives, int startingat, const char* name, int namelen) { + int i; + for (i = startingat; i < DRIVES_MAX; i++) { + if (drives_namesmatch(drives, i, name, namelen) && drives->entries[i].handler != DRIVES_HANDLER_NONE) { + return i; + } + } + return -1; // Not found. +} + +int drives_setup(struct drives* drives, int handlertype, void* handlerdata, const char* name) { + acquire(&(drives->lock)); + + int nlen = drives_namelen(name); + int idx = drives_find(drives, 0, name, nlen); + if (idx >= 0) { // Already exists + release(&(drives->lock)); + return -1; + } + + for (idx = 0; idx < DRIVES_MAX; idx++) { + if (drives->entries[idx].handler == DRIVES_HANDLER_NONE) { + drives->entries[idx].nusers = 0; + drives->entries[idx].handler = handlertype; + drives->entries[idx].handlerdata = handlerdata; + int namei; + for (namei = 0; namei < nlen; namei++) { + drives->entries[idx].name[namei] = name[namei]; + } + for (; namei < DRIVES_NAMEMAX; namei++) { + drives->entries[idx].name[namei] = 0; + } + release(&(drives->lock)); + return idx; + } + } + // No free slots + release(&(drives->lock)); + return -1; +} + +int drives_open(struct drives* drives, const char* name, int* handlertype, void** handlerdata) { + acquire(&(drives->lock)); + int nlen = drives_namelen(name); + int idx = drives_find(drives, 0, name, nlen); + + if (idx < 0) { + release(&(drives->lock)); + return idx; + } + + drives->entries[idx].nusers++; + + release(&(drives->lock)); + + return idx; +} + +int drives_opensimple(struct drives* drives, int driven) { + if (drives == NULL || driven < 0 || driven > DRIVES_MAX) { + return -1; // Return -1 to indicate error + } + acquire(&(drives->lock)); + + drives->entries[driven].nusers++; + + release(&(drives->lock)); + + return driven; +} + +/* Safely decrements the user count of the given drive number, returning -1. */ +int drives_close(struct drives* drives, int driven) { + if (drives == NULL || driven < 0 || driven > DRIVES_MAX) { + return -1; // Always returns -1 (whether driven is invalid or not) + } + acquire(&(drives->lock)); + drives->entries[driven].nusers--; + release(&(drives->lock)); + return -1; // Always returns -1 +} + +/* Safely increments the user count of the given drive number, returning the number. 
*/ +int drives_dup(struct drives* drives, int driven) { + if (driven < 0 || driven > DRIVES_MAX) { + return -1; // Returns -1 if invalid + } + acquire(&(drives->lock)); + drives->entries[driven].nusers++; + release(&(drives->lock)); + return driven; +} + +fsinstance_t* drives_fsbegin(struct drives* drives, int hint, char* path) { + int lench = drives_namelencheck(path); + //printf("from path '%s' got lencheck %d\n", path, lench); + //int nskip = lench < 0 ? 0 : lench+1; + int dn = hint; + acquire(&(drives->lock)); + if (lench >= 0) { + dn = drives_find(drives, 0, path, lench); + //printf("Found drive %d handler %d\n", dn, drives->entries[dn].handler); + } + if (dn < 0) { + release(&(drives->lock)); + return 0ULL; + } + fsinstance_t* instance = drives->entries[dn].handlerdata; + release(&drives->lock); + fsinstance_begin(instance); + return instance; +} + +void drives_fsend(struct drives* drives, fsinstance_t* instance) { + if (instance != NULL) { + fsinstance_end(instance); + // TODO: Add some more bookkeeping, i.e. begin/end should also imply an additional open/close of the relevant drive + } +} + +void* drives_fsnode(struct drives* drives, int hint, char* path, int* fstypevar) { + int lench = drives_namelencheck(path); + //printf("from path '%s' got lencheck %d\n", path, lench); + int nskip = 0; + if (lench >= 0) { + if (path[lench+1] == '/') { + nskip = lench + 1; + } else { + // Replace the ':' with a '/' to ensure we're in the drive's root + path[lench] = '/'; + nskip = lench; + } + } + int dn = hint; + acquire(&(drives->lock)); + if (lench >= 0) { + dn = drives_find(drives, 0, path, lench); + //printf("Found drive %d handler %d\n", dn, drives->entries[dn].handler); + } + if (dn < 0) { + release(&(drives->lock)); + return 0ULL; + } + if (fstypevar) { + *fstypevar = drives->entries[dn].handler; + } + if (drives->entries[dn].handler != DRIVES_HANDLER_FS) { + release(&(drives->lock)); + return 0ULL; + } + fsinstance_t* instance = drives->entries[dn].handlerdata; + release(&(drives->lock)); + void* result = fsinstance_lookup(instance, path+nskip); + return result; +} + +void* drives_fsparent(struct drives* drives, int hint, char* path, int* fstypevar, char* namevar, int namelen) { + if (namelen < FSFORMAT_NAMESIZE_NEW) { + return NULL; + } + int lench = drives_namelencheck(path); + //printf("from path '%s' got lencheck %d\n", path, lench); + int nskip = 0; + if (lench >= 0) { + if (path[lench+1] == '/') { + nskip = lench + 1; + } else { + // Replace the ':' with a '/' to ensure we're in the drive's root + path[lench] = '/'; + nskip = lench; + } + } + int dn = hint; + acquire(&(drives->lock)); + if (lench >= 0) { + dn = drives_find(drives, 0, path, lench); + //printf("Found drive %d handler %d\n", dn, drives->entries[dn].handler); + } + if (dn < 0) { + release(&(drives->lock)); + return NULL; + } + if (fstypevar) { + *fstypevar = drives->entries[dn].handler; + } + if (drives->entries[dn].handler != DRIVES_HANDLER_FS) { + release(&(drives->lock)); + return NULL; + } + fsinstance_t* instance = drives->entries[dn].handlerdata; + release(&(drives->lock)); + void* result = fsinstance_lookupparent(instance, path+nskip, namevar); + return result; +} + +int drives_getinfo(struct drives* drives, int drivenumber, struct __syscdefs_driveinfo* structure) { + if (!structure) { + return -1; + } + + // This will mark the drive as being opened but will also validate the + // drive number first so we should use it's return value as the new + // number as long as it's >= 0. 
+ int idx = drives_opensimple(drives, drivenumber); + + // The structure is cleared to zeroes before setting or returning, + // even/especially in the case of the drive not existing! + memset(structure, 0, sizeof(struct __syscdefs_driveinfo)); + + // If the value returned by drives_opensimple is <0 then the drive + // number doesn't exist or is out of range. + if (idx < 0) { + return -1; + } + + if (drives->entries[idx].handler == DRIVES_HANDLER_FS) { + fsinstance_t* instance = drives->entries[idx].handlerdata; + memcpy(structure->name, drives->entries[idx].name, __SYSCDEFS_DRIVES_NAMEMAX); + structure->drivenumber = idx; + structure->blocksize = 4096; // TODO: Make this configurable!!! + structure->totalblocks = instance->superblock->totalblocks; + structure->freedatablocks = fsinstance_countfreeblocks(instance, instance->fslog_device); + if (instance->fsversion == 0) { + strcat(structure->fsname, "xv6-compatible (v0)"); + } else if (instance->fsversion == 1) { + strcat(structure->fsname, "Herodotus FS v1"); + } else { + strcat(structure->fsname, "Unrecognised version"); + } + } else { + return -1; + } + + drives_close(drives, idx); + + return 0; +} diff --git a/drives.h b/drives.h new file mode 100644 index 0000000..7ec9ea9 --- /dev/null +++ b/drives.h @@ -0,0 +1,76 @@ +/* NEW CODE implementing a simple system like "drive letters" but longer names, + * Copyright (C) 2024 Zak Fenton + * NO WARRANTY USE AT YOUR OWN RISK etc. under terms of UNLICENSE or MIT license + */ + +#ifndef _DRIVES_H +#define _DRIVES_H + +// The logic for managing filesystem instances themselves is separate +// to the management of the drives structure, but the drives system acts +// as a convenience wrapper over the filesystem logic. +#include "fsinstance.h" +#include "syscdefs.h" + +#define DRIVES_HANDLER_NONE 0 +#define DRIVES_HANDLER_FS 1 + +/* The maximum number of drives. */ +#define DRIVES_MAX 10 + +/* The maximum length of a drive name. */ +#define DRIVES_NAMEMAX 24 + +/* An entry for a single drive. */ +struct drives_entry { + int handler; + int nusers; + void* handlerdata; + char name[DRIVES_NAMEMAX]; +}; + +/* The drives structure. */ +struct drives { + sched_spinlock_t lock; + struct drives_entry entries[DRIVES_MAX]; +}; + +/* Allocates and initialises a new drives structure. Note that functions for + * dealing with multiple drives structures will be dealt with later! + */ +struct drives* drives_alloc(); + +/* Allocates a drive in the drives structure with the given data, returning the + * drive number on success or -1 on failure. + */ +int drives_setup(struct drives* drives, int handlertype, void* handlerdata, const char* name); + +/* Finds the corresponding drive and retrieves the fields. This should lock the + * structure while operating, incrementing nusers before releasing the lock. + * + * Returns the drive number on success or -1 on failure. + */ +int drives_open(struct drives* drives, const char* name, int* handlertype, void** handlerdata); + +/* Safely decrements the user count of the given drive number, returning -1. */ +int drives_close(struct drives* drives, int driven); + +/* Safely increments the user count of the given drive number, returning the number. */ +int drives_dup(struct drives* drives, int driven); + +/* Looks up the (filesystem-specific) inode structure for a given path, storing + * the filesystem type in a variable at the given pointer (0 on failure). 
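+ * For example, "home:/etc/motd" resolves the drive named "home" and then looks
+ * up "/etc/motd" within that drive's filesystem, while a path with no drive
+ * prefix is resolved against the hint drive number instead.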
+ */ +void* drives_fsnode(struct drives* drives, int hint, char* path, int* fstypevar); +void* drives_fsparent(struct drives* drives, int hint, char* path, int* fstypevar, char* namevar, int namelen); + +/* Begin/end a filesystem operation, also returning the appropriate fsinstance for the hint drive and/or path. */ +fsinstance_t* drives_fsbegin(struct drives* drives, int hint, char* path); +void drives_fsend(struct drives* drives, fsinstance_t* instance); + +/* Gets information about a drive, filling a structure with information about the drive. */ +int drives_getinfo(struct drives* drives, int drivenumber, struct __syscdefs_driveinfo* structure); + +/* From ifndef at top of file: */ +#endif + diff --git a/entry.S b/entry.S new file mode 100644 index 0000000..d19710b --- /dev/null +++ b/entry.S @@ -0,0 +1,24 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) + // qemu -kernel loads the kernel at 0x80000000 + // and causes each hart (i.e. CPU) to jump there. + // kernel.ld causes the following code to + // be placed at 0x80000000. + +.option norvc +.section .text +.global _entry +_entry: + // set up a stack for C. + // stack0 is declared in start.c, + // with a 4096-byte stack per CPU. + // sp = stack0 + (hartid * 4096) + la sp, stack0 + li a0, 1024*4 + csrr a1, mhartid + addi a1, a1, 1 + mul a0, a0, a1 + add sp, sp, a0 + // jump to start() in start.c + call start +spin: + j spin diff --git a/exec.c b/exec.c new file mode 100644 index 0000000..7357b4b --- /dev/null +++ b/exec.c @@ -0,0 +1,195 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "sched.h" +#include "proc.h" +#include "defs.h" +#include "drives.h" +#include "fsinstance.h" +//#include "elf.h" +#include +#include "kprintf.h" + +static int loadseg(pde_t *, uint64, fsinstance_inode_t *, uint, uint); + +int flags2perm(int flags) +{ + int perm = 0; + if(flags & 0x1) + perm = PTE_X; + if(flags & 0x2) + perm |= PTE_W; + return perm; +} + +// TODO: This should probably also look up programs from PATH or else +// be execv instead, check manpages or standards thoroughly for correct +// behaviour/names. +int +execve(char *path, char **argv, char **envv) +{ + //printf("Attempting exec of '%s'\n", path); + char *s, *last; + int i, off; + uint64 argc, envc, sz = 0, sp, ustack[MAXARG*2+2], stackbase; + Elf64_Ehdr elf; + fsinstance_inode_t *ip; + Elf64_Phdr ph; + pagetable_t pagetable = 0, oldpagetable; + struct proc *p = myproc(); + + fsinstance_t* instance = drives_fsbegin(p->drives, p->cwdrive, path); + + int typevar; + if((ip = drives_fsnode(p->drives, p->cwdrive, path, &typevar)) == NULL || typevar != DRIVES_HANDLER_FS){ + printf("BAD EXEC\n"); + drives_fsend(p->drives, instance); + return -1; + } + fsinstance_inode_lockandload(ip); + + // Check ELF header + if(fsinstance_inode_read(ip, 0, (uint64)&elf, 0, sizeof(elf)) != sizeof(elf)) + goto bad; + + if (elf.e_ident[0] != ELFMAG0 || elf.e_ident[1] != ELFMAG1 || elf.e_ident[2] != ELFMAG2 || elf.e_ident[3] != ELFMAG3) { + goto bad; + } + + if((pagetable = proc_pagetable(p)) == 0) + goto bad; + + // Load program into memory. + for(i=0, off=elf.e_phoff; idrives, instance); + ip = 0; + + p = myproc(); + uint64 oldsz = p->sz; + + // Allocate some pages at the next page boundary. + // Make the first inaccessible as a stack guard. + // Use the rest as the user stack. 
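+  // Resulting layout, low to high: one inaccessible guard page, then USERSTACK
+  // stack pages; sp starts at the top (sz) and stackbase marks the lowest
+  // usable stack address for the argument/environment copies below.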
+ sz = PGROUNDUP(sz); + uint64 sz1; + if((sz1 = uvmalloc(pagetable, sz, sz + (USERSTACK+1)*PGSIZE, PTE_W)) == 0) + goto bad; + sz = sz1; + uvmclear(pagetable, sz-(USERSTACK+1)*PGSIZE); + sp = sz; + stackbase = sp - USERSTACK*PGSIZE; + + // Push argument strings, prepare rest of stack in ustack. + for(argc = 0; argv[argc]; argc++) { + if(argc >= MAXARG) + goto bad; + sp -= strlen(argv[argc]) + 1; + sp -= sp % 16; // riscv sp must be 16-byte aligned + if(sp < stackbase) + goto bad; + if(copyout(pagetable, sp, argv[argc], strlen(argv[argc]) + 1) < 0) + goto bad; + ustack[argc] = sp; + } + ustack[argc] = 0; + // Push env strings + for(envc = 0; envv[envc]; envc++) { + if(envc >= MAXARG) + goto bad; + sp -= strlen(envv[envc]) + 1; + sp -= sp % 16; // riscv sp must be 16-byte aligned + if(sp < stackbase) + goto bad; + if(copyout(pagetable, sp, envv[envc], strlen(envv[envc]) + 1) < 0) + goto bad; + ustack[argc+envc] = sp; + } + ustack[argc+envc] = 0; + + // push the array of argv[] pointers. + sp -= (argc+1+envc+1) * sizeof(uint64); + sp -= sp % 16; + if(sp < stackbase) + goto bad; + if(copyout(pagetable, sp, (char *)ustack, (argc+1+envc+1)*sizeof(uint64)) < 0) + goto bad; + + // arguments to user main(argc, argv) + // argc is returned via the system call return + // value, which goes in a0. + p->trapframe->a1 = sp; + p->trapframe->a2 = sp + (argc+1)*sizeof(uint64); + + // Save program name for debugging. + for(last=s=path; *s; s++) + if(*s == '/') + last = s+1; + safestrcpy(p->name, last, PROC_NAME_SIZE /*sizeof(p->name) TODO */); + + // Commit to the user image. + oldpagetable = p->pagetable; + p->pagetable = pagetable; + p->sz = sz; + p->trapframe->epc = elf.e_entry; // initial program counter = main + p->trapframe->sp = sp; // initial stack pointer + proc_freepagetable(oldpagetable, oldsz, p->mainthread == 0); + p->mainthread = 0ULL; + + return argc; // this ends up in a0, the first argument to main(argc, argv) + + bad: + printf("BAD EXEC\n"); + if(pagetable) + proc_freepagetable(pagetable, sz, 1); + if(ip){ + fsinstance_inode_unlockandunget(ip); + drives_fsend(p->drives, instance); + } + return -1; +} + +// Load a program segment into pagetable at virtual address va. +// va must be page-aligned +// and the pages from va to va+sz must already be mapped. +// Returns 0 on success, -1 on failure. +static int +loadseg(pagetable_t pagetable, uint64 va, fsinstance_inode_t *ip, uint offset, uint sz) +{ + uint i, n; + uint64 pa; + + for(i = 0; i < sz; i += PGSIZE){ + pa = walkaddr(pagetable, va + i); + if(pa == 0) + panic("loadseg: address should exist"); + if(sz - i < PGSIZE) + n = sz - i; + else + n = PGSIZE; + if(fsinstance_inode_read(ip, 0, (uint64)pa, offset+i, n) != n) + return -1; + } + + return 0; +} diff --git a/fcntl.h b/fcntl.h new file mode 100644 index 0000000..72ab5c4 --- /dev/null +++ b/fcntl.h @@ -0,0 +1,7 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#define O_RDONLY 0x000 +#define O_WRONLY 0x001 +#define O_RDWR 0x002 +#define O_CREATE 0x200 +#define O_TRUNC 0x400 +#define O_APPEND 0x800 diff --git a/file.c b/file.c new file mode 100644 index 0000000..1fc7ef6 --- /dev/null +++ b/file.c @@ -0,0 +1,198 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// +// Support functions for system calls that involve file descriptors. 
+// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "fs.h" +#include "sched.h" +#include "file.h" +#include "stat.h" +#include "proc.h" +#include "kprintf.h" + +struct devsw devsw[NDEV]; +//struct { + sched_spinlock_t ftable_lock; + struct file ftable_file[NFILE]; +//} ftable; + +void +fileinit(void) +{ + initlock(&ftable_lock, "ftable"); +} + +// Allocate a file structure. +struct file* +filealloc(void) +{ + struct file *f; + + acquire(&ftable_lock); + for(f = ftable_file; f < ftable_file + NFILE; f++){ + if(f->ref == 0){ + f->ref = 1; + release(&ftable_lock); + return f; + } + } + release(&ftable_lock); + return 0; +} + +// Increment ref count for file f. +struct file* +filedup(struct file *f) +{ + // kqueue objects are not duplicated on a call to fork etc. + if (f->type == FD_KQUEUE) { + return NULL; + } + + acquire(&ftable_lock); + if(f->ref < 1) + panic("filedup"); + f->ref++; + release(&ftable_lock); + return f; +} + +// Close file f. (Decrement ref count, close when reaches 0.) +void +fileclose(struct file *f) +{ + struct file ff; + + acquire(&ftable_lock); + if(f->ref < 1) + panic("fileclose"); + if(--f->ref > 0){ + release(&ftable_lock); + return; + } + //ff = *f; + memmove(&ff, f, sizeof(struct file)); + f->ref = 0; + f->type = FD_NONE; + release(&ftable_lock); + + if(ff.type == FD_PIPE){ + pipeclose(ff.pipe, ff.writable); + } else if(ff.type == FD_INODE || ff.type == FD_DEVICE){ + fsinstance_t* instance = ff.ip->instance; + fsinstance_begin(instance); + fsinstance_inode_unget(ff.ip); + fsinstance_end(instance); + } +} + +// Get metadata about file f. +// addr is a user virtual address, pointing to a struct stat. +int +filestat(struct file *f, uint64 addr) +{ + struct proc *p = myproc(); + struct stat st; + + if(f->type == FD_INODE || f->type == FD_DEVICE){ + fsinstance_inode_lockandload(f->ip); + fsinstance_inode_getstatinfo(f->ip, &st); + fsinstance_inode_unlock(f->ip); + if(copyout(p->pagetable, addr, (char *)&st, sizeof(st)) < 0) + return -1; + return 0; + } + return -1; +} + +// Read from file f. +// addr is a user virtual address. +int +fileread(struct file *f, uint64 addr, int n) +{ + int (*read)(int, uint64, int); + int r = 0; + + if(f->readable == 0) + return -1; + + if(f->type == FD_PIPE){ + r = piperead(f->pipe, addr, n); + } else if(f->type == FD_DEVICE){ + if(f->major < 0 || f->major >= NDEV || !devsw[f->major].read) + return -1; + read = devsw[f->major].read; + r = read(1, addr, n); + } else if(f->type == FD_INODE){ + fsinstance_inode_lockandload(f->ip); + if((r = fsinstance_inode_read(f->ip, 1, addr, f->off, n)) > 0) + f->off += r; + fsinstance_inode_unlock(f->ip); + } else { + panic("fileread"); + } + + return r; +} + +// Write to file f. +// addr is a user virtual address. +int +filewrite(struct file *f, uint64 addr, int n) +{ + int (*write)(int, uint64, int); + int r, ret = 0; + + if(f->writable == 0) + return -1; + + if(f->type == FD_PIPE){ + ret = pipewrite(f->pipe, addr, n); + } else if(f->type == FD_DEVICE){ + if(f->major < 0 || f->major >= NDEV || !devsw[f->major].write) + return -1; + write = devsw[f->major].write; + ret = write(1, addr, n); + } else if(f->type == FD_INODE){ + // write a few blocks at a time to avoid exceeding + // the maximum log transaction size, including + // i-node, indirect block, allocation blocks, + // and 2 blocks of slop for non-aligned writes. + // this really belongs lower down, since fsinstance_inode_write() + // might be writing a device like the console. 
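+    // Each chunk of at most 'max' bytes is wrapped in its own
+    // fsinstance_begin/fsinstance_end transaction below, keeping every
+    // transaction within the log limit described above.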
+ int max = ((MAXOPBLOCKS-1-1-2) / 2) * BSIZE; + int i = 0; + while(i < n){ + int n1 = n - i; + if(n1 > max) + n1 = max; + + fsinstance_begin(f->ip->instance); + fsinstance_inode_lockandload(f->ip); + // Here is where it needs modification for append + if (f->writable == 2) { // Append + r = fsinstance_inode_write(f->ip, 1, addr + i, f->ip->size, n1); + } else if ((r = fsinstance_inode_write(f->ip, 1, addr + i, f->off, n1)) > 0) { + f->off += r; + } + fsinstance_inode_unlock(f->ip); + fsinstance_end(f->ip->instance); + + if(r != n1){ + // error from writei + break; + } + i += r; + } + ret = (i == n ? n : -1); + } else { + panic("filewrite"); + } + + return ret; +} + diff --git a/file.h b/file.h new file mode 100644 index 0000000..9ac7506 --- /dev/null +++ b/file.h @@ -0,0 +1,28 @@ +#include "fsinstance.h" + +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +struct file { + enum { FD_NONE, FD_PIPE, FD_INODE, FD_DEVICE, FD_KQUEUE } type; + int ref; // reference count + char readable; + char writable; // NOTE: This is now set to 2 for append + struct pipe *pipe; // FD_PIPE + fsinstance_inode_t *ip; // FD_INODE and FD_DEVICE + uint off; // FD_INODE + short major; // FD_DEVICE +}; + +// These cause warnings as conflicts with non-macro uses of the words +//#define major(dev) ((dev) >> 16 & 0xFFFF) +//#define minor(dev) ((dev) & 0xFFFF) +#define mkdev(m,n) ((uint)((m)<<16| (n))) + +// map major device number to device functions. +struct devsw { + int (*read)(int, uint64, int); + int (*write)(int, uint64, int); +}; + +extern struct devsw devsw[]; + +#define CONSOLE 1 diff --git a/fpu.c b/fpu.c new file mode 100644 index 0000000..d8e5609 --- /dev/null +++ b/fpu.c @@ -0,0 +1,92 @@ +// NEW CODE for setting up the floating point unit +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "sched.h" +#include "proc.h" +#include "fpu.h" +#include "kprintf.h" + +int _fpu_exists; + +int fpu_status_read() { + return (int) ((r_sstatus() >> 13) & 3); +} + +void fpu_status_write(int status) { + status = status & 3; + uint64 mask = ~(3ULL << 13); + w_sstatus((r_sstatus() & mask) | (status << 13)); +} + +void fpu_init() { + int s1 = fpu_status_read(); + printf("FPU status is %d\n", s1); + if (s1 != 0) { + printf("ERROR: FPU should be in state 0 on startup. 
Will not initialise FPU.\n"); + return; + } + fpu_status_write(1); + int s2 = fpu_status_read(); + printf("FPU status is %d\n", s2); + if (s2 != 1) { + printf("FPU is not present or not initialisable.\n"); + _fpu_exists = 0; + return; + } else { + _fpu_exists = 1; + printf("FPU appears to be present and now initialised!\n"); + } + + fpu_status_write(0); + int s3 = fpu_status_read(); + printf("FPU status is %d\n", s3); +} + +void fpu_setupinitial() { + if (_fpu_exists) { + fpu_status_write(3); + } +} + +int fpu_instrsizeattrap(struct proc* p) { + uint32 instr; + if (copyin(p->pagetable, (char*)&instr, p->trapframe->epc, 4) == -1) { + printf("fpu_instrtrap(): invalid instruction address at %p, killing process\n", (void*)p->trapframe->epc); + setkilled(p); + return 0; + } + if ((instr & 3) == 3) { + return 4; + } else { + return 2; + } +} + +void fpu_instrtrap(struct proc* p) { + uint32 instr; + if (copyin(p->pagetable, (char*)&instr, p->trapframe->epc, 4) == -1) { + printf("fpu_instrtrap(): invalid instruction address at %p, killing process\n", (void*)p->trapframe->epc); + setkilled(p); + return; + } + + uint32 opcode = (instr & 0x7F); + + if (fpu_status_read() == 0 && (opcode == 0x53 || opcode == 0x07 || opcode == 0x27 || opcode == 0x43 || opcode == 0x47 || opcode == 0x4B || opcode == 0x4F)) { + printf("fpu_instrtrap(): floating point instruction at %p, TODO\n", (void*)p->trapframe->epc); + if (p->fpu_saved) { + fpu_status_write(1); // Clean state but enabled + fpu_restore(&p->fpu_context); + fpu_status_write(1); // Clean state but enabled + } else { + fpu_setupinitial(); + } + p->fpu_active = 1; + } else { + printf("fpu_instrtrap(): invalid instruction %p at address %p, killing process\n", (void*)(uint64)instr, (void*)p->trapframe->epc); + setkilled(p); + } +} diff --git a/fpu.h b/fpu.h new file mode 100644 index 0000000..28cffa7 --- /dev/null +++ b/fpu.h @@ -0,0 +1,54 @@ +// NEW CODE for setting up the FPU +#ifndef FPU_H +#define FPU_H + +void fpu_setupinitial(); +//void fpu_instrtrap(struct proc* p); + +typedef struct fpu_context fpu_context_t; +typedef unsigned long long fpu_reg_t; + +struct fpu_context { + fpu_reg_t controlreg; + fpu_reg_t f0; + fpu_reg_t f1; + fpu_reg_t f2; + fpu_reg_t f3; + fpu_reg_t f4; + fpu_reg_t f5; + fpu_reg_t f6; + fpu_reg_t f7; + fpu_reg_t f8; + fpu_reg_t f9; + fpu_reg_t f10; + fpu_reg_t f11; + fpu_reg_t f12; + fpu_reg_t f13; + fpu_reg_t f14; + fpu_reg_t f15; + fpu_reg_t f16; + fpu_reg_t f17; + fpu_reg_t f18; + fpu_reg_t f19; + fpu_reg_t f20; + fpu_reg_t f21; + fpu_reg_t f22; + fpu_reg_t f23; + fpu_reg_t f24; + fpu_reg_t f25; + fpu_reg_t f26; + fpu_reg_t f27; + fpu_reg_t f28; + fpu_reg_t f29; + fpu_reg_t f30; + fpu_reg_t f31; +}; + +int fpu_status_read(); +void fpu_status_write(int status); + +// These are defined in the new asm code +void fpu_save(fpu_context_t* ctx); +void fpu_restore(fpu_context_t* ctx); + +#endif diff --git a/fs.h b/fs.h new file mode 100644 index 0000000..8fdce9f --- /dev/null +++ b/fs.h @@ -0,0 +1,36 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// On-disk file system format. +// Both the kernel and user programs use this header file. + + +#define ROOTINO 1 // root i-number +#define BSIZE 4096 // 1024 // block size + +// Disk layout: +// [ boot block | super block | log | inode blocks | +// free bit map | data blocks] +// +// mkfs computes the super block and builds an initial file system. 
The +// super block describes the disk layout + +#define FSMAGIC 0x10203040 + +#define NDIRECT 12 +#define NINDIRECT (BSIZE / sizeof(uint)) +#define MAXFILE (NDIRECT + NINDIRECT) + +// Inodes per block. TODO: This stuff will gradually be replaced with calculations on configurable values. -Zak +#define IPB (BSIZE / sizeof(fsformat_inode_t)) + +// Block containing inode i +//#define IBLOCK(i, sb) ((i) / IPB + (sb)->inodestart) + +// Bitmap bits per block +#define BPB (BSIZE*8) + +// Block of free map containing bit for block b +//#define BBLOCK(b, sb) ((b)/BPB + (sb)->bmapstart) + +// Directory is a file containing a sequence of dirent structures. +#define DIRSIZ 14 + diff --git a/fsinstance.c b/fsinstance.c new file mode 100644 index 0000000..b37120c --- /dev/null +++ b/fsinstance.c @@ -0,0 +1,1121 @@ +// NEW CODE +#include "fsinstance.h" +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "sched.h" +#include "defs.h" +#include "fs.h" +#include "stat.h" +#include "proc.h" +#include "kprintf.h" +#include "../mkfs/fsformat.h" +void* memcpy(void*, void*, long); + +fsinstance_t* fsinstance_alloc() { + fsinstance_t* result = kalloc(); + if (result) { + result->superblock = kalloc(); + if (!result->superblock) { + kfree(result); + return NULL; + } + result->itbl = kalloc(); + if (!result->itbl) { + kfree(result->superblock); + kfree(result); + return NULL; + } + initlock(&result->itbl_spin, "itbl_spin"); + for (int i = 0; i < NINODE; i++) { + result->itbl[i] = kalloc(); + if (!result->itbl[i]) { + printf("Failed to preallocate inode\n"); + while (--i > 0) { + kfree(result->itbl[i]); + } + + kfree(result->superblock); + kfree(result->itbl); + kfree(result); + return NULL; + } + memset(result->itbl[i], 0, 4096); + initsleeplock(&(result->itbl[i]->lock), "inode"); + } + memset(result->superblock, 0, sizeof(fsformat_superblock_t)); + return result; + } else { + return NULL; + } +} + +void fsinstance_loadsuperblock(fsinstance_t* instance, unsigned int device) { + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, 1); + memcpy(instance->superblock, buffer->data, sizeof(fsformat_superblock_t)); + diskio_buffer_release(buffer); +} + +void* fsinstance_init(fsinstance_t* instance, unsigned int device) { + FSINSTANCE_CHECK(instance); + fsinstance_loadsuperblock(instance, device); + if (instance->superblock->magic == FSFORMAT_MAGIC_OLD) { + // If the magic number is the old one that means fs is "version 0". + instance->fsversion = 0; + fsinstance_inittransactions(instance, device, BSIZE); // TODO: Make block size fully configurable. + // TODO: Add magic number for new filesystem and code to track version etc. + } else if (instance->superblock->magic == FSFORMAT_MAGIC_NEW) { + // If the magic number is the new one that means the filesystem's + // version number is >= 1 and some extended fields of the superblock + // are available. + instance->fsversion = instance->superblock->extd_versionflags & 0xFF; + // TODO: Check block size etc. + fsinstance_inittransactions(instance, device, instance->superblock->extd_blocksize); // TODO: Make block size fully configurable. + // TODO: Add magic number for new filesystem and code to track version etc. 
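// Standalone recap (illustrative sketch, not part of this patch) of the version
// rule applied by the branches above: the old xv6 magic selects the fixed-BSIZE
// "version 0" layout, while the new magic exposes the extended superblock
// fields and carries the version in the low byte of extd_versionflags. The
// constants and field names are the ones already used in this file; the helper
// name is hypothetical.
static inline int fsformat_detectversion_sketch(unsigned int magic,
                                                unsigned int extd_versionflags) {
    if (magic == FSFORMAT_MAGIC_OLD)
        return 0;                               // legacy xv6-compatible layout
    if (magic == FSFORMAT_MAGIC_NEW)
        return (int)(extd_versionflags & 0xFF); // versioned layout, expected >= 1
    return -1;                                  // unknown magic: refuse to mount
}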
+ } else { + panic("fsinstance_init: Bad filesystem (superblock magic number doesn't match any known options)"); + } + + return instance->superblock; +} + +void fsinstance_resetblocktozero(fsinstance_t* instance, unsigned int device, unsigned int blocknumber) { + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, blocknumber); + memset(buffer->data, 0, BSIZE); // TODO: Configurable block size + fsinstance_writelogged(instance, buffer); + diskio_buffer_release(buffer); +} + +#define FSINSTANCE_BITMAPBLOCK(n) \ + (instance->superblock->firstbitmapblock + ((n)/bitsperblock)) + +// Attemptes to allocate a block, clearing the block to zeroes and +// returning it's block number on success or printing a warning and +// returning 0 if out of space. +unsigned int fsinstance_allocdatablock(fsinstance_t* instance, unsigned int device) { + FSINSTANCE_CHECK(instance); + + int bitsperblock = BSIZE*8; // TODO: Configurable block size + + for (int blocknumber = 0; blocknumber < instance->superblock->totalblocks; blocknumber += bitsperblock) { + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber)); + for (int bitindex = 0; bitindex < bitsperblock && (blocknumber + bitindex) < instance->superblock->totalblocks; bitindex++) { + int mask = 1 << (bitindex & 7); // bitindex % 8 + int byteindex = bitindex >> 3; // bitindex / 8 + if ((buffer->data[byteindex] & mask) == 0) { + // The block is marked as free so we should mark it as used. + buffer->data[byteindex] |= mask; + fsinstance_writelogged(instance, buffer); + // Then we can release the buffer since we'll be returning soon. + diskio_buffer_release(buffer); + // Now we have the final block number, zero it and return. + unsigned int finalnumber = blocknumber + bitindex; + fsinstance_resetblocktozero(instance, device, finalnumber); + return finalnumber; + } + } + diskio_buffer_release(buffer); + } + + printf("fsinstance_allocdatablock: No more free blocks, filesystem is full!\n"); + return 0; +} + +// Counts the free blocks (NOTE: This should be run inside a transaction +// for consistency, although it doesn't modify any blocks). Returns the +// number of blocks marked free for data or directory allocation. +unsigned int fsinstance_countfreeblocks(fsinstance_t* instance, unsigned int device) { + FSINSTANCE_CHECK(instance); + + int bitsperblock = BSIZE*8; // TODO: Configurable block size + + unsigned int count = 0; + + for (int blocknumber = 0; blocknumber < instance->superblock->totalblocks; blocknumber += bitsperblock) { + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber)); + for (int bitindex = 0; bitindex < bitsperblock && (blocknumber + bitindex) < instance->superblock->totalblocks; bitindex++) { + int mask = 1 << (bitindex & 7); // bitindex % 8 + int byteindex = bitindex >> 3; // bitindex / 8 + if ((buffer->data[byteindex] & mask) == 0) { + // The block is marked as free so we simply imcrement the count + count++; + } + } + diskio_buffer_release(buffer); + } + + return count; +} + +// Marks a data block as free in the used/free bitmap (this assumes that +// any reference to it will have been reset separately). 
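// Illustrative sketch (not part of this patch) of the free-bitmap addressing
// shared by the allocator above and the free routine below, with
// bitsperblock = BSIZE*8 as used in this file. For block number b:
//   bitmap block holding bit b : firstbitmapblock + b / bitsperblock
//   byte inside that block     : (b % bitsperblock) / 8
//   mask for the bit           : 1 << ((b % bitsperblock) % 8)
// The helper name is hypothetical and shown standalone.
static inline int fsinstance_bitmaptestbit_sketch(const unsigned char* bitmapdata,
                                                  unsigned int blocknumber,
                                                  unsigned int bitsperblock) {
    unsigned int bitindex = blocknumber % bitsperblock;
    unsigned int byteindex = bitindex >> 3;      // bitindex / 8
    int mask = 1 << (bitindex & 7);              // bitindex % 8
    return (bitmapdata[byteindex] & mask) != 0;  // 1 = in use, 0 = free
}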
+void fsinstance_freedatablock(fsinstance_t* instance, unsigned int device, unsigned int blocknumber) { + FSINSTANCE_CHECK(instance); + + int bitsperblock = BSIZE*8; // TODO: Configurable block size + + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_BITMAPBLOCK(blocknumber)); + unsigned int bitindex = blocknumber % bitsperblock; + unsigned int byteindex = bitindex >> 3; // bitindex / 8 + int mask = 1 << (bitindex & 7); // bitindex % 8 + if ((buffer->data[byteindex] & mask) == 0) { + panic("fsinstance_freedatablock: Caller attempted to free a block which is already marked free!"); + } + buffer->data[byteindex] &= ~mask; // And it with every bit EXCEPT the one we want to clear + fsinstance_writelogged(instance, buffer); + diskio_buffer_release(buffer); +} + +// TODO: Configurable block size +#define FSINSTANCE_INODEBLOCK(n) \ + (((n) / IPB) + instance->superblock->firstinodeblock) + +// Looks up the inode number on the filesystem and returns the in-memory +// inode structure with reference count adjusted for the caller, without +// locking or loading the inode. +fsinstance_inode_t* fsinstance_getinode(fsinstance_t* instance, unsigned int device, unsigned int inodenumber) { + FSINSTANCE_CHECK(instance); + + acquire(&instance->itbl_spin); + + fsinstance_inode_t* emptyinode = NULL; + + for (int inodeindex = 0; inodeindex < NINODE; inodeindex++) { // TODO: Flexible size of in-memory inode table + fsinstance_inode_t* inode = instance->itbl[inodeindex]; + if (inode->referencecount > 0 && inode->inodenumber == inodenumber && inode->device == device) { // NOTE: Checks for device are probably unnecessary unless backends are combined for some reason + // This inode already has an in-memory reference, so just return it + inode->referencecount = inode->referencecount + 1; + release(&instance->itbl_spin); // After unlocking the inode table. + return inode; + } + if (inode->referencecount == 0 && emptyinode == NULL) { + // Save the empty inode until we've checked them all for a match. + emptyinode = inode; + } + } + + // If no match was found, we need to (hopefully) fill an empty inode. + + if (emptyinode == NULL) { + panic("fsinstance_getinode: the in-memory inode table is exhausted, future versions should have better handling for this case!\n"); // TODO: Better handling! + } + + emptyinode->instance = instance; + emptyinode->device = device; + emptyinode->inodenumber = inodenumber; + emptyinode->referencecount = 1; + emptyinode->valid = 0; // Mark this as having not been loaded yet! + + // Be sure to unlock the inode table before returning. + release(&instance->itbl_spin); + + return emptyinode; +} + +// Allocates an inode, marking it with the given type. Returns the +// allocated inode (without locking it) on success or prints a warning +// and returns NULL on failure. +fsinstance_inode_t* fsinstance_allocinode(fsinstance_t* instance, unsigned int device, short inodetype) { + FSINSTANCE_CHECK(instance); + + for (int inodenumber = 1; inodenumber < instance->superblock->inodelimit; inodenumber++) { + // First load the block containing the inode + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, device, FSINSTANCE_INODEBLOCK(inodenumber)); + // Then get the inode offset within the block. 
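// Illustrative sketch (not part of this patch) of the inode addressing used by
// FSINSTANCE_INODEBLOCK above and the slot arithmetic that follows, with
// IPB = BSIZE / sizeof(fsformat_inode_t) from fs.h:
//   disk block holding inode n : firstinodeblock + n / IPB
//   slot of inode n in block   : n % IPB
// The helper names are hypothetical.
static inline unsigned int fsinstance_inodeblock_sketch(unsigned int inodenumber,
                                                        unsigned int firstinodeblock,
                                                        unsigned int inodesperblock) {
    return firstinodeblock + inodenumber / inodesperblock;
}
static inline unsigned int fsinstance_inodeslot_sketch(unsigned int inodenumber,
                                                       unsigned int inodesperblock) {
    return inodenumber % inodesperblock;
}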
+ fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inodenumber % IPB); // TODO: Configurable block size + + if (diskinode->type == 0) { + // This inode is marked unused, so mark it with the type & save it + memset(diskinode, 0, sizeof(fsformat_inode_t)); // Be sure to zero + diskinode->type = inodetype; + fsinstance_writelogged(instance, buffer); + + // Now we can release the buffer and return the new inode. + diskio_buffer_release(buffer); + return fsinstance_getinode(instance, device, inodenumber); + } + + diskio_buffer_release(buffer); + } + + printf("fsinstance_allocinode: No more inodes, filesystem inode table is full!\n"); + return NULL; +} + +// Increases the reference count of an inode, locking on the inode table +// and returning the inode pointer to simulate a copy operation. +fsinstance_inode_t* fsinstance_inode_copyref(fsinstance_inode_t* inode) { + acquire(&inode->instance->itbl_spin); + + inode->referencecount = inode->referencecount + 1; + + release(&inode->instance->itbl_spin); + return inode; +} + +// Saves an inode to disk. This should be called after modifying any +// inode field that's saved to disk, and must be called while the caller +// holds the inode's lock. +void fsinstance_inode_save(fsinstance_inode_t* inode) { + fsinstance_t* instance = inode->instance; + FSINSTANCE_CHECK(instance); + + // Begin by loading the block with the on-disk inode + diskio_buffer_t* buffer = diskio_buffer_read(inode->instance->cache, inode->device, FSINSTANCE_INODEBLOCK(inode->inodenumber)); + // Get the inode offset within the block. + fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inode->inodenumber % IPB); // TODO: Configurable block size + + // Update the fields of the on-disk inode structure + diskinode->type = inode->type; + diskinode->device_major = inode->major; + diskinode->device_minor = inode->minor; + diskinode->linkcount = inode->nlink; + diskinode->totalbytes = inode->size; + + // Copy the direct+indirect addresses + memcpy(diskinode->addrs, inode->addrs, sizeof(unsigned int)*(NDIRECT + 1)); + + // Save it to disk (or at least to the transaction log!) + fsinstance_writelogged(inode->instance, buffer); + + // Finally release the buffer. + diskio_buffer_release(buffer); +} + +// Unlocks an inode but does not perform any other actions like saving. +void fsinstance_inode_unlock(fsinstance_inode_t* inode) { + if (inode == NULL) { + panic("fsinstance_inode_unlock: inode argument is NULL"); + } else if (!holdingsleep(&inode->lock)) { + panic("fsinstance_inode_unlock: inode argument is not locked"); + } else if (inode->referencecount < 1) { + panic("fsinstance_inode_unlock: inode argument reference count is below 1"); + } + releasesleep(&inode->lock); +} + +// Locks the inode, and reads it into memory for the first time if it +// hasn't been loaded yet. +void fsinstance_inode_lockandload(fsinstance_inode_t* inode) { + fsinstance_t* instance = inode->instance; + FSINSTANCE_CHECK(instance); + + if (inode == NULL) { + panic("fsinstance_inode_lockandload: inode argument is NULL"); + } else if (inode->referencecount < 1) { + panic("fsinstance_inode_lockandload: inode argument reference count is below 1"); + } + + acquiresleep(&inode->lock); + + // Load the inode from disk but only if it hasn't already been loaded. + if (!inode->valid) { + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, FSINSTANCE_INODEBLOCK(inode->inodenumber)); + // Get the inode offset within the block. 
+ fsformat_inode_t* diskinode = ((fsformat_inode_t*)&(buffer->data[0])) + (inode->inodenumber % IPB); // TODO: Configurable block size + + inode->type = diskinode->type; + inode->major = diskinode->device_major; + inode->minor = diskinode->device_minor; + inode->nlink = diskinode->linkcount; + inode->size = diskinode->totalbytes; + + memcpy(inode->addrs, diskinode->addrs, sizeof(unsigned int) * (NDIRECT + 1)); + + diskio_buffer_release(buffer); + + inode->valid = 1; + + if (inode->type == 0) { + panic("fsinstance_inode_lockandload: inode type is zero, filesystem is corrupt or caller is buggy"); + } + } +} + +void fsinstance_deleteindirect(fsinstance_t* instance, unsigned int blocknumber) { + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, blocknumber); + unsigned int* addrs = (void*) buffer->data; + for (int indirectindex = 0; indirectindex < NINDIRECT; indirectindex++) { // TODO: Make block size configurable + unsigned int innerblock = addrs[indirectindex]; + if (innerblock != 0) { + fsinstance_freedatablock(instance, instance->fslog_device, innerblock); + } + // NOTE: addrs[indirectindex] doesn't need to be reset because + // we'll be freeing the whole table! + } + diskio_buffer_release(buffer); + // Free the page holding the list of indirect pages: + fsinstance_freedatablock(instance, instance->fslog_device, blocknumber); +} + +// Deletes the entire contents of an inode but does not perform any +// other action such as deleting the inode (it's size will become 0). +// The inode must be locked by the caller. +void fsinstance_inode_deletecontents(fsinstance_inode_t* inode) { + fsinstance_t* instance = inode->instance; + FSINSTANCE_CHECK(instance); + + // We can begin by setting the size to zero, since we won't be relying + // on that to check which pages to free + inode->size = 0; + + // Do the hard part first, check for indirect blocks. These are stored + // as a block full of block addresses, with it's own address stored at + // the end of the list of "direct" block addresses. + unsigned int indirectpage = inode->addrs[NDIRECT]; // Value at end of array + if (indirectpage != 0) { + // Reset the reference from the inode because we'll free the table. + inode->addrs[NDIRECT] = 0; + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, indirectpage); + unsigned int* addrs = (void*) buffer->data; + for (int indirectindex = 0; indirectindex < NINDIRECT; indirectindex++) { // TODO: Make block size configurable + unsigned int blocknumber = addrs[indirectindex]; + if (blocknumber != 0) { + if (instance->fsversion == 0) { + // Simply delete the referenced block + fsinstance_freedatablock(instance, inode->device, blocknumber); + } else { + // Do a deep delete of a block-of-blockrefs + fsinstance_deleteindirect(instance, blocknumber); + } + } + // NOTE: addrs[indirectindex] doesn't need to be reset because + // we'll be freeing the whole table! + } + diskio_buffer_release(buffer); + // Free the page holding the list of indirect pages: + fsinstance_freedatablock(instance, inode->device, indirectpage); + } + + // Delete the "direct" blocks, these are simple block addresses. + for (int directindex = 0; directindex < NDIRECT; directindex++) { + if (inode->addrs[directindex]) { + fsinstance_freedatablock(instance, inode->device, inode->addrs[directindex]); + inode->addrs[directindex] = 0; + } + } + + // Finally save the inode. 
+ fsinstance_inode_save(inode); +} + +// The inverse of a get operation, and is also responsible for +// deleting inodes which have been "unlinked". This must be called from +// within a transaction. +void fsinstance_inode_unget(fsinstance_inode_t* inode) { + fsinstance_t* instance = inode->instance; + acquire(&instance->itbl_spin); + + if (inode->valid && inode->nlink == 0 && inode->referencecount == 1) { + // Since referencecount is 1 we are the only user of the inode so + // this lock should instantly belong to us. + acquiresleep(&inode->lock); + + // Release the itable lock while deleting. + release(&instance->itbl_spin); + + // Delete any data (or dirent) blocks associated with this inode + // before deleting the rest of the structure. + fsinstance_inode_deletecontents(inode); + + inode->type = 0; + fsinstance_inode_save(inode); + inode->valid = 0; // Mark it as not being a loaded inode + + releasesleep(&inode->lock); + + acquire(&instance->itbl_spin); + } + + // Finally decrement the reference count and unlock the inode table. + inode->referencecount = inode->referencecount - 1; + release(&instance->itbl_spin); +} + +void fsinstance_inode_unlockandunget(fsinstance_inode_t* inode) { + fsinstance_inode_unlock(inode); + fsinstance_inode_unget(inode); +} + +// Copy information for a "stat" system call to a (kernel-mode or +// filesystem-mode) buffer. +void fsinstance_inode_getstatinfo(fsinstance_inode_t* inode, struct stat* output) { + output->type = inode->type; + output->size = inode->size; + output->dev = inode->device; + output->ino = inode->inodenumber; + output->nlink = inode->nlink; +} + +// Returns the actual block number of the given logical block of a file, +// allocating an associated block if it doesn't exist. +unsigned int fsinstance_inode_getactualblock(fsinstance_inode_t* inode, unsigned int blocknumber) { + fsinstance_t* instance = inode->instance; + + if (blocknumber < NDIRECT) { + unsigned int a = inode->addrs[blocknumber]; + if (a == 0) { + a = fsinstance_allocdatablock(instance, inode->device); + inode->addrs[blocknumber] = a; + } + return a; + } + + unsigned int indirectindex = blocknumber - NDIRECT; + + unsigned int tableblock; + if (instance->fsversion == 0) { + tableblock = inode->addrs[NDIRECT]; + if (tableblock == 0) { + tableblock = fsinstance_allocdatablock(instance, inode->device); + inode->addrs[NDIRECT] = tableblock; + if (tableblock == 0) { + return 0; + } + } + } else { + unsigned int tableotables = inode->addrs[instance->superblock->extd_directblocks]; + if (tableotables == 0) { + tableotables = fsinstance_allocdatablock(instance, inode->device); + inode->addrs[instance->superblock->extd_directblocks] = tableotables; + if (tableotables == 0) { + return 0; + } + } + //printf("using tableotables at %d\n", tableotables); + diskio_buffer_t* ttbuffer = diskio_buffer_read(instance->cache, inode->device, tableotables); + unsigned int* tt = (void*) ttbuffer->data; + tableblock = tt[indirectindex / (instance->superblock->extd_blocksize / 4)]; + if (tableblock == 0) { + tableblock = fsinstance_allocdatablock(instance, inode->device); + tt[indirectindex / (instance->superblock->extd_blocksize / 4)] = tableblock; + if (tableblock == 0) { + return 0; + } + fsinstance_writelogged(instance, ttbuffer); + } + diskio_buffer_release(ttbuffer); + indirectindex %= (instance->superblock->extd_blocksize / 4); + } + //printf("using tableblock at %d\n", tableblock); + + if (indirectindex >= NINDIRECT) { + panic("fsinstance_inode_getactualblock: Block index is out of range"); + } 
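// Illustrative sketch (not part of this patch) of the two-level index math the
// fsversion >= 1 branch above performs. With entriesperblock =
// extd_blocksize / 4 (32-bit block numbers) and indirectindex =
// blocknumber - NDIRECT as computed above:
//   slot in the table-of-tables : indirectindex / entriesperblock
//   slot in the selected table  : indirectindex % entriesperblock
// (the table-of-tables itself is referenced from addrs[extd_directblocks]).
// The helper name is hypothetical.
static inline void fsinstance_indirectpos_sketch(unsigned int indirectindex,
                                                 unsigned int entriesperblock,
                                                 unsigned int* tableslot,
                                                 unsigned int* entryslot) {
    *tableslot = indirectindex / entriesperblock;
    *entryslot = indirectindex % entriesperblock;
}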
+ diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, tableblock); + unsigned int* table = (void*) buffer->data; + unsigned int finaladdr = table[indirectindex]; + if (finaladdr == 0) { + finaladdr = fsinstance_allocdatablock(instance, inode->device); + table[indirectindex] = finaladdr; + fsinstance_writelogged(instance, buffer); + } + diskio_buffer_release(buffer); + //printf("using finaladdr at %d\n", finaladdr); + return finaladdr; +} + +#define FSINSTANCE_MIN(x,y) \ + (((x) >= (y)) ? (y) : (x)) +#define FSINSTANCE_MAX(x,y) \ + (((x) >= (y)) ? (x) : (y)) + +// Reads from an inode's data into memory. The memory can be either in +// user-land or kernel-land, with isuserland set to 1 in the case of +// reading to a user-land buffer. The caller is expected to have locked +// the inode. +int fsinstance_inode_read(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size) { + fsinstance_t* instance = inode->instance; + + if (offset > inode->size || offset + size < offset) { // Check for overflows as well as reading from beyond the end of file + return 0; // No bytes read. + } + + unsigned int realsize; + if (offset + size > inode->size) { + realsize = inode->size - offset; + } else { + realsize = size; + } + + unsigned int i = 0; + while (i < realsize) { + unsigned int blocknumber = fsinstance_inode_getactualblock(inode, offset / BSIZE); // TODO: Make block size configurable + if (!blocknumber) { + return -1; + } + + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, blocknumber); + + unsigned int nread = FSINSTANCE_MIN(realsize - i, BSIZE - offset%BSIZE); + if (either_copyout(isuserland, address, buffer->data + offset % BSIZE, nread) == -1) { + diskio_buffer_release(buffer); + return -1; + } + + diskio_buffer_release(buffer); + + i += nread; + address += nread; + offset += nread; + } + + return i; +} + +// Writes to an inode's data from memory. The address can be either +// in user-land or kernel-land, with isuserland set to 1 in the case of +// reading to a user-land buffer. The caller is expected to have locked +// the inode. +int fsinstance_inode_write(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size) { + fsinstance_t* instance = inode->instance; + + if (offset > inode->size || offset + size < offset) { + return -1; + } + if (offset + size > MAXFILE * BSIZE) { // TODO: Adjustable limits. + return -1; + } + + unsigned int i = 0; + while (i < size) { + unsigned int blocknumber = fsinstance_inode_getactualblock(inode, offset / BSIZE); // TODO: Make block size configurable + if (blocknumber == 0) { + goto savetodisk; + } + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, inode->device, blocknumber); + unsigned int nwritten = FSINSTANCE_MIN(size - i, BSIZE - offset % BSIZE); + if (either_copyin(buffer->data + offset % BSIZE, isuserland, address, nwritten) == -1) { + diskio_buffer_release(buffer); + goto savetodisk; + } + + fsinstance_writelogged(inode->instance, buffer); + + diskio_buffer_release(buffer); + + i += nwritten; + address += nwritten; + offset += nwritten; + } + + savetodisk: + + inode->size = FSINSTANCE_MAX(offset, inode->size); + + // Save the inode information to disk, especially as size and block + // addresses may have changed (NOTE: the data itself will have already + // been written to the associated blocks but they won't be linked in + // to the file properly on disk until it's saved). 
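// Worked limit for the size check near the top of this function (illustrative,
// not part of this patch), using the v0 constants from fs.h:
//   NINDIRECT = BSIZE / sizeof(uint) = 4096 / 4 = 1024
//   MAXFILE   = NDIRECT + NINDIRECT  = 12 + 1024 = 1036 blocks
//   MAXFILE * BSIZE = 1036 * 4096    = 4243456 bytes (~4.05 MiB)
// The "TODO: Adjustable limits" above marks this cap as provisional; the
// fsversion >= 1 table-of-tables mapping can reach well past it.
enum {
    FS_EXAMPLE_BSIZE     = 4096,
    FS_EXAMPLE_NDIRECT   = 12,
    FS_EXAMPLE_NINDIRECT = FS_EXAMPLE_BSIZE / 4,
    FS_EXAMPLE_MAXBYTES  = (FS_EXAMPLE_NDIRECT + FS_EXAMPLE_NINDIRECT) * FS_EXAMPLE_BSIZE
};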
+ fsinstance_inode_save(inode); + + return i; +} + +int fsinstance_nameseq(fsinstance_t* instance, const char* namea, const char* nameb) { + return (strncmp(namea, nameb, instance->fsversion == 0 ? FSFORMAT_NAMESIZE_OLD : FSFORMAT_NAMESIZE_NEW) == 0) ? 1 : 0; // TODO: Compare longer names... +} + +fsinstance_inode_t* fsinstance_inode_lookup(fsinstance_inode_t* directory, char* filename, unsigned int* entryoffsetvar) { + // Begin by checking that the directory inode and filesystem instance + // are valid, as these checks may be especially essential when + // searching through the filesystem! + fsinstance_t* instance = directory->instance; + FSINSTANCE_CHECK(instance); + if (directory->type != T_DIR) { // TODO: Replace these constants + panic("fsinstance_inode_lookup: Bad inode type, expecting directory but inode is not a directory"); + } + + // Convenience variables for directory size & entry size calculations. + // dirsize divided by entrysize should give the number of entries. + unsigned int dirsize = directory->size; + unsigned int entrysize = (unsigned int) (instance->fsversion == 0 ? sizeof(fsformat_dirent_v0_t) : sizeof(fsformat_dirent_v1_t)); + + // Then traverse through, reading 1 entry at a time until either + // finding a match or getting to the end of the directory. + for (unsigned int offset = 0; offset < dirsize; offset += entrysize) { + if (instance->fsversion == 0) { + fsformat_dirent_v0_t entry; + int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); + if (r != entrysize) { + // TODO: Replace panics within the filesystem with some kind of + // unmount-when-corrupted system. + panic("fsinstance_inode_lookup: Read failed internally"); + } + // If found set the offset variable (if it isn't NULL), get the + // file's inode and return it. + if (fsinstance_nameseq(instance, filename, entry.filename)) { + if (entryoffsetvar != NULL) { + *entryoffsetvar = offset; + } + return fsinstance_getinode(instance, directory->device, entry.inodenumber); + } + } else { + fsformat_dirent_v1_t entry; + int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); + if (r != entrysize) { + // TODO: Replace panics within the filesystem with some kind of + // unmount-when-corrupted system. + panic("fsinstance_inode_lookup: Read failed internally"); + } + // If found set the offset variable (if it isn't NULL), get the + // file's inode and return it. + if (fsinstance_nameseq(instance, filename, entry.filename)) { + if (entryoffsetvar != NULL) { + *entryoffsetvar = offset; + } + return fsinstance_getinode(instance, directory->device, entry.datainode); + } + } + } + + // If the search finished without returning that means there was no + // match, so just return NULL. + return NULL; +} + +// Checks if a directory has a given filename in it, returning 1 if +// there is a match and 0 otherwise. +int fsinstance_inode_hasentry(fsinstance_inode_t* directory, char* filename) { + fsinstance_inode_t* inode = fsinstance_inode_lookup(directory, filename, NULL); + if (inode == NULL) { + return 0; + } else { + fsinstance_inode_unget(inode); + return 1; + } +} + +// Attempts to insert a new directory entry into the given directory, +// returning 0 if successful or -1 otherwise. 
This function +int fsinstance_inode_insert(fsinstance_inode_t* directory, char* filename, unsigned int inodenumber) { + fsinstance_t* instance = directory->instance; + FSINSTANCE_CHECK(instance); + + if (fsinstance_inode_hasentry(directory, filename)) { + return -1; + } + + // Convenience variables for directory size & entry size calculations. + // dirsize divided by entrysize should give the number of entries. + unsigned int dirsize = directory->size; + unsigned int entrysize = (unsigned int) (instance->fsversion == 0 ? sizeof(fsformat_dirent_v0_t) : sizeof(fsformat_dirent_v1_t)); + + unsigned int offset = 0; + while (offset < dirsize) { + if (instance->fsversion == 0) { + fsformat_dirent_v0_t entry; + int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); + if (r != entrysize) { + // TODO: Replace panics within the filesystem with some kind of + // unmount-when-corrupted system. + panic("fsinstance_inode_insert: Read failed internally"); + } + if (!entry.inodenumber) { + goto foundentry; + } + } else { + fsformat_dirent_v1_t entry; + int r = fsinstance_inode_read(directory, 0, (unsigned long long) &entry, offset, entrysize); + if (r != entrysize) { + // TODO: Replace panics within the filesystem with some kind of + // unmount-when-corrupted system. + panic("fsinstance_inode_insert: Read failed internally"); + } + if (!entry.datainode) { + goto foundentry; + } + } + offset += entrysize; + } + + // When the code reaches foundentry:, it will have either found a free + // entry or searched to the end of the directory. Either way we can + // now simply write the directory entry to that offset and it will be + // added! + foundentry: + if (instance->fsversion == 0) { + fsformat_dirent_v0_t newentry; + strncpy(newentry.filename, filename, FSFORMAT_NAMESIZE_OLD); // TODO: Support for longer names + newentry.inodenumber = inodenumber; + if (fsinstance_inode_write(directory, 0, (unsigned long long) &newentry, offset, entrysize) != entrysize) { + return -1; + } + } else { + fsformat_dirent_v1_t newentry; + strncpy(newentry.filename, filename, FSFORMAT_NAMESIZE_NEW); // TODO: Support for longer names + newentry.datainode = inodenumber; + newentry.metainode = 0xFFFFFFFF; + if (fsinstance_inode_write(directory, 0, (unsigned long long) &newentry, offset, entrysize) != entrysize) { + return -1; + } + } + + // If we got to the end then return 0 to indicate success. + return 0; +} + +#define FSINSTANCE_SKIPSLASHES(iter) \ + while (*iter == '/' || *iter == '\\') { \ + iter++; \ + } +#define FSINSTANCE_SKIPFILENAME(iter) \ + while (*iter != 0 && *iter != '/' && *iter != '\\') { \ + iter++; \ + } + +// Scan to the next part of the path, reading the first part into +// filenamevar (which must have FSFORMAT_NAMESIZE reserved bytes). +// Returns the path iterator at the start of the next filename/dirname +// or NULL to indicate end of string. +char* fsinstance_scanfilename(fsinstance_t* instance, char* pathiterator, char* filenamevar) { + FSINSTANCE_SKIPSLASHES(pathiterator); + + char* namestart = pathiterator; + if (*namestart == 0) { + return 0; // End of path + } + + FSINSTANCE_SKIPFILENAME(pathiterator); + + int namelen = pathiterator - namestart; + + memcpy(filenamevar, namestart, FSINSTANCE_MIN(namelen, FSFORMAT_NAMESIZE_OLD)); // TODO: Support for longer filenames + if (namelen < FSFORMAT_NAMESIZE_OLD) { + filenamevar[namelen] = 0; + } + + FSINSTANCE_SKIPSLASHES(pathiterator); // This is probably unnecessary? 
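// Illustrative caller (not part of this patch): fsinstance_lookuppath below
// drives this scanner in a loop, pulling one path component at a time until
// the scanner returns NULL. Both '/' and '\' separate components, as the
// macros above show. The helper name and the buffer size choice (the larger
// FSFORMAT_NAMESIZE_NEW, as fsinstance_lookup uses) are illustrative.
static void fsinstance_scanexample(fsinstance_t* instance, char* path) {
    char component[FSFORMAT_NAMESIZE_NEW];
    char* it = path;
    while ((it = fsinstance_scanfilename(instance, it, component)) != 0) {
        // for "usr/bin/tool" this sees "usr", then "bin", then "tool"
    }
}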
+ + return pathiterator; +} + +// The internal path lookup function (used by the simple lookups). The +// filenamevar must point to an array of FSINSTANCE_NAMESIZE bytes which +// will be used as a scratch variable as well as to look up the filename +// within the parent directory (if getparent is non-zero). Any file +// lookup needs to happen within a transaction. +fsinstance_inode_t* fsinstance_lookuppath(fsinstance_t* instance, char* path, int getparent, char* filenamevar) { + FSINSTANCE_CHECK(instance); + + fsinstance_inode_t* inode; + if (*path == '/' || *path == '\\') { + inode = fsinstance_getinode(instance, instance->fslog_device, ROOTINO); // TODO: Make all this configurable + } else { + inode = fsinstance_inode_copyref(myproc()->cwd); // TODO: This shouldn't be here... It should be passed in + } + + char* pathiterator = path; + do { + pathiterator = fsinstance_scanfilename(instance, pathiterator, filenamevar); + if (!pathiterator) { + goto finished; + } + fsinstance_inode_lockandload(inode); + if (inode->type != T_DIR) { + fsinstance_inode_unlockandunget(inode); + return NULL; + } + // If searching for the file's parent directory, stop when there is + // no more path left without looking up the filename. + if (*pathiterator == 0 && getparent) { + fsinstance_inode_unlock(inode); + return inode; + } + + fsinstance_inode_t* subnode = fsinstance_inode_lookup(inode, filenamevar, NULL); + if (subnode == NULL) { + fsinstance_inode_unlockandunget(inode); + return NULL; + } + fsinstance_inode_unlockandunget(inode); + inode = subnode; + } while(1); + + finished: + + // If we reached here and we're looking for a parent directory then + // there mustn't be one. + if (getparent) { + fsinstance_inode_unget(inode); + return NULL; + } + + // Otherwise we're just looking for whatever is at the end of the path + return inode; +} + +// Simple path lookup of a path's filename within a parent directory. +// The filename variable needs to point to at least FSFORMAT_NAMESIZE +// bytes of reserved memory. Any file lookup needs to happen within a +// transaction. +fsinstance_inode_t* fsinstance_lookupparent(fsinstance_t* instance, char* path, char* filenamevar) { + return fsinstance_lookuppath(instance, path, 1, filenamevar); +} + +// Simple path lookup, returning the inode matching the path or NULL if +// not found. Any file lookup needs to happen within a transaction. +fsinstance_inode_t* fsinstance_lookup(fsinstance_t* instance, char* path) { + char scratchvar[FSFORMAT_NAMESIZE_NEW]; // Uses the new size as this is larger + return fsinstance_lookuppath(instance, path, 0, scratchvar); +} + +// Saves the log header to disk, this is used internally to set the disk +// into a state where the transaction is finishable. +void fsinstance_savetransaction(fsinstance_t* instance) { + // First read in the structure. + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start); + fsinstance_logheader_t* header = (void*) buffer->data; + + // Then change the values to our in-memory copy + int total = instance->fslog_header.number; + int count = 0; + while (count < total) { + header->blocks[count] = instance->fslog_header.blocks[count]; + count++; + } + header->number = total; + + // Then save the copy to disk + diskio_buffer_write(buffer); + diskio_buffer_release(buffer); +} + +// Loads the log header from disk, this is used internally to reload the +// structure at startup. 
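// Illustrative sketch (not part of this patch) of the on-disk log area that
// fsinstance_savetransaction above and the load/apply helpers below operate on:
//   block fslog_start         : fsinstance_logheader_t (number + blocks[])
//   block fslog_start + 1 + i : staged copy of the data destined for blocks[i]
// so the header block is the single "commit record" for the whole transaction.
// The helper name is hypothetical.
static inline unsigned int fsinstance_logslot_sketch(unsigned int fslog_start, int i) {
    return fslog_start + 1 + i;   // where entry i is staged inside the log
}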
+void fsinstance_loadtransaction(fsinstance_t* instance) { + // First read in the log header + diskio_buffer_t* buffer = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start); + fsinstance_logheader_t* header = (void*) buffer->data; + + // Then copy the structure values into our in-memory copy + int total = header->number; + int count = 0; + while (count < total) { + instance->fslog_header.blocks[count] = header->blocks[count]; + count++; + } + instance->fslog_header.number = total; + + // Then release the buffer. + diskio_buffer_release(buffer); + + printf("fsinstance_loadtransaction: got %d block references\n", total); +} + +// Called internally to save the cached blocks included in this +// transaction. +void fsinstance_savecache(fsinstance_t* instance) { + int total = instance->fslog_header.number; + int count = 0; + while (count < total) { + // Get the block we want logged. + diskio_buffer_t* src = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_header.blocks[count]); + // Then get the block in the log we'll be storing it to (note, the + diskio_buffer_t* dst = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start + 1 + count); + + memcpy(dst->data, src->data, BSIZE); // TODO: Make block size configurable + + // Save the block data into the log. + diskio_buffer_write(dst); + + diskio_buffer_release(src); + diskio_buffer_release(dst); + + count++; + } +} + +// Called internally to copy the logged blocks to their final locations. +// This is mostly an inverse of fsinstance_savecache. +void fsinstance_applytransactionblocks(fsinstance_t* instance, int rebootmode) { + int total = instance->fslog_header.number; + int count = 0; + while (count < total) { + // Get the block we want logged. + diskio_buffer_t* src = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_start + 1 + count); + // Then get the block in the log we'll be storing it to (note, the + diskio_buffer_t* dst = diskio_buffer_read(instance->cache, instance->fslog_device, instance->fslog_header.blocks[count]); + + memcpy(dst->data, src->data, BSIZE); // TODO: Make block size configurable + + // Save the block data into the log. + diskio_buffer_write(dst); + + if (!rebootmode) { + diskio_buffer_dereference(dst); + } + + diskio_buffer_release(src); + diskio_buffer_release(dst); + + count++; + } +} + +// Called internally to reset the on-disk transaction to a "zero blocks" +// state. +void fsinstance_resettransaction(fsinstance_t* instance) { + // Just reset the log count and save the transaction header. + instance->fslog_header.number = 0; + fsinstance_savetransaction(instance); +} + +// Called at startup time or whenever else the disk is mounted to check +// and apply any partially-applied writes, i.e. an "after-reboot check". +void fsinstance_rebootcheck(fsinstance_t* instance) { + // This will load and perform any transaction left on the device, + // which will only be left in the event that a transaction was partly + // completed, therefore applying the whole transaction's blocks will + // complete the transaction and return the filesystem to a consistent + // state. + // In the typical case (when the filesystem is already consistent), + // there will be nothing in the log to commit. + fsinstance_loadtransaction(instance); + fsinstance_applytransactionblocks(instance, 1); // 1 == reboot check + + // Reset the log's block count to zero and save. 
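// Small sketch (not part of this patch) of the recovery rule the reboot check
// above relies on: the header on disk either still lists the blocks of a
// committed transaction (replaying them is safe because each copy is a
// whole-block overwrite, so repeating it after another crash is idempotent) or
// it reads zero entries and the apply loop copies nothing. Hypothetical helper.
static inline int fsinstance_needsreplay_sketch(const fsinstance_logheader_t* header) {
    return header->number > 0;   // non-empty header == committed but not yet applied
}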
+ fsinstance_resettransaction(instance); +} + +// Called internally to perform a "commit" of the current transaction +// using the other internal functions. This will have no effect in cases +// where no actual data has been modified in this transaction. +void fsinstance_innercommit(fsinstance_t* instance) { + if (instance->fslog_header.number == 0) { + return; + } + + // First the block data of the transaction is written to the reserved + // log blocks. + fsinstance_savecache(instance); + + // Then the transaction header itself is written, meaning that as long + // as this part is either written or not written the filesystem will + // be left in a state before or after the transaction was committed. + fsinstance_savetransaction(instance); + + // Then attempt to copy the blocks to their intended final locations, + // which may fail and be retried by the reboot-check if the system + // loses power or reboots during copying. + fsinstance_applytransactionblocks(instance, 0); // 0 = this call isn't the reboot check + + // Reset the log's block count to zero and save. + fsinstance_resettransaction(instance); +} + +// This is like the diskio_buffer_write function except that it just +// adds the block to the log to be committed later. +void fsinstance_writelogged(fsinstance_t* instance, diskio_buffer_t* buffer) { + acquire(&instance->fslog_lock); + + if (instance->fslog_header.number >= instance->fslog_size || instance->fslog_header.number >= FSINSTANCE_MAXLOGBLOCKS) { + panic("fsinstance_writelogged: The filesystem is not configured for transactions this large"); + } + + if (instance->fslog_outstanding <= 0) { + panic("fsinstance_writelogged: Caller attempted to write without being inside of a transaction's begin/end sequence"); + } + + int blockindex = 0; + while (blockindex < instance->fslog_header.number) { + if (instance->fslog_header.blocks[blockindex] == buffer->blocknumber) { + goto logged; + } + blockindex++; + } + + instance->fslog_header.blocks[blockindex] = buffer->blocknumber; + + if (blockindex == instance->fslog_header.number) { + diskio_buffer_reference(buffer); // It's now "referenced" by the log + instance->fslog_header.number++; + } + + logged: + release(&instance->fslog_lock); +} + +// Called at the beginning of a filesystem transaction, waits until the +// filesystem is ready to accept more transactions and then returns with +// a new transaction marked as being in progress. +void fsinstance_begin(fsinstance_t* instance) { + acquire(&instance->fslog_lock); + + do { + if (instance->fslog_committing || instance->fslog_header.number + ((instance->fslog_outstanding + 1) * FSINSTANCE_MAXBLOCKSPEROP) > instance->fslog_size) { + // Adding another operation might overflow the log, so sleep until + // it's committed. This is also the case if another transaction + // set is already committing. + sleep(&instance->fslog_sleepvariable, &instance->fslog_lock); + } else { + instance->fslog_outstanding++; + release(&instance->fslog_lock); + goto ready; + } + } while(1); + + ready: + return; +} + +// Called at the end of a filesystem transaction. Commits the whole set +// of transactions if this is the last one in progress. 
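// Illustrative caller (not part of this patch): every metadata update is
// bracketed by fsinstance_begin/fsinstance_end so all of its logged blocks
// commit together. This mirrors the append path of filewrite() earlier in this
// diff; the helper name is hypothetical and error handling is trimmed.
static void fsinstance_appendexample(struct file* f, unsigned long long useraddr,
                                     unsigned int bytes) {
    fsinstance_begin(f->ip->instance);
    fsinstance_inode_lockandload(f->ip);
    fsinstance_inode_write(f->ip, 1, useraddr, f->ip->size, bytes); // write at EOF
    fsinstance_inode_unlock(f->ip);
    fsinstance_end(f->ip->instance);
}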
+void fsinstance_end(fsinstance_t* instance) { + acquire(&instance->fslog_lock); + + if (instance->fslog_outstanding < 1) { + panic("fsinstance_end: No transaction is in progress, need to call fsinstance_begin first!"); + } else if (instance->fslog_committing) { + panic("fsinstance_end: This transaction set is already being committed, likely a multiprocessing bug"); + } + + instance->fslog_outstanding--; + + int reallycommit = 0; + + if (instance->fslog_outstanding == 0) { + reallycommit = 1; + instance->fslog_committing = 1; + } else { + // Wake up any processes waiting in fsinstance_begin. + sched_wake(&instance->fslog_sleepvariable); + } + + // Release the log before any commit may be attempted, since that may + // otherwise cause us to sleep with a lock held! + release(&instance->fslog_lock); + + if (reallycommit) { + fsinstance_innercommit(instance); + + acquire(&instance->fslog_lock); + + instance->fslog_committing = 0; + sched_wake(&instance->fslog_sleepvariable); + + release(&instance->fslog_lock); + } +} + +// This is called internally to initialise/"reboot" the filesystem +// transaction logging system. This will initialise the logging-related +// variables and call fsinstance_rebootcheck to perform any unfinished +// transactions to leave the filesystem structures in a consistent state +void fsinstance_inittransactions(fsinstance_t* instance, unsigned int device, int blocksize) { + if (blocksize <= sizeof(fsinstance_logheader_t)) { + panic("fsinstance_inittransactions: Block size given by the caller is too small for the log header"); + } + + initlock(&instance->fslog_lock, "fslog_lock"); + + instance->fslog_start = instance->superblock->firstlogblock; + instance->fslog_size = instance->superblock->logblocks; + instance->fslog_device = device; + + // This does most of the actual work of "rebooting" the filesystem, by + // finishing any partly-completed transactions to restore filesystem + // structures to a consistent state. + fsinstance_rebootcheck(instance); +} diff --git a/fsinstance.h b/fsinstance.h new file mode 100644 index 0000000..abae6f3 --- /dev/null +++ b/fsinstance.h @@ -0,0 +1,252 @@ +// This is NEW CODE +#ifndef _FSINSTANCE_H +#define _FSINSTANCE_H + +#include "param.h" +#include "sched.h" + +#ifndef NULL +#define NULL ((void*)0ULL) +#endif + +#include "mkfs/fsformat.h" + +#define FSINSTANCE_MAXBLOCKSPEROP 10 +#define FSINSTANCE_MAXOPSPERLOG 3 +// This shouly definitely be updated to a higher number +#define FSINSTANCE_MAXLOGBLOCKS (FSINSTANCE_MAXBLOCKSPEROP * FSINSTANCE_MAXOPSPERLOG) + +// The fsinstance struct contains the information used by a single instance +// of the filesystem (that is, by one mounted partition). +typedef struct fsinstance fsinstance_t; + +// A log header is used by the filesystem logging system to track the blocks +// in the log. +typedef struct fsinstance_logheader fsinstance_logheader_t; + +struct fsinstance_logheader { + int number; + int blocks[FSINSTANCE_MAXLOGBLOCKS]; +}; + +// This is an inode structure as it exists in kernel memory, only the second +// part of the structure is written to disk. 
+typedef struct fsinstance_inode fsinstance_inode_t; + +// TODO: This should replace NDIRECT in old code +#define FSINSTANCE_DIRECTADDRS 12 + +// NOTE: The internal structure mostly resembles the old code for now but might chenge +struct fsinstance_inode { + fsinstance_t* instance; + int device; + int referencecount; + unsigned int inodenumber; // Inode number, specific to this filesystem instance + int valid; // Set to true if it has been loaded properly + + sched_sleeplock_t lock; + + short type; + short major; + short minor; + short nlink; + unsigned int size; // TODO: Make bigger. + unsigned int addrs[FSINSTANCE_DIRECTADDRS+1]; +}; + +struct fsinstance { + struct diskio_cache* cache; + // The superblock structure is now allocated by fsinstance_alloc and kept + // with the fsinstance structure instead of being a global variable. + fsformat_superblock_t* superblock; + sched_spinlock_t itbl_spin; + fsinstance_inode_t** itbl; + sched_spinlock_t fslog_lock; + fsinstance_logheader_t fslog_header; + int fsversion; // 0 for old xv6-compatible, 1+ for new versions + int fslog_sleepvariable; // Only used as a pointer to sleep on, to avoid multiple parties sleeping on the instance itself + int fslog_device; + int fslog_start; + int fslog_size; + int fslog_outstanding; + int fslog_committing; +}; + +// Allocates an empty filesystem instance. +fsinstance_t* fsinstance_alloc(); +void* fsinstance_init(fsinstance_t* instance, unsigned int device); +void fsinstance_resetblocktozero(fsinstance_t* instance, unsigned int device, unsigned int blocknumber); + +// Counts the free blocks (NOTE: This should be run inside a transaction +// for consistency, although it doesn't modify any blocks). Returns the +// number of blocks marked free for data or directory allocation. +unsigned int fsinstance_countfreeblocks(fsinstance_t* instance, unsigned int device); + +// Attemptes to allocate a block, clearing the block to zeroes and +// returning it's block number on success or printing a warning and +// returning 0 if out of space. +unsigned int fsinstance_allocdatablock(fsinstance_t* instance, unsigned int device); + +// Marks a data block as free in the used/free bitmap (this assumes that +// any reference to it will have been reset separately). +void fsinstance_freedatablock(fsinstance_t* instance, unsigned int device, unsigned int blocknumber); + +// Looks up the inode number on the filesystem and returns the in-memory +// inode structure with reference count adjusted for the caller, without +// locking or loading the inode. +fsinstance_inode_t* fsinstance_getinode(fsinstance_t* instance, unsigned int device, unsigned int inodenumber); + +// Allocates an inode, marking it with the given type. Returns the +// allocated inode (without locking it) on success or prints a warning +// and returns NULL on failure. +fsinstance_inode_t* fsinstance_allocinode(fsinstance_t* instance, unsigned int device, short inodetype); + +// Increases the reference count of an inode, locking on the inode table +// and returning the inode pointer to simulate a copy operation. +fsinstance_inode_t* fsinstance_inode_copyref(fsinstance_inode_t* inode); + +// Saves an inode to disk. This should be called after modifying any +// inode field that's saved to disk, and must be called while the caller +// holds the inode's lock. +void fsinstance_inode_save(fsinstance_inode_t* inode); + +// Locks the inode, and reads it into memory for the first time if it +// hasn't been loaded yet. 
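// Illustrative lifecycle (not part of this patch): how the calls declared in
// this header are normally sequenced for a single inode. The reference is
// taken first, the sleep-lock is only held around the load/modify window, and
// the whole sequence sits inside a transaction. The helper name is
// hypothetical; in a real header it would follow the declarations it uses.
static inline void fsinstance_inode_touchexample(fsinstance_t* fs,
                                                 unsigned int dev,
                                                 unsigned int inum) {
    fsinstance_begin(fs);
    fsinstance_inode_t* ip = fsinstance_getinode(fs, dev, inum); // reference only
    fsinstance_inode_lockandload(ip);     // lock, then read from disk if needed
    // ... inspect or modify ip here; call fsinstance_inode_save(ip) if modified ...
    fsinstance_inode_unlockandunget(ip);  // unlock, then drop the reference
    fsinstance_end(fs);
}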
+void fsinstance_inode_lockandload(fsinstance_inode_t* inode); + +// Unlocks an inode but does not perform any other actions like saving. +void fsinstance_inode_unlock(fsinstance_inode_t* inode); + +// Deletes the entire contents of an inode but does not perform any +// other action such as deleting the inode (it's size will become 0). +// The inode must be locked by the caller. +void fsinstance_inode_deletecontents(fsinstance_inode_t* inode); + +// The inverse of a get operation, and is also responsible for +// deleting inodes which have been "unlinked". This must be called from +// within a transaction. +void fsinstance_inode_unget(fsinstance_inode_t* inode); + +// Performs an "unlock" then an "unget" call in succession. +void fsinstance_inode_unlockandunget(fsinstance_inode_t* inode); + +// Defined elsehwere: +struct stat; + +// Copy information for a "stat" system call to a (kernel-mode or +// filesystem-mode) buffer. The inode must be locked by the caller. +void fsinstance_inode_getstatinfo(fsinstance_inode_t* inode, struct stat* output); + +// Returns the actual block number of the given logical block of a file, +// allocating an associated block if it doesn't exist. +unsigned int fsinstance_inode_getactualblock(fsinstance_inode_t* inode, unsigned int logicalblocknumber); + +// Reads from an inode's data into memory. The memory can be either in +// user-land or kernel-land, with isuserland set to 1 in the case of +// filling a user-land buffer. The caller is expected to have locked +// the inode. +int fsinstance_inode_read(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size); + +// Writes to an inode's data from memory. The address can be either +// in user-land or kernel-land, with isuserland set to 1 in the case of +// reading from a user-land buffer. The caller is expected to have +// locked the inode. +int fsinstance_inode_write(fsinstance_inode_t* inode, int isuserland, unsigned long long address, unsigned int offset, unsigned int size); + + + +int fsinstance_nameseq(fsinstance_t* instance, const char* namea, const char* nameb); + +fsinstance_inode_t* fsinstance_inode_lookup(fsinstance_inode_t* directory, char* filename, unsigned int* entryoffsetvar); + +// Checks if a directory has a given filename in it, returning 1 if +// there is a match and 0 otherwise. +int fsinstance_inode_hasentry(fsinstance_inode_t* directory, char* filename); + +// Attempts to insert a new directory entry into the given directory, +// returning 0 if successful or -1 otherwise. This function +int fsinstance_inode_insert(fsinstance_inode_t* directory, char* filename, unsigned int inodenumber); + +// Scan to the next part of the path, reading the first part into +// filenamevar (which must have FSFORMAT_NAMESIZE reserved bytes). +// Returns the path iterator at the start of the next filename/dirname +// or NULL to indicate end of string. +char* fsinstance_scanfilename(fsinstance_t* instance, char* pathiterator, char* filenamevar); + +// The internal path lookup function (used by the simple lookups). The +// filenamevar must point to an array of FSINSTANCE_NAMESIZE bytes which +// will be used as a scratch variable as well as to look up the filename +// within the parent directory (if getparent is non-zero). Any file +// lookup needs to happen within a transaction. +fsinstance_inode_t* fsinstance_lookuppath(fsinstance_t* instance, char* path, int getparent, char* filenamevar); + +// Simple path lookup of a path's filename within a parent directory. 
+// The filename variable needs to point to at least FSFORMAT_NAMESIZE +// bytes of reserved memory. Any file lookup needs to happen within a +// transaction. +fsinstance_inode_t* fsinstance_lookupparent(fsinstance_t* instance, char* path, char* filenamevar); + +// Simple path lookup, returning the inode matching the path or NULL if +// not found. Any file lookup needs to happen within a transaction. +fsinstance_inode_t* fsinstance_lookup(fsinstance_t* instance, char* path); + +// Saves the log header to disk, this is used internally to set the disk +// into a state where the transaction is finishable (or cleared). +void fsinstance_savetransaction(fsinstance_t* instance); + +// Loads the log header from disk, this is used internally to reload the +// structure at startup. +void fsinstance_loadtransaction(fsinstance_t* instance); + +// Called internally to save the cached blocks included in this +// transaction. +void fsinstance_savecache(fsinstance_t* instance); + +// Called internally to copy the logged blocks to their final locations. +// This is mostly an inverse of fsinstance_savecache. +void fsinstance_applytransactionblocks(fsinstance_t* instance, int rebootmode); + +// Called internally to reset the on-disk transaction to a "zero blocks" +// state. +void fsinstance_resettransaction(fsinstance_t* instance); + +// Called at startup time or whenever else the disk is mounted to check +// and apply any partially-applied writes, i.e. an "after-reboot check". +void fsinstance_rebootcheck(fsinstance_t* instance); + +// Called internally to perform a "commit" of the current transaction +// using the other internal functions. This will have no effect in cases +// where no actual data has been modified in this transaction. +void fsinstance_innercommit(fsinstance_t* instance); + +// This is like the diskio_buffer_write function except that it just +// adds the block to the log to be committed later. +struct diskio_buffer; // Defined elsewhere +void fsinstance_writelogged(fsinstance_t* instance, struct diskio_buffer* buffer); + +// Called at the beginning of a filesystem transaction, waits until the +// filesystem is ready to accept more transactions and then returns with +// a new transaction marked as being in progress. +void fsinstance_begin(fsinstance_t* instance); + +// Called at the end of a filesystem transaction. Commits the whole set +// of transactions if this is the last one in progress. +void fsinstance_end(fsinstance_t* instance); + +// This is called internally to initialise/"reboot" the filesystem +// transaction logging system. This will initialise the logging-related +// variables and call fsinstance_rebootcheck to perform any unfinished +// transactions to leave the filesystem structures in a consistent state +void fsinstance_inittransactions(fsinstance_t* instance, unsigned int device, int blocksize); + +// Panics if a filesystem instance is not valid. +#define FSINSTANCE_CHECK(i) \ + do { \ + if ((i) == NULL || (i->superblock == NULL)) { \ + printf("bad fsinstance!\n"); \ + panic((char*)__func__); \ + } \ + } while(0) + +// From ifndef at top of file: +#endif diff --git a/kernel.ld b/kernel.ld new file mode 100644 index 0000000..ee04f22 --- /dev/null +++ b/kernel.ld @@ -0,0 +1,44 @@ +OUTPUT_ARCH( "riscv" ) +ENTRY( _entry ) + +SECTIONS +{ + /* + * ensure that entry.S / _entry is at 0x80000000, + * where qemu's -kernel jumps. + */ + . = 0x80000000; + + .text : { + *(.text .text.*) + . = ALIGN(0x1000); + _trampoline = .; + *(trampsec) + . = ALIGN(0x1000); + ASSERT(. 
- _trampoline == 0x1000, "error: trampoline larger than one page"); + PROVIDE(etext = .); + } + + .rodata : { + . = ALIGN(16); + *(.srodata .srodata.*) /* do not need to distinguish this from .rodata */ + . = ALIGN(16); + *(.rodata .rodata.*) + } + + .data : { + . = ALIGN(16); + *(.sdata .sdata.*) /* do not need to distinguish this from .data */ + . = ALIGN(16); + *(.data .data.*) + } + + .bss : { + . = ALIGN(16); + *(.sbss .sbss.*) /* do not need to distinguish this from .bss */ + . = ALIGN(16); + *(.bss .bss.*) + } + + PROVIDE(end = .); +} diff --git a/kernelvec.S b/kernelvec.S new file mode 100644 index 0000000..5d2eea9 --- /dev/null +++ b/kernelvec.S @@ -0,0 +1,67 @@ + // + // interrupts and exceptions while in supervisor + // mode come here. + // + // the current stack is a kernel stack. + // push registers, call kerneltrap(). + // when kerneltrap() returns, restore registers, return. + // + +.option norvc +.globl kerneltrap +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +.globl kernelvec +.align 4 +kernelvec: + // make room to save registers. + addi sp, sp, -256 + + // save caller-saved registers. + sd ra, 0(sp) + sd sp, 8(sp) + sd gp, 16(sp) + sd tp, 24(sp) + sd t0, 32(sp) + sd t1, 40(sp) + sd t2, 48(sp) + sd a0, 72(sp) + sd a1, 80(sp) + sd a2, 88(sp) + sd a3, 96(sp) + sd a4, 104(sp) + sd a5, 112(sp) + sd a6, 120(sp) + sd a7, 128(sp) + sd t3, 216(sp) + sd t4, 224(sp) + sd t5, 232(sp) + sd t6, 240(sp) + + // call the C trap handler in trap.c + call kerneltrap + + // restore registers. + ld ra, 0(sp) + ld sp, 8(sp) + ld gp, 16(sp) + // not tp (contains hartid), in case we moved CPUs + ld t0, 32(sp) + ld t1, 40(sp) + ld t2, 48(sp) + ld a0, 72(sp) + ld a1, 80(sp) + ld a2, 88(sp) + ld a3, 96(sp) + ld a4, 104(sp) + ld a5, 112(sp) + ld a6, 120(sp) + ld a7, 128(sp) + ld t3, 216(sp) + ld t4, 224(sp) + ld t5, 232(sp) + ld t6, 240(sp) + + addi sp, sp, 256 + + // return to whatever we were doing in the kernel. + sret diff --git a/kprintf.c b/kprintf.c new file mode 100644 index 0000000..b5a49a2 --- /dev/null +++ b/kprintf.c @@ -0,0 +1,286 @@ +// This is NEW CODE replacing the old printf with one based on my libc +#include "types.h" +#include "param.h" +#include "riscv.h" +#include "defs.h" +#include "kprintf.h" +#include "sched.h" + +volatile int kprintf_shouldlock = 0; +sched_spinlock_t kprintf_spin; + +// These are non-standard, but the other *printf functions need to be implemented somehow, +// so fnprintf/vfnprintf just use a callback function for output of n bytes of string output. +typedef int(*_libc_fnprintf_fn_t)(const char* str, int n, void* udata); +int _LIBC_PRINTF_CALLCONV _libc_fnprintf(_libc_fnprintf_fn_t fn, void* udata, const char* fmt, ...); +int _LIBC_PRINTF_CALLCONV _libc_vfnprintf(_libc_fnprintf_fn_t fn, void* udata, const char* fmt, va_list list); + +// These are non-standard, but the other *printf functions need to be implemented somehow, +// so fnprintf/vfnprintf just use a callback function for output of n bytes of string output. +int _LIBC_PRINTF_CALLCONV _libc_fnprintf(_libc_fnprintf_fn_t fn, void* udata, const char* fmt, ...) 
{ + va_list varargs; + va_start(varargs, fmt); + uintptr_t result = _libc_vfnprintf(fn, udata, fmt, varargs); + va_end(varargs); + return result; +} + +#define DUMPTOFN() \ + if (dayswithoutincident) { \ + if (fn(f-dayswithoutincident, dayswithoutincident, udata) != dayswithoutincident) { \ + return EOF; \ + } \ + written += dayswithoutincident; \ + dayswithoutincident = 0; \ + } + +volatile int kprintf_inpanic = 0; + +void kprintf_panic(const char* reason) { + kprintf_shouldlock = 0; + kprintf("KERNEL PANIC: %s\n", reason); + kprintf_inpanic = 1; + + // TODO: Disable interrupts? + + while (1) { + // Hot loop + } +} + +int _LIBC_PRINTF_CALLCONV _libc_vfnprintf(_libc_fnprintf_fn_t fn, void* udata, const char* fmt, va_list list) { + int dayswithoutincident = 0; + int written = 0; + const char* f = fmt; + int c; + while ((c = *f)) { + if (c == '%') { + DUMPTOFN(); + //int has_decimals = 0; + int decimals = 0; + nextctrl: + int tc = *(f+1); + int longcount = 0; // for counting %ld/%lld + switch (tc) { + case '%': { + f++; + dayswithoutincident++; // Just let the second '%' be printed with the next string part + } break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + decimals = decimals * 10 + (tc - '0'); + f++; + } goto nextctrl; + #ifdef UNUSED + case '.': { + has_decimals = 1; + f++; + } goto nextctrl; + case 'f': { + char buf[20]; + int i = 20; + double fval = va_arg(list, double); + //fval = -543.210987654321; + long ival = (long) fval; + int isneg = ival < 0; + unsigned long uval = isneg ? -ival : ival; + do { + buf[--i] = (char)('0' + uval % 10); + uval /= 10; + } while (uval > 0); + if (ival < 0) { + buf[--i] = '-'; + } + int nwr = fn(buf+i, 20-i, udata); + if (nwr != 20-i) { + return EOF; + } + written += nwr; + if (!has_decimals) { + decimals = 6; + } + double x = fabs(fval - (double) ival); + if (fn(".", 1, udata) != 1) { + return EOF; + } + written++; + while (decimals > 0) { + x *= 10; + decimals--; + int ix = (int) x; + char xch = (char) ('0'+(ix%10)); + if (fn(&xch, 1, udata) != 1) { + return EOF; + } + written++; + } + f++; + } break; + #endif + case 'S': { // Formatted substring, I thought there was a standard % for it + char* s = va_arg(list, char*); + if (!s) { + s = "(null)"; + } + int nwr = _libc_vfnprintf(fn, udata, s, list); + if (nwr == EOF) { + return EOF; + } + written += nwr; + f++; + } break; + + case 's': { + char* s = va_arg(list, char*); + if (!s) { + s = "(null)"; + } + int len = strlen(s); + int nwr = fn(s, len, udata); + if (nwr != len) { + return EOF; + } + written += nwr; + f++; + } break; + + case 'c': { + int chr = va_arg(list, int); + char chrc = (char) chr; + if (fn(&chrc, 1, udata) != 1) { + return EOF; + } + written++; + f++; + } break; + + case 'l': // %ld/%lx is just handled as a special case of %d/%x + longcount++; + f++; + goto nextctrl; + + case 'u': // Unsigned is just handled as a special case of %d + case 'd': { + // TODO: Testing/edge case for negative maximum? + char buf[20]; + int i = 20; + if (longcount) { + long val = va_arg(list, long); + int isneg = ((tc == 'd') && (val < 0)); + unsigned long uval = isneg ? -val : val; + // Not actually needed, unless you want a full string: buf[i--] = '0'; + do { + buf[--i] = (char)('0' + uval % 10); + uval /= 10; + } while (uval > 0); + if (isneg) { + buf[--i] = '-'; + } + } else { + int val = va_arg(list, int); + int isneg = ((tc == 'd') && (val < 0)); + unsigned int uval = isneg ? 
-val : val; + // Not actually needed, unless you want a full string: buf[i--] = '0'; + do { + buf[--i] = (char)('0' + uval % 10); + uval /= 10; + } while (uval > 0); + if (isneg) { + buf[--i] = '-'; + } + } + int nwr = fn(buf+i, 20-i, udata); + if (nwr != 20-i) { + return EOF; + } + written += nwr; + f++; + } break; + + case 'p': // Pointer is treated as %lx + longcount++; + case 'x': + case 'X': { // Hex is handled a separate case to %d because the loop is slightly slower and sign is never used + const char* digits = (tc == 'x') ? "0123456789abcdef" : "0123456789ABCDEF"; + char buf[16]; // Size is easier to predict for hex, two characters of digits per byte + int i = 16; + if (longcount) { + unsigned long uval = va_arg(list, unsigned long); + // Not actually needed, unless you want a full string: buf[i--] = '0'; + do { + buf[--i] = digits[uval % 16]; + uval /= 16; + } while (uval > 0); + } else { + unsigned int uval = va_arg(list, unsigned int); + // Not actually needed, unless you want a full string: buf[i--] = '0'; + do { + buf[--i] = digits[uval % 16]; + uval /= 16; + } while (uval > 0); + } + int nwr = fn(buf+i, 16-i, udata); + if (nwr != 16-i) { + return EOF; + } + written += nwr; + f++; + } break; + + default: { // For now just print %x??? when 'x' is unknown, but this is probably unsafe and an error should be reported properly instead (TODO) + // Also skip the argument (assume all arguments are word-sized for now) + /* void* _ignored = */ va_arg(list, void*); + if (fn(f, 2, udata) != 2) { + return EOF; + } + if (fn("???", 3, udata) != 3) { + return EOF; + } + written+=5; + f++; + } + } + } else { + dayswithoutincident++; + } + f++; + } + DUMPTOFN(); + return written; +} + +int _kprintf_backend(const char* str, int n, void* udata) { + for (int i = 0; i < n; i++) { + consputc(str[i]); + } + return n; +} + +int _LIBC_PRINTF_CALLCONV kprintf(const char* fmt, ...) 
{ + //void* udata = (void*)((uintptr_t)1); // stdout + int shouldlock = kprintf_shouldlock; + if (shouldlock) { + acquire(&kprintf_spin); + } + va_list varargs; + va_start(varargs, fmt); + uintptr_t result = _libc_vfnprintf(&_kprintf_backend, NULL, fmt, varargs); + va_end(varargs); + if (shouldlock) { + release(&kprintf_spin); + } + return result; +} + +void kprintf_init() { + initlock(&kprintf_spin, "kprintf_spin"); + kprintf_shouldlock = 1; +} diff --git a/kprintf.h b/kprintf.h new file mode 100644 index 0000000..3060124 --- /dev/null +++ b/kprintf.h @@ -0,0 +1,40 @@ +// This is NEW CODE replacing the old printf with one based on my libc +#ifndef _KPRINTF_H +#define _KPRINTF_H + +#ifndef NULL +#define NULL ((void*)0ULL) +#endif + +#ifdef _ZCC +#include "cc_stdarg.h" +#else +#include +#endif + +#define EOF ((int)-1) +typedef unsigned long uintptr_t; + +#ifdef _ZCC +#define _LIBC_PRINTF_CALLCONV __classic_call +#else +#define _LIBC_PRINTF_CALLCONV +#endif + +extern volatile int kprintf_inpanic; + +#ifdef _ZCC +void kprintf_panic(const char* reason); +#else +void kprintf_panic(const char* reason) __attribute__((noreturn)); +#endif + +int _LIBC_PRINTF_CALLCONV kprintf(const char* fmt, ...); + +void kprintf_init(); + +#define printf kprintf +#define panic kprintf_panic + +// From ifndef at top of file: +#endif diff --git a/kqueue.c b/kqueue.c new file mode 100644 index 0000000..200456c --- /dev/null +++ b/kqueue.c @@ -0,0 +1 @@ +// This is NEW CODE eventually implementing "kqueue" event management diff --git a/kqueue.h b/kqueue.h new file mode 100644 index 0000000..f957421 --- /dev/null +++ b/kqueue.h @@ -0,0 +1,8 @@ +// This is NEW CODE by Zak with a new kqueue-style mechanism +#ifndef _KQUEUE_H +#define _KQUEUE_H + + + +// From ifndef at top of file: +#endif diff --git a/main.c b/main.c new file mode 100644 index 0000000..22de9c0 --- /dev/null +++ b/main.c @@ -0,0 +1,94 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "sched.h" +#include "vmrd.h" +#include "kprintf.h" + +void syscall_init(); +void fpu_init(); +void resources_init(); // autogenerated + +volatile static int started = 0; + +// start() jumps here in supervisor mode on all CPUs. 
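main() below brings hart 0 up first and only then releases the secondary harts via the started flag. Distilled to its essentials, the handshake looks like this (a minimal sketch, not part of the original patch; the example_* names are illustrative):

// Sketch of the hart start-up handshake that main() below relies on.
volatile static int example_started = 0;

static void example_primary_hart(void)
{
  // ... one-time global initialisation (console, paging, traps, PLIC, ...) ...
  __sync_synchronize();        // publish all prior writes before raising the flag
  example_started = 1;
}

static void example_secondary_hart(void)
{
  while(example_started == 0)
    ;                          // spin until hart 0 finishes global initialisation
  __sync_synchronize();        // then make hart 0's writes visible on this hart
  // ... per-hart initialisation (paging, trap vector, PLIC) ...
}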
+void +main() +{ + if(SCHED_CORE_THISNUMBER_NOINTERRUPTS() == 0){ + consoleinit(); + kprintf_init(); + kprintf("\nRealtime64 Kernel starting up...\n"); + #ifdef _ZCC + printf("[compiled with new C compiler]\n"); + #else + printf("[compiled with legacy C compiler]\n"); + #endif + printf("\n"); + + timeslice_max = 1000000; + timeslice_min = 1000; + + printf("physpg_initbegin()...\n"); + physpg_initbegin(); + printf("physpg_setrange(PHYSPG_FREERAM, %p, %p)...\n", end, (void*)PHYSTOP); + physpg_setrange(PHYSPG_FREERAM, end, (void*)PHYSTOP); + printf("physpg_initend()...\n"); + physpg_initend(); + printf("kvminit()...\n"); + kvminit(); // create kernel page table + printf("kvminithart()...\n"); + kvminithart(); // turn on paging + printf("resources_init()...\n"); + resources_init(); + printf("procinit()...\n"); + procinit(); // process table + printf("trapinit()...\n"); + trapinit(); // trap vectors + printf("trapinithart()...\n"); + trapinithart(); // install kernel trap vector + printf("plicinit()...\n"); + plicinit(); // set up interrupt controller + printf("plicinithart()...\n"); + plicinithart(); // ask PLIC for device interrupts + printf("fileinit()...\n"); + fileinit(); // file table + printf("vmrd_present()?\n"); + if (vmrd_present()) { + printf("YES, using vmrd_init()...\n"); + vmrd_init(); + } else { + printf("NO, using virtio_disk_init()...\n"); + virtio_disk_init(); // emulated hard disk + } + printf("syscall_init()...\n"); + syscall_init(); // syscall table must now be initialised at startup + printf("fpu_init()...\n"); + fpu_init(); + printf("userinit()...\n"); + userinit(); // first user process + printf("Kernel booted?\n"); + printf("%d MB total %d MB free\n", (int) (physpg_totalram()/MB), (int)(physpg_freeram()/MB)); + __sync_synchronize(); + started = 1; + } else { + //consputc('+'); + //while(1){} + while(started == 0) + ; + __sync_synchronize(); + printf("hart %d starting\n", SCHED_CORE_THISNUMBER_NOINTERRUPTS()); + //printf("kvminithart()...\n"); + kvminithart(); // turn on paging + //printf("trapinithart()...\n"); + trapinithart(); // install kernel trap vector + //printf("plicinithart()...\n"); + plicinithart(); // ask PLIC for device interrupts + } + + //printf("ENTERING SCHEDULER ON HART %d\n", SCHED_CORE_THISNUMBER_NOINTERRUPTS()); + scheduler(); +} diff --git a/memlayout.h b/memlayout.h new file mode 100644 index 0000000..db6ad13 --- /dev/null +++ b/memlayout.h @@ -0,0 +1,61 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// Physical memory layout + +// qemu -machine virt is set up like this, +// based on qemu's hw/riscv/virt.c: +// +// 00001000 -- boot ROM, provided by qemu +// 02000000 -- CLINT +// 0C000000 -- PLIC +// 10000000 -- uart0 +// 10001000 -- virtio disk +// 80000000 -- boot ROM jumps here in machine mode +// -kernel loads the kernel here +// unused RAM after 80000000. + +// the kernel uses physical memory thus: +// 80000000 -- entry.S, then kernel text and data +// end -- start of kernel page allocation area +// PHYSTOP -- end RAM used by the kernel + +// qemu puts UART registers here in physical memory. +#define UART0 0x10000000UL +#define UART0_IRQ 10 + +// virtio mmio interface +#define VIRTIO0 0x10001000UL +#define VIRTIO0_IRQ 1 + +// qemu puts platform-level interrupt controller (PLIC) here. 
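The macros that follow derive per-hart PLIC register addresses from the base address. A worked example under the qemu virt layout assumed here (the values follow directly from the definitions below; not part of the original patch):

//   PLIC_SENABLE(0) == 0x0c002080     PLIC_SENABLE(1) == 0x0c002180
//   PLIC_SCLAIM(0)  == 0x0c201004     PLIC_SCLAIM(1)  == 0x0c203004
// e.g. enabling the UART interrupt for hart 1 in S-mode:
//   *(volatile uint32*)PLIC_SENABLE(1) |= (1 << UART0_IRQ);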
+#define PLIC 0x0c000000UL +#define PLIC_PRIORITY (PLIC + 0x0) +#define PLIC_PENDING (PLIC + 0x1000) +#define PLIC_SENABLE(hart) (PLIC + 0x2080 + (hart)*0x100) +#define PLIC_SPRIORITY(hart) (PLIC + 0x201000 + (hart)*0x2000) +#define PLIC_SCLAIM(hart) (PLIC + 0x201004 + (hart)*0x2000) + +// the kernel expects there to be RAM +// for use by the kernel and user pages +// from physical address 0x80000000 to PHYSTOP. +//#define KERNBASE 0x80000000UL +#define KERNBASE ((unsigned long)((void*)&_entry)) +#define PHYSTOP (KERNBASE + 128*1024*1024) + +// map the trampoline page to the highest address, +// in both user and kernel space. +#define TRAMPOLINE (MAXVA - PGSIZE) + +// map kernel stacks beneath the trampoline, +// each surrounded by invalid guard pages. +#define KSTACK(p) (TRAMPOLINE - ((p)+1)* 2*PGSIZE) + +// User memory layout. +// Address zero first: +// text +// original data and bss +// fixed-size stack +// expandable heap +// ... +// TRAPFRAME (p->trapframe, used by the trampoline) +// TRAMPOLINE (the same page as in the kernel) +#define TRAPFRAME (TRAMPOLINE - PGSIZE) diff --git a/param.h b/param.h new file mode 100644 index 0000000..640e50b --- /dev/null +++ b/param.h @@ -0,0 +1,18 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#define NPRIO 8 // Priority 1 is HIGH PRIORITY, priority NPRIO is LOW PRIORITY +#define INITPRIO (NPRIO/2-1) // This assumes at least 4 or so priorities to be useful +#define NPROC 1024 // maximum number of processes +#define NCPU 64 // maximum number of CPUs +#define NOFILE 16 // open files per process +#define NFILE 100 // open files per system +#define NINODE 50 // maximum number of active i-nodes +#define NDEV 10 // maximum major device number +#define ROOTDEV 1 // device number of file system root disk +#define MAXARG 32 // max exec arguments +#define MAXOPBLOCKS 10 // max # of blocks any FS op writes +#define LOGSIZE (MAXOPBLOCKS*3) // max data blocks in on-disk log +#define NBUF (MAXOPBLOCKS*3) // size of disk block cache (this is now only a default setting internally) +//#define FSSIZE 2000 // size of file system in blocks +#define MAXPATH 128 // maximum file path name +#define USERSTACK 1 // user stack pages + diff --git a/physpg.c b/physpg.c new file mode 100644 index 0000000..c21e959 --- /dev/null +++ b/physpg.c @@ -0,0 +1,235 @@ +/* NEW CODE implementing a more extensible physical memory manager + * Copyright (C) 2024 Zak Fenton + * NO WARRANTY USE AT YOUR OWN RISK etc. 
under terms of UNLICENSE or MIT license + */ + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "riscv.h" +#include "defs.h" +#include "kprintf.h" + +typedef struct page page_t; +struct page { + uint64 vals[512]; +}; + +typedef struct freepage freepage_t; +struct freepage { + uint64 nfreehere; // This is checked against the main free count + freepage_t* nextfree; +}; + +typedef struct range range_t; +struct range { + int mode; + int pad; + uint64 start; + uint64 end; +}; + +#define NRANGE 64 +range_t range[NRANGE]; + +sched_spinlock_t physpg_lock; +freepage_t* firstfreepage; +uint64 nfreepages; +uint64 ntotalpages; +// This is not a CPU-mappable page map just metadata +page_t* metadatamap; + +void physpg_initbegin() { + nfreepages = 0; + ntotalpages = 0; + + for (int i = 0; i < NRANGE; i++) { + range[i].mode = -1; + range[i].start = 0; + range[i].end = 0; + } + + initlock(&physpg_lock, "physpg"); +} + +uint64 physpg_freeram() { + return nfreepages * PGSIZE; +} + +uint64 physpg_totalram() { + return ntotalpages * PGSIZE; +} + +uint64* physpg_metavar(uint64 addr); + +void* physpg_alloc1(int mode) { + freepage_t* p; + acquire(&physpg_lock); + if (firstfreepage) { + p = firstfreepage; + if (mode != PHYSPG_METADATA) { + uint64* v = physpg_metavar((uint64) p); + int vt = (int) ((*v)>>32); + if (vt != PHYSPG_FREERAM) { + panic("physpg_alloc1: memory corruption, free-listed page is not marked free in metadata"); + } + *v = (((uint64)mode)<<32) | 1ULL; + } + if (p->nfreehere != nfreepages) { + printf("badptr=%p nfreehere=%d nfreepages=%d\n", p, (int) (p->nfreehere), (int) nfreepages); + panic("physpg_alloc1: memory corruption, mismatch of free counts"); + } + firstfreepage = p->nextfree; + nfreepages--; + } else { + p = 0; + } + release(&physpg_lock); + return p; +} + +void physpg_free1(int mode, void* physpg) { + //printf("freeing %p\n", physpg); + freepage_t* p = physpg; + acquire(&physpg_lock); + if (metadatamap && mode != PHYSPG_METADATA) { + uint64* v = physpg_metavar((uint64) p); + int vt = (int) ((*v)>>32); + if (vt != mode) { + panic("physpg_free1: memory corruption, mismatch of metadata modes"); + } + *v = (((uint64)PHYSPG_FREERAM)<<32); + } + p->nextfree = firstfreepage; + if (firstfreepage && firstfreepage->nfreehere != nfreepages) { + panic("physpg_free1: memory corruption, mismatch of free counts"); + } + p->nfreehere = ++nfreepages; + firstfreepage = p; + release(&physpg_lock); +} + +void physpg_setrange(int mode, void* start, void* end) { + start = (void*) PGROUNDUP((uint64) start); + end = (void*) PGROUNDDOWN((uint64) end); + if (end <= start) { + panic("physpg_setrange: invalid range"); + } + // Set idx to the index of a range struct with mode=-1 + int idx; + for (idx = 0; range[idx].mode >= 0; idx++) { + if (idx >= NRANGE) { + panic("physpg_setrange: too many ranges"); + } + } + range[idx].mode = mode; + range[idx].start = (uint64) start; + range[idx].end = (uint64) end; + char* x; + for (x = (char*)start; x < (char*) end; x += PGSIZE) { + if (mode == PHYSPG_FREERAM) { + physpg_free1(0, x); + } + ntotalpages++; + } +} + +uint64* physpg_maplookupvar(page_t* map, uint64 addr, int createmode) { + if (!map) { + return (void*) 0ULL; + } + int idx2 = (int) ((addr >> 30) & 0x1FF); + int idx1 = (int) ((addr >> 21) & 0x1FF); + int idx0 = (int) ((addr >> 12) & 0x1FF); + //printf("looking up addr %p in %p\n", (void*)addr, map); + if (!map->vals[idx2]) { + if (createmode > 0) { + printf("Creating level 2 table #%d\n", idx2); + map->vals[idx2] = 
(uint64) physpg_alloc1(createmode); + memset((void*) (map->vals[idx2]), 0, PGSIZE); + } else { + return (void*) 0ULL; + } + } + page_t* intermediatepage = (void*) (map->vals[idx2]); + if (!intermediatepage->vals[idx1]) { + if (createmode > 0) { + printf("Creating level 1 table #%d\n", idx1); + intermediatepage->vals[idx1] = (uint64) physpg_alloc1(createmode); + memset((void*) (intermediatepage->vals[idx1]), 0, PGSIZE); + } else { + return (void*) 0ULL; + } + } + page_t* lookuppage = (void*) (intermediatepage->vals[idx1]); + return lookuppage->vals + idx0; +} + +uint64* physpg_metavar(uint64 addr) { + uint64* varaddr = physpg_maplookupvar(metadatamap, addr, PHYSPG_METADATA); + if (!varaddr) { + panic("physpg_metadatavar: no variable for address"); + } + return varaddr; +} + +uint64 physpg_mapget(page_t* map, uint64 addr) { + uint64* var = physpg_maplookupvar(map, addr, -1); + if (var) { + return *var; + } else { + return 0; + } +} + +void physpg_mapset(page_t* map, uint64 addr, int createmode, uint64 val) { + uint64* var = physpg_maplookupvar(map, addr, createmode); + if (!var) { + panic("physpg_mapset got NULL variable"); + } + *var = val; +} + +// To finalise initialisation of the page manager, first go through the +// set of ranges building a page map of metadata variables then go through the +// pages used by the metadata map marking each page used to construct the map +// itself as metadata. +// After this, allocation of (non-metadata) pages can be streamlined to always +// track the allocations and any reference counts etc. in the map. This also +// allows to easily check that a kalloc'd/kfree'd page is actually in +// the expected state to begin with. +void physpg_initend() { + uint64* v; + metadatamap = physpg_alloc1(PHYSPG_METADATA); + if (!metadatamap) { + panic("physpg_initend: can't allocate metadata, initialisation failure"); + } + memset(metadatamap, 0, PGSIZE); + + for (int i = 0; i < NRANGE; i++) { + if (range[i].mode >= 0) { + for (uint64 pg = range[i].start; pg < range[i].end; pg += PGSIZE) { + v = physpg_metavar(pg); + *v = ((uint64)(range[i].mode))<<32; + } + } + } + + v = physpg_metavar((uint64)metadatamap); + *v = ((uint64)PHYSPG_METADATA)<<32; + for (int outer = 0; outer < 512; outer++) { + page_t* intermediatepg = (void*) (metadatamap->vals[outer]); + if (intermediatepg) { + v = physpg_metavar((uint64) intermediatepg); + *v = ((uint64)PHYSPG_METADATA)<<32; + for (int inner = 0; inner < 512; inner++) { + page_t* innerpg = (void*) (intermediatepg->vals[inner]); + if (innerpg) { + v = physpg_metavar((uint64) innerpg); + *v = ((uint64)PHYSPG_METADATA)<<32; + } + } + } + } +} diff --git a/pipe.c b/pipe.c new file mode 100644 index 0000000..e58da13 --- /dev/null +++ b/pipe.c @@ -0,0 +1,130 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "sched.h" +#include "proc.h" +#include "fs.h" +#include "file.h" + +#define PIPESIZE 512 + +struct pipe { + sched_spinlock_t lock; + char data[PIPESIZE]; + uint nread; // number of bytes read + uint nwrite; // number of bytes written + int readopen; // read fd is still open + int writeopen; // write fd is still open +}; + +int +pipealloc(struct file **f0, struct file **f1) +{ + struct pipe *pi; + + pi = 0; + *f0 = *f1 = 0; + if((*f0 = filealloc()) == 0 || (*f1 = filealloc()) == 0) + goto bad; + if((pi = (struct pipe*)kalloc()) == 0) + goto bad; + pi->readopen = 1; + pi->writeopen = 1; + pi->nwrite = 0; + pi->nread = 0; + 
initlock(&pi->lock, "pipe"); + (*f0)->type = FD_PIPE; + (*f0)->readable = 1; + (*f0)->writable = 0; + (*f0)->pipe = pi; + (*f1)->type = FD_PIPE; + (*f1)->readable = 0; + (*f1)->writable = 1; + (*f1)->pipe = pi; + return 0; + + bad: + if(pi) + kfree((char*)pi); + if(*f0) + fileclose(*f0); + if(*f1) + fileclose(*f1); + return -1; +} + +void +pipeclose(struct pipe *pi, int writable) +{ + acquire(&pi->lock); + if(writable){ + pi->writeopen = 0; + sched_wake(&pi->nread); + } else { + pi->readopen = 0; + sched_wake(&pi->nwrite); + } + if(pi->readopen == 0 && pi->writeopen == 0){ + release(&pi->lock); + kfree((char*)pi); + } else + release(&pi->lock); +} + +int +pipewrite(struct pipe *pi, uint64 addr, int n) +{ + int i = 0; + struct proc *pr = myproc(); + + acquire(&pi->lock); + while(i < n){ + if(pi->readopen == 0 || killed(pr)){ + release(&pi->lock); + return -1; + } + if(pi->nwrite == pi->nread + PIPESIZE){ //DOC: pipewrite-full + sched_wake(&pi->nread); + sleep(&pi->nwrite, &pi->lock); + } else { + char ch; + if(copyin(pr->pagetable, &ch, addr + i, 1) == -1) + break; + pi->data[pi->nwrite++ % PIPESIZE] = ch; + i++; + } + } + sched_wake(&pi->nread); + release(&pi->lock); + + return i; +} + +int +piperead(struct pipe *pi, uint64 addr, int n) +{ + int i; + struct proc *pr = myproc(); + char ch; + + acquire(&pi->lock); + while(pi->nread == pi->nwrite && pi->writeopen){ //DOC: pipe-empty + if(killed(pr)){ + release(&pi->lock); + return -1; + } + sleep(&pi->nread, &pi->lock); //DOC: piperead-sleep + } + for(i = 0; i < n; i++){ //DOC: piperead-copy + if(pi->nread == pi->nwrite) + break; + ch = pi->data[pi->nread++ % PIPESIZE]; + if(copyout(pr->pagetable, addr + i, &ch, 1) == -1) + break; + } + sched_wake(&pi->nwrite); //DOC: piperead-wakeup + release(&pi->lock); + return i; +} diff --git a/plic.c b/plic.c new file mode 100644 index 0000000..1b27834 --- /dev/null +++ b/plic.c @@ -0,0 +1,49 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "sched.h" + +// +// the riscv Platform Level Interrupt Controller (PLIC). +// + +void +plicinit(void) +{ + // set desired IRQ priorities non-zero (otherwise disabled). + *(uint32*)(PLIC + UART0_IRQ*4) = 1; + *(uint32*)(PLIC + VIRTIO0_IRQ*4) = 1; +} + +void +plicinithart(void) +{ + int hart = SCHED_CORE_THISNUMBER_NOINTERRUPTS(); + + // set enable bits for this hart's S-mode + // for the uart and virtio disk. + *(uint32*)PLIC_SENABLE(hart) = (1 << UART0_IRQ) | (1 << VIRTIO0_IRQ); + + // set this hart's S-mode priority threshold to 0. + *(uint32*)PLIC_SPRIORITY(hart) = 0; +} + +// ask the PLIC what interrupt we should serve. +int +plic_claim(void) +{ + int hart = SCHED_CORE_THISNUMBER_NOINTERRUPTS(); + int irq = *(uint32*)PLIC_SCLAIM(hart); + return irq; +} + +// tell the PLIC we've served this IRQ. 
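Together with plic_claim() above, plic_complete() below brackets the handling of one external interrupt. A minimal caller-side sketch (the dispatch normally lives in the trap handler; uartintr() and virtio_disk_intr() are assumed to exist as in xv6-style kernels, and this sketch is not part of the original patch):

//   int irq = plic_claim();          // which device raised the interrupt?
//   if(irq == UART0_IRQ)
//     uartintr();
//   else if(irq == VIRTIO0_IRQ)
//     virtio_disk_intr();
//   if(irq)
//     plic_complete(irq);            // acknowledge so the PLIC can deliver it again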
+void +plic_complete(int irq) +{ + int hart = SCHED_CORE_THISNUMBER_NOINTERRUPTS(); + *(uint32*)PLIC_SCLAIM(hart) = irq; +} diff --git a/proc.c b/proc.c new file mode 100644 index 0000000..8220bb2 --- /dev/null +++ b/proc.c @@ -0,0 +1,754 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "sched.h" +#include "proc.h" +#include "defs.h" +#include "drives.h" +#include "bitarray.h" +#include "fpu.h" +#include "sched.h" +#include "kprintf.h" + +extern struct bitarray *runnablearrays[NPRIO]; +extern struct bitarray *exhaustedarrays[NPRIO]; +extern struct bitarray *sleeping; + +struct proc proc[NPROC]; + +struct proc *initproc; + +int nextpid = 1; +sched_spinlock_t pid_lock; + +extern void forkret(void); +static void freeproc(struct proc *p); + +extern char trampoline[]; // trampoline.S + +// helps ensure that wakeups of wait()ing +// parents are not lost. helps obey the +// memory model when using p->parent. +// must be acquired before any p->lock. +sched_spinlock_t wait_lock; +void printptr(void* x); +// Allocate a page for each process's kernel stack. +// Map it high in memory, followed by an invalid +// guard page. +void +proc_mapstacks(pagetable_t kpgtbl) +{ + struct proc *p; + + for(p = proc; p < &proc[NPROC]; p++) { + char *pa = kalloc(); + if(pa == 0) + panic("kalloc"); + uint64 va = KSTACK((int) (p - proc)); + kvmmap(kpgtbl, va, (uint64)pa, PGSIZE, PTE_R | PTE_W); + } +} + +// initialize the proc table. +void +procinit(void) +{ + struct proc *p; + int i; + + for (i = 0; i < NPRIO; i++) { + runnablearrays[i] = bitarrayalloc(NPROC); + exhaustedarrays[i] = bitarrayalloc(NPROC); + } + sleeping = bitarrayalloc(NPROC); + + initlock(&pid_lock, "nextpid"); + initlock(&wait_lock, "wait_lock"); + for(p = proc; p < &proc[NPROC]; p++) { + initlock(&p->lock, "proc"); + p->state = SCHED_STATE_UNUSED; + p->kstack = KSTACK((int) (p - proc)); + } +} + +// Return the current struct proc *, or zero if none. +struct proc* +myproc(void) +{ + push_off(); + sched_core_t *c = SCHED_CORE_THIS_NOINTERRUPTS(); + struct proc *p = c->process; + pop_off(); + return p; +} + +int +allocpid() +{ + int pid; + + acquire(&pid_lock); + pid = nextpid; + nextpid = nextpid + 1; + release(&pid_lock); + + return pid; +} + +// Look in the process table for an SCHED_STATE_UNUSED proc. +// If found, initialize state required to run in the kernel, +// and return with p->lock held. +// If there are no free procs, or a memory allocation fails, return 0. +static struct proc* +allocproc(int withpgtbl) +{ + struct proc *p; + + for(p = proc; p < &proc[NPROC]; p++) { + acquire(&p->lock); + if(p->state == SCHED_STATE_UNUSED) { + goto found; + } else { + release(&p->lock); + } + } + return 0; + +found: + p->pid = allocpid(); + p->state = SCHED_STATE_USED; + + // Allocate a trapframe page. + if((p->trapframe = (sched_frame_t *)kalloc()) == 0){ + freeproc(p); + release(&p->lock); + return 0; + } + + // An empty user page table. + if (withpgtbl) { + p->pagetable = proc_pagetable(p); + if(p->pagetable == 0){ + freeproc(p); + release(&p->lock); + return 0; + } + } + + // Set up new context to start executing at forkret, + // which returns to user space. + memset(&p->context, 0, sizeof(sched_context_t)); + p->context.ra = (uint64)(&forkret); + p->context.sp = p->kstack + PGSIZE; + + return p; +} + +// free a proc structure and the data hanging from it, +// including user pages. +// p->lock must be held. 
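An aside on myproc(), defined above: it brackets the per-core lookup with push_off()/pop_off() so the caller cannot be migrated to another core between reading the core pointer and reading its process field. The same discipline applies to any per-core state (illustrative sketch, not part of the original patch):

//   push_off();                                // interrupts off: no migration between cores
//   sched_core_t *c = SCHED_CORE_THIS_NOINTERRUPTS();
//   int busy = (c->process != 0);              // still guaranteed to be the same core
//   pop_off();                                 // restore the previous interrupt state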
+static void +freeproc(struct proc *p) +{ + if(p->trapframe) + kfree((void*)(p->trapframe)); + p->trapframe = 0; + if(p->pagetable) + proc_freepagetable(p->pagetable, p->mainthread /*&& p->mainthread != p*/ ? 0 : p->sz, 0); + p->pagetable = 0; + p->sz = 0; + p->pid = 0; + p->parent = 0; + p->mainthread = 0; + p->drives = 0; + p->cwdrive = 0; + p->fpu_active = 0; + p->fpu_saved = 0; + p->name[0] = 0; + p->chan = 0; + p->killed = 0; + p->xstate = 0; + p->state = SCHED_STATE_UNUSED; + p->timeslice = 0; + //sched_restate_alreadylocked(p, SCHED_STATE_UNUSED); +} + +// Create a user page table for a given process, with no user memory, +// but with trampoline and trapframe pages. +pagetable_t +proc_pagetable(struct proc *p) +{ + pagetable_t pagetable; + + // An empty page table. + pagetable = uvmcreate(); + if(pagetable == 0) + return 0; + + // map the trampoline code (for system call return) + // at the highest user virtual address. + // only the supervisor uses it, on the way + // to/from user space, so not PTE_U. + if(mappages(pagetable, TRAMPOLINE, PGSIZE, + (uint64)trampoline, PTE_R | PTE_X) < 0){ + uvmfree(pagetable, 0, 0); + return 0; + } + + // map the trapframe page just below the trampoline page, for + // trampoline.S. + if(mappages(pagetable, TRAPFRAME, PGSIZE, + (uint64)(p->trapframe), PTE_R | PTE_W) < 0){ + uvmunmap(pagetable, TRAMPOLINE, 1, 0); + uvmfree(pagetable, 0, 0); + return 0; + } + + return pagetable; +} + +// Free a process's page table, and free the +// physical memory it refers to. +void +proc_freepagetable(pagetable_t pagetable, uint64 sz, int reallyfree) +{ + uvmunmap(pagetable, TRAMPOLINE, 1, 0); + uvmunmap(pagetable, TRAPFRAME, 1, 0); + uvmfree(pagetable, sz, reallyfree); +} + +// a user program that calls exec("/init") +// assembled from ../user/initcode.S +// od -t xC ../user/initcode +uchar initcode[] = { + /* version with execve: */ + 0x17, 0x05, 0x00, 0x00, 0x03, 0x35, 0x05, 0x05, 0x97, 0x05, 0x00, 0x00, 0x83, 0xb5, 0x05, 0x05, + 0x13, 0x86, 0x85, 0x00, 0x93, 0x08, 0x30, 0x02, 0x73, 0x00, 0x00, 0x00, 0x89, 0x48, 0x73, 0x00, + 0x00, 0x00, 0xef, 0xf0, 0xbf, 0xff, 0x2f, 0x69, 0x6e, 0x69, 0x74, 0x00, 0x00, 0x01, 0x00, 0x13, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0x26, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 + +}; + +int first; + +// Set up first user process. +void +userinit(void) +{ + struct proc *p; + + first = 1; + + p = allocproc(1); + initproc = p; + + // allocate one user page and copy initcode's instructions + // and data into it. + uvmfirst(p->pagetable, initcode, 0x160 /*sizeof(initcode)*/); + p->sz = PGSIZE; + + //printf("Initcode starts with 0x%x, 0x%x, 0x%x ...\n", initcode[0], initcode[1], initcode[2]); + //printf("... 0x%x, 0x%x, 0x%x ...\n", initcode[3], initcode[4], initcode[5]); + //printf("... 0x%x, 0x%x, 0x%x ...\n", initcode[6], initcode[7], initcode[8]); + + // prepare for the very first "return" from kernel to user. 
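  // (Editorial note, not in the original patch:) usertrapret() copies epc into
  // sepc and sret jumps there, so epc = 0 starts initcode at user address 0;
  // sp = PGSIZE points at the top of the single page mapped by uvmfirst() above.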
+ p->trapframe->epc = 0; // user program counter + p->trapframe->sp = PGSIZE; // user stack pointer + + safestrcpy(p->name, "initcode", PROC_NAME_SIZE /*sizeof(p->name)*/); + printf("drives_alloc()...\n"); + p->drives = drives_alloc(); + printf("drives_alloc() returned %p\n", p->drives); + + printf("diskio_cache_alloc()...\n"); + diskio_cache_t* cache = diskio_cache_alloc(NBUF, DISKIO_BLOCK_SIZE); + printf("diskio_cache_alloc() returned %p\n", cache); + + printf("fsinstance_alloc()...\n"); + fsinstance_t* instance = fsinstance_alloc(); + printf("fsinstance_alloc() returned %p\n", instance); + instance->fslog_device = ROOTDEV; // Very important, this must be set before the call to fsinstance_lookup() or everything will break later + + instance->cache = cache; + + printf("drives_setup()...\n"); + int dn = drives_setup(p->drives, DRIVES_HANDLER_FS, instance, "BOOT"); + printf("drives_setup() returned %d\n", dn); + + p->cwdrive = drives_open(p->drives, "BOOT", 0ULL, 0ULL); + p->cwd = fsinstance_lookup(p->drives->entries[p->cwdrive].handlerdata, "/"); + + p->prio = INITPRIO; + p->maxprio = 1; + p->affinitymask = 0xFFFFFFFFFFFFFFFFULL; + + sched_restate_alreadylocked(p, SCHED_STATE_RUNNABLE); + + release(&p->lock); + //bitarray_set(runnablearrays[INITPRIO], (int) (p - proc), 1); +} + +void setupthread(struct proc* p, struct proc* main, uint64 func, uint64 stack, uint64 arg) { + p->mainthread = main; + p->trapframe->epc = func; + p->trapframe->sp = stack; + p->trapframe->a0 = arg; +} + +void cleanupthread(struct proc* p) { + p->mainthread = 0; + p->pagetable = 0; +} + +// Grow or shrink user memory by n bytes. +// Return 0 on success, -1 on failure. +int +growproc(int n) +{ + uint64 sz; + struct proc *p = myproc(); + + sz = p->sz; + if(n > 0){ + if((sz = uvmalloc(p->pagetable, sz, sz + n, PTE_W)) == 0) { + return -1; + } + } else if(n < 0){ + sz = uvmdealloc(p->pagetable, sz, sz + n); + } + p->sz = sz; + return 0; +} + +// Create a new process, copying the parent. +// Sets up child kernel stack to return as if from fork() system call. +int +fork(void) +{ + int i, pid; + struct proc *np; + struct proc *p = myproc(); + + // Allocate process. + if((np = allocproc(1)) == 0){ + return -1; + } + + // Copy user memory from parent to child. + if(uvmcopy(p->pagetable, np->pagetable, p->sz) < 0){ + freeproc(np); + release(&(np->lock)); + return -1; + } + np->sz = p->sz; + + // copy saved user registers. + memmove(np->trapframe, p->trapframe, sizeof(sched_frame_t)); + + // Cause fork to return 0 in the child. + np->trapframe->a0 = 0; + + np->prio = p->prio; + np->maxprio = p->maxprio; + np->affinitymask = p->affinitymask; + + // increment reference counts on open file descriptors. + for(i = 0; i < NOFILE; i++) + if(p->ofile[i]) + np->ofile[i] = filedup(p->ofile[i]); + np->cwdrive = drives_dup(p->drives, p->cwdrive); + np->drives = p->drives; // refcount should be handled by copying drive number + np->cwd = fsinstance_inode_copyref(p->cwd); + + safestrcpy(np->name, p->name, PROC_NAME_SIZE /*sizeof(p->name)*/); + + pid = np->pid; + + release(&(np->lock)); + + acquire(&wait_lock); + np->parent = p; + np->mainthread = 0ULL; // Never inherit threads, start in single-thread mode. 
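  // (Editorial note, not in the original patch:) np->parent and np->mainthread
  // are written while holding wait_lock because wait() and reparent() walk the
  // process table under wait_lock; taking it here keeps those scans consistent.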
+ release(&wait_lock); + + acquire(&(np->lock)); + sched_restate_alreadylocked(np, SCHED_STATE_RUNNABLE); + release(&(np->lock)); + + return pid; +} + +// NOTE: This is partly new code but the rest was copied from fork() +int +thrd(uint64 fnc, uint64 stk, uint64 arg) +{ + struct proc* np = allocproc(1); + struct proc* p = myproc(); + if (np) { + struct proc* mainthread = p->mainthread; + if (mainthread == 0ULL) { + mainthread = p; + p->mainthread = p; + } + //np->pagetable = p->pagetable; + if(uvmcopyshallow(p->pagetable, np->pagetable, p->sz) < 0){ + freeproc(np); + release(&(np->lock)); + return -1; + } + np->sz = p->sz; // TODO... + // copy saved user registers. + //memmove(np->trapframe, p->trapframe, sizeof(trapframe_t)); + setupthread(np, mainthread, fnc, stk, arg); + + np->prio = p->prio; + np->maxprio = p->maxprio; + np->affinitymask = p->affinitymask; + + // increment reference counts on open file descriptors. + for(int i = 0; i < NOFILE; i++) + if(p->ofile[i]) + np->ofile[i] = filedup(p->ofile[i]); + np->cwdrive = drives_dup(p->drives, p->cwdrive); + np->drives = p->drives; // refcount should be handled by copying drive number + np->cwd = fsinstance_inode_copyref(p->cwd); + + safestrcpy(np->name, p->name, PROC_NAME_SIZE /*sizeof(p->name)*/); + + int pid = np->pid; + + release(&(np->lock)); + + acquire(&wait_lock); + np->parent = p; + //np->mainthread = 0ULL; + release(&wait_lock); + + acquire(&(np->lock)); + sched_restate_alreadylocked(np, SCHED_STATE_RUNNABLE); + release(&(np->lock)); + + return pid; + } + return -1; +} + +// Pass p's abandoned children to init. +// Caller must hold wait_lock. +void +reparent(struct proc *p) +{ + struct proc *pp; + + for(pp = proc; pp < &proc[NPROC]; pp++){ + if(pp->parent == p){ + pp->parent = initproc; + sched_wake(initproc); + } + } +} + +// Exit the current process. Does not return. +// An exited process remains in the zombie state +// until its parent calls wait(). +void +exit(int status) +{ + struct proc *p = myproc(); + + // Shutdown FPU + fpu_status_write(0); + + if(p == initproc) + panic("init exiting"); + + // If this is the main thread of a multithreaded program, kill all threads before continuing + /*if (p->mainthread == p) { + for (int i = 0; i < NPROC; i++) { + struct proc* thr = &proc[i]; + if (thr != p) { + acquire(&thr->lock); + int tpid = thr->pid; + int shouldkill = thr->mainthread == p; + release(&thr->lock); + if (shouldkill) kill(tpid); + } + } + } else if (p->mainthread) { + cleanupthread(p); + }*/ + + // Close all open files. + for(int fd = 0; fd < NOFILE; fd++){ + if(p->ofile[fd]){ + struct file *f = p->ofile[fd]; + fileclose(f); + p->ofile[fd] = 0; + } + } + + fsinstance_t* instance = drives_fsbegin(p->drives, p->cwdrive, ""); + fsinstance_inode_unget(p->cwd); + drives_fsend(p->drives, instance); + p->cwdrive = drives_close(p->drives, p->cwdrive); + p->cwd = 0; + p->drives = 0; + + acquire(&wait_lock); + + // Give any children to init. + reparent(p); + + // Parent might be sleeping in wait(). + sched_wake(p->parent); + + acquire(&p->lock); + + p->xstate = status; + p->state = SCHED_STATE_ZOMBIE; + // unnecessary as proc is SCHED_STATE_RUNNING: sched_restate_alreadylocked(p, SCHED_STATE_ZOMBIE); + + release(&wait_lock); + + // Jump into the scheduler, never to return. + sched(); + panic("zombie exit"); +} + +// Wait for a child process to exit and return its pid. +// Return -1 if this process has no children. 
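An aside on thrd(), defined above: it starts a thread that shares the caller's address space and takes an entry point, an initial stack pointer and a single argument. A hypothetical user-side wrapper might look like this (the user-level thrd stub, malloc and the stack size are assumptions, not part of the original patch):

//   #define THREAD_STACK 4096
//   int spawn_thread(void (*fn)(unsigned long), unsigned long arg)
//   {
//     char *stack = malloc(THREAD_STACK);
//     if(stack == 0)
//       return -1;
//     // pass the top of the buffer: RISC-V stacks grow downward
//     return thrd((unsigned long)fn, (unsigned long)(stack + THREAD_STACK), arg);
//   }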
+int +wait(uint64 addr) +{ + struct proc *pp; + int havekids, pid; + struct proc *p = myproc(); + + acquire(&wait_lock); + + for(;;){ + // Scan through table looking for exited children. + havekids = 0; + for(pp = proc; pp < &proc[NPROC]; pp++){ + if(pp->parent == p){ + // make sure the child isn't still in exit() or swtch() [now sched_switchcontext()] + acquire(&pp->lock); + + havekids = 1; + if(pp->state == SCHED_STATE_ZOMBIE){ + // Found one. + pid = pp->pid; + if(addr != 0 && copyout(p->pagetable, addr, (char *)&pp->xstate, + sizeof(int /*pp->xstate*/)) < 0) { + release(&pp->lock); + release(&wait_lock); + return -1; + } + freeproc(pp); + release(&pp->lock); + release(&wait_lock); + return pid; + } + release(&pp->lock); + } + } + + // No point waiting if we don't have any children. + if(!havekids || killed(p)){ + release(&wait_lock); + return -1; + } + + // Wait for a child to exit. + sleep(p, &wait_lock); //DOC: wait-sleep + } +} + +// Switch to scheduler. Must hold only p->lock +// and have changed proc->state. Saves and restores +// intena because intena is a property of this +// kernel thread, not this CPU. It should +// be proc->intena and proc->noff, but that would +// break in the few places where a lock is held but +// there's no process. +void +sched(void) +{ + int intena; + struct proc *p = myproc(); + + if(!holding(&p->lock)) + panic("sched p->lock"); + if(SCHED_CORE_THIS_NOINTERRUPTS()->interruptsoff_depth != 1) + panic("sched locks"); + if(p->state == SCHED_STATE_RUNNING) + panic("sched running"); + if(intr_get()) + panic("sched interruptible"); + + intena = SCHED_CORE_THIS_NOINTERRUPTS()->interruptsoff_wereinterruptson; + sched_switchcontext(&p->context, &SCHED_CORE_THIS_NOINTERRUPTS()->registers); + //swtch(&p->context, &mycpu()->context); + SCHED_CORE_THIS_NOINTERRUPTS()->interruptsoff_wereinterruptson = intena; +} + +// Give up the CPU for one scheduling round. +void +yield(void) +{ + struct proc *p = myproc(); + bitarray_set(runnablearrays[p->prio], (int) (p - proc), 1); + acquire(&p->lock); + sched_restate_alreadylocked(p, SCHED_STATE_RUNNABLE); + sched(); + release(&p->lock); +} + +// A fork child's very first scheduling by scheduler() +// will swtch to forkret. +void +forkret(void) +{ + //static int first = 1; + + // Still holding p->lock from scheduler. + release(&myproc()->lock); + + if (first) { + // File system initialization must be run in the context of a + // regular process (e.g., because it calls sleep), and thus cannot + // be run from main(). + + struct proc* p = myproc(); + + printf("fsinstance_init()...\n"); + void * fsp = fsinstance_init(p->drives->entries[p->cwdrive].handlerdata, ROOTDEV); + printf("fsinstance_init() returned %p\n", fsp); + + printf("diskio_mountallramdisks()...\n"); + diskio_mountallramdisks(p->drives); + printf("diskio_mountallramdisks() returned.\n"); + + // TODO: instance->superblock = fsp; + + first = 0; + // ensure other cores see first=0. + __sync_synchronize(); + } + + usertrapret(); +} + +// Atomically release lock and sleep on chan. +// Reacquires lock when awakened. +void +sleep(void *chan, sched_spinlock_t *lk) +{ + struct proc *p = myproc(); + + // Must acquire p->lock in order to + // change p->state and then call sched. + // Once we hold p->lock, we can be + // guaranteed that we won't miss any wakeup + // (wakeup locks p->lock), + // so it's okay to release lk. 
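  // The comment above covers the kernel side; callers of sleep()/sched_wake()
  // are expected to follow the usual condition-loop discipline, e.g. (sketch
  // with an illustrative queue, not part of the original patch):
  //   acquire(&q.lock);
  //   while(q.count == 0)            // re-check: a wakeup only means "look again"
  //     sleep(&q, &q.lock);          // atomically releases q.lock, reacquires on wakeup
  //   ... consume one item ...
  //   sched_wake(&q);                // wake any producer waiting for space
  //   release(&q.lock);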
+ + // Should be unnecessary as proc is SCHED_STATE_RUNNING: + // bitarray_set(runnables, (int) (p - proc), 0); + bitarray_set(sleeping, (int) (p - proc), 1); + + acquire(&p->lock); //DOC: sleeplock1 + release(lk); + + // Go to sleep. + p->chan = chan; + p->state = SCHED_STATE_SLEEPING; + //sched_restate_alreadylocked(p, SCHED_STATE_SLEEPING); + + sched(); + + // Tidy up. + p->chan = 0; + + // Reacquire original lock. + release(&p->lock); + acquire(lk); +} + +// Kill the process with the given pid. +// The victim won't exit until it tries to return +// to user space (see usertrap() in trap.c). +int +kill(int pid) +{ + struct proc *p; + + for(p = proc; p < &proc[NPROC]; p++){ + acquire(&p->lock); + if(p->pid == pid){ + p->killed = 1; + if(p->state == SCHED_STATE_SLEEPING){ + // Wake process from sleep(). + sched_restate_alreadylocked(p, SCHED_STATE_RUNNABLE); + } + release(&p->lock); + return 0; + } + release(&p->lock); + } + return -1; +} + +void +setkilled(struct proc *p) +{ + acquire(&p->lock); + p->killed = 1; + release(&p->lock); +} + +int +killed(struct proc *p) +{ + int k; + + acquire(&p->lock); + k = p->killed; + release(&p->lock); + return k; +} + +// Copy to either a user address, or kernel address, +// depending on usr_dst. +// Returns 0 on success, -1 on error. +int +either_copyout(int user_dst, uint64 dst, void *src, uint64 len) +{ + struct proc *p = myproc(); + if(user_dst){ + return copyout(p->pagetable, dst, src, len); + } else { + memmove((char *)dst, src, len); + return 0; + } +} + +// Copy from either a user address, or kernel address, +// depending on usr_src. +// Returns 0 on success, -1 on error. +int +either_copyin(void *dst, int user_src, uint64 src, uint64 len) +{ + struct proc *p = myproc(); + if(user_src){ + return copyin(p->pagetable, dst, src, len); + } else { + memmove(dst, (char*)src, len); + return 0; + } +} diff --git a/proc.h b/proc.h new file mode 100644 index 0000000..2e13a07 --- /dev/null +++ b/proc.h @@ -0,0 +1,47 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) + +#include "fpu.h" +#include "sched.h" +#include "fsinstance.h" + +#define PROC_NAME_SIZE 16 + +// Per-process state +struct proc { + sched_spinlock_t lock; + + // p->lock must be held when using these: + sched_state_t state; // Process state + void *chan; // If non-zero, sleeping on chan + int killed; // If non-zero, have been killed + int prio; // Priority level + int maxprio; // Maximum priority level (LOWEST number the program can set) + int xstate; // Exit status to be returned to parent's wait + int pid; // Process ID + int cwdrive; // Current drive index, paired to cwd + struct drives* drives; // Drives structure, could differ in the future + int timeslice; + int reserved; + + // wait_lock must be held when using this: + struct proc *parent; // Parent process + + struct proc *mainthread; // Pointer to the main-thread process (possibly this or parent) or 0 if single-threaded + + // these are private to the process, so p->lock need not be held. 
+ uint64 kstack; // Virtual address of kernel stack + uint64 sz; // Size of process memory (bytes) + pagetable_t pagetable; // User page table + sched_frame_t *trapframe; // data page for trampoline.S + sched_context_t context; + //struct context context; // swtch() here to run process + fpu_context_t fpu_context; + struct file *ofile[NOFILE]; // Open files + fsinstance_inode_t *cwd; // Current directory + int fpu_active; + int fpu_saved; + char name[PROC_NAME_SIZE]; // Process name (debugging) + + // this in theory should maybe lock on affinity_lock, but is probably safe to test without locking (what is the worst case scenario?) + uint64 affinitymask; // Mask of whether this process should run on CPU #0 to #63 (this can be extended) +}; diff --git a/riscv.S b/riscv.S new file mode 100644 index 0000000..9f7ea6e --- /dev/null +++ b/riscv.S @@ -0,0 +1,168 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6, although it was converted from inline functions to plain assembly) +.option norvc +.text +.globl r_mhartid +r_mhartid: + csrr a0, mhartid + ret +.globl r_mstatus +r_mstatus: + csrr a0, mstatus + ret +.globl w_mstatus +w_mstatus: + csrw mstatus, a0 + ret +.globl w_mepc +w_mepc: + csrw mepc, a0 + ret +.globl r_sstatus +r_sstatus: + csrr a0, sstatus + ret +.globl w_sstatus +w_sstatus: + csrw sstatus, a0 + ret +.globl r_sip +r_sip: + csrr a0, sip + ret +.globl w_sip +w_sip: + csrw sip, a0 + ret +.globl r_sie +r_sie: + csrr a0, sie + ret +.globl w_sie +w_sie: + csrw sie, a0 + ret +.globl r_mie +r_mie: + csrr a0, mie + ret +.globl w_mie +w_mie: + csrw mie, a0 + ret +.globl w_sepc +w_sepc: + csrw sepc, a0 + ret +.globl r_sepc +r_sepc: + csrr a0, sepc + ret +.globl r_medeleg +r_medeleg: + csrr a0, medeleg + ret +.globl w_medeleg +w_medeleg: + csrw medeleg, a0 + ret +.globl r_mideleg +r_mideleg: + csrr a0, mideleg + ret +.globl w_mideleg +w_mideleg: + csrw mideleg, a0 + ret +.globl w_stvec +w_stvec: + csrw stvec, a0 + ret +.globl r_stvec +r_stvec: + csrr a0, stvec + ret +.globl r_stimecmp +r_stimecmp: + csrr a0, 0x14d + ret +.globl w_stimecmp +w_stimecmp: + csrw 0x14d, a0 + ret +.globl r_menvcfg +r_menvcfg: + csrr a0, 0x30a + ret +.globl w_menvcfg +w_menvcfg: + csrw 0x30a, a0 + ret +.globl w_pmpcfg0 +w_pmpcfg0: + csrw pmpcfg0, a0 + ret +.globl w_pmpaddr0 +w_pmpaddr0: + csrw pmpaddr0, a0 + ret +.globl w_satp +w_satp: + csrw satp, a0 + ret +.globl r_satp +r_satp: + csrr a0, satp + ret +.globl r_scause +r_scause: + csrr a0, scause + ret +.globl r_stval +r_stval: + csrr a0, stval + ret +.globl w_mcounteren +w_mcounteren: + csrw mcounteren, a0 + ret +.globl r_mcounteren +r_mcounteren: + csrr a0, mcounteren + ret +.globl r_time +r_time: + csrr a0, time + ret +.globl intr_on +intr_on: + csrr a5, sstatus + ori a5,a5,2 + csrw sstatus, a5 + ret +.globl intr_off +intr_off: + csrr a5, sstatus + andi a5,a5,-3 + csrw sstatus, a5 + ret +.globl intr_get +intr_get: + csrr a0, sstatus + bexti a0,a0,1 + ret +.globl r_sp +r_sp: + mv a0, sp + ret +.globl w_tp +w_tp: + mv tp, a0 + ret +.globl r_ra +r_ra: + mv a0, ra + ret +.globl sfence_vma +sfence_vma: + sfence.vma zero, zero + ret diff --git a/riscv.h b/riscv.h new file mode 100644 index 0000000..0612803 --- /dev/null +++ b/riscv.h @@ -0,0 +1,204 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#ifndef __ASSEMBLER__ +/* This file was originally filled with inline assembly routines but + * they have been moved to functions in riscv.S to ease porting between + * compilers (the alternative would be to have #ifdefs for different + 
* inline asm syntaxes). + */ + +// which hart (core) is this? +uint64 r_mhartid(); + +// Machine Status Register, mstatus + +#define MSTATUS_MPP_MASK (3L << 11) // previous mode. +#define MSTATUS_MPP_M (3L << 11) +#define MSTATUS_MPP_S (1L << 11) +#define MSTATUS_MPP_U (0L << 11) +#define MSTATUS_MIE (1L << 3) // machine-mode interrupt enable. + +uint64 r_mstatus(); + +void w_mstatus(uint64 x); + +// machine exception program counter, holds the +// instruction address to which a return from +// exception will go. +void w_mepc(uint64 x); + +// Supervisor Status Register, sstatus + +#define SSTATUS_SPP (1L << 8) // Previous mode, 1=Supervisor, 0=User +#define SSTATUS_SPIE (1L << 5) // Supervisor Previous Interrupt Enable +#define SSTATUS_UPIE (1L << 4) // User Previous Interrupt Enable +#define SSTATUS_SIE (1L << 1) // Supervisor Interrupt Enable +#define SSTATUS_UIE (1L << 0) // User Interrupt Enable + +uint64 r_sstatus(); + +void w_sstatus(uint64 x); + +// Supervisor Interrupt Pending +uint64 r_sip(); + +void w_sip(uint64 x); + +// Supervisor Interrupt Enable +#define SIE_SEIE (1L << 9) // external +#define SIE_STIE (1L << 5) // timer +#define SIE_SSIE (1L << 1) // software +uint64 r_sie(); + +void w_sie(uint64 x); + +// Machine-mode Interrupt Enable +#define MIE_STIE (1L << 5) // supervisor timer +uint64 r_mie(); + +void w_mie(uint64 x); + +// supervisor exception program counter, holds the +// instruction address to which a return from +// exception will go. +void w_sepc(uint64 x); + +uint64 r_sepc(); + +// Machine Exception Delegation +uint64 r_medeleg(); + +void w_medeleg(uint64 x); + +// Machine Interrupt Delegation +uint64 r_mideleg(); + +void w_mideleg(uint64 x); + +// Supervisor Trap-Vector Base Address +// low two bits are mode. +void w_stvec(uint64 x); + +uint64 r_stvec(); + +// Supervisor Timer Comparison Register +uint64 r_stimecmp(); + +void w_stimecmp(uint64 x); + +// Machine Environment Configuration Register +uint64 r_menvcfg(); + +void w_menvcfg(uint64 x); + +// Physical Memory Protection +void w_pmpcfg0(uint64 x); + +void w_pmpaddr0(uint64 x); + +// use riscv's sv39 page table scheme. +#define SATP_SV39 (8L << 60) + +#define MAKE_SATP(pagetable) (SATP_SV39 | (((uint64)(pagetable)) >> 12)) + +// supervisor address translation and protection; +// holds the address of the page table. +void w_satp(uint64 x); + +uint64 r_satp(); + +// Supervisor Trap Cause +uint64 r_scause(); + +// Supervisor Trap Value +uint64 r_stval(); + +// Machine-mode Counter-Enable +void w_mcounteren(uint64 x); + +uint64 r_mcounteren(); + +// machine-mode cycle counter +uint64 r_time(); + +/* These should really be moved back into C but were included in the .s + * for consistency. + * +// enable device interrupts +void +intr_on() +{ + w_sstatus(r_sstatus() | SSTATUS_SIE); +} + +// disable device interrupts +void +intr_off() +{ + w_sstatus(r_sstatus() & ~SSTATUS_SIE); +} + +// are device interrupts enabled? +int +intr_get() +{ + uint64 x = r_sstatus(); + return (x & SSTATUS_SIE) != 0; +} +*/ + +// enable device interrupts +void intr_on(); + +// disable device interrupts +void intr_off(); + +// are device interrupts enabled? +int intr_get(); + +uint64 r_sp(); + +// read and write tp, the thread pointer, which xv6 uses to hold +// this core's hartid (core number), the index into cpus[]. +//uint64 r_tp(); + +void w_tp(uint64 x); + +uint64 r_ra(); + +// flush the TLB. 
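w_satp() above and sfence_vma() below are normally used as a pair when installing a page table; the canonical sequence (as in xv6's kvminithart(); kernel_pagetable is an assumed variable, and this sketch is not part of the original patch) is:

//   sfence_vma();                          // flush stale translations first
//   w_satp(MAKE_SATP(kernel_pagetable));   // point satp at the root page-table page
//   sfence_vma();                          // flush again so the new table takes effect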
+void sfence_vma(); + +typedef uint64 pte_t; +typedef uint64 *pagetable_t; // 512 PTEs + +#endif // __ASSEMBLER__ + +#define PGSIZE 4096 // bytes per page +#define PGSHIFT 12 // bits of offset within a page + +#define PGROUNDUP(sz) (((sz)+PGSIZE-1) & ~(PGSIZE-1)) +#define PGROUNDDOWN(a) (((a)) & ~(PGSIZE-1)) + +#define PTE_V (1L << 0) // valid +#define PTE_R (1L << 1) +#define PTE_W (1L << 2) +#define PTE_X (1L << 3) +#define PTE_U (1L << 4) // user can access + +// shift a physical address to the right place for a PTE. +#define PA2PTE(pa) ((((uint64)pa) >> 12) << 10) + +#define PTE2PA(pte) (((pte) >> 10) << 12) + +#define PTE_FLAGS(pte) ((pte) & 0x3FF) + +// extract the three 9-bit page table indices from a virtual address. +#define PXMASK 0x1FF // 9 bits +#define PXSHIFT(level) (PGSHIFT+(9*(level))) +#define PX(level, va) ((((uint64) (va)) >> PXSHIFT(level)) & PXMASK) + +// one beyond the highest possible virtual address. +// MAXVA is actually one bit less than the max allowed by +// Sv39, to avoid having to sign-extend virtual addresses +// that have the high bit set. +#define MAXVA (1L << (9 + 9 + 9 + 12 - 1)) diff --git a/sched.c b/sched.c new file mode 100644 index 0000000..24300bb --- /dev/null +++ b/sched.c @@ -0,0 +1,308 @@ +// This is NEW CODE written by Zak, separated from the old proc.c code + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "sched.h" +#include "defs.h" +#include "sched.h" +#include "proc.h" +#include "bitarray.h" +#include "fpu.h" +#include "kprintf.h" + +struct bitarray *runnablearrays[NPRIO]; +struct bitarray *exhaustedarrays[NPRIO]; +struct bitarray *sleeping; +sched_core_t sched_cores[SCHED_CORE_MAX]; +extern struct proc proc[NPROC]; +int timeslice_max; +int timeslice_min; + +int sched_timesliceforpriority(int priority) { + if (priority < 0) priority = 0; + if (priority >= NPRIO) priority = NPRIO - 1; + return timeslice_min + ((timeslice_max - timeslice_min) / (priority + 1)); +} + +/* Attempts to restate a process. This should be avoided in situations where + * you're already locking other processes, you may need to use more specific code in + * some cases! This function assumes that the caller has locked the process. + */ +void sched_restate_alreadylocked(struct proc* p, sched_state_t s) { + if (p->state == s) { + return; + } + if (p->state == SCHED_STATE_RUNNABLE) { + bitarray_set(runnablearrays[p->prio], (int) (p - proc), 0); + bitarray_set(exhaustedarrays[p->prio], (int) (p - proc), 0); + } else if (p->state == SCHED_STATE_SLEEPING) { + bitarray_set(sleeping, (int) (p - proc), 0); + } + if (s == SCHED_STATE_RUNNABLE) { + //if (p->timeslice > 0) { + bitarray_set(runnablearrays[p->prio], (int) (p - proc), 1); + //} else { + // bitarray_set(exhaustedarrays[p->prio], (int) (p - proc), 1); + //} + for (int i = 0; i < 64; i++) { + unsigned long foo = 1 << i; + if (p->affinitymask & foo) { + sched_cores[i].preempted = 1; + } + } + } else if (s == SCHED_STATE_SLEEPING) { + bitarray_set(sleeping, (int) (p - proc), 1); + } + p->state = s; +} + +// My simplified function to set processor affinity of the caller (TODO: Finish this...) +int affin(uint64 mask) { + myproc()->affinitymask = mask; + yield(); + return 0; +} + +/* Wake any processes sleeping on the given pointer, ideally preempting + * any lower priority processes that might be running. + * + * NOTE: This can't be called while locking any processes. 
+ */ +void sched_wake(void* pointer) { + struct proc* me = myproc(); + for (int i = 0; i >= 0 && i < NPROC; i = bitarray_findlowest(sleeping, i+1)) { + struct proc* pr = &proc[i]; + if (pr == me) { + // Skip the calling process! + } else { + acquire(&pr->lock); + if (pr->chan == pointer) { + // If the pointer matches it will almost certainly be a candidate, + // but make sure it's actually sleeping first! + if (pr->state == SCHED_STATE_SLEEPING) { + // Then set the state to SCHED_STATE_RUNNABLE and preempt any lower priority + sched_restate_alreadylocked(pr, SCHED_STATE_RUNNABLE); + } + } + release(&pr->lock); + } + } +} + +/* Scans for the next SCHED_STATE_RUNNABLE-looking candidate, ideally WITHOUT locking each proc. */ +struct proc* sched_nextcandidate(int prio, struct proc* p) { + p++; + if (p < &proc[NPROC]) { + int idx = bitarray_findlowest(runnablearrays[prio], p->prio == prio ? (int) (p - proc) : 0); + if (idx >= 0) return &proc[idx]; + } + /*for(; p < &proc[NPROC]; p++) { + if (p->state == SCHED_STATE_RUNNABLE) { + return p; + } + }*/ + return 0ULL; +} + +// Called when the scheduler is idle to wait for interrupts +void sched_waitforinterrupts() { + intr_on(); // Ensure interrupts are actually on +#ifdef _ZCC + // Use straightforward inline assembler syntax + __asm { + wfi + }; +#else + asm volatile("wfi"); // GNU syntax +#endif +} + +int sched_reviveexhausted(int prioidx) { + int exhausted = 0; + int count = 0; + while ((exhausted = bitarray_poplowest(exhaustedarrays[prioidx], exhausted)) >= 0) { + //printf("Reviving process #%d (index #%d)\n", proc[exhausted].pid, exhausted); + proc[exhausted].timeslice = sched_timesliceforpriority(prioidx); + bitarray_set(runnablearrays[proc[exhausted].prio], exhausted, 1); + count++; + } + return count; +} + +// The scheduler is entered on each CPU core after initialisation, it loops +// selecting processes to switch to and (ideally) powering down the CPU when +// none are found. The new scheduler handles CPU affinity as well as priority, +// but is not yet an optimal algorithm. +void scheduler() { + sched_core_t* thiscpu = SCHED_CORE_THIS_NOINTERRUPTS(); + uint64 affinitymask = 1ULL << SCHED_CORE_THISNUMBER_NOINTERRUPTS(); + thiscpu->allowstarvation = 1; //(SCHED_CORE_THISNUMBER_NOINTERRUPTS() > 0 ? 1 : 0); // Default policy, run low-priority processes only on the main core + thiscpu->process = (void*)0ULL; + + while (1) { + int found_any = 0; + int found_here = 0; + int prioidx; + intr_on(); // Enable interrupts in case they are disabled + for (prioidx = 0; prioidx < NPRIO; prioidx++) { + do { + found_here = 0; + struct proc* thisproc; + //printf("PRIO%d\n", prioidx); + for (thisproc = proc; thisproc != 0ULL; thisproc = sched_nextcandidate(prioidx, thisproc)) { + acquire(&thisproc->lock); + + if ((thisproc->affinitymask & affinitymask) && thisproc->state == SCHED_STATE_RUNNABLE) { + bitarray_set(runnablearrays[prioidx] /* check this is == runnablearrays[thisproc->prio] ? 
*/, (int) (thisproc - proc), 0); + bitarray_set(runnablearrays[thisproc->prio], (int) (thisproc - proc), 0); + //enterprocess: + thisproc->state = SCHED_STATE_RUNNING; // Set the processes state to running, then unset it in the runnable arrays: + thiscpu->process = thisproc; // Set the CPU's process to this process + sched_switchcontext(&thiscpu->registers, &thisproc->context); // Perform the actual context switch + thisproc->timeslice -= 10000; + //printf("Timeslice=%d\n", thisproc->timeslice); + + if (thisproc->state == SCHED_STATE_RUNNING || thisproc->state == SCHED_STATE_RUNNABLE) { + if (thisproc->timeslice > 0) { + /* + thisproc->state = SCHED_STATE_RUNNABLE; + bitarray_set(runnablearrays[thisproc->prio], (int) (thisproc - proc), 1); + //goto enterprocess; + found_any = found_here = 1; + */ + } else { + //thisproc->state = SCHED_STATE_RUNNABLE; + //bitarray_set(runnablearrays[thisproc->prio], (int) (thisproc - proc), 0); + //bitarray_set(exhaustedarrays[thisproc->prio], (int) (thisproc - proc), 1); + } + } + + // Check if the FPU status is dirty + int fpustat = fpu_status_read(); + if (fpustat != 0) { + printf("FPU Status %d, saving state\n", fpustat); + fpu_save(&thiscpu->process->fpu_context); + thiscpu->process->fpu_active = 0; + thiscpu->process->fpu_saved = 1; + fpu_status_write(0); + printf("FPU Status %d after saving state\n", fpu_status_read()); + } + thiscpu->process = (void*)0ULL; // Process is finished, immediately exit it's context + } + + release(&thisproc->lock); + + if (found_here) { + int check = prioidx; + for (prioidx = 0; prioidx < check-1 && (bitarray_findlowest(runnablearrays[prioidx], 0) < 0 || (thiscpu->allowstarvation && (bitarray_findlowest(exhaustedarrays[prioidx], 0) < 0))); prioidx++) { + found_here = 0; // Reset the outer loop if more important processes may be runnable + //break; // End the inner loop + } + } + } + } while (found_here); + if (thiscpu->allowstarvation) { + // sched_reviveexhausted(prioidx); + } + } + if (!thiscpu->allowstarvation) { + for (int i = 0; i < NPRIO; i++) { + if (sched_reviveexhausted(i) > 0) { + found_any = 1; + } + } + } + if (!found_any) { + //printf("Not found any processes\n"); + // This function will enable interrupts and put the CPU in wait mode + sched_waitforinterrupts(); + } + } +} + +const char* sched_statename(sched_state_t s, int padded) { + switch (s) { + case SCHED_STATE_UNUSED: + return padded ? "SCHED_STATE_UNUSED " : "SCHED_STATE_UNUSED"; + case SCHED_STATE_USED: + return padded ? "SCHED_STATE_USED " : "SCHED_STATE_USED"; + case SCHED_STATE_SLEEPING: + return padded ? "SCHED_STATE_SLEEPING" : "SCHED_STATE_SLEEPING"; + case SCHED_STATE_RUNNABLE: + return padded ? "SCHED_STATE_RUNNABLE" : "SCHED_STATE_RUNNABLE"; + case SCHED_STATE_RUNNING: + return padded ? "SCHED_STATE_RUNNING " : "SCHED_STATE_RUNNING"; + case SCHED_STATE_ZOMBIE: + return padded ? "SCHED_STATE_ZOMBIE " : "SCHED_STATE_ZOMBIE"; + default: + return padded ? 
"BADSTATE" : "BADSTATE"; + } +} + +// Called when CTRL+p is pressed +void sched_dumpstatus() { + int i; // Iterator variable is reused + + printf("\n"); // Print a newline to improve alignment + + for (i = 0; i < NPROC; i++) { + if (proc[i].state != SCHED_STATE_UNUSED) { + printf("proc[%d] pid=%d state=%s name=\"%s\" prio=%d maxprio=%d\n", i, proc[i].pid, sched_statename(proc[i].state, 1), proc[i].name, proc[i].prio, proc[i].maxprio); + } + } + + printf("Sleeping: "); + for (i = 0; i < NPROC; i++) { + if (bitarray_getnolock(sleeping, i)) { + printf("%d ", i); + } + } + printf("\n"); + int prio; + for (prio = 0; prio < NPRIO; prio++) { + printf("Runnables priority #%d (main set): ", prio); + for (i = 0; i < NPROC; i++) { + if (bitarray_getnolock(runnablearrays[prio], i)) { + printf("%d ", i); + } + } + printf("\n"); + printf("Runnables priority #%d (exhausted): ", prio); + for (i = 0; i < NPROC; i++) { + if (bitarray_getnolock(exhaustedarrays[prio], i)) { + printf("%d ", i); + } + } + printf("\n"); + } + printf("RAM: %d MB total %d MB free\n", (int) (physpg_totalram()/MB), (int)(physpg_freeram()/MB)); +} + +/* + * hypothetical thread-sync code I never finished +int +thrsync() { + struct proc* p = myproc(); + struct proc* mainthread = p->mainthread; + if (!mainthread) { + return 0; + } + int unsynced; + do { + unsynced = 0; + for (int i = 0; i < NPROC; i++) { + struct proc* thr = &proc[i]; + if (thr != p) { + acquire(&thr->lock); + if (thr->mainthread == mainthread) { + // TODO ... + } + release(&thr->lock); + } + } + } while (unsynced); + return unsynced; +} +*/ diff --git a/sched.h b/sched.h new file mode 100644 index 0000000..0841fb7 --- /dev/null +++ b/sched.h @@ -0,0 +1,171 @@ +// This is NEW CODE a header to interact with the scheduler +#ifndef _SCHED_H +#define _SCHED_H + +// The size of one register on the processor. +typedef unsigned long long sched_reg_t; + +// The context saved between scheduler states must match EXACTLY the one in +// sched_switchcontext (see schedasm.S). This only needs to save the kernel's +// C-like execution state. +typedef struct sched_context sched_context_t; + +struct sched_context { + // WARNING: REMEMBER TO CHANGE schedasm.S if this ordering is modified! + sched_reg_t s0; + sched_reg_t s1; + sched_reg_t s2; + sched_reg_t s3; + sched_reg_t s4; + sched_reg_t s5; + sched_reg_t s6; + sched_reg_t s7; + sched_reg_t s8; + sched_reg_t s9; + sched_reg_t s10; + sched_reg_t s11; + sched_reg_t ra; + sched_reg_t sp; +}; + +// Trap frame used by the trampoline code (currently being migrated to +// new code) +// Data for each process is stored in the page below the user's +// trampoline code, but will be at an arbitrary address in kernel-mode +// (TODO: Better mapping for multithreading?). +// This is currently saved/restored by the old xv6 code so will be +// rearranged a bit as that is migrated (for now it should mostly match +// the old layout but with new types). 
+// See notes in trampoline.S +typedef struct sched_frame sched_frame_t; + +struct sched_frame { + // Offset 0: + sched_reg_t kmode_satp; // Kernel-mode page table + sched_reg_t kmode_sp; // Kernel-mode stack pointer + sched_reg_t kmode_trap; // Address of usertrap() function or similar + sched_reg_t epc; // User-mode program counter + // Offset 32: + sched_reg_t kmode_hartid; // Kernel-mode thread pointer register + sched_reg_t ra; + sched_reg_t sp; + sched_reg_t gp; + // Offset 64: + sched_reg_t tp; + sched_reg_t t0; + sched_reg_t t1; + sched_reg_t t2; + // Offset 96 + sched_reg_t s0; + sched_reg_t s1; + sched_reg_t a0; + sched_reg_t a1; + // Offset 128 + sched_reg_t a2; + sched_reg_t a3; + sched_reg_t a4; + sched_reg_t a5; + // Offset 160 + sched_reg_t a6; + sched_reg_t a7; + sched_reg_t s2; + sched_reg_t s3; + // Offset 192 + sched_reg_t s4; + sched_reg_t s5; + sched_reg_t s6; + sched_reg_t s7; + // Offset 224: + sched_reg_t s8; + sched_reg_t s9; + sched_reg_t s10; + sched_reg_t s11; + // Offset 256: + sched_reg_t t3; + sched_reg_t t4; + sched_reg_t t5; + sched_reg_t t6; +}; + + +// Switches context by storing registers into oldstruct and loading different +// values from newstruct, then returns oldstruct as a value to the new context. +sched_context_t* sched_switchcontext(sched_context_t* oldstruct, sched_context_t* newstruct); + +// Each CPU core is referred to as "core" in this system, since there may be a +// need to represent more complex topologies with multiple clusters of multiple +// cores etc. where each layer would otherwise all be confusable for "CPUs". +// A CPU core is defined as physical or virtual hardware implementing 1 main +// thread of execution. +typedef struct sched_core sched_core_t; + +// Spinlocks are still mostly implemented by old code but are defined here +// due to being tightly integrated into the scheduler. +typedef struct sched_spinlock sched_spinlock_t; + +struct sched_spinlock { + int lockvar; + int padding; + char* debugstring; // Name for debugging + sched_core_t* core; // The CPU core that's using this lock +}; + +typedef struct sched_sleeplock sched_sleeplock_t; + +struct sched_sleeplock { + unsigned int islocked; + int padding; + sched_spinlock_t spin; // The spinlock used internally + int pid; // The process id of the waiting process + char* debugstring; // Name for debugging (TODO: This is kind of redundant as spinlock has a debugstring already. 
+}; + +struct sched_core { + struct proc* process; + sched_context_t registers; + int preempted; + int allowstarvation; + int interruptsoff_depth; + int interruptsoff_wereinterruptson; +}; + +#define SCHED_CORE_MAX 64 + +extern sched_core_t sched_cores[SCHED_CORE_MAX]; + +// These are implemented in schedasm.S +sched_reg_t sched_cputhreadpointer_get(); +void sched_cputhreadpointer_set(sched_reg_t value); + +// Returns the core number, can't be called with interrupts enabled in case process is rescheduled +#define SCHED_CORE_THISNUMBER_NOINTERRUPTS() \ + ((int)sched_cputhreadpointer_get()) + +// Returns this core's sched_core_t*, can't be called with interrupts enabled in case process is rescheduled +#define SCHED_CORE_THIS_NOINTERRUPTS() \ + (&sched_cores[SCHED_CORE_THISNUMBER_NOINTERRUPTS()]) + +typedef int sched_state_t; + +#define SCHED_STATE_UNUSED ((sched_state_t)0) +#define SCHED_STATE_USED ((sched_state_t)1) +#define SCHED_STATE_SLEEPING ((sched_state_t)2) +#define SCHED_STATE_RUNNABLE ((sched_state_t)3) +#define SCHED_STATE_RUNNING ((sched_state_t)4) +#define SCHED_STATE_ZOMBIE ((sched_state_t)5) + +void sched_restate_alreadylocked(struct proc* p, sched_state_t s); + +typedef struct sched_task sched_task_t; +typedef struct sched_thread sched_thread_t; + +extern int timeslice_min; +extern int timeslice_max; + +void sched_wake(void* pointer); + +void sched_dumpstatus(); // Dump scheduler information to console. + +// From ifndef at top of file: +#endif + diff --git a/schedasm.S b/schedasm.S new file mode 100644 index 0000000..440a666 --- /dev/null +++ b/schedasm.S @@ -0,0 +1,48 @@ +// This is NEW CODE similar to xv6's swtch.S but written for the new sched.h structures +// IMPORTANT NOTE: This code must match the structure in sched.h precisely! 
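+// The offsets below simply follow field order in struct sched_context, with
+// each sched_reg_t taking 8 bytes: s0..s11 live at offsets 0..88, ra at 96 and
+// sp at 104.
+//
+// Usage sketch (the yield/return path is an assumption; it is not shown in
+// this file): the scheduler enters a process with
+//
+//   sched_switchcontext(&thiscpu->registers, &thisproc->context);
+//
+// and the process is expected to hand control back later by calling
+// sched_switchcontext again with the two arguments swapped.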
+ +.text +.option norvc +.globl sched_switchcontext +sched_switchcontext: + sd s0, 0 (a0) + sd s1, 8 (a0) + sd s2, 16 (a0) + sd s3, 24 (a0) + sd s4, 32 (a0) + sd s5, 40 (a0) + sd s6, 48 (a0) + sd s7, 56 (a0) + sd s8, 64 (a0) + sd s9, 72 (a0) + sd s10, 80 (a0) + sd s11, 88 (a0) + sd ra, 96 (a0) + sd sp, 104 (a0) + + ld sp, 104 (a1) + ld ra, 96 (a1) + ld s11, 88 (a1) + ld s10, 80 (a1) + ld s9, 72 (a1) + ld s8, 64 (a1) + ld s7, 56 (a1) + ld s6, 48 (a1) + ld s5, 40 (a1) + ld s4, 32 (a1) + ld s3, 24 (a1) + ld s2, 16 (a1) + ld s1, 8 (a1) + ld s0, 0 (a1) + + ret + +.globl sched_cputhreadpointer_set +sched_cputhreadpointer_set: + add tp, a0, zero + ret + +.globl sched_cputhreadpointer_get +sched_cputhreadpointer_get: + add a0, tp, zero + ret diff --git a/sleeplock.c b/sleeplock.c new file mode 100644 index 0000000..6d8823d --- /dev/null +++ b/sleeplock.c @@ -0,0 +1,56 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// Sleeping locks + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "proc.h" +#include "sched.h" + +void +initsleeplock(sched_sleeplock_t *lk, char *name) +{ + initlock(&lk->spin, "sleep lock"); + lk->debugstring = name; + lk->islocked = 0; + lk->pid = 0; +} + +void +acquiresleep(sched_sleeplock_t *lk) +{ + acquire(&lk->spin); + while (lk->islocked) { + sleep(lk, &lk->spin); + } + lk->islocked = 1; + lk->pid = myproc()->pid; + release(&lk->spin); +} + +void +releasesleep(sched_sleeplock_t *lk) +{ + acquire(&lk->spin); + lk->islocked = 0; + lk->pid = 0; + sched_wake(lk); + release(&lk->spin); +} + +int +holdingsleep(sched_sleeplock_t *lk) +{ + int r; + + acquire(&lk->spin); + r = lk->islocked && (lk->pid == myproc()->pid); + release(&lk->spin); + return r; +} + + + diff --git a/spinlock.c b/spinlock.c new file mode 100644 index 0000000..babf2c0 --- /dev/null +++ b/spinlock.c @@ -0,0 +1,113 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// Mutual exclusion spin locks. + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "riscv.h" +#include "proc.h" +#include "defs.h" +#include "sched.h" +#include "kprintf.h" + +void +initlock(sched_spinlock_t *lk, char *name) +{ + lk->debugstring = name; + lk->lockvar = 0; + lk->core = 0; +} + +// Acquire the lock. +// Loops (spins) until the lock is acquired. +void +acquire(sched_spinlock_t *lk) +{ + push_off(); // disable interrupts to avoid deadlock. + if(holding(lk)) + panic("acquire"); + + // On RISC-V, sync_lock_test_and_set turns into an atomic swap: + // a5 = 1 + // s1 = &lk->lockvar + // amoswap.w.aq a5, a5, (s1) + while(__sync_lock_test_and_set(&(lk->lockvar), 1) != 0) + ; + + // Tell the C compiler and the processor to not move loads or stores + // past this point, to ensure that the critical section's memory + // references happen strictly after the lock is acquired. + // On RISC-V, this emits a fence instruction. + __sync_synchronize(); + + // Record info about lock acquisition for holding() and debugging. + lk->core = SCHED_CORE_THIS_NOINTERRUPTS(); +} + +// Release the lock. +void +release(sched_spinlock_t *lk) +{ + if(!holding(lk)) + panic("release"); + + lk->core = 0; + + // Tell the C compiler and the CPU to not move loads or stores + // past this point, to ensure that all the stores in the critical + // section are visible to other CPUs before the lock is released, + // and that loads in the critical section occur strictly before + // the lock is released. 
+ // On RISC-V, this emits a fence instruction. + __sync_synchronize(); + + // Release the lock, equivalent to lk->lockvar = 0. + // This code doesn't use a C assignment, since the C standard + // implies that an assignment might be implemented with + // multiple store instructions. + // On RISC-V, sync_lock_release turns into an atomic swap: + // s1 = &lk->lockvar + // amoswap.w zero, zero, (s1) + __sync_lock_release(&lk->lockvar); + + pop_off(); +} + +// Check whether this cpu is holding the lock. +// Interrupts must be off. +int +holding(sched_spinlock_t *lk) +{ + int r; + r = (lk->lockvar && lk->core == SCHED_CORE_THIS_NOINTERRUPTS()); + return r; +} + +// push_off/pop_off are like intr_off()/intr_on() except that they are matched: +// it takes two pop_off()s to undo two push_off()s. Also, if interrupts +// are initially off, then push_off, pop_off leaves them off. + +void +push_off(void) +{ + int old = intr_get(); + + intr_off(); + if(SCHED_CORE_THIS_NOINTERRUPTS()->interruptsoff_depth == 0) + SCHED_CORE_THIS_NOINTERRUPTS()->interruptsoff_wereinterruptson = old; + SCHED_CORE_THIS_NOINTERRUPTS()->interruptsoff_depth += 1; +} + +void +pop_off(void) +{ + sched_core_t* c = SCHED_CORE_THIS_NOINTERRUPTS(); + if(intr_get()) + panic("pop_off - interruptible"); + if(c->interruptsoff_depth < 1) + panic("pop_off"); + c->interruptsoff_depth -= 1; + if(c->interruptsoff_depth == 0 && c->interruptsoff_wereinterruptson) + intr_on(); +} diff --git a/start.c b/start.c new file mode 100644 index 0000000..0ae4143 --- /dev/null +++ b/start.c @@ -0,0 +1,76 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "defs.h" +#include "sched.h" + +void main(); +void timerinit(); + +// entry.S needs one stack per CPU. +/*TODO __attribute__ ((aligned (16))) char stack0[4096 * NCPU]; */ +uint64 stack0[512*SCHED_CORE_MAX]; + +// entry.S jumps here in machine mode on stack0. +void +start() +{ + //consputc(65); + // set M Previous Privilege mode to Supervisor, for mret. + unsigned long x = r_mstatus(); + x &= ~MSTATUS_MPP_MASK; + x |= MSTATUS_MPP_S; + w_mstatus(x); + + // set M Exception Program Counter to main, for mret. + // requires gcc -mcmodel=medany + w_mepc((uint64)(&main)); + + // disable paging for now. + w_satp(0); + + // delegate all interrupts and exceptions to supervisor mode. + w_medeleg(0xffff); + w_mideleg(0xffff); + w_sie(r_sie() | SIE_SEIE | SIE_STIE | SIE_SSIE); + + // configure Physical Memory Protection to give supervisor mode + // access to all of physical memory. + w_pmpaddr0(0x3fffffffffffffull); + w_pmpcfg0(0xf); + + // ask for clock interrupts. + timerinit(); + + // keep each CPU's hartid in its tp register, for SCHED_CORE_THISNUMBER_NOINTERRUPTS(). + int _id = r_mhartid(); + sched_cputhreadpointer_set(_id); + + // switch to supervisor mode and jump to main(). + #ifdef _ZCC + __asm { + mret + }; + #else + asm volatile("mret"); + #endif +} + +// ask each hart to generate timer interrupts. +void +timerinit() +{ + // enable supervisor-mode timer interrupts. + w_mie(r_mie() | MIE_STIE); + + // enable the sstc extension (i.e. stimecmp). + w_menvcfg(r_menvcfg() | (1L << 63)); + + // allow supervisor to use stimecmp and time. + w_mcounteren(r_mcounteren() | 2); + + // ask for the very first timer interrupt. 
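+  // 1000000 timer ticks is roughly a tenth of a second (the same interval
+  // clockintr() re-arms with), which assumes QEMU's ~10 MHz timebase.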
+ w_stimecmp(r_time() + 1000000); +} diff --git a/stat.h b/stat.h new file mode 100644 index 0000000..5a40d24 --- /dev/null +++ b/stat.h @@ -0,0 +1,13 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#define T_DIR 1 // Directory +#define T_FILE 2 // File +#define T_DEVICE 3 // Device + +struct stat { + int dev; // File system's disk device + uint ino; // Inode number + short type; // Type of file + short nlink; // Number of links to file + int _pad; // Only for compiler cross-compatibility, as alignment bugs are currently a problem + uint64 size; // Size of file in bytes +}; diff --git a/string.c b/string.c new file mode 100644 index 0000000..aa5424a --- /dev/null +++ b/string.c @@ -0,0 +1,82 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" + +#define KERNEL_MODE +#include + +int +memcmp(const void *v1, const void *v2, uint n) +{ + const uchar *s1, *s2; + + s1 = v1; + s2 = v2; + while(n-- > 0){ + if(*s1 != *s2) + return *s1 - *s2; + s1++, s2++; + } + + return 0; +} + +void* +memmove(void *dst, const void *src, uint n) +{ + const char *s; + char *d; + + if(n == 0) + return dst; + + s = src; + d = dst; + if(s < d && s + n > d){ + s += n; + d += n; + while(n-- > 0) + *--d = *--s; + } else { + #ifdef _ZCC + //if (n >= 8 && (d + n < s || s + n < d)) { + // return memcpy_opt(dst, src, n); + /*uint smalln = n %8; + uint bign = n - smalln; + memcpy_quick64(dst, src, bign); + d += bign; + s += bign; + n = smalln;*/ + //} + #endif + while(n-- > 0) + *d++ = *s++; + } + + return dst; +} + +int +strncmp(const char *p, const char *q, uint n) +{ + while(n > 0 && *p && *p == *q) + n--, p++, q++; + if(n == 0) + return 0; + return (uchar)*p - (uchar)*q; +} + +// Like strncpy but guaranteed to NUL-terminate. 
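+// Illustrative call (hypothetical caller): safestrcpy(p->name, "init",
+// sizeof(p->name)) copies at most sizeof(p->name)-1 bytes and always leaves
+// p->name NUL-terminated, even if the source string had to be truncated.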
+char* +safestrcpy(char *s, const char *t, int n) +{ + char *os; + + os = s; + if(n <= 0) + return os; + while(--n > 0 && (*s++ = *t++) != 0) + ; + *s = 0; + return os; +} + diff --git a/sync.c b/sync.c new file mode 100644 index 0000000..85921a1 --- /dev/null +++ b/sync.c @@ -0,0 +1,69 @@ +// This is NEW CODE to implement the sync operations supported by other C compilers + +#include "sync.h" + +#ifdef _ZCC + +void __naked __sync_synchronize() __asm { + //fence + fence rw,rw + ret +} +unsigned int __naked __sync_lock_test_and_set(unsigned int* p, unsigned int v) __asm { + amoswap.w.aq a1, a1, (a0) + mv a0, a1 + ret + // a5 = 1 + // s1 = &lk->locked + // amoswap.w.aq a5, a5, (s1) + //uint old = *p; + // *p = v; + //return old; +} +void __naked __sync_lock_release(unsigned int* p) __asm { + amoswap.w zero, zero, (a0) + ret + // *p = 0; +} +/* +void __naked __sync_synchronize() __asm { + addi sp,sp,-16 + sd s0,8(sp) + addi s0,sp,16 + fence rw,rw + nop + ld s0,8(sp) + addi sp,sp,16 + jr ra +} +uint __naked __sync_lock_test_and_set(uint*p, uint v) __asm { + addi sp,sp,-32 + sd s0,24(sp) + addi s0,sp,32 + sd a0,-24(s0) + mv a5,a1 + sw a5,-28(s0) + ld a5,-24(s0) + lw a4,-28(s0) + amoswap.w.aq a4,a4,0(a5) + sext.w a5,a4 + mv a0,a5 + ld s0,24(sp) + addi sp,sp,32 + jr ra +} +void __naked __sync_lock_release(uint* p) __asm { + addi sp,sp,-32 + sd s0,24(sp) + addi s0,sp,32 + sd a0,-24(s0) + ld a5,-24(s0) + fence rw,w + sw zero,0(a5) + nop + ld s0,24(sp) + addi sp,sp,32 + jr ra +} +*/ +#endif diff --git a/sync.h b/sync.h new file mode 100644 index 0000000..1c40c9b --- /dev/null +++ b/sync.h @@ -0,0 +1,12 @@ +// This is NEW CODE to implement the sync operations supported by other C compilers +#ifndef _SYNC_H +#define _SYNC_H + +#ifdef _ZCC +void __sync_synchronize(); +unsigned int __sync_lock_test_and_set(unsigned int* p, unsigned int v); +void __sync_lock_release(unsigned int* p); +#endif + +// From ifndef at top of file: +#endif diff --git a/syscall.c b/syscall.c new file mode 100644 index 0000000..9a6e9c5 --- /dev/null +++ b/syscall.c @@ -0,0 +1,82 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "sched.h" +#include "proc.h" +#include "syscall.h" +#include "defs.h" +#include "kprintf.h" + +// Fetch the uint64 at addr from the current process. +int +fetchaddr(uint64 addr, uint64 *ip) +{ + struct proc *p = myproc(); + if(addr >= p->sz || addr+sizeof(uint64) > p->sz) // both tests needed, in case of overflow + return -1; + if(copyin(p->pagetable, (char *)ip, addr, sizeof(*ip)) != 0) + return -1; + return 0; +} + +// Fetch the nul-terminated string at addr from the current process. +// Returns length of string, not including nul, or -1 for error. +int +fetchstr(uint64 addr, char *buf, int max) +{ + struct proc *p = myproc(); + if(copyinstr(p->pagetable, buf, addr, max) < 0) + return -1; + return strlen(buf); +} + +static uint64 +argraw(int n) +{ + struct proc *p = myproc(); + switch (n) { + case 0: + return p->trapframe->a0; + case 1: + return p->trapframe->a1; + case 2: + return p->trapframe->a2; + case 3: + return p->trapframe->a3; + case 4: + return p->trapframe->a4; + case 5: + return p->trapframe->a5; + } + panic("argraw"); + return -1; +} + +// Fetch the nth 32-bit system call argument. +void +argint(int n, int *ip) +{ + *ip = argraw(n); +} + +// Retrieve an argument as a pointer. +// Doesn't check for legality, since +// copyin/copyout will do that. 
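+// Typical use (see sys_read in sysfile.c): argaddr(1, &p) fetches the user
+// buffer pointer and argint(2, &n) the byte count; the pointer itself is only
+// validated later, by the copyin/copyout calls that actually touch user memory.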
+void +argaddr(int n, uint64 *ip) +{ + *ip = argraw(n); +} + +// Fetch the nth word-sized system call argument as a null-terminated string. +// Copies into buf, at most max. +// Returns string length if OK (including nul), -1 if error. +int +argstr(int n, char *buf, int max) +{ + uint64 addr; + argaddr(n, &addr); + return fetchstr(addr, buf, max); +} diff --git a/syscall.h b/syscall.h new file mode 100644 index 0000000..f4dc206 --- /dev/null +++ b/syscall.h @@ -0,0 +1,31 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// System call numbers +#define SYS_fork 1 +#define SYS_exit 2 +#define SYS_wait 3 +#define SYS_pipe 4 +#define SYS_read 5 +#define SYS_kill 6 +#define SYS_fstat 8 +#define SYS_chdir 9 +#define SYS_dup 10 +#define SYS_getpid 11 +#define SYS_sbrk 12 +#define SYS_sleep 13 +#define SYS_uptime 14 +#define SYS_open 15 +#define SYS_write 16 +#define SYS_mknod 17 +#define SYS_unlink 18 +#define SYS_link 19 +#define SYS_mkdir 20 +#define SYS_close 21 +// New system calls start at 32 +#define SYS_prio 32 +#define SYS_affin 33 +#define SYS_thrd 34 +#define SYS_execve 35 +#define SYS_drvinf 36 +#define SYS_lsdir 37 +#define SYS_kqueue1 38 +#define SYS_kevent 39 diff --git a/syscdefs.h b/syscdefs.h new file mode 100644 index 0000000..e11e328 --- /dev/null +++ b/syscdefs.h @@ -0,0 +1,22 @@ +// This is NEW CODE defining structures & constants needed by system calls. +// This code should be used by both the kernel and the user-mode programs or +// libraries attempting to use any complex system calls. +#ifndef _SYSCDEFS_H_ +#define _SYSCDEFS_H_ + +// This needs to be the same limit as in drives.h, but probably doesn't need to +// change much: +#define __SYSCDEFS_DRIVES_NAMEMAX 24 + +struct __syscdefs_driveinfo { + int drivenumber; + unsigned int blocksize; + unsigned long long freedatablocks; + unsigned long long totalblocks; + char name[__SYSCDEFS_DRIVES_NAMEMAX]; + char fsname[__SYSCDEFS_DRIVES_NAMEMAX]; +}; + +// From ifndef at top of file: +#endif + diff --git a/sysfile.c b/sysfile.c new file mode 100644 index 0000000..1a1f3b1 --- /dev/null +++ b/sysfile.c @@ -0,0 +1,567 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// +// File-system system calls. +// Mostly argument checking, since we don't trust +// user code, and calls into file.c and fs.c. +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "stat.h" +#include "sched.h" +#include "proc.h" +#include "fs.h" +#include "mkfs/fsformat.h" +#include "sched.h" +#include "file.h" +#include "fcntl.h" +#include "drives.h" +#include "kprintf.h" + +// Fetch the nth word-sized system call argument as a file descriptor +// and return both the descriptor and the corresponding struct file. +int +argfd(int n, int *pfd, struct file **pf) +{ + int fd; + struct file *f; + + argint(n, &fd); + if(fd < 0 || fd >= NOFILE || (f=myproc()->ofile[fd]) == 0) + return -1; + if(pfd) + *pfd = fd; + if(pf) + *pf = f; + return 0; +} + +// Allocate a file descriptor for the given file. +// Takes over file reference from caller on success. 
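+// Usage pattern (as in sys_dup below): allocate the slot first, then bump the
+// file's reference count once the descriptor is known to be valid:
+//   if((fd = fdalloc(f)) < 0)
+//     return -1;
+//   filedup(f);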
+static int +fdalloc(struct file *f) +{ + int fd; + struct proc *p = myproc(); + + for(fd = 0; fd < NOFILE; fd++){ + if(p->ofile[fd] == 0){ + p->ofile[fd] = f; + return fd; + } + } + return -1; +} + +uint64 +sys_dup(void) +{ + struct file *f; + int fd; + + if(argfd(0, 0, &f) < 0) + return -1; + if((fd=fdalloc(f)) < 0) + return -1; + filedup(f); + return fd; +} + +uint64 +sys_read(void) +{ + struct file *f; + int n; + uint64 p; + + argaddr(1, &p); + argint(2, &n); + if(argfd(0, 0, &f) < 0) + return -1; + return fileread(f, p, n); +} + +uint64 +sys_write(void) +{ + struct file *f; + int n; + uint64 p; + + argaddr(1, &p); + argint(2, &n); + if(argfd(0, 0, &f) < 0) + return -1; + + return filewrite(f, p, n); +} + +uint64 +sys_close(void) +{ + int fd; + struct file *f; + + if(argfd(0, &fd, &f) < 0) + return -1; + myproc()->ofile[fd] = 0; + fileclose(f); + return 0; +} + +uint64 +sys_fstat(void) +{ + struct file *f; + uint64 st; // user pointer to struct stat + + argaddr(1, &st); + if(argfd(0, 0, &f) < 0) + return -1; + return filestat(f, st); +} + +// Create the path new as a link to the same inode as old. +uint64 +sys_link(void) +{ + char name[FSFORMAT_NAMESIZE_NEW], new[MAXPATH], old[MAXPATH]; + fsinstance_inode_t *dp, *ip; + + if(argstr(0, old, MAXPATH) < 0 || argstr(1, new, MAXPATH) < 0) + return -1; + + fsinstance_t* instance = drives_fsbegin(myproc()->drives, myproc()->cwdrive, old); + if((ip = drives_fsnode(myproc()->drives, myproc()->cwdrive, old, 0)) == NULL) { // /*fsinstance_lookup(old)*/) == 0){ + drives_fsend(myproc()->drives, instance); + return -1; + } + + fsinstance_inode_lockandload(ip); + if(ip->type == T_DIR){ + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return -1; + } + + ip->nlink++; + fsinstance_inode_save(ip); + fsinstance_inode_unlock(ip); + + if((dp = drives_fsparent(myproc()->drives, myproc()->cwdrive, new, 0, name, FSFORMAT_NAMESIZE_NEW) /*fsinstance_lookupparent(new, name)*/) == NULL) + goto bad; + fsinstance_inode_lockandload(dp); + if(dp->instance != ip->instance || dp->device != ip->device || fsinstance_inode_insert(dp, name, ip->inodenumber) < 0){ + fsinstance_inode_unlockandunget(dp); + goto bad; + } + fsinstance_inode_unlockandunget(dp); + fsinstance_inode_unget(ip); + + drives_fsend(myproc()->drives, instance); + + return 0; + +bad: + fsinstance_inode_lockandload(ip); + ip->nlink--; + fsinstance_inode_save(ip); + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return -1; +} + +// Is the directory dp empty except for "." and ".." ? 
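+// Both branches below start scanning at off = 2*sizeof(de), i.e. they skip the
+// first two entries ("." and ".."), and report "not empty" as soon as any later
+// entry still carries a non-zero inode number.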
+static int +isdirempty(fsinstance_inode_t *dp) +{ + if (dp->instance->fsversion == 0) { + int off; + fsformat_dirent_v0_t de; + + for(off=2*sizeof(de); offsize; off+=sizeof(de)){ + if(fsinstance_inode_read(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) + panic("isdirempty: readi"); + if(de.inodenumber != 0) + return 0; + } + return 1; + } else { + int off; + fsformat_dirent_v1_t de; + + for(off=2*sizeof(de); offsize; off+=sizeof(de)){ + if(fsinstance_inode_read(dp, 0, (uint64)&de, off, sizeof(de)) != sizeof(de)) + panic("isdirempty: readi"); + if(de.datainode != 0) + return 0; + } + return 1; + } +} + +uint64 +sys_unlink(void) +{ + fsinstance_inode_t *ip, *dp; + fsformat_dirent_v0_t de_v0; + fsformat_dirent_v1_t de_v1; + char name[FSFORMAT_NAMESIZE_NEW], path[MAXPATH]; + uint off; + + if(argstr(0, path, MAXPATH) < 0) + return -1; + + struct proc* p = myproc(); + fsinstance_t* instance = drives_fsbegin(p->drives, p->cwdrive, path); + if((dp = drives_fsparent(myproc()->drives, myproc()->cwdrive, path, 0, name, FSFORMAT_NAMESIZE_NEW) /*fsinstance_lookupparent(path, name)*/) == 0){ + drives_fsend(myproc()->drives, instance); + return -1; + } + + fsinstance_inode_lockandload(dp); + + // Cannot unlink "." or "..". + if(fsinstance_nameseq(instance, name, ".") || fsinstance_nameseq(instance, name, "..")) + goto bad; + + if((ip = fsinstance_inode_lookup(dp, name, &off)) == 0) + goto bad; + fsinstance_inode_lockandload(ip); + + if(ip->nlink < 1) + panic("unlink: nlink < 1"); + if(ip->type == T_DIR && !isdirempty(ip)){ + fsinstance_inode_unlockandunget(ip); + goto bad; + } + + if (instance->fsversion == 0) { + memset(&de_v0, 0, sizeof(de_v0)); + if(fsinstance_inode_write(dp, 0, (uint64)&de_v0, off, sizeof(de_v0)) != sizeof(de_v0)) + panic("unlink: writei"); + } else { + memset(&de_v1, 0, sizeof(de_v1)); + if(fsinstance_inode_write(dp, 0, (uint64)&de_v1, off, sizeof(de_v1)) != sizeof(de_v1)) + panic("unlink: writei"); + } + if(ip->type == T_DIR){ + dp->nlink--; + fsinstance_inode_save(dp); + } + fsinstance_inode_unlockandunget(dp); + + ip->nlink--; + fsinstance_inode_save(ip); + fsinstance_inode_unlockandunget(ip); + + drives_fsend(p->drives, instance); + + return 0; + +bad: + fsinstance_inode_unlockandunget(dp); + drives_fsend(myproc()->drives, instance); + return -1; +} + +static fsinstance_inode_t* +create(char *path, short type, short major, short minor) +{ + fsinstance_inode_t *ip, *dp; + char name[FSFORMAT_NAMESIZE_NEW]; + + if((dp = drives_fsparent(myproc()->drives, myproc()->cwdrive, path, 0, name, FSFORMAT_NAMESIZE_NEW) /*fsinstance_lookupparent(path, name)*/) == 0) + return 0; + + fsinstance_inode_lockandload(dp); + + if((ip = fsinstance_inode_lookup(dp, name, 0)) != 0){ + fsinstance_inode_unlockandunget(dp); + fsinstance_inode_lockandload(ip); + if(type == T_FILE && (ip->type == T_FILE || ip->type == T_DEVICE)) + return ip; + fsinstance_inode_unlockandunget(ip); + return 0; + } + + if((ip = fsinstance_allocinode(dp->instance, dp->device, type)) == 0){ + fsinstance_inode_unlockandunget(dp); + return 0; + } + + fsinstance_inode_lockandload(ip); + ip->major = major; + ip->minor = minor; + ip->nlink = 1; + fsinstance_inode_save(ip); + + if(type == T_DIR){ // Create . and .. entries. + // No ip->nlink++ for ".": avoid cyclic ref count. 
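+    // ("." refers back to ip itself, so counting that link would keep the
+    // directory's link count from ever reaching zero once it is created.)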
+ if(fsinstance_inode_insert(ip, ".", ip->inodenumber) < 0 || fsinstance_inode_insert(ip, "..", dp->inodenumber) < 0) + goto fail; + } + + if(fsinstance_inode_insert(dp, name, ip->inodenumber) < 0) + goto fail; + + if(type == T_DIR){ + // now that success is guaranteed: + dp->nlink++; // for ".." + fsinstance_inode_save(dp); + } + + fsinstance_inode_unlockandunget(dp); + + return ip; + + fail: + // something went wrong. de-allocate ip. + ip->nlink = 0; + fsinstance_inode_save(ip); + fsinstance_inode_unlockandunget(ip); + fsinstance_inode_unlockandunget(dp); + return 0; +} + +uint64 +sys_open(void) +{ + char path[MAXPATH]; + int fd, omode; + struct file *f; + fsinstance_inode_t *ip; + int n; + + argint(1, &omode); + if((n = argstr(0, path, MAXPATH)) < 0) + return -1; + + fsinstance_t* instance = drives_fsbegin(myproc()->drives, myproc()->cwdrive, path); + + if(omode & O_CREATE){ + ip = create(path, T_FILE, 0, 0); + if(ip == 0){ + drives_fsend(myproc()->drives, instance); + return -1; + } + } else { + if((ip = drives_fsnode(myproc()->drives, myproc()->cwdrive, path, 0) /*fsinstance_lookup(path)*/) == 0){ + drives_fsend(myproc()->drives, instance); + return -1; + } + fsinstance_inode_lockandload(ip); + if(ip->type == T_DIR && omode != O_RDONLY){ + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return -1; + } + } + + if(ip->type == T_DEVICE && (ip->major < 0 || ip->major >= NDEV)){ + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return -1; + } + + if((f = filealloc()) == 0 || (fd = fdalloc(f)) < 0){ + if(f) + fileclose(f); + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return -1; + } + + if(ip->type == T_DEVICE){ + f->type = FD_DEVICE; + f->major = ip->major; + } else { + f->type = FD_INODE; + f->off = 0; + } + f->ip = ip; + f->readable = !(omode & O_WRONLY); + f->writable = (omode & O_APPEND) ? 2 : (((omode & O_WRONLY) || (omode & O_RDWR)) ? 
1 : 0); + + if((omode & O_TRUNC) && ip->type == T_FILE){ + fsinstance_inode_deletecontents(ip); + } + + fsinstance_inode_unlock(ip); + drives_fsend(myproc()->drives, instance); + + return fd; +} + +uint64 +sys_mkdir(void) +{ + char path[MAXPATH]; + fsinstance_inode_t *ip; + + if (argstr(0, path, MAXPATH) < 0) { + return -1; + } + + fsinstance_t* instance = drives_fsbegin(myproc()->drives, myproc()->cwdrive, path); + if ((ip = create(path, T_DIR, 0, 0)) == 0){ + drives_fsend(myproc()->drives, instance); + return -1; + } + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return 0; +} + +uint64 +sys_mknod(void) +{ + fsinstance_inode_t *ip; + char path[MAXPATH]; + int major, minor; + + fsinstance_t* instance = drives_fsbegin(myproc()->drives, myproc()->cwdrive, path); + argint(1, &major); + argint(2, &minor); + if((argstr(0, path, MAXPATH)) < 0 || + (ip = create(path, T_DEVICE, major, minor)) == 0){ + drives_fsend(myproc()->drives, instance); + return -1; + } + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return 0; +} + +uint64 +sys_chdir(void) +{ + char path[MAXPATH]; + fsinstance_inode_t *ip; + struct proc *p = myproc(); + + fsinstance_t* instance = drives_fsbegin(myproc()->drives, myproc()->cwdrive, path); + if(argstr(0, path, MAXPATH) < 0 || (ip = drives_fsnode(myproc()->drives, myproc()->cwdrive, path, 0) /*fsinstance_lookup(path)*/) == 0){ + drives_fsend(myproc()->drives, instance); + return -1; + } + fsinstance_inode_lockandload(ip); + if(ip->type != T_DIR){ + fsinstance_inode_unlockandunget(ip); + drives_fsend(myproc()->drives, instance); + return -1; + } + fsinstance_inode_unlock(ip); + fsinstance_inode_unget(p->cwd); + drives_fsend(myproc()->drives, instance); + p->cwd = ip; + return 0; +} + +uint64 +sys_execve(void) +{ + char path[MAXPATH], *argv[MAXARG]; + int i; + uint64 uargv, uarg; + char *envv[MAXARG]; + uint64 uenvv, uenv; + + //printf("Doing execve syscall...\n"); + //vmrd_settracing(1); + + argaddr(1, &uargv); + if(argstr(0, path, MAXPATH) < 0) { + printf("BAD PATH\n"); + return -1; + } + argaddr(2, &uenvv); + memset(argv, 0, sizeof(void*)*MAXARG /*sizeof(argv)*/); + memset(envv, 0, sizeof(void*)*MAXARG /*sizeof(argv)*/); + //printf("Path is '%s' uargv is %p uenvv is %p\n", path, uargv, uenv); + + for(i=0;; i++){ + //printf("i=%d\n", i); + if(i >= MAXARG /*NELEM(argv)*/){ + goto bad; + } + if(fetchaddr(uargv+sizeof(uint64)*i, (uint64*)&uarg) < 0){ + goto bad; + } + //printf("uarg=%p (from address %p or %p+%p)\n", uarg, uargv+sizeof(uint64)*i, uargv, sizeof(uint64)*i); + if(uarg == 0){ + argv[i] = 0; + break; + } + argv[i] = kalloc(); + if(argv[i] == 0) + goto bad; + if(fetchstr(uarg, argv[i], PGSIZE) < 0) + goto bad; + } + + for(i=0;; i++){ + if(i >= MAXARG /*NELEM(argv)*/){ + goto bad; + } + if(fetchaddr(uenvv+sizeof(uint64)*i, (uint64*)&uenv) < 0){ + goto bad; + } + if(uenv == 0){ + envv[i] = 0; + break; + } + envv[i] = kalloc(); + if(envv[i] == 0) + goto bad; + if(fetchstr(uenv, envv[i], PGSIZE) < 0) + goto bad; + } + + int ret = execve(path, argv, envv); + + for(i = 0; i < MAXARG /* NELEM(argv)*/ && argv[i] != 0; i++) + kfree(argv[i]); + + return ret; + + bad: + printf("Bad execve call\n"); + for(i = 0; i < MAXARG /*NELEM(argv)*/ && argv[i] != 0; i++) + kfree(argv[i]); + for(i = 0; i < MAXARG /*NELEM(argv)*/ && envv[i] != 0; i++) + kfree(envv[i]); + return -1; +} + +uint64 +sys_pipe(void) +{ + uint64 fdarray; // user pointer to array of two integers + struct file *rf, *wf; + int fd0, fd1; + struct 
proc *p = myproc(); + + argaddr(0, &fdarray); + if(pipealloc(&rf, &wf) < 0) + return -1; + fd0 = -1; + if((fd0 = fdalloc(rf)) < 0 || (fd1 = fdalloc(wf)) < 0){ + if(fd0 >= 0) + p->ofile[fd0] = 0; + fileclose(rf); + fileclose(wf); + return -1; + } + if(copyout(p->pagetable, fdarray, (char*)&fd0, sizeof(fd0)) < 0 || + copyout(p->pagetable, fdarray+sizeof(fd0), (char *)&fd1, sizeof(fd1)) < 0){ + p->ofile[fd0] = 0; + p->ofile[fd1] = 0; + fileclose(rf); + fileclose(wf); + return -1; + } + return 0; +} diff --git a/sysnew.c b/sysnew.c new file mode 100644 index 0000000..5612369 --- /dev/null +++ b/sysnew.c @@ -0,0 +1,253 @@ +// This is NEW CODE implementing system calls (however the headers etc. still need to be refactored) + +#include "syscall.h" +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "proc.h" +#include "drives.h" +#include "file.h" +#include "stat.h" +#include "kprintf.h" + +//TODO: This is still using the "argfd" defined in sysfile.c for now. +int +argfd(int n, int *pfd, struct file **pf); +void* memcpy(void*, void*, long); + +// new syscall added by Zak, sets the priority+limit of a process +// currently only works on the current process's pid! +uint64 +sys_prio(void) +{ + int tpid; + int newprio; + int newlimit; + argint(0, &tpid); + argint(1, &newprio); + argint(2, &newlimit); + + if (tpid != myproc()->pid) { + return 0; + } + if (newprio >= NPRIO) { + newprio = NPRIO-1; + } + if (newlimit >= NPRIO) { + newlimit = NPRIO-1; + } + //struct proc* p = myproc(); + + if (newlimit < myproc()->maxprio) { + newlimit = myproc()->maxprio; + } + if (newprio < newlimit) { + newprio = newlimit; + } + + myproc()->prio = newprio; + myproc()->maxprio = newlimit; + + yield(); + + return 1; +} + +uint64 +sys_thrd(void) +{ + int frompid; + uint64 fnc; + uint64 stk; + uint64 arg; + argint(0, &frompid); + argaddr(1, &fnc); + argaddr(2, &stk); + argaddr(3, &arg); + if (stk & 7) { + return -1; + } + if (frompid != myproc()->pid && !(myproc()->mainthread && frompid == myproc()->mainthread->pid)) { + return -1; + } + return thrd(fnc, stk, arg); +} + +uint64 +sys_affin(void) +{ + int targetpid; + int range; + uint64 mask; + argint(0, &targetpid); + argint(1, &range); + argaddr(2, &mask); + if (targetpid != myproc()->pid || range != 0) { + return -1; + } + return affin(mask); +} + + +uint64 sys_drvinf() { + int driveid; + uint64 structaddr; + + argint(0, &driveid); + argaddr(1, &structaddr); + + // The actual information is first retrieved into a structure inside + // of kernel memory. + struct __syscdefs_driveinfo drvinf; + int result = drives_getinfo(myproc()->drives, driveid, &drvinf); + + // The structure is copied whether the function succeeded or not, it + // will zero the structure on failure. 
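+  // copyout(pagetable, user_dst, kernel_src, len) copies from kernel memory
+  // into the calling process's address space; it is the same helper sys_pipe
+  // uses to hand back its two descriptor values.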
+ copyout(myproc()->pagetable, structaddr, (void*) &drvinf, sizeof(struct __syscdefs_driveinfo)); + + return result; +} + + +int readdirent(struct file* f, uint64 addr, int n) { + if (n != sizeof(fsformat_dirent_v1_t)) { + return -1; + } + if (f->type != FD_INODE || f->ip->type != T_DIR) { + return -1; + } + + if (f->ip->instance->fsversion == 0) { + fsformat_dirent_v0_t dirent_old; + fsformat_dirent_v1_t dirent_new; + if (fsinstance_inode_read(f->ip, 0, (uint64) ((void*) &dirent_old), f->off, sizeof(fsformat_dirent_v0_t)) != sizeof(fsformat_dirent_v0_t)) { + return -1; + } else { + f->off += sizeof(fsformat_dirent_v0_t); + } + dirent_new.datainode = dirent_old.inodenumber; + dirent_new.metainode = -1; + memset(dirent_new.filename, 0, FSFORMAT_NAMESIZE_NEW); + memcpy(dirent_new.filename, dirent_old.filename, FSFORMAT_NAMESIZE_OLD); + copyout(myproc()->pagetable, addr, (void*) &dirent_new, sizeof(fsformat_dirent_v1_t)); + return (int) sizeof(fsformat_dirent_v1_t); + } else { + if (fsinstance_inode_read(f->ip, 1, addr, f->off, n) != n) { + return -1; + } + f->off += sizeof(fsformat_dirent_v1_t); + return (int) sizeof(fsformat_dirent_v1_t); + } +} + +uint64 sys_lsdir() { + struct file* f; + uint64 addr; + int size; + + if (argfd(0, 0, &f) < 0) { + return -1; + } + argaddr(1, &addr); + argint(2, &size); + + return readdirent(f, addr, size); +} + +uint64 sys_kqueue1() { + int flags; + argint(0, &flags); + kprintf_panic("sys_kqueue1: TODO!"); + return 0; +} + +uint64 sys_kevent() { + kprintf_panic("sys_kevent: TODO!"); + return 0; +} + +// Definitions of old syscalls +uint64 sys_fork(); +uint64 sys_exit(); +uint64 sys_wait(); +uint64 sys_pipe(); +uint64 sys_read(); +uint64 sys_kill(); +uint64 sys_execve(); +uint64 sys_fstat(); +uint64 sys_chdir(); +uint64 sys_dup(); +uint64 sys_getpid(); +uint64 sys_sbrk(); +uint64 sys_sleep(); +uint64 sys_uptime(); +uint64 sys_open(); +uint64 sys_write(); +uint64 sys_mknod(); +uint64 sys_unlink(); +uint64 sys_link(); +uint64 sys_mkdir(); +uint64 sys_close(); + +typedef uint64 (*syscall_t)(); + +// The system is currently hard-coded to handle a table of exactly 64 syscalls. +// This number is convenient for masking in a 64-bit system. +syscall_t syscalls[64]; + +/* This is invoked when the syscall doesn't exist. 
*/ +uint64 sys_bad() { + printf("ERROR IN PROCESS %d '%s' BAD SYSCALL %d\n", myproc()->pid, myproc()->name, (int) myproc()->trapframe->a7); + return -1; +} + +void syscall_init() { + int i; + for (i = 0 ; i < 64; i++) { + syscalls[i] = &sys_bad; + } + syscalls[SYS_fork] = &sys_fork; + syscalls[SYS_exit] = &sys_exit; + syscalls[SYS_wait] = &sys_wait; + syscalls[SYS_pipe] = &sys_pipe; + syscalls[SYS_read] = &sys_read; + syscalls[SYS_kill] = &sys_kill; + syscalls[SYS_execve] = &sys_execve; + syscalls[SYS_fstat] = &sys_fstat; + syscalls[SYS_chdir] = &sys_chdir; + syscalls[SYS_dup] = &sys_dup; + syscalls[SYS_getpid] = &sys_getpid; + syscalls[SYS_sbrk] = &sys_sbrk; + syscalls[SYS_sleep] = &sys_sleep; + syscalls[SYS_uptime] = &sys_uptime; + syscalls[SYS_open] = &sys_open; + syscalls[SYS_write] = &sys_write; + syscalls[SYS_mknod] = &sys_mknod; + syscalls[SYS_unlink] = &sys_unlink; + syscalls[SYS_link] = &sys_link; + syscalls[SYS_mkdir] = &sys_mkdir; + syscalls[SYS_close] = &sys_close; + // New system calls: + syscalls[SYS_prio] = &sys_prio; + syscalls[SYS_affin] = &sys_affin; + syscalls[SYS_thrd] = &sys_thrd; + syscalls[SYS_drvinf] = &sys_drvinf; + syscalls[SYS_lsdir] = &sys_lsdir; + syscalls[SYS_kqueue1] = &sys_kqueue1; + syscalls[SYS_kevent] = &sys_kevent; +} + +// New syscall handling. This will just invoke sys_bad() if the syscall doesn't +// exist, this means there's one less error to check for. +void syscall() { + int x = myproc()->trapframe->a7; + + //printf("Doing syscall #%d (0x%x)\n", x & 63, x & 63); + // TODO: This should be checked against a mask to allow and/or redirect syscalls. + syscall_t func = syscalls[x & 63]; + //printf("Syscall func is at %p\n", func); + myproc()->trapframe->a0 = func(); +} diff --git a/sysproc.c b/sysproc.c new file mode 100644 index 0000000..1fd9c40 --- /dev/null +++ b/sysproc.c @@ -0,0 +1,94 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "proc.h" + +uint64 +sys_exit(void) +{ + int n; + argint(0, &n); + exit(n); + return 0; // not reached +} + +uint64 +sys_getpid(void) +{ + return myproc()->pid; +} + +uint64 +sys_fork(void) +{ + return fork(); +} + +uint64 +sys_wait(void) +{ + uint64 p; + argaddr(0, &p); + return wait(p); +} + +uint64 +sys_sbrk(void) +{ + uint64 addr; + int n; + + argint(0, &n); + addr = myproc()->sz; + if(growproc(n) < 0) + return -1; + return addr; +} + +uint64 +sys_sleep(void) +{ + int n; + uint ticks0; + + argint(0, &n); + if(n < 0) + n = 0; + acquire(&tickslock); + ticks0 = ticks; + while(ticks - ticks0 < n){ + if(killed(myproc())){ + release(&tickslock); + return -1; + } + sleep(&ticks, &tickslock); + } + release(&tickslock); + return 0; +} + +uint64 +sys_kill(void) +{ + int pid; + + argint(0, &pid); + return kill(pid); +} + +// return how many clock tick interrupts have occurred +// since start. +uint64 +sys_uptime(void) +{ + uint xticks; + + acquire(&tickslock); + xticks = ticks; + release(&tickslock); + return xticks; +} diff --git a/trampoline.S b/trampoline.S new file mode 100644 index 0000000..0755808 --- /dev/null +++ b/trampoline.S @@ -0,0 +1,164 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) + // + // low-level code to handle traps from user space into + // the kernel, and returns from kernel to user. 
+ // + // the kernel maps the page holding this code + // at the same virtual address (TRAMPOLINE) + // in user and kernel space so that it continues + // to work when it switches page tables. + // kernel.ld causes this code to start at + // a page boundary. + // + +// NOTES COPIED FROM OLD LAYOUT +// uservec in trampoline.S saves user registers in the trapframe, +// then initializes registers from the trapframe's +// kernel_sp, kernel_hartid, kernel_satp, and jumps to kernel_trap. +// usertrapret() and userret in trampoline.S set up +// the trapframe's kernel_* [NOW kmode_], restore user registers from the +// trapframe, switch to the user page table, and enter user space. +// the trapframe includes callee-saved user registers like s0-s11 because the +// return-to-user path via usertrapret() doesn't return through +// the entire kernel call stack. + +#include "riscv.h" +#include "memlayout.h" +.option norvc + +.section trampsec +.globl trampoline +.globl usertrap +trampoline: +.align 4 +.globl uservec +uservec: + // + // trap.c sets stvec to point here, so + // traps from user space start here, + // in supervisor mode, but with a + // user page table. + // + + // save user a0 in sscratch so + // a0 can be used to get at TRAPFRAME. + csrw sscratch, a0 + + // each process has a separate p->trapframe memory area, + // but it's mapped to the same virtual address + // (TRAPFRAME) in every process's user page table. + li a0, TRAPFRAME + + // save the user registers in TRAPFRAME + sd ra, 40(a0) + sd sp, 48(a0) + sd gp, 56(a0) + sd tp, 64(a0) + sd t0, 72(a0) + sd t1, 80(a0) + sd t2, 88(a0) + sd s0, 96(a0) + sd s1, 104(a0) + sd a1, 120(a0) + sd a2, 128(a0) + sd a3, 136(a0) + sd a4, 144(a0) + sd a5, 152(a0) + sd a6, 160(a0) + sd a7, 168(a0) + sd s2, 176(a0) + sd s3, 184(a0) + sd s4, 192(a0) + sd s5, 200(a0) + sd s6, 208(a0) + sd s7, 216(a0) + sd s8, 224(a0) + sd s9, 232(a0) + sd s10, 240(a0) + sd s11, 248(a0) + sd t3, 256(a0) + sd t4, 264(a0) + sd t5, 272(a0) + sd t6, 280(a0) + + // save the user a0 in p->trapframe->a0 + csrr t0, sscratch + sd t0, 112(a0) + + // initialize kernel stack pointer, from p->trapframe->kmode_sp + ld sp, 8(a0) + + // make tp hold the current hartid, from p->trapframe->kmode_hartid + ld tp, 32(a0) + + // load the address of usertrap(), from p->trapframe->kmode_trap + ld t0, 16(a0) + + // fetch the kernel page table address, from p->trapframe->kmode_satp. + ld t1, 0(a0) + + // wait for any previous memory operations to complete, so that + // they use the user page table. + sfence.vma zero, zero + + // install the kernel page table. + csrw satp, t1 + + // flush now-stale user entries from the TLB. + sfence.vma zero, zero + + // jump to usertrap(), which does not return + jr t0 + +.globl userret +userret: + // userret(pagetable) + // called by usertrapret() in trap.c to + // switch from kernel to user. + // a0: user page table, for satp. + + // switch to the user page table. 
+ sfence.vma zero, zero + csrw satp, a0 + sfence.vma zero, zero + + li a0, TRAPFRAME + + // restore all but a0 from TRAPFRAME + ld ra, 40(a0) + ld sp, 48(a0) + ld gp, 56(a0) + ld tp, 64(a0) + ld t0, 72(a0) + ld t1, 80(a0) + ld t2, 88(a0) + ld s0, 96(a0) + ld s1, 104(a0) + ld a1, 120(a0) + ld a2, 128(a0) + ld a3, 136(a0) + ld a4, 144(a0) + ld a5, 152(a0) + ld a6, 160(a0) + ld a7, 168(a0) + ld s2, 176(a0) + ld s3, 184(a0) + ld s4, 192(a0) + ld s5, 200(a0) + ld s6, 208(a0) + ld s7, 216(a0) + ld s8, 224(a0) + ld s9, 232(a0) + ld s10, 240(a0) + ld s11, 248(a0) + ld t3, 256(a0) + ld t4, 264(a0) + ld t5, 272(a0) + ld t6, 280(a0) + + // restore user a0 + ld a0, 112(a0) + + // return to user mode and user pc. + // usertrapret() set up sstatus and sepc. + sret diff --git a/trap.c b/trap.c new file mode 100644 index 0000000..f6c13f1 --- /dev/null +++ b/trap.c @@ -0,0 +1,237 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "sched.h" +#include "proc.h" +#include "defs.h" +#include "fpu.h" +#include "sched.h" +#include "kprintf.h" + +sched_spinlock_t tickslock; +uint ticks; + +extern char trampoline[], uservec[], userret[]; + +// in kernelvec.S, calls kerneltrap(). +void kernelvec(); + +extern int devintr(); + +void +trapinit(void) +{ + initlock(&tickslock, "time"); +} + +// set up to take exceptions and traps while in the kernel. +void +trapinithart(void) +{ + w_stvec((uint64)(&kernelvec)); +} + +int fpu_instrsizeattrap(struct proc* p); +void fpu_instrtrap(struct proc* p); // Declared here to avoid the header needing proc + +// +// handle an interrupt, exception, or system call from user space. +// called from trampoline.S +// +void +usertrap(void) +{ + int which_dev = 0; + + if((r_sstatus() & SSTATUS_SPP) != 0) + panic("usertrap: not from user mode"); + + // send interrupts and exceptions to kerneltrap(), + // since we're now in the kernel. + w_stvec((uint64)(&kernelvec)); + + struct proc *p = myproc(); + + // save user program counter. + p->trapframe->epc = r_sepc(); + + if(r_scause() == 8){ + // system call + + if(killed(p)) + exit(-1); + + // sepc points to the ecall instruction, + // but we want to return to the next instruction. + p->trapframe->epc += fpu_instrsizeattrap(p); + + // an interrupt will change sepc, scause, and sstatus, + // so enable only now that we're done with those registers. + intr_on(); + + syscall(); + } else if (r_scause() == 2) { + fpu_instrtrap(p); + } else if((which_dev = devintr()) != 0){ + // ok + } else { + printf("usertrap(): unexpected scause 0x%lx pid=%d\n", r_scause(), p->pid); + printf(" sepc=0x%lx stval=0x%lx\n", r_sepc(), r_stval()); + setkilled(p); + } + + if(killed(p)) + exit(-1); + + // give up the CPU if this is a timer interrupt. + if(which_dev == 2) + yield(); + + usertrapret(); +} + +// +// return to user space +// +void +usertrapret(void) +{ + struct proc *p = myproc(); + + // we're about to switch the destination of traps from + // kerneltrap() to usertrap(), so turn off interrupts until + // we're back in user space, where usertrap() is correct. + intr_off(); + + // send syscalls, interrupts, and exceptions to uservec in trampoline.S + uint64 trampoline_uservec = TRAMPOLINE + (((uint64)uservec) - ((uint64)trampoline)); + w_stvec(trampoline_uservec); + + // set up trapframe values that uservec will need when + // the process next traps into the kernel. 
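+  // These fill the slots uservec reloads from the head of the trapframe:
+  // kmode_satp at offset 0 ("ld t1, 0(a0)"), kmode_sp at 8 ("ld sp, 8(a0)"),
+  // kmode_trap at 16 ("ld t0, 16(a0)") and kmode_hartid at 32 ("ld tp, 32(a0)").
+  // kmode_sp points at the top of the process's kernel stack page, since the
+  // stack grows downward from p->kstack + PGSIZE.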
+ p->trapframe->kmode_satp = r_satp(); // kernel page table + p->trapframe->kmode_sp = p->kstack + PGSIZE; // process's kernel stack + p->trapframe->kmode_trap = (uint64)(&usertrap); + p->trapframe->kmode_hartid = sched_cputhreadpointer_get(); // hartid for SCHED_CORE_THISNUMBER_NOINTERRUPTS() + + // set up the registers that trampoline.S's sret will use + // to get to user space. + + // set S Previous Privilege mode to User. + unsigned long x = r_sstatus(); + x &= ~SSTATUS_SPP; // clear SPP to 0 for user mode + x |= SSTATUS_SPIE; // enable interrupts in user mode + w_sstatus(x); + + // set S Exception Program Counter to the saved user pc. + w_sepc(p->trapframe->epc); + + // tell trampoline.S the user page table to switch to. + uint64 satp = MAKE_SATP(p->pagetable); + + // jump to userret in trampoline.S at the top of memory, which + // switches to the user page table, restores user registers, + // and switches to user mode with sret. + uint64 trampoline_userret = TRAMPOLINE + (((uint64)userret) - ((uint64)trampoline)); + void (*tret)(uint64); + tret = (void*)trampoline_userret; + //printf("Going in at %p...\n", tret); + //vmrd_settracing(1); + tret(satp); + //printf("Returned!?!?\n"); + //((void (*)(uint64))trampoline_userret)(satp); +} + +// interrupts and exceptions from kernel code go here via kernelvec, +// on whatever the current kernel stack is. +void +kerneltrap() +{ + int which_dev = 0; + uint64 sepc = r_sepc(); + uint64 fpumask = (3ULL << 13); + uint64 sstatus = r_sstatus(); + uint64 scause = r_scause(); + + if((sstatus & SSTATUS_SPP) == 0) + panic("kerneltrap: not from supervisor mode"); + if(intr_get() != 0) + panic("kerneltrap: interrupts enabled"); + + if((which_dev = devintr()) == 0){ + // interrupt or trap from an unknown source + printf("scause=0x%lx sepc=0x%lx stval=0x%lx\n", scause, r_sepc(), r_stval()); + panic("kerneltrap"); + } + + // give up the CPU if this is a timer interrupt. + if(which_dev == 2 && myproc() != 0/* && SCHED_CORE_THIS_NOINTERRUPTS()->preempted */) { + SCHED_CORE_THIS_NOINTERRUPTS()->preempted = 0; // This is an approximate variable + yield(); + } + + // the yield() may have caused some traps to occur, + // so restore trap registers for use by kernelvec.S's sepc instruction. + // BUT doesn't change the floating point state returned from the scheduler (TODO: Try to get all that context code in one place) + w_sepc(sepc); + w_sstatus((sstatus & ~fpumask) | (r_sstatus() & fpumask)); +} + +void +clockintr() +{ + if(SCHED_CORE_THISNUMBER_NOINTERRUPTS() == 0){ + acquire(&tickslock); + ticks++; + sched_wake(&ticks); + release(&tickslock); + } + + // ask for the next timer interrupt. this also clears + // the interrupt request. 1000000 is about a tenth + // of a second. + //w_stimecmp(r_time() + 1000000); + w_stimecmp(r_time() + 1000000); +} + +// check if it's an external interrupt or software interrupt, +// and handle it. +// returns 2 if timer interrupt, +// 1 if other device, +// 0 if not recognized. +int +devintr() +{ + uint64 scause = r_scause(); + if(scause == 0x8000000000000009UL){ + // this is a supervisor external interrupt, via PLIC. + + // irq indicates which device interrupted. + int irq = plic_claim(); + + if(irq == UART0_IRQ){ + uartintr(); + } else if(irq == VIRTIO0_IRQ){ + virtio_disk_intr(); + } else if(irq){ + printf("unexpected interrupt irq=%d\n", irq); + } + + // the PLIC allows each device to raise at most one + // interrupt at a time; tell the PLIC the device is + // now allowed to interrupt again. 
+ if(irq) + plic_complete(irq); + + return 1; + } else if(scause == 0x8000000000000005UL){ + // timer interrupt. + clockintr(); + return 2; + } else { + return 0; + } +} + diff --git a/types.h b/types.h new file mode 100644 index 0000000..cd712c0 --- /dev/null +++ b/types.h @@ -0,0 +1,11 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +typedef unsigned int uint; +typedef unsigned short ushort; +typedef unsigned char uchar; + +typedef unsigned char uint8; +typedef unsigned short uint16; +typedef unsigned int uint32; +typedef unsigned long uint64; + +typedef uint64 pde_t; diff --git a/uart.c b/uart.c new file mode 100644 index 0000000..fc5b351 --- /dev/null +++ b/uart.c @@ -0,0 +1,192 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// +// low-level driver routines for 16550a UART. +// + +#include "types.h" +#include "param.h" +#include "memlayout.h" +#include "riscv.h" +#include "sched.h" +#include "proc.h" +#include "defs.h" +#include "sched.h" +#include "kprintf.h" + +// the UART control registers are memory-mapped +// at address UART0. this macro returns the +// address of one of the registers. +#define Reg(reg) ((volatile unsigned char *)(UART0 + (reg))) + +// the UART control registers. +// some have different meanings for +// read vs write. +// see http://byterunner.com/16550.html +#define RHR 0 // receive holding register (for input bytes) +#define THR 0 // transmit holding register (for output bytes) +#define IER 1 // interrupt enable register +#define IER_RX_ENABLE (1<<0) +#define IER_TX_ENABLE (1<<1) +#define FCR 2 // FIFO control register +#define FCR_FIFO_ENABLE (1<<0) +#define FCR_FIFO_CLEAR (3<<1) // clear the content of the two FIFOs +#define ISR 2 // interrupt status register +#define LCR 3 // line control register +#define LCR_EIGHT_BITS (3<<0) +#define LCR_BAUD_LATCH (1<<7) // special mode to set baud rate +#define LSR 5 // line status register +#define LSR_RX_READY (1<<0) // input is waiting to be read from RHR +#define LSR_TX_IDLE (1<<5) // THR can accept another character to send + +#define ReadReg(reg) (*(Reg(reg))) +#define WriteReg(reg, v) (*(Reg(reg)) = (v)) + +// the transmit output buffer. +sched_spinlock_t uart_tx_lock; +#define UART_TX_BUF_SIZE 32 +char uart_tx_buf[UART_TX_BUF_SIZE]; +uint64 uart_tx_w; // write next to uart_tx_buf[uart_tx_w % UART_TX_BUF_SIZE] +uint64 uart_tx_r; // read next from uart_tx_buf[uart_tx_r % UART_TX_BUF_SIZE] + +void uartstart(); + +void +uartinit(void) +{ + // disable interrupts. + WriteReg(IER, 0x00); + + // special mode to set baud rate. + WriteReg(LCR, LCR_BAUD_LATCH); + + // LSB for baud rate of 38.4K. + WriteReg(0, 0x03); + + // MSB for baud rate of 38.4K. + WriteReg(1, 0x00); + + // leave set-baud mode, + // and set word length to 8 bits, no parity. + WriteReg(LCR, LCR_EIGHT_BITS); + + // reset and enable FIFOs. + WriteReg(FCR, FCR_FIFO_ENABLE | FCR_FIFO_CLEAR); + + // enable transmit and receive interrupts. + WriteReg(IER, IER_TX_ENABLE | IER_RX_ENABLE); + + initlock(&uart_tx_lock, "uart"); +} + +// add a character to the output buffer and tell the +// UART to start sending if it isn't already. +// blocks if the output buffer is full. +// because it may block, it can't be called +// from interrupts; it's only suitable for use +// by write(). +void +uartputc(int c) +{ + acquire(&uart_tx_lock); + + if(kprintf_inpanic){ + for(;;) + ; + } + while(uart_tx_w == uart_tx_r + UART_TX_BUF_SIZE){ + // buffer is full. + // wait for uartstart() to open up space in the buffer. 
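+    // The sleep channel is &uart_tx_r: uartstart() calls sched_wake(&uart_tx_r)
+    // each time it consumes a byte, which is what lets this sleep() return.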
+ sleep(&uart_tx_r, &uart_tx_lock); + } + uart_tx_buf[uart_tx_w % UART_TX_BUF_SIZE] = c; + uart_tx_w += 1; + uartstart(); + release(&uart_tx_lock); +} + + +// alternate version of uartputc() that doesn't +// use interrupts, for use by kernel printf() and +// to echo characters. it spins waiting for the uart's +// output register to be empty. +void +uartputc_sync(int c) +{ + push_off(); + + if(kprintf_inpanic){ + for(;;) + ; + } + + // wait for Transmit Holding Empty to be set in LSR. + while((ReadReg(LSR) & LSR_TX_IDLE) == 0) + ; + WriteReg(THR, c); + + pop_off(); +} + +// if the UART is idle, and a character is waiting +// in the transmit buffer, send it. +// caller must hold uart_tx_lock. +// called from both the top- and bottom-half. +void +uartstart() +{ + while(1){ + if(uart_tx_w == uart_tx_r){ + // transmit buffer is empty. + ReadReg(ISR); + return; + } + + if((ReadReg(LSR) & LSR_TX_IDLE) == 0){ + // the UART transmit holding register is full, + // so we cannot give it another byte. + // it will interrupt when it's ready for a new byte. + return; + } + + int c = uart_tx_buf[uart_tx_r % UART_TX_BUF_SIZE]; + uart_tx_r += 1; + + // maybe uartputc() is waiting for space in the buffer. + sched_wake(&uart_tx_r); + + WriteReg(THR, c); + } +} + +// read one input character from the UART. +// return -1 if none is waiting. +int +uartgetc(void) +{ + if(ReadReg(LSR) & 0x01){ + // input data is ready. + return ReadReg(RHR); + } else { + return -1; + } +} + +// handle a uart interrupt, raised because input has +// arrived, or the uart is ready for more output, or +// both. called from devintr(). +void +uartintr(void) +{ + // read and process incoming characters. + while(1){ + int c = uartgetc(); + if(c == -1) + break; + consoleintr(c); + } + + // send buffered characters. + acquire(&uart_tx_lock); + uartstart(); + release(&uart_tx_lock); +} diff --git a/virtio.h b/virtio.h new file mode 100644 index 0000000..0a09cf6 --- /dev/null +++ b/virtio.h @@ -0,0 +1,97 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// +// virtio device definitions. +// for both the mmio interface, and virtio descriptors. +// only tested with qemu. +// +// the virtio spec: +// https://docs.oasis-open.org/virtio/virtio/v1.1/virtio-v1.1.pdf +// + +// virtio mmio control registers, mapped starting at 0x10001000. 
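+// virtio_disk.c reads and writes these registers through its R(r) macro, e.g.
+// the probe check *R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 and the status
+// updates *R(VIRTIO_MMIO_STATUS) = status during initialisation.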
+// from qemu virtio_mmio.h +#define VIRTIO_MMIO_MAGIC_VALUE 0x000 // 0x74726976 +#define VIRTIO_MMIO_VERSION 0x004 // version; should be 2 +#define VIRTIO_MMIO_DEVICE_ID 0x008 // device type; 1 is net, 2 is disk +#define VIRTIO_MMIO_VENDOR_ID 0x00c // 0x554d4551 +#define VIRTIO_MMIO_DEVICE_FEATURES 0x010 +#define VIRTIO_MMIO_DRIVER_FEATURES 0x020 +#define VIRTIO_MMIO_QUEUE_SEL 0x030 // select queue, write-only +#define VIRTIO_MMIO_QUEUE_NUM_MAX 0x034 // max size of current queue, read-only +#define VIRTIO_MMIO_QUEUE_NUM 0x038 // size of current queue, write-only +#define VIRTIO_MMIO_QUEUE_READY 0x044 // ready bit +#define VIRTIO_MMIO_QUEUE_NOTIFY 0x050 // write-only +#define VIRTIO_MMIO_INTERRUPT_STATUS 0x060 // read-only +#define VIRTIO_MMIO_INTERRUPT_ACK 0x064 // write-only +#define VIRTIO_MMIO_STATUS 0x070 // read/write +#define VIRTIO_MMIO_QUEUE_DESC_LOW 0x080 // physical address for descriptor table, write-only +#define VIRTIO_MMIO_QUEUE_DESC_HIGH 0x084 +#define VIRTIO_MMIO_DRIVER_DESC_LOW 0x090 // physical address for available ring, write-only +#define VIRTIO_MMIO_DRIVER_DESC_HIGH 0x094 +#define VIRTIO_MMIO_DEVICE_DESC_LOW 0x0a0 // physical address for used ring, write-only +#define VIRTIO_MMIO_DEVICE_DESC_HIGH 0x0a4 + +// status register bits, from qemu virtio_config.h +#define VIRTIO_CONFIG_S_ACKNOWLEDGE 1 +#define VIRTIO_CONFIG_S_DRIVER 2 +#define VIRTIO_CONFIG_S_DRIVER_OK 4 +#define VIRTIO_CONFIG_S_FEATURES_OK 8 + +// device feature bits +#define VIRTIO_BLK_F_RO 5 /* Disk is read-only */ +#define VIRTIO_BLK_F_SCSI 7 /* Supports scsi command passthru */ +#define VIRTIO_BLK_F_CONFIG_WCE 11 /* Writeback mode available in config */ +#define VIRTIO_BLK_F_MQ 12 /* support more than one vq */ +#define VIRTIO_F_ANY_LAYOUT 27 +#define VIRTIO_RING_F_INDIRECT_DESC 28 +#define VIRTIO_RING_F_EVENT_IDX 29 + +// this many virtio descriptors. +// must be a power of two. +#define NUM 8 + +// a single descriptor, from the spec. +struct virtq_desc { + uint64 addr; + uint32 len; + uint16 flags; + uint16 next; +}; +#define VRING_DESC_F_NEXT 1 // chained with another descriptor +#define VRING_DESC_F_WRITE 2 // device writes (vs read) + +// the (entire) avail ring, from the spec. +struct virtq_avail { + uint16 flags; // always zero + uint16 idx; // driver will write ring[idx] next + uint16 ring[NUM]; // descriptor numbers of chain heads + uint16 unused; +}; + +// one entry in the "used" ring, with which the +// device tells the driver about completed requests. +struct virtq_used_elem { + uint32 _id; // index of start of completed descriptor chain + uint32 len; +}; + +struct virtq_used { + uint16 flags; // always zero + uint16 idx; // device increments when it adds a ring[] entry + struct virtq_used_elem ring[NUM]; +}; + +// these are specific to virtio block devices, e.g. disks, +// described in Section 5.2 of the spec. + +#define VIRTIO_BLK_T_IN 0 // read the disk +#define VIRTIO_BLK_T_OUT 1 // write the disk + +// the format of the first descriptor in a disk request. +// to be followed by two more descriptors containing +// the block, and a one-byte status. +struct virtio_blk_req { + uint32 type; // VIRTIO_BLK_T_IN or ..._OUT + uint32 reserved; + uint64 sector; +}; diff --git a/virtio_disk.c b/virtio_disk.c new file mode 100644 index 0000000..e1953bb --- /dev/null +++ b/virtio_disk.c @@ -0,0 +1,331 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +// +// driver for qemu's virtio disk device. +// uses qemu's mmio interface to virtio. +// +// qemu ... 
-drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 +// + +#include "types.h" +#include "riscv.h" +#include "defs.h" +#include "param.h" +#include "memlayout.h" +#include "sched.h" +#include "fs.h" +#include "diskio.h" +#include "virtio.h" +#include "kprintf.h" + +// the address of virtio mmio register r. +#define R(r) ((volatile uint32 *)(VIRTIO0 + (r))) + +//static struct disk { + // a set (not a ring) of DMA descriptors, with which the + // driver tells the device where to read and write individual + // disk operations. there are NUM descriptors. + // most commands consist of a "chain" (a linked list) of a couple of + // these descriptors. + struct virtq_desc *disk_desc; + + // a ring in which the driver writes descriptor numbers + // that the driver would like the device to process. it only + // includes the head descriptor of each chain. the ring has + // NUM elements. + struct virtq_avail *disk_avail; + + // a ring in which the device writes descriptor numbers that + // the device has finished processing (just the head of each chain). + // there are NUM used ring entries. + struct virtq_used *disk_used; + + // our own book-keeping. + char disk_free[NUM]; // is a descriptor free? + uint16 disk_used_idx; // we've looked this far in used[2..NUM]. + + // track info about in-flight operations, + // for use when completion interrupt arrives. + // indexed by first descriptor index of chain. + struct { + diskio_buffer_t *b; + char status; + } disk_info[NUM]; + + // disk command headers. + // one-for-one with descriptors, for convenience. + struct virtio_blk_req disk_ops[NUM]; + + sched_spinlock_t disk_vdisk_lock; + +//} disk; + +void +virtio_disk_init(void) +{ + uint32 status = 0; + + initlock(&disk_vdisk_lock, "virtio_disk"); + + if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 || + *R(VIRTIO_MMIO_VERSION) != 2 || + *R(VIRTIO_MMIO_DEVICE_ID) != 2 || + *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){ + panic("could not find virtio disk"); + } + + // reset device + *R(VIRTIO_MMIO_STATUS) = status; + + // set ACKNOWLEDGE status bit + status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; + *R(VIRTIO_MMIO_STATUS) = status; + + // set DRIVER status bit + status |= VIRTIO_CONFIG_S_DRIVER; + *R(VIRTIO_MMIO_STATUS) = status; + + // negotiate features + uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES); + features &= ~(1 << VIRTIO_BLK_F_RO); + features &= ~(1 << VIRTIO_BLK_F_SCSI); + features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE); + features &= ~(1 << VIRTIO_BLK_F_MQ); + features &= ~(1 << VIRTIO_F_ANY_LAYOUT); + features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); + features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); + *R(VIRTIO_MMIO_DRIVER_FEATURES) = features; + + // tell device that feature negotiation is complete. + status |= VIRTIO_CONFIG_S_FEATURES_OK; + *R(VIRTIO_MMIO_STATUS) = status; + + // re-read status to ensure FEATURES_OK is set. + status = *R(VIRTIO_MMIO_STATUS); + if(!(status & VIRTIO_CONFIG_S_FEATURES_OK)) + panic("virtio disk FEATURES_OK unset"); + + // initialize queue 0. + *R(VIRTIO_MMIO_QUEUE_SEL) = 0; + + // ensure queue 0 is not in use. + if(*R(VIRTIO_MMIO_QUEUE_READY)) + panic("virtio disk should not be ready"); + + // check maximum queue size. + uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX); + if(max == 0) + panic("virtio disk has no queue 0"); + if(max < NUM) + panic("virtio disk max queue too short"); + + // allocate and zero queue memory. 
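+  // with NUM == 8 each structure comfortably fits in a single page:
+  // the descriptor table is 8 * sizeof(struct virtq_desc) = 128 bytes,
+  // the avail ring is roughly 22 bytes and the used ring roughly 68,
+  // so one kalloc()'d page (PGSIZE bytes) per structure is plenty.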
+ disk_desc = kalloc(); + disk_avail = kalloc(); + disk_used = kalloc(); + if(!disk_desc || !disk_avail || !disk_used) + panic("virtio disk kalloc"); + memset(disk_desc, 0, PGSIZE); + memset(disk_avail, 0, PGSIZE); + memset(disk_used, 0, PGSIZE); + + // set queue size. + *R(VIRTIO_MMIO_QUEUE_NUM) = NUM; + + // write physical addresses. + *R(VIRTIO_MMIO_QUEUE_DESC_LOW) = (uint64)(disk_desc); + *R(VIRTIO_MMIO_QUEUE_DESC_HIGH) = (uint64)(disk_desc) >> 32; + *R(VIRTIO_MMIO_DRIVER_DESC_LOW) = (uint64)(disk_avail); + *R(VIRTIO_MMIO_DRIVER_DESC_HIGH) = (uint64)(disk_avail) >> 32; + *R(VIRTIO_MMIO_DEVICE_DESC_LOW) = (uint64)(disk_used); + *R(VIRTIO_MMIO_DEVICE_DESC_HIGH) = (uint64)(disk_used) >> 32; + + // queue is ready. + *R(VIRTIO_MMIO_QUEUE_READY) = 0x1; + + // all NUM descriptors start out unused. + for(int i = 0; i < NUM; i++) + disk_free[i] = 1; + + // tell device we're completely ready. + status |= VIRTIO_CONFIG_S_DRIVER_OK; + *R(VIRTIO_MMIO_STATUS) = status; + + // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ. +} + +// find a free descriptor, mark it non-free, return its index. +static int +alloc_desc() +{ + for(int i = 0; i < NUM; i++){ + if(disk_free[i]){ + disk_free[i] = 0; + return i; + } + } + return -1; +} + +// mark a descriptor as free. +static void +free_desc(int i) +{ + if(i >= NUM) + panic("free_desc 1"); + if(disk_free[i]) + panic("free_desc 2"); + disk_desc[i].addr = 0; + disk_desc[i].len = 0; + disk_desc[i].flags = 0; + disk_desc[i].next = 0; + disk_free[i] = 1; + sched_wake(&disk_free[0]); +} + +// free a chain of descriptors. +static void +free_chain(int i) +{ + while(1){ + int flag = disk_desc[i].flags; + int nxt = disk_desc[i].next; + free_desc(i); + if(flag & VRING_DESC_F_NEXT) + i = nxt; + else + break; + } +} + +// allocate three descriptors (they need not be contiguous). +// disk transfers always use three descriptors. +static int +alloc3_desc(int *idx) +{ + for(int i = 0; i < 3; i++){ + idx[i] = alloc_desc(); + if(idx[i] < 0){ + for(int j = 0; j < i; j++) + free_desc(idx[j]); + return -1; + } + } + return 0; +} + +void +virtio_disk_rw(diskio_buffer_t *b, int write) +{ + uint64 sector = b->blocknumber * (BSIZE / 512); + + acquire(&disk_vdisk_lock); + + // the spec's Section 5.2 says that legacy block operations use + // three descriptors: one for type/reserved/sector, one for the + // data, one for a 1-byte status result. + + // allocate the three descriptors. + int idx[3]; + while(1){ + if(alloc3_desc(idx) == 0) { + break; + } + sleep(&disk_free[0], &disk_vdisk_lock); + } + + // format the three descriptors. + // qemu's virtio-blk.c reads them. 
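+  // the chain built below is:
+  //   idx[0]: the virtio_blk_req header (type/reserved/sector), read by the device
+  //   idx[1]: b->data, BSIZE bytes, read (write op) or written (read op) by the device
+  //   idx[2]: the one-byte status, written by the device (0 means success)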
+ + struct virtio_blk_req *buf0 = &disk_ops[idx[0]]; + + if(write) + buf0->type = VIRTIO_BLK_T_OUT; // write the disk + else + buf0->type = VIRTIO_BLK_T_IN; // read the disk + buf0->reserved = 0; + buf0->sector = sector; + + disk_desc[idx[0]].addr = (uint64) buf0; + disk_desc[idx[0]].len = sizeof(struct virtio_blk_req); + disk_desc[idx[0]].flags = VRING_DESC_F_NEXT; + disk_desc[idx[0]].next = idx[1]; + + disk_desc[idx[1]].addr = (uint64) (b->data); + disk_desc[idx[1]].len = BSIZE; + if(write) + disk_desc[idx[1]].flags = 0; // device reads b->data + else + disk_desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data + disk_desc[idx[1]].flags |= VRING_DESC_F_NEXT; + disk_desc[idx[1]].next = idx[2]; + + disk_info[idx[0]].status = 0xff; // device writes 0 on success + disk_desc[idx[2]].addr = (uint64) &disk_info[idx[0]].status; + disk_desc[idx[2]].len = 1; + disk_desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status + disk_desc[idx[2]].next = 0; + + // record diskio_buffer_t for virtio_disk_intr(). + b->isdisk = 1; + disk_info[idx[0]].b = b; + + // tell the device the first index in our chain of descriptors. + disk_avail->ring[disk_avail->idx % NUM] = idx[0]; + + __sync_synchronize(); + + // tell the device another avail ring entry is available. + disk_avail->idx += 1; // not % NUM ... + + __sync_synchronize(); + + *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number + + // Wait for virtio_disk_intr() to say request has finished. + while(b->isdisk == 1) { + sleep(b, &disk_vdisk_lock); + } + + disk_info[idx[0]].b = 0; + free_chain(idx[0]); + + //printf("%s %d\n", write ? "written" : "read", *((int*)(b->data))); + + release(&disk_vdisk_lock); +} + +void +virtio_disk_intr() +{ + //printf("Disk interrupt\n"); + acquire(&disk_vdisk_lock); + + // the device won't raise another interrupt until we tell it + // we've seen this interrupt, which the following line does. + // this may race with the device writing new entries to + // the "used" ring, in which case we may process the new + // completion entries in this interrupt, and have nothing to do + // in the next interrupt, which is harmless. + *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3; + + __sync_synchronize(); + + // the device increments disk_used->idx when it + // adds an entry to the used ring. + + while(disk_used_idx != disk_used->idx){ + __sync_synchronize(); + int _id = disk_used->ring[disk_used_idx % NUM]._id; + + if(disk_info[_id].status != 0) + panic("virtio_disk_intr status"); + + diskio_buffer_t *b = disk_info[_id].b; + b->isdisk = 0; // disk is done with buf + sched_wake(b); + + disk_used_idx += 1; + } + + release(&disk_vdisk_lock); +} diff --git a/vm.c b/vm.c new file mode 100644 index 0000000..5193412 --- /dev/null +++ b/vm.c @@ -0,0 +1,483 @@ +// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) +#include "param.h" +#include "types.h" +#include "memlayout.h" +//#include "elf.h" +#include "riscv.h" +#include "defs.h" +#include "fs.h" +#include "kprintf.h" + +/* + * the kernel's page table. + */ +pagetable_t kernel_pagetable; + +extern char etext[]; // kernel.ld sets this to end of kernel code. + +extern char trampoline[]; // trampoline.S + +// Make a direct-map page table for the kernel. 
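+// ("direct-map" means the kernel mappings below are identity mappings,
+// kvmmap(kpgtbl, X, X, ...), so kernel virtual addresses equal physical
+// addresses; the main exception is the trampoline page, mapped at the
+// highest virtual address.)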
+pagetable_t +kvmmake(void) +{ + pagetable_t kpgtbl; + + kpgtbl = (pagetable_t) kalloc(); + memset(kpgtbl, 0, PGSIZE); + + // uart registers + kvmmap(kpgtbl, UART0, UART0, PGSIZE, PTE_R | PTE_W); + + // virtio mmio disk interface + kvmmap(kpgtbl, VIRTIO0, VIRTIO0, PGSIZE, PTE_R | PTE_W); + + // PLIC + kvmmap(kpgtbl, PLIC, PLIC, 0x4000000, PTE_R | PTE_W); + + // map kernel text executable and read-only. + kvmmap(kpgtbl, KERNBASE, KERNBASE, (uint64)etext-KERNBASE, PTE_R | PTE_X); + + // map kernel data and the physical RAM we'll make use of. + kvmmap(kpgtbl, (uint64)etext, (uint64)etext, PHYSTOP-(uint64)etext, PTE_R | PTE_W); + + // map the trampoline for trap entry/exit to + // the highest virtual address in the kernel. + kvmmap(kpgtbl, TRAMPOLINE, (uint64)trampoline, PGSIZE, PTE_R | PTE_X); + + // allocate and map a kernel stack for each process. + proc_mapstacks(kpgtbl); + + return kpgtbl; +} + +// Initialize the one kernel_pagetable +void +kvminit(void) +{ + kernel_pagetable = kvmmake(); +} + +// Switch h/w page table register to the kernel's page table, +// and enable paging. +void +kvminithart() +{ + // wait for any previous writes to the page table memory to finish. + sfence_vma(); + + w_satp(MAKE_SATP(kernel_pagetable)); + + // flush stale entries from the TLB. + sfence_vma(); +} + +// Return the address of the PTE in page table pagetable +// that corresponds to virtual address va. If alloc!=0, +// create any required page-table pages. +// +// The risc-v Sv39 scheme has three levels of page-table +// pages. A page-table page contains 512 64-bit PTEs. +// A 64-bit virtual address is split into five fields: +// 39..63 -- must be zero. +// 30..38 -- 9 bits of level-2 index. +// 21..29 -- 9 bits of level-1 index. +// 12..20 -- 9 bits of level-0 index. +// 0..11 -- 12 bits of byte offset within the page. +pte_t * +walk(pagetable_t pagetable, uint64 va, int alloc) +{ + if(va >= MAXVA) + panic("walk"); + + for(int level = 2; level > 0; level--) { + pte_t *pte = &pagetable[PX(level, va)]; + if(*pte & PTE_V) { + pagetable = (pagetable_t)PTE2PA(*pte); + } else { + if(!alloc || (pagetable = (pde_t*)kalloc()) == 0) + return 0; + memset(pagetable, 0, PGSIZE); + *pte = PA2PTE(pagetable) | PTE_V; + } + } + return &pagetable[PX(0, va)]; +} + +// Look up a virtual address, return the physical address, +// or 0 if not mapped. +// Can only be used to look up user pages. +uint64 +walkaddr(pagetable_t pagetable, uint64 va) +{ + pte_t *pte; + uint64 pa; + + if(va >= MAXVA) + return 0; + + pte = walk(pagetable, va, 0); + if(pte == 0) + return 0; + if((*pte & PTE_V) == 0) + return 0; + if((*pte & PTE_U) == 0) + return 0; + pa = PTE2PA(*pte); + return pa; +} + +// add a mapping to the kernel page table. +// only used when booting. +// does not flush TLB or enable paging. +void +kvmmap(pagetable_t kpgtbl, uint64 va, uint64 pa, uint64 sz, int perm) +{ + if(mappages(kpgtbl, va, sz, pa, perm) != 0) + panic("kvmmap"); +} + +// Create PTEs for virtual addresses starting at va that refer to +// physical addresses starting at pa. +// va and size MUST be page-aligned. +// Returns 0 on success, -1 if walk() couldn't +// allocate a needed page-table page. 
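+// Example (illustrative): mappages(pagetable, va, 2*PGSIZE, pa, PTE_R|PTE_W)
+// installs two leaf PTEs, one per page in [va, va+2*PGSIZE), each pointing
+// at the corresponding physical page starting at pa.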
+int +mappages(pagetable_t pagetable, uint64 va, uint64 size, uint64 pa, int perm) +{ + uint64 a, last; + pte_t *pte; + + if((va % PGSIZE) != 0) + panic("mappages: va not aligned"); + + if((size % PGSIZE) != 0) + panic("mappages: size not aligned"); + + if(size == 0) + panic("mappages: size"); + + a = va; + last = va + size - PGSIZE; + for(;;){ + if((pte = walk(pagetable, a, 1)) == 0) + return -1; + if(*pte & PTE_V) + panic("mappages: remap"); + *pte = PA2PTE(pa) | perm | PTE_V; + if(a == last) + break; + a += PGSIZE; + pa += PGSIZE; + } + return 0; +} + +// Remove npages of mappings starting from va. va must be +// page-aligned. The mappings must exist. +// Optionally free the physical memory. +void +uvmunmap(pagetable_t pagetable, uint64 va, uint64 npages, int do_free) +{ + uint64 a; + pte_t *pte; + + if((va % PGSIZE) != 0) + panic("uvmunmap: not aligned"); + + for(a = va; a < va + npages*PGSIZE; a += PGSIZE){ + if((pte = walk(pagetable, a, 0)) == 0) + panic("uvmunmap: walk"); + if((*pte & PTE_V) == 0) + panic("uvmunmap: not mapped"); + if(PTE_FLAGS(*pte) == PTE_V) + panic("uvmunmap: not a leaf"); + if(do_free){ + uint64 pa = PTE2PA(*pte); + kfree((void*)pa); + } + *pte = 0; + } +} + +// create an empty user page table. +// returns 0 if out of memory. +pagetable_t +uvmcreate() +{ + pagetable_t pagetable; + pagetable = (pagetable_t) kalloc(); + if(pagetable == 0) + return 0; + memset(pagetable, 0, PGSIZE); + return pagetable; +} + +// Load the user initcode into address 0 of pagetable, +// for the very first process. +// sz must be less than a page. +void +uvmfirst(pagetable_t pagetable, uchar *src, uint sz) +{ + char *mem; + + if(sz >= PGSIZE) + panic("uvmfirst: more than a page"); + mem = kalloc(); + memset(mem, 0, PGSIZE); + mappages(pagetable, 0, PGSIZE, (uint64)mem, PTE_W|PTE_R|PTE_X|PTE_U); + memmove(mem, src, sz); +} + +// Allocate PTEs and physical memory to grow process from oldsz to +// newsz, which need not be page aligned. Returns new size or 0 on error. +uint64 +uvmalloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz, int xperm) +{ + char *mem; + uint64 a; + + if(newsz < oldsz) + return oldsz; + + oldsz = PGROUNDUP(oldsz); + for(a = oldsz; a < newsz; a += PGSIZE){ + mem = kalloc(); + if(mem == 0){ + uvmdealloc(pagetable, a, oldsz); + return 0; + } + memset(mem, 0, PGSIZE); + if(mappages(pagetable, a, PGSIZE, (uint64)mem, PTE_R|PTE_U|xperm) != 0){ + kfree(mem); + uvmdealloc(pagetable, a, oldsz); + return 0; + } + } + return newsz; +} + +// Deallocate user pages to bring the process size from oldsz to +// newsz. oldsz and newsz need not be page-aligned, nor does newsz +// need to be less than oldsz. oldsz can be larger than the actual +// process size. Returns the new process size. +uint64 +uvmdealloc(pagetable_t pagetable, uint64 oldsz, uint64 newsz) +{ + if(newsz >= oldsz) + return oldsz; + + if(PGROUNDUP(newsz) < PGROUNDUP(oldsz)){ + int npages = (PGROUNDUP(oldsz) - PGROUNDUP(newsz)) / PGSIZE; + uvmunmap(pagetable, PGROUNDUP(newsz), npages, 1); + } + + return newsz; +} + +// Recursively free page-table pages. +// All leaf mappings must already have been removed. +void +freewalk(pagetable_t pagetable, int reallyfree) +{ + // there are 2^9 = 512 PTEs in a page table. + for(int i = 0; i < 512; i++){ + pte_t pte = pagetable[i]; + if((pte & PTE_V) && (pte & (PTE_R|PTE_W|PTE_X)) == 0){ + // this PTE points to a lower-level page table. 
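+      // (in Sv39, a PTE that is valid but has R, W and X all clear is
+      // an interior entry: it holds the physical page number of the
+      // next-level page-table page rather than mapping a leaf page.)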
+ uint64 child = PTE2PA(pte); + freewalk((pagetable_t)child, reallyfree); + pagetable[i] = 0; + } else if(reallyfree && pte & PTE_V){ + panic("freewalk: leaf"); + } + } + kfree((void*)pagetable); +} + +// Free user memory pages, +// then free page-table pages. +void +uvmfree(pagetable_t pagetable, uint64 sz, int reallyfree) +{ + if(sz > 0) + uvmunmap(pagetable, 0, PGROUNDUP(sz)/PGSIZE, 1); + freewalk(pagetable, reallyfree); +} + +// Given a parent process's page table, copy +// its memory into a child's page table. +// Copies both the page table and the +// physical memory. +// returns 0 on success, -1 on failure. +// frees any allocated pages on failure. +int +uvmcopy(pagetable_t old, pagetable_t new, uint64 sz) +{ + pte_t *pte; + uint64 pa, i; + uint flags; + char *mem; + + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walk(old, i, 0)) == 0) + panic("uvmcopy: pte should exist"); + if((*pte & PTE_V) == 0) + panic("uvmcopy: page not present"); + pa = PTE2PA(*pte); + flags = PTE_FLAGS(*pte); + if((mem = kalloc()) == 0) + goto err; + memmove(mem, (char*)pa, PGSIZE); + if(mappages(new, i, PGSIZE, (uint64)mem, flags) != 0){ + kfree(mem); + goto err; + } + } + return 0; + + err: + uvmunmap(new, 0, i / PGSIZE, 1); // TODO: Reclaim other allocated pages? + return -1; +} + +int +uvmcopyshallow(pagetable_t old, pagetable_t new, uint64 sz) +{ + pte_t *pte; + uint64 pa, i; + uint flags; + //char *mem; + + for(i = 0; i < sz; i += PGSIZE){ + if((pte = walk(old, i, 0)) == 0) + panic("uvmcopy: pte should exist"); + if((*pte & PTE_V) == 0) + panic("uvmcopy: page not present"); + pa = PTE2PA(*pte); + flags = PTE_FLAGS(*pte); + //if((mem = kalloc()) == 0) + // goto err; + //memmove(mem, (char*)pa, PGSIZE); + if(mappages(new, i, PGSIZE, (uint64)pa, flags) != 0){ + //kfree(mem); + goto err; + } + } + return 0; + + err: + uvmunmap(new, 0, i / PGSIZE, 0); + return -1; +} + +// mark a PTE invalid for user access. +// used by exec for the user stack guard page. +void +uvmclear(pagetable_t pagetable, uint64 va) +{ + pte_t *pte; + + pte = walk(pagetable, va, 0); + if(pte == 0) + panic("uvmclear"); + *pte &= ~PTE_U; +} + +// Copy from kernel to user. +// Copy len bytes from src to virtual address dstva in a given page table. +// Return 0 on success, -1 on error. +int +copyout(pagetable_t pagetable, uint64 dstva, char *src, uint64 len) +{ + uint64 n, va0, pa0; + pte_t *pte; + + while(len > 0){ + va0 = PGROUNDDOWN(dstva); + if(va0 >= MAXVA) + return -1; + pte = walk(pagetable, va0, 0); + if(pte == 0 || (*pte & PTE_V) == 0 || (*pte & PTE_U) == 0 || + (*pte & PTE_W) == 0) + return -1; + pa0 = PTE2PA(*pte); + n = PGSIZE - (dstva - va0); + if(n > len) + n = len; + memmove((void *)(pa0 + (dstva - va0)), src, n); + + len -= n; + src += n; + dstva = va0 + PGSIZE; + } + return 0; +} + +// Copy from user to kernel. +// Copy len bytes to dst from virtual address srcva in a given page table. +// Return 0 on success, -1 on error. +int +copyin(pagetable_t pagetable, char *dst, uint64 srcva, uint64 len) +{ + uint64 n, va0, pa0; + + while(len > 0){ + va0 = PGROUNDDOWN(srcva); + pa0 = walkaddr(pagetable, va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (srcva - va0); + if(n > len) + n = len; + memmove(dst, (void *)(pa0 + (srcva - va0)), n); + + len -= n; + dst += n; + srcva = va0 + PGSIZE; + } + return 0; +} + +// Copy a null-terminated string from user to kernel. +// Copy bytes to dst from virtual address srcva in a given page table, +// until a '\0', or max. +// Return 0 on success, -1 on error. 
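+// (illustrative) a typical caller hands in a user pointer taken from a
+// system-call argument, e.g. copyinstr(pagetable, kbuf, user_srcva,
+// sizeof(kbuf)), and treats -1 as an unmapped or non-user address.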
+int +copyinstr(pagetable_t pagetable, char *dst, uint64 srcva, uint64 max) +{ + uint64 n, va0, pa0; + int got_null = 0; + + while(got_null == 0 && max > 0){ + va0 = PGROUNDDOWN(srcva); + pa0 = walkaddr(pagetable, va0); + if(pa0 == 0) + return -1; + n = PGSIZE - (srcva - va0); + if(n > max) + n = max; + + char *p = (char *) (pa0 + (srcva - va0)); + while(n > 0){ + if(*p == '\0'){ + *dst = '\0'; + got_null = 1; + break; + } else { + *dst = *p; + } + --n; + --max; + p++; + dst++; + } + + srcva = va0 + PGSIZE; + } + if(got_null){ + return 0; + } else { + return -1; + } +} diff --git a/vmrd.c b/vmrd.c new file mode 100644 index 0000000..9535bb6 --- /dev/null +++ b/vmrd.c @@ -0,0 +1,65 @@ +// This is NEW CODE, a VM ram/disk access driver for use in the VM +// This driver is intended to be very simple to read/write blocks. +#include "types.h" +#include "param.h" +#include "riscv.h" +#include "defs.h" +#include "vmrd.h" + +sched_spinlock_t vmrd_lock; + +// This doesn't really belong here but is used to enable/disable debug +// tracing in the VM. +void vmrd_settracing(int onoroff) { + if (onoroff) { + vmrd_action = 't'; + } else { + vmrd_action = 'e'; + } + return; +} + +// Returns a major version number >= 1 if the virtual ram/disk device is +// present and 0 otherwise. +int vmrd_present() { + if (vmrd_magic == VMRD_MAGIC) { + return 1; // Only 1 version exists for now + } else { + return 0; + } +} + + +// Initialises vmrd, returning a non-zero value on failure (e.g. if not present) +int vmrd_init() { + if (!vmrd_present()) { + return -1; + } + + initlock(&vmrd_lock, "VMRD"); + + return 0; +} + +// Performs a read/write of a block. +int vmrd_rw(diskio_buffer_t* buffer, int writing) { + acquire(&vmrd_lock); + + vmrd_blksize = (unsigned int) buffer->owner->blocksize; + vmrd_memaddr = (unsigned long) buffer->data; + vmrd_blkaddr = buffer->blocknumber; + vmrd_action = writing ? 'w' : 'r'; + + while (vmrd_action != 's') { + if (vmrd_action == 'f') { + + release(&vmrd_lock); + + return -1; + } + } + + release(&vmrd_lock); + + return 0; +} diff --git a/vmrd.h b/vmrd.h new file mode 100644 index 0000000..5672062 --- /dev/null +++ b/vmrd.h @@ -0,0 +1,42 @@ +// This is NEW CODE, a VM ram/disk access driver for use in the VM +// This driver is intended to be very simple to read/write blocks. +#ifndef _VMRD_H +#define _VMRD_H + +#include "diskio.h" + +// The address of the first register is the same as for virtio disk but +// the interface (to simplify kernel memory maps) but will use a +// different magic number. +#define VMRD_BASEADDRESS 0x0000000010001000UL +#define VMRD_REG_MAGIC 0 +#define VMRD_REG_BLKSIZE 4 +#define VMRD_REG_MEMADDR 8 +#define VMRD_REG_BLKADDR 16 +#define VMRD_REG_ACTION 24 +// The magic number is BA5DB105 +#define VMRD_MAGIC 0xBA5DB105 + +// These defines work like global variables for accessing the registers. +#define vmrd_magic *((unsigned int*)(VMRD_BASEADDRESS + VMRD_REG_MAGIC)) +#define vmrd_blksize *((unsigned int*)(VMRD_BASEADDRESS + VMRD_REG_BLKSIZE)) +#define vmrd_memaddr *((unsigned long*)(VMRD_BASEADDRESS + VMRD_REG_MEMADDR)) +#define vmrd_blkaddr *((unsigned long*)(VMRD_BASEADDRESS + VMRD_REG_BLKADDR)) +#define vmrd_action *((unsigned int*)(VMRD_BASEADDRESS + VMRD_REG_ACTION)) + +// Returns a major version number >= 1 if the virtual ram/disk device is +// present and 0 otherwise. +int vmrd_present(); + +// Initialises vmrd, returning a non-zero value on failure (e.g. if not present) +int vmrd_init(); + +// Performs a read/write of a block. 
+int vmrd_rw(diskio_buffer_t* buffer, int writing);
+
+// This doesn't really belong here but is used to enable/disable debug
+// tracing in the VM.
+void vmrd_settracing(int onoroff);
+
+// Closes the #ifndef _VMRD_H include guard at the top of this file:
+#endif
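For reference, a minimal usage sketch of the vmrd interface (illustrative only, not part of the patch above). It assumes a diskio_buffer_t from diskio.h -- which is not included in this diff -- whose owner->blocksize, data and blocknumber fields have already been filled in, exactly as vmrd_rw() expects; the include list mirrors vmrd.c.

// Illustrative sketch only.
#include "types.h"
#include "param.h"
#include "riscv.h"
#include "defs.h"
#include "vmrd.h"   // pulls in diskio.h for diskio_buffer_t

// At boot: probe for the VM ram/disk and set up its lock.
// Returns 0 on success, -1 if the device is absent.
int
vmrd_setup(void)
{
  if(!vmrd_present())   // magic register must read back VMRD_MAGIC
    return -1;
  return vmrd_init();   // initialises vmrd_lock; 0 on success
}

// Later: read one block. vmrd_rw() fills the block-size, memory-address
// and block-number registers, stores 'r' (or 'w' for a write) into the
// action register, then spins until the VM reports 's' (done) or 'f'
// (failure), returning 0 or -1 respectively.
int
vmrd_read_one(diskio_buffer_t *b)
{
  return vmrd_rw(b, 0);   // second argument: 0 = read, 1 = write
}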