// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6) // // driver for qemu's virtio disk device. // uses qemu's mmio interface to virtio. // // qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0 // #include "types.h" #include "riscv.h" #include "defs.h" #include "param.h" #include "memlayout.h" #include "sched.h" #include "fs.h" #include "diskio.h" #include "virtio.h" #include "kprintf.h" // the address of virtio mmio register r. #define R(r) ((volatile uint32 *)(VIRTIO0 + (r))) //static struct disk { // a set (not a ring) of DMA descriptors, with which the // driver tells the device where to read and write individual // disk operations. there are NUM descriptors. // most commands consist of a "chain" (a linked list) of a couple of // these descriptors. struct virtq_desc *disk_desc; // a ring in which the driver writes descriptor numbers // that the driver would like the device to process. it only // includes the head descriptor of each chain. the ring has // NUM elements. struct virtq_avail *disk_avail; // a ring in which the device writes descriptor numbers that // the device has finished processing (just the head of each chain). // there are NUM used ring entries. struct virtq_used *disk_used; // our own book-keeping. char disk_free[NUM]; // is a descriptor free? uint16 disk_used_idx; // we've looked this far in used[2..NUM]. // track info about in-flight operations, // for use when completion interrupt arrives. // indexed by first descriptor index of chain. struct { diskio_buffer_t *b; char status; } disk_info[NUM]; // disk command headers. // one-for-one with descriptors, for convenience. struct virtio_blk_req disk_ops[NUM]; sched_spinlock_t disk_vdisk_lock; //} disk; void virtio_disk_init(void) { uint32 status = 0; initlock(&disk_vdisk_lock, "virtio_disk"); if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 || *R(VIRTIO_MMIO_VERSION) != 2 || *R(VIRTIO_MMIO_DEVICE_ID) != 2 || *R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){ panic("could not find virtio disk"); } // reset device *R(VIRTIO_MMIO_STATUS) = status; // set ACKNOWLEDGE status bit status |= VIRTIO_CONFIG_S_ACKNOWLEDGE; *R(VIRTIO_MMIO_STATUS) = status; // set DRIVER status bit status |= VIRTIO_CONFIG_S_DRIVER; *R(VIRTIO_MMIO_STATUS) = status; // negotiate features uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES); features &= ~(1 << VIRTIO_BLK_F_RO); features &= ~(1 << VIRTIO_BLK_F_SCSI); features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE); features &= ~(1 << VIRTIO_BLK_F_MQ); features &= ~(1 << VIRTIO_F_ANY_LAYOUT); features &= ~(1 << VIRTIO_RING_F_EVENT_IDX); features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC); *R(VIRTIO_MMIO_DRIVER_FEATURES) = features; // tell device that feature negotiation is complete. status |= VIRTIO_CONFIG_S_FEATURES_OK; *R(VIRTIO_MMIO_STATUS) = status; // re-read status to ensure FEATURES_OK is set. status = *R(VIRTIO_MMIO_STATUS); if(!(status & VIRTIO_CONFIG_S_FEATURES_OK)) panic("virtio disk FEATURES_OK unset"); // initialize queue 0. *R(VIRTIO_MMIO_QUEUE_SEL) = 0; // ensure queue 0 is not in use. if(*R(VIRTIO_MMIO_QUEUE_READY)) panic("virtio disk should not be ready"); // check maximum queue size. uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX); if(max == 0) panic("virtio disk has no queue 0"); if(max < NUM) panic("virtio disk max queue too short"); // allocate and zero queue memory. disk_desc = kalloc(); disk_avail = kalloc(); disk_used = kalloc(); if(!disk_desc || !disk_avail || !disk_used) panic("virtio disk kalloc"); memset(disk_desc, 0, PGSIZE); memset(disk_avail, 0, PGSIZE); memset(disk_used, 0, PGSIZE); // set queue size. *R(VIRTIO_MMIO_QUEUE_NUM) = NUM; // write physical addresses. *R(VIRTIO_MMIO_QUEUE_DESC_LOW) = (uint64)(disk_desc); *R(VIRTIO_MMIO_QUEUE_DESC_HIGH) = (uint64)(disk_desc) >> 32; *R(VIRTIO_MMIO_DRIVER_DESC_LOW) = (uint64)(disk_avail); *R(VIRTIO_MMIO_DRIVER_DESC_HIGH) = (uint64)(disk_avail) >> 32; *R(VIRTIO_MMIO_DEVICE_DESC_LOW) = (uint64)(disk_used); *R(VIRTIO_MMIO_DEVICE_DESC_HIGH) = (uint64)(disk_used) >> 32; // queue is ready. *R(VIRTIO_MMIO_QUEUE_READY) = 0x1; // all NUM descriptors start out unused. for(int i = 0; i < NUM; i++) disk_free[i] = 1; // tell device we're completely ready. status |= VIRTIO_CONFIG_S_DRIVER_OK; *R(VIRTIO_MMIO_STATUS) = status; // plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ. } // find a free descriptor, mark it non-free, return its index. static int alloc_desc() { for(int i = 0; i < NUM; i++){ if(disk_free[i]){ disk_free[i] = 0; return i; } } return -1; } // mark a descriptor as free. static void free_desc(int i) { if(i >= NUM) panic("free_desc 1"); if(disk_free[i]) panic("free_desc 2"); disk_desc[i].addr = 0; disk_desc[i].len = 0; disk_desc[i].flags = 0; disk_desc[i].next = 0; disk_free[i] = 1; sched_wake(&disk_free[0]); } // free a chain of descriptors. static void free_chain(int i) { while(1){ int flag = disk_desc[i].flags; int nxt = disk_desc[i].next; free_desc(i); if(flag & VRING_DESC_F_NEXT) i = nxt; else break; } } // allocate three descriptors (they need not be contiguous). // disk transfers always use three descriptors. static int alloc3_desc(int *idx) { for(int i = 0; i < 3; i++){ idx[i] = alloc_desc(); if(idx[i] < 0){ for(int j = 0; j < i; j++) free_desc(idx[j]); return -1; } } return 0; } void virtio_disk_rw(diskio_buffer_t *b, int write) { uint64 sector = b->blocknumber * (BSIZE / 512); acquire(&disk_vdisk_lock); // the spec's Section 5.2 says that legacy block operations use // three descriptors: one for type/reserved/sector, one for the // data, one for a 1-byte status result. // allocate the three descriptors. int idx[3]; while(1){ if(alloc3_desc(idx) == 0) { break; } sleep(&disk_free[0], &disk_vdisk_lock); } // format the three descriptors. // qemu's virtio-blk.c reads them. struct virtio_blk_req *buf0 = &disk_ops[idx[0]]; if(write) buf0->type = VIRTIO_BLK_T_OUT; // write the disk else buf0->type = VIRTIO_BLK_T_IN; // read the disk buf0->reserved = 0; buf0->sector = sector; disk_desc[idx[0]].addr = (uint64) buf0; disk_desc[idx[0]].len = sizeof(struct virtio_blk_req); disk_desc[idx[0]].flags = VRING_DESC_F_NEXT; disk_desc[idx[0]].next = idx[1]; disk_desc[idx[1]].addr = (uint64) (b->data); disk_desc[idx[1]].len = BSIZE; if(write) disk_desc[idx[1]].flags = 0; // device reads b->data else disk_desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data disk_desc[idx[1]].flags |= VRING_DESC_F_NEXT; disk_desc[idx[1]].next = idx[2]; disk_info[idx[0]].status = 0xff; // device writes 0 on success disk_desc[idx[2]].addr = (uint64) &disk_info[idx[0]].status; disk_desc[idx[2]].len = 1; disk_desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status disk_desc[idx[2]].next = 0; // record diskio_buffer_t for virtio_disk_intr(). b->isdisk = 1; disk_info[idx[0]].b = b; // tell the device the first index in our chain of descriptors. disk_avail->ring[disk_avail->idx % NUM] = idx[0]; __sync_synchronize(); // tell the device another avail ring entry is available. disk_avail->idx += 1; // not % NUM ... __sync_synchronize(); *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number // Wait for virtio_disk_intr() to say request has finished. while(b->isdisk == 1) { sleep(b, &disk_vdisk_lock); } disk_info[idx[0]].b = 0; free_chain(idx[0]); //printf("%s %d\n", write ? "written" : "read", *((int*)(b->data))); release(&disk_vdisk_lock); } void virtio_disk_intr() { //printf("Disk interrupt\n"); acquire(&disk_vdisk_lock); // the device won't raise another interrupt until we tell it // we've seen this interrupt, which the following line does. // this may race with the device writing new entries to // the "used" ring, in which case we may process the new // completion entries in this interrupt, and have nothing to do // in the next interrupt, which is harmless. *R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3; __sync_synchronize(); // the device increments disk_used->idx when it // adds an entry to the used ring. while(disk_used_idx != disk_used->idx){ __sync_synchronize(); int _id = disk_used->ring[disk_used_idx % NUM]._id; if(disk_info[_id].status != 0) panic("virtio_disk_intr status"); diskio_buffer_t *b = disk_info[_id].b; b->isdisk = 0; // disk is done with buf sched_wake(b); disk_used_idx += 1; } release(&disk_vdisk_lock); }