slkern/virtio_disk.c

332 lines
8.7 KiB
C
Raw Normal View History

// TODO: CHECK/REPLACE/UPDATE OLD CODE (this file is based on xv6)
//
// driver for qemu's virtio disk device.
// uses qemu's mmio interface to virtio.
//
// qemu ... -drive file=fs.img,if=none,format=raw,id=x0 -device virtio-blk-device,drive=x0,bus=virtio-mmio-bus.0
//
#include "types.h"
#include "riscv.h"
#include "defs.h"
#include "param.h"
#include "memlayout.h"
#include "sched.h"
#include "fs.h"
#include "diskio.h"
#include "virtio.h"
#include "kprintf.h"
// R(r): pointer to the 32-bit virtio mmio register located r bytes
// past VIRTIO0. the pointer is volatile-qualified, so each *R(x) is
// an actual load from / store to the device register.
#define R(r) ((volatile uint32 *)(VIRTIO0 + (r)))
// driver state. the commented-out wrapper that used to surround these
// declarations was `static struct disk { ... } disk;` (apparently from
// the xv6 original); the members are now separate globals that carry
// a disk_ prefix instead.
//
// a set (not a ring) of DMA descriptors, with which the
// driver tells the device where to read and write individual
// disk operations. there are NUM descriptors.
// most commands consist of a "chain" (a linked list) of a couple of
// these descriptors.
// allocated with kalloc() and zeroed in virtio_disk_init().
struct virtq_desc *disk_desc;
// a ring in which the driver writes descriptor numbers
// that the driver would like the device to process. it only
// includes the head descriptor of each chain. the ring has
// NUM elements.
// allocated with kalloc() and zeroed in virtio_disk_init().
struct virtq_avail *disk_avail;
// a ring in which the device writes descriptor numbers that
// the device has finished processing (just the head of each chain).
// there are NUM used ring entries.
// allocated with kalloc() and zeroed in virtio_disk_init().
struct virtq_used *disk_used;
// our own book-keeping.
// disk_free[i] is 1 while descriptor i is unallocated and 0 while it
// is part of a chain. &disk_free[0] is also the channel that
// virtio_disk_rw() sleeps on and free_desc() wakes.
char disk_free[NUM]; // is a descriptor free?
// running count of used-ring entries already handled by
// virtio_disk_intr(); compared against disk_used->idx and reduced
// modulo NUM to index disk_used->ring.
uint16 disk_used_idx;
// track info about in-flight operations,
// for use when completion interrupt arrives.
// indexed by first descriptor index of chain.
struct {
diskio_buffer_t *b; // buffer of the request; cleared by virtio_disk_rw() once it completes
char status; // result byte the device writes; 0 means success, preset to 0xff before submit
} disk_info[NUM];
// disk command headers.
// one-for-one with descriptors, for convenience.
struct virtio_blk_req disk_ops[NUM];
// held by virtio_disk_rw() and virtio_disk_intr() while they touch
// the state above; initialized in virtio_disk_init().
sched_spinlock_t disk_vdisk_lock;
// (end of the former `struct disk` members.)
void
virtio_disk_init(void)
{
uint32 status = 0;
initlock(&disk_vdisk_lock, "virtio_disk");
if(*R(VIRTIO_MMIO_MAGIC_VALUE) != 0x74726976 ||
*R(VIRTIO_MMIO_VERSION) != 2 ||
*R(VIRTIO_MMIO_DEVICE_ID) != 2 ||
*R(VIRTIO_MMIO_VENDOR_ID) != 0x554d4551){
panic("could not find virtio disk");
}
// reset device
*R(VIRTIO_MMIO_STATUS) = status;
// set ACKNOWLEDGE status bit
status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
*R(VIRTIO_MMIO_STATUS) = status;
// set DRIVER status bit
status |= VIRTIO_CONFIG_S_DRIVER;
*R(VIRTIO_MMIO_STATUS) = status;
// negotiate features
uint64 features = *R(VIRTIO_MMIO_DEVICE_FEATURES);
features &= ~(1 << VIRTIO_BLK_F_RO);
features &= ~(1 << VIRTIO_BLK_F_SCSI);
features &= ~(1 << VIRTIO_BLK_F_CONFIG_WCE);
features &= ~(1 << VIRTIO_BLK_F_MQ);
features &= ~(1 << VIRTIO_F_ANY_LAYOUT);
features &= ~(1 << VIRTIO_RING_F_EVENT_IDX);
features &= ~(1 << VIRTIO_RING_F_INDIRECT_DESC);
*R(VIRTIO_MMIO_DRIVER_FEATURES) = features;
// tell device that feature negotiation is complete.
status |= VIRTIO_CONFIG_S_FEATURES_OK;
*R(VIRTIO_MMIO_STATUS) = status;
// re-read status to ensure FEATURES_OK is set.
status = *R(VIRTIO_MMIO_STATUS);
if(!(status & VIRTIO_CONFIG_S_FEATURES_OK))
panic("virtio disk FEATURES_OK unset");
// initialize queue 0.
*R(VIRTIO_MMIO_QUEUE_SEL) = 0;
// ensure queue 0 is not in use.
if(*R(VIRTIO_MMIO_QUEUE_READY))
panic("virtio disk should not be ready");
// check maximum queue size.
uint32 max = *R(VIRTIO_MMIO_QUEUE_NUM_MAX);
if(max == 0)
panic("virtio disk has no queue 0");
if(max < NUM)
panic("virtio disk max queue too short");
// allocate and zero queue memory.
disk_desc = kalloc();
disk_avail = kalloc();
disk_used = kalloc();
if(!disk_desc || !disk_avail || !disk_used)
panic("virtio disk kalloc");
memset(disk_desc, 0, PGSIZE);
memset(disk_avail, 0, PGSIZE);
memset(disk_used, 0, PGSIZE);
// set queue size.
*R(VIRTIO_MMIO_QUEUE_NUM) = NUM;
// write physical addresses.
*R(VIRTIO_MMIO_QUEUE_DESC_LOW) = (uint64)(disk_desc);
*R(VIRTIO_MMIO_QUEUE_DESC_HIGH) = (uint64)(disk_desc) >> 32;
*R(VIRTIO_MMIO_DRIVER_DESC_LOW) = (uint64)(disk_avail);
*R(VIRTIO_MMIO_DRIVER_DESC_HIGH) = (uint64)(disk_avail) >> 32;
*R(VIRTIO_MMIO_DEVICE_DESC_LOW) = (uint64)(disk_used);
*R(VIRTIO_MMIO_DEVICE_DESC_HIGH) = (uint64)(disk_used) >> 32;
// queue is ready.
*R(VIRTIO_MMIO_QUEUE_READY) = 0x1;
// all NUM descriptors start out unused.
for(int i = 0; i < NUM; i++)
disk_free[i] = 1;
// tell device we're completely ready.
status |= VIRTIO_CONFIG_S_DRIVER_OK;
*R(VIRTIO_MMIO_STATUS) = status;
// plic.c and trap.c arrange for interrupts from VIRTIO0_IRQ.
}
// Claim the lowest-numbered free descriptor.
// Returns its index after clearing its disk_free flag, or -1 when all
// NUM descriptors are currently taken.
static int
alloc_desc()
{
  int slot = 0;

  // skip over descriptors that are already in use.
  while(slot < NUM && !disk_free[slot])
    slot++;

  if(slot >= NUM)
    return -1;

  disk_free[slot] = 0;
  return slot;
}
// Return descriptor i to the free pool.
//
// i: descriptor index; must lie in [0, NUM) and must currently be
//    allocated, otherwise the function panics.
//
// Clears the descriptor's fields, sets its disk_free flag, and wakes
// anything sleeping on &disk_free[0] (virtio_disk_rw() sleeps there
// while it waits for descriptors).
static void
free_desc(int i)
{
  // i is signed, so reject negative values as well as values past the
  // end; either would index disk_free/disk_desc out of bounds.
  if(i < 0 || i >= NUM)
    panic("free_desc 1");
  // freeing a descriptor twice indicates corrupted book-keeping.
  if(disk_free[i])
    panic("free_desc 2");
  disk_desc[i].addr = 0;
  disk_desc[i].len = 0;
  disk_desc[i].flags = 0;
  disk_desc[i].next = 0;
  disk_free[i] = 1;
  sched_wake(&disk_free[0]);
}
// Release every descriptor of the chain whose head is descriptor i,
// following the next links for as long as VRING_DESC_F_NEXT is set.
static void
free_chain(int i)
{
  for(;;){
    // free_desc() zeroes the descriptor, so capture the link first.
    int has_next = disk_desc[i].flags & VRING_DESC_F_NEXT;
    int successor = disk_desc[i].next;

    free_desc(i);

    if(!has_next)
      return;
    i = successor;
  }
}
// Obtain the three descriptors one disk transfer needs; they do not
// have to be adjacent.
//
// idx: array of at least three ints that receives the indices.
// Returns 0 on success. If fewer than three are available, the ones
// already taken are released again and -1 is returned.
static int
alloc3_desc(int *idx)
{
  int got = 0;

  while(got < 3){
    int d = alloc_desc();
    idx[got] = d;
    if(d < 0){
      // out of descriptors: undo the partial allocation, oldest first.
      int undo = 0;
      while(undo < got){
        free_desc(idx[undo]);
        undo++;
      }
      return -1;
    }
    got++;
  }
  return 0;
}
// Carry out one block transfer between b->data and the disk, returning
// only after the device has completed it.
//
// b:     buffer for the transfer. b->blocknumber selects the block and
//        b->data supplies or receives BSIZE bytes.
// write: nonzero sends b->data to the disk (VIRTIO_BLK_T_OUT); zero
//        fills b->data from the disk (VIRTIO_BLK_T_IN).
//
// Holds disk_vdisk_lock throughout, passing it to sleep() whenever it
// has to wait (presumably sleep() drops the lock while blocked, as in
// xv6 — confirm). There is no return value; a nonzero device status is
// caught by the panic in virtio_disk_intr().
void
virtio_disk_rw(diskio_buffer_t *b, int write)
{
  // block number -> 512-byte sector number. widen before multiplying so
  // the product is computed in 64 bits even if blocknumber is a
  // narrower field; otherwise it could wrap for large block numbers.
  uint64 sector = (uint64)b->blocknumber * (BSIZE / 512);

  acquire(&disk_vdisk_lock);

  // the spec's Section 5.2 says that legacy block operations use
  // three descriptors: one for type/reserved/sector, one for the
  // data, one for a 1-byte status result.

  // allocate the three descriptors, sleeping until free_desc() wakes
  // us if they are not all available.
  int idx[3];
  while(1){
    if(alloc3_desc(idx) == 0) {
      break;
    }
    sleep(&disk_free[0], &disk_vdisk_lock);
  }

  // format the three descriptors.
  // qemu's virtio-blk.c reads them.

  // descriptor 0: the request header.
  struct virtio_blk_req *buf0 = &disk_ops[idx[0]];

  if(write)
    buf0->type = VIRTIO_BLK_T_OUT; // write the disk
  else
    buf0->type = VIRTIO_BLK_T_IN; // read the disk
  buf0->reserved = 0;
  buf0->sector = sector;

  disk_desc[idx[0]].addr = (uint64) buf0;
  disk_desc[idx[0]].len = sizeof(struct virtio_blk_req);
  disk_desc[idx[0]].flags = VRING_DESC_F_NEXT;
  disk_desc[idx[0]].next = idx[1];

  // descriptor 1: the data buffer.
  disk_desc[idx[1]].addr = (uint64) (b->data);
  disk_desc[idx[1]].len = BSIZE;
  if(write)
    disk_desc[idx[1]].flags = 0; // device reads b->data
  else
    disk_desc[idx[1]].flags = VRING_DESC_F_WRITE; // device writes b->data
  disk_desc[idx[1]].flags |= VRING_DESC_F_NEXT;
  disk_desc[idx[1]].next = idx[2];

  // descriptor 2: the one-byte status, preset to a nonzero sentinel.
  disk_info[idx[0]].status = 0xff; // device writes 0 on success
  disk_desc[idx[2]].addr = (uint64) &disk_info[idx[0]].status;
  disk_desc[idx[2]].len = 1;
  disk_desc[idx[2]].flags = VRING_DESC_F_WRITE; // device writes the status
  disk_desc[idx[2]].next = 0;

  // record diskio_buffer_t for virtio_disk_intr().
  b->isdisk = 1;
  disk_info[idx[0]].b = b;

  // tell the device the first index in our chain of descriptors.
  disk_avail->ring[disk_avail->idx % NUM] = idx[0];

  // the ring entry must be written before the index that publishes it.
  __sync_synchronize();

  // tell the device another avail ring entry is available.
  disk_avail->idx += 1; // not % NUM ...

  // the index must be written before the notification below.
  __sync_synchronize();

  *R(VIRTIO_MMIO_QUEUE_NOTIFY) = 0; // value is queue number

  // Wait for virtio_disk_intr() to say request has finished.
  while(b->isdisk == 1) {
    sleep(b, &disk_vdisk_lock);
  }

  // request done: drop the book-keeping entry and recycle the chain.
  disk_info[idx[0]].b = 0;
  free_chain(idx[0]);

  release(&disk_vdisk_lock);
}
void
virtio_disk_intr()
{
//printf("Disk interrupt\n");
acquire(&disk_vdisk_lock);
// the device won't raise another interrupt until we tell it
// we've seen this interrupt, which the following line does.
// this may race with the device writing new entries to
// the "used" ring, in which case we may process the new
// completion entries in this interrupt, and have nothing to do
// in the next interrupt, which is harmless.
*R(VIRTIO_MMIO_INTERRUPT_ACK) = *R(VIRTIO_MMIO_INTERRUPT_STATUS) & 0x3;
__sync_synchronize();
// the device increments disk_used->idx when it
// adds an entry to the used ring.
while(disk_used_idx != disk_used->idx){
__sync_synchronize();
int _id = disk_used->ring[disk_used_idx % NUM]._id;
if(disk_info[_id].status != 0)
panic("virtio_disk_intr status");
diskio_buffer_t *b = disk_info[_id].b;
b->isdisk = 0; // disk is done with buf
sched_wake(b);
disk_used_idx += 1;
}
release(&disk_vdisk_lock);
}