diff --git a/Cargo.lock b/Cargo.lock index 2d63927..41fb29f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5,6 +5,11 @@ name = "adler32" version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "arc-swap" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "backtrace" version = "0.3.37" @@ -201,13 +206,14 @@ dependencies = [ ] [[package]] -name = "pH" +name = "ph" version = "0.1.0" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", "libcitadel 0.1.0 (git+https://github.com/brl/citadel-tools?rev=44d5ce660f1f5cf8a3ad1060b143926a99be5148)", + "signal-hook 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)", "termios 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -286,6 +292,24 @@ dependencies = [ "syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "signal-hook" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", + "signal-hook-registry 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", +] + +[[package]] +name = "signal-hook-registry" +version = "1.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "arc-swap 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)", + "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "sodiumoxide" version = "0.2.4" @@ -426,6 +450,7 @@ dependencies = [ [metadata] "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" +"checksum arc-swap 0.4.3 
(registry+https://github.com/rust-lang/crates.io-index)" = "f1a1eca3195b729bbd64e292ef2f5fff6b1c28504fed762ce2b1013dde4d8e92" "checksum backtrace 0.3.37 (registry+https://github.com/rust-lang/crates.io-index)" = "5180c5a20655b14a819b652fd2378fa5f1697b6c9ddad3e695c2f9cedf6df4e2" "checksum backtrace-sys 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "82a830b4ef2d1124a711c71d263c5abdc710ef8e907bd508c88be475cebc422b" "checksum bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f2fb9e29e72fd6bc12071533d5dc7664cb01480c59406f656d7ac25c7bd8ff7" @@ -459,6 +484,8 @@ dependencies = [ "checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421" "checksum serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "9796c9b7ba2ffe7a9ce53c2287dfc48080f4b2b362fcc245a259b3a7201119dd" "checksum serde_derive 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "4b133a43a1ecd55d4086bd5b4dc6c1751c68b1bfbeba7a5040442022c7e7c02e" +"checksum signal-hook 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4f61c4d59f3aaa9f61bba6450a9b80ba48362fd7d651689e7a10c453b1f6dc68" +"checksum signal-hook-registry 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1797d48f38f91643908bb14e35e79928f9f4b3cefb2420a564dde0991b4358dc" "checksum sodiumoxide 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1c99301d6d59f205d2abc4f0bd168edb5cd8a42bdbe99539773a21360d2a8ff7" "checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" "checksum syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf" diff --git a/Cargo.toml b/Cargo.toml index 98f1f23..471a09e 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,5 +1,5 @@ [package] -name = "pH" +name = 
"ph" version = "0.1.0" authors = ["Bruce Leidl "] edition = "2018" @@ -9,4 +9,5 @@ byteorder="1.0.0" libc = "*" termios = "0.2.2" lazy_static = "1.4.0" +signal-hook = "0.1.10" libcitadel = { git = "https://github.com/brl/citadel-tools", rev="44d5ce660f1f5cf8a3ad1060b143926a99be5148" } diff --git a/src/bin/pH.rs b/src/bin/pH.rs new file mode 100644 index 0000000..ae7b159 --- /dev/null +++ b/src/bin/pH.rs @@ -0,0 +1,9 @@ +#![allow(non_snake_case)] + +use ph::VmConfig; + +fn main() { + VmConfig::new() + .ram_size_megs(2048) + .boot(); +} diff --git a/src/devices/virtio_9p/mod.rs b/src/devices/virtio_9p/mod.rs index bbe51b5..1aa026b 100644 --- a/src/devices/virtio_9p/mod.rs +++ b/src/devices/virtio_9p/mod.rs @@ -4,8 +4,7 @@ use std::thread; use std::path::{PathBuf, Path}; use crate::memory::{GuestRam, MemoryManager}; -use crate::virtio::{self,VirtioBus,VirtioDeviceOps, VirtQueue}; -use crate::vm::Result; +use crate::virtio::{self,VirtioBus,VirtioDeviceOps, VirtQueue, Result}; use crate::devices::virtio_9p::server::Server; use crate::devices::virtio_9p::filesystem::{FileSystem, FileSystemOps}; use self::pdu::PduParser; diff --git a/src/devices/virtio_9p/server.rs b/src/devices/virtio_9p/server.rs index 83ad0f3..9b43901 100644 --- a/src/devices/virtio_9p/server.rs +++ b/src/devices/virtio_9p/server.rs @@ -664,7 +664,7 @@ impl Server { if n == 0 { break; } - pp.chain.inc_offset(n, true); + pp.chain.inc_write_offset(n); nread += n as u32; } pp.w32_at(0, nread as u32); @@ -692,7 +692,7 @@ impl Server { if n == 0 { break; } - pp.chain.inc_offset(n, false); + pp.chain.inc_read_offset(n); nread += n as u32; } pp.read_done()?; diff --git a/src/devices/virtio_block.rs b/src/devices/virtio_block.rs index 5b3af68..25f2c66 100644 --- a/src/devices/virtio_block.rs +++ b/src/devices/virtio_block.rs @@ -1,46 +1,43 @@ -use crate::{vm, disk}; -use crate::virtio::{VirtioBus, VirtioDeviceOps, VirtQueue, DeviceConfigArea, Chain}; -use std::sync::{RwLock, Arc}; -use 
crate::memory::MemoryManager; -use std::{result, io, fmt, thread}; -use crate::devices::virtio_block::Error::IoChainError; use std::io::Write; +use std::sync::{RwLock, Arc}; +use std::{result, io, fmt, thread}; + +use crate::{disk, virtio}; +use crate::virtio::{VirtioBus, VirtioDeviceOps, VirtQueue, DeviceConfigArea, Chain}; +use crate::memory::MemoryManager; use crate::disk::DiskImage; const VIRTIO_BLK_F_RO: u64 = (1 << 5); -//const VIRTIO_BLK_F_BLK_SIZE: u64 = (1 << 6); +const VIRTIO_BLK_F_BLK_SIZE: u64 = (1 << 6); const VIRTIO_BLK_F_FLUSH: u64 = (1 << 9); -//const VIRTIO_BLK_F_DISCARD: u64 = (1 << 13); -//const VIRTIO_BLK_F_WRITE_ZEROES: u64 = (1 << 14); +const VIRTIO_BLK_F_SEG_MAX: u64 = (1 << 2); const VIRTIO_BLK_T_IN: u32 = 0; const VIRTIO_BLK_T_OUT: u32 = 1; const VIRTIO_BLK_T_FLUSH: u32 = 4; const VIRTIO_BLK_T_GET_ID: u32 = 8; -//const VIRTIO_BLK_T_DISCARD: u32 = 11; -//const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13; const VIRTIO_BLK_S_OK: u8 = 0; const VIRTIO_BLK_S_IOERR: u8 = 1; const VIRTIO_BLK_S_UNSUPP: u8 = 2; -const SECTOR_SIZE: usize = 512; +const SECTOR_SHIFT: usize = 9; +const SECTOR_SIZE: usize = 1 << SECTOR_SHIFT; + +const QUEUE_SIZE: usize = 256; -// TODO: -// - feature bits -// - disk image write overlay -// - better error handling for i/o enum Error { IoChainError(io::Error), DiskRead(disk::Error), DiskWrite(disk::Error), DiskFlush(disk::Error), - VirtQueueWait(vm::Error), + VirtQueueWait(virtio::Error), + InvalidReadDescriptor(usize), } impl From for Error { fn from(e: io::Error) -> Self { - IoChainError(e) + Error::IoChainError(e) } } @@ -53,6 +50,7 @@ impl fmt::Display for Error { DiskWrite(e) => write!(f, "error writing disk image: {}", e), DiskFlush(e) => write!(f, "error flushing disk image: {}", e), VirtQueueWait(e) =>write!(f, "error waiting on virtqueue: {}", e), + InvalidReadDescriptor(sz) => write!(f, "virtqueue read descriptor size ({}) is invalid. 
Not a multiple of sector size", sz), } } } @@ -64,11 +62,20 @@ pub struct VirtioBlock { enabled_features: u64, } +const HEADER_SIZE: usize = 16; + const VIRTIO_ID_BLOCK: u16 = 2; +const CAPACITY_OFFSET: usize = 0; +const SEG_MAX_OFFSET: usize = 12; +const BLK_SIZE_OFFSET: usize = 20; +const CONFIG_SIZE: usize = 24; impl VirtioBlock { - pub fn new(disk_image: D) -> Self { - let mut config = DeviceConfigArea::new(8); - config.write_u64(0, disk_image.sector_count()); + + fn new(disk_image: D) -> Self { + let mut config = DeviceConfigArea::new(CONFIG_SIZE); + config.write_u64(CAPACITY_OFFSET, disk_image.sector_count()); + config.write_u32(SEG_MAX_OFFSET, QUEUE_SIZE as u32 - 2); + config.write_u32(BLK_SIZE_OFFSET, 1024); VirtioBlock { disk_image: Some(disk_image), config, @@ -76,18 +83,21 @@ impl VirtioBlock { } } - pub fn create(vbus: &mut VirtioBus, disk_image: D) -> vm::Result<()> { - let feature_bits = if disk_image.read_only() { - VIRTIO_BLK_F_FLUSH|VIRTIO_BLK_F_RO - } else { - VIRTIO_BLK_F_FLUSH - }; + pub fn create(vbus: &mut VirtioBus, disk_image: D) -> virtio::Result<()> { + let feature_bits = VIRTIO_BLK_F_FLUSH | + VIRTIO_BLK_F_BLK_SIZE | + VIRTIO_BLK_F_SEG_MAX | + if disk_image.read_only() { + VIRTIO_BLK_F_RO + } else { + 0 + }; let dev = Arc::new(RwLock::new(VirtioBlock::new(disk_image))); vbus.new_virtio_device(VIRTIO_ID_BLOCK, dev) - .set_queue_sizes(&[256]) - .set_config_size(8) + .set_queue_sizes(&[QUEUE_SIZE]) + .set_config_size(CONFIG_SIZE) .set_features(feature_bits) .register() } @@ -109,20 +119,18 @@ impl VirtioDeviceOps for VirtioBlock { fn start(&mut self, _: &MemoryManager, mut queues: Vec) { let vq = queues.pop().unwrap(); - let mut dev = match self.disk_image.take() { - Some(d) => VirtioBlockDevice::new(vq, d), - None => { - warn!("Unable to start virtio-block device. 
Already started?"); - return; - } - }; + let mut disk = self.disk_image.take().expect("No disk image?"); + if let Err(err) = disk.open() { + warn!("Unable to start virtio-block device: {}", err); + return; + } + let mut dev = VirtioBlockDevice::new(vq, disk); thread::spawn(move || { if let Err(err) = dev.run() { warn!("Error running virtio block device: {}", err); } }); - } } @@ -138,29 +146,31 @@ impl VirtioBlockDevice { fn run(&mut self) -> Result<()> { loop { - let chain = self.vq.wait_next_chain() + let mut chain = self.vq.wait_next_chain() .map_err(Error::VirtQueueWait)?; - match MessageHandler::read_header(&mut self.disk, chain) { - Ok(mut handler) => handler.process_message(), - Err(e) => { - warn!("Error handling virtio_block message: {}", e); + while chain.remaining_read() >= HEADER_SIZE { + match MessageHandler::read_header(&mut self.disk, &mut chain) { + Ok(mut handler) => handler.process_message(), + Err(e) => { + warn!("Error handling virtio_block message: {}", e); + } } } } } } -struct MessageHandler<'a, D: DiskImage> { +struct MessageHandler<'a,'b, D: DiskImage> { disk: &'a mut D, - chain: Chain, + chain: &'b mut Chain, msg_type: u32, sector: u64, } -impl <'a, D: DiskImage> MessageHandler<'a, D> { +impl <'a,'b, D: DiskImage> MessageHandler<'a,'b, D> { - fn read_header(disk: &'a mut D, mut chain: Chain) -> Result { + fn read_header(disk: &'a mut D, chain: &'b mut Chain) -> Result { let msg_type = chain.r32()?; let _ = chain.r32()?; let sector = chain.r64()?; @@ -192,30 +202,39 @@ impl <'a, D: DiskImage> MessageHandler<'a, D> { } } - fn sector_round(sz: usize) -> usize { - (sz / SECTOR_SIZE) * SECTOR_SIZE - } - fn handle_io_in(&mut self) -> Result<()> { - let current = self.chain.current_write_slice(); - let len = Self::sector_round(current.len()); - let buffer = &mut current[..len]; + loop { + let current = self.chain.current_write_slice(); + let nsectors = current.len() >> SECTOR_SHIFT; + if nsectors == 0 { + return Ok(()) + } + let len = nsectors << 
SECTOR_SHIFT; + let buffer = &mut current[..len]; - self.disk.read_sectors(self.sector, buffer) - .map_err(Error::DiskRead)?; - self.chain.inc_offset(len, true); - Ok(()) + self.disk.read_sectors(self.sector, buffer) + .map_err(Error::DiskRead)?; + self.chain.inc_write_offset(len); + self.sector += nsectors as u64; + } } fn handle_io_out(&mut self) -> Result<()> { - let current = self.chain.current_read_slice(); - let len = Self::sector_round(current.len()); - let buffer = &current[..len]; + loop { + let current = self.chain.current_read_slice(); + if current.len() & (SECTOR_SIZE-1) != 0 { + return Err(Error::InvalidReadDescriptor(current.len())); + } + let nsectors = current.len() >> SECTOR_SHIFT; + if nsectors == 0 { + return Ok(()) + } + self.disk.write_sectors(self.sector, current) + .map_err(Error::DiskWrite)?; - self.disk.write_sectors(self.sector, buffer) - .map_err(Error::DiskWrite)?; - self.chain.inc_offset(len, false); - Ok(()) + self.chain.inc_read_offset(nsectors << SECTOR_SHIFT); + self.sector += nsectors as u64; + } } fn handle_io_flush(&mut self) -> Result<()> { diff --git a/src/devices/virtio_net.rs b/src/devices/virtio_net.rs index 3ff92c4..368a61b 100644 --- a/src/devices/virtio_net.rs +++ b/src/devices/virtio_net.rs @@ -1,6 +1,6 @@ -use crate::virtio::{VirtioDeviceOps, VirtQueue, VirtioBus}; +use crate::virtio::{VirtioDeviceOps, VirtQueue, VirtioBus, Chain}; use crate::memory::MemoryManager; -use crate::{vm, system}; +use crate::{system, virtio}; use std::sync::{RwLock, Arc}; use std::{fmt, result, thread, io}; use crate::system::{EPoll,Event}; @@ -15,7 +15,7 @@ const MAC_ADDR_LEN: usize = 6; pub enum Error { ChainWrite(io::Error), ChainRead(io::Error), - ChainIoEvent(vm::Error), + ChainIoEvent(system::Error), SetupPoll(system::Error), TapRead(io::Error), TapWrite(io::Error), @@ -42,35 +42,41 @@ type Result = result::Result; const VIRTIO_NET_F_CSUM: u64 = 1; const VIRTIO_NET_F_GUEST_CSUM: u64 = 1 << 1; const VIRTIO_NET_F_GUEST_TSO4: u64 = 1 << 7; 
-const VIRTIO_NET_F_GUEST_UFO: u64 = 1 << 10; +const VIRTIO_NET_F_GUEST_TSO6: u64 = 1 << 8; +const VIRTIO_NET_F_GUEST_ECN : u64 = 1 << 9; const VIRTIO_NET_F_HOST_TSO4: u64 = 1 << 11; -const VIRTIO_NET_F_HOST_UFO: u64 = 1 << 14; +const VIRTIO_NET_F_HOST_TSO6: u64 = 1 << 12; +const VIRTIO_NET_F_HOST_ECN: u64 = 1 << 13; -//const VIRTIO_NET_HDR_SIZE: i32 = 12; +const VIRTIO_NET_HDR_SIZE: i32 = 12; pub struct VirtioNet { + _features_supported: u64, tap: Option, } impl VirtioNet { - fn new(tap: Tap) -> Self { + fn new(tap: Tap, features_supported: u64) -> Self { VirtioNet{ + _features_supported: features_supported, tap: Some(tap) } } - pub fn create(vbus: &mut VirtioBus, tap: Tap) -> vm::Result<()> { - tap.set_offload(TUN_F_CSUM | TUN_F_UFO | TUN_F_TSO4 | TUN_F_TSO6).unwrap(); - tap.set_vnet_hdr_size(12).unwrap(); - let dev = Arc::new(RwLock::new(VirtioNet::new(tap))); + pub fn create(vbus: &mut VirtioBus, tap: Tap) -> virtio::Result<()> { + tap.set_offload(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6| TUN_F_TSO_ECN).unwrap(); + tap.set_vnet_hdr_size(VIRTIO_NET_HDR_SIZE).unwrap(); let feature_bits = VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | - VIRTIO_NET_F_GUEST_UFO | + VIRTIO_NET_F_GUEST_TSO6 | + VIRTIO_NET_F_GUEST_ECN | VIRTIO_NET_F_HOST_TSO4 | - VIRTIO_NET_F_HOST_UFO; + VIRTIO_NET_F_HOST_TSO6 | + VIRTIO_NET_F_HOST_ECN; + let dev = Arc::new(RwLock::new(VirtioNet::new(tap, feature_bits))); vbus.new_virtio_device(VIRTIO_ID_NET, dev) .set_queue_sizes(&[256, 256]) .set_config_size(MAC_ADDR_LEN) @@ -82,7 +88,7 @@ impl VirtioNet { pub const TUN_F_CSUM: u32 = 1; pub const TUN_F_TSO4: u32 = 2; pub const TUN_F_TSO6: u32 = 4; -pub const TUN_F_UFO: u32= 16; +pub const TUN_F_TSO_ECN: u32 = 8; impl VirtioDeviceOps for VirtioNet { fn start(&mut self, _memory: &MemoryManager, mut queues: Vec) { @@ -170,8 +176,6 @@ impl VirtioNetDevice { self.tap.write_all(&self.tx_frame[..n]) .map_err(Error::TapWrite)?; } - - chain.skip_readable(); chain.flush_chain() } Ok(()) @@ 
-181,17 +185,15 @@ impl VirtioNetDevice { self.rx_bytes != 0 } - fn receive_frame(&mut self) -> Result { - if let Some(mut chain) = self.rx.next_chain() { + fn receive_frame(&mut self, chain: &mut Chain) -> Result { + if chain.remaining_write() < self.rx_bytes { + notify!("not enough space for frame"); + Ok(false) + } else { chain.write_all(&self.rx_frame[..self.rx_bytes]) .map_err(Error::ChainWrite)?; self.rx_bytes = 0; - // XXX defer interrupt - chain.flush_chain(); Ok(true) - } else { - self.disable_tap_events(); - Ok(false) } } @@ -202,7 +204,6 @@ impl VirtioNetDevice { Ok(true) }, Err(e) => if let Some(libc::EAGAIN) = e.raw_os_error() { - // handle deferred interrupts Ok(false) } else { Err(Error::TapRead(e)) @@ -210,16 +211,40 @@ impl VirtioNetDevice { } } + fn next_rx_chain(&mut self) -> Option { + self.rx.next_chain().or_else(|| { + self.disable_tap_events(); + None + }) + } + fn handle_rx_tap(&mut self) -> Result<()> { + // tap wants to send packets to guest, is an rx chain available? + let mut chain = match self.next_rx_chain() { + Some(chain) => chain, + None => return Ok(()), + }; + + // If there is already an rx packet pending to send to guest + // first write it to rx chain. if self.pending_rx() { - if !self.receive_frame()? { + if !self.receive_frame(&mut chain)? { return Ok(()) } } while self.tap_read()? { - if !self.receive_frame()? { - break; + if chain.remaining_write() < self.rx_bytes { + // chain is full but there is still data to deliver, + // see if there is another rx chain available. + chain = match self.rx.next_chain() { + Some(chain) => chain, + None => return Ok(()), + }; + } + + if !self.receive_frame(&mut chain)? { + return Ok(()); } } Ok(()) @@ -227,10 +252,12 @@ impl VirtioNetDevice { fn handle_rx_queue(&mut self) -> Result<()> { self.rx.ioevent().read().unwrap(); + if !self.tap_event_enabled { + self.enable_tap_poll(); + } + if self.pending_rx() { - if self.receive_frame()? 
{ - self.enable_tap_poll(); - } + self.handle_rx_tap()?; } Ok(()) } diff --git a/src/devices/virtio_rng.rs b/src/devices/virtio_rng.rs index b8174b6..fb9ce32 100644 --- a/src/devices/virtio_rng.rs +++ b/src/devices/virtio_rng.rs @@ -3,9 +3,8 @@ use std::sync::{Arc,RwLock}; use std::thread; use std::fs::File; -use crate::virtio::{VirtioDeviceOps,VirtioBus,VirtQueue}; +use crate::virtio::{VirtioDeviceOps,VirtioBus,VirtQueue,Result}; use crate::memory::MemoryManager; -use crate::vm::Result; const VIRTIO_ID_RANDOM: u16 = 4; diff --git a/src/devices/virtio_serial.rs b/src/devices/virtio_serial.rs index 7078247..86180bf 100644 --- a/src/devices/virtio_serial.rs +++ b/src/devices/virtio_serial.rs @@ -3,9 +3,8 @@ use std::io::{self,Write,Read}; use std::thread::spawn; use termios::*; -use crate::virtio::{VirtioDeviceOps,VirtioBus, VirtQueue}; +use crate::virtio::{VirtioDeviceOps,VirtioBus, VirtQueue,Result}; use crate::memory::MemoryManager; -use crate::vm::Result; const VIRTIO_ID_CONSOLE: u16 = 3; diff --git a/src/devices/virtio_wl/device.rs b/src/devices/virtio_wl/device.rs index d481834..2a8d2eb 100644 --- a/src/devices/virtio_wl/device.rs +++ b/src/devices/virtio_wl/device.rs @@ -2,10 +2,10 @@ use std::os::unix::io::{AsRawFd,RawFd}; use std::sync::{RwLock, Arc}; use std::thread; -use crate::{vm, system}; -use crate::system::EPoll; +use crate::{system, virtio}; +use crate::system::{EPoll,EventFd}; use crate::memory::{MemoryManager, DrmDescriptor}; -use crate::virtio::{VirtQueue, EventFd, Chain, VirtioBus, VirtioDeviceOps}; +use crate::virtio::{VirtQueue, VirtioBus, VirtioDeviceOps, Chain}; use crate::devices::virtio_wl::{vfd::VfdManager, consts::*, Error, Result, VfdObject}; use crate::system::ioctl::ioctl_with_ref; @@ -27,7 +27,7 @@ impl VirtioWayland { VirtioWayland { feature_bits: 0 } } - pub fn create(vbus: &mut VirtioBus) -> vm::Result<()> { + pub fn create(vbus: &mut VirtioBus) -> virtio::Result<()> { let dev = Arc::new(RwLock::new(VirtioWayland::new())); 
vbus.new_virtio_device(VIRTIO_ID_WL, dev) .set_num_queues(2) @@ -40,7 +40,7 @@ impl VirtioWayland { } fn create_device(memory: MemoryManager, in_vq: VirtQueue, out_vq: VirtQueue, transition: bool) -> Result { - let kill_evt = EventFd::new().map_err(Error::IoEventError)?; + let kill_evt = EventFd::new().map_err(Error::EventFdCreate)?; let dev = WaylandDevice::new(memory, in_vq, out_vq, kill_evt, transition)?; Ok(dev) } diff --git a/src/devices/virtio_wl/mod.rs b/src/devices/virtio_wl/mod.rs index 284d815..958ba52 100644 --- a/src/devices/virtio_wl/mod.rs +++ b/src/devices/virtio_wl/mod.rs @@ -1,7 +1,7 @@ use std::os::unix::io::RawFd; use std::{result, io, fmt}; -use crate::{vm, system}; +use crate::system; use crate::memory::Error as MemError; use crate::system::FileDesc; @@ -82,13 +82,14 @@ pub trait VfdObject { #[derive(Debug)] pub enum Error { - IoEventError(vm::Error), + IoEventError(system::Error), + EventFdCreate(system::Error), ChainIoError(io::Error), UnexpectedCommand(u32), ShmAllocFailed(system::Error), RegisterMemoryFailed(MemError), CreatePipesFailed(system::Error), - SocketReceive(system::Error), + SocketReceive(system::ErrnoError), SocketConnect(io::Error), PipeReceive(io::Error), SendVfd(io::Error), @@ -96,7 +97,7 @@ pub enum Error { TooManySendVfds(usize), FailedPollContextCreate(system::Error), FailedPollAdd(system::Error), - DmaSync(system::Error), + DmaSync(system::ErrnoError), DmaBuf(MemError), DmaBufSize(system::Error), } @@ -106,6 +107,7 @@ impl fmt::Display for Error { use Error::*; match self { IoEventError(e) => write!(f, "error reading from ioevent fd: {}", e), + EventFdCreate(e) => write!(f, "error creating eventfd: {}", e), ChainIoError(e) => write!(f, "i/o error on virtio chain operation: {}", e), UnexpectedCommand(cmd) => write!(f, "unexpected virtio wayland command: {}", cmd), ShmAllocFailed(e) => write!(f, "failed to allocate shared memory: {}", e), diff --git a/src/disk/memory.rs b/src/disk/memory.rs index 9b1f73b..e87b021 100644 --- 
a/src/disk/memory.rs +++ b/src/disk/memory.rs @@ -1,17 +1,18 @@ -use crate::system::{MemoryFd, BitVec}; +use crate::system::MemoryFd; +use crate::util::BitSet; use crate::disk::{Result, Error, SECTOR_SIZE, DiskImage}; use std::io::SeekFrom; pub struct MemoryOverlay { memory: MemoryFd, - written_sectors: BitVec, + written_sectors: BitSet, } impl MemoryOverlay { pub fn new() -> Result { let memory = MemoryFd::new_memfd(0, false) .map_err(Error::MemoryOverlayCreate)?; - let written_sectors = BitVec::new(); + let written_sectors = BitSet::new(); Ok(MemoryOverlay { memory, written_sectors }) } @@ -30,14 +31,14 @@ impl MemoryOverlay { for n in 0..sector_count { let idx = start as usize + n; - self.written_sectors.set_bit(idx); + self.written_sectors.insert(idx); } Ok(()) } pub fn read_sectors(&mut self, disk: &mut D, start: u64, buffer: &mut [u8]) -> Result<()> { let sector_count = buffer.len() / SECTOR_SIZE; - if (0..sector_count).all(|i| !self.written_sectors.get_bit(i)) { + if (0..sector_count).all(|i| !self.written_sectors.get(i)) { return disk.read_sectors(start, buffer); } @@ -45,7 +46,7 @@ impl MemoryOverlay { let sector = start + n as u64; let offset = n * SECTOR_SIZE; let sector_buffer = &mut buffer[offset..offset+SECTOR_SIZE]; - if self.written_sectors.get_bit(sector as usize) { + if self.written_sectors.get(sector as usize) { self.read_single_sector(sector, sector_buffer)?; } else { disk.read_sectors(sector, sector_buffer)?; diff --git a/src/disk/mod.rs b/src/disk/mod.rs index 6191c39..2bb07c5 100644 --- a/src/disk/mod.rs +++ b/src/disk/mod.rs @@ -23,6 +23,7 @@ pub enum OpenType { } pub trait DiskImage: Sync+Send { + fn open(&mut self) -> Result<()>; fn read_only(&self) -> bool; fn sector_count(&self) -> u64; fn disk_file(&mut self) -> Result<&mut File>; @@ -61,6 +62,7 @@ pub type Result = result::Result; #[derive(Debug)] pub enum Error { ReadOnly, + ImageDoesntExit(PathBuf), DiskOpen(PathBuf,io::Error), DiskOpenTooShort(PathBuf), DiskRead(io::Error), @@ -78,6 
+80,7 @@ impl fmt::Display for Error { use Error::*; match self { ReadOnly => write!(f, "attempted write to read-only device"), + ImageDoesntExit(path) => write!(f, "disk image {} does not exist", path.display()), DiskOpen(path, err) => write!(f, "failed to open disk image {}: {}", path.display(), err), DiskOpenTooShort(path) => write!(f, "failed to open disk image {} because file is too short", path.display()), DiskRead(err) => write!(f, "error reading from disk image: {}", err), diff --git a/src/disk/raw.rs b/src/disk/raw.rs index dff956f..415e176 100644 --- a/src/disk/raw.rs +++ b/src/disk/raw.rs @@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions}; use std::io::{Write, Read, SeekFrom, Seek}; use crate::disk::Error::DiskRead; use crate::disk::memory::MemoryOverlay; -use std::path::PathBuf; +use std::path::{PathBuf, Path}; pub struct RawDiskImage { @@ -17,24 +17,37 @@ pub struct RawDiskImage { } impl RawDiskImage { - pub fn new>(path: P, open_type: OpenType) -> Self { + fn get_nsectors(path: &Path, offset: usize) -> Result { + if let Ok(meta) = path.metadata() { + Ok((meta.len() - offset as u64) / SECTOR_SIZE as u64) + } else { + Err(Error::ImageDoesntExit(path.to_path_buf())) + } + } + + #[allow(dead_code)] + pub fn new>(path: P, open_type: OpenType) -> Result { Self::new_with_offset(path, open_type, 0) } - pub fn new_with_offset>(path: P, open_type: OpenType, offset: usize) -> Self { + pub fn new_with_offset>(path: P, open_type: OpenType, offset: usize) -> Result { let path = path.into(); - RawDiskImage { + let nsectors = Self::get_nsectors(&path, offset)?; + Ok(RawDiskImage { path, open_type, file: None, offset, - nsectors: 0, + nsectors, disk_image_id: Vec::new(), overlay: None, - } + }) } - pub fn open(&mut self) -> Result<()> { +} + +impl DiskImage for RawDiskImage { + fn open(&mut self) -> Result<()> { let meta = self.path.metadata() .map_err(|e| Error::DiskOpen(self.path.clone(), e))?; @@ -42,8 +55,6 @@ impl RawDiskImage { return 
Err(Error::DiskOpenTooShort(self.path.clone())) } - self.nsectors = (meta.len() - self.offset as u64) / SECTOR_SIZE as u64; - let file = OpenOptions::new() .read(true) .write(self.open_type == OpenType::ReadWrite) @@ -59,9 +70,7 @@ impl RawDiskImage { } Ok(()) } -} -impl DiskImage for RawDiskImage { fn read_only(&self) -> bool { self.open_type == OpenType::ReadOnly } diff --git a/src/disk/realmfs.rs b/src/disk/realmfs.rs index 896c35d..75debd1 100644 --- a/src/disk/realmfs.rs +++ b/src/disk/realmfs.rs @@ -11,17 +11,18 @@ pub struct RealmFSImage { // Just pass everything through to raw image for now impl RealmFSImage { - pub fn new>(path: P, open_type: OpenType) -> Self { + pub fn new>(path: P, open_type: OpenType) -> Result { + assert_ne!(open_type, OpenType::ReadWrite); let offset = HEADER_SECTOR_COUNT * SECTOR_SIZE; - let raw = RawDiskImage::new_with_offset(path, open_type, offset); - RealmFSImage { raw } - } - pub fn open(&mut self) -> Result<()> { - self.raw.open() + let raw = RawDiskImage::new_with_offset(path, open_type, offset)?; + Ok(RealmFSImage { raw }) } } impl DiskImage for RealmFSImage { + fn open(&mut self) -> Result<()> { + self.raw.open() + } fn read_only(&self) -> bool { self.raw.read_only() } diff --git a/src/kvm/error.rs b/src/kvm/error.rs new file mode 100644 index 0000000..9fb2fb8 --- /dev/null +++ b/src/kvm/error.rs @@ -0,0 +1,36 @@ +use std::{fmt, result}; + +use crate::system::Error as SysError; +use crate::system::ErrnoError; +pub type Result = result::Result; + +#[derive(Debug)] +pub enum Error { + OpenKvm(ErrnoError), + MissingRequiredExtension(u32), + BadVersion, + IoctlError(&'static str, ErrnoError), + IoEventCreate(SysError), +} + +impl Error { + pub fn is_interrupted(&self) -> bool { + match self { + Error::IoctlError(_, e) => e.is_interrupted(), + _ => false, + } + } +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + match self { + OpenKvm(e) => write!(f, "could not open 
/dev/kvm: {}", e), + MissingRequiredExtension(ext) => write!(f, "kernel does not support a required kvm extension: {}", ext), + BadVersion => write!(f, "unexpected kvm api version"), + IoctlError(name, err) => write!(f, "failed to call {} ioctl: {}", name, err), + IoEventCreate(e) => write!(f, "failed to create ioeventfd: {}", e), + } + } +} diff --git a/src/kvm/ioctl.rs b/src/kvm/ioctl.rs index 832bfde..2cbfe36 100644 --- a/src/kvm/ioctl.rs +++ b/src/kvm/ioctl.rs @@ -1,23 +1,20 @@ use libc::{self, c_char, c_ulong}; use std::os::unix::io::RawFd; use std::ffi::CString; -use std::fmt; use crate::system::ioctl::{ioctl_with_val,ioctl_with_ref,ioctl_with_mut_ref}; -use crate::vm::{Result,Error,ErrorKind}; -use crate::system; - +use crate::kvm::{Result, Error}; +use crate::system::ErrnoError; +use crate::vm::arch::KvmRegs; +use std::result; const KVMIO: u64 = 0xAE; const KVM_GET_API_VERSION: c_ulong = io! (KVMIO, 0x00); const KVM_CREATE_VM: c_ulong = io! (KVMIO, 0x01); const KVM_CHECK_EXTENSION: c_ulong = io! (KVMIO, 0x03); -const KVM_GET_SUPPORTED_CPUID: c_ulong = iorw! (KVMIO, 0x05, 8); -const KVM_SET_TSS_ADDR: c_ulong = io! (KVMIO, 0x47); const KVM_CREATE_IRQCHIP: c_ulong = io! (KVMIO, 0x60); -const KVM_CREATE_PIT2: c_ulong = iow! (KVMIO, 0x77, 64); const KVM_GET_VCPU_MMAP_SIZE: c_ulong = io! (KVMIO, 0x04); const KVM_CREATE_VCPU: c_ulong = io! (KVMIO, 0x41); const KVM_SET_USER_MEMORY_REGION: c_ulong = iow! (KVMIO, 0x46, 32); @@ -27,14 +24,6 @@ const KVM_IOEVENTFD: c_ulong = iow! (KVMIO, 0x79, 64); const KVM_RUN: c_ulong = io! (KVMIO, 0x80); const KVM_GET_REGS: c_ulong = ior! (KVMIO, 0x81, 144); const KVM_SET_REGS: c_ulong = iow! (KVMIO, 0x82, 144); -const KVM_GET_SREGS: c_ulong = ior! (KVMIO, 0x83, 312); -const KVM_SET_SREGS: c_ulong = iow! (KVMIO, 0x84, 312); -const KVM_SET_MSRS: c_ulong = iow! (KVMIO, 0x89, 8); -const KVM_SET_FPU: c_ulong = iow! (KVMIO, 0x8d, 416); -const KVM_GET_LAPIC: c_ulong = ior! (KVMIO, 0x8e, 1024); -const KVM_SET_LAPIC: c_ulong = iow! 
(KVMIO, 0x8f, 1024); -const KVM_SET_CPUID2: c_ulong = iow! (KVMIO, 0x90, 8); - struct InnerFd(RawFd); impl InnerFd { @@ -53,20 +42,18 @@ fn raw_open_kvm() -> Result { let path = CString::new("/dev/kvm").unwrap(); let fd = unsafe { libc::open(path.as_ptr() as *const c_char, libc::O_RDWR) }; if fd < 0 { - return Err(Error::from_last_errno()); + return Err(Error::OpenKvm(ErrnoError::last_os_error())); } Ok(fd) } impl SysFd { pub fn open() -> Result { - match raw_open_kvm() { - Ok(fd) => Ok(SysFd(InnerFd(fd))), - Err(e) => Err(Error::new(ErrorKind::OpenDeviceFailed, e)), - } + let fd = raw_open_kvm()?; + Ok(SysFd(InnerFd(fd))) } - fn raw(&self) -> RawFd { self.0.raw() } + pub fn raw(&self) -> RawFd { self.0.raw() } } pub struct VmFd(InnerFd); @@ -75,7 +62,7 @@ impl VmFd { fn new(fd: RawFd) -> VmFd { VmFd( InnerFd(fd) ) } - fn raw(&self) -> RawFd { self.0.raw() } + pub fn raw(&self) -> RawFd { self.0.raw() } } pub struct VcpuFd(InnerFd); @@ -91,21 +78,21 @@ impl VcpuFd { pub fn kvm_check_extension(sysfd: &SysFd, extension: u32) -> Result { unsafe { ioctl_with_val(sysfd.raw(), KVM_CHECK_EXTENSION, extension as c_ulong) - .map_err(|e| ioctl_err("KVM_CHECK_EXTENSION", e)) + .map_err(|e| Error::IoctlError("KVM_CHECK_EXTENSION", e)) } } pub fn kvm_get_api_version(sysfd: &SysFd) -> Result { unsafe { ioctl_with_val(sysfd.raw(), KVM_GET_API_VERSION, 0) - .map_err(|e| ioctl_err("KVM_GET_API_VERSION", e)) + .map_err(|e| Error::IoctlError("KVM_GET_API_VERSION", e)) } } pub fn kvm_create_vm(sysfd: &SysFd) -> Result { let fd = unsafe { ioctl_with_val(sysfd.raw(), KVM_CREATE_VM, 0) - .map_err(|e| ioctl_err("KVM_CREATE_VM", e))? + .map_err(|e| Error::IoctlError("KVM_CREATE_VM", e))? 
}; Ok(VmFd::new(fd as RawFd)) } @@ -113,73 +100,7 @@ pub fn kvm_create_vm(sysfd: &SysFd) -> Result { pub fn kvm_get_vcpu_mmap_size(sysfd: &SysFd) -> Result { unsafe { ioctl_with_val(sysfd.raw(), KVM_GET_VCPU_MMAP_SIZE, 0) - .map_err(|e| ioctl_err("KVM_GET_VCPU_MMAP_SIZE", e)) - } -} - -#[derive(Copy, Clone, Default)] -#[repr(C)] -pub struct KvmCpuIdEntry { - pub function: u32, - pub index: u32, - pub flags: u32, - pub eax: u32, - pub ebx: u32, - pub ecx: u32, - pub edx: u32, - padding: [u32; 3] -} - -const KVM_CPUID_MAX_ENTRIES:usize = 256; - -#[repr(C)] -pub struct KvmCpuId2 { - nent: u32, - padding: u32, - entries: [KvmCpuIdEntry; KVM_CPUID_MAX_ENTRIES] -} - -impl KvmCpuId2 { - pub fn new() -> KvmCpuId2 { - KvmCpuId2 { - nent: KVM_CPUID_MAX_ENTRIES as u32, - padding: 0, - entries: [Default::default(); KVM_CPUID_MAX_ENTRIES], - } - } - - pub fn new_from_entries(entries: Vec) -> KvmCpuId2 { - let mut cpuid = KvmCpuId2::new(); - let sz = entries.len(); - assert!(sz <= KVM_CPUID_MAX_ENTRIES, "Too many cpuid entries"); - for i in 0..sz { - cpuid.entries[i] = entries[i]; - } - cpuid.nent = sz as u32; - cpuid - } - - pub fn get_entries(&self) -> Vec { - let mut entries = Vec::new(); - let sz = self.nent as usize; - for i in 0..sz { - entries.push(self.entries[i]); - } - entries - } -} - -pub fn kvm_get_supported_cpuid(sysfd: &SysFd, cpuid: &mut KvmCpuId2) -> Result { - unsafe { - ioctl_with_mut_ref(sysfd.raw(), KVM_GET_SUPPORTED_CPUID, cpuid) - .map_err(|e| ioctl_err("KVM_GET_SUPPORTED_CPUID", e)) - } -} - -pub fn kvm_set_cpuid2(cpufd: &VcpuFd, cpuid: &KvmCpuId2) -> Result { - unsafe { - ioctl_with_ref(cpufd.raw(), KVM_SET_CPUID2, cpuid) - .map_err(|e| ioctl_err("KVM_SET_CPUID2", e)) + .map_err(|e| Error::IoctlError("KVM_GET_VCPU_MMAP_SIZE", e)) } } @@ -204,50 +125,18 @@ impl KvmUserspaceMemoryRegion { } } -pub fn kvm_set_user_memory_region(vmfd: &VmFd, region: &KvmUserspaceMemoryRegion) -> Result { - unsafe { - ioctl_with_ref(vmfd.raw(), KVM_SET_USER_MEMORY_REGION, 
region) - .map_err(|e| ioctl_err("KVM_SET_USER_MEMORY_REGION", e)) - } +pub fn kvm_set_user_memory_region(vmfd: &VmFd, region: &KvmUserspaceMemoryRegion) -> Result<()> { + call_ioctl_with_ref("KVM_SET_USER_MEMORY_REGION",vmfd.raw(), KVM_SET_USER_MEMORY_REGION, region) } -#[repr(C)] -pub struct KvmPitConfig { - flags: u32, - padding: [u32; 15], -} - -impl KvmPitConfig { - pub fn new(flags: u32) -> KvmPitConfig { - KvmPitConfig { flags, padding: [0; 15] } - } -} - -pub fn kvm_create_pit2(vmfd: &VmFd, config: &KvmPitConfig) -> Result { - unsafe { - ioctl_with_ref(vmfd.raw(), KVM_CREATE_PIT2, config) - .map_err(|e| ioctl_err("KVM_CREATE_PIT2", e)) - } -} - -pub fn kvm_create_irqchip(vmfd: &VmFd) -> Result { - unsafe { - ioctl_with_val(vmfd.raw(), KVM_CREATE_IRQCHIP, 0) - .map_err(|e| ioctl_err("KVM_CREATE_IRQCHIP", e)) - } -} - -pub fn kvm_set_tss_addr(vmfd: &VmFd, addr: u32) -> Result { - unsafe { - ioctl_with_val(vmfd.raw(), KVM_SET_TSS_ADDR, addr as c_ulong) - .map_err(|e| ioctl_err("KVM_SET_TSS_ADDR", e)) - } +pub fn kvm_create_irqchip(vmfd: &VmFd) -> Result<()> { + call_ioctl_with_val("KVM_CREATE_IRQCHIP", vmfd.raw(), KVM_CREATE_IRQCHIP, 0) } pub fn kvm_create_vcpu(vmfd: &VmFd, cpu_id: u32) -> Result { let fd = unsafe { ioctl_with_val(vmfd.raw(), KVM_CREATE_VCPU, cpu_id as c_ulong) - .map_err(|e| ioctl_err("KVM_CREATE_VCPU", e))? + .map_err(|e| Error::IoctlError("KVM_CREATE_VCPU", e))? 
}; Ok(VcpuFd::new(fd as RawFd)) } @@ -264,11 +153,8 @@ impl KvmIrqLevel { } } -pub fn kvm_irq_line(vmfd: &VmFd, level: &KvmIrqLevel) -> Result { - unsafe { - ioctl_with_ref(vmfd.raw(), KVM_IRQ_LINE, level) - .map_err(|e| ioctl_err("KVM_IRQ_LINE", e)) - } +pub fn kvm_irq_line(vmfd: &VmFd, level: &KvmIrqLevel) -> Result<()> { + call_ioctl_with_ref("KVM_IRQ_LINE", vmfd.raw(), KVM_IRQ_LINE, level) } #[repr(C)] @@ -287,11 +173,8 @@ impl KvmIrqFd { } } -pub fn kvm_irqfd(vmfd: &VmFd, irqfd: &KvmIrqFd) -> Result { - unsafe { - ioctl_with_ref(vmfd.raw(), KVM_IRQFD, irqfd) - .map_err(|e| ioctl_err("KVM_IRQFD", e)) - } +pub fn kvm_irqfd(vmfd: &VmFd, irqfd: &KvmIrqFd) -> Result<()> { + call_ioctl_with_ref("KVM_IRQFD", vmfd.raw(), KVM_IRQFD, irqfd) } pub const IOEVENTFD_FLAG_DATAMATCH: u32 = 1; @@ -329,285 +212,45 @@ impl KvmIoEventFd { } } -pub fn kvm_ioeventfd(vmfd: &VmFd, ioeventfd: &KvmIoEventFd) -> Result { +pub fn kvm_ioeventfd(vmfd: &VmFd, ioeventfd: &KvmIoEventFd) -> Result<()> { + call_ioctl_with_ref("KVM_IOEVENTFD", vmfd.raw(), KVM_IOEVENTFD, ioeventfd) +} + +pub fn kvm_get_regs(cpufd: &VcpuFd, regs: &mut KvmRegs) -> Result<()> { + call_ioctl_with_mut_ref("KVM_GET_REGS", cpufd.raw(), KVM_GET_REGS, regs) +} + +pub fn kvm_set_regs(cpufd: &VcpuFd, regs: &KvmRegs) -> Result<()> { + call_ioctl_with_ref("KVM_SET_REGS", cpufd.raw(), KVM_SET_REGS, regs) +} + +pub fn kvm_run(cpufd: &VcpuFd) -> Result<()> { + call_ioctl_with_val("KVM_RUN", cpufd.raw(), KVM_RUN, 0) +} + +fn call_ioctl(name: &'static str, result: result::Result) -> Result<()> { + result.map_err(|e| Error::IoctlError(name, e))?; + Ok(()) +} + +fn call_ioctl_with_ref(name: &'static str, fd: RawFd, request: c_ulong, arg: &T) -> Result<()> { unsafe { - ioctl_with_ref(vmfd.raw(), KVM_IOEVENTFD, ioeventfd) - .map_err(|e| ioctl_err("KVM_IOEVENTFD", e)) + ioctl_with_ref(fd, request, arg) + .map_err(|e| Error::IoctlError(name, e))?; + Ok(()) } } - -#[repr(C)] -pub struct KvmLapicState { - pub regs: [u8; 1024] -} - -impl 
KvmLapicState { - pub fn new() -> KvmLapicState { - KvmLapicState { regs: [0; 1024] } - } -} - -pub fn kvm_get_lapic(cpufd: &VcpuFd, lapic_state: &mut KvmLapicState) -> Result { +fn call_ioctl_with_mut_ref(name: &'static str, fd: RawFd, request: c_ulong, arg: &mut T) -> Result<()> { unsafe { - ioctl_with_mut_ref(cpufd.raw(), KVM_GET_LAPIC, lapic_state) - .map_err(|e| ioctl_err("KVM_GET_LAPIC", e)) + ioctl_with_mut_ref(fd, request, arg) + .map_err(|e| Error::IoctlError(name, e))?; + Ok(()) } } -pub fn kvm_set_lapic(cpufd: &VcpuFd, lapic_state: &KvmLapicState) -> Result { +fn call_ioctl_with_val(name: &'static str, fd: RawFd, request: c_ulong, val: c_ulong) -> Result<()> { unsafe { - ioctl_with_ref(cpufd.raw(), KVM_SET_LAPIC, lapic_state) - .map_err(|e| ioctl_err("KVM_SET_LAPIC", e)) + call_ioctl(name, ioctl_with_val(fd, request, val)) } } - -#[derive(Copy, Clone, Default)] -#[repr(C)] -pub struct KvmSegment { - base: u64, - limit: u32, - selector: u16, - stype: u8, - present: u8, - dpl: u8, - db: u8, - s: u8, - l: u8, - g: u8, - avl: u8, - unusable: u8, - padding: u8, -} - -impl KvmSegment { - pub fn new(base: u64, limit: u32, selector: u16, flags: u16) -> KvmSegment { - let mut seg = KvmSegment{ ..Default::default() }; - seg.setup(base, limit, selector, flags); - seg - } - - pub fn setup(&mut self, base: u64, limit: u32, selector: u16, flags: u16) { - self.base = base; - self.limit = limit; - self.selector = selector; - self.stype = (flags & 0xF) as u8; - self.present = ((flags >> 7) & 0x1) as u8; - self.dpl = ((flags >> 5) & 0x3) as u8; - self.db = ((flags >> 14) & 0x1) as u8; - self.s = ((flags >> 4) & 0x1) as u8; - self.l = ((flags >> 13) & 0x1) as u8; - self.g = ((flags >> 15) & 0x1) as u8; - self.avl = ((flags >> 12) & 0x1) as u8; - self.unusable = if self.present == 1 { 0 } else { 1 } - } -} - -impl fmt::Debug for KvmSegment { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "(base: {:x} limit {:x} selector: {:x} type: {:x} p: {} dpl: {} 
db: {} s: {} l: {} g: {} avl: {} unuse: {})", - self.base, self.limit, self.selector, self.stype, self.present, self.dpl, self.db, self.s, self.l, self.g, self.avl, self.unusable) - } -} - -#[derive(Copy, Clone, Default)] -#[repr(C)] -pub struct KvmDtable { - pub base: u64, - pub limit: u16, - padding: [u16; 3], -} -impl fmt::Debug for KvmDtable { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "(base: {:x} limit {:x})", self.base, self.limit) - } -} - - - -#[derive(Copy, Clone, Default)] -#[repr(C)] -pub struct KvmSRegs { - pub cs: KvmSegment, - pub ds: KvmSegment, - pub es: KvmSegment, - pub fs: KvmSegment, - pub gs: KvmSegment, - pub ss: KvmSegment, - pub tr: KvmSegment, - pub ldt: KvmSegment, - pub gdt: KvmDtable, - pub itd: KvmDtable, - pub cr0: u64, - pub cr2: u64, - pub cr3: u64, - pub cr4: u64, - pub cr8: u64, - pub efer: u64, - pub apic_base: u64, - pub interrupt_bitmap: [u64; 4], -} -impl fmt::Debug for KvmSRegs { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "cs: {:?}\nds: {:?}\nes: {:?}\nfs: {:?}\n", self.cs, self.ds, self.es, self.fs)?; - write!(f, "gs: {:?}\nss: {:?}\ntr: {:?}\nldt: {:?}\n", self.gs, self.ss, self.tr, self.ldt)?; - write!(f, "gdt: {:?} itd: {:?}\n", self.gdt, self.itd)?; - write!(f, "cr0: {:x} cr2: {:x} cr3: {:x} cr4: {:x}\n", self.cr0, self.cr2, self.cr3, self.cr4)?; - write!(f, "efer: {:x} apic_base: {:x}\n", self.efer, self.apic_base) - } -} - -impl KvmSRegs { - pub fn new() -> KvmSRegs { - KvmSRegs { ..Default::default() } - } -} - -pub fn kvm_get_sregs(cpufd: &VcpuFd, sregs: &mut KvmSRegs) -> Result { - unsafe { - ioctl_with_mut_ref(cpufd.raw(), KVM_GET_SREGS, sregs) - .map_err(|e| ioctl_err("KVM_GET_SREGS", e)) - } -} - -pub fn kvm_set_sregs(cpufd: &VcpuFd, sregs: &KvmSRegs) -> Result { - unsafe { - ioctl_with_ref(cpufd.raw(), KVM_SET_SREGS, sregs) - .map_err(|e| ioctl_err("KVM_SET_SREGS", e)) - } -} - -#[derive(Copy, Clone, Default)] -#[repr(C)] -pub struct KvmRegs { - pub rax: u64, 
pub rbx: u64, pub rcx: u64, pub rdx: u64, - pub rsi: u64, pub rdi: u64, pub rsp: u64, pub rbp: u64, - pub r8: u64, pub r9: u64, pub r10: u64, pub r11: u64, - pub r12: u64, pub r13: u64, pub r14: u64, pub r15: u64, - pub rip: u64, pub rflags: u64, -} - -impl fmt::Debug for KvmRegs { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "rax 0x{:x} rbx 0x{:x} rcx 0x{:x} rdx 0x{:x}\n", self.rax, self.rbx, self.rcx, self.rdx)?; - write!(f, "rsi 0x{:x} rdi 0x{:x} rsp 0x{:x} rbp 0x{:x}\n", self.rsi, self.rdi, self.rsp, self.rbp)?; - write!(f, "r8 0x{:x} r9 0x{:x} r10 0x{:x} r11 0x{:x}\n", self.r8, self.r9, self.r10, self.r11)?; - write!(f, "r12 0x{:x} r13 0x{:x} r14 0x{:x} r15 0x{:x}\n", self.r12, self.r13, self.r14, self.r15)?; - write!(f, "rip 0x{:x} rflags 0x{:x}\n", self.rip, self.rflags) - } - -} - -impl KvmRegs { - pub fn new() -> KvmRegs { - KvmRegs { ..Default::default() } - } -} - - -pub fn kvm_get_regs(cpufd: &VcpuFd, regs: &mut KvmRegs) -> Result { - unsafe { - ioctl_with_mut_ref(cpufd.raw(), KVM_GET_REGS, regs) - .map_err(|e| ioctl_err("KVM_GET_REGS", e)) - } -} - -pub fn kvm_set_regs(cpufd: &VcpuFd, regs: &KvmRegs) -> Result { - unsafe { - ioctl_with_ref(cpufd.raw(), KVM_SET_REGS, regs) - .map_err(|e| ioctl_err("KVM_SET_REGS", e)) - } -} - -#[derive(Copy)] -#[repr(C)] -pub struct KvmFpu { - fpr: [u8; 128], - pub fcw: u16, - fsw: u16, - ftwx: u8, - pad1: u8, - last_opcode: u16, - last_ip: u64, - last_dp: u64, - xmm: [u8; 256], - pub mxcsr: u32, - pad2: u32, -} - -impl Clone for KvmFpu { - fn clone(&self) -> KvmFpu { *self } -} -impl KvmFpu { - pub fn new() -> KvmFpu { - KvmFpu { - fpr: [0; 128], - fcw: 0, - fsw: 0, - ftwx: 0, pad1: 0, - last_opcode: 0, - last_ip: 0, - last_dp: 0, - xmm: [0; 256], - mxcsr: 0, - pad2: 0 - } - } -} - -pub fn kvm_set_fpu(cpufd: &VcpuFd, fpu: &KvmFpu) -> Result { - unsafe { - ioctl_with_ref(cpufd.raw(), KVM_SET_FPU, fpu ) - .map_err(|e| ioctl_err("KVM_SET_FPU", e)) - } -} - -#[derive(Copy, Clone, Default)] 
-#[repr(C)] -struct KvmMsrEntry { - index: u32, - reserved: u32, - data: u64 -} - -#[repr(C)] -pub struct KvmMsrs { - nent: u32, - padding: u32, - entries: [KvmMsrEntry; 100] -} - -impl KvmMsrs { - pub fn new() -> KvmMsrs { - KvmMsrs{ nent: 0, padding: 0, entries: [Default::default(); 100]} - } - - pub fn add(&mut self, index: u32, data: u64) { - self.entries[self.nent as usize].index = index; - self.entries[self.nent as usize].data = data; - self.nent += 1; - } -} - -pub fn kvm_set_msrs(cpufd: &VcpuFd, msrs: &KvmMsrs) -> Result { - unsafe { - ioctl_with_ref(cpufd.raw(), KVM_SET_MSRS, msrs) - .map_err(|e| ioctl_err("KVM_SET_MSRS", e)) - } -} - -pub fn kvm_run(cpufd: &VcpuFd) -> Result { - unsafe { - ioctl_with_val(cpufd.raw(), KVM_RUN, 0) - .map_err(|e| ioctl_err("KVM_RUN", e)) - } -} - -pub fn ioctl_err(ioctl_name: &'static str, e: system::Error) -> Error { - if e.is_interrupted() { - Error::new(ErrorKind::Interrupted, e) - } else { - Error::new(ErrorKind::IoctlFailed(ioctl_name), e) - } -} - diff --git a/src/kvm/ioeventfd.rs b/src/kvm/ioeventfd.rs new file mode 100644 index 0000000..98f5624 --- /dev/null +++ b/src/kvm/ioeventfd.rs @@ -0,0 +1,43 @@ +use std::sync::Arc; +use std::os::unix::io::{AsRawFd,RawFd}; + +use crate::kvm::{Kvm,Result,Error}; +use crate::system::EventFd; +use crate::system; + +pub struct IoEventFd { + kvm: Kvm, + addr: u64, + evt: Arc +} + +impl IoEventFd { + pub fn new(kvm: &Kvm, address: u64) -> Result { + let evt = EventFd::new().map_err(Error::IoEventCreate)?; + kvm.ioeventfd_add(address, evt.as_raw_fd())?; + Ok(IoEventFd { + kvm: kvm.clone(), + addr: address, + evt: evt.into(), + }) + } + pub fn read(&self) -> system::Result { + self.evt.read() + } + + pub fn write(&self, v: u64) -> system::Result<()> { + self.evt.write(v) + } +} + +impl Drop for IoEventFd { + fn drop(&mut self) { + let _ = self.kvm.ioeventfd_del(self.addr, self.evt.as_raw_fd()); + } +} + +impl AsRawFd for IoEventFd { + fn as_raw_fd(&self) -> RawFd { + 
self.evt.as_raw_fd() + } +} diff --git a/src/kvm/mod.rs b/src/kvm/mod.rs index 62a1853..4848ef8 100644 --- a/src/kvm/mod.rs +++ b/src/kvm/mod.rs @@ -2,9 +2,13 @@ use std::os::unix::io::RawFd; use std::sync::Arc; mod ioctl; +mod ioeventfd; +mod error; -use crate::vm::{Result,Error,ErrorKind}; -pub use self::ioctl::{KvmCpuIdEntry,KvmLapicState, KvmSRegs, KvmRegs, KvmFpu, KvmMsrs, KvmSegment}; +pub use error::{Result,Error}; +pub use ioeventfd::IoEventFd; + +use crate::vm::arch::KvmRegs; pub const KVM_CAP_IRQCHIP: u32 = 0; pub const KVM_CAP_HLT: u32 = 1; @@ -20,21 +24,29 @@ pub const KVM_CAP_IOEVENTFD: u32 = 36; pub struct Kvm { sysfd: Arc, vmfd: Arc, - vcpus: Vec, } fn check_extensions(sysfd: &ioctl::SysFd, extensions: &[u32]) -> Result<()> { for e in extensions { - if ioctl::kvm_check_extension(&sysfd, *e)? == 0 { - return Err(Error::from(ErrorKind::MissingRequiredExtension(*e))); - } + check_extension(sysfd, *e)?; } Ok(()) } +fn check_extension(sysfd: &ioctl::SysFd, extension: u32) -> Result<()> { + let ret = ioctl::kvm_check_extension(&sysfd, extension)?; + if ret == 0 { + Err(Error::MissingRequiredExtension(extension)) + } else { + Ok(()) + } +} + fn check_version(sysfd: &ioctl::SysFd) -> Result<()> { - if ioctl::kvm_get_api_version(&sysfd)? 
!= 12 { - return Err(Error::from(ErrorKind::BadVersion)); + let version= ioctl::kvm_get_api_version(&sysfd)?; + + if version != 12 { + return Err(Error::BadVersion); } Ok(()) } @@ -46,17 +58,16 @@ impl Kvm { check_version(&sysfd)?; check_extensions(&sysfd, &required_extensions)?; - let vmfd= ioctl::kvm_create_vm(&sysfd) - .map_err(|_| Error::from(ErrorKind::CreateVmFailed))?; + let vmfd= ioctl::kvm_create_vm(&sysfd)?; Ok(Kvm{ sysfd: Arc::new(sysfd), vmfd: Arc::new(vmfd), - vcpus: Vec::new(), }) } pub fn add_memory_region(&self, slot: u32, guest_address: u64, host_address: u64, size: usize) -> Result<()> { + let region = ioctl::KvmUserspaceMemoryRegion::new(slot, guest_address, host_address, size as u64); ioctl::kvm_set_user_memory_region(&self.vmfd, ®ion)?; Ok(()) @@ -68,22 +79,11 @@ impl Kvm { Ok(()) } - pub fn create_pit2(&self) -> Result<()> { - let pit_config = ioctl::KvmPitConfig::new(0); - ioctl::kvm_create_pit2(&self.vmfd, &pit_config)?; - Ok(()) - } - pub fn create_irqchip(&self) -> Result<()> { ioctl::kvm_create_irqchip(&self.vmfd)?; Ok(()) } - pub fn set_tss_addr(&self, addr: u32) -> Result<()> { - ioctl::kvm_set_tss_addr(&self.vmfd, addr)?; - Ok(()) - } - pub fn irq_line(&self, irq: u32, level: u32) -> Result<()> { let irq_level = ioctl::KvmIrqLevel::new(irq, level); ioctl::kvm_irq_line(&self.vmfd, &irq_level)?; @@ -92,40 +92,28 @@ impl Kvm { pub fn irqfd(&self, fd: u32, gsi: u32) -> Result<()> { let irqfd = ioctl::KvmIrqFd::new(fd, gsi); - ioctl::kvm_irqfd(&self.vmfd, &irqfd)?; - Ok(()) + ioctl::kvm_irqfd(&self.vmfd, &irqfd) } pub fn ioeventfd_add(&self, address: u64, fd: RawFd) -> Result<()> { // XXX check for zero length capability let ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd); - ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)?; - Ok(()) + ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd) } pub fn ioeventfd_del(&self, address: u64, fd: RawFd) -> Result<()> { let mut ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd); 
ioeventfd.set_deassign(); - ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)?; - Ok(()) + ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd) } - pub fn create_vcpus(&mut self, ncpus: usize) -> Result<()> { - for id in 0..ncpus { - let vcpu = self.new_vcpu(id)?; - vcpu.setup_lapic()?; - self.vcpus.push(vcpu); - } - Ok(()) - } - - fn new_vcpu(&self, id: usize) -> Result { + pub fn new_vcpu(&self, id: usize) -> Result { let cpufd = ioctl::kvm_create_vcpu(&self.vmfd, id as u32)?; Ok(KvmVcpu::new(id, Arc::new(cpufd), self.sysfd.clone())) } - pub fn get_vcpus(&self) -> Vec { - self.vcpus.clone() + pub fn vmfd(&self) -> RawFd { + self.vmfd.raw() } } @@ -134,14 +122,8 @@ pub struct KvmVcpu { id: usize, cpufd: Arc, sysfd: Arc, - } -const APIC_MODE_EXTINT: u8 = 0x7; -const APIC_MODE_NMI: u8 = 0x4; -const APIC_LVT_LINT0_OFFSET: usize = 0x350; -const APIC_LVT_LINT1_OFFSET: usize = 0x360; - impl KvmVcpu { fn new(id: usize, cpufd: Arc, sysfd: Arc) -> KvmVcpu { KvmVcpu { id, cpufd, sysfd } @@ -151,38 +133,8 @@ impl KvmVcpu { self.cpufd.raw() } - pub fn get_supported_cpuid(&self) -> Result> { - let mut cpuid = ioctl::KvmCpuId2::new(); - ioctl::kvm_get_supported_cpuid(&self.sysfd, &mut cpuid)?; - Ok(cpuid.get_entries()) - } - - pub fn set_cpuid2(&self, entries: Vec) -> Result<()> { - let cpuid = ioctl::KvmCpuId2::new_from_entries(entries); - ioctl::kvm_set_cpuid2(&self.cpufd, &cpuid)?; - Ok(()) - } - - pub fn get_lapic(&self) -> Result { - let mut lapic = KvmLapicState::new(); - ioctl::kvm_get_lapic(&self.cpufd, &mut lapic)?; - Ok(lapic) - } - - pub fn set_lapic(&self, lapic_state: &KvmLapicState) -> Result<()> { - ioctl::kvm_set_lapic(&self.cpufd, &lapic_state)?; - Ok(()) - } - - pub fn get_sregs(&self) -> Result { - let mut sregs = KvmSRegs::new(); - ioctl::kvm_get_sregs(&self.cpufd, &mut sregs)?; - Ok(sregs) - } - - pub fn set_sregs(&self, sregs: &KvmSRegs) -> Result<()> { - ioctl::kvm_set_sregs(&self.cpufd, &sregs)?; - Ok(()) + pub fn sys_raw_fd(&self) -> RawFd { + self.sysfd.raw() } pub 
fn get_regs(&self) -> Result { @@ -201,29 +153,8 @@ impl KvmVcpu { Ok(()) } - pub fn set_fpu(&self, fpu: &KvmFpu) -> Result<()> { - ioctl::kvm_set_fpu(&self.cpufd, &fpu)?; - Ok(()) - } - - pub fn set_msrs(&self, msrs: &KvmMsrs) -> Result<()> { - ioctl::kvm_set_msrs(&self.cpufd, &msrs)?; - Ok(()) - } - pub fn get_vcpu_mmap_size(&self) -> Result { Ok(ioctl::kvm_get_vcpu_mmap_size(&self.sysfd)? as usize) } - - pub fn setup_lapic(&self) -> Result<()> { - let mut lapic = self.get_lapic()?; - // delivery mode - lapic.regs[APIC_LVT_LINT0_OFFSET + 1] &= 0xF8; - lapic.regs[APIC_LVT_LINT0_OFFSET + 1] |= APIC_MODE_EXTINT; - lapic.regs[APIC_LVT_LINT1_OFFSET + 1] &= 0xF8; - lapic.regs[APIC_LVT_LINT1_OFFSET + 1] |= APIC_MODE_NMI; - self.set_lapic(&lapic)?; - Ok(()) - } } diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..bbbd230 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,15 @@ +#[macro_use] +extern crate lazy_static; +#[macro_use] +mod system; +#[macro_use] +pub mod util; +mod vm; +mod memory; +mod devices; +mod kvm; +mod virtio; +mod disk; + +pub use util::{Logger,LogLevel}; +pub use vm::VmConfig; diff --git a/src/main.rs b/src/main.rs deleted file mode 100644 index 0388e4e..0000000 --- a/src/main.rs +++ /dev/null @@ -1,21 +0,0 @@ -#![allow(non_snake_case)] - -#[macro_use] extern crate lazy_static; - -#[macro_use] mod log; -mod vm; -#[macro_use] -mod system; -mod memory; -mod devices; -mod kvm; -mod virtio; -mod disk; - -pub use log::{Logger,LogLevel}; - -fn main() { - vm::VmConfig::new() - .ram_size_megs(1024) - .boot(); -} diff --git a/src/memory/manager.rs b/src/memory/manager.rs index 42d6791..52637b6 100644 --- a/src/memory/manager.rs +++ b/src/memory/manager.rs @@ -4,9 +4,11 @@ use std::sync::{Arc, RwLock}; use crate::memory::{GuestRam, SystemAllocator, Mapping, Error, Result}; use crate::kvm::Kvm; -use crate::system::{BitVec, FileDesc}; +use crate::system::FileDesc; +use crate::util::BitSet; use crate::memory::drm::{DrmBufferAllocator, 
DrmDescriptor}; use std::io::SeekFrom; +use crate::memory::ram::MemoryRegion; #[derive(Clone)] pub struct MemoryManager { @@ -35,14 +37,16 @@ impl MemoryManager { &self.ram } - pub fn kvm_mut(&mut self) -> &mut Kvm { - &mut self.kvm - } - pub fn kvm(&self) -> &Kvm { &self.kvm } + pub fn set_ram_regions(&mut self, regions: Vec) { + let mut devmem = self.device_memory.write().unwrap(); + devmem.set_slots_occupied(0, regions.len()); + self.ram.set_regions(regions); + } + pub fn register_device_memory(&self, fd: RawFd, size: usize) -> Result<(u64, u32)> { let mut devmem = self.device_memory.write().unwrap(); devmem.register(self.kvm(), fd, size) @@ -82,19 +86,25 @@ impl MemoryRegistration { } struct DeviceMemory { - slots: BitVec, + slots: BitSet, mappings: HashMap, allocator: SystemAllocator, } impl DeviceMemory { fn new(ram_region_count: usize, allocator: SystemAllocator) -> DeviceMemory { - let mut slots = BitVec::new(); - for i in 0..ram_region_count { - slots.set_bit(i); - } - DeviceMemory { - slots, mappings: HashMap::new(), allocator + let mut devmem = DeviceMemory { + slots: BitSet::new(), + mappings: HashMap::new(), + allocator + }; + devmem.set_slots_occupied(0, ram_region_count); + devmem + } + + fn set_slots_occupied(&mut self, first: usize, count: usize) { + for i in first..first+count { + self.slots.insert(i) } } @@ -134,12 +144,16 @@ impl DeviceMemory { } fn allocate_slot(&mut self) -> u32 { - let slot = self.slots.first_unset(); - self.slots.set_bit(slot); - slot as u32 + for i in 0.. 
{ + if !self.slots.get(i) { + self.slots.insert(i); + return i as u32; + } + } + unreachable!() } fn free_slot(&mut self, slot: u32) { - self.slots.clear_bit(slot as usize) + self.slots.remove(slot as usize) } } diff --git a/src/memory/mmap.rs b/src/memory/mmap.rs index 6223fc8..a7661a4 100644 --- a/src/memory/mmap.rs +++ b/src/memory/mmap.rs @@ -5,7 +5,7 @@ use std::mem; use std::io::Write; use std::os::unix::io::RawFd; -use crate::vm::{Result,Error,ErrorKind}; +use crate::system::{Result,Error}; pub struct Mapping { ptr: *mut u8, @@ -65,7 +65,7 @@ impl Mapping { /// fn check_offset(&self, offset: usize) -> Result<()> { if offset > self.size { - Err(Error::from(ErrorKind::InvalidMappingOffset(offset))) + Err(Error::InvalidOffset) } else { Ok(()) } @@ -105,7 +105,7 @@ impl Mapping { self.check_offset(offset + bytes.len())?; unsafe { let mut slice: &mut [u8] = &mut self.as_mut_slice()[offset..]; - slice.write_all(bytes).map_err(|_| Error::from(ErrorKind::InvalidMappingOffset(offset))) + slice.write_all(bytes).map_err(|_| Error::InvalidOffset) } } @@ -138,7 +138,7 @@ impl Mapping { pub fn set_mergeable(&self) -> Result<()> { unsafe { if libc::madvise(self.ptr as *mut libc::c_void, self.size, libc::MADV_MERGEABLE) == -1 { - return Err(Error::from_last_errno()); + return Err(Error::last_os_error()); } } Ok(()) @@ -166,7 +166,7 @@ unsafe fn mmap_allocate(size: usize, flags: libc::c_int, fd: libc::c_int) -> Res flags, fd, 0); if p.is_null() || p == libc::MAP_FAILED { - return Err(Error::from_last_errno()); + return Err(Error::last_os_error()); } Ok(p as *mut u8) } \ No newline at end of file diff --git a/src/memory/mod.rs b/src/memory/mod.rs index 2139ab3..fcf77ff 100644 --- a/src/memory/mod.rs +++ b/src/memory/mod.rs @@ -8,30 +8,24 @@ mod allocator; pub use self::allocator::SystemAllocator; pub use self::address::AddressRange; pub use self::mmap::Mapping; -pub use self::ram::GuestRam; -pub use self::ram::{PCI_MMIO_RESERVED_BASE,HIMEM_BASE}; +pub use 
self::ram::{GuestRam,MemoryRegion}; pub use manager::MemoryManager; pub use drm::{DrmDescriptor,DrmPlaneDescriptor}; -use crate::vm::Error as VmError; use std::{result, fmt, io}; -use crate::system; - -pub const KVM_KERNEL_LOAD_ADDRESS: u64 = 0x1000000; -pub const KERNEL_CMDLINE_ADDRESS: u64 = 0x20000; -pub const KERNEL_ZERO_PAGE: u64 = 0x7000; +use crate::{system, kvm}; #[derive(Debug)] pub enum Error { DeviceMemoryAllocFailed, - MappingFailed(VmError), - RegisterMemoryFailed(VmError), - UnregisterMemoryFailed(VmError), + MappingFailed(system::Error), + RegisterMemoryFailed(kvm::Error), + UnregisterMemoryFailed(kvm::Error), GbmCreateDevice(system::Error), GbmCreateBuffer(system::Error), OpenRenderNode(io::Error), - PrimeHandleToFD(system::Error), + PrimeHandleToFD(system::ErrnoError), CreateBuffer(io::Error), NoDrmAllocator, } diff --git a/src/memory/ram.rs b/src/memory/ram.rs index 84126a5..4a4750d 100644 --- a/src/memory/ram.rs +++ b/src/memory/ram.rs @@ -1,17 +1,10 @@ use std::sync::Arc; -use std::cmp; use std::mem; -use crate::memory::Mapping; +use crate::memory::{Mapping,AddressRange}; use crate::memory::mmap::Serializable; -use crate::memory::AddressRange; - -use crate::kvm::Kvm; -use crate::vm::{Result,Error,ErrorKind}; - -pub const HIMEM_BASE: u64 = (1 << 32); -pub const PCI_MMIO_RESERVED_SIZE: usize = (512 << 20); -pub const PCI_MMIO_RESERVED_BASE: u64 = HIMEM_BASE - PCI_MMIO_RESERVED_SIZE as u64; +use crate::system::{Result, Error}; +use crate::util::ByteBuffer; #[derive(Clone)] pub struct GuestRam { @@ -20,11 +13,11 @@ pub struct GuestRam { } impl GuestRam { - pub fn new(ram_size: usize, kvm: &Kvm) -> Result { - Ok(GuestRam { + pub fn new(ram_size: usize) -> GuestRam { + GuestRam { ram_size, - regions: Arc::new(create_regions(kvm, ram_size)?), - }) + regions: Vec::new().into(), + } } pub fn ram_size(&self) -> usize { @@ -50,6 +43,11 @@ impl GuestRam { region.slice(guest_address, size) } + pub fn mut_buffer(&self, guest_address: u64, size: usize) -> 
Result> { + let bytes = self.mut_slice(guest_address, size)?; + Ok(ByteBuffer::from_bytes_mut(bytes)) + } + pub fn mut_slice(&self, guest_address: u64, size: usize) -> Result<&mut[u8]> { let region = self.find_region(guest_address, size)?; region.mut_slice(guest_address, size) @@ -65,6 +63,10 @@ impl GuestRam { region.read_int(guest_address) } + pub fn set_regions(&mut self, regions: Vec) { + self.regions = regions.into(); + } + #[allow(dead_code)] pub fn end_addr(&self) -> u64 { self.regions.iter() @@ -78,54 +80,36 @@ impl GuestRam { fn find_region(&self, guest_address: u64, size: usize) -> Result<&MemoryRegion> { self.regions.iter() + .find(|r| r.contains(guest_address, size)) - .ok_or_else(|| Error::from(ErrorKind::InvalidAddress(guest_address))) + .ok_or(Error::InvalidAddress(guest_address)) } } -fn add_region(regions: &mut Vec, base: u64, size: usize, kvm: &Kvm) -> Result<()> { - let slot = regions.len() as u32; - let mr = MemoryRegion::new(base, size)?; - kvm.add_memory_region(slot, base, mr.mapping.address(), size) - .map_err(|e| Error::new(ErrorKind::RegisterMemoryFailed, e))?; - regions.push(mr); - Ok(()) -} - -fn create_regions(kvm: &Kvm, ram_size: usize) -> Result> { - let mut regions = Vec::new(); - - let lowmem_sz = cmp::min(ram_size, PCI_MMIO_RESERVED_BASE as usize); - add_region(&mut regions, 0, lowmem_sz, &kvm)?; - - if lowmem_sz < ram_size { - let himem_sz = ram_size - lowmem_sz; - add_region(&mut regions, HIMEM_BASE, himem_sz, &kvm)?; - } - - Ok(regions) -} - -struct MemoryRegion { +pub struct MemoryRegion { guest_range: AddressRange, mapping: Mapping, } impl MemoryRegion { - fn new(guest_base: u64, size: usize) -> Result { + pub fn new(guest_base: u64, size: usize) -> Result { Ok(MemoryRegion{ guest_range: AddressRange::new(guest_base, size), mapping: Mapping::new(size)?, }) } + pub fn base_address(&self) -> u64 { + self.mapping.address() + } + fn contains(&self, guest_addr: u64, size: usize) -> bool { self.guest_range.contains(guest_addr, size) 
} fn checked_offset(&self, guest_addr: u64, size: usize) -> Result { if self.contains(guest_addr, size) { Ok(self.guest_range.offset_of(guest_addr)) } else { - Err(Error::from(ErrorKind::InvalidAddress(guest_addr))) + Err(Error::InvalidAddress(guest_addr)) } } diff --git a/src/system/bitvec.rs b/src/system/bitvec.rs deleted file mode 100644 index 2ed08fe..0000000 --- a/src/system/bitvec.rs +++ /dev/null @@ -1,78 +0,0 @@ -use std::ops::Index; - -pub struct BitVec { - blocks: Vec, -} - -impl BitVec { - pub fn new() -> BitVec { - BitVec { blocks: Vec::new() } - } - - pub fn set_bit(&mut self, idx: usize) { - *self.mut_block(idx) |= Self::shifted_bit(idx) - } - - pub fn clear_bit(&mut self, idx: usize) { - if self.blocks.len() > Self::block_idx(idx) { - let bit = Self::shifted_bit(idx); - *self.mut_block(idx) &= !bit; - } - } - - pub fn get_bit(&self, n: usize) -> bool { - let off = n % 64; - let bit = 1 << off as u64; - self.get_block(n) & bit != 0 - } - - pub fn first_unset(&self) -> usize { - for (i,block) in self.blocks.iter().enumerate() { - if *block != u64::max_value() { - return (i * 64) + (0..64).find(|n| Self::shifted_bit(*n) & *block == 0).expect("..."); - } - } - self.blocks.len() * 64 - } - - fn shifted_bit(idx: usize) -> u64 { - let shift = (idx % 64) as u64; - (1 << shift) - } - - fn block_idx(idx: usize) -> usize { - idx / 64 - } - - fn get_block(&self, idx: usize) -> u64 { - let idx = Self::block_idx(idx); - if self.blocks.len() > idx { - self.blocks[idx] - } else { - 0 - } - } - - fn mut_block(&mut self, idx: usize) -> &mut u64 { - let idx = Self::block_idx(idx); - if self.blocks.len() <= idx { - self.blocks.resize_with(idx + 1, Default::default); - } - &mut self.blocks[idx] - } -} - -static TRUE: bool = true; -static FALSE: bool = false; - -impl Index for BitVec { - type Output = bool; - - fn index(&self, index: usize) -> &Self::Output { - if self.get_bit(index) { - &TRUE - } else { - &FALSE - } - } -} diff --git a/src/system/epoll.rs 
b/src/system/epoll.rs index 9a778c0..b496c86 100644 --- a/src/system/epoll.rs +++ b/src/system/epoll.rs @@ -79,7 +79,7 @@ impl EPoll { libc::epoll_wait(self.fd, events.events_ptr(), nevents, timeout) }; - if ret == -1 && Error::last_os_error() != Error::from_raw_os_error(EINTR) { + if ret == -1 && Error::last_errno() != EINTR { return Err(Error::last_os_error()); } else if ret as usize > events.len() { return Err(Error::from_raw_os_error(EINVAL)); diff --git a/src/system/errno.rs b/src/system/errno.rs index 809bb30..88e176b 100644 --- a/src/system/errno.rs +++ b/src/system/errno.rs @@ -13,8 +13,16 @@ impl Error { Error(e) } + pub fn errno(self) -> i32 { + self.0 + } + pub fn last_os_error() -> Error { - Error(unsafe { *__errno_location() }) + Error(Self::last_errno()) + } + + pub fn last_errno() -> i32 { + unsafe { *__errno_location() } } pub fn is_interrupted(&self) -> bool { @@ -42,10 +50,6 @@ impl From for io::Error { } } -pub fn errno_result() -> Result { - Err(Error::last_os_error()) -} - pub fn cvt(t: T) -> Result { if t.is_minus_one() { Err(Error::last_os_error()) diff --git a/src/system/eventfd.rs b/src/system/eventfd.rs new file mode 100644 index 0000000..0c5b3c6 --- /dev/null +++ b/src/system/eventfd.rs @@ -0,0 +1,54 @@ +use std::os::unix::io::{RawFd,AsRawFd}; + +use libc; + +use crate::system::{Result,Error}; + +pub struct EventFd(RawFd); + +const U64_SZ: usize = 8; + +impl EventFd { + pub fn new() -> Result { + let fd = unsafe { libc::eventfd(0, 0) }; + if fd < 0 { + return Err(Error::last_os_error()); + } + Ok(EventFd(fd)) + } + + pub fn write(&self, v: u64) -> Result<()> { + let ret = unsafe { libc::write(self.0, &v as *const _ as *const libc::c_void, U64_SZ) }; + if ret as usize != U64_SZ { + if ret < 0 { + return Err(Error::last_os_error()) + } + return Err(Error::EventFdWrite); + } + Ok(()) + } + + pub fn read(&self) -> Result { + let mut v = 0u64; + let ret = unsafe { libc::read(self.0, &mut v as *mut _ as *mut libc::c_void, U64_SZ) }; + if ret as 
usize != U64_SZ { + if ret < 0 { + return Err(Error::last_os_error()); + } + return Err(Error::EventFdRead); + } + Ok(v) + } +} + +impl Drop for EventFd { + fn drop(&mut self) { + let _ = unsafe { libc::close(self.0) }; + } +} + +impl AsRawFd for EventFd { + fn as_raw_fd(&self) -> RawFd { + self.0 + } +} diff --git a/src/system/ioctl.rs b/src/system/ioctl.rs index bfdffc3..b2080ec 100644 --- a/src/system/ioctl.rs +++ b/src/system/ioctl.rs @@ -1,6 +1,6 @@ use libc::{self, c_ulong, c_void}; use std::os::unix::io::RawFd; -use crate::system::{Error,Result}; +use crate::system::errno::{Result,Error}; pub const IOC_SIZEBITS: u64 = 14; pub const IOC_DIRBITS: u64 = 2; diff --git a/src/system/memfd.rs b/src/system/memfd.rs index 84ef7b4..b545c8d 100644 --- a/src/system/memfd.rs +++ b/src/system/memfd.rs @@ -2,7 +2,7 @@ use std::ffi::CString; use std::io::SeekFrom; use std::os::unix::io::{RawFd,AsRawFd}; -use crate::system::{Result, FileDesc, errno_result}; +use crate::system::{Error,Result, FileDesc}; use libc::{ self, c_char, c_uint, c_int, c_long,SYS_memfd_create, @@ -52,7 +52,7 @@ impl MemoryFd { let name = name.as_ptr() as *const c_char; let fd = unsafe { libc::syscall(SYS_memfd_create as c_long, name, flags) } as c_int; if fd < 0 { - errno_result() + Err(Error::last_os_error()) } else { Ok(FileDesc::new(fd)) } @@ -61,7 +61,7 @@ impl MemoryFd { fn add_seals(&self, flags: c_int) -> Result<()> { let ret = unsafe { libc::fcntl(self.fd.as_raw_fd(), libc::F_ADD_SEALS, flags) }; if ret < 0 { - errno_result() + Err(Error::last_os_error()) } else { Ok(()) } diff --git a/src/system/mod.rs b/src/system/mod.rs index 1d58a54..e7b9356 100644 --- a/src/system/mod.rs +++ b/src/system/mod.rs @@ -1,19 +1,100 @@ #[macro_use]pub mod ioctl; mod epoll; mod errno; -mod bitvec; +mod eventfd; mod socket; mod filedesc; mod memfd; mod tap; pub mod netlink; -pub use bitvec::BitVec; pub use filedesc::{FileDesc, FileFlags}; +pub use eventfd::EventFd; pub use memfd::MemoryFd; pub use 
epoll::{EPoll,Event}; -pub use errno::{Error,Result,errno_result}; pub use socket::ScmSocket; pub use netlink::NetlinkSocket; pub use tap::Tap; +use std::{fmt, result, io}; +pub use errno::Error as ErrnoError; + +pub type Result = result::Result; + +#[derive(Debug)] +pub enum Error { + Errno(errno::Error), + OpenKvmFailed(errno::Error), + InvalidOffset, + InvalidAddress(u64), + IoctlError(&'static str, errno::Error), + EventFdWrite, + EventFdRead, + +} + +impl Error { + pub fn last_os_error() -> Error { + Error::Errno(errno::Error::last_os_error()) + } + + pub fn last_errno() -> i32 { + errno::Error::last_errno() + } + + pub fn from_raw_os_error(e: i32) -> Error { + Error::Errno(errno::Error::from_raw_os_error(e)) + } + + pub fn inner_err(&self) -> Option<&errno::Error> { + match self { + Error::IoctlError(_,e) => Some(e), + Error::Errno(e) => Some(e), + Error::OpenKvmFailed(e) => Some(e), + _ => None, + } + } + + pub fn is_interrupted(&self) -> bool { + self.inner_err() + .map(|e| e.is_interrupted()) + .unwrap_or(false) + } +} + +impl std::error::Error for Error {} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + match self { + Errno(err) => err.fmt(f), + InvalidOffset => write!(f, "attempt to access invalid offset into mapping"), + InvalidAddress(addr) => write!(f, "attempt to access invalid address: {0:16x}", addr), + OpenKvmFailed(err) => write!(f, "failed to open /dev/kvm: {}", err), + IoctlError(name, err) => write!(f, "failed to call {} ioctl: {}", name, err), + EventFdWrite => write!(f, "failed writing to eventfd"), + EventFdRead => write!(f, "failed reading from eventfd"), + } + } +} +impl From for Error { + fn from(err: errno::Error) -> Error { + Error::Errno(err) + } +} + +impl From for Error { + fn from(e: io::Error) -> Self { + Error::from_raw_os_error(e.raw_os_error().unwrap_or_default()) + } +} + +impl From for io::Error { + fn from(e: Error) -> Self { + match e { + Error::Errno(e) => 
io::Error::from_raw_os_error(e.errno()), + e => io::Error::new(io::ErrorKind::Other, e), + } + } +} diff --git a/src/util/bitvec.rs b/src/util/bitvec.rs new file mode 100644 index 0000000..ab3c006 --- /dev/null +++ b/src/util/bitvec.rs @@ -0,0 +1,77 @@ +/// An efficiently stored array (or set) of bits. +/// +/// Bits can be set, cleared, or tested by index into the +/// array of bits. Since the methods are named to follow +/// the set collection convention you can also think of +/// it as a set which stores `usize` index values. +/// +pub struct BitSet { + blocks: Vec, +} + +impl BitSet { + + /// Create a new empty `BitSet` + pub fn new() -> BitSet { + BitSet { blocks: Vec::new() } + } + + /// Removes all entries from the set. + pub fn clear(&mut self) { + self.blocks.clear(); + } + + /// Inserts a bit into the set. Sets the entry at `idx` to `true`. + pub fn insert(&mut self, idx: usize) { + let (bit,block) = Self::bit_and_block(idx); + *self.block_mut(block) |= bit; + } + + /// Removes a bit from the set. Sets the entry at `idx` to `false`. + pub fn remove(&mut self, idx: usize) { + let (bit,block) = Self::bit_and_block(idx); + if self.blocks.len() > block { + *self.block_mut(block) &= !bit; + } + } + + /// Returns the value of the bit at `idx` + pub fn get(&self, idx: usize) -> bool { + let (bit,block) = Self::bit_and_block(idx); + if self.block(block) & bit != 0 { + return true; + } + false + } + + /// Convert a bit index `idx` into an index into + /// the block array and the corresponding bit value + /// inside of that block. + fn bit_and_block(idx: usize) -> (u64, usize) { + const SHIFT64: usize = 6; + const MASK64: usize = (1 << SHIFT64) - 1; + let bit = (1usize << (idx & MASK64)) as u64; + let block = idx >> SHIFT64; + (bit, block) + } + + /// Returns value stored at index `blk` or returns 0 if `blk` + /// is index larger than block array. 
+ fn block(&self, blk: usize) -> u64 { + if self.blocks.len() > blk { + self.blocks[blk] + } else { + 0 + } + } + + /// Returns mutable reference to value stored at index `blk` + /// and will resize block vector if index is larger than block + /// array. + fn block_mut(&mut self, blk: usize) -> &mut u64 { + if self.blocks.len() <= blk { + self.blocks.resize_with(blk + 1, Default::default); + } + &mut self.blocks[blk] + } +} diff --git a/src/util/buffer.rs b/src/util/buffer.rs new file mode 100644 index 0000000..0506791 --- /dev/null +++ b/src/util/buffer.rs @@ -0,0 +1,386 @@ +/// Wraps a block of bytes and provides an interface for reading/writing integers and byte slices. +/// +/// The inner type `` be a `Vec[u8]` a slice `&[u8]` or a mutable slice `&mut [u8]`. +/// +/// Methods for reading data are provided for all inner object types, and for vectors and mutable slices +/// methods are also available for writing into the buffer. +/// +/// Reading from and writing to the buffer can either be at an absolute offset passed or +/// at the current offset. When using the current offset methods, the current offset will +/// be advanced by the size of the object read or written. +/// +/// The default endian ordering for integers read from or written to the buffer is the native +/// ordering of the system. Use `self.big_endian()` or `self.little_endian()` to set a specific +/// byte ordering. +pub struct ByteBuffer { + /// Byte-order of integers stored in this buffer + endian: Endian, + /// Current offset for reading or writing. + offset: usize, + /// The block of bytes wrapped by this buffer + inner: T, +} + +impl > ByteBuffer { + + /// Return a mutable slice of length `len` starting at `offset` into the buffer. + pub fn mut_at(&mut self, offset: usize, len: usize) -> &mut [u8] { + &mut self.inner.as_mut()[offset..offset+len] + } + + /// Write an integer or a `&[u8]` slice at the specified `offset` into the buffer. 
+ /// + /// For integers, the type may be any of: u8, u16, u32, u64 + /// + pub fn write_at(&mut self, offset: usize, val: V) -> &mut Self { + let sz = val.size(); + let endian = self.endian; + val.write(self.mut_at(offset, sz), endian); + self + } + +} + +impl > ByteBuffer { + + /// Return a slice of length `len` starting at `offset` into the buffer. + /// + /// # Panics + /// + /// Panics if `offset + len` exceeds size of buffer. + /// + pub fn ref_at(&self, offset: usize, len: usize) -> &[u8] { + &self.inner.as_ref()[offset..offset+len] + } + + pub fn as_ref(&self) -> &[u8] { + &self.inner.as_ref() + } + + /// Read and return an integer value from the current offset and increment + /// the current offset by the byte size of the integer type. + /// + /// The integer type `V` may be any of: u8, u16, u32, u64 + /// + /// # Panics + /// + /// Panics if byte size of integer type added to current offset exceeds size + /// of buffer. + /// + /// # Examples + /// ``` + /// use ph::util::ByteBuffer; + /// + /// let bytes = &[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF]; + /// let mut buffer = ByteBuffer::from_bytes(bytes).big_endian(); + /// + /// let n16 = buffer.read::(); + /// let n32: u32 = buffer.read(); + /// + /// assert_eq!(n16, 0xAABB); + /// assert_eq!(n32, 0xCCDDEEFF); + /// + /// ``` + pub fn read(&mut self) -> V { + let offset = self.offset; + self.offset += V::SIZE; + self.read_at(offset) + } + + /// Read and return an integer value from the specified `offset` into the buffer. + /// + /// The integer type `V` may be any of: u8, u16, u32, u64 + /// + /// # Panics + /// + /// Panics if byte size of integer type added to `offset` exceeds size + /// of buffer. 
+ /// + /// # Examples + /// ``` + /// use ph::util::ByteBuffer; + /// + /// let bytes = &[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF]; + /// let mut buffer = ByteBuffer::from_bytes(bytes).big_endian(); + /// + /// let n8 = buffer.read_at::(5); + /// let n16: u16 = buffer.read_at(2); + /// let n32: u32 = buffer.read_at(0); + /// + /// assert_eq!(n8, 0xFF); + /// assert_eq!(n16, 0xCCDD); + /// assert_eq!(n32, 0xAABBCCDD); + /// ``` + /// + pub fn read_at(&self, offset: usize) -> V { + let endian = self.endian; + V::read(self.ref_at(offset, V::SIZE), endian) + } + + /// Copy from the current offset into the slice `bytes` and increment the current + /// offset by the size of `bytes` + /// + /// # Panics + /// + /// Panics if `bytes.len()` added to current offset exceeds size of buffer. + /// + pub fn read_bytes(&mut self, bytes: &mut [u8]) { + let offset = self.offset; + self.offset += bytes.len(); + self.read_bytes_at(offset, bytes); + } + + /// Copy from the specified offset into the slice `bytes` + /// + /// # Panics + /// + /// Panics if `bytes.len() + offset` exceeds size of buffer. 
+ /// + pub fn read_bytes_at(&self, offset: usize, bytes: &mut [u8]) { + bytes.copy_from_slice(self.ref_at(offset, bytes.len())); + } +} + +impl ByteBuffer { + fn new_with(inner: T) -> Self { + ByteBuffer { + endian: Endian::Native, + offset: 0, + inner, + } + } + + /// Set the current offset into the buffer to the value `offset` + /// + /// # Examples + /// + /// ``` + /// use ph::util::ByteBuffer; + /// + /// let mut buffer = ByteBuffer::from_bytes(&[0xAA, 0xBB, 0xCC, 0xDD]).big_endian(); + /// + /// buffer.set_offset(2); + /// let n: u8 = buffer.read(); + /// assert_eq!(n, 0xCC); + /// + /// buffer.set_offset(1); + /// let n: u16 = buffer.read(); + /// assert_eq!(n, 0xBBCC); + /// + /// ``` + pub fn set_offset(&mut self, offset: usize) { + self.offset = offset; + } + + /// Configure this `ByteBuffer` instance to write integers in big-endian byte order + /// + /// Caller must chain this to call to constructor because it consumes and returns + /// `self` argument. + /// + /// # Examples + /// + /// ``` + /// use ph::util::ByteBuffer; + /// + /// let mut buffer = ByteBuffer::from_bytes(&[0xAA, 0xBB, 0xCC, 0xDD]) + /// .big_endian(); + /// + /// let n: u32 = buffer.read(); + /// + /// assert_eq!(n, 0xAABBCCDD); + /// ``` + /// + pub fn big_endian(mut self) -> Self { + self.endian = Endian::Big; + self + } + + /// Configure this `ByteBuffer` instance to write integers in little-endian byte order + /// + /// Caller must chain this to call to constructor because it consumes and returns + /// `self` argument. 
+ /// + /// # Examples + /// + /// ``` + /// use ph::util::ByteBuffer; + /// + /// let mut buffer = ByteBuffer::from_bytes(&[0xAA, 0xBB, 0xCC, 0xDD]) + /// .little_endian(); + /// + /// let n: u32 = buffer.read(); + /// assert_eq!(n, 0xDDCCBBAA); + /// + /// let n: u16 = buffer.read_at(2); + /// assert_eq!(n, 0xDDCC); + /// ``` + /// + pub fn little_endian(mut self) -> Self { + self.endian = Endian::Little; + self + } +} + +impl <'a> ByteBuffer<&'a [u8]> { + /// Create a new read-only `ByteBuffer` from the slice `bytes` + pub fn from_bytes(bytes: &'a [u8]) -> Self { + ByteBuffer::new_with(bytes) + } + + /// Return the byte length of the inner slice; + pub fn len(&self) -> usize { + self.inner.len() + } +} + +impl <'a> ByteBuffer<&'a mut [u8]> { + /// Create a new `ByteBuffer` from the mutable slice `bytes` + pub fn from_bytes_mut(bytes: &'a mut [u8]) -> Self { + ByteBuffer::new_with(bytes) + } + + /// Write an integer or a `&[u8]` slice at the current offset and increment + /// the current offset by the size of `val`. + /// + /// For integers, the type may be any of: u8, u16, u32, u64 + /// + pub fn write(&mut self, val: V) -> &mut Self { + let offset = self.offset; + self.offset += val.size(); + self.write_at(offset, val) + } + + /// Return the byte length of the inner slice; + pub fn len(&self) -> usize { + self.inner.len() + } +} + +impl ByteBuffer> { + /// Create a `size` length byte buffer and initialize the entire buffer with + /// `0u8` (zero bytes). + pub fn new(size: usize) -> Self { + Self::from_vec(vec![0u8; size]) + } + + /// Create an empty buffer (`self.len() == 0`) with an inner vector instance. + /// + /// Data can be appended to this buffer with `self.write()` + /// + pub fn new_empty() -> Self { + Self::from_vec(Vec::new()) + } + + /// Create a buffer from a `Vec` + pub fn from_vec(vec: Vec) -> Self { + Self::new_with(vec) + } + + /// Returns the byte length of the inner vector. 
+ pub fn len(&self) -> usize { + self.inner.len() + } + + /// Write an integer or a `&[u8]` slice at the current offset and increment + /// the current offset by the size of `val`. + /// + /// For integers, the type may be any of: u8, u16, u32, u64 + /// + /// If the size of the integer type added to the current offset exceeds + /// the length of the vector, the vector will be resized. + /// + /// # Examples + /// ``` + /// use ph::util::ByteBuffer; + /// + /// let mut buf = ByteBuffer::new_empty().big_endian(); + /// + /// assert_eq!(buf.len(), 0); + /// + /// let n: u32 = 0xAABBCCDD; + /// + /// buf.write(n); + /// + /// assert_eq!(buf.as_ref(), &[0xAA, 0xBB, 0xCC, 0xDD]); + /// + /// buf.write(n); + /// + /// assert_eq!(buf.len(), 8); + /// + /// ``` + pub fn write(&mut self, val: V) -> &mut Self { + let offset = self.offset; + self.offset += val.size(); + if self.offset > self.inner.len() { + self.inner.resize(self.offset, 0); + } + self.write_at(offset, val) + } +} + +/// The byte-order configuration of a `ByteBuffer` +#[derive(Copy,Clone,Debug)] +pub enum Endian { + Big, + Little, + Native, +} + +/// An object type which can be read from a `ByteBuffer` with the +/// `self.read()` or `self.read_at()` methods. +pub trait Readable { + const SIZE: usize; + fn read(bytes: &[u8], endian: Endian) -> Self; +} + +/// An object type which can be written to a `ByteBuffer` with the +/// `self.write(val)` or `self.write_at(val)` methods. +pub trait Writeable { + fn size(&self) -> usize; + fn write(&self, bytes: &mut [u8], endian: Endian); +} + +impl Writeable for &[u8] { + fn size(&self) -> usize { + self.len() + } + fn write(&self, bytes: &mut [u8], _endian: Endian) { + bytes.copy_from_slice(self); + } +} + +macro_rules! 
storeable_int { + {$T:ty} => { + impl Writeable for $T { + fn size(&self) -> usize { + ::std::mem::size_of::<$T>() + } + fn write(&self, bytes: &mut [u8], endian: Endian) { + bytes.copy_from_slice(&match endian { + Endian::Big => self.to_be_bytes(), + Endian::Little => self.to_le_bytes(), + Endian::Native => self.to_ne_bytes(), + }); + } + } + + impl Readable for $T { + const SIZE: usize = ::std::mem::size_of::<$T>(); + + fn read(bytes: &[u8], endian: Endian) -> Self { + let mut buf = [0u8; Self::SIZE]; + buf.copy_from_slice(&bytes[..Self::SIZE]); + match endian { + Endian::Big => <$T>::from_be_bytes(buf), + Endian::Little=> <$T>::from_le_bytes(buf), + Endian::Native=> <$T>::from_ne_bytes(buf), + } + } + } + } +} + +storeable_int!(u8); +storeable_int!(u16); +storeable_int!(u32); +storeable_int!(u64); diff --git a/src/log.rs b/src/util/log.rs similarity index 100% rename from src/log.rs rename to src/util/log.rs diff --git a/src/util/mod.rs b/src/util/mod.rs new file mode 100644 index 0000000..9d1b630 --- /dev/null +++ b/src/util/mod.rs @@ -0,0 +1,8 @@ +mod bitvec; +mod buffer; +#[macro_use] +mod log; + +pub use bitvec::BitSet; +pub use buffer::ByteBuffer; +pub use log::{Logger,LogLevel}; diff --git a/src/virtio/bus.rs b/src/virtio/bus.rs index 1b8e36d..551932f 100644 --- a/src/virtio/bus.rs +++ b/src/virtio/bus.rs @@ -5,7 +5,7 @@ use crate::memory::{AddressRange, MemoryManager}; use super::{VirtioDevice,VirtioDeviceOps,PciIrq}; use super::consts::*; use super::pci::PciBus; -use crate::vm::Result; +use crate::virtio::Result; use std::iter; diff --git a/src/virtio/chain.rs b/src/virtio/chain.rs index 41395ca..2613075 100644 --- a/src/virtio/chain.rs +++ b/src/virtio/chain.rs @@ -1,291 +1,312 @@ - +use std::fmt; use std::io::{self,Read,Write}; use crate::memory::GuestRam; -use super::VirtQueue; -use super::vring::Descriptor; -use byteorder::{WriteBytesExt, LittleEndian, ReadBytesExt}; - -pub struct Chain { +use crate::virtio::VirtQueue; +use 
crate::virtio::vring::Descriptor; +struct DescriptorList { memory: GuestRam, - - vq: VirtQueue, - - /// Number of remaining descriptors allowed in this chain. - ttl: u16, - - /// Current descriptor or `None` if at end of chain - current: Option, - - /// Offset for read/write into current descriptor + descriptors: Vec, offset: usize, - - /// Saved head index to place in used ring. Set to `None` - /// after writing to used ring. - head_idx: Option, - - /// Number of bytes written into writeable descriptors - /// in this chain. Will be written into used ring later. - wlen: usize, + total_size: usize, + consumed_size: usize, } - -impl Chain { - pub fn new(memory: GuestRam, vq: VirtQueue, head: u16, ttl: u16) -> Chain { - let first = vq.load_descriptor(head); - Chain { +impl DescriptorList { + fn new(memory: GuestRam) -> Self { + DescriptorList { memory, - vq, ttl, head_idx: Some(head), - current: first, - offset: 0, wlen: 0, + descriptors: Vec::new(), + offset: 0, + total_size: 0, + consumed_size: 0, } } - /// Applies a function to the current descriptor (if `Some`) or - /// returns default parameter `d` (if `None`). - pub fn with_current_descriptor(&self, d: U, f: F) -> U - where F: FnOnce(&Descriptor) -> U { - match self.current { - Some(ref desc) => f(desc), - None => d, - } + fn add_descriptor(&mut self, d: Descriptor) { + self.total_size += d.len as usize; + self.descriptors.push(d) } - /// Load and return next descriptor from chain. - /// - /// If `self.current` - /// - /// 1) holds a descriptor (`self.current.is_some()`) - /// 2) that descriptor has a next field (`desc.has_next()`) - /// 3) time-to-live is not zero (`self.ttl > 0`) - /// - /// then load and return the descriptor pointed to by the current - /// descriptor. Returns `None` otherwise. 
- /// - fn next_desc(&self) -> Option { - self.with_current_descriptor(None, |desc| { - if desc.has_next() && self.ttl > 0 { - self.vq.load_descriptor(desc.next) + fn reverse(&mut self) { + self.descriptors.reverse(); + } + + fn clear(&mut self) { + self.descriptors.clear(); + self.offset = 0; + } + + fn is_empty(&self) -> bool { + self.descriptors.is_empty() + } + + fn current(&self) -> Option<&Descriptor> { + self.descriptors.last() + } + + fn current_address(&self, size: usize) -> Option { + self.current().and_then(|d| { + if d.remaining(self.offset) >= size { + Some(d.addr + self.offset as u64) } else { None } }) } - /// Load next descriptor in chain into `self.current`. - /// - /// Set `self.current` to the next descriptor in chain or `None` if - /// at end of chain. - /// - pub fn load_next_descriptor(&mut self) { - self.current = self.next_desc(); - // Only decrement ttl if a new descriptor was loaded - if self.current.is_some() { - self.ttl -= 1; + fn inc(&mut self, len: usize) { + let d = match self.current() { + Some(d) => d, + None => { + warn!("Virtqueue increment called with no current descriptor"); + return; + } + }; + let remaining = d.remaining(self.offset); + if len > remaining { + warn!("Virtqueue descriptor buffer increment exceeds current size"); } - self.offset = 0; - } - - /// - /// Return `true` if current descriptor exists and is readable, otherwise - /// `false`. - /// - pub fn is_current_readable(&self) -> bool { - self.with_current_descriptor(false, |desc| !desc.is_write()) - } - - /// - /// If `current` is a writeable descriptor, keep loading new descriptors until - /// a readable descriptor is found or end of chain is reached. After this - /// call `current` will either be a readable descriptor or `None` if the - /// end of chain was reached. 
- /// - pub fn skip_readable(&mut self) { - while self.is_current_readable() { - self.load_next_descriptor(); + if len >= remaining { + self.consumed_size += remaining; + self.offset = 0; + self.descriptors.pop(); + } else { + self.consumed_size += len; + self.offset += len; } } - /// Return `true` if the end of the descriptor chain has been reached. - /// - /// When at end of chain `self.current` is `None`. - pub fn is_end_of_chain(&self) -> bool { - self.current.is_none() + fn read(&mut self, buf: &mut [u8]) -> usize { + if let Some(d) = self.current() { + let n = d.read_from(&self.memory, self.offset, buf); + self.inc(n); + return n; + } + 0 } - /// - /// Length field of current descriptor is returned or 0 if - /// at end of chain. - /// - fn current_size(&self) -> usize { - self.with_current_descriptor(0, |desc| desc.len as usize) + fn write(&mut self, buf: &[u8]) -> usize { + if let Some(d) = self.current() { + let n = d.write_to(&self.memory, self.offset, buf); + self.inc(n); + return n; + } + 0 } - /// - /// Increment `self.offset` with the number of bytes - /// read or written from `current` descriptor and - /// load next descriptor if `current` descriptor - /// has been fully consumed. - /// - fn _inc_offset(&mut self, sz: usize) { - self.offset += sz; - if self.offset >= self.current_size() { - self.load_next_descriptor(); + fn write_from_reader(&mut self, reader: R, size: usize) -> io::Result + where R: Read+Sized + { + if let Some(d) = self.current() { + let n = d.write_from_reader(&self.memory, self.offset, reader, size)?; + self.inc(n); + Ok(n) + } else { + Ok(0) } } - pub fn inc_offset(&mut self, sz: usize, write: bool) { - if write { - assert!(!self.is_current_readable()); - self.wlen += sz; - } - self._inc_offset(sz) - } - - - /// - /// Read from the `current` readable descriptor and return - /// the number of bytes read. 
- /// - /// If this read exhausts the `current` descriptor then the - /// next descriptor in chain will be loaded into `current`. - /// - /// Assumes that current is a readable descriptor so caller must - /// call `self.is_current_readable()` before calling this. - /// - fn read_current(&mut self, bytes: &mut[u8]) -> usize { - assert!(self.is_current_readable()); - - let nread = self.with_current_descriptor(0, |desc| { - desc.read_from(&self.memory, self.offset, bytes) - }); - self._inc_offset(nread); - nread - } - - /// - /// Write into the `current` writeable descriptor if it exists - /// and return the number of bytes read or 0 if at end of chain. - /// - /// If this write exausts the `current` descriptor then the - /// next descriptor in chain will be loaded into `current` - /// - /// Assumes that `current` is a writeable descriptor or `None` - /// so caller must call `self.skip_readable()` before calling this. - /// - fn write_current(&mut self, bytes: &[u8]) -> usize { - assert!(!self.is_current_readable()); - let sz = self.with_current_descriptor(0, |desc| { - desc.write_to(&self.memory, self.offset, bytes) - }); - self._inc_offset(sz); - sz - } - - /// - /// Write this chain head index (`self.head_idx`) and bytes written (`self.wlen`) - /// into used ring. Consumes `self.head_idx` so that used ring cannot - /// accidentally be written more than once. Since we have returned this - /// chain to the guest, it is no longer valid to access any descriptors in - /// this chain so `self.current` is set to `None`. 
- /// - pub fn flush_chain(&mut self) { - match self.head_idx { - Some(idx) => self.vq.put_used(idx, self.wlen as u32), - None => (), - } - self.current = None; - self.head_idx = None; - } - - pub fn current_write_address(&mut self, size: usize) -> Option { - self.skip_readable(); - self.current_address(size) - } - - pub fn current_address(&mut self, size: usize) -> Option { - self.with_current_descriptor(None, |desc| { - if desc.len as usize - self.offset < size { - None - } else { - Some(desc.addr + self.offset as u64) - } - }) - } - - pub fn get_wlen(&self) -> usize { - self.wlen - } - - #[allow(dead_code)] - pub fn debug(&self) { - self.with_current_descriptor((), |desc| { - println!("offset: {} desc: {:?}", self.offset, desc); - }); - } - - pub fn copy_from_reader(&mut self, r: R, size: usize) -> io::Result { - self.skip_readable(); - assert!(!self.is_current_readable()); - - let res = self.with_current_descriptor(Ok(0usize), |desc| { - desc.write_from_reader(&self.memory, self.offset,r, size) - }); - if let Ok(nread) = res { - self._inc_offset(nread); - self.wlen += nread; - } - res - } - - pub fn current_write_slice(&self) -> &mut [u8] { - match self.current { - Some(d) if d.is_write() && d.remaining(self.offset) > 0 => { - let size = d.remaining(self.offset); - self.memory.mut_slice(d.addr + self.offset as u64, size).unwrap_or(&mut []) - }, - _ => &mut [], + fn current_slice(&self) -> &[u8] { + if let Some(d) = self.current() { + let size = d.remaining(self.offset); + let addr = d.addr + self.offset as u64; + self.memory.slice(addr, size).unwrap_or(&[]) + } else { + &[] } } - pub fn current_read_slice(&self) -> &[u8] { - match self.current { - Some(d) if !d.is_write() && d.remaining(self.offset) > 0 => { - let size = d.remaining(self.offset); - self.memory.slice(d.addr + self.offset as u64, size).unwrap_or(&[]) - }, - _ => &[], + + fn current_mut_slice(&self) -> &mut [u8] { + if let Some(d) = self.current() { + let size = d.remaining(self.offset); + let 
addr = d.addr + self.offset as u64; + self.memory.mut_slice(addr, size).unwrap_or(&mut []) + } else { + &mut [] } } + fn remaining(&self) -> usize { + self.total_size - self.consumed_size + } +} + +impl fmt::Debug for DescriptorList { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "DList[size={}, [", self.total_size)?; + for d in self.descriptors.iter().rev() { + write!(f, "(#{}, 0x{:08x}, [{}]),", d.idx, d.addr, d.len)?; + } + write!(f, "]") + } +} + +pub struct Chain { + head: Option, + vq: VirtQueue, + readable: DescriptorList, + writeable: DescriptorList, +} + +impl Chain { + pub fn new(memory: GuestRam, vq: VirtQueue, head: u16, ttl: u16) -> Self { + let (readable,writeable) = Self::load_descriptors(memory, &vq, head, ttl); + Chain { + head: Some(head), + vq, + readable, + writeable, + } + } + + fn load_descriptors(memory: GuestRam, vq: &VirtQueue, head: u16, ttl: u16) -> (DescriptorList, DescriptorList) { + let mut readable = DescriptorList::new(memory.clone()); + let mut writeable = DescriptorList::new(memory); + let mut idx = head; + let mut ttl = ttl; + + while let Some(d) = vq.load_descriptor(idx) { + if ttl == 0 { + warn!("Descriptor chain length exceeded ttl"); + break; + } else { + ttl -= 1; + } + + if d.is_write() { + writeable.add_descriptor(d); + } else { + if !writeable.is_empty() { + warn!("Guest sent readable virtqueue descriptor after writeable descriptor in violation of specification"); + } + readable.add_descriptor(d); + } + if !d.has_next() { + break; + } + idx = d.next; + } + readable.reverse(); + writeable.reverse(); + return (readable, writeable); + } + pub fn w8(&mut self, n: u8) -> io::Result<()> { - self.write_u8(n) + self.write_all(&[n])?; + Ok(()) } - - #[allow(unused)] pub fn w16(&mut self, n: u16) -> io::Result<()> { - self.write_u16::(n) + self.write_all(&n.to_le_bytes())?; + Ok(()) } - pub fn w32(&mut self, n: u32) -> io::Result<()> { - self.write_u32::(n) + self.write_all(&n.to_le_bytes())?; + Ok(()) } - 
pub fn w64(&mut self, n: u64) -> io::Result<()> { - self.write_u64::(n) + self.write_all(&n.to_le_bytes())?; + Ok(()) } - #[allow(unused)] pub fn r16(&mut self) -> io::Result { - self.read_u16::() + let mut buf = [0u8; 2]; + self.read_exact(&mut buf)?; + Ok(u16::from_le_bytes(buf)) } - pub fn r32(&mut self) -> io::Result { - self.read_u32::() + let mut buf = [0u8; 4]; + self.read_exact(&mut buf)?; + Ok(u32::from_le_bytes(buf)) } pub fn r64(&mut self) -> io::Result { - self.read_u64::() + let mut buf = [0u8; 8]; + self.read_exact(&mut buf)?; + Ok(u64::from_le_bytes(buf)) + } + + pub fn flush_chain(&mut self) { + if let Some(head) = self.head.take() { + self.readable.clear(); + self.writeable.clear(); + self.vq.put_used(head, self.writeable.consumed_size as u32); + } + } + + pub fn current_write_address(&mut self, size: usize) -> Option { + self.writeable.current_address(size) + } + + pub fn remaining_read(&self) -> usize { + self.readable.remaining() + } + + pub fn remaining_write(&self) -> usize { + self.writeable.remaining() + } + + pub fn get_wlen(&self) -> usize { + self.writeable.consumed_size + } + + pub fn is_end_of_chain(&self) -> bool { + self.readable.is_empty() && self.writeable.is_empty() + } + + pub fn current_read_slice(&self) -> &[u8] { + self.readable.current_slice() + } + + pub fn inc_read_offset(&mut self, sz: usize) { + self.readable.inc(sz); + } + + pub fn inc_write_offset(&mut self, sz: usize) { + if !self.readable.is_empty() { + self.readable.clear(); + } + self.writeable.inc(sz); + } + + pub fn current_write_slice(&mut self) -> &mut [u8] { + self.writeable.current_mut_slice() + } + + pub fn copy_from_reader(&mut self, r: R, size: usize) -> io::Result + where R: Read+Sized + { + self.writeable.write_from_reader(r, size) + } +} + +impl Read for Chain { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + let mut nread = 0usize; + while nread < buf.len() { + nread += match self.readable.read(&mut buf[nread..]) { + 0 => return Ok(nread), + n => 
n, + }; + } + Ok(nread) + } +} +impl Write for Chain { + fn write(&mut self, buf: &[u8]) -> io::Result { + let mut nwrote = 0; + while nwrote < buf.len() { + match self.writeable.write(&buf[nwrote..]) { + 0 => return Ok(nwrote), + n => nwrote += n, + }; + } + Ok(nwrote) + } + + fn flush(&mut self) -> io::Result<()> { + Ok(()) } } @@ -295,30 +316,8 @@ impl Drop for Chain { } } -impl Read for Chain { - // nb: does not fail, but can read short - fn read(&mut self, buf: &mut [u8]) -> io::Result { - let mut nread = 0usize; - while self.is_current_readable() && nread < buf.len() { - nread += self.read_current(&mut buf[nread..]); - } - Ok(nread) - } -} - -impl Write for Chain { - // nb: does not fail, but can write short - fn write(&mut self, buf: &[u8]) -> io::Result { - self.skip_readable(); - let mut nwrote = 0usize; - while !self.is_end_of_chain() && nwrote < buf.len() { - nwrote += self.write_current(&buf[nwrote..]); - } - self.wlen += nwrote; - Ok(nwrote) - } - - fn flush(&mut self) -> io::Result<()> { - Ok(()) +impl fmt::Debug for Chain { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Chain {{ R {:?} W {:?} }}", self.readable, self.writeable) } } diff --git a/src/virtio/config.rs b/src/virtio/config.rs index 8ff473b..05531fe 100644 --- a/src/virtio/config.rs +++ b/src/virtio/config.rs @@ -1,13 +1,12 @@ use crate::memory::GuestRam; use std::sync::Arc; -use crate::vm::Result; - use super::VirtQueue; -use super::eventfd::IoEventFd; use super::vring::Vring; use super::virtqueue::InterruptLine; use super::bus::VirtioDeviceConfig; +use crate::virtio::{Result, Error}; +use crate::kvm::IoEventFd; /// /// Manages a set of virtqueues during device intitialization. 
@@ -116,7 +115,8 @@ fn create_ioeventfds(conf: &VirtioDeviceConfig) -> Result>> { let notify_base = conf.notify_mmio().base(); for i in 0..conf.num_queues() { - let evt = IoEventFd::new(conf.kvm(), notify_base + (4 * i as u64))?; + let evt = IoEventFd::new(conf.kvm(), notify_base + (4 * i as u64)) + .map_err(Error::CreateIoEventFd)?; v.push(Arc::new(evt)); } Ok(v) diff --git a/src/virtio/device.rs b/src/virtio/device.rs index 787fbd8..b440fbb 100644 --- a/src/virtio/device.rs +++ b/src/virtio/device.rs @@ -7,7 +7,7 @@ use super::VirtQueue; use super::config::VirtQueueConfig; use super::consts::*; use crate::vm::io::MmioOps; -use crate::vm::Result; +use crate::virtio::Result; pub trait VirtioDeviceOps: Send+Sync { fn reset(&mut self) {} diff --git a/src/virtio/eventfd.rs b/src/virtio/eventfd.rs deleted file mode 100644 index 7be3e6c..0000000 --- a/src/virtio/eventfd.rs +++ /dev/null @@ -1,93 +0,0 @@ -use std::sync::Arc; -use std::os::unix::io::{RawFd,AsRawFd}; - -use libc; - -use crate::vm::{Result,Error,ErrorKind}; -use crate::kvm::Kvm; - -pub struct EventFd(RawFd); - -const U64_SZ: usize = 8; - -impl EventFd { - pub fn new() -> Result { - let fd = unsafe { libc::eventfd(0, 0) }; - if fd < 0 { - return Err(Error::from_last_errno()); - } - Ok(EventFd(fd)) - } - - pub fn write(&self, v: u64) -> Result<()> { - let ret = unsafe { libc::write(self.0, &v as *const _ as *const libc::c_void, U64_SZ) }; - if ret as usize != U64_SZ { - if ret < 0 { - return Err(Error::new(ErrorKind::EventFdError, Error::from_last_errno())); - } - return Err(Error::new(ErrorKind::EventFdError, "write failed")); - } - Ok(()) - } - - pub fn read(&self) -> Result { - let mut v = 0u64; - let ret = unsafe { libc::read(self.0, &mut v as *mut _ as *mut libc::c_void, U64_SZ) }; - if ret as usize != U64_SZ { - if ret < 0 { - return Err(Error::new(ErrorKind::EventFdError, Error::from_last_errno())); - } - return Err(Error::new(ErrorKind::EventFdError, "read failed")); - } - Ok(v) - } -} - -impl Drop 
for EventFd { - fn drop(&mut self) { - let _ = unsafe { libc::close(self.0) }; - } -} - -impl AsRawFd for EventFd { - fn as_raw_fd(&self) -> RawFd { - self.0 - } -} - -pub struct IoEventFd { - kvm: Kvm, - addr: u64, - evt: Arc -} - -impl IoEventFd { - pub fn new(kvm: &Kvm, address: u64) -> Result { - let evt = Arc::new(EventFd::new()?); - kvm.ioeventfd_add(address, evt.as_raw_fd())?; - Ok(IoEventFd { - kvm: kvm.clone(), - addr: address, - evt, - }) - } - pub fn read(&self) -> Result { - self.evt.read() - } - - pub fn write(&self, v: u64) -> Result<()> { - self.evt.write(v) - } -} - -impl Drop for IoEventFd { - fn drop(&mut self) { - let _ = self.kvm.ioeventfd_del(self.addr, self.evt.as_raw_fd()); - } -} - -impl AsRawFd for IoEventFd { - fn as_raw_fd(&self) -> RawFd { - self.evt.as_raw_fd() - } -} diff --git a/src/virtio/mod.rs b/src/virtio/mod.rs index e1a90c1..54bb46b 100644 --- a/src/virtio/mod.rs +++ b/src/virtio/mod.rs @@ -3,7 +3,6 @@ mod chain; mod config; mod consts; mod device; -mod eventfd; mod pci; mod virtqueue; mod vring; @@ -14,10 +13,42 @@ pub use self::pci::PciIrq; pub use self::bus::VirtioBus; pub use self::device::{VirtioDevice,VirtioDeviceOps}; pub use self::chain::Chain; -pub use self::eventfd::EventFd; pub use self::device_config::DeviceConfigArea; use byteorder::{ByteOrder,LittleEndian}; +use std::{result, fmt}; +use crate::{system, kvm}; + +pub type Result = result::Result; + +#[derive(Debug)] +pub enum Error { + CreateEventFd(system::Error), + CreateIoEventFd(kvm::Error), + ReadIoEventFd(system::Error), + IrqFd(kvm::Error), + VringNotEnabled, + VringRangeInvalid(u64), + VringAvailInvalid(u64), + VringUsedInvalid(u64), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + match self { + CreateIoEventFd(e) => write!(f, "failed to create IoEventFd for VirtQueue: {}", e), + CreateEventFd(e) => write!(f, "failed to create EventFd for VirtQueue: {}", e), + ReadIoEventFd(e) => write!(f, "failed 
to read from IoEventFd: {}", e), + IrqFd(e) => write!(f, "VirtQueue: {}", e), + VringNotEnabled => write!(f, "vring is not enabled"), + VringRangeInvalid(addr) => write!(f, "vring descriptor table range is invalid 0x{:x}", addr), + VringAvailInvalid(addr) => write!(f, "vring avail ring range is invalid 0x{:x}", addr), + VringUsedInvalid(addr) => write!(f, "vring used ring range is invalid 0x{:x}", addr), + + } + } +} pub fn read_config_buffer(config: &[u8], offset: usize, size: usize) -> u64 { if offset + size > config.len() { diff --git a/src/virtio/pci.rs b/src/virtio/pci.rs index 5156b02..575d8e4 100644 --- a/src/virtio/pci.rs +++ b/src/virtio/pci.rs @@ -2,7 +2,7 @@ use std::sync::{Arc,RwLock}; use byteorder::{ByteOrder,LittleEndian}; use crate::vm::io::{IoDispatcher,IoPortOps}; -use crate::memory::PCI_MMIO_RESERVED_BASE; +use crate::vm::arch::PCI_MMIO_RESERVED_BASE; use crate::memory::AddressRange; use super::consts::*; diff --git a/src/virtio/virtqueue.rs b/src/virtio/virtqueue.rs index 2b3ccf1..ec711eb 100644 --- a/src/virtio/virtqueue.rs +++ b/src/virtio/virtqueue.rs @@ -4,13 +4,13 @@ use std::os::unix::io::AsRawFd; use crate::memory::GuestRam; use crate::kvm::Kvm; -use crate::vm::Result; - -use super::eventfd::{EventFd,IoEventFd}; +use crate::virtio::{Result,Error}; +use crate::system::EventFd; +use crate::kvm::IoEventFd; use super::consts::*; use super::vring::{Vring,Descriptor}; use super::bus::VirtioDeviceConfig; -use super::chain::Chain; +use crate::virtio::chain::Chain; #[derive(Clone)] pub struct VirtQueue { @@ -51,7 +51,8 @@ impl VirtQueue { pub fn wait_ready(&self) -> Result<()> { if self.vring.is_empty() { - let _ = self.ioeventfd.read()?; + let _ = self.ioeventfd.read() + .map_err(Error::ReadIoEventFd)?; } Ok(()) } @@ -129,11 +130,11 @@ pub struct QueueIter { } impl Iterator for QueueIter { - type Item = Chain; + type Item = Chain; fn next(&mut self) -> Option { self.vq.pop_avail_entry().map(|idx| { -
Chain::new(self.vq.memory.clone(),self.vq.clone(),idx, self.vq.vring.size()) + Chain::new(self.vq.memory.clone(), self.vq.clone(), idx, self.vq.vring.size()) }) } } @@ -150,8 +151,9 @@ impl InterruptLine { } fn new(kvm: &Kvm, irq: u8) -> Result> { - let irqfd = EventFd::new()?; - kvm.irqfd(irqfd.as_raw_fd() as u32, irq as u32)?; + let irqfd = EventFd::new().map_err(Error::CreateEventFd)?; + kvm.irqfd(irqfd.as_raw_fd() as u32, irq as u32) + .map_err(Error::IrqFd)?; Ok(Arc::new(InterruptLine{ irqfd, isr: AtomicUsize::new(0) diff --git a/src/virtio/vring.rs b/src/virtio/vring.rs index dcb15d2..96193de 100644 --- a/src/virtio/vring.rs +++ b/src/virtio/vring.rs @@ -7,7 +7,7 @@ use std::io::{self, Read}; use crate::memory::GuestRam; use super::consts::*; -use crate::vm::{Result,Error,ErrorKind}; +use crate::virtio::{Result,Error}; /// /// A convenience wrapper around `AtomicUsize` @@ -275,25 +275,21 @@ impl Vring { } pub fn validate(&self) -> Result<()> { - fn vring_err(msg: T) -> Result<()> { - Err(Error::new(ErrorKind::InvalidVring, msg.to_string())) - } - if !self.enabled { - return vring_err("vring is not enabled"); + return Err(Error::VringNotEnabled); } let qsz = self.queue_size as usize; let desc_table_sz = 16 * qsz; let avail_ring_sz = 6 + 2 * qsz; let used_ring_sz = 6 + 8 * qsz; if !self.memory.is_valid_range(self.descriptors, desc_table_sz) { - return vring_err(format!("descriptor table range is invalid 0x{:x}", self.descriptors)); + return Err(Error::VringRangeInvalid(self.descriptors)); } if !self.memory.is_valid_range(self.avail_ring, avail_ring_sz) { - return vring_err(format!("avail ring range is invalid 0x{:x}", self.avail_ring)); + return Err(Error::VringAvailInvalid(self.avail_ring)); } if !self.memory.is_valid_range(self.used_ring, used_ring_sz) { - return vring_err(format!("used ring range is invalid 0x{:x}", self.used_ring)); + return Err(Error::VringUsedInvalid(self.used_ring)); } Ok(()) } diff --git a/src/vm/arch/error.rs b/src/vm/arch/error.rs new 
file mode 100644 index 0000000..2c46320 --- /dev/null +++ b/src/vm/arch/error.rs @@ -0,0 +1,31 @@ +use crate::{kvm, system, memory}; +use crate::system::ErrnoError; +use std::{fmt, result}; + +#[derive(Debug)] +pub enum Error { + MemoryManagerCreate(memory::Error), + MemoryRegister(kvm::Error), + MemoryRegionCreate(system::Error), + LoadKernel(system::Error), + KvmError(kvm::Error), + SystemError(system::Error), + IoctlError(&'static str, ErrnoError), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + match self { + MemoryManagerCreate(err) => write!(f, "failed to create memory manager: {}", err), + MemoryRegister(err) => write!(f, "failed to register memory region: {}", err), + MemoryRegionCreate(err) => write!(f, "failed to create memory region: {}", err), + LoadKernel(err) => write!(f, "error loading kernel: {}", err), + KvmError(e) => e.fmt(f), + SystemError(e) => e.fmt(f), + IoctlError(name, err) => write!(f, "failed to call {} ioctl: {}", name, err), + } + } +} + +pub type Result = result::Result; diff --git a/src/vm/arch/mod.rs b/src/vm/arch/mod.rs new file mode 100644 index 0000000..85da601 --- /dev/null +++ b/src/vm/arch/mod.rs @@ -0,0 +1,27 @@ +use crate::kvm::{KvmVcpu, Kvm}; +pub use crate::vm::arch::x86::X86ArchSetup; +use crate::memory::MemoryManager; + +mod error; +mod x86; + +pub use x86::PCI_MMIO_RESERVED_BASE; + +pub use x86::KvmRegs; +pub use error::{Error,Result}; +use crate::vm::kernel_cmdline::KernelCmdLine; +use crate::vm::VmConfig; +use crate::virtio::PciIrq; + +pub fn create_setup(config: &VmConfig) -> X86ArchSetup { + X86ArchSetup::create(config) +} + +pub trait ArchSetup { + fn open_kvm(&self) -> Result; + fn create_memory(&mut self, kvm: &Kvm) -> Result; + fn setup_memory(&mut self, cmdline: &KernelCmdLine, pci_irqs: &[PciIrq]) -> Result<()>; + fn setup_vcpu(&self, vcpu: &KvmVcpu) -> Result<()>; +} + + diff --git a/src/vm/arch/x86/cpuid.rs b/src/vm/arch/x86/cpuid.rs new file mode 
100644 index 0000000..362d1cb --- /dev/null +++ b/src/vm/arch/x86/cpuid.rs @@ -0,0 +1,121 @@ + +use std::os::unix::io::RawFd; +use crate::vm::arch::Result; +use crate::kvm::KvmVcpu; +use crate::vm::arch::x86::ioctl::{KVM_GET_SUPPORTED_CPUID, KVM_SET_CPUID2, call_ioctl_with_ref, call_ioctl_with_mut_ref}; + +const EBX_CLFLUSH_CACHELINE: u32 = 8; // Flush a cache line size. +const EBX_CLFLUSH_SIZE_SHIFT: u32 = 8; // Bytes flushed when executing CLFLUSH. +const _EBX_CPU_COUNT_SHIFT: u32 = 16; // Number of CPUs. +const EBX_CPUID_SHIFT: u32 = 24; // Index of this CPU. +const _ECX_EPB_SHIFT: u32 = 3; // "Energy Performance Bias" bit. +const _ECX_HYPERVISOR_SHIFT: u32 = 31; // Flag to be set when the cpu is running on a hypervisor. +const _EDX_HTT_SHIFT: u32 = 28; // Hyper Threading Enabled. + +pub fn setup_cpuid(vcpu: &KvmVcpu) -> Result<()> { + let mut cpuid = kvm_get_supported_cpuid(vcpu.sys_raw_fd())?; + let cpu_id = 0u32; // first vcpu + + for e in &mut cpuid { + match e.function { + 0 => { + e.ebx = 0x67627553; + e.ecx = 0x20487020; + e.edx = 0x68706172; + } + 1 => { + if e.index == 0 { + e.ecx |= 1<<31; + } + e.ebx = (cpu_id << EBX_CPUID_SHIFT) as u32 | + (EBX_CLFLUSH_CACHELINE << EBX_CLFLUSH_SIZE_SHIFT); + /* + if cpu_count > 1 { + entry.ebx |= (cpu_count as u32) << EBX_CPU_COUNT_SHIFT; + entry.edx |= 1 << EDX_HTT_SHIFT; + } + */ + } + 6 => { + e.ecx &= !(1<<3); + + } + 10 => { + if e.eax > 0 { + let version = e.eax & 0xFF; + let ncounters = (e.eax >> 8) & 0xFF; + if version != 2 || ncounters == 0 { + e.eax = 0; + } + } + + } + _ => {} + } + } + kvm_set_cpuid2(vcpu.raw_fd(), cpuid) +} + + +pub fn kvm_get_supported_cpuid(sysfd: RawFd) -> Result> { + let mut cpuid = KvmCpuId2::new(); + call_ioctl_with_mut_ref("KVM_GET_SUPPORTED_CPUID", sysfd, KVM_GET_SUPPORTED_CPUID, &mut cpuid)?; + Ok(cpuid.get_entries()) +} + +pub fn kvm_set_cpuid2(cpufd: RawFd, entries: Vec) -> Result<()> { + let cpuid = KvmCpuId2::new_from_entries(entries); +
call_ioctl_with_ref("KVM_SET_CPUID2", cpufd, KVM_SET_CPUID2, &cpuid) +} + +#[derive(Copy, Clone, Default)] +#[repr(C)] +pub struct KvmCpuIdEntry { + pub function: u32, + pub index: u32, + pub flags: u32, + pub eax: u32, + pub ebx: u32, + pub ecx: u32, + pub edx: u32, + padding: [u32; 3] +} + +const KVM_CPUID_MAX_ENTRIES:usize = 256; + +#[repr(C)] +pub struct KvmCpuId2 { + nent: u32, + padding: u32, + entries: [KvmCpuIdEntry; KVM_CPUID_MAX_ENTRIES] +} + +impl KvmCpuId2 { + pub fn new() -> KvmCpuId2 { + KvmCpuId2 { + nent: KVM_CPUID_MAX_ENTRIES as u32, + padding: 0, + entries: [Default::default(); KVM_CPUID_MAX_ENTRIES], + } + } + + pub fn new_from_entries(entries: Vec) -> KvmCpuId2 { + let mut cpuid = KvmCpuId2::new(); + let sz = entries.len(); + assert!(sz <= KVM_CPUID_MAX_ENTRIES, "Too many cpuid entries"); + for i in 0..sz { + cpuid.entries[i] = entries[i]; + } + cpuid.nent = sz as u32; + cpuid + } + + pub fn get_entries(&self) -> Vec { + let mut entries = Vec::new(); + let sz = self.nent as usize; + for i in 0..sz { + entries.push(self.entries[i]); + } + entries + } +} diff --git a/src/vm/arch/x86/interrupts.rs b/src/vm/arch/x86/interrupts.rs new file mode 100644 index 0000000..8a4f200 --- /dev/null +++ b/src/vm/arch/x86/interrupts.rs @@ -0,0 +1,50 @@ +use std::os::unix::io::RawFd; + +use crate::system::ioctl::{ioctl_with_mut_ref, ioctl_with_ref}; +use crate::vm::arch::{Error,Result}; +use crate::vm::arch::x86::ioctl::{KVM_GET_LAPIC, KVM_SET_LAPIC}; + +#[repr(C)] +pub struct KvmLapicState { + pub regs: [u8; 1024] +} + +impl KvmLapicState { + pub fn new() -> KvmLapicState { + KvmLapicState { regs: [0; 1024] } + } +} + +pub fn kvm_get_lapic(cpufd: RawFd) -> Result { + let mut lapic_state = KvmLapicState::new(); + unsafe { + ioctl_with_mut_ref(cpufd, KVM_GET_LAPIC, &mut lapic_state) + .map_err(|e| Error::IoctlError("KVM_GET_LAPIC", e))?; + } + Ok(lapic_state) +} + +pub fn kvm_set_lapic(cpufd: RawFd, lapic_state: &KvmLapicState) -> Result<()> { + unsafe { + 
ioctl_with_ref(cpufd, KVM_SET_LAPIC, lapic_state) + .map_err(|e| Error::IoctlError("KVM_SET_LAPIC", e))?; + } + Ok(()) +} + +const APIC_MODE_EXTINT: u8 = 0x7; +const APIC_MODE_NMI: u8 = 0x4; +const APIC_LVT_LINT0_OFFSET: usize = 0x350; +const APIC_LVT_LINT1_OFFSET: usize = 0x360; + +pub fn setup_lapic(cpufd: RawFd) -> Result<()> { + let mut lapic = kvm_get_lapic(cpufd)?; + // delivery mode + lapic.regs[APIC_LVT_LINT0_OFFSET + 1] &= 0xF8; + lapic.regs[APIC_LVT_LINT0_OFFSET + 1] |= APIC_MODE_EXTINT; + lapic.regs[APIC_LVT_LINT1_OFFSET + 1] &= 0xF8; + lapic.regs[APIC_LVT_LINT1_OFFSET + 1] |= APIC_MODE_NMI; + kvm_set_lapic(cpufd, &lapic) +} + + diff --git a/src/vm/arch/x86/ioctl.rs b/src/vm/arch/x86/ioctl.rs new file mode 100644 index 0000000..72572fe --- /dev/null +++ b/src/vm/arch/x86/ioctl.rs @@ -0,0 +1,44 @@ +use std::os::unix::io::RawFd; +use libc::{self, c_ulong}; + +use crate::system::ioctl::{ioctl_with_ref, ioctl_with_mut_ref, ioctl_with_val}; +use crate::vm::arch::{Error,Result}; + +const KVMIO: u64 = 0xAE; + +pub const KVM_GET_SUPPORTED_CPUID: libc::c_ulong = iorw! (KVMIO, 0x05, 8); +pub const KVM_SET_CPUID2: libc::c_ulong = iow! (KVMIO, 0x90, 8); +pub const KVM_SET_TSS_ADDR: c_ulong = io! (KVMIO, 0x47); +pub const KVM_CREATE_PIT2: c_ulong = iow! (KVMIO, 0x77, 64); +pub const KVM_SET_FPU: c_ulong = iow! (KVMIO, 0x8d, 416); +pub const KVM_SET_MSRS: c_ulong = iow! (KVMIO, 0x89, 8); +pub const KVM_GET_SREGS: c_ulong = ior! (KVMIO, 0x83, 312); +pub const KVM_SET_SREGS: c_ulong = iow! (KVMIO, 0x84, 312); +pub const KVM_GET_LAPIC: c_ulong = ior! (KVMIO, 0x8e, 1024); +pub const KVM_SET_LAPIC: c_ulong = iow! 
(KVMIO, 0x8f, 1024); + +pub fn call_ioctl_with_ref(name: &'static str, fd: RawFd, request: c_ulong, arg: &T) -> Result<()> { + unsafe { + ioctl_with_ref(fd, request, arg) + .map_err(|e| Error::IoctlError(name, e))?; + Ok(()) + } +} + +pub fn call_ioctl_with_mut_ref(name: &'static str, fd: RawFd, request: c_ulong, arg: &mut T) -> Result<()> { + unsafe { + ioctl_with_mut_ref(fd, request, arg) + .map_err(|e| Error::IoctlError(name, e))?; + Ok(()) + } +} + +pub fn call_ioctl_with_val(name: &'static str, fd: RawFd, request: c_ulong, val: c_ulong) -> Result<()> { + unsafe { + ioctl_with_val(fd, request, val) + .map_err(|e| Error::IoctlError(name, e))?; + Ok(()) + } +} + + diff --git a/src/vm/arch/x86/kernel.rs b/src/vm/arch/x86/kernel.rs new file mode 100644 index 0000000..92d781c --- /dev/null +++ b/src/vm/arch/x86/kernel.rs @@ -0,0 +1,129 @@ +use std::io; + +use crate::memory::GuestRam; +use crate::system; +use crate::util::ByteBuffer; +use crate::vm::arch::PCI_MMIO_RESERVED_BASE; +use crate::vm::arch::x86::memory::HIMEM_BASE; +use crate::vm::KERNEL; + +pub const KVM_KERNEL_LOAD_ADDRESS: u64 = 0x1000000; +pub const KERNEL_CMDLINE_ADDRESS: u64 = 0x20000; +pub const KERNEL_ZERO_PAGE: u64 = 0x7000; + +// Documentation/x86/boot.txt + +const HDR_BOOT_FLAG: usize = 0x1fe; // u16 +const HDR_HEADER: usize = 0x202; // u32 +const HDR_TYPE_LOADER: usize = 0x210; // u8 +const HDR_CMDLINE_PTR: usize = 0x228; // u32 +const HDR_CMDLINE_SIZE: usize = 0x238; // u32 +const HDR_KERNEL_ALIGNMENT: usize = 0x230; // u32 + +// Documentation/x86/zero-page.txt + +const BOOT_PARAM_E820_ENTRIES: usize = 0x1e8; +const BOOT_PARAM_E820_MAP: usize = 0x2d0; + +const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; +const EBDA_START: u64 = 0x0009fc00; +const KERNEL_HDR_MAGIC: u32 = 0x53726448; +const KERNEL_LOADER_OTHER: u8 = 0xff; +const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x1000000; + +const E820_RAM: u32 = 1; + +fn setup_e820(memory: &GuestRam, mut zero: ByteBuffer<&mut [u8]>) -> system::Result<()> { + let 
ram_size = memory.ram_size() as u64; + + let mut e820_ranges = Vec::new(); + e820_ranges.push((0u64, EBDA_START)); + + if ram_size < PCI_MMIO_RESERVED_BASE { + e820_ranges.push((KVM_KERNEL_LOAD_ADDRESS, ram_size - KVM_KERNEL_LOAD_ADDRESS)); + } else { + e820_ranges.push((KVM_KERNEL_LOAD_ADDRESS, PCI_MMIO_RESERVED_BASE - KVM_KERNEL_LOAD_ADDRESS)); + e820_ranges.push((HIMEM_BASE, ram_size - HIMEM_BASE)); + } + zero.write_at(BOOT_PARAM_E820_ENTRIES , e820_ranges.len() as u8); + + zero.set_offset(BOOT_PARAM_E820_MAP); + for i in 0..e820_ranges.len() { + zero.write(e820_ranges[i].0) + .write(e820_ranges[i].1) + .write(E820_RAM); + } + Ok(()) +} + +fn setup_zero_page(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> system::Result<()> { + let mut zero = memory.mut_buffer(KERNEL_ZERO_PAGE, 4096)?; + zero.write_at(HDR_BOOT_FLAG, KERNEL_BOOT_FLAG_MAGIC) + .write_at(HDR_HEADER, KERNEL_HDR_MAGIC) + .write_at(HDR_TYPE_LOADER, KERNEL_LOADER_OTHER) + .write_at(HDR_CMDLINE_PTR, cmdline_addr as u32) + .write_at(HDR_CMDLINE_SIZE, cmdline_size as u32) + .write_at(HDR_KERNEL_ALIGNMENT, KERNEL_MIN_ALIGNMENT_BYTES); + + setup_e820(memory, zero) +} + +pub fn load_pm_kernel(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> system::Result<()> { + load_elf_kernel(memory)?; + setup_zero_page(memory, cmdline_addr, cmdline_size) +} + +fn load_elf_segment(memory: &GuestRam, hdr: ElfPhdr) { + let addr = hdr.p_paddr + KVM_KERNEL_LOAD_ADDRESS; + let size = hdr.p_filesz as usize; + let off = hdr.p_offset as usize; + let dst = memory.mut_slice(addr, size).unwrap(); + let src = &KERNEL[off..off+size]; + dst.copy_from_slice(src); +} + +pub fn load_elf_kernel(memory: &GuestRam) -> io::Result<()> { + let mut k = ByteBuffer::from_bytes(KERNEL); + let phoff = k.read_at::(32); + let phnum = k.read_at::(56); + + k.set_offset(phoff as usize); + + for _ in 0..phnum { + let hdr = ElfPhdr::load_from(&mut k); + if hdr.is_pt_load() { + load_elf_segment(memory, hdr); + } + } + Ok(()) +} + 
+struct ElfPhdr { + pub p_type: u32, + pub p_flags: u32, + pub p_offset: u64, + pub p_vaddr: u64, + pub p_paddr: u64, + pub p_filesz: u64, + pub p_memsz: u64, + pub p_align: u64, +} + +impl ElfPhdr { + fn load_from(buf: &mut ByteBuffer<&[u8]>) -> Self { + ElfPhdr { + p_type: buf.read(), + p_flags: buf.read(), + p_offset: buf.read(), + p_vaddr: buf.read(), + p_paddr: buf.read(), + p_filesz: buf.read(), + p_memsz: buf.read(), + p_align: buf.read(), + } + } + + fn is_pt_load(&self) -> bool { + self.p_type == 1 + } +} \ No newline at end of file diff --git a/src/vm/arch/x86/kvm.rs b/src/vm/arch/x86/kvm.rs new file mode 100644 index 0000000..57bb854 --- /dev/null +++ b/src/vm/arch/x86/kvm.rs @@ -0,0 +1,50 @@ +use std::os::unix::io::RawFd; +use crate::kvm::{Kvm, KVM_CAP_IOEVENTFD, KVM_CAP_PIT2, KVM_CAP_IRQ_INJECT_STATUS, KVM_CAP_IRQ_ROUTING, KVM_CAP_EXT_CPUID, KVM_CAP_SET_TSS_ADDR, KVM_CAP_USER_MEMORY, KVM_CAP_HLT, KVM_CAP_IRQCHIP}; +use crate::vm::arch::{Result,Error}; + +use libc::c_ulong; +use crate::vm::arch::x86::ioctl::{ + call_ioctl_with_ref, call_ioctl_with_val, KVM_CREATE_PIT2, KVM_SET_TSS_ADDR +}; + +static REQUIRED_EXTENSIONS: &[u32] = &[ + KVM_CAP_IRQCHIP, + KVM_CAP_HLT, + KVM_CAP_USER_MEMORY, + KVM_CAP_SET_TSS_ADDR, + KVM_CAP_EXT_CPUID, + KVM_CAP_IRQ_ROUTING, + KVM_CAP_IRQ_INJECT_STATUS, + KVM_CAP_PIT2, + KVM_CAP_IOEVENTFD, +]; + +pub fn x86_open_kvm() -> Result { + let kvm = Kvm::open(REQUIRED_EXTENSIONS) + .map_err(Error::KvmError)?; + kvm.create_irqchip().map_err(Error::KvmError)?; + kvm_set_tss_addr(kvm.vmfd(), 0xFFFbd000)?; + kvm_create_pit2(kvm.vmfd())?; + Ok(kvm) +} + +#[repr(C)] +struct KvmPitConfig { + flags: u32, + padding: [u32; 15], +} + +impl KvmPitConfig { + pub fn new(flags: u32) -> KvmPitConfig { + KvmPitConfig { flags, padding: [0; 15] } + } +} + +fn kvm_create_pit2(vmfd: RawFd) -> Result<()> { + let pit_config = KvmPitConfig::new(0); + call_ioctl_with_ref("KVM_CREATE_PIT2", vmfd, KVM_CREATE_PIT2, &pit_config) +} + +fn kvm_set_tss_addr(vmfd: 
RawFd, addr: u32) -> Result<()> { + call_ioctl_with_val("KVM_SET_TSS_ADDR", vmfd, KVM_SET_TSS_ADDR, addr as c_ulong) +} diff --git a/src/vm/arch/x86/memory.rs b/src/vm/arch/x86/memory.rs new file mode 100644 index 0000000..4ecbd59 --- /dev/null +++ b/src/vm/arch/x86/memory.rs @@ -0,0 +1,99 @@ +use crate::kvm::Kvm; +use crate::memory::{MemoryManager, MemoryRegion, GuestRam}; +use crate::vm::arch::{Error, Result}; +use std::cmp; +use crate::vm::kernel_cmdline::KernelCmdLine; +use crate::vm::arch::x86::kernel::{load_pm_kernel, KERNEL_CMDLINE_ADDRESS}; +use crate::system; +use crate::vm::arch::x86::mptable::setup_mptable; +use crate::virtio::PciIrq; + +pub const HIMEM_BASE: u64 = (1 << 32); +pub const PCI_MMIO_RESERVED_SIZE: usize = (512 << 20); +pub const PCI_MMIO_RESERVED_BASE: u64 = HIMEM_BASE - PCI_MMIO_RESERVED_SIZE as u64; + + +pub fn x86_setup_memory_regions(memory: &mut MemoryManager, ram_size: usize) -> Result<()> { + let mut regions = Vec::new(); + let lowmem_sz = cmp::min(ram_size, PCI_MMIO_RESERVED_BASE as usize); + regions.push(create_region(memory.kvm(), 0, lowmem_sz, 0)?); + + if lowmem_sz < ram_size { + let himem_sz = ram_size - lowmem_sz; + regions.push(create_region(memory.kvm(), HIMEM_BASE, himem_sz, 1)?); + } + memory.set_ram_regions(regions); + Ok(()) +} + +fn create_region(kvm: &Kvm, base: u64, size: usize, slot: u32) -> Result { + let mr = MemoryRegion::new(base, size) + .map_err(Error::MemoryRegionCreate)?; + kvm.add_memory_region(slot, base, mr.base_address(), size) + .map_err(Error::MemoryRegister)?; + Ok(mr) +} + +const BOOT_GDT_OFFSET: usize = 0x500; +const BOOT_IDT_OFFSET: usize = 0x520; + +const BOOT_PML4: u64 = 0x9000; +const BOOT_PDPTE: u64 = 0xA000; +const BOOT_PDE: u64 = 0xB000; + +pub fn x86_setup_memory(memory: &mut MemoryManager, cmdline: &KernelCmdLine, ncpus: usize, pci_irqs: &[PciIrq]) -> Result<()> { + load_pm_kernel(memory.guest_ram(), KERNEL_CMDLINE_ADDRESS, cmdline.size()) + .map_err(Error::LoadKernel)?; + 
setup_gdt(memory.guest_ram())?; + setup_boot_pagetables(memory.guest_ram()).map_err(Error::SystemError)?; + setup_mptable(memory.guest_ram(), ncpus, pci_irqs).map_err(Error::SystemError)?; + write_cmdline(memory.guest_ram(), cmdline).map_err(Error::SystemError)?; + Ok(()) +} + +fn setup_boot_pagetables(memory: &GuestRam) -> system::Result<()> { + memory.write_int::(BOOT_PML4, BOOT_PDPTE | 0x3)?; + memory.write_int::(BOOT_PDPTE, BOOT_PDE | 0x3)?; + for i in 0..512_u64 { + let entry = (i << 21) | 0x83; + memory.write_int::(BOOT_PDE + (i * 8), entry)?; + } + Ok(()) +} + +fn write_gdt_table(table: &[u64], memory: &GuestRam) -> system::Result<()> { + for i in 0..table.len() { + memory.write_int((BOOT_GDT_OFFSET + i * 8) as u64, table[i])?; + } + Ok(()) +} + +pub fn gdt_entry(flags: u16, base: u32, limit: u32) -> u64 { + ((((base as u64) & 0xff000000u64) << (56 - 24)) | (((flags as u64) & 0x0000f0ffu64) << 40) | + (((limit as u64) & 0x000f0000u64) << (48 - 16)) | + (((base as u64) & 0x00ffffffu64) << 16) | ((limit as u64) & 0x0000ffffu64)) +} + +pub fn setup_gdt(memory: &GuestRam) -> Result<()> { + let table = [ + gdt_entry(0,0,0), + gdt_entry(0xa09b,0,0xfffff), + gdt_entry(0xc093,0,0xfffff), + gdt_entry(0x808b,0,0xfffff), + ]; + write_gdt_table(&table, memory) + .map_err(Error::SystemError)?; + + memory.write_int::(BOOT_IDT_OFFSET as u64, 0u64) + .map_err(Error::SystemError)?; + + Ok(()) +} + +fn write_cmdline(memory: &GuestRam, cmdline: &KernelCmdLine) -> system::Result<()> { + let bytes = cmdline.as_bytes(); + let len = bytes.len() as u64; + memory.write_bytes(KERNEL_CMDLINE_ADDRESS, bytes)?; + memory.write_int(KERNEL_CMDLINE_ADDRESS + len, 0u8)?; + Ok(()) +} diff --git a/src/vm/arch/x86/mod.rs b/src/vm/arch/x86/mod.rs new file mode 100644 index 0000000..3243005 --- /dev/null +++ b/src/vm/arch/x86/mod.rs @@ -0,0 +1,13 @@ +mod cpuid; +mod interrupts; +mod kvm; +mod memory; +mod mptable; +mod registers; +mod kernel; +mod ioctl; +mod setup; + +pub use 
setup::X86ArchSetup; +pub use memory::PCI_MMIO_RESERVED_BASE; +pub use registers::KvmRegs; diff --git a/src/vm/setup/mptable.rs b/src/vm/arch/x86/mptable.rs similarity index 96% rename from src/vm/setup/mptable.rs rename to src/vm/arch/x86/mptable.rs index d2c6e78..db9b9b8 100644 --- a/src/vm/setup/mptable.rs +++ b/src/vm/arch/x86/mptable.rs @@ -4,7 +4,7 @@ use std::iter; use crate::memory::GuestRam; use crate::virtio::PciIrq; -use crate::vm::Result; +use crate::system::Result; const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee00000; const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec00000; @@ -99,7 +99,7 @@ impl Buffer { .w8(dstirq) // dest irq } - fn write_all_mpc_intsrc(&mut self, ioapicid: u8, pci_irqs: &Vec) -> &mut Self { + fn write_all_mpc_intsrc(&mut self, ioapicid: u8, pci_irqs: &[PciIrq]) -> &mut Self { for irq in pci_irqs { self.write_mpc_intsrc(ioapicid, irq.src_bus_irq(), irq.irq_line()); } @@ -192,9 +192,8 @@ fn align(sz: usize, n: usize) -> usize { (sz + (n - 1)) & !(n - 1) } -pub fn setup_mptable(memory: &GuestRam, ncpus: usize, pci_irqs: Vec) -> Result<()> { +pub fn setup_mptable(memory: &GuestRam, ncpus: usize, pci_irqs: &[PciIrq]) -> Result<()> { let ioapicid = (ncpus + 1) as u8; - //let address= align(BIOS_BEGIN as usize + BIOS_BIN.len(), 16) as u32; let mut body = Buffer::new(); let address = 0; @@ -209,6 +208,5 @@ pub fn setup_mptable(memory: &GuestRam, ncpus: usize, pci_irqs: Vec) -> let mut table = Buffer::new(); table.write_mpctable(ncpus as u16, &body); - //memory.write_bytes(address as u64, &table.vec) memory.write_bytes(address as u64, &table.vec) } \ No newline at end of file diff --git a/src/vm/arch/x86/registers.rs b/src/vm/arch/x86/registers.rs new file mode 100644 index 0000000..d4e3ecc --- /dev/null +++ b/src/vm/arch/x86/registers.rs @@ -0,0 +1,314 @@ +use std::fmt; +use std::os::unix::io::RawFd; + +use crate::kvm::KvmVcpu; +use crate::vm::arch::{Result, Error}; +use crate::vm::arch::x86::kernel::KERNEL_ZERO_PAGE; +use 
crate::vm::arch::x86::ioctl::{ + call_ioctl_with_ref, KVM_SET_FPU, KVM_SET_MSRS, call_ioctl_with_mut_ref, KVM_GET_SREGS, KVM_SET_SREGS +}; + +const MSR_IA32_SYSENTER_CS: u32 = 0x00000174; +const MSR_IA32_SYSENTER_ESP: u32 = 0x00000175; +const MSR_IA32_SYSENTER_EIP: u32 = 0x00000176; +const MSR_STAR: u32 = 0xc0000081; +const MSR_LSTAR: u32 = 0xc0000082; +const MSR_CSTAR: u32 = 0xc0000083; +const MSR_SYSCALL_MASK: u32 = 0xc0000084; +const MSR_KERNEL_GS_BASE: u32 = 0xc0000102; +const MSR_IA32_TSC: u32 = 0x00000010; +const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0; + +const MSR_IA32_MISC_ENABLE_FAST_STRING: u64 = 0x01; + +pub fn setup_fpu(vcpu: &KvmVcpu) -> Result<()> { + let mut fpu = KvmFpu::new(); + fpu.fcw = 0x37f; + fpu.mxcsr = 0x1f80; + kvm_set_fpu(vcpu.raw_fd(), &fpu)?; + Ok(()) +} + +pub fn setup_msrs(vcpu: &KvmVcpu) -> Result<()> { + let mut msrs = KvmMsrs::new(); + msrs.add(MSR_IA32_SYSENTER_CS, 0); + msrs.add(MSR_IA32_SYSENTER_ESP, 0); + msrs.add(MSR_IA32_SYSENTER_EIP, 0); + msrs.add(MSR_STAR, 0); + msrs.add(MSR_CSTAR, 0); + msrs.add(MSR_KERNEL_GS_BASE, 0); + msrs.add(MSR_SYSCALL_MASK, 0); + msrs.add(MSR_LSTAR, 0); + msrs.add(MSR_IA32_TSC, 0); + msrs.add(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_FAST_STRING); + kvm_set_msrs(vcpu.raw_fd(), &msrs)?; + Ok(()) +} + +const BOOT_GDT_OFFSET: usize = 0x500; +const BOOT_IDT_OFFSET: usize = 0x520; + +const BOOT_STACK: u64 = 0x8000; +const BOOT_PML4: u64 = 0x9000; + +const X86_CR0_PE: u64 = 0x1; +const X86_CR0_PG: u64 = 0x80000000; +const X86_CR4_PAE: u64 = 0x20; + +const EFER_LME: u64 = 0x100; +const EFER_LMA: u64 = (1 << 10); + +pub fn setup_pm_sregs(vcpu: &KvmVcpu) -> Result<()> { + + let code = KvmSegment::new(0, 0xfffff, 1 * 8, 0xa09b); + let data = KvmSegment::new(0, 0xfffff, 2 * 8, 0xc093); + let tss = KvmSegment::new(0, 0xfffff, 3 * 8, 0x808b); + + let mut regs = kvm_get_sregs(vcpu.raw_fd())?; + + regs.gdt.base = BOOT_GDT_OFFSET as u64; + regs.gdt.limit = 32 - 1; + + regs.itd.base = BOOT_IDT_OFFSET as u64; + 
regs.itd.limit = 8 - 1; + + regs.cs = code; + regs.ds = data; + regs.es = data; + regs.fs = data; + regs.gs = data; + regs.ss = data; + regs.tr = tss; + + // protected mode + regs.cr0 |= X86_CR0_PE; + regs.efer |= EFER_LME; + + regs.cr3 = BOOT_PML4; + regs.cr4 |= X86_CR4_PAE; + regs.cr0 |= X86_CR0_PG; + regs.efer |= EFER_LMA; + + kvm_set_sregs(vcpu.raw_fd(), &regs)?; + Ok(()) +} + +pub fn setup_pm_regs(vcpu: &KvmVcpu, kernel_entry: u64) -> Result<()> { + let mut regs = KvmRegs::new(); + regs.rflags = 0x0000000000000002; + regs.rip = kernel_entry; + regs.rsp = BOOT_STACK; + regs.rbp = BOOT_STACK; + regs.rsi = KERNEL_ZERO_PAGE; + vcpu.set_regs(&regs) + .map_err(Error::KvmError)?; + Ok(()) +} + +#[derive(Copy)] +#[repr(C)] +pub struct KvmFpu { + fpr: [u8; 128], + pub fcw: u16, + fsw: u16, + ftwx: u8, + pad1: u8, + last_opcode: u16, + last_ip: u64, + last_dp: u64, + xmm: [u8; 256], + pub mxcsr: u32, + pad2: u32, +} + +impl Clone for KvmFpu { + fn clone(&self) -> KvmFpu { *self } +} +impl KvmFpu { + pub fn new() -> KvmFpu { + KvmFpu { + fpr: [0; 128], + fcw: 0, + fsw: 0, + ftwx: 0, pad1: 0, + last_opcode: 0, + last_ip: 0, + last_dp: 0, + xmm: [0; 256], + mxcsr: 0, + pad2: 0 + } + } +} + +pub fn kvm_set_fpu(cpufd: RawFd, fpu: &KvmFpu) -> Result<()> { + call_ioctl_with_ref("KVM_SET_FPU", cpufd, KVM_SET_FPU, fpu) +} + +#[derive(Copy, Clone, Default)] +#[repr(C)] +struct KvmMsrEntry { + index: u32, + reserved: u32, + data: u64 +} + +#[repr(C)] +pub struct KvmMsrs { + nent: u32, + padding: u32, + entries: [KvmMsrEntry; 100] +} + +impl KvmMsrs { + pub fn new() -> KvmMsrs { + KvmMsrs{ nent: 0, padding: 0, entries: [Default::default(); 100]} + } + + pub fn add(&mut self, index: u32, data: u64) { + self.entries[self.nent as usize].index = index; + self.entries[self.nent as usize].data = data; + self.nent += 1; + } +} + +pub fn kvm_set_msrs(cpufd: RawFd, msrs: &KvmMsrs) -> Result<()> { + call_ioctl_with_ref("KVM_SET_MSRS", cpufd, KVM_SET_MSRS, msrs) +} + +#[derive(Copy, Clone,
Default)] +#[repr(C)] +pub struct KvmSegment { + base: u64, + limit: u32, + selector: u16, + stype: u8, + present: u8, + dpl: u8, + db: u8, + s: u8, + l: u8, + g: u8, + avl: u8, + unusable: u8, + padding: u8, +} + +impl KvmSegment { + pub fn new(base: u64, limit: u32, selector: u16, flags: u16) -> KvmSegment { + let mut seg = KvmSegment{ ..Default::default() }; + seg.setup(base, limit, selector, flags); + seg + } + + pub fn setup(&mut self, base: u64, limit: u32, selector: u16, flags: u16) { + self.base = base; + self.limit = limit; + self.selector = selector; + self.stype = (flags & 0xF) as u8; + self.present = ((flags >> 7) & 0x1) as u8; + self.dpl = ((flags >> 5) & 0x3) as u8; + self.db = ((flags >> 14) & 0x1) as u8; + self.s = ((flags >> 4) & 0x1) as u8; + self.l = ((flags >> 13) & 0x1) as u8; + self.g = ((flags >> 15) & 0x1) as u8; + self.avl = ((flags >> 12) & 0x1) as u8; + self.unusable = if self.present == 1 { 0 } else { 1 } + } +} + +impl fmt::Debug for KvmSegment { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "(base: {:x} limit {:x} selector: {:x} type: {:x} p: {} dpl: {} db: {} s: {} l: {} g: {} avl: {} unuse: {})", + self.base, self.limit, self.selector, self.stype, self.present, self.dpl, self.db, self.s, self.l, self.g, self.avl, self.unusable) + } +} + +#[derive(Copy, Clone, Default)] +#[repr(C)] +pub struct KvmDtable { + pub base: u64, + pub limit: u16, + padding: [u16; 3], +} + +impl fmt::Debug for KvmDtable { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "(base: {:x} limit {:x})", self.base, self.limit) + } +} + +#[derive(Copy, Clone, Default)] +#[repr(C)] +pub struct KvmSRegs { + pub cs: KvmSegment, + pub ds: KvmSegment, + pub es: KvmSegment, + pub fs: KvmSegment, + pub gs: KvmSegment, + pub ss: KvmSegment, + pub tr: KvmSegment, + pub ldt: KvmSegment, + pub gdt: KvmDtable, + pub itd: KvmDtable, + pub cr0: u64, + pub cr2: u64, + pub cr3: u64, + pub cr4: u64, + pub cr8: u64, + pub efer: u64, + pub 
apic_base: u64, + pub interrupt_bitmap: [u64; 4], +} + +impl fmt::Debug for KvmSRegs { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "cs: {:?}\nds: {:?}\nes: {:?}\nfs: {:?}\n", self.cs, self.ds, self.es, self.fs)?; + write!(f, "gs: {:?}\nss: {:?}\ntr: {:?}\nldt: {:?}\n", self.gs, self.ss, self.tr, self.ldt)?; + write!(f, "gdt: {:?} itd: {:?}\n", self.gdt, self.itd)?; + write!(f, "cr0: {:x} cr2: {:x} cr3: {:x} cr4: {:x}\n", self.cr0, self.cr2, self.cr3, self.cr4)?; + write!(f, "efer: {:x} apic_base: {:x}\n", self.efer, self.apic_base) + } +} + +impl KvmSRegs { + pub fn new() -> KvmSRegs { + KvmSRegs { ..Default::default() } + } +} + +pub fn kvm_get_sregs(cpufd: RawFd) -> Result { + let mut sregs = KvmSRegs::new(); + call_ioctl_with_mut_ref("KVM_GET_SREGS", cpufd, KVM_GET_SREGS, &mut sregs)?; + Ok(sregs) +} + +pub fn kvm_set_sregs(cpufd: RawFd, sregs: &KvmSRegs) -> Result<()> { + call_ioctl_with_ref("KVM_SET_SREGS", cpufd, KVM_SET_SREGS, sregs) +} + +#[derive(Copy, Clone, Default)] +#[repr(C)] +pub struct KvmRegs { + pub rax: u64, pub rbx: u64, pub rcx: u64, pub rdx: u64, + pub rsi: u64, pub rdi: u64, pub rsp: u64, pub rbp: u64, + pub r8: u64, pub r9: u64, pub r10: u64, pub r11: u64, + pub r12: u64, pub r13: u64, pub r14: u64, pub r15: u64, + pub rip: u64, pub rflags: u64, +} + +impl fmt::Debug for KvmRegs { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "rax 0x{:x} rbx 0x{:x} rcx 0x{:x} rdx 0x{:x}\n", self.rax, self.rbx, self.rcx, self.rdx)?; + write!(f, "rsi 0x{:x} rdi 0x{:x} rsp 0x{:x} rbp 0x{:x}\n", self.rsi, self.rdi, self.rsp, self.rbp)?; + write!(f, "r8 0x{:x} r9 0x{:x} r10 0x{:x} r11 0x{:x}\n", self.r8, self.r9, self.r10, self.r11)?; + write!(f, "r12 0x{:x} r13 0x{:x} r14 0x{:x} r15 0x{:x}\n", self.r12, self.r13, self.r14, self.r15)?; + write!(f, "rip 0x{:x} rflags 0x{:x}\n", self.rip, self.rflags) + } +} + +impl KvmRegs { + pub fn new() -> KvmRegs { + KvmRegs { ..Default::default() } + } +} diff --git 
a/src/vm/arch/x86/setup.rs b/src/vm/arch/x86/setup.rs new file mode 100644 index 0000000..47ac887 --- /dev/null +++ b/src/vm/arch/x86/setup.rs @@ -0,0 +1,73 @@ +use crate::memory::{MemoryManager, GuestRam, SystemAllocator, AddressRange}; +use crate::vm::VmConfig; +use crate::vm::arch::{ArchSetup, Error, Result}; +use crate::vm::kernel_cmdline::KernelCmdLine; +use crate::virtio::PciIrq; +use crate::kvm::{Kvm, KvmVcpu}; +use crate::vm::arch::x86::kvm::x86_open_kvm; +use crate::vm::arch::x86::memory::{x86_setup_memory_regions, x86_setup_memory}; +use crate::vm::arch::x86::cpuid::setup_cpuid; +use crate::vm::arch::x86::registers::{setup_pm_sregs, setup_pm_regs, setup_fpu, setup_msrs}; +use crate::vm::arch::x86::interrupts::setup_lapic; +use crate::vm::arch::x86::kernel::KVM_KERNEL_LOAD_ADDRESS; + +pub struct X86ArchSetup { + ram_size: usize, + use_drm: bool, + ncpus: usize, + memory: Option, +} + +impl X86ArchSetup { + pub fn create(config: &VmConfig) -> Self { + let ram_size = config.ram_size(); + let use_drm = config.is_wayland_enabled() && config.is_dmabuf_enabled(); + X86ArchSetup { + ram_size, + use_drm, + ncpus: config.ncpus(), + memory: None, + } + } +} + +fn get_base_dev_pfn(mem_size: u64) -> u64 { + // Put device memory at a 2MB boundary after physical memory or 4gb, whichever is greater. 
+ const MB: u64 = 1024 * 1024; + const GB: u64 = 1024 * MB; + let mem_size_round_2mb = (mem_size + 2 * MB - 1) / (2 * MB) * (2 * MB); + std::cmp::max(mem_size_round_2mb, 4 * GB) / 4096 +} + +impl ArchSetup for X86ArchSetup { + fn open_kvm(&self) -> Result { + x86_open_kvm() + } + + fn create_memory(&mut self, kvm: &Kvm) -> Result { + let ram = GuestRam::new(self.ram_size); + let dev_addr_start = get_base_dev_pfn(self.ram_size as u64) * 4096; + let dev_addr_size = u64::max_value() - dev_addr_start; + let allocator = SystemAllocator::new(AddressRange::new(dev_addr_start,dev_addr_size as usize)); + let mut mm = MemoryManager::new(kvm.clone(), ram, allocator, self.use_drm) + .map_err(Error::MemoryManagerCreate)?; + x86_setup_memory_regions(&mut mm, self.ram_size)?; + self.memory = Some(mm.clone()); + Ok(mm) + } + + fn setup_memory(&mut self, cmdline: &KernelCmdLine, pci_irqs: &[PciIrq]) -> Result<()> { + let memory = self.memory.as_mut().expect("No memory created"); + x86_setup_memory(memory, cmdline, self.ncpus, pci_irqs)?; + Ok(()) + } + + fn setup_vcpu(&self, vcpu: &KvmVcpu) -> Result<()> { + setup_cpuid(vcpu)?; + setup_pm_sregs(vcpu)?; + setup_pm_regs(&vcpu, KVM_KERNEL_LOAD_ADDRESS)?; + setup_fpu(vcpu)?; + setup_msrs(vcpu)?; + setup_lapic(vcpu.raw_fd()) + } +} diff --git a/src/vm/config.rs b/src/vm/config.rs index 336cb0a..b693dee 100644 --- a/src/vm/config.rs +++ b/src/vm/config.rs @@ -1,10 +1,11 @@ use std::path::{PathBuf, Path}; -use crate::vm::Vm; +use crate::vm::{VmSetup, arch}; use std::{env, process}; use crate::devices::SyntheticFS; use crate::disk::{RawDiskImage, RealmFSImage, OpenType}; use libcitadel::Realms; use libcitadel::terminal::{TerminalPalette, AnsiTerminal, Base16Scheme}; +use crate::vm::arch::X86ArchSetup; pub struct VmConfig { ram_size: usize, @@ -15,6 +16,7 @@ pub struct VmConfig { dmabuf: bool, network: bool, home: String, + colorscheme: String, bridge_name: String, kernel_path: Option, init_path: Option, @@ -39,6 +41,7 @@ impl VmConfig { 
network: true, bridge_name: "vz-clear".to_string(), home: Self::default_homedir(), + colorscheme: "dracula".to_string(), kernel_path: None, init_path: None, init_cmd: None, @@ -65,18 +68,23 @@ impl VmConfig { self } - pub fn raw_disk_image>(mut self, path: P, open_type: OpenType) -> Self { - self.raw_disks.push(RawDiskImage::new(path, open_type)); - self + pub fn raw_disk_image>(self, path: P, open_type: OpenType) -> Self { + self.raw_disk_image_with_offset(path, open_type, 0) } pub fn raw_disk_image_with_offset>(mut self, path: P, open_type: OpenType, offset: usize) -> Self { - self.raw_disks.push(RawDiskImage::new_with_offset(path, open_type, offset)); + match RawDiskImage::new_with_offset(path, open_type, offset) { + Ok(disk) => self.raw_disks.push(disk), + Err(e) => warn!("Could not add disk: {}", e), + }; self } pub fn realmfs_image>(mut self, path: P) -> Self { - self.realmfs_images.push(RealmFSImage::new(path, OpenType::MemoryOverlay)); + match RealmFSImage::new(path, OpenType::MemoryOverlay) { + Ok(disk) => self.realmfs_images.push(disk), + Err(e) => warn!("Could not add disk: {}", e), + }; self } @@ -109,21 +117,31 @@ impl VmConfig { let _terminal_restore = TerminalRestore::save(); - if let Some(scheme) = Base16Scheme::by_name("black-metal-immortal") { + if let Some(scheme) = Base16Scheme::by_name(&self.colorscheme) { let mut term = AnsiTerminal::new().unwrap(); if let Err(err) = term.apply_base16(scheme) { warn!("Failed to set terminal color scheme: {}", err); } } - - match Vm::open(self) { - Ok(vm) => if let Err(err) = vm.start() { - notify!("Error starting VM: {}", err); + let mut setup = self.setup(); + let vm = match setup.create_vm() { + Ok(vm) => vm, + Err(err) => { + warn!("Failed to create VM: {}", err); + return; } - Err(e) => notify!("Error creating VM: {}", e), + }; + + if let Err(err) = vm.start() { + warn!("Failed to start VM: {}", err); } } + pub fn setup(self) -> VmSetup { + let arch_setup = arch::create_setup(&self); + VmSetup::new(self, 
arch_setup) + } + pub fn ram_size(&self) -> usize { self.ram_size } @@ -206,7 +224,13 @@ impl VmConfig { eprintln!("Realmfs image does not exist at {}", path.display()); process::exit(1); } - self.realmfs_images.push(RealmFSImage::new(path, OpenType::MemoryOverlay)); + match RealmFSImage::new(path, OpenType::MemoryOverlay) { + Ok(disk) => self.realmfs_images.push(disk), + Err(e) => { + warn!("Could not add disk: {}", e); + process::exit(1); + }, + }; } fn add_realm_by_name(&mut self, realm: &str) { @@ -217,7 +241,10 @@ impl VmConfig { self.add_realmfs_by_name(realmfs); self.home = realm.base_path().join("home").display().to_string(); self.realm_name = Some(realm.name().to_string()); - self.bridge_name = config.network_zone().to_string(); + self.bridge_name = format!("vz-{}", config.network_zone()); + if let Some(scheme) = config.terminal_scheme() { + self.colorscheme = scheme.to_string(); + } } } @@ -318,7 +345,6 @@ impl TerminalRestore { let _ = p.apply(&mut term); } } - } impl Drop for TerminalRestore { diff --git a/src/vm/error.rs b/src/vm/error.rs index c250d4a..0beca8c 100644 --- a/src/vm/error.rs +++ b/src/vm/error.rs @@ -1,199 +1,48 @@ use std::{result, io}; -use std::error; use std::fmt; -use std::str; -use std::ffi::CStr; -use libc; -use crate::disk; +use crate::{system, kvm, virtio}; use crate::system::netlink; +use crate::vm::arch; pub type Result = result::Result; #[derive(Debug)] -pub enum ErrorKind { - InvalidAddress(u64), - InvalidMappingOffset(usize), - RegisterMemoryFailed, - ReadKernelFailed, - Interrupted, - InvalidVring, - IoctlFailed(&'static str), - MissingRequiredExtension(u32), - OpenDeviceFailed, - CreateVmFailed, - BadVersion, - EventFdError, - DiskImageOpen(disk::Error), +pub enum Error { + CreateVmFailed(kvm::Error), + MappingFailed(system::Error), TerminalTermios(io::Error), IoError(io::Error), - MemoryManagerCreate, + ArchError(arch::Error), NetworkSetup(netlink::Error), + SetupBootFs(io::Error), + SetupVirtio(virtio::Error), } -impl 
ErrorKind { - fn as_str(&self) -> &'static str { - match *self { - ErrorKind::InvalidAddress(..) => "Invalid guest memory address", - ErrorKind::InvalidMappingOffset(..) => "Invalid memory mapping offset", - ErrorKind::RegisterMemoryFailed => "Failed to register memory region", - ErrorKind::ReadKernelFailed => "Failed to load kernel from disk", - ErrorKind::Interrupted => "System call interrupted", - ErrorKind::InvalidVring => "Invalid Vring", - ErrorKind::IoctlFailed(..) => "Ioctl failed", - ErrorKind::MissingRequiredExtension(..) => "kernel does not support requred kvm extension", - ErrorKind::OpenDeviceFailed => "could not open /dev/kvm", - ErrorKind::CreateVmFailed => "call to create vm failed", - ErrorKind::BadVersion => "unexpected kvm api version", - ErrorKind::EventFdError => "eventfd error", - ErrorKind::DiskImageOpen(_) => "failed to open disk image", - ErrorKind::TerminalTermios(_) => "failed termios", - ErrorKind::IoError(_) => "i/o error", - ErrorKind::MemoryManagerCreate => "memory manager", - ErrorKind::NetworkSetup(_) => "error setting up network", - } - } -} -impl fmt::Display for ErrorKind { +impl fmt::Display for Error { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - ErrorKind::InvalidAddress(addr) => write!(f, "{}: 0x{:x}", self.as_str(), addr), - ErrorKind::InvalidMappingOffset(offset) => write!(f, "{}: 0x{:x}", self.as_str(), offset), - ErrorKind::IoctlFailed(name) => write!(f, "Ioctl {} failed", name), - ErrorKind::DiskImageOpen(ref e) => write!(f, "failed to open disk image: {}", e), - ErrorKind::TerminalTermios(ref e) => write!(f, "error reading/restoring terminal state: {}", e), - ErrorKind::IoError(ref e) => write!(f, "i/o error: {}", e), - ErrorKind::MemoryManagerCreate => write!(f, "error creating memory manager"), - ErrorKind::NetworkSetup(ref e) => write!(f, "error setting up network: {}", e), - _ => write!(f, "{}", self.as_str()), + match self { + Error::TerminalTermios(e) => write!(f, "error 
reading/restoring terminal state: {}", e), + Error::IoError(e) => write!(f, "i/o error: {}", e), + Error::NetworkSetup(e) => write!(f, "error setting up network: {}", e), + Error::CreateVmFailed(e) => write!(f, "call to create vm failed: {}", e), + Error::MappingFailed(e) => write!(f, "memory mapping failed: {}", e), + Error::SetupBootFs(e) => write!(f, "setting up boot fs failed: {}", e), + Error::SetupVirtio(e) => write!(f, "setting up virtio devices failed: {}", e), + Error::ArchError(e) => e.fmt(f), } } } impl From for Error { fn from(err: io::Error) -> Error { - ErrorKind::IoError(err).into() + Error::IoError(err).into() } } -impl From for Error { - fn from(kind: ErrorKind) -> Error { - Error { repr: Repr::Simple(kind) } - } -} impl From for Error { fn from(err: netlink::Error) -> Error { - ErrorKind::NetworkSetup(err).into() + Error::NetworkSetup(err).into() } } - -enum Repr { - Errno(i32), - Simple(ErrorKind), - General(Box), -} - -#[derive(Debug)] -struct General { - kind: ErrorKind, - error: Box, -} - -#[derive(Debug)] -pub struct Error { - repr: Repr, -} - -impl Error { - pub fn new(kind: ErrorKind, error: E) -> Error - where E: Into> { - Self::_new(kind, error.into()) - } - - fn _new(kind: ErrorKind, error: Box) -> Error { - Error { - repr: Repr::General(Box::new(General{ - kind, error - })) - } - } - - pub fn from_last_errno() -> Error { - let errno = unsafe { *libc::__errno_location() }; - Error::from_errno(errno) - } - - pub fn from_errno(errno: i32) -> Error { - if errno == libc::EINTR { - Error { repr: Repr::Simple(ErrorKind::Interrupted) } - } else { - Error { repr: Repr::Errno(errno) } - } - } - - pub fn is_interrupted(&self) -> bool { - match self.repr { - Repr::Simple(ErrorKind::Interrupted) => true, - _ => false, - } - } -} - -fn error_string(errno: i32) -> String { - let mut buf = [0 as libc::c_char; 256]; - let p = buf.as_mut_ptr(); - unsafe { - if libc::strerror_r(errno as libc::c_int, p, buf.len()) < 0 { - panic!("strerror_r failed in 
error_string"); - } - let p = p as *const _; - str::from_utf8(CStr::from_ptr(p).to_bytes()).unwrap().to_owned() - } -} - -impl fmt::Debug for Repr { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match *self { - Repr::Errno(ref errno) => - f.debug_struct("Errno").field("errno", errno) - .field("message", &error_string(*errno)).finish(), - Repr::General(ref c) => f.debug_tuple("General").field(c).finish(), - Repr::Simple(ref kind) => f.debug_tuple("Kind").field(kind).finish(), - } - } -} - -impl fmt::Display for Error { - fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - match self.repr { - Repr::Errno(errno) => { - let detail = error_string(errno); - write!(f, "{} (errno: {})", detail, errno) - } - Repr::General(ref c) => { - write!(f, "{}: {}", c.kind, c.error) - }, - Repr::Simple(ref kind) => kind.fmt(f), - } - } -} - -impl error::Error for Error { - fn description(&self) -> &str { - match self.repr { - Repr::Errno(..) => "Errno Error", - Repr::Simple(ref kind) => kind.as_str(), - Repr::General(ref c) => c.error.description(), - } - } - - fn source(&self) -> Option<&(dyn error::Error + 'static)> { - match self.repr { - Repr::Errno(..) => None, - Repr::Simple(..) 
=> None, - Repr::General(ref c) => c.error.source(), - } - } -} - diff --git a/src/vm/kernel_cmdline.rs b/src/vm/kernel_cmdline.rs index 69d8a97..e6eccb5 100644 --- a/src/vm/kernel_cmdline.rs +++ b/src/vm/kernel_cmdline.rs @@ -1,8 +1,6 @@ use std::ffi::OsString; use std::os::unix::ffi::OsStrExt; -use crate::memory::{GuestRam,KERNEL_CMDLINE_ADDRESS}; -use super::Result; fn add_defaults(cmdline: &mut KernelCmdLine) { @@ -30,13 +28,12 @@ fn add_defaults(cmdline: &mut KernelCmdLine) { pub struct KernelCmdLine { - address: u64, buffer: OsString, } impl KernelCmdLine { pub fn new() -> KernelCmdLine { - KernelCmdLine { address: KERNEL_CMDLINE_ADDRESS, buffer: OsString::new() } + KernelCmdLine { buffer: OsString::new() } } pub fn new_default() -> KernelCmdLine { @@ -61,19 +58,11 @@ impl KernelCmdLine { self.push(&format!("{}={}", var, val)) } - pub fn address(&self) -> u64 { - self.address - } - pub fn size(&self) -> usize { (&self.buffer).as_bytes().len() + 1 } - pub fn write_to_memory(&self, memory: &GuestRam) -> Result<()> { - let bs = self.buffer.as_bytes(); - let len = bs.len(); - memory.write_bytes(KERNEL_CMDLINE_ADDRESS, bs)?; - memory.write_int(KERNEL_CMDLINE_ADDRESS + len as u64, 0u8)?; - Ok(()) + pub fn as_bytes(&self) -> &[u8] { + self.buffer.as_bytes() } } diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 81913b2..84a3fb2 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -1,251 +1,19 @@ -use std::{thread, fs}; - -use self::io::IoDispatcher; - -use crate::virtio::VirtioBus; -use crate::devices; - -use crate::memory::{GuestRam, KVM_KERNEL_LOAD_ADDRESS, MemoryManager, SystemAllocator, AddressRange}; -use crate::kvm::*; - static KERNEL: &[u8] = include_bytes!("../../kernel/ph_linux"); static PHINIT: &[u8] = include_bytes!("../../ph-init/target/release/ph-init"); static SOMMELIER: &[u8] = include_bytes!("../../sommelier/sommelier"); +pub mod arch; mod run; pub mod io; mod setup; mod error; mod kernel_cmdline; mod config; + pub use config::VmConfig; +pub use 
setup::VmSetup; -pub use self::error::{Result,Error,ErrorKind}; +pub use self::error::{Result,Error}; +pub use arch::{ArchSetup,create_setup}; -use self::run::KvmRunArea; - -use self::kernel_cmdline::KernelCmdLine; -use std::sync::Arc; -use std::sync::atomic::AtomicBool; -use termios::Termios; -use crate::devices::SyntheticFS; -use crate::disk::DiskImage; -use crate::system::{NetlinkSocket, Tap}; - -pub struct Vm { - _config: VmConfig, - memory: MemoryManager, - io_dispatcher: Arc, - termios: Option, - _virtio: Arc, -} - -static REQUIRED_EXTENSIONS: &[u32] = &[ - KVM_CAP_IRQCHIP, - KVM_CAP_HLT, - KVM_CAP_USER_MEMORY, - KVM_CAP_SET_TSS_ADDR, - KVM_CAP_EXT_CPUID, - KVM_CAP_IRQ_ROUTING, - KVM_CAP_IRQ_INJECT_STATUS, - KVM_CAP_PIT2, - KVM_CAP_IOEVENTFD, -]; - -fn get_base_dev_pfn(mem_size: u64) -> u64 { - // Put device memory at a 2MB boundary after physical memory or 4gb, whichever is greater. - const MB: u64 = 1024 * 1024; - const GB: u64 = 1024 * MB; - let mem_size_round_2mb = (mem_size + 2 * MB - 1) / (2 * MB) * (2 * MB); - std::cmp::max(mem_size_round_2mb, 4 * GB) / 4096 -} - -impl Vm { - fn create_kvm() -> Result { - let kvm = Kvm::open(&REQUIRED_EXTENSIONS)?; - kvm.set_tss_addr(0xFFFbd000)?; - kvm.create_pit2()?; - kvm.create_irqchip()?; - Ok(kvm) - } - fn create_memory_manager(ram_size: usize, use_drm: bool) -> Result { - let kvm = Self::create_kvm()?; - let ram = GuestRam::new(ram_size, &kvm)?; - let dev_addr_start = get_base_dev_pfn(ram_size as u64) * 4096; - let dev_addr_size = u64::max_value() - dev_addr_start; - let allocator = SystemAllocator::new(AddressRange::new(dev_addr_start,dev_addr_size as usize)); - Ok(MemoryManager::new(kvm, ram, allocator, use_drm).map_err(|_| ErrorKind::MemoryManagerCreate)?) 
- } - - fn setup_virtio(config: &mut VmConfig, cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> { - devices::VirtioSerial::create(virtio)?; - devices::VirtioRandom::create(virtio)?; - - if config.is_wayland_enabled() { - devices::VirtioWayland::create(virtio)?; - } - - let homedir = config.homedir(); - devices::VirtioP9::create(virtio, "home", homedir, false, false)?; - if homedir != "/home/user" && !config.is_realm() { - cmdline.push_set_val("phinit.home", homedir); - } - - let mut block_root = None; - - for mut disk in config.get_realmfs_images() { - disk.open().map_err(ErrorKind::DiskImageOpen)?; - if block_root == None { - block_root = Some(disk.read_only()); - } - devices::VirtioBlock::create(virtio, disk)?; - } - - for mut disk in config.get_raw_disk_images() { - disk.open().map_err(ErrorKind::DiskImageOpen)?; - if block_root == None { - block_root = Some(disk.read_only()); - } - devices::VirtioBlock::create(virtio, disk)?; - } - - if let Some(read_only) = block_root { - if !read_only { - cmdline.push("phinit.root_rw"); - } - cmdline.push("phinit.root=/dev/vda"); - cmdline.push("phinit.rootfstype=ext4"); - } else { - devices::VirtioP9::create(virtio, "9proot", "/", true, false)?; - cmdline.push_set_val("phinit.root", "9proot"); - cmdline.push_set_val("phinit.rootfstype", "9p"); - cmdline.push_set_val("phinit.rootflags", "trans=virtio"); - } - - Self::setup_synthetic_bootfs(cmdline, virtio)?; - if config.network() { - Self::setup_network(config, cmdline, virtio)?; - } - Ok(()) - } - - fn setup_synthetic_bootfs(cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> { - let mut s = SyntheticFS::new(); - s.mkdirs(&["/tmp", "/proc", "/sys", "/dev", "/home/user", "/bin", "/etc"]); - - fs::write("/tmp/ph-init", PHINIT)?; - s.add_library_dependencies("/tmp/ph-init")?; - fs::remove_file("/tmp/ph-init")?; - - s.add_memory_file("/usr/bin", "ph-init", 0o755, PHINIT)?; - s.add_memory_file("/usr/bin", "sommelier", 0o755, SOMMELIER)?; - - 
s.add_file("/etc", "ld.so.cache", 0o644, "/etc/ld.so.cache"); - devices::VirtioP9::create_with_filesystem(s, virtio, "/dev/root", "/", false)?; - cmdline.push_set_val("init", "/usr/bin/ph-init"); - cmdline.push_set_val("root", "/dev/root"); - cmdline.push("ro"); - cmdline.push_set_val("rootfstype", "9p"); - cmdline.push_set_val("rootflags", "trans=virtio"); - Ok(()) - } - - fn setup_network(config: &VmConfig, cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> { - let tap = Self::setup_tap(config.bridge())?; - devices::VirtioNet::create(virtio, tap)?; - cmdline.push("phinit.ip=172.17.0.22"); - Ok(()) - } - - fn setup_tap(bridge_name: &str) -> Result { - let tap = Tap::new_default()?; - let nl = NetlinkSocket::open()?; - - if !nl.interface_exists(bridge_name) { - nl.create_bridge(bridge_name)?; - nl.set_interface_up(bridge_name)?; - } - nl.add_interface_to_bridge(tap.name(), bridge_name)?; - nl.set_interface_up(tap.name())?; - Ok(tap) - } - - pub fn open(mut config: VmConfig) -> Result { - - let with_drm = config.is_wayland_enabled() && config.is_dmabuf_enabled(); - let mut memory = Self::create_memory_manager(config.ram_size(), with_drm)?; - - let mut cmdline = KernelCmdLine::new_default(); - - setup::kernel::load_pm_kernel(memory.guest_ram(), cmdline.address(), cmdline.size())?; - - let io_dispatch = IoDispatcher::new(); - - memory.kvm_mut().create_vcpus(config.ncpus())?; - - devices::rtc::Rtc::register(io_dispatch.clone()); - - if config.verbose() { - cmdline.push("earlyprintk=serial"); - devices::serial::SerialDevice::register(memory.kvm().clone(),io_dispatch.clone(), 0); - } else { - cmdline.push("quiet"); - } - if config.rootshell() { - cmdline.push("phinit.rootshell"); - } - - if memory.drm_available() && config.is_dmabuf_enabled() { - cmdline.push("phinit.virtwl_dmabuf"); - } - - if let Some(realm) = config.realm_name() { - cmdline.push_set_val("phinit.realm", realm); - } - - let saved= Termios::from_fd(0) - 
.map_err(ErrorKind::TerminalTermios)?; - let termios = Some(saved); - - let mut virtio = VirtioBus::new(memory.clone(), io_dispatch.clone(), memory.kvm().clone()); - Self::setup_virtio(&mut config, &mut cmdline, &mut virtio)?; - - if let Some(init_cmd) = config.get_init_cmdline() { - cmdline.push_set_val("init", init_cmd); - } - - cmdline.write_to_memory(memory.guest_ram())?; - - setup::mptable::setup_mptable(memory.guest_ram(), config.ncpus(), virtio.pci_irqs())?; - - Ok(Vm { - _config: config, - memory, - io_dispatcher: io_dispatch, - termios, - _virtio: Arc::new(virtio), - }) - } - - pub fn start(&self) -> Result<()> { - let shutdown = Arc::new(AtomicBool::new(false)); - let mut handles = Vec::new(); - for vcpu in self.memory.kvm().get_vcpus() { - setup::cpu::setup_protected_mode(&vcpu, KVM_KERNEL_LOAD_ADDRESS + 0x200, self.memory.guest_ram())?; - let mut run_area = KvmRunArea::new(vcpu, shutdown.clone(), self.io_dispatcher.clone())?; - let h = thread::spawn(move || run_area.run()); - handles.push(h); - } - - for h in handles { - h.join().expect("..."); - } - if let Some(termios) = self.termios { - let _ = termios::tcsetattr(0, termios::TCSANOW, &termios) - .map_err(ErrorKind::TerminalTermios)?; - } - Ok(()) - } -} - diff --git a/src/vm/run.rs b/src/vm/run.rs index bff9825..28fe4eb 100644 --- a/src/vm/run.rs +++ b/src/vm/run.rs @@ -5,6 +5,7 @@ use crate::memory::Mapping; use super::Result; use super::io::IoDispatcher; use std::sync::atomic::{AtomicBool, Ordering}; +use crate::vm::Error; const KVM_EXIT_UNKNOWN:u32 = 0; const KVM_EXIT_IO:u32 = 2; @@ -37,8 +38,8 @@ pub struct MmioExitData { impl KvmRunArea { pub fn new(vcpu: KvmVcpu, shutdown: Arc, io_dispatcher: Arc) -> Result { - let size = vcpu.get_vcpu_mmap_size()?; - let mapping = Mapping::new_from_fd(vcpu.raw_fd(), size)?; + let size = vcpu.get_vcpu_mmap_size().map_err(Error::CreateVmFailed)?; + let mapping = Mapping::new_from_fd(vcpu.raw_fd(), size).map_err(Error::MappingFailed)?; Ok(KvmRunArea{ vcpu, io: 
io_dispatcher, @@ -120,7 +121,6 @@ impl KvmRunArea { let sub = self.suberror(); println!("internal error: {}", sub); println!("{:?}", self.vcpu.get_regs().unwrap()); - println!("{:?}", self.vcpu.get_sregs().unwrap()); } n => { println!("unhandled exit: {}", n);}, } @@ -130,15 +130,6 @@ impl KvmRunArea { self.shutdown.store(true, Ordering::Relaxed); } - fn _handle_problem(&mut self) { - let regs = self.vcpu.get_regs().unwrap(); - let sregs = self.vcpu.get_sregs().unwrap(); - println!("REGS:\n{:?}", regs); - println!("SREGS:\n{:?}", sregs); - panic!(":("); - - } - fn handle_exit_io(&mut self) { let exit = self.get_io_exit(); if exit.dir_out { diff --git a/src/vm/setup.rs b/src/vm/setup.rs new file mode 100644 index 0000000..ebf1f39 --- /dev/null +++ b/src/vm/setup.rs @@ -0,0 +1,242 @@ +use crate::vm::{VmConfig, Result, Error, PHINIT, SOMMELIER}; +use crate::vm::arch::ArchSetup; +use crate::vm::kernel_cmdline::KernelCmdLine; +use crate::vm::io::IoDispatcher; +use crate::devices; +use termios::Termios; +use crate::virtio::VirtioBus; +use crate::virtio; +use crate::devices::SyntheticFS; +use std::{fs, thread}; +use crate::system::{Tap, NetlinkSocket}; +use crate::disk::DiskImage; +use crate::kvm::{KvmVcpu, Kvm}; +use std::sync::Arc; +use crate::memory::MemoryManager; +use std::sync::atomic::AtomicBool; +use crate::vm::run::KvmRunArea; + +pub struct Vm { + kvm: Kvm, + vcpus: Vec, + memory: MemoryManager, + io_dispatch: Arc, + termios: Option, +} + +impl Vm { + fn create(arch: &mut A) -> Result { + let kvm = arch.open_kvm() + .map_err(Error::ArchError)?; + let memory = arch.create_memory(&kvm) + .map_err(Error::ArchError)?; + Ok(Vm { + kvm, + memory, + vcpus: Vec::new(), + io_dispatch: IoDispatcher::new(), + termios: None, + }) + } + + pub fn start(&self) -> Result<()> { + let shutdown = Arc::new(AtomicBool::new(false)); + let mut handles = Vec::new(); + for vcpu in self.vcpus.clone() { + let mut run_area = KvmRunArea::new(vcpu, shutdown.clone(), 
self.io_dispatch.clone())?; + let h = thread::spawn(move || run_area.run()); + handles.push(h); + } + + for h in handles { + h.join().expect("..."); + } + if let Some(termios) = self.termios { + let _ = termios::tcsetattr(0, termios::TCSANOW, &termios) + .map_err(Error::TerminalTermios)?; + } + Ok(()) + + } +} + +pub struct VmSetup { + config: VmConfig, + cmdline: KernelCmdLine, + arch: T, +} + +impl VmSetup { + + pub fn new(config: VmConfig, arch: T) -> Self { + VmSetup { + config, + cmdline: KernelCmdLine::new_default(), + arch, + } + } + + pub fn create_vm(&mut self) -> Result { + let mut vm = Vm::create(&mut self.arch)?; + + devices::rtc::Rtc::register(vm.io_dispatch.clone()); + + if self.config.verbose() { + self.cmdline.push("earlyprintk=serial"); + devices::serial::SerialDevice::register(vm.kvm.clone(),vm.io_dispatch.clone(), 0); + } else { + self.cmdline.push("quiet"); + } + if self.config.rootshell() { + self.cmdline.push("phinit.rootshell"); + } + if vm.memory.drm_available() && self.config.is_dmabuf_enabled() { + self.cmdline.push("phinit.virtwl_dmabuf"); + } + + if let Some(realm) = self.config.realm_name() { + self.cmdline.push_set_val("phinit.realm", realm); + } + + let saved= Termios::from_fd(0) + .map_err(Error::TerminalTermios)?; + vm.termios = Some(saved); + + let mut virtio = VirtioBus::new(vm.memory.clone(), vm.io_dispatch.clone(), vm.kvm.clone()); + self.setup_synthetic_bootfs(&mut virtio)?; + self.setup_virtio(&mut virtio) + .map_err(Error::SetupVirtio)?; + + if let Some(init_cmd) = self.config.get_init_cmdline() { + self.cmdline.push_set_val("init", init_cmd); + } + + self.arch.setup_memory(&self.cmdline, &virtio.pci_irqs()) + .map_err(Error::ArchError)?; + + for id in 0..self.config.ncpus() { + let vcpu = vm.kvm.new_vcpu(id).map_err(Error::CreateVmFailed)?; + self.arch.setup_vcpu(&vcpu).map_err(Error::ArchError)?; + vm.vcpus.push(vcpu); + } + Ok(vm) + } + + fn setup_virtio(&mut self, virtio: &mut VirtioBus) -> virtio::Result<()> { + 
devices::VirtioSerial::create(virtio)?; + devices::VirtioRandom::create(virtio)?; + + if self.config.is_wayland_enabled() { + devices::VirtioWayland::create(virtio)?; + } + + let homedir = self.config.homedir(); + devices::VirtioP9::create(virtio, "home", homedir, false, false)?; + if homedir != "/home/user" && !self.config.is_realm() { + self.cmdline.push_set_val("phinit.home", homedir); + } + + let mut block_root = None; + + for disk in self.config.get_realmfs_images() { + if block_root == None { + block_root = Some(disk.read_only()); + } + devices::VirtioBlock::create(virtio, disk)?; + } + + for disk in self.config.get_raw_disk_images() { + if block_root == None { + block_root = Some(disk.read_only()); + } + devices::VirtioBlock::create(virtio, disk)?; + } + + if let Some(read_only) = block_root { + if !read_only { + self.cmdline.push("phinit.root_rw"); + } + self.cmdline.push("phinit.root=/dev/vda"); + self.cmdline.push("phinit.rootfstype=ext4"); + } else { + devices::VirtioP9::create(virtio, "9proot", "/", true, false)?; + self.cmdline.push_set_val("phinit.root", "9proot"); + self.cmdline.push_set_val("phinit.rootfstype", "9p"); + self.cmdline.push_set_val("phinit.rootflags", "trans=virtio"); + } + + if self.config.network() { + self.setup_network(virtio)?; + self.drop_privs(); + + } + Ok(()) + } + + fn drop_privs(&self) { + unsafe { + libc::setgid(1000); + libc::setuid(1000); + libc::setegid(1000); + libc::seteuid(1000); + } + + } + + fn setup_synthetic_bootfs(&mut self, virtio: &mut VirtioBus) -> Result<()> { + let bootfs = self.create_bootfs() + .map_err(Error::SetupBootFs)?; + + devices::VirtioP9::create_with_filesystem(bootfs, virtio, "/dev/root", "/", false) + .map_err(Error::SetupVirtio)?; + + self.cmdline.push_set_val("init", "/usr/bin/ph-init"); + self.cmdline.push_set_val("root", "/dev/root"); + self.cmdline.push("ro"); + self.cmdline.push_set_val("rootfstype", "9p"); + self.cmdline.push_set_val("rootflags", "trans=virtio"); + Ok(()) + } + + fn 
create_bootfs(&self) -> ::std::io::Result { + let mut s = SyntheticFS::new(); + s.mkdirs(&["/tmp", "/proc", "/sys", "/dev", "/home/user", "/bin", "/etc"]); + + fs::write("/tmp/ph-init", PHINIT)?; + s.add_library_dependencies("/tmp/ph-init")?; + fs::remove_file("/tmp/ph-init")?; + + s.add_memory_file("/usr/bin", "ph-init", 0o755, PHINIT)?; + s.add_memory_file("/usr/bin", "sommelier", 0o755, SOMMELIER)?; + + s.add_file("/etc", "ld.so.cache", 0o644, "/etc/ld.so.cache"); + Ok(s) + } + + fn setup_network(&mut self, virtio: &mut VirtioBus) -> virtio::Result<()> { + let tap = match self.setup_tap() { + Ok(tap) => tap, + Err(e) => { + warn!("failed to create tap device: {}", e); + return Ok(()); + } + }; + devices::VirtioNet::create(virtio, tap)?; + self.cmdline.push("phinit.ip=172.17.0.22"); + Ok(()) + } + + fn setup_tap(&self) -> Result { + let bridge_name = self.config.bridge(); + let tap = Tap::new_default()?; + let nl = NetlinkSocket::open()?; + + if !nl.interface_exists(bridge_name) { + nl.create_bridge(bridge_name)?; + nl.set_interface_up(bridge_name)?; + } + nl.add_interface_to_bridge(tap.name(), bridge_name)?; + nl.set_interface_up(tap.name())?; + Ok(tap) + } +} \ No newline at end of file diff --git a/src/vm/setup/cpu.rs b/src/vm/setup/cpu.rs deleted file mode 100644 index ff5206f..0000000 --- a/src/vm/setup/cpu.rs +++ /dev/null @@ -1,200 +0,0 @@ -use crate::vm::Result; - -use crate::kvm::{KvmVcpu,KvmRegs,KvmFpu, KvmMsrs, KvmSegment}; -use crate::memory::{GuestRam,KERNEL_ZERO_PAGE}; - - -const MSR_IA32_SYSENTER_CS: u32 = 0x00000174; -const MSR_IA32_SYSENTER_ESP: u32 = 0x00000175; -const MSR_IA32_SYSENTER_EIP: u32 = 0x00000176; -const MSR_STAR: u32 = 0xc0000081; -const MSR_LSTAR: u32 = 0xc0000082; -const MSR_CSTAR: u32 = 0xc0000083; -const MSR_SYSCALL_MASK: u32 = 0xc0000084; -const MSR_KERNEL_GS_BASE: u32 = 0xc0000102; -const MSR_IA32_TSC: u32 = 0x00000010; -const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0; - -const MSR_IA32_MISC_ENABLE_FAST_STRING: u64 = 0x01; - - 
-const EBX_CLFLUSH_CACHELINE: u32 = 8; // Flush a cache line size. -const EBX_CLFLUSH_SIZE_SHIFT: u32 = 8; // Bytes flushed when executing CLFLUSH. -const _EBX_CPU_COUNT_SHIFT: u32 = 16; // Index of this CPU. -const EBX_CPUID_SHIFT: u32 = 24; // Index of this CPU. -const _ECX_EPB_SHIFT: u32 = 3; // "Energy Performance Bias" bit. -const _ECX_HYPERVISOR_SHIFT: u32 = 31; // Flag to be set when the cpu is running on a hypervisor. -const _EDX_HTT_SHIFT: u32 = 28; // Hyper Threading Enabled. - -fn setup_cpuid(vcpu: &KvmVcpu) -> Result<()> { - let mut cpuid = vcpu.get_supported_cpuid()?; - let cpu_id = 0u32; // first vcpu - - for e in &mut cpuid { - match e.function { - 0 => { - e.ebx = 0x67627553; - e.ecx = 0x20487020; - e.edx = 0x68706172; - } - 1 => { - if e.index == 0 { - e.ecx |= 1<<31; - } - e.ebx = (cpu_id << EBX_CPUID_SHIFT) as u32 | - (EBX_CLFLUSH_CACHELINE << EBX_CLFLUSH_SIZE_SHIFT); - /* - if cpu_count > 1 { - entry.ebx |= (cpu_count as u32) << EBX_CPU_COUNT_SHIFT; - entry.edx |= 1 << EDX_HTT_SHIFT; - } - */ - } - 6 => { - e.ecx &= !(1<<3); - - } - 10 => { - if e.eax > 0 { - let version = e.eax & 0xFF; - let ncounters = (e.eax >> 8) & 0xFF; - if version != 2 || ncounters == 0 { - e.eax = 0; - } - } - - } - _ => {} - } - } - vcpu.set_cpuid2(cpuid)?; - Ok(()) -} - -fn setup_fpu(vcpu: &KvmVcpu) -> Result<()> { - let mut fpu = KvmFpu::new(); - fpu.fcw = 0x37f; - fpu.mxcsr = 0x1f80; - vcpu.set_fpu(&fpu)?; - Ok(()) -} - -fn setup_msrs(vcpu: &KvmVcpu) -> Result<()> { - let mut msrs = KvmMsrs::new(); - msrs.add(MSR_IA32_SYSENTER_CS, 0); - msrs.add(MSR_IA32_SYSENTER_ESP, 0); - msrs.add(MSR_IA32_SYSENTER_EIP, 0); - msrs.add(MSR_STAR, 0); - msrs.add(MSR_CSTAR, 0); - msrs.add(MSR_KERNEL_GS_BASE, 0); - msrs.add(MSR_SYSCALL_MASK, 0); - msrs.add(MSR_LSTAR, 0); - msrs.add(MSR_IA32_TSC, 0); - msrs.add(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_FAST_STRING); - vcpu.set_msrs(&msrs)?; - Ok(()) -} - - -pub fn gdt_entry(flags: u16, base: u32, limit: u32) -> u64 { - ((((base as u64) 
& 0xff000000u64) << (56 - 24)) | (((flags as u64) & 0x0000f0ffu64) << 40) | - (((limit as u64) & 0x000f0000u64) << (48 - 16)) | - (((base as u64) & 0x00ffffffu64) << 16) | ((limit as u64) & 0x0000ffffu64)) -} -const BOOT_GDT_OFFSET: usize = 0x500; -const BOOT_IDT_OFFSET: usize = 0x520; - -const BOOT_STACK: u64 = 0x8000; -const BOOT_PML4: u64 = 0x9000; -const BOOT_PDPTE: u64 = 0xA000; -const BOOT_PDE: u64 = 0xB000; - - -const X86_CR0_PE: u64 = 0x1; -const X86_CR0_PG: u64 = 0x80000000; -const X86_CR4_PAE: u64 = 0x20; - -const EFER_LME: u64 = 0x100; -const EFER_LMA: u64 = (1 << 10); - -fn setup_boot_pagetables(memory: &GuestRam) -> Result<()> { - memory.write_int::(BOOT_PML4, BOOT_PDPTE | 0x3)?; - memory.write_int::(BOOT_PDPTE, BOOT_PDE | 0x3)?; - for i in 0..512_u64 { - let entry = (i << 21) | 0x83; - memory.write_int::(BOOT_PDE + (i * 8), entry)?; - } - Ok(()) -} - -fn write_gdt_table(table: &[u64], memory: &GuestRam) -> Result<()> { - for i in 0..table.len() { - memory.write_int((BOOT_GDT_OFFSET + i * 8) as u64, table[i])?; - } - Ok(()) -} - -pub fn setup_pm_sregs(vcpu: &KvmVcpu, memory: &GuestRam) -> Result<()> { - let table = [ - gdt_entry(0,0,0), - gdt_entry(0xa09b,0,0xfffff), - gdt_entry(0xc093,0,0xfffff), - gdt_entry(0x808b,0,0xfffff), - ]; - write_gdt_table(&table, memory)?; - - memory.write_int::(BOOT_IDT_OFFSET as u64, 0u64)?; - - let code = KvmSegment::new(0, 0xfffff, 1 * 8, 0xa09b); - let data = KvmSegment::new(0, 0xfffff, 2 * 8, 0xc093); - let tss = KvmSegment::new(0, 0xfffff, 3 * 8, 0x808b); - - let mut regs = vcpu.get_sregs()?; - - regs.gdt.base = BOOT_GDT_OFFSET as u64; - regs.gdt.limit = 32 - 1; - - regs.itd.base = BOOT_IDT_OFFSET as u64; - regs.itd.limit = 8 - 1; - - regs.cs = code; - regs.ds = data; - regs.es = data; - regs.fs = data; - regs.gs = data; - regs.ss = data; - regs.tr = tss; - - // protected mode - regs.cr0 |= X86_CR0_PE; - regs.efer |= EFER_LME; - - setup_boot_pagetables(&memory)?; - regs.cr3 = BOOT_PML4; - regs.cr4 |= X86_CR4_PAE; - 
regs.cr0 |= X86_CR0_PG; - regs.efer |= EFER_LMA; - - vcpu.set_sregs(®s)?; - Ok(()) -} - -pub fn setup_pm_regs(vcpu: &KvmVcpu, kernel_entry: u64) -> Result<()> { - let mut regs = KvmRegs::new(); - regs.rflags = 0x0000000000000002; - regs.rip = kernel_entry; - regs.rsp = BOOT_STACK; - regs.rbp = BOOT_STACK; - regs.rsi = KERNEL_ZERO_PAGE; - vcpu.set_regs(®s)?; - Ok(()) -} - -pub fn setup_protected_mode(vcpu: &KvmVcpu, kernel_entry: u64, memory: &GuestRam) -> Result<()> { - setup_cpuid(&vcpu)?; - setup_pm_sregs(&vcpu, memory)?; - setup_pm_regs(&vcpu, kernel_entry)?; - setup_fpu(&vcpu)?; - setup_msrs(&vcpu)?; - Ok(()) -} diff --git a/src/vm/setup/kernel.rs b/src/vm/setup/kernel.rs deleted file mode 100644 index db3bfe6..0000000 --- a/src/vm/setup/kernel.rs +++ /dev/null @@ -1,116 +0,0 @@ - -use std::io::{self, Read, SeekFrom, Seek, Cursor}; -use byteorder::{LittleEndian,ReadBytesExt}; - -use crate::memory::{self,GuestRam,KERNEL_ZERO_PAGE}; -use crate::vm::{Result, Error, ErrorKind, KERNEL}; - - -// Documentation/x86/boot.txt - -const HDR_BOOT_FLAG: u64 = 0x1fe; // u16 -const HDR_HEADER: u64 = 0x202; // u32 -const HDR_TYPE_LOADER: u64 = 0x210; // u8 -const HDR_CMDLINE_PTR: u64 = 0x228; // u32 -const HDR_CMDLINE_SIZE: u64 = 0x238; // u32 -const HDR_KERNEL_ALIGNMENT: u64 = 0x230; // u32 - -// Documentation/x86/zero-page.txt - -const BOOT_PARAM_E820_ENTRIES: u64 = 0x1e8; -const BOOT_PARAM_E820_MAP: u64 = 0x2d0; - -const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55; -const EBDA_START: u64 = 0x0009fc00; -const KERNEL_HDR_MAGIC: u32 = 0x53726448; -const KERNEL_LOADER_OTHER: u8 = 0xff; -const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x1000000; - -const E820_RAM: u32 = 1; - -fn setup_e820(memory: &GuestRam, base: u64) -> Result<()> { - let ram_size = memory.ram_size() as u64; - - let mut e820_ranges = Vec::new(); - e820_ranges.push((0u64, EBDA_START)); - - if ram_size < memory::PCI_MMIO_RESERVED_BASE { - e820_ranges.push((memory::KVM_KERNEL_LOAD_ADDRESS, ram_size - 
memory::KVM_KERNEL_LOAD_ADDRESS)); - } else { - e820_ranges.push((memory::KVM_KERNEL_LOAD_ADDRESS, memory::PCI_MMIO_RESERVED_BASE - memory::KVM_KERNEL_LOAD_ADDRESS)); - e820_ranges.push((memory::HIMEM_BASE, ram_size - memory::HIMEM_BASE)); - } - memory.write_int::(base + BOOT_PARAM_E820_ENTRIES, e820_ranges.len() as u8)?; - for i in 0..e820_ranges.len() { - let entry_base = base + BOOT_PARAM_E820_MAP + (i as u64 * 20); - memory.write_int::(entry_base, e820_ranges[i].0)?; - memory.write_int::(entry_base + 8, e820_ranges[i].1)?; - memory.write_int::(entry_base + 16, E820_RAM)?; - } - Ok(()) -} - -fn setup_zero_page(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> Result<()> { - let base = KERNEL_ZERO_PAGE; - memory.write_int::(base + HDR_BOOT_FLAG, KERNEL_BOOT_FLAG_MAGIC)?; - memory.write_int::(base + HDR_HEADER, KERNEL_HDR_MAGIC)?; - memory.write_int::(base + HDR_TYPE_LOADER, KERNEL_LOADER_OTHER)?; - memory.write_int::(base + HDR_CMDLINE_PTR, cmdline_addr as u32)?; - memory.write_int::(base + HDR_CMDLINE_SIZE, cmdline_size as u32)?; - memory.write_int::(base + HDR_KERNEL_ALIGNMENT, KERNEL_MIN_ALIGNMENT_BYTES)?; - - setup_e820(memory, base) -} - -pub fn load_pm_kernel(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> Result<()> { - load_elf_kernel(memory).map_err(|_| Error::from(ErrorKind::ReadKernelFailed))?; - setup_zero_page(memory, cmdline_addr, cmdline_size) -} - -pub fn load_elf_kernel(memory: &GuestRam) -> io::Result<()> { - let mut f = Cursor::new(KERNEL); - f.seek(SeekFrom::Start(32))?; - let phoff = f.read_u64::()?; - f.seek(SeekFrom::Current(16))?; - let phnum = f.read_u16::()?; - f.seek(SeekFrom::Start(phoff))?; - let mut v = Vec::new(); - for _ in 0..phnum { - let hdr = load_phdr(&mut f)?; - if hdr.p_type == 1 { - v.push(hdr); - } - } - - for h in v { - f.seek(SeekFrom::Start(h.p_offset))?; - let slice = memory.mut_slice(memory::KVM_KERNEL_LOAD_ADDRESS + h.p_paddr, h.p_filesz as usize).unwrap(); - f.read_exact(slice)?; - } - 
Ok(()) -} - -fn load_phdr(r: &mut R) -> io::Result { - let mut phdr: ElfPhdr = Default::default(); - phdr.p_type = r.read_u32::()?; - phdr.p_flags = r.read_u32::()?; - phdr.p_offset = r.read_u64::()?; - phdr.p_vaddr = r.read_u64::()?; - phdr.p_paddr = r.read_u64::()?; - phdr.p_filesz = r.read_u64::()?; - phdr.p_memsz = r.read_u64::()?; - phdr.p_align = r.read_u64::()?; - Ok(phdr) -} - -#[derive(Default,Debug)] -struct ElfPhdr { - pub p_type: u32, - pub p_flags: u32, - pub p_offset: u64, - pub p_vaddr: u64, - pub p_paddr: u64, - pub p_filesz: u64, - pub p_memsz: u64, - pub p_align: u64, -} \ No newline at end of file diff --git a/src/vm/setup/mod.rs b/src/vm/setup/mod.rs deleted file mode 100644 index 39f40e8..0000000 --- a/src/vm/setup/mod.rs +++ /dev/null @@ -1,3 +0,0 @@ -pub mod cpu; -pub mod kernel; -pub mod mptable; \ No newline at end of file