Large refactor.

Many, many changes. The major themes of the refactor were to move the
x86-specific code into a separate 'arch' package and to make the main
initialization and run loop much simpler to understand. The second big
change was to improve how errors are handled by making them more 'local',
so that packages define their own errors most of the time.
This commit is contained in:
Bruce Leidl 2019-10-02 16:41:02 -04:00
parent d24c5d9ea6
commit 7f3b3aa409
66 changed files with 2683 additions and 1986 deletions

29
Cargo.lock generated
View File

@ -5,6 +5,11 @@ name = "adler32"
version = "1.0.4" version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "arc-swap"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.37" version = "0.3.37"
@ -201,13 +206,14 @@ dependencies = [
] ]
[[package]] [[package]]
name = "pH" name = "ph"
version = "0.1.0" version = "0.1.0"
dependencies = [ dependencies = [
"byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
"lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
"libcitadel 0.1.0 (git+https://github.com/brl/citadel-tools?rev=44d5ce660f1f5cf8a3ad1060b143926a99be5148)", "libcitadel 0.1.0 (git+https://github.com/brl/citadel-tools?rev=44d5ce660f1f5cf8a3ad1060b143926a99be5148)",
"signal-hook 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)",
"termios 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "termios 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
@ -286,6 +292,24 @@ dependencies = [
"syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)",
] ]
[[package]]
name = "signal-hook"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
"signal-hook-registry 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "signal-hook-registry"
version = "1.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"arc-swap 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.62 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]] [[package]]
name = "sodiumoxide" name = "sodiumoxide"
version = "0.2.4" version = "0.2.4"
@ -426,6 +450,7 @@ dependencies = [
[metadata] [metadata]
"checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2" "checksum adler32 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "5d2e7343e7fc9de883d1b0341e0b13970f764c14101234857d2ddafa1cb1cac2"
"checksum arc-swap 0.4.3 (registry+https://github.com/rust-lang/crates.io-index)" = "f1a1eca3195b729bbd64e292ef2f5fff6b1c28504fed762ce2b1013dde4d8e92"
"checksum backtrace 0.3.37 (registry+https://github.com/rust-lang/crates.io-index)" = "5180c5a20655b14a819b652fd2378fa5f1697b6c9ddad3e695c2f9cedf6df4e2" "checksum backtrace 0.3.37 (registry+https://github.com/rust-lang/crates.io-index)" = "5180c5a20655b14a819b652fd2378fa5f1697b6c9ddad3e695c2f9cedf6df4e2"
"checksum backtrace-sys 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "82a830b4ef2d1124a711c71d263c5abdc710ef8e907bd508c88be475cebc422b" "checksum backtrace-sys 0.1.31 (registry+https://github.com/rust-lang/crates.io-index)" = "82a830b4ef2d1124a711c71d263c5abdc710ef8e907bd508c88be475cebc422b"
"checksum bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f2fb9e29e72fd6bc12071533d5dc7664cb01480c59406f656d7ac25c7bd8ff7" "checksum bincode 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "9f2fb9e29e72fd6bc12071533d5dc7664cb01480c59406f656d7ac25c7bd8ff7"
@ -459,6 +484,8 @@ dependencies = [
"checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421" "checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421"
"checksum serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "9796c9b7ba2ffe7a9ce53c2287dfc48080f4b2b362fcc245a259b3a7201119dd" "checksum serde 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "9796c9b7ba2ffe7a9ce53c2287dfc48080f4b2b362fcc245a259b3a7201119dd"
"checksum serde_derive 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "4b133a43a1ecd55d4086bd5b4dc6c1751c68b1bfbeba7a5040442022c7e7c02e" "checksum serde_derive 1.0.101 (registry+https://github.com/rust-lang/crates.io-index)" = "4b133a43a1ecd55d4086bd5b4dc6c1751c68b1bfbeba7a5040442022c7e7c02e"
"checksum signal-hook 0.1.10 (registry+https://github.com/rust-lang/crates.io-index)" = "4f61c4d59f3aaa9f61bba6450a9b80ba48362fd7d651689e7a10c453b1f6dc68"
"checksum signal-hook-registry 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "1797d48f38f91643908bb14e35e79928f9f4b3cefb2420a564dde0991b4358dc"
"checksum sodiumoxide 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1c99301d6d59f205d2abc4f0bd168edb5cd8a42bdbe99539773a21360d2a8ff7" "checksum sodiumoxide 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "1c99301d6d59f205d2abc4f0bd168edb5cd8a42bdbe99539773a21360d2a8ff7"
"checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5" "checksum syn 0.15.44 (registry+https://github.com/rust-lang/crates.io-index)" = "9ca4b3b69a77cbe1ffc9e198781b7acb0c7365a883670e8f1c1bc66fba79a5c5"
"checksum syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf" "checksum syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "66850e97125af79138385e9b88339cbcd037e3f28ceab8c5ad98e64f0f1f80bf"

View File

@ -1,5 +1,5 @@
[package] [package]
name = "pH" name = "ph"
version = "0.1.0" version = "0.1.0"
authors = ["Bruce Leidl <bruce@subgraph.com>"] authors = ["Bruce Leidl <bruce@subgraph.com>"]
edition = "2018" edition = "2018"
@ -9,4 +9,5 @@ byteorder="1.0.0"
libc = "*" libc = "*"
termios = "0.2.2" termios = "0.2.2"
lazy_static = "1.4.0" lazy_static = "1.4.0"
signal-hook = "0.1.10"
libcitadel = { git = "https://github.com/brl/citadel-tools", rev="44d5ce660f1f5cf8a3ad1060b143926a99be5148" } libcitadel = { git = "https://github.com/brl/citadel-tools", rev="44d5ce660f1f5cf8a3ad1060b143926a99be5148" }

9
src/bin/pH.rs Normal file
View File

@ -0,0 +1,9 @@
#![allow(non_snake_case)]
use ph::VmConfig;
fn main() {
VmConfig::new()
.ram_size_megs(2048)
.boot();
}

View File

@ -4,8 +4,7 @@ use std::thread;
use std::path::{PathBuf, Path}; use std::path::{PathBuf, Path};
use crate::memory::{GuestRam, MemoryManager}; use crate::memory::{GuestRam, MemoryManager};
use crate::virtio::{self,VirtioBus,VirtioDeviceOps, VirtQueue}; use crate::virtio::{self,VirtioBus,VirtioDeviceOps, VirtQueue, Result};
use crate::vm::Result;
use crate::devices::virtio_9p::server::Server; use crate::devices::virtio_9p::server::Server;
use crate::devices::virtio_9p::filesystem::{FileSystem, FileSystemOps}; use crate::devices::virtio_9p::filesystem::{FileSystem, FileSystemOps};
use self::pdu::PduParser; use self::pdu::PduParser;

View File

@ -664,7 +664,7 @@ impl <T: FileSystemOps> Server<T> {
if n == 0 { if n == 0 {
break; break;
} }
pp.chain.inc_offset(n, true); pp.chain.inc_write_offset(n);
nread += n as u32; nread += n as u32;
} }
pp.w32_at(0, nread as u32); pp.w32_at(0, nread as u32);
@ -692,7 +692,7 @@ impl <T: FileSystemOps> Server<T> {
if n == 0 { if n == 0 {
break; break;
} }
pp.chain.inc_offset(n, false); pp.chain.inc_read_offset(n);
nread += n as u32; nread += n as u32;
} }
pp.read_done()?; pp.read_done()?;

View File

@ -1,46 +1,43 @@
use crate::{vm, disk};
use crate::virtio::{VirtioBus, VirtioDeviceOps, VirtQueue, DeviceConfigArea, Chain};
use std::sync::{RwLock, Arc};
use crate::memory::MemoryManager;
use std::{result, io, fmt, thread};
use crate::devices::virtio_block::Error::IoChainError;
use std::io::Write; use std::io::Write;
use std::sync::{RwLock, Arc};
use std::{result, io, fmt, thread};
use crate::{disk, virtio};
use crate::virtio::{VirtioBus, VirtioDeviceOps, VirtQueue, DeviceConfigArea, Chain};
use crate::memory::MemoryManager;
use crate::disk::DiskImage; use crate::disk::DiskImage;
const VIRTIO_BLK_F_RO: u64 = (1 << 5); const VIRTIO_BLK_F_RO: u64 = (1 << 5);
//const VIRTIO_BLK_F_BLK_SIZE: u64 = (1 << 6); const VIRTIO_BLK_F_BLK_SIZE: u64 = (1 << 6);
const VIRTIO_BLK_F_FLUSH: u64 = (1 << 9); const VIRTIO_BLK_F_FLUSH: u64 = (1 << 9);
//const VIRTIO_BLK_F_DISCARD: u64 = (1 << 13); const VIRTIO_BLK_F_SEG_MAX: u64 = (1 << 2);
//const VIRTIO_BLK_F_WRITE_ZEROES: u64 = (1 << 14);
const VIRTIO_BLK_T_IN: u32 = 0; const VIRTIO_BLK_T_IN: u32 = 0;
const VIRTIO_BLK_T_OUT: u32 = 1; const VIRTIO_BLK_T_OUT: u32 = 1;
const VIRTIO_BLK_T_FLUSH: u32 = 4; const VIRTIO_BLK_T_FLUSH: u32 = 4;
const VIRTIO_BLK_T_GET_ID: u32 = 8; const VIRTIO_BLK_T_GET_ID: u32 = 8;
//const VIRTIO_BLK_T_DISCARD: u32 = 11;
//const VIRTIO_BLK_T_WRITE_ZEROES: u32 = 13;
const VIRTIO_BLK_S_OK: u8 = 0; const VIRTIO_BLK_S_OK: u8 = 0;
const VIRTIO_BLK_S_IOERR: u8 = 1; const VIRTIO_BLK_S_IOERR: u8 = 1;
const VIRTIO_BLK_S_UNSUPP: u8 = 2; const VIRTIO_BLK_S_UNSUPP: u8 = 2;
const SECTOR_SIZE: usize = 512; const SECTOR_SHIFT: usize = 9;
const SECTOR_SIZE: usize = 1 << SECTOR_SHIFT;
const QUEUE_SIZE: usize = 256;
// TODO:
// - feature bits
// - disk image write overlay
// - better error handling for i/o
enum Error { enum Error {
IoChainError(io::Error), IoChainError(io::Error),
DiskRead(disk::Error), DiskRead(disk::Error),
DiskWrite(disk::Error), DiskWrite(disk::Error),
DiskFlush(disk::Error), DiskFlush(disk::Error),
VirtQueueWait(vm::Error), VirtQueueWait(virtio::Error),
InvalidReadDescriptor(usize),
} }
impl From<io::Error> for Error { impl From<io::Error> for Error {
fn from(e: io::Error) -> Self { fn from(e: io::Error) -> Self {
IoChainError(e) Error::IoChainError(e)
} }
} }
@ -53,6 +50,7 @@ impl fmt::Display for Error {
DiskWrite(e) => write!(f, "error writing disk image: {}", e), DiskWrite(e) => write!(f, "error writing disk image: {}", e),
DiskFlush(e) => write!(f, "error flushing disk image: {}", e), DiskFlush(e) => write!(f, "error flushing disk image: {}", e),
VirtQueueWait(e) =>write!(f, "error waiting on virtqueue: {}", e), VirtQueueWait(e) =>write!(f, "error waiting on virtqueue: {}", e),
InvalidReadDescriptor(sz) => write!(f, "virtqueue read descriptor size ({}) is invalid. Not a multiple of sector size", sz),
} }
} }
} }
@ -64,11 +62,20 @@ pub struct VirtioBlock<D: DiskImage+'static> {
enabled_features: u64, enabled_features: u64,
} }
const HEADER_SIZE: usize = 16;
const VIRTIO_ID_BLOCK: u16 = 2; const VIRTIO_ID_BLOCK: u16 = 2;
const CAPACITY_OFFSET: usize = 0;
const SEG_MAX_OFFSET: usize = 12;
const BLK_SIZE_OFFSET: usize = 20;
const CONFIG_SIZE: usize = 24;
impl <D: DiskImage + 'static> VirtioBlock<D> { impl <D: DiskImage + 'static> VirtioBlock<D> {
pub fn new(disk_image: D) -> Self {
let mut config = DeviceConfigArea::new(8); fn new(disk_image: D) -> Self {
config.write_u64(0, disk_image.sector_count()); let mut config = DeviceConfigArea::new(CONFIG_SIZE);
config.write_u64(CAPACITY_OFFSET, disk_image.sector_count());
config.write_u32(SEG_MAX_OFFSET, QUEUE_SIZE as u32 - 2);
config.write_u32(BLK_SIZE_OFFSET, 1024);
VirtioBlock { VirtioBlock {
disk_image: Some(disk_image), disk_image: Some(disk_image),
config, config,
@ -76,18 +83,21 @@ impl <D: DiskImage + 'static> VirtioBlock<D> {
} }
} }
pub fn create(vbus: &mut VirtioBus, disk_image: D) -> vm::Result<()> { pub fn create(vbus: &mut VirtioBus, disk_image: D) -> virtio::Result<()> {
let feature_bits = if disk_image.read_only() { let feature_bits = VIRTIO_BLK_F_FLUSH |
VIRTIO_BLK_F_FLUSH|VIRTIO_BLK_F_RO VIRTIO_BLK_F_BLK_SIZE |
} else { VIRTIO_BLK_F_SEG_MAX |
VIRTIO_BLK_F_FLUSH if disk_image.read_only() {
}; VIRTIO_BLK_F_RO
} else {
0
};
let dev = Arc::new(RwLock::new(VirtioBlock::new(disk_image))); let dev = Arc::new(RwLock::new(VirtioBlock::new(disk_image)));
vbus.new_virtio_device(VIRTIO_ID_BLOCK, dev) vbus.new_virtio_device(VIRTIO_ID_BLOCK, dev)
.set_queue_sizes(&[256]) .set_queue_sizes(&[QUEUE_SIZE])
.set_config_size(8) .set_config_size(CONFIG_SIZE)
.set_features(feature_bits) .set_features(feature_bits)
.register() .register()
} }
@ -109,20 +119,18 @@ impl <D: DiskImage> VirtioDeviceOps for VirtioBlock<D> {
fn start(&mut self, _: &MemoryManager, mut queues: Vec<VirtQueue>) { fn start(&mut self, _: &MemoryManager, mut queues: Vec<VirtQueue>) {
let vq = queues.pop().unwrap(); let vq = queues.pop().unwrap();
let mut dev = match self.disk_image.take() {
Some(d) => VirtioBlockDevice::new(vq, d),
None => {
warn!("Unable to start virtio-block device. Already started?");
return;
}
};
let mut disk = self.disk_image.take().expect("No disk image?");
if let Err(err) = disk.open() {
warn!("Unable to start virtio-block device: {}", err);
return;
}
let mut dev = VirtioBlockDevice::new(vq, disk);
thread::spawn(move || { thread::spawn(move || {
if let Err(err) = dev.run() { if let Err(err) = dev.run() {
warn!("Error running virtio block device: {}", err); warn!("Error running virtio block device: {}", err);
} }
}); });
} }
} }
@ -138,29 +146,31 @@ impl <D: DiskImage> VirtioBlockDevice<D> {
fn run(&mut self) -> Result<()> { fn run(&mut self) -> Result<()> {
loop { loop {
let chain = self.vq.wait_next_chain() let mut chain = self.vq.wait_next_chain()
.map_err(Error::VirtQueueWait)?; .map_err(Error::VirtQueueWait)?;
match MessageHandler::read_header(&mut self.disk, chain) { while chain.remaining_read() >= HEADER_SIZE {
Ok(mut handler) => handler.process_message(), match MessageHandler::read_header(&mut self.disk, &mut chain) {
Err(e) => { Ok(mut handler) => handler.process_message(),
warn!("Error handling virtio_block message: {}", e); Err(e) => {
warn!("Error handling virtio_block message: {}", e);
}
} }
} }
} }
} }
} }
struct MessageHandler<'a, D: DiskImage> { struct MessageHandler<'a,'b, D: DiskImage> {
disk: &'a mut D, disk: &'a mut D,
chain: Chain, chain: &'b mut Chain,
msg_type: u32, msg_type: u32,
sector: u64, sector: u64,
} }
impl <'a, D: DiskImage> MessageHandler<'a, D> { impl <'a,'b, D: DiskImage> MessageHandler<'a,'b, D> {
fn read_header(disk: &'a mut D, mut chain: Chain) -> Result<Self> { fn read_header(disk: &'a mut D, chain: &'b mut Chain) -> Result<Self> {
let msg_type = chain.r32()?; let msg_type = chain.r32()?;
let _ = chain.r32()?; let _ = chain.r32()?;
let sector = chain.r64()?; let sector = chain.r64()?;
@ -192,30 +202,39 @@ impl <'a, D: DiskImage> MessageHandler<'a, D> {
} }
} }
fn sector_round(sz: usize) -> usize {
(sz / SECTOR_SIZE) * SECTOR_SIZE
}
fn handle_io_in(&mut self) -> Result<()> { fn handle_io_in(&mut self) -> Result<()> {
let current = self.chain.current_write_slice(); loop {
let len = Self::sector_round(current.len()); let current = self.chain.current_write_slice();
let buffer = &mut current[..len]; let nsectors = current.len() >> SECTOR_SHIFT;
if nsectors == 0 {
return Ok(())
}
let len = nsectors << SECTOR_SHIFT;
let buffer = &mut current[..len];
self.disk.read_sectors(self.sector, buffer) self.disk.read_sectors(self.sector, buffer)
.map_err(Error::DiskRead)?; .map_err(Error::DiskRead)?;
self.chain.inc_offset(len, true); self.chain.inc_write_offset(len);
Ok(()) self.sector += nsectors as u64;
}
} }
fn handle_io_out(&mut self) -> Result<()> { fn handle_io_out(&mut self) -> Result<()> {
let current = self.chain.current_read_slice(); loop {
let len = Self::sector_round(current.len()); let current = self.chain.current_read_slice();
let buffer = &current[..len]; if current.len() & (SECTOR_SIZE-1) != 0 {
return Err(Error::InvalidReadDescriptor(current.len()));
}
let nsectors = current.len() >> SECTOR_SHIFT;
if nsectors == 0 {
return Ok(())
}
self.disk.write_sectors(self.sector, current)
.map_err(Error::DiskWrite)?;
self.disk.write_sectors(self.sector, buffer) self.chain.inc_read_offset(nsectors << SECTOR_SHIFT);
.map_err(Error::DiskWrite)?; self.sector += nsectors as u64;
self.chain.inc_offset(len, false); }
Ok(())
} }
fn handle_io_flush(&mut self) -> Result<()> { fn handle_io_flush(&mut self) -> Result<()> {

View File

@ -1,6 +1,6 @@
use crate::virtio::{VirtioDeviceOps, VirtQueue, VirtioBus}; use crate::virtio::{VirtioDeviceOps, VirtQueue, VirtioBus, Chain};
use crate::memory::MemoryManager; use crate::memory::MemoryManager;
use crate::{vm, system}; use crate::{system, virtio};
use std::sync::{RwLock, Arc}; use std::sync::{RwLock, Arc};
use std::{fmt, result, thread, io}; use std::{fmt, result, thread, io};
use crate::system::{EPoll,Event}; use crate::system::{EPoll,Event};
@ -15,7 +15,7 @@ const MAC_ADDR_LEN: usize = 6;
pub enum Error { pub enum Error {
ChainWrite(io::Error), ChainWrite(io::Error),
ChainRead(io::Error), ChainRead(io::Error),
ChainIoEvent(vm::Error), ChainIoEvent(system::Error),
SetupPoll(system::Error), SetupPoll(system::Error),
TapRead(io::Error), TapRead(io::Error),
TapWrite(io::Error), TapWrite(io::Error),
@ -42,35 +42,41 @@ type Result<T> = result::Result<T, Error>;
const VIRTIO_NET_F_CSUM: u64 = 1; const VIRTIO_NET_F_CSUM: u64 = 1;
const VIRTIO_NET_F_GUEST_CSUM: u64 = 1 << 1; const VIRTIO_NET_F_GUEST_CSUM: u64 = 1 << 1;
const VIRTIO_NET_F_GUEST_TSO4: u64 = 1 << 7; const VIRTIO_NET_F_GUEST_TSO4: u64 = 1 << 7;
const VIRTIO_NET_F_GUEST_UFO: u64 = 1 << 10; const VIRTIO_NET_F_GUEST_TSO6: u64 = 1 << 8;
const VIRTIO_NET_F_GUEST_ECN : u64 = 1 << 9;
const VIRTIO_NET_F_HOST_TSO4: u64 = 1 << 11; const VIRTIO_NET_F_HOST_TSO4: u64 = 1 << 11;
const VIRTIO_NET_F_HOST_UFO: u64 = 1 << 14; const VIRTIO_NET_F_HOST_TSO6: u64 = 1 << 12;
const VIRTIO_NET_F_HOST_ECN: u64 = 1 << 13;
//const VIRTIO_NET_HDR_SIZE: i32 = 12; const VIRTIO_NET_HDR_SIZE: i32 = 12;
pub struct VirtioNet { pub struct VirtioNet {
_features_supported: u64,
tap: Option<Tap>, tap: Option<Tap>,
} }
impl VirtioNet { impl VirtioNet {
fn new(tap: Tap) -> Self { fn new(tap: Tap, features_supported: u64) -> Self {
VirtioNet{ VirtioNet{
_features_supported: features_supported,
tap: Some(tap) tap: Some(tap)
} }
} }
pub fn create(vbus: &mut VirtioBus, tap: Tap) -> vm::Result<()> { pub fn create(vbus: &mut VirtioBus, tap: Tap) -> virtio::Result<()> {
tap.set_offload(TUN_F_CSUM | TUN_F_UFO | TUN_F_TSO4 | TUN_F_TSO6).unwrap(); tap.set_offload(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6| TUN_F_TSO_ECN).unwrap();
tap.set_vnet_hdr_size(12).unwrap(); tap.set_vnet_hdr_size(VIRTIO_NET_HDR_SIZE).unwrap();
let dev = Arc::new(RwLock::new(VirtioNet::new(tap)));
let feature_bits = let feature_bits =
VIRTIO_NET_F_CSUM | VIRTIO_NET_F_CSUM |
VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_CSUM |
VIRTIO_NET_F_GUEST_TSO4 | VIRTIO_NET_F_GUEST_TSO4 |
VIRTIO_NET_F_GUEST_UFO | VIRTIO_NET_F_GUEST_TSO6 |
VIRTIO_NET_F_GUEST_ECN |
VIRTIO_NET_F_HOST_TSO4 | VIRTIO_NET_F_HOST_TSO4 |
VIRTIO_NET_F_HOST_UFO; VIRTIO_NET_F_HOST_TSO6 |
VIRTIO_NET_F_HOST_ECN;
let dev = Arc::new(RwLock::new(VirtioNet::new(tap, feature_bits)));
vbus.new_virtio_device(VIRTIO_ID_NET, dev) vbus.new_virtio_device(VIRTIO_ID_NET, dev)
.set_queue_sizes(&[256, 256]) .set_queue_sizes(&[256, 256])
.set_config_size(MAC_ADDR_LEN) .set_config_size(MAC_ADDR_LEN)
@ -82,7 +88,7 @@ impl VirtioNet {
pub const TUN_F_CSUM: u32 = 1; pub const TUN_F_CSUM: u32 = 1;
pub const TUN_F_TSO4: u32 = 2; pub const TUN_F_TSO4: u32 = 2;
pub const TUN_F_TSO6: u32 = 4; pub const TUN_F_TSO6: u32 = 4;
pub const TUN_F_UFO: u32= 16; pub const TUN_F_TSO_ECN: u32 = 8;
impl VirtioDeviceOps for VirtioNet { impl VirtioDeviceOps for VirtioNet {
fn start(&mut self, _memory: &MemoryManager, mut queues: Vec<VirtQueue>) { fn start(&mut self, _memory: &MemoryManager, mut queues: Vec<VirtQueue>) {
@ -170,8 +176,6 @@ impl VirtioNetDevice {
self.tap.write_all(&self.tx_frame[..n]) self.tap.write_all(&self.tx_frame[..n])
.map_err(Error::TapWrite)?; .map_err(Error::TapWrite)?;
} }
chain.skip_readable();
chain.flush_chain() chain.flush_chain()
} }
Ok(()) Ok(())
@ -181,17 +185,15 @@ impl VirtioNetDevice {
self.rx_bytes != 0 self.rx_bytes != 0
} }
fn receive_frame(&mut self) -> Result<bool> { fn receive_frame(&mut self, chain: &mut Chain) -> Result<bool> {
if let Some(mut chain) = self.rx.next_chain() { if chain.remaining_write() < self.rx_bytes {
notify!("not enough space for frame");
Ok(false)
} else {
chain.write_all(&self.rx_frame[..self.rx_bytes]) chain.write_all(&self.rx_frame[..self.rx_bytes])
.map_err(Error::ChainWrite)?; .map_err(Error::ChainWrite)?;
self.rx_bytes = 0; self.rx_bytes = 0;
// XXX defer interrupt
chain.flush_chain();
Ok(true) Ok(true)
} else {
self.disable_tap_events();
Ok(false)
} }
} }
@ -202,7 +204,6 @@ impl VirtioNetDevice {
Ok(true) Ok(true)
}, },
Err(e) => if let Some(libc::EAGAIN) = e.raw_os_error() { Err(e) => if let Some(libc::EAGAIN) = e.raw_os_error() {
// handle deferred interrupts
Ok(false) Ok(false)
} else { } else {
Err(Error::TapRead(e)) Err(Error::TapRead(e))
@ -210,16 +211,40 @@ impl VirtioNetDevice {
} }
} }
fn next_rx_chain(&mut self) -> Option<Chain> {
self.rx.next_chain().or_else(|| {
self.disable_tap_events();
None
})
}
fn handle_rx_tap(&mut self) -> Result<()> { fn handle_rx_tap(&mut self) -> Result<()> {
// tap wants to send packets to guest, is an rx chain available?
let mut chain = match self.next_rx_chain() {
Some(chain) => chain,
None => return Ok(()),
};
// If there is already an rx packet pending to send to guest
// first write it to rx chain.
if self.pending_rx() { if self.pending_rx() {
if !self.receive_frame()? { if !self.receive_frame(&mut chain)? {
return Ok(()) return Ok(())
} }
} }
while self.tap_read()? { while self.tap_read()? {
if !self.receive_frame()? { if chain.remaining_write() < self.rx_bytes {
break; // chain is full but there is still data to deliver,
// see if there is another rx chain available.
chain = match self.rx.next_chain() {
Some(chain) => chain,
None => return Ok(()),
};
}
if !self.receive_frame(&mut chain)? {
return Ok(());
} }
} }
Ok(()) Ok(())
@ -227,10 +252,12 @@ impl VirtioNetDevice {
fn handle_rx_queue(&mut self) -> Result<()> { fn handle_rx_queue(&mut self) -> Result<()> {
self.rx.ioevent().read().unwrap(); self.rx.ioevent().read().unwrap();
if !self.tap_event_enabled {
self.enable_tap_poll();
}
if self.pending_rx() { if self.pending_rx() {
if self.receive_frame()? { self.handle_rx_tap()?;
self.enable_tap_poll();
}
} }
Ok(()) Ok(())
} }

View File

@ -3,9 +3,8 @@ use std::sync::{Arc,RwLock};
use std::thread; use std::thread;
use std::fs::File; use std::fs::File;
use crate::virtio::{VirtioDeviceOps,VirtioBus,VirtQueue}; use crate::virtio::{VirtioDeviceOps,VirtioBus,VirtQueue,Result};
use crate::memory::MemoryManager; use crate::memory::MemoryManager;
use crate::vm::Result;
const VIRTIO_ID_RANDOM: u16 = 4; const VIRTIO_ID_RANDOM: u16 = 4;

View File

@ -3,9 +3,8 @@ use std::io::{self,Write,Read};
use std::thread::spawn; use std::thread::spawn;
use termios::*; use termios::*;
use crate::virtio::{VirtioDeviceOps,VirtioBus, VirtQueue}; use crate::virtio::{VirtioDeviceOps,VirtioBus, VirtQueue,Result};
use crate::memory::MemoryManager; use crate::memory::MemoryManager;
use crate::vm::Result;
const VIRTIO_ID_CONSOLE: u16 = 3; const VIRTIO_ID_CONSOLE: u16 = 3;

View File

@ -2,10 +2,10 @@ use std::os::unix::io::{AsRawFd,RawFd};
use std::sync::{RwLock, Arc}; use std::sync::{RwLock, Arc};
use std::thread; use std::thread;
use crate::{vm, system}; use crate::{system, virtio};
use crate::system::EPoll; use crate::system::{EPoll,EventFd};
use crate::memory::{MemoryManager, DrmDescriptor}; use crate::memory::{MemoryManager, DrmDescriptor};
use crate::virtio::{VirtQueue, EventFd, Chain, VirtioBus, VirtioDeviceOps}; use crate::virtio::{VirtQueue, VirtioBus, VirtioDeviceOps, Chain};
use crate::devices::virtio_wl::{vfd::VfdManager, consts::*, Error, Result, VfdObject}; use crate::devices::virtio_wl::{vfd::VfdManager, consts::*, Error, Result, VfdObject};
use crate::system::ioctl::ioctl_with_ref; use crate::system::ioctl::ioctl_with_ref;
@ -27,7 +27,7 @@ impl VirtioWayland {
VirtioWayland { feature_bits: 0 } VirtioWayland { feature_bits: 0 }
} }
pub fn create(vbus: &mut VirtioBus) -> vm::Result<()> { pub fn create(vbus: &mut VirtioBus) -> virtio::Result<()> {
let dev = Arc::new(RwLock::new(VirtioWayland::new())); let dev = Arc::new(RwLock::new(VirtioWayland::new()));
vbus.new_virtio_device(VIRTIO_ID_WL, dev) vbus.new_virtio_device(VIRTIO_ID_WL, dev)
.set_num_queues(2) .set_num_queues(2)
@ -40,7 +40,7 @@ impl VirtioWayland {
} }
fn create_device(memory: MemoryManager, in_vq: VirtQueue, out_vq: VirtQueue, transition: bool) -> Result<WaylandDevice> { fn create_device(memory: MemoryManager, in_vq: VirtQueue, out_vq: VirtQueue, transition: bool) -> Result<WaylandDevice> {
let kill_evt = EventFd::new().map_err(Error::IoEventError)?; let kill_evt = EventFd::new().map_err(Error::EventFdCreate)?;
let dev = WaylandDevice::new(memory, in_vq, out_vq, kill_evt, transition)?; let dev = WaylandDevice::new(memory, in_vq, out_vq, kill_evt, transition)?;
Ok(dev) Ok(dev)
} }

View File

@ -1,7 +1,7 @@
use std::os::unix::io::RawFd; use std::os::unix::io::RawFd;
use std::{result, io, fmt}; use std::{result, io, fmt};
use crate::{vm, system}; use crate::system;
use crate::memory::Error as MemError; use crate::memory::Error as MemError;
use crate::system::FileDesc; use crate::system::FileDesc;
@ -82,13 +82,14 @@ pub trait VfdObject {
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
IoEventError(vm::Error), IoEventError(system::Error),
EventFdCreate(system::Error),
ChainIoError(io::Error), ChainIoError(io::Error),
UnexpectedCommand(u32), UnexpectedCommand(u32),
ShmAllocFailed(system::Error), ShmAllocFailed(system::Error),
RegisterMemoryFailed(MemError), RegisterMemoryFailed(MemError),
CreatePipesFailed(system::Error), CreatePipesFailed(system::Error),
SocketReceive(system::Error), SocketReceive(system::ErrnoError),
SocketConnect(io::Error), SocketConnect(io::Error),
PipeReceive(io::Error), PipeReceive(io::Error),
SendVfd(io::Error), SendVfd(io::Error),
@ -96,7 +97,7 @@ pub enum Error {
TooManySendVfds(usize), TooManySendVfds(usize),
FailedPollContextCreate(system::Error), FailedPollContextCreate(system::Error),
FailedPollAdd(system::Error), FailedPollAdd(system::Error),
DmaSync(system::Error), DmaSync(system::ErrnoError),
DmaBuf(MemError), DmaBuf(MemError),
DmaBufSize(system::Error), DmaBufSize(system::Error),
} }
@ -106,6 +107,7 @@ impl fmt::Display for Error {
use Error::*; use Error::*;
match self { match self {
IoEventError(e) => write!(f, "error reading from ioevent fd: {}", e), IoEventError(e) => write!(f, "error reading from ioevent fd: {}", e),
EventFdCreate(e) => write!(f, "error creating eventfd: {}", e),
ChainIoError(e) => write!(f, "i/o error on virtio chain operation: {}", e), ChainIoError(e) => write!(f, "i/o error on virtio chain operation: {}", e),
UnexpectedCommand(cmd) => write!(f, "unexpected virtio wayland command: {}", cmd), UnexpectedCommand(cmd) => write!(f, "unexpected virtio wayland command: {}", cmd),
ShmAllocFailed(e) => write!(f, "failed to allocate shared memory: {}", e), ShmAllocFailed(e) => write!(f, "failed to allocate shared memory: {}", e),

View File

@ -1,17 +1,18 @@
use crate::system::{MemoryFd, BitVec}; use crate::system::MemoryFd;
use crate::util::BitSet;
use crate::disk::{Result, Error, SECTOR_SIZE, DiskImage}; use crate::disk::{Result, Error, SECTOR_SIZE, DiskImage};
use std::io::SeekFrom; use std::io::SeekFrom;
pub struct MemoryOverlay { pub struct MemoryOverlay {
memory: MemoryFd, memory: MemoryFd,
written_sectors: BitVec, written_sectors: BitSet,
} }
impl MemoryOverlay { impl MemoryOverlay {
pub fn new() -> Result<Self> { pub fn new() -> Result<Self> {
let memory = MemoryFd::new_memfd(0, false) let memory = MemoryFd::new_memfd(0, false)
.map_err(Error::MemoryOverlayCreate)?; .map_err(Error::MemoryOverlayCreate)?;
let written_sectors = BitVec::new(); let written_sectors = BitSet::new();
Ok(MemoryOverlay { memory, written_sectors }) Ok(MemoryOverlay { memory, written_sectors })
} }
@ -30,14 +31,14 @@ impl MemoryOverlay {
for n in 0..sector_count { for n in 0..sector_count {
let idx = start as usize + n; let idx = start as usize + n;
self.written_sectors.set_bit(idx); self.written_sectors.insert(idx);
} }
Ok(()) Ok(())
} }
pub fn read_sectors<D: DiskImage>(&mut self, disk: &mut D, start: u64, buffer: &mut [u8]) -> Result<()> { pub fn read_sectors<D: DiskImage>(&mut self, disk: &mut D, start: u64, buffer: &mut [u8]) -> Result<()> {
let sector_count = buffer.len() / SECTOR_SIZE; let sector_count = buffer.len() / SECTOR_SIZE;
if (0..sector_count).all(|i| !self.written_sectors.get_bit(i)) { if (0..sector_count).all(|i| !self.written_sectors.get(i)) {
return disk.read_sectors(start, buffer); return disk.read_sectors(start, buffer);
} }
@ -45,7 +46,7 @@ impl MemoryOverlay {
let sector = start + n as u64; let sector = start + n as u64;
let offset = n * SECTOR_SIZE; let offset = n * SECTOR_SIZE;
let sector_buffer = &mut buffer[offset..offset+SECTOR_SIZE]; let sector_buffer = &mut buffer[offset..offset+SECTOR_SIZE];
if self.written_sectors.get_bit(sector as usize) { if self.written_sectors.get(sector as usize) {
self.read_single_sector(sector, sector_buffer)?; self.read_single_sector(sector, sector_buffer)?;
} else { } else {
disk.read_sectors(sector, sector_buffer)?; disk.read_sectors(sector, sector_buffer)?;

View File

@ -23,6 +23,7 @@ pub enum OpenType {
} }
pub trait DiskImage: Sync+Send { pub trait DiskImage: Sync+Send {
fn open(&mut self) -> Result<()>;
fn read_only(&self) -> bool; fn read_only(&self) -> bool;
fn sector_count(&self) -> u64; fn sector_count(&self) -> u64;
fn disk_file(&mut self) -> Result<&mut File>; fn disk_file(&mut self) -> Result<&mut File>;
@ -61,6 +62,7 @@ pub type Result<T> = result::Result<T, Error>;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
ReadOnly, ReadOnly,
ImageDoesntExit(PathBuf),
DiskOpen(PathBuf,io::Error), DiskOpen(PathBuf,io::Error),
DiskOpenTooShort(PathBuf), DiskOpenTooShort(PathBuf),
DiskRead(io::Error), DiskRead(io::Error),
@ -78,6 +80,7 @@ impl fmt::Display for Error {
use Error::*; use Error::*;
match self { match self {
ReadOnly => write!(f, "attempted write to read-only device"), ReadOnly => write!(f, "attempted write to read-only device"),
ImageDoesntExit(path) => write!(f, "disk image {} does not exist", path.display()),
DiskOpen(path, err) => write!(f, "failed to open disk image {}: {}", path.display(), err), DiskOpen(path, err) => write!(f, "failed to open disk image {}: {}", path.display(), err),
DiskOpenTooShort(path) => write!(f, "failed to open disk image {} because file is too short", path.display()), DiskOpenTooShort(path) => write!(f, "failed to open disk image {} because file is too short", path.display()),
DiskRead(err) => write!(f, "error reading from disk image: {}", err), DiskRead(err) => write!(f, "error reading from disk image: {}", err),

View File

@ -3,7 +3,7 @@ use std::fs::{File, OpenOptions};
use std::io::{Write, Read, SeekFrom, Seek}; use std::io::{Write, Read, SeekFrom, Seek};
use crate::disk::Error::DiskRead; use crate::disk::Error::DiskRead;
use crate::disk::memory::MemoryOverlay; use crate::disk::memory::MemoryOverlay;
use std::path::PathBuf; use std::path::{PathBuf, Path};
pub struct RawDiskImage { pub struct RawDiskImage {
@ -17,24 +17,37 @@ pub struct RawDiskImage {
} }
impl RawDiskImage { impl RawDiskImage {
pub fn new<P: Into<PathBuf>>(path: P, open_type: OpenType) -> Self { fn get_nsectors(path: &Path, offset: usize) -> Result<u64> {
if let Ok(meta) = path.metadata() {
Ok((meta.len() - offset as u64) / SECTOR_SIZE as u64)
} else {
Err(Error::ImageDoesntExit(path.to_path_buf()))
}
}
#[allow(dead_code)]
pub fn new<P: Into<PathBuf>>(path: P, open_type: OpenType) -> Result<Self> {
Self::new_with_offset(path, open_type, 0) Self::new_with_offset(path, open_type, 0)
} }
pub fn new_with_offset<P: Into<PathBuf>>(path: P, open_type: OpenType, offset: usize) -> Self { pub fn new_with_offset<P: Into<PathBuf>>(path: P, open_type: OpenType, offset: usize) -> Result<Self> {
let path = path.into(); let path = path.into();
RawDiskImage { let nsectors = Self::get_nsectors(&path, offset)?;
Ok(RawDiskImage {
path, path,
open_type, open_type,
file: None, file: None,
offset, offset,
nsectors: 0, nsectors,
disk_image_id: Vec::new(), disk_image_id: Vec::new(),
overlay: None, overlay: None,
} })
} }
pub fn open(&mut self) -> Result<()> { }
impl DiskImage for RawDiskImage {
fn open(&mut self) -> Result<()> {
let meta = self.path.metadata() let meta = self.path.metadata()
.map_err(|e| Error::DiskOpen(self.path.clone(), e))?; .map_err(|e| Error::DiskOpen(self.path.clone(), e))?;
@ -42,8 +55,6 @@ impl RawDiskImage {
return Err(Error::DiskOpenTooShort(self.path.clone())) return Err(Error::DiskOpenTooShort(self.path.clone()))
} }
self.nsectors = (meta.len() - self.offset as u64) / SECTOR_SIZE as u64;
let file = OpenOptions::new() let file = OpenOptions::new()
.read(true) .read(true)
.write(self.open_type == OpenType::ReadWrite) .write(self.open_type == OpenType::ReadWrite)
@ -59,9 +70,7 @@ impl RawDiskImage {
} }
Ok(()) Ok(())
} }
}
impl DiskImage for RawDiskImage {
fn read_only(&self) -> bool { fn read_only(&self) -> bool {
self.open_type == OpenType::ReadOnly self.open_type == OpenType::ReadOnly
} }

View File

@ -11,17 +11,18 @@ pub struct RealmFSImage {
// Just pass everything through to raw image for now // Just pass everything through to raw image for now
impl RealmFSImage { impl RealmFSImage {
pub fn new<P: Into<PathBuf>>(path: P, open_type: OpenType) -> Self { pub fn new<P: Into<PathBuf>>(path: P, open_type: OpenType) -> Result<Self> {
assert_ne!(open_type, OpenType::ReadWrite);
let offset = HEADER_SECTOR_COUNT * SECTOR_SIZE; let offset = HEADER_SECTOR_COUNT * SECTOR_SIZE;
let raw = RawDiskImage::new_with_offset(path, open_type, offset); let raw = RawDiskImage::new_with_offset(path, open_type, offset)?;
RealmFSImage { raw } Ok(RealmFSImage { raw })
}
pub fn open(&mut self) -> Result<()> {
self.raw.open()
} }
} }
impl DiskImage for RealmFSImage { impl DiskImage for RealmFSImage {
fn open(&mut self) -> Result<()> {
self.raw.open()
}
fn read_only(&self) -> bool { fn read_only(&self) -> bool {
self.raw.read_only() self.raw.read_only()
} }

36
src/kvm/error.rs Normal file
View File

@ -0,0 +1,36 @@
use std::{fmt, result};
use crate::system::Error as SysError;
use crate::system::ErrnoError;
pub type Result<T> = result::Result<T, Error>;
/// Errors reported by the `kvm` module.
///
/// The descriptions below mirror the messages produced by this type's
/// `Display` implementation.
#[derive(Debug)]
pub enum Error {
/// `/dev/kvm` could not be opened.
OpenKvm(ErrnoError),
/// The kernel does not support a required KVM extension; carries the extension number.
MissingRequiredExtension(u32),
/// The KVM API version was not the expected one.
BadVersion,
/// An ioctl failed; carries the ioctl's name and the errno-based error.
IoctlError(&'static str, ErrnoError),
/// Creating the eventfd for an ioeventfd failed.
IoEventCreate(SysError),
}
impl Error {
pub fn is_interrupted(&self) -> bool {
match self {
Error::IoctlError(_, e) => e.is_interrupted(),
_ => false,
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use Error::*;
match self {
OpenKvm(e) => write!(f, "could not open /dev/kvm: {}", e),
MissingRequiredExtension(ext) => write!(f, "kernel does not support a required kvm extension: {}", ext),
BadVersion => write!(f, "unexpected kvm api version"),
IoctlError(name, err) => write!(f, "failed to call {} ioctl: {}", name, err),
IoEventCreate(e) => write!(f, "failed to create ioeventfd: {}", e),
}
}
}

View File

@ -1,23 +1,20 @@
use libc::{self, c_char, c_ulong}; use libc::{self, c_char, c_ulong};
use std::os::unix::io::RawFd; use std::os::unix::io::RawFd;
use std::ffi::CString; use std::ffi::CString;
use std::fmt;
use crate::system::ioctl::{ioctl_with_val,ioctl_with_ref,ioctl_with_mut_ref}; use crate::system::ioctl::{ioctl_with_val,ioctl_with_ref,ioctl_with_mut_ref};
use crate::vm::{Result,Error,ErrorKind}; use crate::kvm::{Result, Error};
use crate::system; use crate::system::ErrnoError;
use crate::vm::arch::KvmRegs;
use std::result;
const KVMIO: u64 = 0xAE; const KVMIO: u64 = 0xAE;
const KVM_GET_API_VERSION: c_ulong = io! (KVMIO, 0x00); const KVM_GET_API_VERSION: c_ulong = io! (KVMIO, 0x00);
const KVM_CREATE_VM: c_ulong = io! (KVMIO, 0x01); const KVM_CREATE_VM: c_ulong = io! (KVMIO, 0x01);
const KVM_CHECK_EXTENSION: c_ulong = io! (KVMIO, 0x03); const KVM_CHECK_EXTENSION: c_ulong = io! (KVMIO, 0x03);
const KVM_GET_SUPPORTED_CPUID: c_ulong = iorw! (KVMIO, 0x05, 8);
const KVM_SET_TSS_ADDR: c_ulong = io! (KVMIO, 0x47);
const KVM_CREATE_IRQCHIP: c_ulong = io! (KVMIO, 0x60); const KVM_CREATE_IRQCHIP: c_ulong = io! (KVMIO, 0x60);
const KVM_CREATE_PIT2: c_ulong = iow! (KVMIO, 0x77, 64);
const KVM_GET_VCPU_MMAP_SIZE: c_ulong = io! (KVMIO, 0x04); const KVM_GET_VCPU_MMAP_SIZE: c_ulong = io! (KVMIO, 0x04);
const KVM_CREATE_VCPU: c_ulong = io! (KVMIO, 0x41); const KVM_CREATE_VCPU: c_ulong = io! (KVMIO, 0x41);
const KVM_SET_USER_MEMORY_REGION: c_ulong = iow! (KVMIO, 0x46, 32); const KVM_SET_USER_MEMORY_REGION: c_ulong = iow! (KVMIO, 0x46, 32);
@ -27,14 +24,6 @@ const KVM_IOEVENTFD: c_ulong = iow! (KVMIO, 0x79, 64);
const KVM_RUN: c_ulong = io! (KVMIO, 0x80); const KVM_RUN: c_ulong = io! (KVMIO, 0x80);
const KVM_GET_REGS: c_ulong = ior! (KVMIO, 0x81, 144); const KVM_GET_REGS: c_ulong = ior! (KVMIO, 0x81, 144);
const KVM_SET_REGS: c_ulong = iow! (KVMIO, 0x82, 144); const KVM_SET_REGS: c_ulong = iow! (KVMIO, 0x82, 144);
const KVM_GET_SREGS: c_ulong = ior! (KVMIO, 0x83, 312);
const KVM_SET_SREGS: c_ulong = iow! (KVMIO, 0x84, 312);
const KVM_SET_MSRS: c_ulong = iow! (KVMIO, 0x89, 8);
const KVM_SET_FPU: c_ulong = iow! (KVMIO, 0x8d, 416);
const KVM_GET_LAPIC: c_ulong = ior! (KVMIO, 0x8e, 1024);
const KVM_SET_LAPIC: c_ulong = iow! (KVMIO, 0x8f, 1024);
const KVM_SET_CPUID2: c_ulong = iow! (KVMIO, 0x90, 8);
struct InnerFd(RawFd); struct InnerFd(RawFd);
impl InnerFd { impl InnerFd {
@ -53,20 +42,18 @@ fn raw_open_kvm() -> Result<RawFd> {
let path = CString::new("/dev/kvm").unwrap(); let path = CString::new("/dev/kvm").unwrap();
let fd = unsafe { libc::open(path.as_ptr() as *const c_char, libc::O_RDWR) }; let fd = unsafe { libc::open(path.as_ptr() as *const c_char, libc::O_RDWR) };
if fd < 0 { if fd < 0 {
return Err(Error::from_last_errno()); return Err(Error::OpenKvm(ErrnoError::last_os_error()));
} }
Ok(fd) Ok(fd)
} }
impl SysFd { impl SysFd {
pub fn open() -> Result<SysFd> { pub fn open() -> Result<SysFd> {
match raw_open_kvm() { let fd = raw_open_kvm()?;
Ok(fd) => Ok(SysFd(InnerFd(fd))), Ok(SysFd(InnerFd(fd)))
Err(e) => Err(Error::new(ErrorKind::OpenDeviceFailed, e)),
}
} }
fn raw(&self) -> RawFd { self.0.raw() } pub fn raw(&self) -> RawFd { self.0.raw() }
} }
pub struct VmFd(InnerFd); pub struct VmFd(InnerFd);
@ -75,7 +62,7 @@ impl VmFd {
fn new(fd: RawFd) -> VmFd { fn new(fd: RawFd) -> VmFd {
VmFd( InnerFd(fd) ) VmFd( InnerFd(fd) )
} }
fn raw(&self) -> RawFd { self.0.raw() } pub fn raw(&self) -> RawFd { self.0.raw() }
} }
pub struct VcpuFd(InnerFd); pub struct VcpuFd(InnerFd);
@ -91,21 +78,21 @@ impl VcpuFd {
pub fn kvm_check_extension(sysfd: &SysFd, extension: u32) -> Result<u32> { pub fn kvm_check_extension(sysfd: &SysFd, extension: u32) -> Result<u32> {
unsafe { unsafe {
ioctl_with_val(sysfd.raw(), KVM_CHECK_EXTENSION, extension as c_ulong) ioctl_with_val(sysfd.raw(), KVM_CHECK_EXTENSION, extension as c_ulong)
.map_err(|e| ioctl_err("KVM_CHECK_EXTENSION", e)) .map_err(|e| Error::IoctlError("KVM_CHECK_EXTENSION", e))
} }
} }
pub fn kvm_get_api_version(sysfd: &SysFd) -> Result<u32> { pub fn kvm_get_api_version(sysfd: &SysFd) -> Result<u32> {
unsafe { unsafe {
ioctl_with_val(sysfd.raw(), KVM_GET_API_VERSION, 0) ioctl_with_val(sysfd.raw(), KVM_GET_API_VERSION, 0)
.map_err(|e| ioctl_err("KVM_GET_API_VERSION", e)) .map_err(|e| Error::IoctlError("KVM_GET_API_VERSION", e))
} }
} }
pub fn kvm_create_vm(sysfd: &SysFd) -> Result<VmFd> { pub fn kvm_create_vm(sysfd: &SysFd) -> Result<VmFd> {
let fd = unsafe { let fd = unsafe {
ioctl_with_val(sysfd.raw(), KVM_CREATE_VM, 0) ioctl_with_val(sysfd.raw(), KVM_CREATE_VM, 0)
.map_err(|e| ioctl_err("KVM_CREATE_VM", e))? .map_err(|e| Error::IoctlError("KVM_CREATE_VM", e))?
}; };
Ok(VmFd::new(fd as RawFd)) Ok(VmFd::new(fd as RawFd))
} }
@ -113,73 +100,7 @@ pub fn kvm_create_vm(sysfd: &SysFd) -> Result<VmFd> {
pub fn kvm_get_vcpu_mmap_size(sysfd: &SysFd) -> Result<u32> { pub fn kvm_get_vcpu_mmap_size(sysfd: &SysFd) -> Result<u32> {
unsafe { unsafe {
ioctl_with_val(sysfd.raw(), KVM_GET_VCPU_MMAP_SIZE, 0) ioctl_with_val(sysfd.raw(), KVM_GET_VCPU_MMAP_SIZE, 0)
.map_err(|e| ioctl_err("KVM_GET_VCPU_MMAP_SIZE", e)) .map_err(|e| Error::IoctlError("KVM_GET_VCPU_MMAP_SIZE", e))
}
}
/// A single CPUID entry as exchanged with the kernel (`#[repr(C)]`, 40 bytes).
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmCpuIdEntry {
    pub function: u32,
    pub index: u32,
    pub flags: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
    padding: [u32; 3]
}

/// Capacity of the fixed entry table inside `KvmCpuId2`.
const KVM_CPUID_MAX_ENTRIES:usize = 256;

/// Fixed-capacity CPUID table: an entry count followed by the entries.
///
/// Only the first `nent` entries are meaningful.
#[repr(C)]
pub struct KvmCpuId2 {
    nent: u32,
    padding: u32,
    entries: [KvmCpuIdEntry; KVM_CPUID_MAX_ENTRIES]
}

impl KvmCpuId2 {
    /// Creates a table of zeroed entries whose count is set to the full
    /// capacity (`KVM_CPUID_MAX_ENTRIES`).
    pub fn new() -> KvmCpuId2 {
        KvmCpuId2 {
            nent: KVM_CPUID_MAX_ENTRIES as u32,
            padding: 0,
            entries: [KvmCpuIdEntry::default(); KVM_CPUID_MAX_ENTRIES],
        }
    }

    /// Builds a table holding exactly `entries`, in order.
    ///
    /// # Panics
    /// Panics with "Too many cpuid entries" if more than
    /// `KVM_CPUID_MAX_ENTRIES` entries are supplied.
    pub fn new_from_entries(entries: Vec<KvmCpuIdEntry>) -> KvmCpuId2 {
        let count = entries.len();
        // Validate before building the (large) table.
        assert!(count <= KVM_CPUID_MAX_ENTRIES, "Too many cpuid entries");
        let mut table = KvmCpuId2::new();
        // Entries are `Copy`, so one slice copy replaces the element loop.
        table.entries[..count].copy_from_slice(&entries);
        table.nent = count as u32;
        table
    }

    /// Returns a copy of the first `nent` entries.
    ///
    /// # Panics
    /// Panics if `nent` exceeds the table capacity.
    pub fn get_entries(&self) -> Vec<KvmCpuIdEntry> {
        self.entries[..self.nent as usize].to_vec()
    }
}
pub fn kvm_get_supported_cpuid(sysfd: &SysFd, cpuid: &mut KvmCpuId2) -> Result<u32> {
unsafe {
ioctl_with_mut_ref(sysfd.raw(), KVM_GET_SUPPORTED_CPUID, cpuid)
.map_err(|e| ioctl_err("KVM_GET_SUPPORTED_CPUID", e))
}
}
pub fn kvm_set_cpuid2(cpufd: &VcpuFd, cpuid: &KvmCpuId2) -> Result<u32> {
unsafe {
ioctl_with_ref(cpufd.raw(), KVM_SET_CPUID2, cpuid)
.map_err(|e| ioctl_err("KVM_SET_CPUID2", e))
} }
} }
@ -204,50 +125,18 @@ impl KvmUserspaceMemoryRegion {
} }
} }
pub fn kvm_set_user_memory_region(vmfd: &VmFd, region: &KvmUserspaceMemoryRegion) -> Result<u32> { pub fn kvm_set_user_memory_region(vmfd: &VmFd, region: &KvmUserspaceMemoryRegion) -> Result<()> {
unsafe { call_ioctl_with_ref("KVM_SET_USER_MEMORY_REGION",vmfd.raw(), KVM_SET_USER_MEMORY_REGION, region)
ioctl_with_ref(vmfd.raw(), KVM_SET_USER_MEMORY_REGION, region)
.map_err(|e| ioctl_err("KVM_SET_USER_MEMORY_REGION", e))
}
} }
#[repr(C)] pub fn kvm_create_irqchip(vmfd: &VmFd) -> Result<()> {
pub struct KvmPitConfig { call_ioctl_with_val("KVM_CREATE_IRQCHIP", vmfd.raw(), KVM_CREATE_IRQCHIP, 0)
flags: u32,
padding: [u32; 15],
}
impl KvmPitConfig {
pub fn new(flags: u32) -> KvmPitConfig {
KvmPitConfig { flags, padding: [0; 15] }
}
}
pub fn kvm_create_pit2(vmfd: &VmFd, config: &KvmPitConfig) -> Result<u32> {
unsafe {
ioctl_with_ref(vmfd.raw(), KVM_CREATE_PIT2, config)
.map_err(|e| ioctl_err("KVM_CREATE_PIT2", e))
}
}
pub fn kvm_create_irqchip(vmfd: &VmFd) -> Result<u32> {
unsafe {
ioctl_with_val(vmfd.raw(), KVM_CREATE_IRQCHIP, 0)
.map_err(|e| ioctl_err("KVM_CREATE_IRQCHIP", e))
}
}
pub fn kvm_set_tss_addr(vmfd: &VmFd, addr: u32) -> Result<u32> {
unsafe {
ioctl_with_val(vmfd.raw(), KVM_SET_TSS_ADDR, addr as c_ulong)
.map_err(|e| ioctl_err("KVM_SET_TSS_ADDR", e))
}
} }
pub fn kvm_create_vcpu(vmfd: &VmFd, cpu_id: u32) -> Result<VcpuFd> { pub fn kvm_create_vcpu(vmfd: &VmFd, cpu_id: u32) -> Result<VcpuFd> {
let fd = unsafe { let fd = unsafe {
ioctl_with_val(vmfd.raw(), KVM_CREATE_VCPU, cpu_id as c_ulong) ioctl_with_val(vmfd.raw(), KVM_CREATE_VCPU, cpu_id as c_ulong)
.map_err(|e| ioctl_err("KVM_CREATE_VCPU", e))? .map_err(|e| Error::IoctlError("KVM_CREATE_VCPU", e))?
}; };
Ok(VcpuFd::new(fd as RawFd)) Ok(VcpuFd::new(fd as RawFd))
} }
@ -264,11 +153,8 @@ impl KvmIrqLevel {
} }
} }
pub fn kvm_irq_line(vmfd: &VmFd, level: &KvmIrqLevel) -> Result<u32> { pub fn kvm_irq_line(vmfd: &VmFd, level: &KvmIrqLevel) -> Result<()> {
unsafe { call_ioctl_with_ref("KVM_IRQ_LINE", vmfd.raw(), KVM_IRQ_LINE, level)
ioctl_with_ref(vmfd.raw(), KVM_IRQ_LINE, level)
.map_err(|e| ioctl_err("KVM_IRQ_LINE", e))
}
} }
#[repr(C)] #[repr(C)]
@ -287,11 +173,8 @@ impl KvmIrqFd {
} }
} }
pub fn kvm_irqfd(vmfd: &VmFd, irqfd: &KvmIrqFd) -> Result<u32> { pub fn kvm_irqfd(vmfd: &VmFd, irqfd: &KvmIrqFd) -> Result<()> {
unsafe { call_ioctl_with_ref("KVM_IRQFD", vmfd.raw(), KVM_IRQFD, irqfd)
ioctl_with_ref(vmfd.raw(), KVM_IRQFD, irqfd)
.map_err(|e| ioctl_err("KVM_IRQFD", e))
}
} }
pub const IOEVENTFD_FLAG_DATAMATCH: u32 = 1; pub const IOEVENTFD_FLAG_DATAMATCH: u32 = 1;
@ -329,285 +212,45 @@ impl KvmIoEventFd {
} }
} }
pub fn kvm_ioeventfd(vmfd: &VmFd, ioeventfd: &KvmIoEventFd) -> Result<u32> { pub fn kvm_ioeventfd(vmfd: &VmFd, ioeventfd: &KvmIoEventFd) -> Result<()> {
call_ioctl_with_ref("KVM_IOEVENTFD", vmfd.raw(), KVM_IOEVENTFD, ioeventfd)
}
/// Issues the `KVM_GET_REGS` ioctl on the vcpu descriptor, passing `regs`
/// as the mutable ioctl argument.
///
/// A failure is reported as `Error::IoctlError` tagged with "KVM_GET_REGS".
pub fn kvm_get_regs(cpufd: &VcpuFd, regs: &mut KvmRegs) -> Result<()> {
call_ioctl_with_mut_ref("KVM_GET_REGS", cpufd.raw(), KVM_GET_REGS, regs)
}
/// Issues the `KVM_SET_REGS` ioctl on the vcpu descriptor, passing `regs`
/// as the read-only ioctl argument.
///
/// A failure is reported as `Error::IoctlError` tagged with "KVM_SET_REGS".
pub fn kvm_set_regs(cpufd: &VcpuFd, regs: &KvmRegs) -> Result<()> {
call_ioctl_with_ref("KVM_SET_REGS", cpufd.raw(), KVM_SET_REGS, regs)
}
/// Issues the `KVM_RUN` ioctl on the vcpu descriptor with an argument of 0.
///
/// The ioctl's numeric return value is discarded on success; a failure is
/// reported as `Error::IoctlError` tagged with "KVM_RUN".
pub fn kvm_run(cpufd: &VcpuFd) -> Result<()> {
call_ioctl_with_val("KVM_RUN", cpufd.raw(), KVM_RUN, 0)
}
fn call_ioctl(name: &'static str, result: result::Result<u32, ErrnoError>) -> Result<()> {
result.map_err(|e| Error::IoctlError(name, e))?;
Ok(())
}
fn call_ioctl_with_ref<T>(name: &'static str, fd: RawFd, request: c_ulong, arg: &T) -> Result<()> {
unsafe { unsafe {
ioctl_with_ref(vmfd.raw(), KVM_IOEVENTFD, ioeventfd) ioctl_with_ref(fd, request, arg)
.map_err(|e| ioctl_err("KVM_IOEVENTFD", e)) .map_err(|e| Error::IoctlError(name, e))?;
Ok(())
} }
} }
fn call_ioctl_with_mut_ref<T>(name: &'static str, fd: RawFd, request: c_ulong, arg: &mut T) -> Result<()> {
#[repr(C)]
pub struct KvmLapicState {
pub regs: [u8; 1024]
}
impl KvmLapicState {
pub fn new() -> KvmLapicState {
KvmLapicState { regs: [0; 1024] }
}
}
pub fn kvm_get_lapic(cpufd: &VcpuFd, lapic_state: &mut KvmLapicState) -> Result<u32> {
unsafe { unsafe {
ioctl_with_mut_ref(cpufd.raw(), KVM_GET_LAPIC, lapic_state) ioctl_with_mut_ref(fd, request, arg)
.map_err(|e| ioctl_err("KVM_GET_LAPIC", e)) .map_err(|e| Error::IoctlError(name, e))?;
Ok(())
} }
} }
pub fn kvm_set_lapic(cpufd: &VcpuFd, lapic_state: &KvmLapicState) -> Result<u32> { fn call_ioctl_with_val(name: &'static str, fd: RawFd, request: c_ulong, val: c_ulong) -> Result<()> {
unsafe { unsafe {
ioctl_with_ref(cpufd.raw(), KVM_SET_LAPIC, lapic_state) call_ioctl(name, ioctl_with_val(fd, request, val))
.map_err(|e| ioctl_err("KVM_SET_LAPIC", e))
} }
} }
/// Segment register description exchanged with the kernel (`#[repr(C)]`, 24 bytes).
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmSegment {
    base: u64,
    limit: u32,
    selector: u16,
    stype: u8,
    present: u8,
    dpl: u8,
    db: u8,
    s: u8,
    l: u8,
    g: u8,
    avl: u8,
    unusable: u8,
    padding: u8,
}

impl KvmSegment {
    /// Builds a segment from `base`, `limit`, `selector` and a packed `flags` word.
    ///
    /// See [`KvmSegment::setup`] for how `flags` is unpacked.
    pub fn new(base: u64, limit: u32, selector: u16, flags: u16) -> KvmSegment {
        let mut segment = KvmSegment::default();
        segment.setup(base, limit, selector, flags);
        segment
    }

    /// Overwrites every field except `padding` from the arguments.
    ///
    /// `flags` is unpacked as: bits 0-3 type, bit 4 `s`, bits 5-6 `dpl`,
    /// bit 7 `present`, bit 12 `avl`, bit 13 `l`, bit 14 `db`, bit 15 `g`.
    /// `unusable` is set to 1 exactly when `present` is 0.
    pub fn setup(&mut self, base: u64, limit: u32, selector: u16, flags: u16) {
        // Extracts a single flag bit as 0 or 1.
        let bit = |shift: u16| ((flags >> shift) & 0x1) as u8;

        self.base = base;
        self.limit = limit;
        self.selector = selector;

        self.stype = (flags & 0xF) as u8;
        self.s = bit(4);
        self.dpl = ((flags >> 5) & 0x3) as u8;
        self.present = bit(7);
        self.avl = bit(12);
        self.l = bit(13);
        self.db = bit(14);
        self.g = bit(15);

        self.unusable = (self.present != 1) as u8;
    }
}

impl fmt::Debug for KvmSegment {
    /// Prints all fields except `padding` on one line; base, limit, selector and type in hex.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let KvmSegment {
            base, limit, selector, stype, present, dpl, db, s, l, g, avl, unusable, ..
        } = *self;
        write!(f, "(base: {:x} limit {:x} selector: {:x} type: {:x} p: {} dpl: {} db: {} s: {} l: {} g: {} avl: {} unuse: {})",
               base, limit, selector, stype, present, dpl, db, s, l, g, avl, unusable)
    }
}
/// Descriptor-table register (base and limit) in its `#[repr(C)]` layout, 16 bytes.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmDtable {
    pub base: u64,
    pub limit: u16,
    padding: [u16; 3],
}

impl fmt::Debug for KvmDtable {
    /// Prints base and limit in hex; the padding is omitted.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let KvmDtable { base, limit, .. } = *self;
        write!(f, "(base: {:x} limit {:x})", base, limit)
    }
}
/// Special-register block passed to the `KVM_GET_SREGS` / `KVM_SET_SREGS`
/// ioctls: eight segment registers, two descriptor tables, control
/// registers, EFER, the APIC base and an interrupt bitmap.
///
/// The layout is `#[repr(C)]`; field order therefore matters.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmSRegs {
pub cs: KvmSegment,
pub ds: KvmSegment,
pub es: KvmSegment,
pub fs: KvmSegment,
pub gs: KvmSegment,
pub ss: KvmSegment,
pub tr: KvmSegment,
pub ldt: KvmSegment,
pub gdt: KvmDtable,
// NOTE(review): presumably the IDT; the spelling `itd` is part of the public field name.
pub itd: KvmDtable,
pub cr0: u64,
pub cr2: u64,
pub cr3: u64,
pub cr4: u64,
pub cr8: u64,
pub efer: u64,
pub apic_base: u64,
pub interrupt_bitmap: [u64; 4],
}
impl fmt::Debug for KvmSRegs {
// Multi-line dump: one segment per line, then the descriptor tables,
// cr0/cr2/cr3/cr4, and finally efer/apic_base. `cr8` and
// `interrupt_bitmap` are not included in the output.
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "cs: {:?}\nds: {:?}\nes: {:?}\nfs: {:?}\n", self.cs, self.ds, self.es, self.fs)?;
write!(f, "gs: {:?}\nss: {:?}\ntr: {:?}\nldt: {:?}\n", self.gs, self.ss, self.tr, self.ldt)?;
write!(f, "gdt: {:?} itd: {:?}\n", self.gdt, self.itd)?;
write!(f, "cr0: {:x} cr2: {:x} cr3: {:x} cr4: {:x}\n", self.cr0, self.cr2, self.cr3, self.cr4)?;
write!(f, "efer: {:x} apic_base: {:x}\n", self.efer, self.apic_base)
}
}
impl KvmSRegs {
/// Returns a register block with every field set to its `Default` value.
pub fn new() -> KvmSRegs {
KvmSRegs { ..Default::default() }
}
}
pub fn kvm_get_sregs(cpufd: &VcpuFd, sregs: &mut KvmSRegs) -> Result<u32> {
unsafe {
ioctl_with_mut_ref(cpufd.raw(), KVM_GET_SREGS, sregs)
.map_err(|e| ioctl_err("KVM_GET_SREGS", e))
}
}
pub fn kvm_set_sregs(cpufd: &VcpuFd, sregs: &KvmSRegs) -> Result<u32> {
unsafe {
ioctl_with_ref(cpufd.raw(), KVM_SET_SREGS, sregs)
.map_err(|e| ioctl_err("KVM_SET_SREGS", e))
}
}
/// General-purpose register block in its `#[repr(C)]` layout: eighteen
/// `u64` fields, 144 bytes.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmRegs {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    pub rip: u64,
    pub rflags: u64,
}

impl fmt::Debug for KvmRegs {
    /// Dumps every register in hex, four per line, with `rip`/`rflags` on the last line.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let KvmRegs {
            rax, rbx, rcx, rdx,
            rsi, rdi, rsp, rbp,
            r8, r9, r10, r11,
            r12, r13, r14, r15,
            rip, rflags,
        } = *self;
        write!(f, "rax 0x{:x} rbx 0x{:x} rcx 0x{:x} rdx 0x{:x}\n", rax, rbx, rcx, rdx)?;
        write!(f, "rsi 0x{:x} rdi 0x{:x} rsp 0x{:x} rbp 0x{:x}\n", rsi, rdi, rsp, rbp)?;
        write!(f, "r8 0x{:x} r9 0x{:x} r10 0x{:x} r11 0x{:x}\n", r8, r9, r10, r11)?;
        write!(f, "r12 0x{:x} r13 0x{:x} r14 0x{:x} r15 0x{:x}\n", r12, r13, r14, r15)?;
        write!(f, "rip 0x{:x} rflags 0x{:x}\n", rip, rflags)
    }
}

impl KvmRegs {
    /// Returns a register block with every register set to zero.
    pub fn new() -> KvmRegs {
        KvmRegs::default()
    }
}
pub fn kvm_get_regs(cpufd: &VcpuFd, regs: &mut KvmRegs) -> Result<u32> {
unsafe {
ioctl_with_mut_ref(cpufd.raw(), KVM_GET_REGS, regs)
.map_err(|e| ioctl_err("KVM_GET_REGS", e))
}
}
pub fn kvm_set_regs(cpufd: &VcpuFd, regs: &KvmRegs) -> Result<u32> {
unsafe {
ioctl_with_ref(cpufd.raw(), KVM_SET_REGS, regs)
.map_err(|e| ioctl_err("KVM_SET_REGS", e))
}
}
/// Floating-point state block in its `#[repr(C)]` layout, 416 bytes.
///
/// Only `fcw` and `mxcsr` are public; the remaining fields are zero-filled
/// by [`KvmFpu::new`].
#[derive(Copy, Clone)]
#[repr(C)]
pub struct KvmFpu {
    fpr: [u8; 128],
    pub fcw: u16,
    fsw: u16,
    ftwx: u8,
    pad1: u8,
    last_opcode: u16,
    last_ip: u64,
    last_dp: u64,
    xmm: [u8; 256],
    pub mxcsr: u32,
    pad2: u32,
}

impl KvmFpu {
    /// Returns an all-zero FPU state.
    pub fn new() -> KvmFpu {
        KvmFpu {
            fpr: [0; 128],
            fcw: 0,
            fsw: 0,
            ftwx: 0,
            pad1: 0,
            last_opcode: 0,
            last_ip: 0,
            last_dp: 0,
            xmm: [0; 256],
            mxcsr: 0,
            pad2: 0,
        }
    }
}
pub fn kvm_set_fpu(cpufd: &VcpuFd, fpu: &KvmFpu) -> Result<u32> {
unsafe {
ioctl_with_ref(cpufd.raw(), KVM_SET_FPU, fpu )
.map_err(|e| ioctl_err("KVM_SET_FPU", e))
}
}
/// One model-specific-register entry: register index and the value to load.
#[derive(Copy, Clone, Default)]
#[repr(C)]
struct KvmMsrEntry {
    index: u32,
    reserved: u32,
    data: u64
}

/// Fixed-capacity MSR list (`#[repr(C)]`): an entry count followed by 100 slots.
#[repr(C)]
pub struct KvmMsrs {
    nent: u32,
    padding: u32,
    entries: [KvmMsrEntry; 100]
}

impl KvmMsrs {
    /// Creates an empty list (`nent == 0`, all slots zeroed).
    pub fn new() -> KvmMsrs {
        KvmMsrs {
            nent: 0,
            padding: 0,
            entries: [KvmMsrEntry::default(); 100],
        }
    }

    /// Appends an entry for MSR `index` with value `data`.
    ///
    /// # Panics
    /// Panics on an out-of-bounds index once all 100 slots are in use.
    pub fn add(&mut self, index: u32, data: u64) {
        let slot = &mut self.entries[self.nent as usize];
        slot.index = index;
        slot.data = data;
        self.nent += 1;
    }
}
pub fn kvm_set_msrs(cpufd: &VcpuFd, msrs: &KvmMsrs) -> Result<u32> {
unsafe {
ioctl_with_ref(cpufd.raw(), KVM_SET_MSRS, msrs)
.map_err(|e| ioctl_err("KVM_SET_MSRS", e))
}
}
pub fn kvm_run(cpufd: &VcpuFd) -> Result<u32> {
unsafe {
ioctl_with_val(cpufd.raw(), KVM_RUN, 0)
.map_err(|e| ioctl_err("KVM_RUN", e))
}
}
pub fn ioctl_err(ioctl_name: &'static str, e: system::Error) -> Error {
if e.is_interrupted() {
Error::new(ErrorKind::Interrupted, e)
} else {
Error::new(ErrorKind::IoctlFailed(ioctl_name), e)
}
}

43
src/kvm/ioeventfd.rs Normal file
View File

@ -0,0 +1,43 @@
use std::sync::Arc;
use std::os::unix::io::{AsRawFd,RawFd};
use crate::kvm::{Kvm,Result,Error};
use crate::system::EventFd;
use crate::system;
/// An eventfd registered with KVM as an ioeventfd for one address.
///
/// `IoEventFd::new` performs the registration and the `Drop` implementation
/// removes it again.
pub struct IoEventFd {
// Handle used to add and later delete the ioeventfd registration.
kvm: Kvm,
// Address the eventfd was registered for.
addr: u64,
// The underlying eventfd.
evt: Arc<EventFd>
}
impl IoEventFd {
pub fn new(kvm: &Kvm, address: u64) -> Result<IoEventFd> {
let evt = EventFd::new().map_err(Error::IoEventCreate)?;
kvm.ioeventfd_add(address, evt.as_raw_fd())?;
Ok(IoEventFd {
kvm: kvm.clone(),
addr: address,
evt: evt.into(),
})
}
pub fn read(&self) -> system::Result<u64> {
self.evt.read()
}
pub fn write(&self, v: u64) -> system::Result<()> {
self.evt.write(v)
}
}
impl Drop for IoEventFd {
    /// Removes the ioeventfd registration made in `new`.
    fn drop(&mut self) {
        let fd = self.evt.as_raw_fd();
        // Best effort: `drop` cannot report a failure, so the result is ignored.
        let _ = self.kvm.ioeventfd_del(self.addr, fd);
    }
}
impl AsRawFd for IoEventFd {
fn as_raw_fd(&self) -> RawFd {
self.evt.as_raw_fd()
}
}

View File

@ -2,9 +2,13 @@ use std::os::unix::io::RawFd;
use std::sync::Arc; use std::sync::Arc;
mod ioctl; mod ioctl;
mod ioeventfd;
mod error;
use crate::vm::{Result,Error,ErrorKind}; pub use error::{Result,Error};
pub use self::ioctl::{KvmCpuIdEntry,KvmLapicState, KvmSRegs, KvmRegs, KvmFpu, KvmMsrs, KvmSegment}; pub use ioeventfd::IoEventFd;
use crate::vm::arch::KvmRegs;
pub const KVM_CAP_IRQCHIP: u32 = 0; pub const KVM_CAP_IRQCHIP: u32 = 0;
pub const KVM_CAP_HLT: u32 = 1; pub const KVM_CAP_HLT: u32 = 1;
@ -20,21 +24,29 @@ pub const KVM_CAP_IOEVENTFD: u32 = 36;
pub struct Kvm { pub struct Kvm {
sysfd: Arc<ioctl::SysFd>, sysfd: Arc<ioctl::SysFd>,
vmfd: Arc<ioctl::VmFd>, vmfd: Arc<ioctl::VmFd>,
vcpus: Vec<KvmVcpu>,
} }
fn check_extensions(sysfd: &ioctl::SysFd, extensions: &[u32]) -> Result<()> { fn check_extensions(sysfd: &ioctl::SysFd, extensions: &[u32]) -> Result<()> {
for e in extensions { for e in extensions {
if ioctl::kvm_check_extension(&sysfd, *e)? == 0 { check_extension(sysfd, *e)?;
return Err(Error::from(ErrorKind::MissingRequiredExtension(*e)));
}
} }
Ok(()) Ok(())
} }
fn check_extension(sysfd: &ioctl::SysFd, extension: u32) -> Result<()> {
let ret = ioctl::kvm_check_extension(&sysfd, extension)?;
if ret == 0 {
Err(Error::MissingRequiredExtension(extension))
} else {
Ok(())
}
}
fn check_version(sysfd: &ioctl::SysFd) -> Result<()> { fn check_version(sysfd: &ioctl::SysFd) -> Result<()> {
if ioctl::kvm_get_api_version(&sysfd)? != 12 { let version= ioctl::kvm_get_api_version(&sysfd)?;
return Err(Error::from(ErrorKind::BadVersion));
if version != 12 {
return Err(Error::BadVersion);
} }
Ok(()) Ok(())
} }
@ -46,17 +58,16 @@ impl Kvm {
check_version(&sysfd)?; check_version(&sysfd)?;
check_extensions(&sysfd, &required_extensions)?; check_extensions(&sysfd, &required_extensions)?;
let vmfd= ioctl::kvm_create_vm(&sysfd) let vmfd= ioctl::kvm_create_vm(&sysfd)?;
.map_err(|_| Error::from(ErrorKind::CreateVmFailed))?;
Ok(Kvm{ Ok(Kvm{
sysfd: Arc::new(sysfd), sysfd: Arc::new(sysfd),
vmfd: Arc::new(vmfd), vmfd: Arc::new(vmfd),
vcpus: Vec::new(),
}) })
} }
pub fn add_memory_region(&self, slot: u32, guest_address: u64, host_address: u64, size: usize) -> Result<()> { pub fn add_memory_region(&self, slot: u32, guest_address: u64, host_address: u64, size: usize) -> Result<()> {
let region = ioctl::KvmUserspaceMemoryRegion::new(slot, guest_address, host_address, size as u64); let region = ioctl::KvmUserspaceMemoryRegion::new(slot, guest_address, host_address, size as u64);
ioctl::kvm_set_user_memory_region(&self.vmfd, &region)?; ioctl::kvm_set_user_memory_region(&self.vmfd, &region)?;
Ok(()) Ok(())
@ -68,22 +79,11 @@ impl Kvm {
Ok(()) Ok(())
} }
pub fn create_pit2(&self) -> Result<()> {
let pit_config = ioctl::KvmPitConfig::new(0);
ioctl::kvm_create_pit2(&self.vmfd, &pit_config)?;
Ok(())
}
pub fn create_irqchip(&self) -> Result<()> { pub fn create_irqchip(&self) -> Result<()> {
ioctl::kvm_create_irqchip(&self.vmfd)?; ioctl::kvm_create_irqchip(&self.vmfd)?;
Ok(()) Ok(())
} }
pub fn set_tss_addr(&self, addr: u32) -> Result<()> {
ioctl::kvm_set_tss_addr(&self.vmfd, addr)?;
Ok(())
}
pub fn irq_line(&self, irq: u32, level: u32) -> Result<()> { pub fn irq_line(&self, irq: u32, level: u32) -> Result<()> {
let irq_level = ioctl::KvmIrqLevel::new(irq, level); let irq_level = ioctl::KvmIrqLevel::new(irq, level);
ioctl::kvm_irq_line(&self.vmfd, &irq_level)?; ioctl::kvm_irq_line(&self.vmfd, &irq_level)?;
@ -92,40 +92,28 @@ impl Kvm {
pub fn irqfd(&self, fd: u32, gsi: u32) -> Result<()> { pub fn irqfd(&self, fd: u32, gsi: u32) -> Result<()> {
let irqfd = ioctl::KvmIrqFd::new(fd, gsi); let irqfd = ioctl::KvmIrqFd::new(fd, gsi);
ioctl::kvm_irqfd(&self.vmfd, &irqfd)?; ioctl::kvm_irqfd(&self.vmfd, &irqfd)
Ok(())
} }
pub fn ioeventfd_add(&self, address: u64, fd: RawFd) -> Result<()> { pub fn ioeventfd_add(&self, address: u64, fd: RawFd) -> Result<()> {
// XXX check for zero length capability // XXX check for zero length capability
let ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd); let ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd);
ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)?; ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)
Ok(())
} }
pub fn ioeventfd_del(&self, address: u64, fd: RawFd) -> Result<()> { pub fn ioeventfd_del(&self, address: u64, fd: RawFd) -> Result<()> {
let mut ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd); let mut ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd);
ioeventfd.set_deassign(); ioeventfd.set_deassign();
ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)?; ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)
Ok(())
} }
pub fn create_vcpus(&mut self, ncpus: usize) -> Result<()> { pub fn new_vcpu(&self, id: usize) -> Result<KvmVcpu> {
for id in 0..ncpus {
let vcpu = self.new_vcpu(id)?;
vcpu.setup_lapic()?;
self.vcpus.push(vcpu);
}
Ok(())
}
fn new_vcpu(&self, id: usize) -> Result<KvmVcpu> {
let cpufd = ioctl::kvm_create_vcpu(&self.vmfd, id as u32)?; let cpufd = ioctl::kvm_create_vcpu(&self.vmfd, id as u32)?;
Ok(KvmVcpu::new(id, Arc::new(cpufd), self.sysfd.clone())) Ok(KvmVcpu::new(id, Arc::new(cpufd), self.sysfd.clone()))
} }
pub fn get_vcpus(&self) -> Vec<KvmVcpu> { pub fn vmfd(&self) -> RawFd {
self.vcpus.clone() self.vmfd.raw()
} }
} }
@ -134,14 +122,8 @@ pub struct KvmVcpu {
id: usize, id: usize,
cpufd: Arc<ioctl::VcpuFd>, cpufd: Arc<ioctl::VcpuFd>,
sysfd: Arc<ioctl::SysFd>, sysfd: Arc<ioctl::SysFd>,
} }
const APIC_MODE_EXTINT: u8 = 0x7;
const APIC_MODE_NMI: u8 = 0x4;
const APIC_LVT_LINT0_OFFSET: usize = 0x350;
const APIC_LVT_LINT1_OFFSET: usize = 0x360;
impl KvmVcpu { impl KvmVcpu {
fn new(id: usize, cpufd: Arc<ioctl::VcpuFd>, sysfd: Arc<ioctl::SysFd>) -> KvmVcpu { fn new(id: usize, cpufd: Arc<ioctl::VcpuFd>, sysfd: Arc<ioctl::SysFd>) -> KvmVcpu {
KvmVcpu { id, cpufd, sysfd } KvmVcpu { id, cpufd, sysfd }
@ -151,38 +133,8 @@ impl KvmVcpu {
self.cpufd.raw() self.cpufd.raw()
} }
pub fn get_supported_cpuid(&self) -> Result<Vec<KvmCpuIdEntry>> { pub fn sys_raw_fd(&self) -> RawFd {
let mut cpuid = ioctl::KvmCpuId2::new(); self.sysfd.raw()
ioctl::kvm_get_supported_cpuid(&self.sysfd, &mut cpuid)?;
Ok(cpuid.get_entries())
}
pub fn set_cpuid2(&self, entries: Vec<KvmCpuIdEntry>) -> Result<()> {
let cpuid = ioctl::KvmCpuId2::new_from_entries(entries);
ioctl::kvm_set_cpuid2(&self.cpufd, &cpuid)?;
Ok(())
}
pub fn get_lapic(&self) -> Result<KvmLapicState> {
let mut lapic = KvmLapicState::new();
ioctl::kvm_get_lapic(&self.cpufd, &mut lapic)?;
Ok(lapic)
}
pub fn set_lapic(&self, lapic_state: &KvmLapicState) -> Result<()> {
ioctl::kvm_set_lapic(&self.cpufd, &lapic_state)?;
Ok(())
}
pub fn get_sregs(&self) -> Result<KvmSRegs> {
let mut sregs = KvmSRegs::new();
ioctl::kvm_get_sregs(&self.cpufd, &mut sregs)?;
Ok(sregs)
}
pub fn set_sregs(&self, sregs: &KvmSRegs) -> Result<()> {
ioctl::kvm_set_sregs(&self.cpufd, &sregs)?;
Ok(())
} }
pub fn get_regs(&self) -> Result<KvmRegs> { pub fn get_regs(&self) -> Result<KvmRegs> {
@ -201,29 +153,8 @@ impl KvmVcpu {
Ok(()) Ok(())
} }
pub fn set_fpu(&self, fpu: &KvmFpu) -> Result<()> {
ioctl::kvm_set_fpu(&self.cpufd, &fpu)?;
Ok(())
}
pub fn set_msrs(&self, msrs: &KvmMsrs) -> Result<()> {
ioctl::kvm_set_msrs(&self.cpufd, &msrs)?;
Ok(())
}
pub fn get_vcpu_mmap_size(&self) -> Result<usize> { pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
Ok(ioctl::kvm_get_vcpu_mmap_size(&self.sysfd)? as usize) Ok(ioctl::kvm_get_vcpu_mmap_size(&self.sysfd)? as usize)
} }
/// Reads the vcpu's local APIC state, sets the delivery mode of LINT0 to
/// `APIC_MODE_EXTINT` and of LINT1 to `APIC_MODE_NMI`, and writes the state
/// back.
///
/// # Errors
/// Propagates any error from `get_lapic` or `set_lapic`.
pub fn setup_lapic(&self) -> Result<()> {
    let mut lapic = self.get_lapic()?;

    let lint_modes = [
        (APIC_LVT_LINT0_OFFSET, APIC_MODE_EXTINT),
        (APIC_LVT_LINT1_OFFSET, APIC_MODE_NMI),
    ];
    for &(offset, mode) in lint_modes.iter() {
        // delivery mode: replace the low three bits of the register's second byte
        let byte = &mut lapic.regs[offset + 1];
        *byte = (*byte & 0xF8) | mode;
    }

    self.set_lapic(&lapic)
}
} }

15
src/lib.rs Normal file
View File

@ -0,0 +1,15 @@
#[macro_use]
extern crate lazy_static;
#[macro_use]
mod system;
#[macro_use]
pub mod util;
mod vm;
mod memory;
mod devices;
mod kvm;
mod virtio;
mod disk;
pub use util::{Logger,LogLevel};
pub use vm::VmConfig;

View File

@ -1,21 +0,0 @@
#![allow(non_snake_case)]
#[macro_use] extern crate lazy_static;
#[macro_use] mod log;
mod vm;
#[macro_use]
mod system;
mod memory;
mod devices;
mod kvm;
mod virtio;
mod disk;
pub use log::{Logger,LogLevel};
/// Program entry point: builds a `VmConfig`, passes 1024 to
/// `ram_size_megs`, and boots it.
fn main() {
vm::VmConfig::new()
.ram_size_megs(1024)
.boot();
}

View File

@ -4,9 +4,11 @@ use std::sync::{Arc, RwLock};
use crate::memory::{GuestRam, SystemAllocator, Mapping, Error, Result}; use crate::memory::{GuestRam, SystemAllocator, Mapping, Error, Result};
use crate::kvm::Kvm; use crate::kvm::Kvm;
use crate::system::{BitVec, FileDesc}; use crate::system::FileDesc;
use crate::util::BitSet;
use crate::memory::drm::{DrmBufferAllocator, DrmDescriptor}; use crate::memory::drm::{DrmBufferAllocator, DrmDescriptor};
use std::io::SeekFrom; use std::io::SeekFrom;
use crate::memory::ram::MemoryRegion;
#[derive(Clone)] #[derive(Clone)]
pub struct MemoryManager { pub struct MemoryManager {
@ -35,14 +37,16 @@ impl MemoryManager {
&self.ram &self.ram
} }
pub fn kvm_mut(&mut self) -> &mut Kvm {
&mut self.kvm
}
pub fn kvm(&self) -> &Kvm { pub fn kvm(&self) -> &Kvm {
&self.kvm &self.kvm
} }
/// Installs `regions` as the guest RAM regions and marks slots
/// `0..regions.len()` as occupied in the device-memory slot set.
///
/// # Panics
/// Panics if the `device_memory` lock is poisoned.
pub fn set_ram_regions(&mut self, regions: Vec<MemoryRegion>) {
// The write guard stays alive until the end of the function, so the
// slot bookkeeping and the RAM update both happen under the lock.
let mut devmem = self.device_memory.write().unwrap();
devmem.set_slots_occupied(0, regions.len());
self.ram.set_regions(regions);
}
pub fn register_device_memory(&self, fd: RawFd, size: usize) -> Result<(u64, u32)> { pub fn register_device_memory(&self, fd: RawFd, size: usize) -> Result<(u64, u32)> {
let mut devmem = self.device_memory.write().unwrap(); let mut devmem = self.device_memory.write().unwrap();
devmem.register(self.kvm(), fd, size) devmem.register(self.kvm(), fd, size)
@ -82,19 +86,25 @@ impl MemoryRegistration {
} }
struct DeviceMemory { struct DeviceMemory {
slots: BitVec, slots: BitSet,
mappings: HashMap<u32, MemoryRegistration>, mappings: HashMap<u32, MemoryRegistration>,
allocator: SystemAllocator, allocator: SystemAllocator,
} }
impl DeviceMemory { impl DeviceMemory {
fn new(ram_region_count: usize, allocator: SystemAllocator) -> DeviceMemory { fn new(ram_region_count: usize, allocator: SystemAllocator) -> DeviceMemory {
let mut slots = BitVec::new(); let mut devmem = DeviceMemory {
for i in 0..ram_region_count { slots: BitSet::new(),
slots.set_bit(i); mappings: HashMap::new(),
} allocator
DeviceMemory { };
slots, mappings: HashMap::new(), allocator devmem.set_slots_occupied(0, ram_region_count);
devmem
}
fn set_slots_occupied(&mut self, first: usize, count: usize) {
for i in first..first+count {
self.slots.insert(i)
} }
} }
@ -134,12 +144,16 @@ impl DeviceMemory {
} }
fn allocate_slot(&mut self) -> u32 { fn allocate_slot(&mut self) -> u32 {
let slot = self.slots.first_unset(); for i in 0.. {
self.slots.set_bit(slot); if !self.slots.get(i) {
slot as u32 self.slots.insert(i);
return i as u32;
}
}
unreachable!()
} }
fn free_slot(&mut self, slot: u32) { fn free_slot(&mut self, slot: u32) {
self.slots.clear_bit(slot as usize) self.slots.remove(slot as usize)
} }
} }

View File

@ -5,7 +5,7 @@ use std::mem;
use std::io::Write; use std::io::Write;
use std::os::unix::io::RawFd; use std::os::unix::io::RawFd;
use crate::vm::{Result,Error,ErrorKind}; use crate::system::{Result,Error};
pub struct Mapping { pub struct Mapping {
ptr: *mut u8, ptr: *mut u8,
@ -65,7 +65,7 @@ impl Mapping {
/// ///
fn check_offset(&self, offset: usize) -> Result<()> { fn check_offset(&self, offset: usize) -> Result<()> {
if offset > self.size { if offset > self.size {
Err(Error::from(ErrorKind::InvalidMappingOffset(offset))) Err(Error::InvalidOffset)
} else { } else {
Ok(()) Ok(())
} }
@ -105,7 +105,7 @@ impl Mapping {
self.check_offset(offset + bytes.len())?; self.check_offset(offset + bytes.len())?;
unsafe { unsafe {
let mut slice: &mut [u8] = &mut self.as_mut_slice()[offset..]; let mut slice: &mut [u8] = &mut self.as_mut_slice()[offset..];
slice.write_all(bytes).map_err(|_| Error::from(ErrorKind::InvalidMappingOffset(offset))) slice.write_all(bytes).map_err(|_| Error::InvalidOffset)
} }
} }
@ -138,7 +138,7 @@ impl Mapping {
pub fn set_mergeable(&self) -> Result<()> { pub fn set_mergeable(&self) -> Result<()> {
unsafe { unsafe {
if libc::madvise(self.ptr as *mut libc::c_void, self.size, libc::MADV_MERGEABLE) == -1 { if libc::madvise(self.ptr as *mut libc::c_void, self.size, libc::MADV_MERGEABLE) == -1 {
return Err(Error::from_last_errno()); return Err(Error::last_os_error());
} }
} }
Ok(()) Ok(())
@ -166,7 +166,7 @@ unsafe fn mmap_allocate(size: usize, flags: libc::c_int, fd: libc::c_int) -> Res
flags, fd, 0); flags, fd, 0);
if p.is_null() || p == libc::MAP_FAILED { if p.is_null() || p == libc::MAP_FAILED {
return Err(Error::from_last_errno()); return Err(Error::last_os_error());
} }
Ok(p as *mut u8) Ok(p as *mut u8)
} }

View File

@ -8,30 +8,24 @@ mod allocator;
pub use self::allocator::SystemAllocator; pub use self::allocator::SystemAllocator;
pub use self::address::AddressRange; pub use self::address::AddressRange;
pub use self::mmap::Mapping; pub use self::mmap::Mapping;
pub use self::ram::GuestRam; pub use self::ram::{GuestRam,MemoryRegion};
pub use self::ram::{PCI_MMIO_RESERVED_BASE,HIMEM_BASE};
pub use manager::MemoryManager; pub use manager::MemoryManager;
pub use drm::{DrmDescriptor,DrmPlaneDescriptor}; pub use drm::{DrmDescriptor,DrmPlaneDescriptor};
use crate::vm::Error as VmError;
use std::{result, fmt, io}; use std::{result, fmt, io};
use crate::system; use crate::{system, kvm};
pub const KVM_KERNEL_LOAD_ADDRESS: u64 = 0x1000000;
pub const KERNEL_CMDLINE_ADDRESS: u64 = 0x20000;
pub const KERNEL_ZERO_PAGE: u64 = 0x7000;
#[derive(Debug)] #[derive(Debug)]
pub enum Error { pub enum Error {
DeviceMemoryAllocFailed, DeviceMemoryAllocFailed,
MappingFailed(VmError), MappingFailed(system::Error),
RegisterMemoryFailed(VmError), RegisterMemoryFailed(kvm::Error),
UnregisterMemoryFailed(VmError), UnregisterMemoryFailed(kvm::Error),
GbmCreateDevice(system::Error), GbmCreateDevice(system::Error),
GbmCreateBuffer(system::Error), GbmCreateBuffer(system::Error),
OpenRenderNode(io::Error), OpenRenderNode(io::Error),
PrimeHandleToFD(system::Error), PrimeHandleToFD(system::ErrnoError),
CreateBuffer(io::Error), CreateBuffer(io::Error),
NoDrmAllocator, NoDrmAllocator,
} }

View File

@ -1,17 +1,10 @@
use std::sync::Arc; use std::sync::Arc;
use std::cmp;
use std::mem; use std::mem;
use crate::memory::Mapping; use crate::memory::{Mapping,AddressRange};
use crate::memory::mmap::Serializable; use crate::memory::mmap::Serializable;
use crate::memory::AddressRange; use crate::system::{Result, Error};
use crate::util::ByteBuffer;
use crate::kvm::Kvm;
use crate::vm::{Result,Error,ErrorKind};
pub const HIMEM_BASE: u64 = (1 << 32);
pub const PCI_MMIO_RESERVED_SIZE: usize = (512 << 20);
pub const PCI_MMIO_RESERVED_BASE: u64 = HIMEM_BASE - PCI_MMIO_RESERVED_SIZE as u64;
#[derive(Clone)] #[derive(Clone)]
pub struct GuestRam { pub struct GuestRam {
@ -20,11 +13,11 @@ pub struct GuestRam {
} }
impl GuestRam { impl GuestRam {
pub fn new(ram_size: usize, kvm: &Kvm) -> Result<GuestRam> { pub fn new(ram_size: usize) -> GuestRam {
Ok(GuestRam { GuestRam {
ram_size, ram_size,
regions: Arc::new(create_regions(kvm, ram_size)?), regions: Vec::new().into(),
}) }
} }
pub fn ram_size(&self) -> usize { pub fn ram_size(&self) -> usize {
@ -50,6 +43,11 @@ impl GuestRam {
region.slice(guest_address, size) region.slice(guest_address, size)
} }
pub fn mut_buffer(&self, guest_address: u64, size: usize) -> Result<ByteBuffer<&mut [u8]>> {
let bytes = self.mut_slice(guest_address, size)?;
Ok(ByteBuffer::from_bytes_mut(bytes))
}
pub fn mut_slice(&self, guest_address: u64, size: usize) -> Result<&mut[u8]> { pub fn mut_slice(&self, guest_address: u64, size: usize) -> Result<&mut[u8]> {
let region = self.find_region(guest_address, size)?; let region = self.find_region(guest_address, size)?;
region.mut_slice(guest_address, size) region.mut_slice(guest_address, size)
@ -65,6 +63,10 @@ impl GuestRam {
region.read_int(guest_address) region.read_int(guest_address)
} }
pub fn set_regions(&mut self, regions: Vec<MemoryRegion>) {
self.regions = regions.into();
}
#[allow(dead_code)] #[allow(dead_code)]
pub fn end_addr(&self) -> u64 { pub fn end_addr(&self) -> u64 {
self.regions.iter() self.regions.iter()
@ -78,54 +80,36 @@ impl GuestRam {
fn find_region(&self, guest_address: u64, size: usize) -> Result<&MemoryRegion> { fn find_region(&self, guest_address: u64, size: usize) -> Result<&MemoryRegion> {
self.regions.iter() self.regions.iter()
.find(|r| r.contains(guest_address, size)) .find(|r| r.contains(guest_address, size))
.ok_or_else(|| Error::from(ErrorKind::InvalidAddress(guest_address))) .ok_or(Error::InvalidAddress(guest_address))
} }
} }
fn add_region(regions: &mut Vec<MemoryRegion>, base: u64, size: usize, kvm: &Kvm) -> Result<()> { pub struct MemoryRegion {
let slot = regions.len() as u32;
let mr = MemoryRegion::new(base, size)?;
kvm.add_memory_region(slot, base, mr.mapping.address(), size)
.map_err(|e| Error::new(ErrorKind::RegisterMemoryFailed, e))?;
regions.push(mr);
Ok(())
}
fn create_regions(kvm: &Kvm, ram_size: usize) -> Result<Vec<MemoryRegion>> {
let mut regions = Vec::new();
let lowmem_sz = cmp::min(ram_size, PCI_MMIO_RESERVED_BASE as usize);
add_region(&mut regions, 0, lowmem_sz, &kvm)?;
if lowmem_sz < ram_size {
let himem_sz = ram_size - lowmem_sz;
add_region(&mut regions, HIMEM_BASE, himem_sz, &kvm)?;
}
Ok(regions)
}
struct MemoryRegion {
guest_range: AddressRange, guest_range: AddressRange,
mapping: Mapping, mapping: Mapping,
} }
impl MemoryRegion { impl MemoryRegion {
fn new(guest_base: u64, size: usize) -> Result<MemoryRegion> { pub fn new(guest_base: u64, size: usize) -> Result<MemoryRegion> {
Ok(MemoryRegion{ Ok(MemoryRegion{
guest_range: AddressRange::new(guest_base, size), guest_range: AddressRange::new(guest_base, size),
mapping: Mapping::new(size)?, mapping: Mapping::new(size)?,
}) })
} }
pub fn base_address(&self) -> u64 {
self.mapping.address()
}
fn contains(&self, guest_addr: u64, size: usize) -> bool { self.guest_range.contains(guest_addr, size) } fn contains(&self, guest_addr: u64, size: usize) -> bool { self.guest_range.contains(guest_addr, size) }
fn checked_offset(&self, guest_addr: u64, size: usize) -> Result<usize> { fn checked_offset(&self, guest_addr: u64, size: usize) -> Result<usize> {
if self.contains(guest_addr, size) { if self.contains(guest_addr, size) {
Ok(self.guest_range.offset_of(guest_addr)) Ok(self.guest_range.offset_of(guest_addr))
} else { } else {
Err(Error::from(ErrorKind::InvalidAddress(guest_addr))) Err(Error::InvalidAddress(guest_addr))
} }
} }

View File

@ -1,78 +0,0 @@
use std::ops::Index;
pub struct BitVec {
blocks: Vec<u64>,
}
impl BitVec {
pub fn new() -> BitVec {
BitVec { blocks: Vec::new() }
}
pub fn set_bit(&mut self, idx: usize) {
*self.mut_block(idx) |= Self::shifted_bit(idx)
}
pub fn clear_bit(&mut self, idx: usize) {
if self.blocks.len() > Self::block_idx(idx) {
let bit = Self::shifted_bit(idx);
*self.mut_block(idx) &= !bit;
}
}
pub fn get_bit(&self, n: usize) -> bool {
let off = n % 64;
let bit = 1 << off as u64;
self.get_block(n) & bit != 0
}
pub fn first_unset(&self) -> usize {
for (i,block) in self.blocks.iter().enumerate() {
if *block != u64::max_value() {
return (i * 64) + (0..64).find(|n| Self::shifted_bit(*n) & *block == 0).expect("...");
}
}
self.blocks.len() * 64
}
fn shifted_bit(idx: usize) -> u64 {
let shift = (idx % 64) as u64;
(1 << shift)
}
fn block_idx(idx: usize) -> usize {
idx / 64
}
fn get_block(&self, idx: usize) -> u64 {
let idx = Self::block_idx(idx);
if self.blocks.len() > idx {
self.blocks[idx]
} else {
0
}
}
fn mut_block(&mut self, idx: usize) -> &mut u64 {
let idx = Self::block_idx(idx);
if self.blocks.len() <= idx {
self.blocks.resize_with(idx + 1, Default::default);
}
&mut self.blocks[idx]
}
}
static TRUE: bool = true;
static FALSE: bool = false;
impl Index<usize> for BitVec {
type Output = bool;
fn index(&self, index: usize) -> &Self::Output {
if self.get_bit(index) {
&TRUE
} else {
&FALSE
}
}
}

View File

@ -79,7 +79,7 @@ impl EPoll {
libc::epoll_wait(self.fd, events.events_ptr(), nevents, timeout) libc::epoll_wait(self.fd, events.events_ptr(), nevents, timeout)
}; };
if ret == -1 && Error::last_os_error() != Error::from_raw_os_error(EINTR) { if ret == -1 && Error::last_errno() != EINTR {
return Err(Error::last_os_error()); return Err(Error::last_os_error());
} else if ret as usize > events.len() { } else if ret as usize > events.len() {
return Err(Error::from_raw_os_error(EINVAL)); return Err(Error::from_raw_os_error(EINVAL));

View File

@ -13,8 +13,16 @@ impl Error {
Error(e) Error(e)
} }
pub fn errno(self) -> i32 {
self.0
}
pub fn last_os_error() -> Error { pub fn last_os_error() -> Error {
Error(unsafe { *__errno_location() }) Error(Self::last_errno())
}
pub fn last_errno() -> i32 {
unsafe { *__errno_location() }
} }
pub fn is_interrupted(&self) -> bool { pub fn is_interrupted(&self) -> bool {
@ -42,10 +50,6 @@ impl From<Error> for io::Error {
} }
} }
pub fn errno_result<T>() -> Result<T> {
Err(Error::last_os_error())
}
pub fn cvt<T: IsMinusOne>(t: T) -> Result<T> { pub fn cvt<T: IsMinusOne>(t: T) -> Result<T> {
if t.is_minus_one() { if t.is_minus_one() {
Err(Error::last_os_error()) Err(Error::last_os_error())

54
src/system/eventfd.rs Normal file
View File

@ -0,0 +1,54 @@
use std::os::unix::io::{RawFd,AsRawFd};
use libc;
use crate::system::{Result,Error};
pub struct EventFd(RawFd);
const U64_SZ: usize = 8;
impl EventFd {
pub fn new() -> Result<EventFd> {
let fd = unsafe { libc::eventfd(0, 0) };
if fd < 0 {
return Err(Error::last_os_error());
}
Ok(EventFd(fd))
}
pub fn write(&self, v: u64) -> Result<()> {
let ret = unsafe { libc::write(self.0, &v as *const _ as *const libc::c_void, U64_SZ) };
if ret as usize != U64_SZ {
if ret < 0 {
return Err(Error::last_os_error())
}
return Err(Error::EventFdWrite);
}
Ok(())
}
pub fn read(&self) -> Result<u64> {
let mut v = 0u64;
let ret = unsafe { libc::read(self.0, &mut v as *mut _ as *mut libc::c_void, U64_SZ) };
if ret as usize != U64_SZ {
if ret < 0 {
return Err(Error::last_os_error());
}
return Err(Error::EventFdRead);
}
Ok(v)
}
}
impl Drop for EventFd {
fn drop(&mut self) {
let _ = unsafe { libc::close(self.0) };
}
}
impl AsRawFd for EventFd {
fn as_raw_fd(&self) -> RawFd {
self.0
}
}

View File

@ -1,6 +1,6 @@
use libc::{self, c_ulong, c_void}; use libc::{self, c_ulong, c_void};
use std::os::unix::io::RawFd; use std::os::unix::io::RawFd;
use crate::system::{Error,Result}; use crate::system::errno::{Result,Error};
pub const IOC_SIZEBITS: u64 = 14; pub const IOC_SIZEBITS: u64 = 14;
pub const IOC_DIRBITS: u64 = 2; pub const IOC_DIRBITS: u64 = 2;

View File

@ -2,7 +2,7 @@ use std::ffi::CString;
use std::io::SeekFrom; use std::io::SeekFrom;
use std::os::unix::io::{RawFd,AsRawFd}; use std::os::unix::io::{RawFd,AsRawFd};
use crate::system::{Result, FileDesc, errno_result}; use crate::system::{Error,Result, FileDesc};
use libc::{ use libc::{
self, c_char, c_uint, c_int, c_long,SYS_memfd_create, self, c_char, c_uint, c_int, c_long,SYS_memfd_create,
@ -52,7 +52,7 @@ impl MemoryFd {
let name = name.as_ptr() as *const c_char; let name = name.as_ptr() as *const c_char;
let fd = unsafe { libc::syscall(SYS_memfd_create as c_long, name, flags) } as c_int; let fd = unsafe { libc::syscall(SYS_memfd_create as c_long, name, flags) } as c_int;
if fd < 0 { if fd < 0 {
errno_result() Err(Error::last_os_error())
} else { } else {
Ok(FileDesc::new(fd)) Ok(FileDesc::new(fd))
} }
@ -61,7 +61,7 @@ impl MemoryFd {
fn add_seals(&self, flags: c_int) -> Result<()> { fn add_seals(&self, flags: c_int) -> Result<()> {
let ret = unsafe { libc::fcntl(self.fd.as_raw_fd(), libc::F_ADD_SEALS, flags) }; let ret = unsafe { libc::fcntl(self.fd.as_raw_fd(), libc::F_ADD_SEALS, flags) };
if ret < 0 { if ret < 0 {
errno_result() Err(Error::last_os_error())
} else { } else {
Ok(()) Ok(())
} }

View File

@ -1,19 +1,100 @@
#[macro_use]pub mod ioctl; #[macro_use]pub mod ioctl;
mod epoll; mod epoll;
mod errno; mod errno;
mod bitvec; mod eventfd;
mod socket; mod socket;
mod filedesc; mod filedesc;
mod memfd; mod memfd;
mod tap; mod tap;
pub mod netlink; pub mod netlink;
pub use bitvec::BitVec;
pub use filedesc::{FileDesc, FileFlags}; pub use filedesc::{FileDesc, FileFlags};
pub use eventfd::EventFd;
pub use memfd::MemoryFd; pub use memfd::MemoryFd;
pub use epoll::{EPoll,Event}; pub use epoll::{EPoll,Event};
pub use errno::{Error,Result,errno_result};
pub use socket::ScmSocket; pub use socket::ScmSocket;
pub use netlink::NetlinkSocket; pub use netlink::NetlinkSocket;
pub use tap::Tap; pub use tap::Tap;
use std::{fmt, result, io};
pub use errno::Error as ErrnoError;
pub type Result<T> = result::Result<T, Error>;
/// Errors reported by the `system` module.
///
/// Several variants wrap the low-level `errno::Error` that caused them.
#[derive(Debug)]
pub enum Error {
/// A bare OS error with no additional context.
Errno(errno::Error),
/// Opening `/dev/kvm` failed with the wrapped OS error.
OpenKvmFailed(errno::Error),
/// An offset into a mapping was out of range.
InvalidOffset,
/// The given address was rejected as invalid.
InvalidAddress(u64),
/// The named ioctl failed with the wrapped OS error.
IoctlError(&'static str, errno::Error),
/// Writing to an eventfd failed.
EventFdWrite,
/// Reading from an eventfd failed.
EventFdRead,
}
impl Error {
pub fn last_os_error() -> Error {
Error::Errno(errno::Error::last_os_error())
}
pub fn last_errno() -> i32 {
errno::Error::last_errno()
}
pub fn from_raw_os_error(e: i32) -> Error {
Error::Errno(errno::Error::from_raw_os_error(e))
}
pub fn inner_err(&self) -> Option<&errno::Error> {
match self {
Error::IoctlError(_,e) => Some(e),
Error::Errno(e) => Some(e),
Error::OpenKvmFailed(e) => Some(e),
_ => None,
}
}
pub fn is_interrupted(&self) -> bool {
self.inner_err()
.map(|e| e.is_interrupted())
.unwrap_or(false)
}
}
impl std::error::Error for Error {}

impl fmt::Display for Error {
    /// Writes a human-readable description of the error to `f`.
    ///
    /// Variants that wrap an `errno::Error` append (or, for `Errno`, consist
    /// entirely of) that error's own `Display` output.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use Error::*;
        match self {
            // Name the trait explicitly so the call does not depend on which
            // formatting traits happen to be in scope.
            Errno(err) => fmt::Display::fmt(err, f),
            InvalidOffset => write!(f, "attempt to access invalid offset into mapping"),
            // Zero-padded, `0x`-prefixed 64-bit address; the previous `{0:16x}`
            // padded with spaces and left the radix ambiguous.
            InvalidAddress(addr) => write!(f, "attempt to access invalid address: 0x{:016x}", addr),
            OpenKvmFailed(err) => write!(f, "failed to open /dev/kvm: {}", err),
            IoctlError(name, err) => write!(f, "failed to call {} ioctl: {}", name, err),
            EventFdWrite => write!(f, "failed writing to eventfd"),
            EventFdRead => write!(f, "failed reading from eventfd"),
        }
    }
}
impl From<errno::Error> for Error {
fn from(err: errno::Error) -> Error {
Error::Errno(err)
}
}
impl From<io::Error> for Error {
fn from(e: io::Error) -> Self {
Error::from_raw_os_error(e.raw_os_error().unwrap_or_default())
}
}
impl From<Error> for io::Error {
fn from(e: Error) -> Self {
match e {
Error::Errno(e) => io::Error::from_raw_os_error(e.errno()),
e => io::Error::new(io::ErrorKind::Other, e),
}
}
}

77
src/util/bitvec.rs Normal file
View File

@ -0,0 +1,77 @@
/// An efficiently stored array (or set) of bits.
///
/// Bits can be set, cleared, or tested by index into the
/// array of bits. Since the methods are named to follow
/// the set collection convention you can also think of
/// it as a set which stores `usize` index values.
///
/// Storage grows on demand when a bit is inserted; indexes beyond the
/// current storage read as unset.
#[derive(Debug, Clone, Default)]
pub struct BitSet {
    /// Backing storage: bit `i` lives in block `i / 64` at position `i % 64`.
    blocks: Vec<u64>,
}

impl BitSet {
    /// Number of bits stored in each block of the backing vector.
    const BLOCK_BITS: usize = 64;

    /// Create a new empty `BitSet`
    pub fn new() -> BitSet {
        BitSet { blocks: Vec::new() }
    }

    /// Removes all entries from the set.
    pub fn clear(&mut self) {
        self.blocks.clear();
    }

    /// Inserts a bit into the set. Sets the entry at `idx` to `true`.
    ///
    /// Grows the backing storage if `idx` lies beyond it.
    pub fn insert(&mut self, idx: usize) {
        let (bit, block) = Self::bit_and_block(idx);
        *self.block_mut(block) |= bit;
    }

    /// Removes a bit from the set. Sets the entry at `idx` to `false`.
    ///
    /// Never grows the backing storage: a bit beyond it is already unset.
    pub fn remove(&mut self, idx: usize) {
        let (bit, block) = Self::bit_and_block(idx);
        if let Some(word) = self.blocks.get_mut(block) {
            *word &= !bit;
        }
    }

    /// Returns the value of the bit at `idx`
    pub fn get(&self, idx: usize) -> bool {
        let (bit, block) = Self::bit_and_block(idx);
        self.block(block) & bit != 0
    }

    /// Convert a bit index `idx` into an index into
    /// the block array and the corresponding bit value
    /// inside of that block.
    ///
    /// Returns `(bit mask, block index)`.
    fn bit_and_block(idx: usize) -> (u64, usize) {
        // Shift a `u64`, not a `usize`: on targets where `usize` is 32 bits
        // wide, `1usize << n` overflows for every bit position `n >= 32`.
        let bit = 1u64 << (idx % Self::BLOCK_BITS);
        (bit, idx / Self::BLOCK_BITS)
    }

    /// Returns value stored at index `blk` or returns 0 if `blk`
    /// is index larger than block array.
    fn block(&self, blk: usize) -> u64 {
        self.blocks.get(blk).copied().unwrap_or(0)
    }

    /// Returns mutable reference to value stored at index `blk`
    /// and will resize block vector if index is larger than block
    /// array.
    fn block_mut(&mut self, blk: usize) -> &mut u64 {
        if self.blocks.len() <= blk {
            self.blocks.resize(blk + 1, 0);
        }
        &mut self.blocks[blk]
    }
}

386
src/util/buffer.rs Normal file
View File

@ -0,0 +1,386 @@
/// Wraps a block of bytes and provides an interface for reading/writing integers and byte slices.
///
/// The inner type `T` may be a `Vec<u8>`, a slice `&[u8]`, or a mutable slice `&mut [u8]`.
///
/// Methods for reading data are provided for all inner object types, and for vectors and
/// mutable slices methods are also available for writing into the buffer.
///
/// Reading from and writing to the buffer can either be at an absolute offset passed as an
/// argument or at the current offset. When using the current offset methods, the current
/// offset will be advanced by the size of the object read or written.
///
/// The default endian ordering for integers read from or written to the buffer is the native
/// ordering of the system. Use `self.big_endian()` or `self.little_endian()` to set a specific
/// byte ordering.
pub struct ByteBuffer<T> {
    /// Byte-order of integers stored in this buffer
    endian: Endian,
    /// Current offset for reading or writing.
    offset: usize,
    /// The block of bytes wrapped by this buffer
    inner: T,
}

impl <T: AsMut<[u8]>> ByteBuffer<T> {
    /// Return a mutable slice of length `len` starting at `offset` into the buffer.
    ///
    /// # Panics
    ///
    /// Panics if `offset + len` exceeds size of buffer.
    ///
    pub fn mut_at(&mut self, offset: usize, len: usize) -> &mut [u8] {
        let end = offset + len;
        &mut self.inner.as_mut()[offset..end]
    }

    /// Write an integer or a `&[u8]` slice at the specified `offset` into the buffer.
    ///
    /// For integers, the type may be any of: u8, u16, u32, u64
    ///
    /// The current offset is not changed. Returns `self` so calls can be chained.
    ///
    /// # Panics
    ///
    /// Panics if the size of `val` added to `offset` exceeds size of buffer.
    ///
    pub fn write_at<V: Writeable>(&mut self, offset: usize, val: V) -> &mut Self {
        let order = self.endian;
        let dest = self.mut_at(offset, val.size());
        val.write(dest, order);
        self
    }
}

impl <T: AsRef<[u8]>> ByteBuffer<T> {
    /// Return a slice of length `len` starting at `offset` into the buffer.
    ///
    /// # Panics
    ///
    /// Panics if `offset + len` exceeds size of buffer.
    ///
    pub fn ref_at(&self, offset: usize, len: usize) -> &[u8] {
        let end = offset + len;
        &self.inner.as_ref()[offset..end]
    }

    /// Return the entire contents of the buffer as a byte slice.
    pub fn as_ref(&self) -> &[u8] {
        self.inner.as_ref()
    }

    /// Read and return an integer value from the current offset and increment
    /// the current offset by the byte size of the integer type.
    ///
    /// The integer type `V` may be any of: u8, u16, u32, u64
    ///
    /// # Panics
    ///
    /// Panics if byte size of integer type added to current offset exceeds size
    /// of buffer.
    ///
    /// # Examples
    /// ```
    /// use ph::util::ByteBuffer;
    ///
    /// let bytes = &[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF];
    /// let mut buffer = ByteBuffer::from_bytes(bytes).big_endian();
    ///
    /// let n16 = buffer.read::<u16>();
    /// let n32: u32 = buffer.read();
    ///
    /// assert_eq!(n16, 0xAABB);
    /// assert_eq!(n32, 0xCCDDEEFF);
    ///
    /// ```
    pub fn read<V: Readable>(&mut self) -> V {
        let start = self.offset;
        // The offset is advanced before the (possibly panicking) read.
        self.offset = start + V::SIZE;
        self.read_at(start)
    }

    /// Read and return an integer value from the specified `offset` into the buffer.
    ///
    /// The integer type `V` may be any of: u8, u16, u32, u64
    ///
    /// # Panics
    ///
    /// Panics if byte size of integer type added to `offset` exceeds size
    /// of buffer.
    ///
    /// # Examples
    /// ```
    /// use ph::util::ByteBuffer;
    ///
    /// let bytes = &[0xAA, 0xBB, 0xCC, 0xDD, 0xEE, 0xFF];
    /// let mut buffer = ByteBuffer::from_bytes(bytes).big_endian();
    ///
    /// let n8 = buffer.read_at::<u8>(5);
    /// let n16: u16 = buffer.read_at(2);
    /// let n32: u32 = buffer.read_at(0);
    ///
    /// assert_eq!(n8, 0xFF);
    /// assert_eq!(n16, 0xCCDD);
    /// assert_eq!(n32, 0xAABBCCDD);
    /// ```
    ///
    pub fn read_at<V: Readable>(&self, offset: usize) -> V {
        let source = self.ref_at(offset, V::SIZE);
        V::read(source, self.endian)
    }

    /// Copy from the current offset into the slice `bytes` and increment the current
    /// offset by the size of `bytes`
    ///
    /// # Panics
    ///
    /// Panics if `bytes.len()` added to current offset exceeds size of buffer.
    ///
    pub fn read_bytes(&mut self, bytes: &mut [u8]) {
        let start = self.offset;
        self.offset = start + bytes.len();
        self.read_bytes_at(start, bytes);
    }

    /// Copy from the specified offset into the slice `bytes`
    ///
    /// # Panics
    ///
    /// Panics if `bytes.len() + offset` exceeds size of buffer.
    ///
    pub fn read_bytes_at(&self, offset: usize, bytes: &mut [u8]) {
        let source = self.ref_at(offset, bytes.len());
        bytes.copy_from_slice(source);
    }
}

impl <T> ByteBuffer<T> {
    /// Wrap `inner` with native byte order and a current offset of zero.
    fn new_with(inner: T) -> Self {
        ByteBuffer {
            inner,
            offset: 0,
            endian: Endian::Native,
        }
    }

    /// Set the current offset into the buffer to the value `offset`
    ///
    /// # Examples
    ///
    /// ```
    /// use ph::util::ByteBuffer;
    ///
    /// let mut buffer = ByteBuffer::from_bytes(&[0xAA, 0xBB, 0xCC, 0xDD]).big_endian();
    ///
    /// buffer.set_offset(2);
    /// let n: u8 = buffer.read();
    /// assert_eq!(n, 0xCC);
    ///
    /// buffer.set_offset(1);
    /// let n: u16 = buffer.read();
    /// assert_eq!(n, 0xBBCC);
    ///
    /// ```
    pub fn set_offset(&mut self, offset: usize) {
        self.offset = offset;
    }

    /// Configure this `ByteBuffer` instance to read and write integers in big-endian
    /// byte order
    ///
    /// Because this method consumes and returns `self`, it is normally chained
    /// directly onto the constructor call.
    ///
    /// # Examples
    ///
    /// ```
    /// use ph::util::ByteBuffer;
    ///
    /// let mut buffer = ByteBuffer::from_bytes(&[0xAA, 0xBB, 0xCC, 0xDD])
    ///     .big_endian();
    ///
    /// let n: u32 = buffer.read();
    ///
    /// assert_eq!(n, 0xAABBCCDD);
    /// ```
    ///
    pub fn big_endian(self) -> Self {
        ByteBuffer { endian: Endian::Big, ..self }
    }

    /// Configure this `ByteBuffer` instance to read and write integers in little-endian
    /// byte order
    ///
    /// Because this method consumes and returns `self`, it is normally chained
    /// directly onto the constructor call.
    ///
    /// # Examples
    ///
    /// ```
    /// use ph::util::ByteBuffer;
    ///
    /// let mut buffer = ByteBuffer::from_bytes(&[0xAA, 0xBB, 0xCC, 0xDD])
    ///     .little_endian();
    ///
    /// let n: u32 = buffer.read();
    /// assert_eq!(n, 0xDDCCBBAA);
    ///
    /// let n: u16 = buffer.read_at(2);
    /// assert_eq!(n, 0xDDCC);
    /// ```
    ///
    pub fn little_endian(self) -> Self {
        ByteBuffer { endian: Endian::Little, ..self }
    }
}

impl <'a> ByteBuffer<&'a [u8]> {
    /// Create a new read-only `ByteBuffer` from the slice `bytes`
    pub fn from_bytes(bytes: &'a [u8]) -> Self {
        Self::new_with(bytes)
    }

    /// Return the byte length of the inner slice.
    pub fn len(&self) -> usize {
        self.inner.len()
    }
}

impl <'a> ByteBuffer<&'a mut [u8]> {
    /// Create a new `ByteBuffer` from the mutable slice `bytes`
    pub fn from_bytes_mut(bytes: &'a mut [u8]) -> Self {
        Self::new_with(bytes)
    }

    /// Write an integer or a `&[u8]` slice at the current offset and increment
    /// the current offset by the size of `val`.
    ///
    /// For integers, the type may be any of: u8, u16, u32, u64
    ///
    /// # Panics
    ///
    /// Panics if the size of `val` added to the current offset exceeds the
    /// length of the slice; a slice-backed buffer cannot grow.
    ///
    pub fn write<V: Writeable>(&mut self, val: V) -> &mut Self {
        let start = self.offset;
        self.offset = start + val.size();
        self.write_at(start, val)
    }

    /// Return the byte length of the inner slice.
    pub fn len(&self) -> usize {
        self.inner.len()
    }
}

impl ByteBuffer<Vec<u8>> {
    /// Create a `size` length byte buffer and initialize the entire buffer with
    /// `0u8` (zero bytes).
    pub fn new(size: usize) -> Self {
        Self::from_vec(vec![0u8; size])
    }

    /// Create an empty buffer (`self.len() == 0`) with an inner vector instance.
    ///
    /// Data can be appended to this buffer with `self.write()`
    ///
    pub fn new_empty() -> Self {
        Self::from_vec(Vec::new())
    }

    /// Create a buffer from a `Vec<u8>`
    pub fn from_vec(vec: Vec<u8>) -> Self {
        Self::new_with(vec)
    }

    /// Returns the byte length of the inner vector.
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Write an integer or a `&[u8]` slice at the current offset and increment
    /// the current offset by the size of `val`.
    ///
    /// For integers, the type may be any of: u8, u16, u32, u64
    ///
    /// If the size of `val` added to the current offset exceeds
    /// the length of the vector, the vector will be resized (new bytes are
    /// zero-filled before `val` is written).
    ///
    /// # Examples
    /// ```
    /// use ph::util::ByteBuffer;
    ///
    /// let mut buf = ByteBuffer::new_empty().big_endian();
    ///
    /// assert_eq!(buf.len(), 0);
    ///
    /// let n: u32 = 0xAABBCCDD;
    ///
    /// buf.write(n);
    ///
    /// assert_eq!(buf.as_ref(), &[0xAA, 0xBB, 0xCC, 0xDD]);
    ///
    /// buf.write(n);
    ///
    /// assert_eq!(buf.len(), 8);
    ///
    /// ```
    pub fn write<V: Writeable>(&mut self, val: V) -> &mut Self {
        let start = self.offset;
        let end = start + val.size();
        self.offset = end;
        // Grow the vector first so that `write_at` always has room.
        if end > self.inner.len() {
            self.inner.resize(end, 0);
        }
        self.write_at(start, val)
    }
}
/// The byte-order configuration of a `ByteBuffer`
#[derive(Debug, Clone, Copy)]
pub enum Endian {
    /// Most significant byte first.
    Big,
    /// Least significant byte first.
    Little,
    /// Whatever byte order the target platform uses.
    Native,
}

/// An object type which can be read from a `ByteBuffer` with the
/// `self.read()` or `self.read_at()` methods.
pub trait Readable {
    /// Number of bytes consumed when decoding one value.
    const SIZE: usize;

    /// Decode a value from the first `SIZE` bytes of `bytes` using the byte
    /// order `endian`.
    fn read(bytes: &[u8], endian: Endian) -> Self;
}

/// An object type which can be written to a `ByteBuffer` with the
/// `self.write(val)` or `self.write_at(val)` methods.
pub trait Writeable {
    /// Number of bytes this value occupies once encoded.
    fn size(&self) -> usize;

    /// Encode this value into `bytes` using the byte order `endian`.
    fn write(&self, bytes: &mut [u8], endian: Endian);
}

/// Byte slices are copied verbatim; the byte order is ignored.
impl Writeable for &[u8] {
    fn size(&self) -> usize {
        self.len()
    }

    /// # Panics
    ///
    /// Panics if `bytes` and `self` have different lengths.
    fn write(&self, bytes: &mut [u8], _endian: Endian) {
        bytes.copy_from_slice(*self);
    }
}

/// Implements `Writeable` and `Readable` for each listed primitive integer type.
macro_rules! storeable_int {
    ($($T:ty),+) => {$(
        impl Writeable for $T {
            fn size(&self) -> usize {
                ::std::mem::size_of::<$T>()
            }

            fn write(&self, bytes: &mut [u8], endian: Endian) {
                let encoded = match endian {
                    Endian::Big => self.to_be_bytes(),
                    Endian::Little => self.to_le_bytes(),
                    Endian::Native => self.to_ne_bytes(),
                };
                // Panics unless `bytes` is exactly as long as the encoded value.
                bytes.copy_from_slice(&encoded);
            }
        }

        impl Readable for $T {
            const SIZE: usize = ::std::mem::size_of::<$T>();

            fn read(bytes: &[u8], endian: Endian) -> Self {
                // Panics if `bytes` holds fewer than `SIZE` bytes.
                let mut raw = [0u8; Self::SIZE];
                raw.copy_from_slice(&bytes[..Self::SIZE]);
                match endian {
                    Endian::Big => <$T>::from_be_bytes(raw),
                    Endian::Little => <$T>::from_le_bytes(raw),
                    Endian::Native => <$T>::from_ne_bytes(raw),
                }
            }
        }
    )+};
}

storeable_int!(u8, u16, u32, u64);

8
src/util/mod.rs Normal file
View File

@ -0,0 +1,8 @@
mod bitvec;
mod buffer;
#[macro_use]
mod log;
pub use bitvec::BitSet;
pub use buffer::ByteBuffer;
pub use log::{Logger,LogLevel};

View File

@ -5,7 +5,7 @@ use crate::memory::{AddressRange, MemoryManager};
use super::{VirtioDevice,VirtioDeviceOps,PciIrq}; use super::{VirtioDevice,VirtioDeviceOps,PciIrq};
use super::consts::*; use super::consts::*;
use super::pci::PciBus; use super::pci::PciBus;
use crate::vm::Result; use crate::virtio::Result;
use std::iter; use std::iter;

View File

@ -1,291 +1,312 @@
use std::fmt;
use std::io::{self,Read,Write}; use std::io::{self,Read,Write};
use crate::memory::GuestRam; use crate::memory::GuestRam;
use super::VirtQueue; use crate::virtio::VirtQueue;
use super::vring::Descriptor; use crate::virtio::vring::Descriptor;
use byteorder::{WriteBytesExt, LittleEndian, ReadBytesExt};
pub struct Chain {
struct DescriptorList {
memory: GuestRam, memory: GuestRam,
descriptors: Vec<Descriptor>,
vq: VirtQueue,
/// Number of remaining descriptors allowed in this chain.
ttl: u16,
/// Current descriptor or `None` if at end of chain
current: Option<Descriptor>,
/// Offset for read/write into current descriptor
offset: usize, offset: usize,
total_size: usize,
/// Saved head index to place in used ring. Set to `None` consumed_size: usize,
/// after writing to used ring.
head_idx: Option<u16>,
/// Number of bytes written into writeable descriptors
/// in this chain. Will be written into used ring later.
wlen: usize,
} }
impl DescriptorList {
impl Chain { fn new(memory: GuestRam) -> Self {
pub fn new(memory: GuestRam, vq: VirtQueue, head: u16, ttl: u16) -> Chain { DescriptorList {
let first = vq.load_descriptor(head);
Chain {
memory, memory,
vq, ttl, head_idx: Some(head), descriptors: Vec::new(),
current: first, offset: 0,
offset: 0, wlen: 0, total_size: 0,
consumed_size: 0,
} }
} }
/// Applies a function to the current descriptor (if `Some`) or fn add_descriptor(&mut self, d: Descriptor) {
/// returns default parameter `d` (if `None`). self.total_size += d.len as usize;
pub fn with_current_descriptor<U,F>(&self, d: U, f: F) -> U self.descriptors.push(d)
where F: FnOnce(&Descriptor) -> U {
match self.current {
Some(ref desc) => f(desc),
None => d,
}
} }
/// Load and return next descriptor from chain. fn reverse(&mut self) {
/// self.descriptors.reverse();
/// If `self.current` }
///
/// 1) holds a descriptor (`self.current.is_some()`) fn clear(&mut self) {
/// 2) that descriptor has a next field (`desc.has_next()`) self.descriptors.clear();
/// 3) time-to-live is not zero (`self.ttl > 0`) self.offset = 0;
/// }
/// then load and return the descriptor pointed to by the current
/// descriptor. Returns `None` otherwise. fn is_empty(&self) -> bool {
/// self.descriptors.is_empty()
fn next_desc(&self) -> Option<Descriptor> { }
self.with_current_descriptor(None, |desc| {
if desc.has_next() && self.ttl > 0 { fn current(&self) -> Option<&Descriptor> {
self.vq.load_descriptor(desc.next) self.descriptors.last()
}
fn current_address(&self, size: usize) -> Option<u64> {
self.current().and_then(|d| {
if d.remaining(self.offset) >= size {
Some(d.addr + self.offset as u64)
} else { } else {
None None
} }
}) })
} }
/// Load next descriptor in chain into `self.current`. fn inc(&mut self, len: usize) {
/// let d = match self.current() {
/// Set `self.current` to the next descriptor in chain or `None` if Some(d) => d,
/// at end of chain. None => {
/// warn!("Virtqueue increment called with no current descriptor");
pub fn load_next_descriptor(&mut self) { return;
self.current = self.next_desc(); }
// Only decrement ttl if a new descriptor was loaded };
if self.current.is_some() { let remaining = d.remaining(self.offset);
self.ttl -= 1; if len > remaining {
warn!("Virtqueue descriptor buffer increment exceeds current size");
} }
self.offset = 0; if len >= remaining {
} self.consumed_size += remaining;
self.offset = 0;
/// self.descriptors.pop();
/// Return `true` if current descriptor exists and is readable, otherwise } else {
/// `false`. self.consumed_size += len;
/// self.offset += len;
pub fn is_current_readable(&self) -> bool {
self.with_current_descriptor(false, |desc| !desc.is_write())
}
///
/// If `current` is a readable descriptor, keep loading new descriptors until
/// a writeable descriptor is found or end of chain is reached. After this
/// call `current` will either be a writeable descriptor or `None` if the
/// end of chain was reached.
///
pub fn skip_readable(&mut self) {
while self.is_current_readable() {
self.load_next_descriptor();
} }
} }
/// Return `true` if the end of the descriptor chain has been reached. fn read(&mut self, buf: &mut [u8]) -> usize {
/// if let Some(d) = self.current() {
/// When at end of chain `self.current` is `None`. let n = d.read_from(&self.memory, self.offset, buf);
pub fn is_end_of_chain(&self) -> bool { self.inc(n);
self.current.is_none() return n;
}
0
} }
/// fn write(&mut self, buf: &[u8]) -> usize {
/// Length field of current descriptor is returned or 0 if if let Some(d) = self.current() {
/// at end of chain. let n = d.write_to(&self.memory, self.offset, buf);
/// self.inc(n);
fn current_size(&self) -> usize { return n;
self.with_current_descriptor(0, |desc| desc.len as usize) }
0
} }
/// fn write_from_reader<R>(&mut self, reader: R, size: usize) -> io::Result<usize>
/// Increment `self.offset` with the number of bytes where R: Read+Sized
/// read or written from `current` descriptor and {
/// load next descriptor if `current` descriptor if let Some(d) = self.current() {
/// has been fully consumed. let n = d.write_from_reader(&self.memory, self.offset, reader, size)?;
/// self.inc(n);
fn _inc_offset(&mut self, sz: usize) { Ok(n)
self.offset += sz; } else {
if self.offset >= self.current_size() { Ok(0)
self.load_next_descriptor();
} }
} }
pub fn inc_offset(&mut self, sz: usize, write: bool) { fn current_slice(&self) -> &[u8] {
if write { if let Some(d) = self.current() {
assert!(!self.is_current_readable()); let size = d.remaining(self.offset);
self.wlen += sz; let addr = d.addr + self.offset as u64;
} self.memory.slice(addr, size).unwrap_or(&[])
self._inc_offset(sz) } else {
} &[]
///
/// Read from the `current` readable descriptor and return
/// the number of bytes read.
///
/// If this read exhausts the `current` descriptor then the
/// next descriptor in chain will be loaded into `current`.
///
/// Assumes that current is a readable descriptor so caller must
/// call `self.is_current_readable()` before calling this.
///
fn read_current(&mut self, bytes: &mut[u8]) -> usize {
assert!(self.is_current_readable());
let nread = self.with_current_descriptor(0, |desc| {
desc.read_from(&self.memory, self.offset, bytes)
});
self._inc_offset(nread);
nread
}
///
/// Write into the `current` writeable descriptor if it exists
/// and return the number of bytes written or 0 if at end of chain.
///
/// If this write exhausts the `current` descriptor then the
/// next descriptor in chain will be loaded into `current`.
///
/// Assumes that `current` is a writeable descriptor or `None`
/// so caller must call `self.skip_readable()` before calling this.
///
fn write_current(&mut self, bytes: &[u8]) -> usize {
assert!(!self.is_current_readable());
let sz = self.with_current_descriptor(0, |desc| {
desc.write_to(&self.memory, self.offset, bytes)
});
self._inc_offset(sz);
sz
}
///
/// Write this chain head index (`self.head_idx`) and bytes written (`self.wlen`)
/// into used ring. Consumes `self.head_idx` so that used ring cannot
/// accidentally be written more than once. Since we have returned this
/// chain to the guest, it is no longer valid to access any descriptors in
/// this chain so `self.current` is set to `None`.
///
pub fn flush_chain(&mut self) {
match self.head_idx {
Some(idx) => self.vq.put_used(idx, self.wlen as u32),
None => (),
}
self.current = None;
self.head_idx = None;
}
pub fn current_write_address(&mut self, size: usize) -> Option<u64> {
self.skip_readable();
self.current_address(size)
}
pub fn current_address(&mut self, size: usize) -> Option<u64> {
self.with_current_descriptor(None, |desc| {
if desc.len as usize - self.offset < size {
None
} else {
Some(desc.addr + self.offset as u64)
}
})
}
pub fn get_wlen(&self) -> usize {
self.wlen
}
#[allow(dead_code)]
pub fn debug(&self) {
self.with_current_descriptor((), |desc| {
println!("offset: {} desc: {:?}", self.offset, desc);
});
}
pub fn copy_from_reader<R: Read+Sized>(&mut self, r: R, size: usize) -> io::Result<usize> {
self.skip_readable();
assert!(!self.is_current_readable());
let res = self.with_current_descriptor(Ok(0usize), |desc| {
desc.write_from_reader(&self.memory, self.offset,r, size)
});
if let Ok(nread) = res {
self._inc_offset(nread);
self.wlen += nread;
}
res
}
pub fn current_write_slice(&self) -> &mut [u8] {
match self.current {
Some(d) if d.is_write() && d.remaining(self.offset) > 0 => {
let size = d.remaining(self.offset);
self.memory.mut_slice(d.addr + self.offset as u64, size).unwrap_or(&mut [])
},
_ => &mut [],
} }
} }
pub fn current_read_slice(&self) -> &[u8] {
match self.current { fn current_mut_slice(&self) -> &mut [u8] {
Some(d) if !d.is_write() && d.remaining(self.offset) > 0 => { if let Some(d) = self.current() {
let size = d.remaining(self.offset); let size = d.remaining(self.offset);
self.memory.slice(d.addr + self.offset as u64, size).unwrap_or(&[]) let addr = d.addr + self.offset as u64;
}, self.memory.mut_slice(addr, size).unwrap_or(&mut [])
_ => &[], } else {
&mut []
} }
} }
fn remaining(&self) -> usize {
self.total_size - self.consumed_size
}
}
/// Debug rendering: `DList[size=N, [(#idx, 0xADDR, [len]),...]]`.
impl fmt::Debug for DescriptorList {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "DList[size={}, [", self.total_size)?;
        // The vector is walked back to front: its last element is the
        // descriptor that will be consumed next, so this prints the
        // descriptors in the order they will be used.
        for d in self.descriptors.iter().rev() {
            write!(f, "(#{}, 0x{:08x}, [{}]),", d.idx, d.addr, d.len)?;
        }
        // Two brackets were opened above (`DList[` and the entry list `[`),
        // so both must be closed here to keep the output balanced.
        write!(f, "]]")
    }
}
pub struct Chain {
head: Option<u16>,
vq: VirtQueue,
readable: DescriptorList,
writeable: DescriptorList,
}
impl Chain {
pub fn new(memory: GuestRam, vq: VirtQueue, head: u16, ttl: u16) -> Self {
let (readable,writeable) = Self::load_descriptors(memory, &vq, head, ttl);
Chain {
head: Some(head),
vq,
readable,
writeable,
}
}
/// Walk the descriptor chain starting at `head` and split it into a
/// readable list and a writeable list.
///
/// At most `ttl` descriptors are followed; once that budget is spent a
/// warning is logged and the walk stops. A readable descriptor that
/// appears after a writeable one is logged but still accepted.
///
/// Both lists are reversed before being returned so that the first
/// descriptor of the chain ends up at the tail of each vector.
fn load_descriptors(memory: GuestRam, vq: &VirtQueue, head: u16, ttl: u16) -> (DescriptorList, DescriptorList) {
    let mut readable = DescriptorList::new(memory.clone());
    let mut writeable = DescriptorList::new(memory);

    let mut next_idx = head;
    let mut budget = ttl;
    loop {
        let desc = match vq.load_descriptor(next_idx) {
            Some(desc) => desc,
            None => break,
        };

        if budget == 0 {
            warn!("Descriptor chain length exceeded ttl");
            break;
        }
        budget -= 1;

        if desc.is_write() {
            writeable.add_descriptor(desc);
        } else {
            if !writeable.is_empty() {
                warn!("Guest sent readable virtqueue descriptor after writeable descriptor in violation of specification");
            }
            readable.add_descriptor(desc);
        }

        if !desc.has_next() {
            break;
        }
        next_idx = desc.next;
    }

    readable.reverse();
    writeable.reverse();
    (readable, writeable)
}
pub fn w8(&mut self, n: u8) -> io::Result<()> { pub fn w8(&mut self, n: u8) -> io::Result<()> {
self.write_u8(n) self.write_all(&[n])?;
Ok(())
} }
#[allow(unused)]
pub fn w16(&mut self, n: u16) -> io::Result<()> { pub fn w16(&mut self, n: u16) -> io::Result<()> {
self.write_u16::<LittleEndian>(n) self.write_all(&n.to_le_bytes())?;
Ok(())
} }
pub fn w32(&mut self, n: u32) -> io::Result<()> { pub fn w32(&mut self, n: u32) -> io::Result<()> {
self.write_u32::<LittleEndian>(n) self.write_all(&n.to_le_bytes())?;
Ok(())
} }
pub fn w64(&mut self, n: u64) -> io::Result<()> { pub fn w64(&mut self, n: u64) -> io::Result<()> {
self.write_u64::<LittleEndian>(n) self.write_all(&n.to_le_bytes())?;
Ok(())
} }
#[allow(unused)]
pub fn r16(&mut self) -> io::Result<u16> { pub fn r16(&mut self) -> io::Result<u16> {
self.read_u16::<LittleEndian>() let mut buf = [0u8; 2];
self.read_exact(&mut buf)?;
Ok(u16::from_le_bytes(buf))
} }
pub fn r32(&mut self) -> io::Result<u32> { pub fn r32(&mut self) -> io::Result<u32> {
self.read_u32::<LittleEndian>() let mut buf = [0u8; 4];
self.read_exact(&mut buf)?;
Ok(u32::from_le_bytes(buf))
} }
pub fn r64(&mut self) -> io::Result<u64> { pub fn r64(&mut self) -> io::Result<u64> {
self.read_u64::<LittleEndian>() let mut buf = [0u8; 8];
self.read_exact(&mut buf)?;
Ok(u64::from_le_bytes(buf))
}
/// Hand this chain back to the guest by publishing its head index and
/// the number of bytes written into the used ring.
///
/// The head index is taken out of `self.head`, so a second call finds
/// `None` and does nothing. Both descriptor lists are emptied first,
/// which prevents any further access through this chain.
pub fn flush_chain(&mut self) {
    let head = match self.head.take() {
        Some(head) => head,
        None => return,
    };
    self.readable.clear();
    self.writeable.clear();
    // NOTE(review): `clear()` as visible in this file drops the descriptors
    // and resets the offset only, so `consumed_size` is presumably still the
    // running total of bytes written at this point; confirm.
    self.vq.put_used(head, self.writeable.consumed_size as u32);
}
/// Ask the writeable list for the guest address of a `size` byte region in
/// its current descriptor; forwards the result of `current_address(size)`.
pub fn current_write_address(&mut self, size: usize) -> Option<u64> {
self.writeable.current_address(size)
}
/// Bytes still unread across the readable descriptors.
pub fn remaining_read(&self) -> usize {
self.readable.remaining()
}
/// Bytes still unwritten across the writeable descriptors.
pub fn remaining_write(&self) -> usize {
self.writeable.remaining()
}
/// Number of bytes consumed from the writeable descriptors so far.
pub fn get_wlen(&self) -> usize {
self.writeable.consumed_size
}
/// `true` once neither the readable nor the writeable list holds a descriptor.
pub fn is_end_of_chain(&self) -> bool {
self.readable.is_empty() && self.writeable.is_empty()
}
/// Slice returned by the readable list's `current_slice()`.
pub fn current_read_slice(&self) -> &[u8] {
self.readable.current_slice()
}
/// Advance the read position by `sz` bytes.
pub fn inc_read_offset(&mut self, sz: usize) {
self.readable.inc(sz);
}
/// Advance the write position by `sz` bytes.
///
/// Any readable descriptors still pending are discarded first.
pub fn inc_write_offset(&mut self, sz: usize) {
if !self.readable.is_empty() {
self.readable.clear();
}
self.writeable.inc(sz);
}
/// Mutable slice returned by the writeable list's `current_mut_slice()`.
pub fn current_write_slice(&mut self) -> &mut [u8] {
self.writeable.current_mut_slice()
}
/// Forward `r` and `size` to the writeable list's `write_from_reader` and
/// return its result (byte count or I/O error).
pub fn copy_from_reader<R>(&mut self, r: R, size: usize) -> io::Result<usize>
where R: Read+Sized
{
self.writeable.write_from_reader(r, size)
}
}
/// Reads from the readable descriptors of the chain.
///
/// Never returns an error; the read is short when the readable list
/// stops yielding bytes before `buf` is full.
impl Read for Chain {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut filled = 0usize;
        while filled < buf.len() {
            let n = self.readable.read(&mut buf[filled..]);
            if n == 0 {
                // Nothing more to read from the readable list.
                break;
            }
            filled += n;
        }
        Ok(filled)
    }
}
impl Write for Chain {
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
let mut nwrote = 0;
while nwrote < buf.len() {
match self.writeable.write(&buf[nwrote..]) {
0 => return Ok(nwrote),
n => nwrote += n,
};
}
Ok(nwrote)
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
} }
} }
@ -295,30 +316,8 @@ impl Drop for Chain {
} }
} }
impl Read for Chain { impl fmt::Debug for Chain {
// nb: does not fail, but can read short fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> { write!(f, "Chain {{ R {:?} W {:?} }}", self.readable, self.writeable)
let mut nread = 0usize;
while self.is_current_readable() && nread < buf.len() {
nread += self.read_current(&mut buf[nread..]);
}
Ok(nread)
}
}
impl Write for Chain {
// nb: does not fail, but can write short
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
self.skip_readable();
let mut nwrote = 0usize;
while !self.is_end_of_chain() && nwrote < buf.len() {
nwrote += self.write_current(&buf[nwrote..]);
}
self.wlen += nwrote;
Ok(nwrote)
}
fn flush(&mut self) -> io::Result<()> {
Ok(())
} }
} }

View File

@ -1,13 +1,12 @@
use crate::memory::GuestRam; use crate::memory::GuestRam;
use std::sync::Arc; use std::sync::Arc;
use crate::vm::Result;
use super::VirtQueue; use super::VirtQueue;
use super::eventfd::IoEventFd;
use super::vring::Vring; use super::vring::Vring;
use super::virtqueue::InterruptLine; use super::virtqueue::InterruptLine;
use super::bus::VirtioDeviceConfig; use super::bus::VirtioDeviceConfig;
use crate::virtio::{Result, Error};
use crate::kvm::IoEventFd;
/// ///
/// Manages a set of virtqueues during device initialization. /// Manages a set of virtqueues during device initialization.
@ -116,7 +115,8 @@ fn create_ioeventfds(conf: &VirtioDeviceConfig) -> Result<Vec<Arc<IoEventFd>>> {
let notify_base = conf.notify_mmio().base(); let notify_base = conf.notify_mmio().base();
for i in 0..conf.num_queues() { for i in 0..conf.num_queues() {
let evt = IoEventFd::new(conf.kvm(), notify_base + (4 * i as u64))?; let evt = IoEventFd::new(conf.kvm(), notify_base + (4 * i as u64))
.map_err(Error::CreateIoEventFd)?;
v.push(Arc::new(evt)); v.push(Arc::new(evt));
} }
Ok(v) Ok(v)

View File

@ -7,7 +7,7 @@ use super::VirtQueue;
use super::config::VirtQueueConfig; use super::config::VirtQueueConfig;
use super::consts::*; use super::consts::*;
use crate::vm::io::MmioOps; use crate::vm::io::MmioOps;
use crate::vm::Result; use crate::virtio::Result;
pub trait VirtioDeviceOps: Send+Sync { pub trait VirtioDeviceOps: Send+Sync {
fn reset(&mut self) {} fn reset(&mut self) {}

View File

@ -1,93 +0,0 @@
use std::sync::Arc;
use std::os::unix::io::{RawFd,AsRawFd};
use libc;
use crate::vm::{Result,Error,ErrorKind};
use crate::kvm::Kvm;
pub struct EventFd(RawFd);
const U64_SZ: usize = 8;
impl EventFd {
pub fn new() -> Result<EventFd> {
let fd = unsafe { libc::eventfd(0, 0) };
if fd < 0 {
return Err(Error::from_last_errno());
}
Ok(EventFd(fd))
}
pub fn write(&self, v: u64) -> Result<()> {
let ret = unsafe { libc::write(self.0, &v as *const _ as *const libc::c_void, U64_SZ) };
if ret as usize != U64_SZ {
if ret < 0 {
return Err(Error::new(ErrorKind::EventFdError, Error::from_last_errno()));
}
return Err(Error::new(ErrorKind::EventFdError, "write failed"));
}
Ok(())
}
pub fn read(&self) -> Result<u64> {
let mut v = 0u64;
let ret = unsafe { libc::read(self.0, &mut v as *mut _ as *mut libc::c_void, U64_SZ) };
if ret as usize != U64_SZ {
if ret < 0 {
return Err(Error::new(ErrorKind::EventFdError, Error::from_last_errno()));
}
return Err(Error::new(ErrorKind::EventFdError, "read failed"));
}
Ok(v)
}
}
impl Drop for EventFd {
fn drop(&mut self) {
let _ = unsafe { libc::close(self.0) };
}
}
impl AsRawFd for EventFd {
fn as_raw_fd(&self) -> RawFd {
self.0
}
}
/// An `EventFd` registered with KVM at a guest address via `ioeventfd_add`.
///
/// The registration is removed again when the value is dropped.
pub struct IoEventFd {
// KVM handle kept so the registration can be removed in `drop`.
kvm: Kvm,
// Address the eventfd was registered at.
addr: u64,
// The underlying eventfd.
evt: Arc<EventFd>
}
impl IoEventFd {
/// Create a fresh `EventFd` and register it with `kvm` at `address`.
///
/// Returns an error if either the eventfd creation or the
/// `ioeventfd_add` call fails.
pub fn new(kvm: &Kvm, address: u64) -> Result<IoEventFd> {
let evt = Arc::new(EventFd::new()?);
kvm.ioeventfd_add(address, evt.as_raw_fd())?;
Ok(IoEventFd {
kvm: kvm.clone(),
addr: address,
evt,
})
}
/// Read the counter value from the underlying eventfd.
pub fn read(&self) -> Result<u64> {
self.evt.read()
}
/// Write `v` to the underlying eventfd.
pub fn write(&self, v: u64) -> Result<()> {
self.evt.write(v)
}
}
impl Drop for IoEventFd {
fn drop(&mut self) {
// Best effort: a failure to unregister cannot be reported from `drop`.
let _ = self.kvm.ioeventfd_del(self.addr, self.evt.as_raw_fd());
}
}
impl AsRawFd for IoEventFd {
/// Raw descriptor of the underlying eventfd.
fn as_raw_fd(&self) -> RawFd {
self.evt.as_raw_fd()
}
}

View File

@ -3,7 +3,6 @@ mod chain;
mod config; mod config;
mod consts; mod consts;
mod device; mod device;
mod eventfd;
mod pci; mod pci;
mod virtqueue; mod virtqueue;
mod vring; mod vring;
@ -14,10 +13,42 @@ pub use self::pci::PciIrq;
pub use self::bus::VirtioBus; pub use self::bus::VirtioBus;
pub use self::device::{VirtioDevice,VirtioDeviceOps}; pub use self::device::{VirtioDevice,VirtioDeviceOps};
pub use self::chain::Chain; pub use self::chain::Chain;
pub use self::eventfd::EventFd;
pub use self::device_config::DeviceConfigArea; pub use self::device_config::DeviceConfigArea;
use byteorder::{ByteOrder,LittleEndian}; use byteorder::{ByteOrder,LittleEndian};
use std::{result, fmt};
use crate::{system, kvm};
pub type Result<T> = result::Result<T, Error>;
/// Errors reported by the virtio layer.
#[derive(Debug)]
pub enum Error {
    /// An `EventFd` for a virtqueue could not be created.
    CreateEventFd(system::Error),
    /// An `IoEventFd` for a virtqueue could not be created.
    CreateIoEventFd(kvm::Error),
    /// Reading from an `IoEventFd` failed.
    ReadIoEventFd(system::Error),
    /// Setting up the irqfd with KVM failed.
    IrqFd(kvm::Error),
    /// Validation was attempted on a vring that is not enabled.
    VringNotEnabled,
    /// The descriptor table at the given guest address is not a valid range.
    VringRangeInvalid(u64),
    /// The avail ring at the given guest address is not a valid range.
    VringAvailInvalid(u64),
    /// The used ring at the given guest address is not a valid range.
    VringUsedInvalid(u64),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        use Error::*;
        match self {
            CreateIoEventFd(e) => write!(f, "failed to create IoEventFd for VirtQueue: {}", e),
            CreateEventFd(e) => write!(f, "failed to create EventFd for VirtQueue: {}", e),
            ReadIoEventFd(e) => write!(f, "failed to read from IoEventFd: {}", e),
            IrqFd(e) => write!(f, "VirtQueue: {}", e),
            VringNotEnabled => write!(f, "vring is not enabled"),
            VringRangeInvalid(addr) => write!(f, "vring descriptor table range is invalid 0x{:x}", addr),
            // The message previously read "range range"; the duplicated word is removed.
            VringAvailInvalid(addr) => write!(f, "vring avail ring range is invalid 0x{:x}", addr),
            VringUsedInvalid(addr) => write!(f, "vring used ring range is invalid 0x{:x}", addr),
        }
    }
}
pub fn read_config_buffer(config: &[u8], offset: usize, size: usize) -> u64 { pub fn read_config_buffer(config: &[u8], offset: usize, size: usize) -> u64 {
if offset + size > config.len() { if offset + size > config.len() {

View File

@ -2,7 +2,7 @@ use std::sync::{Arc,RwLock};
use byteorder::{ByteOrder,LittleEndian}; use byteorder::{ByteOrder,LittleEndian};
use crate::vm::io::{IoDispatcher,IoPortOps}; use crate::vm::io::{IoDispatcher,IoPortOps};
use crate::memory::PCI_MMIO_RESERVED_BASE; use crate::vm::arch::PCI_MMIO_RESERVED_BASE;
use crate::memory::AddressRange; use crate::memory::AddressRange;
use super::consts::*; use super::consts::*;

View File

@ -4,13 +4,13 @@ use std::os::unix::io::AsRawFd;
use crate::memory::GuestRam; use crate::memory::GuestRam;
use crate::kvm::Kvm; use crate::kvm::Kvm;
use crate::vm::Result; use crate::virtio::{Result,Error};
use crate::system::EventFd;
use super::eventfd::{EventFd,IoEventFd}; use crate::kvm::IoEventFd;
use super::consts::*; use super::consts::*;
use super::vring::{Vring,Descriptor}; use super::vring::{Vring,Descriptor};
use super::bus::VirtioDeviceConfig; use super::bus::VirtioDeviceConfig;
use super::chain::Chain; use crate::virtio::chain::Chain;
#[derive(Clone)] #[derive(Clone)]
pub struct VirtQueue { pub struct VirtQueue {
@ -51,7 +51,8 @@ impl VirtQueue {
pub fn wait_ready(&self) -> Result<()> { pub fn wait_ready(&self) -> Result<()> {
if self.vring.is_empty() { if self.vring.is_empty() {
let _ = self.ioeventfd.read()?; let _ = self.ioeventfd.read()
.map_err(Error::ReadIoEventFd)?;
} }
Ok(()) Ok(())
} }
@ -129,11 +130,11 @@ pub struct QueueIter {
} }
impl Iterator for QueueIter { impl Iterator for QueueIter {
type Item = Chain; type Item = Chain;
fn next(&mut self) -> Option<Self::Item> { fn next(&mut self) -> Option<Self::Item> {
self.vq.pop_avail_entry().map(|idx| { self.vq.pop_avail_entry().map(|idx| {
Chain::new(self.vq.memory.clone(),self.vq.clone(),idx, self.vq.vring.size()) Chain::new(self.vq.memory.clone(), self.vq.clone(), idx, self.vq.vring.size())
}) })
} }
} }
@ -150,8 +151,9 @@ impl InterruptLine {
} }
fn new(kvm: &Kvm, irq: u8) -> Result<Arc<InterruptLine>> { fn new(kvm: &Kvm, irq: u8) -> Result<Arc<InterruptLine>> {
let irqfd = EventFd::new()?; let irqfd = EventFd::new().map_err(Error::CreateEventFd)?;
kvm.irqfd(irqfd.as_raw_fd() as u32, irq as u32)?; kvm.irqfd(irqfd.as_raw_fd() as u32, irq as u32)
.map_err(Error::IrqFd)?;
Ok(Arc::new(InterruptLine{ Ok(Arc::new(InterruptLine{
irqfd, irqfd,
isr: AtomicUsize::new(0) isr: AtomicUsize::new(0)

View File

@ -7,7 +7,7 @@ use std::io::{self, Read};
use crate::memory::GuestRam; use crate::memory::GuestRam;
use super::consts::*; use super::consts::*;
use crate::vm::{Result,Error,ErrorKind}; use crate::virtio::{Result,Error};
/// ///
/// A convenience wrapper around `AtomicUsize` /// A convenience wrapper around `AtomicUsize`
@ -275,25 +275,21 @@ impl Vring {
} }
pub fn validate(&self) -> Result<()> { pub fn validate(&self) -> Result<()> {
fn vring_err<T: ToString>(msg: T) -> Result<()> {
Err(Error::new(ErrorKind::InvalidVring, msg.to_string()))
}
if !self.enabled { if !self.enabled {
return vring_err("vring is not enabled"); return Err(Error::VringNotEnabled);
} }
let qsz = self.queue_size as usize; let qsz = self.queue_size as usize;
let desc_table_sz = 16 * qsz; let desc_table_sz = 16 * qsz;
let avail_ring_sz = 6 + 2 * qsz; let avail_ring_sz = 6 + 2 * qsz;
let used_ring_sz = 6 + 8 * qsz; let used_ring_sz = 6 + 8 * qsz;
if !self.memory.is_valid_range(self.descriptors, desc_table_sz) { if !self.memory.is_valid_range(self.descriptors, desc_table_sz) {
return vring_err(format!("descriptor table range is invalid 0x{:x}", self.descriptors)); return Err(Error::VringRangeInvalid(self.descriptors));
} }
if !self.memory.is_valid_range(self.avail_ring, avail_ring_sz) { if !self.memory.is_valid_range(self.avail_ring, avail_ring_sz) {
return vring_err(format!("avail ring range is invalid 0x{:x}", self.avail_ring)); return Err(Error::VringAvailInvalid(self.avail_ring));
} }
if !self.memory.is_valid_range(self.used_ring, used_ring_sz) { if !self.memory.is_valid_range(self.used_ring, used_ring_sz) {
return vring_err(format!("used ring range is invalid 0x{:x}", self.used_ring)); return Err(Error::VringUsedInvalid(self.used_ring));
} }
Ok(()) Ok(())
} }

31
src/vm/arch/error.rs Normal file
View File

@ -0,0 +1,31 @@
use crate::{kvm, system, memory};
use crate::system::ErrnoError;
use std::{fmt, result};
/// Errors reported by the architecture specific setup code.
#[derive(Debug)]
pub enum Error {
    /// The memory manager could not be created.
    MemoryManagerCreate(memory::Error),
    /// A memory region could not be registered with KVM.
    MemoryRegister(kvm::Error),
    /// A memory region could not be created.
    MemoryRegionCreate(system::Error),
    /// The kernel image could not be loaded.
    LoadKernel(system::Error),
    /// A KVM error, displayed without any extra prefix.
    KvmError(kvm::Error),
    /// A system error, displayed without any extra prefix.
    SystemError(system::Error),
    /// The named ioctl failed with the given errno.
    IoctlError(&'static str, ErrnoError),
}

impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Error::MemoryManagerCreate(cause) => {
                write!(f, "failed to create memory manager: {}", cause)
            }
            Error::MemoryRegister(cause) => {
                write!(f, "failed to register memory region: {}", cause)
            }
            Error::MemoryRegionCreate(cause) => {
                write!(f, "failed to create memory region: {}", cause)
            }
            Error::LoadKernel(cause) => write!(f, "error loading kernel: {}", cause),
            // The two pass-through variants defer entirely to the inner error.
            Error::KvmError(inner) => inner.fmt(f),
            Error::SystemError(inner) => inner.fmt(f),
            Error::IoctlError(ioctl_name, errno) => {
                write!(f, "failed to call {} ioctl: {}", ioctl_name, errno)
            }
        }
    }
}
pub type Result<T> = result::Result<T, Error>;

27
src/vm/arch/mod.rs Normal file
View File

@ -0,0 +1,27 @@
use crate::kvm::{KvmVcpu, Kvm};
pub use crate::vm::arch::x86::X86ArchSetup;
use crate::memory::MemoryManager;
mod error;
mod x86;
pub use x86::PCI_MMIO_RESERVED_BASE;
pub use x86::KvmRegs;
pub use error::{Error,Result};
use crate::vm::kernel_cmdline::KernelCmdLine;
use crate::vm::VmConfig;
use crate::virtio::PciIrq;
/// Build the architecture setup object for `config`.
///
/// Always returns an `X86ArchSetup`, created with `X86ArchSetup::create`.
pub fn create_setup(config: &VmConfig) -> X86ArchSetup {
X86ArchSetup::create(config)
}
/// Steps an architecture backend provides for bringing up a VM.
///
/// NOTE(review): the descriptions below are read off the method names and
/// signatures only; confirm against the `X86ArchSetup` implementation.
pub trait ArchSetup {
/// Open and return a `Kvm` handle.
fn open_kvm(&self) -> Result<Kvm>;
/// Create the `MemoryManager` for the VM using `kvm`.
fn create_memory(&mut self, kvm: &Kvm) -> Result<MemoryManager>;
/// Prepare guest memory given the kernel command line and the PCI irqs.
fn setup_memory(&mut self, cmdline: &KernelCmdLine, pci_irqs: &[PciIrq]) -> Result<()>;
/// Configure a single vcpu.
fn setup_vcpu(&self, vcpu: &KvmVcpu) -> Result<()>;
}

121
src/vm/arch/x86/cpuid.rs Normal file
View File

@ -0,0 +1,121 @@
use std::os::unix::io::RawFd;
use crate::vm::arch::Result;
use crate::kvm::KvmVcpu;
use crate::vm::arch::x86::ioctl::{KVM_GET_SUPPORTED_CPUID, KVM_SET_CPUID2, call_ioctl_with_ref, call_ioctl_with_mut_ref};
// Bit positions and values used when patching CPUID entries in `setup_cpuid`.
// Names prefixed with `_` are not referenced by active code in this file.
const EBX_CLFLUSH_CACHELINE: u32 = 8; // Value stored in the CLFLUSH line size field (presumably in 8-byte units, i.e. 64 bytes; confirm against the CPUID reference).
const EBX_CLFLUSH_SIZE_SHIFT: u32 = 8; // Bit offset in EBX of the CLFLUSH line size field.
const _EBX_CPU_COUNT_SHIFT: u32 = 16; // Bit offset in EBX of the logical CPU count.
const EBX_CPUID_SHIFT: u32 = 24; // Bit offset in EBX of the index of this CPU.
const _ECX_EPB_SHIFT: u32 = 3; // "Energy Performance Bias" bit.
const _ECX_HYPERVISOR_SHIFT: u32 = 31; // Flag to be set when the cpu is running on a hypervisor.
const _EDX_HTT_SHIFT: u32 = 28; // Hyper Threading Enabled.
/// Fetch the CPUID entries supported by KVM, patch a few leaves and
/// install the result on `vcpu`.
///
/// Patches applied:
/// * leaf 0: the vendor registers are overwritten (the bytes of EBX, EDX,
///   ECX spell `"Subgraph pH "`).
/// * leaf 1: bit 31 of ECX is set on index 0, and EBX is rebuilt from the
///   cpu index and the CLFLUSH line size.
/// * leaf 6: bit 3 of ECX is cleared.
/// * leaf 10: EAX is zeroed unless it reports version 2 with at least one
///   counter.
///
/// Returns an error if either ioctl fails.
pub fn setup_cpuid(vcpu: &KvmVcpu) -> Result<()> {
    let mut entries = kvm_get_supported_cpuid(vcpu.sys_raw_fd())?;
    // Only the first vcpu is described; no CPU count or HTT bit is advertised.
    let cpu_id = 0u32;

    for entry in entries.iter_mut() {
        match entry.function {
            0 => {
                entry.ebx = 0x67627553;
                entry.ecx = 0x20487020;
                entry.edx = 0x68706172;
            }
            1 => {
                if entry.index == 0 {
                    entry.ecx |= 1 << 31;
                }
                let cpu_bits = cpu_id << EBX_CPUID_SHIFT;
                let clflush_bits = EBX_CLFLUSH_CACHELINE << EBX_CLFLUSH_SIZE_SHIFT;
                entry.ebx = cpu_bits | clflush_bits;
            }
            6 => entry.ecx &= !(1 << 3),
            10 => {
                let version = entry.eax & 0xFF;
                let ncounters = (entry.eax >> 8) & 0xFF;
                if entry.eax > 0 && (version != 2 || ncounters == 0) {
                    entry.eax = 0;
                }
            }
            _ => {}
        }
    }

    kvm_set_cpuid2(vcpu.raw_fd(), entries)
}
/// Issue `KVM_GET_SUPPORTED_CPUID` on `sysfd` and return the entries the
/// call filled in.
///
/// The request buffer is created with room for the maximum number of
/// entries. Returns an error if the ioctl fails.
pub fn kvm_get_supported_cpuid(sysfd: RawFd) -> Result<Vec<KvmCpuIdEntry>> {
let mut cpuid = KvmCpuId2::new();
call_ioctl_with_mut_ref("KVM_GET_SUPPORTED_CPUID", sysfd, KVM_GET_SUPPORTED_CPUID, &mut cpuid)?;
Ok(cpuid.get_entries())
}
/// Issue `KVM_SET_CPUID2` on `cpufd` with the given `entries`.
///
/// Panics if `entries` holds more than the maximum number of entries
/// (see `KvmCpuId2::new_from_entries`). Returns an error if the ioctl fails.
pub fn kvm_set_cpuid2(cpufd: RawFd, entries: Vec<KvmCpuIdEntry>) -> Result<()> {
let cpuid = KvmCpuId2::new_from_entries(entries);
call_ioctl_with_ref("KVM_SET_CPUID2", cpufd, KVM_SET_CPUID2, &cpuid)
}
/// One CPUID entry as exchanged with the CPUID ioctls: the queried
/// function/index pair, flags, and the four result registers.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmCpuIdEntry {
    pub function: u32,
    pub index: u32,
    pub flags: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
    padding: [u32; 3]
}

/// Capacity of the fixed entry array in `KvmCpuId2`.
const KVM_CPUID_MAX_ENTRIES: usize = 256;

/// Fixed-capacity CPUID table: an entry count followed by the entries.
#[repr(C)]
pub struct KvmCpuId2 {
    /// Number of valid entries at the start of `entries`.
    nent: u32,
    padding: u32,
    entries: [KvmCpuIdEntry; KVM_CPUID_MAX_ENTRIES]
}

impl KvmCpuId2 {
    /// Create a table of zeroed entries whose count is set to the full
    /// capacity, which is the form used as an output buffer.
    pub fn new() -> KvmCpuId2 {
        KvmCpuId2 {
            nent: KVM_CPUID_MAX_ENTRIES as u32,
            padding: 0,
            entries: [Default::default(); KVM_CPUID_MAX_ENTRIES],
        }
    }

    /// Create a table holding exactly `entries`.
    ///
    /// # Panics
    ///
    /// Panics if `entries` is longer than `KVM_CPUID_MAX_ENTRIES`.
    pub fn new_from_entries(entries: Vec<KvmCpuIdEntry>) -> KvmCpuId2 {
        let sz = entries.len();
        assert!(sz <= KVM_CPUID_MAX_ENTRIES, "Too many cpuid entries");
        let mut cpuid = KvmCpuId2::new();
        cpuid.entries[..sz].copy_from_slice(&entries);
        cpuid.nent = sz as u32;
        cpuid
    }

    /// Return a copy of the valid entries.
    ///
    /// `nent` can be rewritten through the ioctl buffer, so it is clamped to
    /// the array capacity instead of being trusted as an index bound; an
    /// oversized count therefore yields all stored entries rather than a
    /// panic.
    pub fn get_entries(&self) -> Vec<KvmCpuIdEntry> {
        let count = (self.nent as usize).min(KVM_CPUID_MAX_ENTRIES);
        self.entries[..count].to_vec()
    }
}

View File

@ -0,0 +1,50 @@
use std::os::unix::io::RawFd;
use crate::system::ioctl::{ioctl_with_mut_ref, ioctl_with_ref};
use crate::vm::arch::{Error,Result};
use crate::vm::arch::x86::ioctl::{KVM_GET_LAPIC, KVM_SET_LAPIC};
/// Raw local APIC register block exchanged with the LAPIC ioctls.
#[repr(C)]
pub struct KvmLapicState {
    /// The 1024 register bytes, indexed by register offset.
    pub regs: [u8; 1024]
}

impl KvmLapicState {
    /// Create a register block with every byte zeroed.
    pub fn new() -> KvmLapicState {
        let regs = [0u8; 1024];
        Self { regs }
    }
}
pub fn kvm_get_lapic(cpufd: RawFd) -> Result<KvmLapicState> {
let mut lapic_state = KvmLapicState::new();
unsafe {
ioctl_with_mut_ref(cpufd, KVM_GET_LAPIC, &mut lapic_state)
.map_err(|e| Error::IoctlError("KVM_GET_LAPIC", e))?;
}
Ok(lapic_state)
}
pub fn kvm_set_lapic(cpufd: RawFd, lapic_state: &KvmLapicState) -> Result<()> {
unsafe {
ioctl_with_ref(cpufd, KVM_SET_LAPIC, lapic_state)
.map_err(|e| Error::IoctlError("KVM_SET_LAPIC", e))?;
}
Ok(())
}
const APIC_MODE_EXTINT: u8 = 0x7;
const APIC_MODE_NMI: u8 = 0x4;
const APIC_LVT_LINT0_OFFSET: usize = 0x350;
const APIC_LVT_LINT1_OFFSET: usize = 0x360;
/// Program the delivery mode of the two local interrupt pins of the vcpu
/// behind `cpufd`: LINT0 is set to ExtINT and LINT1 to NMI.
///
/// The current LAPIC state is read, the low three bits of the byte at
/// `offset + 1` of each LVT register are replaced, and the state is
/// written back. Returns an error if either ioctl fails.
pub fn setup_lapic(cpufd: RawFd) -> Result<()> {
    /// Replace the three delivery mode bits of `reg` with `mode`.
    fn set_delivery_mode(reg: &mut u8, mode: u8) {
        *reg = (*reg & 0xF8) | mode;
    }

    let mut lapic = kvm_get_lapic(cpufd)?;
    set_delivery_mode(&mut lapic.regs[APIC_LVT_LINT0_OFFSET + 1], APIC_MODE_EXTINT);
    set_delivery_mode(&mut lapic.regs[APIC_LVT_LINT1_OFFSET + 1], APIC_MODE_NMI);
    kvm_set_lapic(cpufd, &lapic)
}

44
src/vm/arch/x86/ioctl.rs Normal file
View File

@ -0,0 +1,44 @@
use std::os::unix::io::RawFd;
use libc::{self, c_ulong};
use crate::system::ioctl::{ioctl_with_ref, ioctl_with_mut_ref, ioctl_with_val};
use crate::vm::arch::{Error,Result};
// ioctl request codes for the x86 specific KVM calls used by this package.
//
// NOTE(review): the last macro argument looks like the size in bytes of the
// ioctl argument (1024 matches `KvmLapicState`, 64 matches `KvmPitConfig`,
// 8 matches the two `u32` header fields of `KvmCpuId2`); confirm against the
// `io!`/`ior!`/`iow!`/`iorw!` macro definitions.

// Type byte shared by all of the request codes below.
const KVMIO: u64 = 0xAE;
pub const KVM_GET_SUPPORTED_CPUID: libc::c_ulong = iorw! (KVMIO, 0x05, 8);
pub const KVM_SET_CPUID2: libc::c_ulong = iow! (KVMIO, 0x90, 8);
pub const KVM_SET_TSS_ADDR: c_ulong = io! (KVMIO, 0x47);
pub const KVM_CREATE_PIT2: c_ulong = iow! (KVMIO, 0x77, 64);
pub const KVM_SET_FPU: c_ulong = iow! (KVMIO, 0x8d, 416);
pub const KVM_SET_MSRS: c_ulong = iow! (KVMIO, 0x89, 8);
pub const KVM_GET_SREGS: c_ulong = ior! (KVMIO, 0x83, 312);
pub const KVM_SET_SREGS: c_ulong = iow! (KVMIO, 0x84, 312);
pub const KVM_GET_LAPIC: c_ulong = ior! (KVMIO, 0x8e, 1024);
pub const KVM_SET_LAPIC: c_ulong = iow! (KVMIO, 0x8f, 1024);
pub fn call_ioctl_with_ref<T>(name: &'static str, fd: RawFd, request: c_ulong, arg: &T) -> Result<()> {
unsafe {
ioctl_with_ref(fd, request, arg)
.map_err(|e| Error::IoctlError(name, e))?;
Ok(())
}
}
pub fn call_ioctl_with_mut_ref<T>(name: &'static str, fd: RawFd, request: c_ulong, arg: &mut T) -> Result<()> {
unsafe {
ioctl_with_mut_ref(fd, request, arg)
.map_err(|e| Error::IoctlError(name, e))?;
Ok(())
}
}
pub fn call_ioctl_with_val(name: &'static str, fd: RawFd, request: c_ulong, val: c_ulong) -> Result<()> {
unsafe {
ioctl_with_val(fd, request, val)
.map_err(|e| Error::IoctlError(name, e))?;
Ok(())
}
}

129
src/vm/arch/x86/kernel.rs Normal file
View File

@ -0,0 +1,129 @@
use std::io;
use crate::memory::GuestRam;
use crate::system;
use crate::util::ByteBuffer;
use crate::vm::arch::PCI_MMIO_RESERVED_BASE;
use crate::vm::arch::x86::memory::HIMEM_BASE;
use crate::vm::KERNEL;
pub const KVM_KERNEL_LOAD_ADDRESS: u64 = 0x1000000;
pub const KERNEL_CMDLINE_ADDRESS: u64 = 0x20000;
pub const KERNEL_ZERO_PAGE: u64 = 0x7000;
// Documentation/x86/boot.txt
const HDR_BOOT_FLAG: usize = 0x1fe; // u16
const HDR_HEADER: usize = 0x202; // u32
const HDR_TYPE_LOADER: usize = 0x210; // u8
const HDR_CMDLINE_PTR: usize = 0x228; // u32
const HDR_CMDLINE_SIZE: usize = 0x238; // u32
const HDR_KERNEL_ALIGNMENT: usize = 0x230; // u32
// Documentation/x86/zero-page.txt
const BOOT_PARAM_E820_ENTRIES: usize = 0x1e8;
const BOOT_PARAM_E820_MAP: usize = 0x2d0;
const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
const EBDA_START: u64 = 0x0009fc00;
const KERNEL_HDR_MAGIC: u32 = 0x53726448;
const KERNEL_LOADER_OTHER: u8 = 0xff;
const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x1000000;
const E820_RAM: u32 = 1;
/// Write the E820 memory map into the zero page buffer `zero`.
///
/// The map always starts with the range from 0 up to `EBDA_START`. When
/// all RAM fits below the PCI MMIO hole one further range covers the
/// memory from the kernel load address to the end of RAM; otherwise one
/// range runs up to the hole and another starts at `HIMEM_BASE`. Every
/// range is tagged `E820_RAM`.
fn setup_e820(memory: &GuestRam, mut zero: ByteBuffer<&mut [u8]>) -> system::Result<()> {
    let ram_size = memory.ram_size() as u64;

    // Each tuple is (start address, length in bytes).
    let mut ranges = vec![(0u64, EBDA_START)];
    if ram_size < PCI_MMIO_RESERVED_BASE {
        ranges.push((KVM_KERNEL_LOAD_ADDRESS, ram_size - KVM_KERNEL_LOAD_ADDRESS));
    } else {
        ranges.push((KVM_KERNEL_LOAD_ADDRESS, PCI_MMIO_RESERVED_BASE - KVM_KERNEL_LOAD_ADDRESS));
        // NOTE(review): `x86_setup_memory_regions` sizes the high region as
        // `ram_size - lowmem_sz`, while this uses `ram_size - HIMEM_BASE`.
        // The two agree only if `ram_size()` includes the MMIO hole, and the
        // subtraction underflows if `ram_size` lies below `HIMEM_BASE`;
        // confirm what `GuestRam::ram_size()` returns.
        ranges.push((HIMEM_BASE, ram_size - HIMEM_BASE));
    }

    zero.write_at(BOOT_PARAM_E820_ENTRIES, ranges.len() as u8);
    zero.set_offset(BOOT_PARAM_E820_MAP);
    for &(start, len) in &ranges {
        zero.write(start)
            .write(len)
            .write(E820_RAM);
    }
    Ok(())
}
/// Fill in the zero page at `KERNEL_ZERO_PAGE`.
///
/// Writes the boot flag magic, header magic, loader type, command line
/// pointer and size, and kernel alignment at their header offsets, then
/// hands the same buffer to `setup_e820` for the memory map.
///
/// Returns an error if the 4096 byte buffer cannot be obtained from
/// `memory` or if `setup_e820` fails.
fn setup_zero_page(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> system::Result<()> {
let mut zero = memory.mut_buffer(KERNEL_ZERO_PAGE, 4096)?;
// Both command line values are narrowed to the 32 bit header fields.
zero.write_at(HDR_BOOT_FLAG, KERNEL_BOOT_FLAG_MAGIC)
.write_at(HDR_HEADER, KERNEL_HDR_MAGIC)
.write_at(HDR_TYPE_LOADER, KERNEL_LOADER_OTHER)
.write_at(HDR_CMDLINE_PTR, cmdline_addr as u32)
.write_at(HDR_CMDLINE_SIZE, cmdline_size as u32)
.write_at(HDR_KERNEL_ALIGNMENT, KERNEL_MIN_ALIGNMENT_BYTES);
setup_e820(memory, zero)
}
/// Load the ELF kernel image into `memory` and then set up the zero page
/// describing the command line located at `cmdline_addr`.
pub fn load_pm_kernel(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> system::Result<()> {
load_elf_kernel(memory)?;
setup_zero_page(memory, cmdline_addr, cmdline_size)
}
/// Copy the file contents of one program segment from the `KERNEL` image
/// into guest memory at `p_paddr + KVM_KERNEL_LOAD_ADDRESS`.
///
/// Only `p_filesz` bytes are copied.
///
/// # Panics
///
/// Panics if the destination range is not available in `memory` or if
/// the segment lies outside of `KERNEL`.
fn load_elf_segment(memory: &GuestRam, hdr: ElfPhdr) {
    let guest_addr = hdr.p_paddr + KVM_KERNEL_LOAD_ADDRESS;
    let len = hdr.p_filesz as usize;
    let file_off = hdr.p_offset as usize;

    let dst = memory.mut_slice(guest_addr, len).unwrap();
    dst.copy_from_slice(&KERNEL[file_off..file_off + len]);
}
/// Load every loadable segment of the `KERNEL` ELF image into `memory`.
///
/// The program header table offset is read as a `u64` at byte 32 of the
/// image and the entry count as a `u16` at byte 56 (the `e_phoff` and
/// `e_phnum` positions of a 64 bit ELF header). Headers are then read
/// one after another and each `PT_LOAD` segment is copied into the guest.
pub fn load_elf_kernel(memory: &GuestRam) -> io::Result<()> {
    let mut image = ByteBuffer::from_bytes(KERNEL);
    let table_offset = image.read_at::<u64>(32);
    let header_count = image.read_at::<u16>(56);
    image.set_offset(table_offset as usize);

    // The chain is lazy, so headers are still read and loaded one at a time.
    (0..header_count)
        .map(|_| ElfPhdr::load_from(&mut image))
        .filter(ElfPhdr::is_pt_load)
        .for_each(|hdr| load_elf_segment(memory, hdr));
    Ok(())
}
/// One program header of the kernel image, with the fields in the order
/// they are read from the image.
struct ElfPhdr {
    pub p_type: u32,
    pub p_flags: u32,
    pub p_offset: u64,
    pub p_vaddr: u64,
    pub p_paddr: u64,
    pub p_filesz: u64,
    pub p_memsz: u64,
    pub p_align: u64,
}

impl ElfPhdr {
    /// `p_type` value identifying a loadable segment.
    const PT_LOAD: u32 = 1;

    /// Read one program header from `buf` at its current offset.
    fn load_from(buf: &mut ByteBuffer<&[u8]>) -> Self {
        // The reads must happen in field order, since each one continues
        // where the previous one stopped.
        let p_type: u32 = buf.read();
        let p_flags: u32 = buf.read();
        let p_offset: u64 = buf.read();
        let p_vaddr: u64 = buf.read();
        let p_paddr: u64 = buf.read();
        let p_filesz: u64 = buf.read();
        let p_memsz: u64 = buf.read();
        let p_align: u64 = buf.read();
        ElfPhdr { p_type, p_flags, p_offset, p_vaddr, p_paddr, p_filesz, p_memsz, p_align }
    }

    /// `true` if this header describes a loadable segment.
    fn is_pt_load(&self) -> bool {
        self.p_type == Self::PT_LOAD
    }
}

50
src/vm/arch/x86/kvm.rs Normal file
View File

@ -0,0 +1,50 @@
use std::os::unix::io::RawFd;
use crate::kvm::{Kvm, KVM_CAP_IOEVENTFD, KVM_CAP_PIT2, KVM_CAP_IRQ_INJECT_STATUS, KVM_CAP_IRQ_ROUTING, KVM_CAP_EXT_CPUID, KVM_CAP_SET_TSS_ADDR, KVM_CAP_USER_MEMORY, KVM_CAP_HLT, KVM_CAP_IRQCHIP};
use crate::vm::arch::{Result,Error};
use libc::c_ulong;
use crate::vm::arch::x86::ioctl::{
call_ioctl_with_ref, call_ioctl_with_val, KVM_CREATE_PIT2, KVM_SET_TSS_ADDR
};
static REQUIRED_EXTENSIONS: &[u32] = &[
KVM_CAP_IRQCHIP,
KVM_CAP_HLT,
KVM_CAP_USER_MEMORY,
KVM_CAP_SET_TSS_ADDR,
KVM_CAP_EXT_CPUID,
KVM_CAP_IRQ_ROUTING,
KVM_CAP_IRQ_INJECT_STATUS,
KVM_CAP_PIT2,
KVM_CAP_IOEVENTFD,
];
pub fn x86_open_kvm() -> Result<Kvm> {
let kvm = Kvm::open(REQUIRED_EXTENSIONS)
.map_err(Error::KvmError)?;
kvm.create_irqchip().map_err(Error::KvmError)?;
kvm_set_tss_addr(kvm.vmfd(), 0xFFFbd000)?;
kvm_create_pit2(kvm.vmfd())?;
Ok(kvm)
}
/// Argument block for `KVM_CREATE_PIT2`: a flags word followed by padding,
/// 64 bytes in total.
#[repr(C)]
struct KvmPitConfig {
    flags: u32,
    padding: [u32; 15],
}

impl KvmPitConfig {
    /// Create a configuration with the given `flags` and zeroed padding.
    pub fn new(flags: u32) -> KvmPitConfig {
        let padding = [0u32; 15];
        Self { flags, padding }
    }
}
/// Issues `KVM_CREATE_PIT2` on the VM descriptor `vmfd` with a config whose
/// flags word is zero.
fn kvm_create_pit2(vmfd: RawFd) -> Result<()> {
    let config = KvmPitConfig::new(0);
    call_ioctl_with_ref("KVM_CREATE_PIT2", vmfd, KVM_CREATE_PIT2, &config)
}
/// Issues `KVM_SET_TSS_ADDR` on the VM descriptor `vmfd`, passing `addr`
/// widened to the ioctl's `c_ulong` argument.
fn kvm_set_tss_addr(vmfd: RawFd, addr: u32) -> Result<()> {
    let ioctl_arg = addr as c_ulong;
    call_ioctl_with_val("KVM_SET_TSS_ADDR", vmfd, KVM_SET_TSS_ADDR, ioctl_arg)
}

99
src/vm/arch/x86/memory.rs Normal file
View File

@ -0,0 +1,99 @@
use crate::kvm::Kvm;
use crate::memory::{MemoryManager, MemoryRegion, GuestRam};
use crate::vm::arch::{Error, Result};
use std::cmp;
use crate::vm::kernel_cmdline::KernelCmdLine;
use crate::vm::arch::x86::kernel::{load_pm_kernel, KERNEL_CMDLINE_ADDRESS};
use crate::system;
use crate::vm::arch::x86::mptable::setup_mptable;
use crate::virtio::PciIrq;
/// First guest-physical address at or above 4 GiB; RAM that does not fit
/// below the PCI window is mapped starting here.
pub const HIMEM_BASE: u64 = 1 << 32;
/// Size (512 MiB) of the address window kept free directly below `HIMEM_BASE`.
pub const PCI_MMIO_RESERVED_SIZE: usize = 512 << 20;
/// Start of that window; low RAM is capped at this address.
pub const PCI_MMIO_RESERVED_BASE: u64 = HIMEM_BASE - PCI_MMIO_RESERVED_SIZE as u64;
/// Creates and registers the guest RAM regions for `ram_size` bytes.
///
/// Slot 0 covers RAM from address 0 up to at most `PCI_MMIO_RESERVED_BASE`.
/// Any remainder goes into slot 1 starting at `HIMEM_BASE`, leaving the PCI
/// window unmapped. The resulting list is installed with `set_ram_regions`.
pub fn x86_setup_memory_regions(memory: &mut MemoryManager, ram_size: usize) -> Result<()> {
    let low_size = cmp::min(ram_size, PCI_MMIO_RESERVED_BASE as usize);
    let high_size = ram_size - low_size;

    let mut regions = vec![create_region(memory.kvm(), 0, low_size, 0)?];
    if high_size > 0 {
        regions.push(create_region(memory.kvm(), HIMEM_BASE, high_size, 1)?);
    }

    memory.set_ram_regions(regions);
    Ok(())
}
fn create_region(kvm: &Kvm, base: u64, size: usize, slot: u32) -> Result<MemoryRegion> {
let mr = MemoryRegion::new(base, size)
.map_err(Error::MemoryRegionCreate)?;
kvm.add_memory_region(slot, base, mr.base_address(), size)
.map_err(Error::MemoryRegister)?;
Ok(mr)
}
// Guest-physical locations of the boot-time tables written by this module.
// GDT entries are written at BOOT_GDT_OFFSET + 8 * index (see write_gdt_table).
const BOOT_GDT_OFFSET: usize = 0x500;
// A single zeroed 8-byte entry is written here by setup_gdt.
const BOOT_IDT_OFFSET: usize = 0x520;
// Page-table pages filled in by setup_boot_pagetables: the PML4 entry points
// at BOOT_PDPTE, whose entry points at BOOT_PDE.
const BOOT_PML4: u64 = 0x9000;
const BOOT_PDPTE: u64 = 0xA000;
const BOOT_PDE: u64 = 0xB000;
pub fn x86_setup_memory(memory: &mut MemoryManager, cmdline: &KernelCmdLine, ncpus: usize, pci_irqs: &[PciIrq]) -> Result<()> {
load_pm_kernel(memory.guest_ram(), KERNEL_CMDLINE_ADDRESS, cmdline.size())
.map_err(Error::LoadKernel)?;
setup_gdt(memory.guest_ram())?;
setup_boot_pagetables(memory.guest_ram()).map_err(Error::SystemError)?;
setup_mptable(memory.guest_ram(), ncpus, pci_irqs).map_err(Error::SystemError)?;
write_cmdline(memory.guest_ram(), cmdline).map_err(Error::SystemError)?;
Ok(())
}
/// Writes the boot page tables into guest memory.
///
/// One PML4 entry points at the PDPT page and one PDPT entry points at the
/// page-directory page, both with low bits `0x3`. The page directory then
/// gets 512 entries, entry `i` holding address `i << 21` with low bits
/// `0x83`, so together they identity-map the first 512 * 2 MiB = 1 GiB.
fn setup_boot_pagetables(memory: &GuestRam) -> system::Result<()> {
    // Low bits for entries that reference the next table level.
    const TABLE_FLAGS: u64 = 0x3;
    // Low bits for the 2 MiB leaf entries in the page directory.
    const LEAF_FLAGS: u64 = 0x83;
    const ENTRY_SIZE: u64 = 8;

    memory.write_int::<u64>(BOOT_PML4, BOOT_PDPTE | TABLE_FLAGS)?;
    memory.write_int::<u64>(BOOT_PDPTE, BOOT_PDE | TABLE_FLAGS)?;

    for index in 0..512_u64 {
        let slot = BOOT_PDE + (index * ENTRY_SIZE);
        let mapping = (index << 21) | LEAF_FLAGS;
        memory.write_int::<u64>(slot, mapping)?;
    }
    Ok(())
}
/// Writes each 8-byte descriptor in `table` to guest memory, entry `i`
/// landing at `BOOT_GDT_OFFSET + 8 * i`. Stops at the first failed write.
fn write_gdt_table(table: &[u64], memory: &GuestRam) -> system::Result<()> {
    for (index, &descriptor) in table.iter().enumerate() {
        let address = (BOOT_GDT_OFFSET + index * 8) as u64;
        memory.write_int(address, descriptor)?;
    }
    Ok(())
}
/// Packs a segment descriptor into its 64-bit in-memory encoding.
///
/// Bit placement in the result:
/// * `limit` bits 0..=15  -> bits 0..=15
/// * `base`  bits 0..=23  -> bits 16..=39
/// * `flags` masked with `0xf0ff` -> starting at bit 40 (the masked-out
///   nibble is where limit bits 16..=19 go)
/// * `limit` bits 16..=19 -> bits 48..=51
/// * `base`  bits 24..=31 -> bits 56..=63
pub fn gdt_entry(flags: u16, base: u32, limit: u32) -> u64 {
    let flags = u64::from(flags);
    let base = u64::from(base);
    let limit = u64::from(limit);

    let base_high = (base & 0xff00_0000) << 32;
    let base_low = (base & 0x00ff_ffff) << 16;
    let flag_bits = (flags & 0xf0ff) << 40;
    let limit_high = (limit & 0x000f_0000) << 32;
    let limit_low = limit & 0xffff;

    base_high | flag_bits | limit_high | base_low | limit_low
}
pub fn setup_gdt(memory: &GuestRam) -> Result<()> {
let table = [
gdt_entry(0,0,0),
gdt_entry(0xa09b,0,0xfffff),
gdt_entry(0xc093,0,0xfffff),
gdt_entry(0x808b,0,0xfffff),
];
write_gdt_table(&table, memory)
.map_err(Error::SystemError)?;
memory.write_int::<u64>(BOOT_IDT_OFFSET as u64, 0u64)
.map_err(Error::SystemError)?;
Ok(())
}
/// Copies the kernel command line to `KERNEL_CMDLINE_ADDRESS` in guest
/// memory and appends a single zero byte directly after it.
fn write_cmdline(memory: &GuestRam, cmdline: &KernelCmdLine) -> system::Result<()> {
    let text = cmdline.as_bytes();
    memory.write_bytes(KERNEL_CMDLINE_ADDRESS, text)?;

    let terminator_address = KERNEL_CMDLINE_ADDRESS + text.len() as u64;
    memory.write_int(terminator_address, 0u8)?;
    Ok(())
}

13
src/vm/arch/x86/mod.rs Normal file
View File

@ -0,0 +1,13 @@
mod cpuid;
mod interrupts;
mod kvm;
mod memory;
mod mptable;
mod registers;
mod kernel;
mod ioctl;
mod setup;
pub use setup::X86ArchSetup;
pub use memory::PCI_MMIO_RESERVED_BASE;
pub use registers::KvmRegs;

View File

@ -4,7 +4,7 @@ use std::iter;
use crate::memory::GuestRam; use crate::memory::GuestRam;
use crate::virtio::PciIrq; use crate::virtio::PciIrq;
use crate::vm::Result; use crate::system::Result;
const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee00000; const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee00000;
const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec00000; const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec00000;
@ -99,7 +99,7 @@ impl Buffer {
.w8(dstirq) // dest irq .w8(dstirq) // dest irq
} }
fn write_all_mpc_intsrc(&mut self, ioapicid: u8, pci_irqs: &Vec<PciIrq>) -> &mut Self { fn write_all_mpc_intsrc(&mut self, ioapicid: u8, pci_irqs: &[PciIrq]) -> &mut Self {
for irq in pci_irqs { for irq in pci_irqs {
self.write_mpc_intsrc(ioapicid, irq.src_bus_irq(), irq.irq_line()); self.write_mpc_intsrc(ioapicid, irq.src_bus_irq(), irq.irq_line());
} }
@ -192,9 +192,8 @@ fn align(sz: usize, n: usize) -> usize {
(sz + (n - 1)) & !(n - 1) (sz + (n - 1)) & !(n - 1)
} }
pub fn setup_mptable(memory: &GuestRam, ncpus: usize, pci_irqs: Vec<PciIrq>) -> Result<()> { pub fn setup_mptable(memory: &GuestRam, ncpus: usize, pci_irqs: &[PciIrq]) -> Result<()> {
let ioapicid = (ncpus + 1) as u8; let ioapicid = (ncpus + 1) as u8;
//let address= align(BIOS_BEGIN as usize + BIOS_BIN.len(), 16) as u32;
let mut body = Buffer::new(); let mut body = Buffer::new();
let address = 0; let address = 0;
@ -209,6 +208,5 @@ pub fn setup_mptable(memory: &GuestRam, ncpus: usize, pci_irqs: Vec<PciIrq>) ->
let mut table = Buffer::new(); let mut table = Buffer::new();
table.write_mpctable(ncpus as u16, &body); table.write_mpctable(ncpus as u16, &body);
//memory.write_bytes(address as u64, &table.vec)
memory.write_bytes(address as u64, &table.vec) memory.write_bytes(address as u64, &table.vec)
} }

View File

@ -0,0 +1,314 @@
use std::fmt;
use std::os::unix::io::RawFd;
use crate::kvm::KvmVcpu;
use crate::vm::arch::{Result, Error};
use crate::vm::arch::x86::kernel::KERNEL_ZERO_PAGE;
use crate::vm::arch::x86::ioctl::{
call_ioctl_with_ref, KVM_SET_FPU, KVM_SET_MSRS, call_ioctl_with_mut_ref, KVM_GET_SREGS, KVM_SET_SREGS
};
// Model-specific register indices programmed by setup_msrs. All of them are
// written as zero except MSR_IA32_MISC_ENABLE.
const MSR_IA32_SYSENTER_CS: u32 = 0x00000174;
const MSR_IA32_SYSENTER_ESP: u32 = 0x00000175;
const MSR_IA32_SYSENTER_EIP: u32 = 0x00000176;
const MSR_STAR: u32 = 0xc0000081;
const MSR_LSTAR: u32 = 0xc0000082;
const MSR_CSTAR: u32 = 0xc0000083;
const MSR_SYSCALL_MASK: u32 = 0xc0000084;
const MSR_KERNEL_GS_BASE: u32 = 0xc0000102;
const MSR_IA32_TSC: u32 = 0x00000010;
const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0;
// Value (not an index): the data written to MSR_IA32_MISC_ENABLE by setup_msrs.
const MSR_IA32_MISC_ENABLE_FAST_STRING: u64 = 0x01;
pub fn setup_fpu(vcpu: &KvmVcpu) -> Result<()> {
let mut fpu = KvmFpu::new();
fpu.fcw = 0x37f;
fpu.mxcsr = 0x1f80;
kvm_set_fpu(vcpu.raw_fd(), &fpu)?;
Ok(())
}
/// Writes the initial MSR values for a vcpu with a single `KVM_SET_MSRS` call.
///
/// Every listed MSR is set to zero except `MSR_IA32_MISC_ENABLE`, which
/// receives `MSR_IA32_MISC_ENABLE_FAST_STRING`.
pub fn setup_msrs(vcpu: &KvmVcpu) -> Result<()> {
    // (index, value) pairs, submitted to KVM in exactly this order.
    const BOOT_MSRS: [(u32, u64); 10] = [
        (MSR_IA32_SYSENTER_CS, 0),
        (MSR_IA32_SYSENTER_ESP, 0),
        (MSR_IA32_SYSENTER_EIP, 0),
        (MSR_STAR, 0),
        (MSR_CSTAR, 0),
        (MSR_KERNEL_GS_BASE, 0),
        (MSR_SYSCALL_MASK, 0),
        (MSR_LSTAR, 0),
        (MSR_IA32_TSC, 0),
        (MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_FAST_STRING),
    ];

    let mut msrs = KvmMsrs::new();
    for &(index, value) in BOOT_MSRS.iter() {
        msrs.add(index, value);
    }
    kvm_set_msrs(vcpu.raw_fd(), &msrs)
}
// Guest-physical addresses loaded into the vcpu's descriptor-table, stack and
// CR3 registers. BOOT_GDT_OFFSET, BOOT_IDT_OFFSET and BOOT_PML4 repeat the
// values used by the memory setup code that writes those tables; keep the two
// sets in sync.
const BOOT_GDT_OFFSET: usize = 0x500;
const BOOT_IDT_OFFSET: usize = 0x520;
// Initial value for both rsp and rbp (see setup_pm_regs).
const BOOT_STACK: u64 = 0x8000;
const BOOT_PML4: u64 = 0x9000;
// Control-register and EFER bits OR-ed into the special registers by
// setup_pm_sregs: CR0 bit 0 and bit 31, CR4 bit 5, EFER bit 8 and bit 10.
const X86_CR0_PE: u64 = 0x1;
const X86_CR0_PG: u64 = 0x80000000;
const X86_CR4_PAE: u64 = 0x20;
const EFER_LME: u64 = 0x100;
const EFER_LMA: u64 = (1 << 10);
/// Loads the vcpu's special registers for kernel entry.
///
/// Starting from the values KVM currently reports, this points the GDT and
/// IDT registers at the boot tables (limits 31 and 7), installs the code
/// segment in `cs`, the data segment in `ds`/`es`/`fs`/`gs`/`ss` and the TSS
/// segment in `tr`, sets CR3 to the boot PML4 and turns on CR0.PE, CR0.PG,
/// CR4.PAE, EFER.LME and EFER.LMA. Despite the "pm" in the name, the EFER
/// bits and the code segment's `l` flag mean the resulting state is 64-bit
/// long mode with paging enabled.
pub fn setup_pm_sregs(vcpu: &KvmVcpu) -> Result<()> {
    const CODE_SELECTOR: u16 = 1 * 8;
    const DATA_SELECTOR: u16 = 2 * 8;
    const TSS_SELECTOR: u16 = 3 * 8;

    let code_seg = KvmSegment::new(0, 0xfffff, CODE_SELECTOR, 0xa09b);
    let data_seg = KvmSegment::new(0, 0xfffff, DATA_SELECTOR, 0xc093);
    let tss_seg = KvmSegment::new(0, 0xfffff, TSS_SELECTOR, 0x808b);

    let mut sregs = kvm_get_sregs(vcpu.raw_fd())?;

    // Descriptor tables: four 8-byte GDT entries, one 8-byte IDT entry.
    sregs.gdt.base = BOOT_GDT_OFFSET as u64;
    sregs.gdt.limit = 32 - 1;
    sregs.itd.base = BOOT_IDT_OFFSET as u64;
    sregs.itd.limit = 8 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    // Paging and mode bits; cr3 is an assignment, the rest only add bits.
    sregs.cr3 = BOOT_PML4;
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PE | X86_CR0_PG;
    sregs.efer |= EFER_LME | EFER_LMA;

    kvm_set_sregs(vcpu.raw_fd(), &sregs)
}
pub fn setup_pm_regs(vcpu: &KvmVcpu, kernel_entry: u64) -> Result<()> {
let mut regs = KvmRegs::new();
regs.rflags = 0x0000000000000002;
regs.rip = kernel_entry;
regs.rsp = BOOT_STACK;
regs.rbp = BOOT_STACK;
regs.rsi = KERNEL_ZERO_PAGE;
vcpu.set_regs(&regs)
.map_err(Error::KvmError)?;
Ok(())
}
/// FPU state block passed by reference to the `KVM_SET_FPU` ioctl.
///
/// `#[repr(C)]` and the field order fix the 416-byte layout; only the control
/// word `fcw` and `mxcsr` are settable from outside this module.
#[derive(Copy, Clone)]
#[repr(C)]
pub struct KvmFpu {
    fpr: [u8; 128],
    pub fcw: u16,
    fsw: u16,
    ftwx: u8,
    pad1: u8,
    last_opcode: u16,
    last_ip: u64,
    last_dp: u64,
    xmm: [u8; 256],
    pub mxcsr: u32,
    pad2: u32,
}

impl KvmFpu {
    /// Returns a state block with every field zeroed.
    ///
    /// Spelled out by hand because `Default` is not available for arrays of
    /// this length.
    pub fn new() -> KvmFpu {
        KvmFpu {
            fpr: [0; 128],
            fcw: 0,
            fsw: 0,
            ftwx: 0,
            pad1: 0,
            last_opcode: 0,
            last_ip: 0,
            last_dp: 0,
            xmm: [0; 256],
            mxcsr: 0,
            pad2: 0,
        }
    }
}
/// Issues `KVM_SET_FPU` on the vcpu descriptor `cpufd` with `fpu` as its argument.
pub fn kvm_set_fpu(cpufd: RawFd, fpu: &KvmFpu) -> Result<()> {
    let ioctl_name = "KVM_SET_FPU";
    call_ioctl_with_ref(ioctl_name, cpufd, KVM_SET_FPU, fpu)
}
/// One MSR index/value pair inside a `KvmMsrs` list.
#[derive(Copy, Clone, Default)]
#[repr(C)]
struct KvmMsrEntry {
    index: u32,
    reserved: u32,
    data: u64,
}

/// Argument block for the `KVM_SET_MSRS` ioctl: an entry count, a padding
/// word, then a fixed array with room for 100 entries.
#[repr(C)]
pub struct KvmMsrs {
    nent: u32,
    padding: u32,
    entries: [KvmMsrEntry; 100],
}

impl KvmMsrs {
    /// Returns an empty list (`nent == 0`, all entries zeroed).
    pub fn new() -> KvmMsrs {
        KvmMsrs {
            nent: 0,
            padding: 0,
            entries: [KvmMsrEntry::default(); 100],
        }
    }

    /// Appends `index = data` as the next entry.
    ///
    /// # Panics
    ///
    /// Panics on the array index if all 100 slots are already in use.
    pub fn add(&mut self, index: u32, data: u64) {
        let slot = &mut self.entries[self.nent as usize];
        slot.index = index;
        slot.data = data;
        self.nent += 1;
    }
}
/// Issues `KVM_SET_MSRS` on the vcpu descriptor `cpufd` with `msrs` as its argument.
pub fn kvm_set_msrs(cpufd: RawFd, msrs: &KvmMsrs) -> Result<()> {
    let ioctl_name = "KVM_SET_MSRS";
    call_ioctl_with_ref(ioctl_name, cpufd, KVM_SET_MSRS, msrs)
}
/// Segment register state as exchanged with KVM inside `KvmSRegs`
/// (24 bytes, `#[repr(C)]`).
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmSegment {
    base: u64,
    limit: u32,
    selector: u16,
    stype: u8,
    present: u8,
    dpl: u8,
    db: u8,
    s: u8,
    l: u8,
    g: u8,
    avl: u8,
    unusable: u8,
    padding: u8,
}

impl KvmSegment {
    /// Builds a segment from `base`, `limit`, `selector` and a packed
    /// `flags` word; see [`KvmSegment::setup`] for the flag layout.
    pub fn new(base: u64, limit: u32, selector: u16, flags: u16) -> KvmSegment {
        let mut segment = KvmSegment::default();
        segment.setup(base, limit, selector, flags);
        segment
    }

    /// Overwrites every field except `padding`.
    ///
    /// `flags` is unpacked as: bits 0-3 type, bit 4 `s`, bits 5-6 `dpl`,
    /// bit 7 present, bit 12 `avl`, bit 13 `l`, bit 14 `db`, bit 15 `g`.
    /// `unusable` is set to 1 exactly when the present bit is clear.
    pub fn setup(&mut self, base: u64, limit: u32, selector: u16, flags: u16) {
        let bit = |position: u16| ((flags >> position) & 0x1) as u8;

        self.base = base;
        self.limit = limit;
        self.selector = selector;

        self.stype = (flags & 0xF) as u8;
        self.s = bit(4);
        self.dpl = ((flags >> 5) & 0x3) as u8;
        self.present = bit(7);
        self.avl = bit(12);
        self.l = bit(13);
        self.db = bit(14);
        self.g = bit(15);

        self.unusable = (self.present != 1) as u8;
    }
}

impl fmt::Debug for KvmSegment {
    /// Single-line dump of all fields except `padding`; base, limit,
    /// selector and type are printed in hex.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "(base: {:x} limit {:x} selector: {:x} type: {:x} p: {} dpl: {} db: {} s: {} l: {} g: {} avl: {} unuse: {})",
            self.base,
            self.limit,
            self.selector,
            self.stype,
            self.present,
            self.dpl,
            self.db,
            self.s,
            self.l,
            self.g,
            self.avl,
            self.unusable
        )
    }
}
/// Descriptor-table register (base address plus 16-bit limit) as stored in
/// `KvmSRegs`; padded to 16 bytes.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmDtable {
    pub base: u64,
    pub limit: u16,
    padding: [u16; 3],
}

impl fmt::Debug for KvmDtable {
    /// Prints base and limit in hex, omitting the padding.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let (base, limit) = (self.base, self.limit);
        write!(f, "(base: {:x} limit {:x})", base, limit)
    }
}
/// Special-register block exchanged with KVM through `KVM_GET_SREGS` and
/// `KVM_SET_SREGS`.
///
/// The field named `itd` is the one `setup_pm_sregs` points at the boot IDT.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmSRegs {
    pub cs: KvmSegment,
    pub ds: KvmSegment,
    pub es: KvmSegment,
    pub fs: KvmSegment,
    pub gs: KvmSegment,
    pub ss: KvmSegment,
    pub tr: KvmSegment,
    pub ldt: KvmSegment,
    pub gdt: KvmDtable,
    pub itd: KvmDtable,
    pub cr0: u64,
    pub cr2: u64,
    pub cr3: u64,
    pub cr4: u64,
    pub cr8: u64,
    pub efer: u64,
    pub apic_base: u64,
    pub interrupt_bitmap: [u64; 4],
}

impl fmt::Debug for KvmSRegs {
    /// Multi-line dump of the segments, descriptor tables, cr0/cr2/cr3/cr4,
    /// efer and apic_base. `cr8` and `interrupt_bitmap` are not printed.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "cs: {:?}\nds: {:?}\nes: {:?}\nfs: {:?}\n",
            self.cs, self.ds, self.es, self.fs
        )?;
        write!(
            f,
            "gs: {:?}\nss: {:?}\ntr: {:?}\nldt: {:?}\n",
            self.gs, self.ss, self.tr, self.ldt
        )?;
        write!(f, "gdt: {:?} itd: {:?}\n", self.gdt, self.itd)?;
        write!(
            f,
            "cr0: {:x} cr2: {:x} cr3: {:x} cr4: {:x}\n",
            self.cr0, self.cr2, self.cr3, self.cr4
        )?;
        write!(f, "efer: {:x} apic_base: {:x}\n", self.efer, self.apic_base)
    }
}

impl KvmSRegs {
    /// Returns a block with every field at its `Default` (zero) value.
    pub fn new() -> KvmSRegs {
        KvmSRegs::default()
    }
}
/// Issues `KVM_GET_SREGS` on the vcpu descriptor `cpufd` and returns the
/// block the ioctl filled in, which starts out zeroed.
pub fn kvm_get_sregs(cpufd: RawFd) -> Result<KvmSRegs> {
    let mut current = KvmSRegs::new();
    call_ioctl_with_mut_ref("KVM_GET_SREGS", cpufd, KVM_GET_SREGS, &mut current)?;
    Ok(current)
}
/// Issues `KVM_SET_SREGS` on the vcpu descriptor `cpufd` with `sregs` as its argument.
pub fn kvm_set_sregs(cpufd: RawFd, sregs: &KvmSRegs) -> Result<()> {
    let ioctl_name = "KVM_SET_SREGS";
    call_ioctl_with_ref(ioctl_name, cpufd, KVM_SET_SREGS, sregs)
}
/// General-purpose register block (eighteen 64-bit values, `#[repr(C)]`)
/// handed to `KvmVcpu::set_regs`.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmRegs {
    pub rax: u64,
    pub rbx: u64,
    pub rcx: u64,
    pub rdx: u64,
    pub rsi: u64,
    pub rdi: u64,
    pub rsp: u64,
    pub rbp: u64,
    pub r8: u64,
    pub r9: u64,
    pub r10: u64,
    pub r11: u64,
    pub r12: u64,
    pub r13: u64,
    pub r14: u64,
    pub r15: u64,
    pub rip: u64,
    pub rflags: u64,
}

impl fmt::Debug for KvmRegs {
    /// Five-line hex dump, four registers per line, ending with rip/rflags.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(
            f,
            "rax 0x{:x} rbx 0x{:x} rcx 0x{:x} rdx 0x{:x}\n",
            self.rax, self.rbx, self.rcx, self.rdx
        )?;
        write!(
            f,
            "rsi 0x{:x} rdi 0x{:x} rsp 0x{:x} rbp 0x{:x}\n",
            self.rsi, self.rdi, self.rsp, self.rbp
        )?;
        write!(
            f,
            "r8 0x{:x} r9 0x{:x} r10 0x{:x} r11 0x{:x}\n",
            self.r8, self.r9, self.r10, self.r11
        )?;
        write!(
            f,
            "r12 0x{:x} r13 0x{:x} r14 0x{:x} r15 0x{:x}\n",
            self.r12, self.r13, self.r14, self.r15
        )?;
        write!(f, "rip 0x{:x} rflags 0x{:x}\n", self.rip, self.rflags)
    }
}

impl KvmRegs {
    /// Returns a block with every register set to zero.
    pub fn new() -> KvmRegs {
        KvmRegs::default()
    }
}

73
src/vm/arch/x86/setup.rs Normal file
View File

@ -0,0 +1,73 @@
use crate::memory::{MemoryManager, GuestRam, SystemAllocator, AddressRange};
use crate::vm::VmConfig;
use crate::vm::arch::{ArchSetup, Error, Result};
use crate::vm::kernel_cmdline::KernelCmdLine;
use crate::virtio::PciIrq;
use crate::kvm::{Kvm, KvmVcpu};
use crate::vm::arch::x86::kvm::x86_open_kvm;
use crate::vm::arch::x86::memory::{x86_setup_memory_regions, x86_setup_memory};
use crate::vm::arch::x86::cpuid::setup_cpuid;
use crate::vm::arch::x86::registers::{setup_pm_sregs, setup_pm_regs, setup_fpu, setup_msrs};
use crate::vm::arch::x86::interrupts::setup_lapic;
use crate::vm::arch::x86::kernel::KVM_KERNEL_LOAD_ADDRESS;
/// x86 implementation state for `ArchSetup`: the settings captured from the
/// VM configuration plus the memory manager once `create_memory` has run.
pub struct X86ArchSetup {
    ram_size: usize,
    use_drm: bool,
    ncpus: usize,
    memory: Option<MemoryManager>,
}

impl X86ArchSetup {
    /// Captures RAM size and CPU count from `config`. `use_drm` is set only
    /// when both wayland and dmabuf are enabled. No memory exists yet.
    pub fn create(config: &VmConfig) -> Self {
        X86ArchSetup {
            ram_size: config.ram_size(),
            use_drm: config.is_wayland_enabled() && config.is_dmabuf_enabled(),
            ncpus: config.ncpus(),
            memory: None,
        }
    }
}
/// Returns the page frame number (4096-byte pages) at which device memory starts.
///
/// Device memory is placed at `mem_size` rounded up to a 2 MiB boundary, or
/// at 4 GiB, whichever is greater.
fn get_base_dev_pfn(mem_size: u64) -> u64 {
    const PAGE_SIZE: u64 = 4096;
    const TWO_MB: u64 = 2 << 20;
    const FOUR_GB: u64 = 4 << 30;

    let rounded_up = (mem_size + TWO_MB - 1) / TWO_MB * TWO_MB;
    rounded_up.max(FOUR_GB) / PAGE_SIZE
}
impl ArchSetup for X86ArchSetup {
    /// Opens KVM with the x86 VM-level setup applied (see `x86_open_kvm`).
    fn open_kvm(&self) -> Result<Kvm> {
        x86_open_kvm()
    }

    /// Builds the memory manager for this VM and registers the RAM regions.
    ///
    /// Device address space runs from the address computed by
    /// `get_base_dev_pfn` to the top of the 64-bit range. A clone of the
    /// manager is retained for `setup_memory`; the original is returned.
    fn create_memory(&mut self, kvm: &Kvm) -> Result<MemoryManager> {
        let ram = GuestRam::new(self.ram_size);

        let dev_base = get_base_dev_pfn(self.ram_size as u64) * 4096;
        let dev_len = u64::max_value() - dev_base;
        let device_range = AddressRange::new(dev_base, dev_len as usize);
        let allocator = SystemAllocator::new(device_range);

        let mut manager = MemoryManager::new(kvm.clone(), ram, allocator, self.use_drm)
            .map_err(Error::MemoryManagerCreate)?;
        x86_setup_memory_regions(&mut manager, self.ram_size)?;

        self.memory = Some(manager.clone());
        Ok(manager)
    }

    /// Writes kernel, boot tables and command line into guest memory.
    ///
    /// # Panics
    ///
    /// Panics with "No memory created" if `create_memory` has not been
    /// called successfully first.
    fn setup_memory(&mut self, cmdline: &KernelCmdLine, pci_irqs: &[PciIrq]) -> Result<()> {
        let memory = self.memory.as_mut().expect("No memory created");
        x86_setup_memory(memory, cmdline, self.ncpus, pci_irqs)
    }

    /// Initializes one vcpu: cpuid, special registers, general registers
    /// (entry at `KVM_KERNEL_LOAD_ADDRESS`), FPU, MSRs and finally the LAPIC.
    /// Stops at the first step that fails.
    fn setup_vcpu(&self, vcpu: &KvmVcpu) -> Result<()> {
        setup_cpuid(vcpu)?;
        setup_pm_sregs(vcpu)?;
        setup_pm_regs(vcpu, KVM_KERNEL_LOAD_ADDRESS)?;
        setup_fpu(vcpu)?;
        setup_msrs(vcpu)?;
        setup_lapic(vcpu.raw_fd())
    }
}

View File

@ -1,10 +1,11 @@
use std::path::{PathBuf, Path}; use std::path::{PathBuf, Path};
use crate::vm::Vm; use crate::vm::{VmSetup, arch};
use std::{env, process}; use std::{env, process};
use crate::devices::SyntheticFS; use crate::devices::SyntheticFS;
use crate::disk::{RawDiskImage, RealmFSImage, OpenType}; use crate::disk::{RawDiskImage, RealmFSImage, OpenType};
use libcitadel::Realms; use libcitadel::Realms;
use libcitadel::terminal::{TerminalPalette, AnsiTerminal, Base16Scheme}; use libcitadel::terminal::{TerminalPalette, AnsiTerminal, Base16Scheme};
use crate::vm::arch::X86ArchSetup;
pub struct VmConfig { pub struct VmConfig {
ram_size: usize, ram_size: usize,
@ -15,6 +16,7 @@ pub struct VmConfig {
dmabuf: bool, dmabuf: bool,
network: bool, network: bool,
home: String, home: String,
colorscheme: String,
bridge_name: String, bridge_name: String,
kernel_path: Option<PathBuf>, kernel_path: Option<PathBuf>,
init_path: Option<PathBuf>, init_path: Option<PathBuf>,
@ -39,6 +41,7 @@ impl VmConfig {
network: true, network: true,
bridge_name: "vz-clear".to_string(), bridge_name: "vz-clear".to_string(),
home: Self::default_homedir(), home: Self::default_homedir(),
colorscheme: "dracula".to_string(),
kernel_path: None, kernel_path: None,
init_path: None, init_path: None,
init_cmd: None, init_cmd: None,
@ -65,18 +68,23 @@ impl VmConfig {
self self
} }
pub fn raw_disk_image<P: Into<PathBuf>>(mut self, path: P, open_type: OpenType) -> Self { pub fn raw_disk_image<P: Into<PathBuf>>(self, path: P, open_type: OpenType) -> Self {
self.raw_disks.push(RawDiskImage::new(path, open_type)); self.raw_disk_image_with_offset(path, open_type, 0)
self
} }
pub fn raw_disk_image_with_offset<P: Into<PathBuf>>(mut self, path: P, open_type: OpenType, offset: usize) -> Self { pub fn raw_disk_image_with_offset<P: Into<PathBuf>>(mut self, path: P, open_type: OpenType, offset: usize) -> Self {
self.raw_disks.push(RawDiskImage::new_with_offset(path, open_type, offset)); match RawDiskImage::new_with_offset(path, open_type, offset) {
Ok(disk) => self.raw_disks.push(disk),
Err(e) => warn!("Could not add disk: {}", e),
};
self self
} }
pub fn realmfs_image<P: Into<PathBuf>>(mut self, path: P) -> Self { pub fn realmfs_image<P: Into<PathBuf>>(mut self, path: P) -> Self {
self.realmfs_images.push(RealmFSImage::new(path, OpenType::MemoryOverlay)); match RealmFSImage::new(path, OpenType::MemoryOverlay) {
Ok(disk) => self.realmfs_images.push(disk),
Err(e) => warn!("Could not add disk: {}", e),
};
self self
} }
@ -109,21 +117,31 @@ impl VmConfig {
let _terminal_restore = TerminalRestore::save(); let _terminal_restore = TerminalRestore::save();
if let Some(scheme) = Base16Scheme::by_name("black-metal-immortal") { if let Some(scheme) = Base16Scheme::by_name(&self.colorscheme) {
let mut term = AnsiTerminal::new().unwrap(); let mut term = AnsiTerminal::new().unwrap();
if let Err(err) = term.apply_base16(scheme) { if let Err(err) = term.apply_base16(scheme) {
warn!("Failed to set terminal color scheme: {}", err); warn!("Failed to set terminal color scheme: {}", err);
} }
} }
let mut setup = self.setup();
match Vm::open(self) { let vm = match setup.create_vm() {
Ok(vm) => if let Err(err) = vm.start() { Ok(vm) => vm,
notify!("Error starting VM: {}", err); Err(err) => {
warn!("Failed to create VM: {}", err);
return;
} }
Err(e) => notify!("Error creating VM: {}", e), };
if let Err(err) = vm.start() {
warn!("Failed to start VM: {}", err);
} }
} }
pub fn setup(self) -> VmSetup<X86ArchSetup> {
let arch_setup = arch::create_setup(&self);
VmSetup::new(self, arch_setup)
}
pub fn ram_size(&self) -> usize { pub fn ram_size(&self) -> usize {
self.ram_size self.ram_size
} }
@ -206,7 +224,13 @@ impl VmConfig {
eprintln!("Realmfs image does not exist at {}", path.display()); eprintln!("Realmfs image does not exist at {}", path.display());
process::exit(1); process::exit(1);
} }
self.realmfs_images.push(RealmFSImage::new(path, OpenType::MemoryOverlay)); match RealmFSImage::new(path, OpenType::MemoryOverlay) {
Ok(disk) => self.realmfs_images.push(disk),
Err(e) => {
warn!("Could not add disk: {}", e);
process::exit(1);
},
};
} }
fn add_realm_by_name(&mut self, realm: &str) { fn add_realm_by_name(&mut self, realm: &str) {
@ -217,7 +241,10 @@ impl VmConfig {
self.add_realmfs_by_name(realmfs); self.add_realmfs_by_name(realmfs);
self.home = realm.base_path().join("home").display().to_string(); self.home = realm.base_path().join("home").display().to_string();
self.realm_name = Some(realm.name().to_string()); self.realm_name = Some(realm.name().to_string());
self.bridge_name = config.network_zone().to_string(); self.bridge_name = format!("vz-{}", config.network_zone());
if let Some(scheme) = config.terminal_scheme() {
self.colorscheme = scheme.to_string();
}
} }
} }
@ -318,7 +345,6 @@ impl TerminalRestore {
let _ = p.apply(&mut term); let _ = p.apply(&mut term);
} }
} }
} }
impl Drop for TerminalRestore { impl Drop for TerminalRestore {

View File

@ -1,199 +1,48 @@
use std::{result, io}; use std::{result, io};
use std::error;
use std::fmt; use std::fmt;
use std::str; use crate::{system, kvm, virtio};
use std::ffi::CStr;
use libc;
use crate::disk;
use crate::system::netlink; use crate::system::netlink;
use crate::vm::arch;
pub type Result<T> = result::Result<T, Error>; pub type Result<T> = result::Result<T, Error>;
#[derive(Debug)] #[derive(Debug)]
pub enum ErrorKind { pub enum Error {
InvalidAddress(u64), CreateVmFailed(kvm::Error),
InvalidMappingOffset(usize), MappingFailed(system::Error),
RegisterMemoryFailed,
ReadKernelFailed,
Interrupted,
InvalidVring,
IoctlFailed(&'static str),
MissingRequiredExtension(u32),
OpenDeviceFailed,
CreateVmFailed,
BadVersion,
EventFdError,
DiskImageOpen(disk::Error),
TerminalTermios(io::Error), TerminalTermios(io::Error),
IoError(io::Error), IoError(io::Error),
MemoryManagerCreate, ArchError(arch::Error),
NetworkSetup(netlink::Error), NetworkSetup(netlink::Error),
SetupBootFs(io::Error),
SetupVirtio(virtio::Error),
} }
impl ErrorKind {
fn as_str(&self) -> &'static str {
match *self {
ErrorKind::InvalidAddress(..) => "Invalid guest memory address",
ErrorKind::InvalidMappingOffset(..) => "Invalid memory mapping offset",
ErrorKind::RegisterMemoryFailed => "Failed to register memory region",
ErrorKind::ReadKernelFailed => "Failed to load kernel from disk",
ErrorKind::Interrupted => "System call interrupted",
ErrorKind::InvalidVring => "Invalid Vring",
ErrorKind::IoctlFailed(..) => "Ioctl failed",
ErrorKind::MissingRequiredExtension(..) => "kernel does not support requred kvm extension",
ErrorKind::OpenDeviceFailed => "could not open /dev/kvm",
ErrorKind::CreateVmFailed => "call to create vm failed",
ErrorKind::BadVersion => "unexpected kvm api version",
ErrorKind::EventFdError => "eventfd error",
ErrorKind::DiskImageOpen(_) => "failed to open disk image",
ErrorKind::TerminalTermios(_) => "failed termios",
ErrorKind::IoError(_) => "i/o error",
ErrorKind::MemoryManagerCreate => "memory manager",
ErrorKind::NetworkSetup(_) => "error setting up network",
}
}
}
impl fmt::Display for ErrorKind { impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self { match self {
ErrorKind::InvalidAddress(addr) => write!(f, "{}: 0x{:x}", self.as_str(), addr), Error::TerminalTermios(e) => write!(f, "error reading/restoring terminal state: {}", e),
ErrorKind::InvalidMappingOffset(offset) => write!(f, "{}: 0x{:x}", self.as_str(), offset), Error::IoError(e) => write!(f, "i/o error: {}", e),
ErrorKind::IoctlFailed(name) => write!(f, "Ioctl {} failed", name), Error::NetworkSetup(e) => write!(f, "error setting up network: {}", e),
ErrorKind::DiskImageOpen(ref e) => write!(f, "failed to open disk image: {}", e), Error::CreateVmFailed(e) => write!(f, "call to create vm failed: {}", e),
ErrorKind::TerminalTermios(ref e) => write!(f, "error reading/restoring terminal state: {}", e), Error::MappingFailed(e) => write!(f, "memory mapping failed: {}", e),
ErrorKind::IoError(ref e) => write!(f, "i/o error: {}", e), Error::SetupBootFs(e) => write!(f, "setting up boot fs failed: {}", e),
ErrorKind::MemoryManagerCreate => write!(f, "error creating memory manager"), Error::SetupVirtio(e) => write!(f, "setting up virtio devices failed: {}", e),
ErrorKind::NetworkSetup(ref e) => write!(f, "error setting up network: {}", e), Error::ArchError(e) => e.fmt(f),
_ => write!(f, "{}", self.as_str()),
} }
} }
} }
impl From<io::Error> for Error { impl From<io::Error> for Error {
fn from(err: io::Error) -> Error { fn from(err: io::Error) -> Error {
ErrorKind::IoError(err).into() Error::IoError(err).into()
} }
} }
impl From<ErrorKind> for Error {
fn from(kind: ErrorKind) -> Error {
Error { repr: Repr::Simple(kind) }
}
}
impl From<netlink::Error> for Error { impl From<netlink::Error> for Error {
fn from(err: netlink::Error) -> Error { fn from(err: netlink::Error) -> Error {
ErrorKind::NetworkSetup(err).into() Error::NetworkSetup(err).into()
} }
} }
enum Repr {
Errno(i32),
Simple(ErrorKind),
General(Box<General>),
}
#[derive(Debug)]
struct General {
kind: ErrorKind,
error: Box<dyn error::Error+Send+Sync>,
}
#[derive(Debug)]
pub struct Error {
repr: Repr,
}
impl Error {
pub fn new<E>(kind: ErrorKind, error: E) -> Error
where E: Into<Box<dyn error::Error+Send+Sync>> {
Self::_new(kind, error.into())
}
fn _new(kind: ErrorKind, error: Box<dyn error::Error+Send+Sync>) -> Error {
Error {
repr: Repr::General(Box::new(General{
kind, error
}))
}
}
pub fn from_last_errno() -> Error {
let errno = unsafe { *libc::__errno_location() };
Error::from_errno(errno)
}
pub fn from_errno(errno: i32) -> Error {
if errno == libc::EINTR {
Error { repr: Repr::Simple(ErrorKind::Interrupted) }
} else {
Error { repr: Repr::Errno(errno) }
}
}
pub fn is_interrupted(&self) -> bool {
match self.repr {
Repr::Simple(ErrorKind::Interrupted) => true,
_ => false,
}
}
}
fn error_string(errno: i32) -> String {
let mut buf = [0 as libc::c_char; 256];
let p = buf.as_mut_ptr();
unsafe {
if libc::strerror_r(errno as libc::c_int, p, buf.len()) < 0 {
panic!("strerror_r failed in error_string");
}
let p = p as *const _;
str::from_utf8(CStr::from_ptr(p).to_bytes()).unwrap().to_owned()
}
}
impl fmt::Debug for Repr {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match *self {
Repr::Errno(ref errno) =>
f.debug_struct("Errno").field("errno", errno)
.field("message", &error_string(*errno)).finish(),
Repr::General(ref c) => f.debug_tuple("General").field(c).finish(),
Repr::Simple(ref kind) => f.debug_tuple("Kind").field(kind).finish(),
}
}
}
impl fmt::Display for Error {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self.repr {
Repr::Errno(errno) => {
let detail = error_string(errno);
write!(f, "{} (errno: {})", detail, errno)
}
Repr::General(ref c) => {
write!(f, "{}: {}", c.kind, c.error)
},
Repr::Simple(ref kind) => kind.fmt(f),
}
}
}
impl error::Error for Error {
fn description(&self) -> &str {
match self.repr {
Repr::Errno(..) => "Errno Error",
Repr::Simple(ref kind) => kind.as_str(),
Repr::General(ref c) => c.error.description(),
}
}
fn source(&self) -> Option<&(dyn error::Error + 'static)> {
match self.repr {
Repr::Errno(..) => None,
Repr::Simple(..) => None,
Repr::General(ref c) => c.error.source(),
}
}
}

View File

@ -1,8 +1,6 @@
use std::ffi::OsString; use std::ffi::OsString;
use std::os::unix::ffi::OsStrExt; use std::os::unix::ffi::OsStrExt;
use crate::memory::{GuestRam,KERNEL_CMDLINE_ADDRESS};
use super::Result;
fn add_defaults(cmdline: &mut KernelCmdLine) { fn add_defaults(cmdline: &mut KernelCmdLine) {
@ -30,13 +28,12 @@ fn add_defaults(cmdline: &mut KernelCmdLine) {
pub struct KernelCmdLine { pub struct KernelCmdLine {
address: u64,
buffer: OsString, buffer: OsString,
} }
impl KernelCmdLine { impl KernelCmdLine {
pub fn new() -> KernelCmdLine { pub fn new() -> KernelCmdLine {
KernelCmdLine { address: KERNEL_CMDLINE_ADDRESS, buffer: OsString::new() } KernelCmdLine { buffer: OsString::new() }
} }
pub fn new_default() -> KernelCmdLine { pub fn new_default() -> KernelCmdLine {
@ -61,19 +58,11 @@ impl KernelCmdLine {
self.push(&format!("{}={}", var, val)) self.push(&format!("{}={}", var, val))
} }
pub fn address(&self) -> u64 {
self.address
}
pub fn size(&self) -> usize { pub fn size(&self) -> usize {
(&self.buffer).as_bytes().len() + 1 (&self.buffer).as_bytes().len() + 1
} }
pub fn write_to_memory(&self, memory: &GuestRam) -> Result<()> { pub fn as_bytes(&self) -> &[u8] {
let bs = self.buffer.as_bytes(); self.buffer.as_bytes()
let len = bs.len();
memory.write_bytes(KERNEL_CMDLINE_ADDRESS, bs)?;
memory.write_int(KERNEL_CMDLINE_ADDRESS + len as u64, 0u8)?;
Ok(())
} }
} }

View File

@ -1,251 +1,19 @@
use std::{thread, fs};
use self::io::IoDispatcher;
use crate::virtio::VirtioBus;
use crate::devices;
use crate::memory::{GuestRam, KVM_KERNEL_LOAD_ADDRESS, MemoryManager, SystemAllocator, AddressRange};
use crate::kvm::*;
static KERNEL: &[u8] = include_bytes!("../../kernel/ph_linux"); static KERNEL: &[u8] = include_bytes!("../../kernel/ph_linux");
static PHINIT: &[u8] = include_bytes!("../../ph-init/target/release/ph-init"); static PHINIT: &[u8] = include_bytes!("../../ph-init/target/release/ph-init");
static SOMMELIER: &[u8] = include_bytes!("../../sommelier/sommelier"); static SOMMELIER: &[u8] = include_bytes!("../../sommelier/sommelier");
pub mod arch;
mod run; mod run;
pub mod io; pub mod io;
mod setup; mod setup;
mod error; mod error;
mod kernel_cmdline; mod kernel_cmdline;
mod config; mod config;
pub use config::VmConfig; pub use config::VmConfig;
pub use setup::VmSetup;
pub use self::error::{Result,Error,ErrorKind}; pub use self::error::{Result,Error};
pub use arch::{ArchSetup,create_setup};
use self::run::KvmRunArea;
use self::kernel_cmdline::KernelCmdLine;
use std::sync::Arc;
use std::sync::atomic::AtomicBool;
use termios::Termios;
use crate::devices::SyntheticFS;
use crate::disk::DiskImage;
use crate::system::{NetlinkSocket, Tap};
pub struct Vm {
_config: VmConfig,
memory: MemoryManager,
io_dispatcher: Arc<IoDispatcher>,
termios: Option<Termios>,
_virtio: Arc<VirtioBus>,
}
static REQUIRED_EXTENSIONS: &[u32] = &[
KVM_CAP_IRQCHIP,
KVM_CAP_HLT,
KVM_CAP_USER_MEMORY,
KVM_CAP_SET_TSS_ADDR,
KVM_CAP_EXT_CPUID,
KVM_CAP_IRQ_ROUTING,
KVM_CAP_IRQ_INJECT_STATUS,
KVM_CAP_PIT2,
KVM_CAP_IOEVENTFD,
];
fn get_base_dev_pfn(mem_size: u64) -> u64 {
// Put device memory at a 2MB boundary after physical memory or 4gb, whichever is greater.
const MB: u64 = 1024 * 1024;
const GB: u64 = 1024 * MB;
let mem_size_round_2mb = (mem_size + 2 * MB - 1) / (2 * MB) * (2 * MB);
std::cmp::max(mem_size_round_2mb, 4 * GB) / 4096
}
impl Vm {
fn create_kvm() -> Result<Kvm> {
let kvm = Kvm::open(&REQUIRED_EXTENSIONS)?;
kvm.set_tss_addr(0xFFFbd000)?;
kvm.create_pit2()?;
kvm.create_irqchip()?;
Ok(kvm)
}
fn create_memory_manager(ram_size: usize, use_drm: bool) -> Result<MemoryManager> {
let kvm = Self::create_kvm()?;
let ram = GuestRam::new(ram_size, &kvm)?;
let dev_addr_start = get_base_dev_pfn(ram_size as u64) * 4096;
let dev_addr_size = u64::max_value() - dev_addr_start;
let allocator = SystemAllocator::new(AddressRange::new(dev_addr_start,dev_addr_size as usize));
Ok(MemoryManager::new(kvm, ram, allocator, use_drm).map_err(|_| ErrorKind::MemoryManagerCreate)?)
}
fn setup_virtio(config: &mut VmConfig, cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> {
devices::VirtioSerial::create(virtio)?;
devices::VirtioRandom::create(virtio)?;
if config.is_wayland_enabled() {
devices::VirtioWayland::create(virtio)?;
}
let homedir = config.homedir();
devices::VirtioP9::create(virtio, "home", homedir, false, false)?;
if homedir != "/home/user" && !config.is_realm() {
cmdline.push_set_val("phinit.home", homedir);
}
let mut block_root = None;
for mut disk in config.get_realmfs_images() {
disk.open().map_err(ErrorKind::DiskImageOpen)?;
if block_root == None {
block_root = Some(disk.read_only());
}
devices::VirtioBlock::create(virtio, disk)?;
}
for mut disk in config.get_raw_disk_images() {
disk.open().map_err(ErrorKind::DiskImageOpen)?;
if block_root == None {
block_root = Some(disk.read_only());
}
devices::VirtioBlock::create(virtio, disk)?;
}
if let Some(read_only) = block_root {
if !read_only {
cmdline.push("phinit.root_rw");
}
cmdline.push("phinit.root=/dev/vda");
cmdline.push("phinit.rootfstype=ext4");
} else {
devices::VirtioP9::create(virtio, "9proot", "/", true, false)?;
cmdline.push_set_val("phinit.root", "9proot");
cmdline.push_set_val("phinit.rootfstype", "9p");
cmdline.push_set_val("phinit.rootflags", "trans=virtio");
}
Self::setup_synthetic_bootfs(cmdline, virtio)?;
if config.network() {
Self::setup_network(config, cmdline, virtio)?;
}
Ok(())
}
/// Builds the in-memory boot filesystem holding `ph-init` and `sommelier`,
/// exports it as the 9p device `/dev/root`, and points the kernel command
/// line at it (`init=`, `root=`, `ro`, `rootfstype=`, `rootflags=`).
///
/// `PHINIT` is written to a scratch file so that `add_library_dependencies`
/// can inspect it by path. The scratch file is removed even when that
/// inspection fails, so an error no longer leaves `/tmp/ph-init` behind.
fn setup_synthetic_bootfs(cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> {
    // NOTE(review): fixed, predictable path in a world-writable directory;
    // consider a private temporary file instead.
    const PHINIT_SCRATCH_PATH: &str = "/tmp/ph-init";

    let mut s = SyntheticFS::new();
    s.mkdirs(&["/tmp", "/proc", "/sys", "/dev", "/home/user", "/bin", "/etc"]);

    fs::write(PHINIT_SCRATCH_PATH, PHINIT)?;
    // Run the cleanup before propagating any dependency-scan error.
    let deps = s.add_library_dependencies(PHINIT_SCRATCH_PATH);
    let cleanup = fs::remove_file(PHINIT_SCRATCH_PATH);
    deps?;
    cleanup?;

    s.add_memory_file("/usr/bin", "ph-init", 0o755, PHINIT)?;
    s.add_memory_file("/usr/bin", "sommelier", 0o755, SOMMELIER)?;
    s.add_file("/etc", "ld.so.cache", 0o644, "/etc/ld.so.cache");

    devices::VirtioP9::create_with_filesystem(s, virtio, "/dev/root", "/", false)?;

    cmdline.push_set_val("init", "/usr/bin/ph-init");
    cmdline.push_set_val("root", "/dev/root");
    cmdline.push("ro");
    cmdline.push_set_val("rootfstype", "9p");
    cmdline.push_set_val("rootflags", "trans=virtio");
    Ok(())
}
/// Attaches a virtio network device backed by a tap interface on the
/// configured bridge and passes the guest address via `phinit.ip`.
fn setup_network(config: &VmConfig, cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> {
    let bridge = config.bridge();
    let tap_device = Self::setup_tap(bridge)?;
    devices::VirtioNet::create(virtio, tap_device)?;
    cmdline.push("phinit.ip=172.17.0.22");
    Ok(())
}
/// Creates a tap device, enslaves it to `bridge_name` and brings it up.
///
/// The bridge itself is created and brought up first when no interface with
/// that name exists yet.
fn setup_tap(bridge_name: &str) -> Result<Tap> {
    let tap = Tap::new_default()?;
    let netlink = NetlinkSocket::open()?;

    let bridge_exists = netlink.interface_exists(bridge_name);
    if !bridge_exists {
        netlink.create_bridge(bridge_name)?;
        netlink.set_interface_up(bridge_name)?;
    }

    netlink.add_interface_to_bridge(tap.name(), bridge_name)?;
    netlink.set_interface_up(tap.name())?;
    Ok(tap)
}
/// Builds a ready-to-start `Vm` from `config`.
///
/// In order: creates the memory manager, loads the kernel into guest RAM,
/// creates the vcpus, registers the RTC (and the serial port in verbose
/// mode), assembles the kernel command line, saves the terminal settings of
/// fd 0, creates the virtio devices, and finally writes the command line and
/// the MP table into guest memory.
pub fn open(mut config: VmConfig) -> Result<Vm> {
    let use_drm = config.is_wayland_enabled() && config.is_dmabuf_enabled();
    let mut memory = Self::create_memory_manager(config.ram_size(), use_drm)?;

    let mut cmdline = KernelCmdLine::new_default();
    setup::kernel::load_pm_kernel(memory.guest_ram(), cmdline.address(), cmdline.size())?;

    let io_dispatcher = IoDispatcher::new();
    memory.kvm_mut().create_vcpus(config.ncpus())?;

    devices::rtc::Rtc::register(io_dispatcher.clone());

    if config.verbose() {
        cmdline.push("earlyprintk=serial");
        devices::serial::SerialDevice::register(memory.kvm().clone(), io_dispatcher.clone(), 0);
    } else {
        cmdline.push("quiet");
    }

    if config.rootshell() {
        cmdline.push("phinit.rootshell");
    }

    if memory.drm_available() && config.is_dmabuf_enabled() {
        cmdline.push("phinit.virtwl_dmabuf");
    }

    if let Some(realm_name) = config.realm_name() {
        cmdline.push_set_val("phinit.realm", realm_name);
    }

    // Remember the current terminal settings so `start` can restore them.
    let saved_termios = Termios::from_fd(0).map_err(ErrorKind::TerminalTermios)?;

    let mut bus = VirtioBus::new(memory.clone(), io_dispatcher.clone(), memory.kvm().clone());
    Self::setup_virtio(&mut config, &mut cmdline, &mut bus)?;

    // A user-supplied init overrides the one pushed by the boot filesystem setup.
    if let Some(init) = config.get_init_cmdline() {
        cmdline.push_set_val("init", init);
    }

    cmdline.write_to_memory(memory.guest_ram())?;
    setup::mptable::setup_mptable(memory.guest_ram(), config.ncpus(), bus.pci_irqs())?;

    Ok(Vm {
        _config: config,
        memory,
        io_dispatcher,
        termios: Some(saved_termios),
        _virtio: Arc::new(bus),
    })
}
/// Runs every vcpu on its own thread and blocks until all of them return.
///
/// Each vcpu is put into the boot CPU state with the entry point at
/// `KVM_KERNEL_LOAD_ADDRESS + 0x200` before its run loop is spawned. Once all
/// threads have been joined, the terminal settings saved earlier (if any) are
/// written back to fd 0.
pub fn start(&self) -> Result<()> {
    let shutdown = Arc::new(AtomicBool::new(false));
    let mut threads = Vec::new();

    for vcpu in self.memory.kvm().get_vcpus() {
        setup::cpu::setup_protected_mode(&vcpu, KVM_KERNEL_LOAD_ADDRESS + 0x200, self.memory.guest_ram())?;
        let mut area = KvmRunArea::new(vcpu, shutdown.clone(), self.io_dispatcher.clone())?;
        threads.push(thread::spawn(move || area.run()));
    }

    for thread in threads {
        thread.join().expect("...");
    }

    if let Some(saved) = self.termios {
        termios::tcsetattr(0, termios::TCSANOW, &saved)
            .map_err(ErrorKind::TerminalTermios)?;
    }
    Ok(())
}
}

View File

@ -5,6 +5,7 @@ use crate::memory::Mapping;
use super::Result; use super::Result;
use super::io::IoDispatcher; use super::io::IoDispatcher;
use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::atomic::{AtomicBool, Ordering};
use crate::vm::Error;
const KVM_EXIT_UNKNOWN:u32 = 0; const KVM_EXIT_UNKNOWN:u32 = 0;
const KVM_EXIT_IO:u32 = 2; const KVM_EXIT_IO:u32 = 2;
@ -37,8 +38,8 @@ pub struct MmioExitData {
impl KvmRunArea { impl KvmRunArea {
pub fn new(vcpu: KvmVcpu, shutdown: Arc<AtomicBool>, io_dispatcher: Arc<IoDispatcher>) -> Result<KvmRunArea> { pub fn new(vcpu: KvmVcpu, shutdown: Arc<AtomicBool>, io_dispatcher: Arc<IoDispatcher>) -> Result<KvmRunArea> {
let size = vcpu.get_vcpu_mmap_size()?; let size = vcpu.get_vcpu_mmap_size().map_err(Error::CreateVmFailed)?;
let mapping = Mapping::new_from_fd(vcpu.raw_fd(), size)?; let mapping = Mapping::new_from_fd(vcpu.raw_fd(), size).map_err(Error::MappingFailed)?;
Ok(KvmRunArea{ Ok(KvmRunArea{
vcpu, vcpu,
io: io_dispatcher, io: io_dispatcher,
@ -120,7 +121,6 @@ impl KvmRunArea {
let sub = self.suberror(); let sub = self.suberror();
println!("internal error: {}", sub); println!("internal error: {}", sub);
println!("{:?}", self.vcpu.get_regs().unwrap()); println!("{:?}", self.vcpu.get_regs().unwrap());
println!("{:?}", self.vcpu.get_sregs().unwrap());
} }
n => { println!("unhandled exit: {}", n);}, n => { println!("unhandled exit: {}", n);},
} }
@ -130,15 +130,6 @@ impl KvmRunArea {
self.shutdown.store(true, Ordering::Relaxed); self.shutdown.store(true, Ordering::Relaxed);
} }
fn _handle_problem(&mut self) {
let regs = self.vcpu.get_regs().unwrap();
let sregs = self.vcpu.get_sregs().unwrap();
println!("REGS:\n{:?}", regs);
println!("SREGS:\n{:?}", sregs);
panic!(":(");
}
fn handle_exit_io(&mut self) { fn handle_exit_io(&mut self) {
let exit = self.get_io_exit(); let exit = self.get_io_exit();
if exit.dir_out { if exit.dir_out {

242
src/vm/setup.rs Normal file
View File

@ -0,0 +1,242 @@
use crate::vm::{VmConfig, Result, Error, PHINIT, SOMMELIER};
use crate::vm::arch::ArchSetup;
use crate::vm::kernel_cmdline::KernelCmdLine;
use crate::vm::io::IoDispatcher;
use crate::devices;
use termios::Termios;
use crate::virtio::VirtioBus;
use crate::virtio;
use crate::devices::SyntheticFS;
use std::{fs, thread};
use crate::system::{Tap, NetlinkSocket};
use crate::disk::DiskImage;
use crate::kvm::{KvmVcpu, Kvm};
use std::sync::Arc;
use crate::memory::MemoryManager;
use std::sync::atomic::AtomicBool;
use crate::vm::run::KvmRunArea;
/// A configured virtual machine: the KVM handle, its vcpus, guest memory and
/// the I/O dispatcher shared with the vcpu run loops.
pub struct Vm {
// KVM handle obtained from the architecture setup.
kvm: Kvm,
// One entry per vcpu; filled in by `VmSetup::create_vm`.
vcpus: Vec<KvmVcpu>,
// Guest memory manager created by the architecture setup.
memory: MemoryManager,
// Dispatcher handed to every vcpu run area and to registered devices.
io_dispatch: Arc<IoDispatcher>,
// Terminal settings of fd 0 captured during setup; restored by `start`.
termios: Option<Termios>,
}
impl Vm {
fn create<A: ArchSetup>(arch: &mut A) -> Result<Self> {
let kvm = arch.open_kvm()
.map_err(Error::ArchError)?;
let memory = arch.create_memory(&kvm)
.map_err(Error::ArchError)?;
Ok(Vm {
kvm,
memory,
vcpus: Vec::new(),
io_dispatch: IoDispatcher::new(),
termios: None,
})
}
/// Spawns one thread per vcpu, waits for all of them to finish, then writes
/// the saved terminal settings (if any) back to fd 0.
pub fn start(&self) -> Result<()> {
    let shutdown = Arc::new(AtomicBool::new(false));
    let mut workers = Vec::new();

    for vcpu in self.vcpus.clone() {
        let mut area = KvmRunArea::new(vcpu, shutdown.clone(), self.io_dispatch.clone())?;
        workers.push(thread::spawn(move || area.run()));
    }

    for worker in workers {
        worker.join().expect("...");
    }

    if let Some(saved) = self.termios {
        termios::tcsetattr(0, termios::TCSANOW, &saved)
            .map_err(Error::TerminalTermios)?;
    }
    Ok(())
}
}
/// Builder that turns a `VmConfig` into a runnable `Vm`, delegating the
/// architecture-specific steps to `T`.
pub struct VmSetup <T: ArchSetup> {
// User-supplied VM configuration.
config: VmConfig,
// Kernel command line accumulated while devices are created.
cmdline: KernelCmdLine,
// Architecture-specific setup implementation.
arch: T,
}
impl <T: ArchSetup> VmSetup <T> {
/// Creates a setup object for `config` with a default kernel command line.
pub fn new(config: VmConfig, arch: T) -> Self {
    let cmdline = KernelCmdLine::new_default();
    VmSetup { config, cmdline, arch }
}
pub fn create_vm(&mut self) -> Result<Vm> {
let mut vm = Vm::create(&mut self.arch)?;
devices::rtc::Rtc::register(vm.io_dispatch.clone());
if self.config.verbose() {
self.cmdline.push("earlyprintk=serial");
devices::serial::SerialDevice::register(vm.kvm.clone(),vm.io_dispatch.clone(), 0);
} else {
self.cmdline.push("quiet");
}
if self.config.rootshell() {
self.cmdline.push("phinit.rootshell");
}
if vm.memory.drm_available() && self.config.is_dmabuf_enabled() {
self.cmdline.push("phinit.virtwl_dmabuf");
}
if let Some(realm) = self.config.realm_name() {
self.cmdline.push_set_val("phinit.realm", realm);
}
let saved= Termios::from_fd(0)
.map_err(Error::TerminalTermios)?;
vm.termios = Some(saved);
let mut virtio = VirtioBus::new(vm.memory.clone(), vm.io_dispatch.clone(), vm.kvm.clone());
self.setup_synthetic_bootfs(&mut virtio)?;
self.setup_virtio(&mut virtio)
.map_err(Error::SetupVirtio)?;
if let Some(init_cmd) = self.config.get_init_cmdline() {
self.cmdline.push_set_val("init", init_cmd);
}
self.arch.setup_memory(&self.cmdline, &virtio.pci_irqs())
.map_err(Error::ArchError)?;
for id in 0..self.config.ncpus() {
let vcpu = vm.kvm.new_vcpu(id).map_err(Error::CreateVmFailed)?;
self.arch.setup_vcpu(&vcpu).map_err(Error::ArchError)?;
vm.vcpus.push(vcpu);
}
Ok(vm)
}
/// Creates the virtio devices requested by the configuration and records the
/// matching `phinit.*` options on the kernel command line.
///
/// The first disk image encountered (realmfs images first, then raw disk
/// images) decides whether the root is read-only; with no disk image the host
/// `/` is exported over 9p as the root. When networking is enabled, the
/// process privileges are dropped right after the network has been set up.
fn setup_virtio(&mut self, virtio: &mut VirtioBus) -> virtio::Result<()> {
    devices::VirtioSerial::create(virtio)?;
    devices::VirtioRandom::create(virtio)?;
    if self.config.is_wayland_enabled() {
        devices::VirtioWayland::create(virtio)?;
    }

    let home = self.config.homedir();
    devices::VirtioP9::create(virtio, "home", home, false, false)?;
    if home != "/home/user" && !self.config.is_realm() {
        self.cmdline.push_set_val("phinit.home", home);
    }

    // `Some(read_only)` once the first block device has been seen.
    let mut root_read_only = None;

    for image in self.config.get_realmfs_images() {
        if root_read_only.is_none() {
            root_read_only = Some(image.read_only());
        }
        devices::VirtioBlock::create(virtio, image)?;
    }

    for image in self.config.get_raw_disk_images() {
        if root_read_only.is_none() {
            root_read_only = Some(image.read_only());
        }
        devices::VirtioBlock::create(virtio, image)?;
    }

    match root_read_only {
        Some(read_only) => {
            if !read_only {
                self.cmdline.push("phinit.root_rw");
            }
            self.cmdline.push("phinit.root=/dev/vda");
            self.cmdline.push("phinit.rootfstype=ext4");
        }
        None => {
            devices::VirtioP9::create(virtio, "9proot", "/", true, false)?;
            self.cmdline.push_set_val("phinit.root", "9proot");
            self.cmdline.push_set_val("phinit.rootfstype", "9p");
            self.cmdline.push_set_val("phinit.rootflags", "trans=virtio");
        }
    }

    if self.config.network() {
        self.setup_network(virtio)?;
        self.drop_privs();
    }
    Ok(())
}
fn drop_privs(&self) {
unsafe {
libc::setgid(1000);
libc::setuid(1000);
libc::setegid(1000);
libc::seteuid(1000);
}
}
fn setup_synthetic_bootfs(&mut self, virtio: &mut VirtioBus) -> Result<()> {
let bootfs = self.create_bootfs()
.map_err(Error::SetupBootFs)?;
devices::VirtioP9::create_with_filesystem(bootfs, virtio, "/dev/root", "/", false)
.map_err(Error::SetupVirtio)?;
self.cmdline.push_set_val("init", "/usr/bin/ph-init");
self.cmdline.push_set_val("root", "/dev/root");
self.cmdline.push("ro");
self.cmdline.push_set_val("rootfstype", "9p");
self.cmdline.push_set_val("rootflags", "trans=virtio");
Ok(())
}
/// Builds the in-memory boot filesystem containing `ph-init`, `sommelier`,
/// the library dependencies of `ph-init` and the host's `/etc/ld.so.cache`.
///
/// `PHINIT` is written to a scratch file so that `add_library_dependencies`
/// can inspect it by path. The scratch file is removed even when that
/// inspection fails, so an error no longer leaves `/tmp/ph-init` behind.
///
/// # Errors
///
/// Returns any I/O error from writing or removing the scratch file, or from
/// populating the filesystem.
fn create_bootfs(&self) -> ::std::io::Result<SyntheticFS> {
    // NOTE(review): fixed, predictable path in a world-writable directory;
    // consider a private temporary file instead.
    const PHINIT_SCRATCH_PATH: &str = "/tmp/ph-init";

    let mut s = SyntheticFS::new();
    s.mkdirs(&["/tmp", "/proc", "/sys", "/dev", "/home/user", "/bin", "/etc"]);

    fs::write(PHINIT_SCRATCH_PATH, PHINIT)?;
    // Run the cleanup before propagating any dependency-scan error.
    let deps = s.add_library_dependencies(PHINIT_SCRATCH_PATH);
    let cleanup = fs::remove_file(PHINIT_SCRATCH_PATH);
    deps?;
    cleanup?;

    s.add_memory_file("/usr/bin", "ph-init", 0o755, PHINIT)?;
    s.add_memory_file("/usr/bin", "sommelier", 0o755, SOMMELIER)?;
    s.add_file("/etc", "ld.so.cache", 0o644, "/etc/ld.so.cache");
    Ok(s)
}
/// Attaches a virtio network device backed by a tap interface.
///
/// Failing to create the tap device is not fatal: a warning is logged and
/// the VM is set up without networking.
fn setup_network(&mut self, virtio: &mut VirtioBus) -> virtio::Result<()> {
    match self.setup_tap() {
        Ok(tap) => {
            devices::VirtioNet::create(virtio, tap)?;
            self.cmdline.push("phinit.ip=172.17.0.22");
        }
        Err(e) => {
            warn!("failed to create tap device: {}", e);
        }
    }
    Ok(())
}
/// Creates a tap device, enslaves it to the configured bridge and brings it
/// up. The bridge is created and brought up first when no interface with
/// that name exists yet.
fn setup_tap(&self) -> Result<Tap> {
    let bridge = self.config.bridge();
    let tap = Tap::new_default()?;
    let netlink = NetlinkSocket::open()?;

    let bridge_exists = netlink.interface_exists(bridge);
    if !bridge_exists {
        netlink.create_bridge(bridge)?;
        netlink.set_interface_up(bridge)?;
    }

    netlink.add_interface_to_bridge(tap.name(), bridge)?;
    netlink.set_interface_up(tap.name())?;
    Ok(tap)
}
}

View File

@ -1,200 +0,0 @@
use crate::vm::Result;
use crate::kvm::{KvmVcpu,KvmRegs,KvmFpu, KvmMsrs, KvmSegment};
use crate::memory::{GuestRam,KERNEL_ZERO_PAGE};
// Model-specific register indices programmed by `setup_msrs`.
const MSR_IA32_SYSENTER_CS: u32 = 0x00000174;
const MSR_IA32_SYSENTER_ESP: u32 = 0x00000175;
const MSR_IA32_SYSENTER_EIP: u32 = 0x00000176;
const MSR_STAR: u32 = 0xc0000081;
const MSR_LSTAR: u32 = 0xc0000082;
const MSR_CSTAR: u32 = 0xc0000083;
const MSR_SYSCALL_MASK: u32 = 0xc0000084;
const MSR_KERNEL_GS_BASE: u32 = 0xc0000102;
const MSR_IA32_TSC: u32 = 0x00000010;
const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0;
// Value written to MSR_IA32_MISC_ENABLE by `setup_msrs`.
const MSR_IA32_MISC_ENABLE_FAST_STRING: u64 = 0x01;
// Bit positions and field values used when patching CPUID entries.
// Names starting with `_` are currently unused.
const EBX_CLFLUSH_CACHELINE: u32 = 8; // CLFLUSH line size value placed in EBX (presumably in 8-byte units, i.e. 64 bytes -- confirm against the CPUID leaf 1 definition).
const EBX_CLFLUSH_SIZE_SHIFT: u32 = 8; // Shift of the CLFLUSH line size field within EBX.
const _EBX_CPU_COUNT_SHIFT: u32 = 16; // Shift of the logical CPU count field within EBX.
const EBX_CPUID_SHIFT: u32 = 24; // Shift of the field holding the index of this CPU within EBX.
const _ECX_EPB_SHIFT: u32 = 3; // "Energy Performance Bias" bit.
const _ECX_HYPERVISOR_SHIFT: u32 = 31; // Flag to be set when the cpu is running on a hypervisor.
const _EDX_HTT_SHIFT: u32 = 28; // Hyper Threading Enabled.
/// Patches the CPUID table supported by KVM and installs it on `vcpu`.
///
/// * function 0: replaces the vendor registers (the bytes spell
///   `"Subgraph pH "` in EBX, EDX, ECX order).
/// * function 1: sets ECX bit 31 on index 0 and rewrites EBX with the CPU
///   index and the CLFLUSH line size. No multi-CPU topology bits are set;
///   the CPU index is always 0.
/// * function 6: clears ECX bit 3.
/// * function 10: zeroes EAX unless it reports version 2 with at least one
///   counter.
fn setup_cpuid(vcpu: &KvmVcpu) -> Result<()> {
    let cpu_id = 0u32; // first vcpu
    let mut entries = vcpu.get_supported_cpuid()?;

    for entry in &mut entries {
        match entry.function {
            0 => {
                entry.ebx = 0x67627553;
                entry.ecx = 0x20487020;
                entry.edx = 0x68706172;
            }
            1 => {
                if entry.index == 0 {
                    entry.ecx |= 1 << 31;
                }
                let apic_bits = (cpu_id << EBX_CPUID_SHIFT) as u32;
                let clflush_bits = EBX_CLFLUSH_CACHELINE << EBX_CLFLUSH_SIZE_SHIFT;
                entry.ebx = apic_bits | clflush_bits;
            }
            6 => {
                entry.ecx &= !(1 << 3);
            }
            10 if entry.eax > 0 => {
                let version = entry.eax & 0xFF;
                let ncounters = (entry.eax >> 8) & 0xFF;
                let usable = version == 2 && ncounters != 0;
                if !usable {
                    entry.eax = 0;
                }
            }
            _ => {}
        }
    }

    vcpu.set_cpuid2(entries)?;
    Ok(())
}
/// Installs the initial FPU state on `vcpu`: control word `0x37f` and
/// MXCSR `0x1f80`, everything else as produced by `KvmFpu::new`.
fn setup_fpu(vcpu: &KvmVcpu) -> Result<()> {
    let mut state = KvmFpu::new();
    state.mxcsr = 0x1f80;
    state.fcw = 0x37f;
    vcpu.set_fpu(&state)?;
    Ok(())
}
/// Programs the boot-time MSR values on `vcpu`: the syscall/sysenter MSRs,
/// the kernel GS base and the TSC are zeroed, and `MSR_IA32_MISC_ENABLE` is
/// set to `MSR_IA32_MISC_ENABLE_FAST_STRING`.
fn setup_msrs(vcpu: &KvmVcpu) -> Result<()> {
    // MSRs that start out as zero, in the order they are added.
    const ZEROED: [u32; 9] = [
        MSR_IA32_SYSENTER_CS,
        MSR_IA32_SYSENTER_ESP,
        MSR_IA32_SYSENTER_EIP,
        MSR_STAR,
        MSR_CSTAR,
        MSR_KERNEL_GS_BASE,
        MSR_SYSCALL_MASK,
        MSR_LSTAR,
        MSR_IA32_TSC,
    ];

    let mut list = KvmMsrs::new();
    for &index in ZEROED.iter() {
        list.add(index, 0);
    }
    list.add(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_FAST_STRING);

    vcpu.set_msrs(&list)?;
    Ok(())
}
/// Packs `flags`, `base` and `limit` into a 64-bit segment descriptor.
///
/// Bit layout of the result:
/// * bits 0..16  - `limit` bits 0..16
/// * bits 16..40 - `base` bits 0..24
/// * bits 40..48 and 52..56 - `flags` bits 0..8 and 12..16
/// * bits 48..52 - `limit` bits 16..20
/// * bits 56..64 - `base` bits 24..32
pub fn gdt_entry(flags: u16, base: u32, limit: u32) -> u64 {
    let flags = flags as u64;
    let base = base as u64;
    let limit = limit as u64;

    let base_high = (base & 0xff00_0000) << (56 - 24);
    let base_low = (base & 0x00ff_ffff) << 16;
    let limit_high = (limit & 0x000f_0000) << (48 - 16);
    let limit_low = limit & 0x0000_ffff;
    let flag_bits = (flags & 0x0000_f0ff) << 40;

    base_high | flag_bits | limit_high | base_low | limit_low
}
// Guest-physical locations of the boot-time tables and stack.
const BOOT_GDT_OFFSET: usize = 0x500;
const BOOT_IDT_OFFSET: usize = 0x520;
const BOOT_STACK: u64 = 0x8000;
// Guest-physical addresses of the three page-table levels written by
// `setup_boot_pagetables`.
const BOOT_PML4: u64 = 0x9000;
const BOOT_PDPTE: u64 = 0xA000;
const BOOT_PDE: u64 = 0xB000;
// Control register and EFER bits OR-ed into the special registers by
// `setup_pm_sregs`.
const X86_CR0_PE: u64 = 0x1;
const X86_CR0_PG: u64 = 0x80000000;
const X86_CR4_PAE: u64 = 0x20;
const EFER_LME: u64 = 0x100;
const EFER_LMA: u64 = (1 << 10);
/// Writes the boot page tables into guest memory.
///
/// One PML4 entry points at the PDPT and one PDPT entry points at the page
/// directory (both with flags `0x3`). The page directory gets 512 entries
/// with flags `0x83`, entry `i` mapping address `i << 21`.
fn setup_boot_pagetables(memory: &GuestRam) -> Result<()> {
    memory.write_int::<u64>(BOOT_PML4, BOOT_PDPTE | 0x3)?;
    memory.write_int::<u64>(BOOT_PDPTE, BOOT_PDE | 0x3)?;

    for index in 0..512_u64 {
        let slot = BOOT_PDE + (index * 8);
        let mapping = (index << 21) | 0x83;
        memory.write_int::<u64>(slot, mapping)?;
    }
    Ok(())
}
/// Writes the descriptors in `table` to guest memory as consecutive 8-byte
/// entries starting at `BOOT_GDT_OFFSET`.
fn write_gdt_table(table: &[u64], memory: &GuestRam) -> Result<()> {
    for (slot, descriptor) in table.iter().enumerate() {
        let address = (BOOT_GDT_OFFSET + slot * 8) as u64;
        memory.write_int(address, *descriptor)?;
    }
    Ok(())
}
/// Prepares the special registers of `vcpu` for booting the kernel.
///
/// Writes a four-entry GDT (null, code, data, TSS) and an empty IDT entry
/// into guest memory, loads the matching segment registers, builds the boot
/// page tables, and enables protected mode, PAE, paging and the long-mode
/// EFER bits.
pub fn setup_pm_sregs(vcpu: &KvmVcpu, memory: &GuestRam) -> Result<()> {
    let gdt = [
        gdt_entry(0, 0, 0),
        gdt_entry(0xa09b, 0, 0xfffff),
        gdt_entry(0xc093, 0, 0xfffff),
        gdt_entry(0x808b, 0, 0xfffff),
    ];
    write_gdt_table(&gdt, memory)?;
    memory.write_int::<u64>(BOOT_IDT_OFFSET as u64, 0u64)?;

    // Selectors are the byte offsets of GDT entries 1, 2 and 3.
    let code_seg = KvmSegment::new(0, 0xfffff, 1 * 8, 0xa09b);
    let data_seg = KvmSegment::new(0, 0xfffff, 2 * 8, 0xc093);
    let tss_seg = KvmSegment::new(0, 0xfffff, 3 * 8, 0x808b);

    let mut sregs = vcpu.get_sregs()?;

    sregs.gdt.base = BOOT_GDT_OFFSET as u64;
    sregs.gdt.limit = 32 - 1;
    sregs.itd.base = BOOT_IDT_OFFSET as u64;
    sregs.itd.limit = 8 - 1;

    sregs.cs = code_seg;
    sregs.ds = data_seg;
    sregs.es = data_seg;
    sregs.fs = data_seg;
    sregs.gs = data_seg;
    sregs.ss = data_seg;
    sregs.tr = tss_seg;

    setup_boot_pagetables(memory)?;

    // Protected mode + paging, with the long-mode bits in EFER.
    sregs.cr3 = BOOT_PML4;
    sregs.cr4 |= X86_CR4_PAE;
    sregs.cr0 |= X86_CR0_PE | X86_CR0_PG;
    sregs.efer |= EFER_LME | EFER_LMA;

    vcpu.set_sregs(&sregs)?;
    Ok(())
}
/// Sets the general-purpose registers of `vcpu` for kernel entry: RIP at
/// `kernel_entry`, RSP/RBP at `BOOT_STACK`, RSI pointing at the zero page,
/// and RFLAGS set to `0x2`.
pub fn setup_pm_regs(vcpu: &KvmVcpu, kernel_entry: u64) -> Result<()> {
    let mut gprs = KvmRegs::new();
    gprs.rip = kernel_entry;
    gprs.rsp = BOOT_STACK;
    gprs.rbp = BOOT_STACK;
    gprs.rsi = KERNEL_ZERO_PAGE;
    gprs.rflags = 0x0000000000000002;
    vcpu.set_regs(&gprs)?;
    Ok(())
}
/// Fully initializes `vcpu` for booting the kernel at `kernel_entry`:
/// CPUID, special registers (with GDT and page tables written to `memory`),
/// general registers, FPU state and MSRs, in that order.
pub fn setup_protected_mode(vcpu: &KvmVcpu, kernel_entry: u64, memory: &GuestRam) -> Result<()> {
    setup_cpuid(vcpu)?;
    setup_pm_sregs(vcpu, memory)?;
    setup_pm_regs(vcpu, kernel_entry)?;
    setup_fpu(vcpu)?;
    setup_msrs(vcpu)?;
    Ok(())
}

View File

@ -1,116 +0,0 @@
use std::io::{self, Read, SeekFrom, Seek, Cursor};
use byteorder::{LittleEndian,ReadBytesExt};
use crate::memory::{self,GuestRam,KERNEL_ZERO_PAGE};
use crate::vm::{Result, Error, ErrorKind, KERNEL};
// Offsets of setup-header fields within the zero page; the trailing comment
// gives the width of each field.
// Documentation/x86/boot.txt
const HDR_BOOT_FLAG: u64 = 0x1fe; // u16
const HDR_HEADER: u64 = 0x202; // u32
const HDR_TYPE_LOADER: u64 = 0x210; // u8
const HDR_CMDLINE_PTR: u64 = 0x228; // u32
const HDR_CMDLINE_SIZE: u64 = 0x238; // u32
const HDR_KERNEL_ALIGNMENT: u64 = 0x230; // u32
// Offsets of the e820 entry count and the e820 map within the zero page.
// Documentation/x86/zero-page.txt
const BOOT_PARAM_E820_ENTRIES: u64 = 0x1e8;
const BOOT_PARAM_E820_MAP: u64 = 0x2d0;
// Values written into the header fields by `setup_zero_page`.
const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
// Length of the first e820 range, which starts at address 0.
const EBDA_START: u64 = 0x0009fc00;
const KERNEL_HDR_MAGIC: u32 = 0x53726448;
const KERNEL_LOADER_OTHER: u8 = 0xff;
const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x1000000;
// e820 type written for every range by `setup_e820`.
const E820_RAM: u32 = 1;
/// Writes the e820 memory map into the zero page located at `base`.
///
/// The map always starts with the range `[0, EBDA_START)`. RAM from the
/// kernel load address follows as a single range, or as two ranges (below
/// `PCI_MMIO_RESERVED_BASE` and from `HIMEM_BASE`) when the RAM size reaches
/// the reserved PCI MMIO base. Every range is marked `E820_RAM`.
fn setup_e820(memory: &GuestRam, base: u64) -> Result<()> {
    let ram_size = memory.ram_size() as u64;
    let kernel_base = memory::KVM_KERNEL_LOAD_ADDRESS;

    // Each tuple is (start address, length).
    // NOTE(review): the subtractions below underflow if `ram_size` is smaller
    // than the address being subtracted -- confirm the constants rule this out.
    let mut ranges = vec![(0u64, EBDA_START)];
    if ram_size >= memory::PCI_MMIO_RESERVED_BASE {
        ranges.push((kernel_base, memory::PCI_MMIO_RESERVED_BASE - kernel_base));
        ranges.push((memory::HIMEM_BASE, ram_size - memory::HIMEM_BASE));
    } else {
        ranges.push((kernel_base, ram_size - kernel_base));
    }

    memory.write_int::<u8>(base + BOOT_PARAM_E820_ENTRIES, ranges.len() as u8)?;

    // Entries are 20 bytes each: u64 start, u64 length, u32 type.
    for (index, &(start, length)) in ranges.iter().enumerate() {
        let entry = base + BOOT_PARAM_E820_MAP + (index as u64 * 20);
        memory.write_int::<u64>(entry, start)?;
        memory.write_int::<u64>(entry + 8, length)?;
        memory.write_int::<u32>(entry + 16, E820_RAM)?;
    }
    Ok(())
}
/// Fills in the boot-protocol header fields of the zero page at
/// `KERNEL_ZERO_PAGE` (boot flag, header magic, loader type, command line
/// pointer and size, kernel alignment) and then writes the e820 map.
///
/// `cmdline_addr` and `cmdline_size` are stored as 32-bit values.
fn setup_zero_page(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> Result<()> {
    let zero_page = KERNEL_ZERO_PAGE;

    memory.write_int::<u16>(zero_page + HDR_BOOT_FLAG, KERNEL_BOOT_FLAG_MAGIC)?;
    memory.write_int::<u32>(zero_page + HDR_HEADER, KERNEL_HDR_MAGIC)?;
    memory.write_int::<u8>(zero_page + HDR_TYPE_LOADER, KERNEL_LOADER_OTHER)?;
    memory.write_int::<u32>(zero_page + HDR_CMDLINE_PTR, cmdline_addr as u32)?;
    memory.write_int::<u32>(zero_page + HDR_CMDLINE_SIZE, cmdline_size as u32)?;
    memory.write_int::<u32>(zero_page + HDR_KERNEL_ALIGNMENT, KERNEL_MIN_ALIGNMENT_BYTES)?;

    setup_e820(memory, zero_page)
}
pub fn load_pm_kernel(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> Result<()> {
load_elf_kernel(memory).map_err(|_| Error::from(ErrorKind::ReadKernelFailed))?;
setup_zero_page(memory, cmdline_addr, cmdline_size)
}
/// Copies the loadable segments of the embedded `KERNEL` ELF image into
/// guest memory.
///
/// The program header offset is read from byte 32 of the image and the
/// header count from byte 56. Every header with `p_type == 1` has
/// `p_filesz` bytes copied from `p_offset` to
/// `KVM_KERNEL_LOAD_ADDRESS + p_paddr`.
///
/// # Panics
///
/// Panics if `mut_slice` rejects a segment's destination range.
pub fn load_elf_kernel(memory: &GuestRam) -> io::Result<()> {
    let mut image = Cursor::new(KERNEL);

    image.seek(SeekFrom::Start(32))?;
    let phoff = image.read_u64::<LittleEndian>()?;
    // Skip from the end of the offset field to the header count.
    image.seek(SeekFrom::Current(16))?;
    let phnum = image.read_u16::<LittleEndian>()?;

    image.seek(SeekFrom::Start(phoff))?;
    let headers = (0..phnum)
        .map(|_| load_phdr(&mut image))
        .collect::<io::Result<Vec<_>>>()?;

    for segment in headers.into_iter().filter(|hdr| hdr.p_type == 1) {
        image.seek(SeekFrom::Start(segment.p_offset))?;
        // NOTE(review): a failed lookup panics here instead of returning an
        // error -- consider propagating it.
        let dest = memory
            .mut_slice(memory::KVM_KERNEL_LOAD_ADDRESS + segment.p_paddr, segment.p_filesz as usize)
            .unwrap();
        image.read_exact(dest)?;
    }
    Ok(())
}
fn load_phdr<R: Read+Sized>(r: &mut R) -> io::Result<ElfPhdr> {
let mut phdr: ElfPhdr = Default::default();
phdr.p_type = r.read_u32::<LittleEndian>()?;
phdr.p_flags = r.read_u32::<LittleEndian>()?;
phdr.p_offset = r.read_u64::<LittleEndian>()?;
phdr.p_vaddr = r.read_u64::<LittleEndian>()?;
phdr.p_paddr = r.read_u64::<LittleEndian>()?;
phdr.p_filesz = r.read_u64::<LittleEndian>()?;
phdr.p_memsz = r.read_u64::<LittleEndian>()?;
phdr.p_align = r.read_u64::<LittleEndian>()?;
Ok(phdr)
}
/// One program header of the kernel image, as read by `load_phdr`.
#[derive(Default,Debug)]
struct ElfPhdr {
// Segment type; `load_elf_kernel` only loads headers where this is 1.
pub p_type: u32,
pub p_flags: u32,
// Byte offset of the segment data within the image.
pub p_offset: u64,
pub p_vaddr: u64,
// Added to the kernel load address to form the destination in guest memory.
pub p_paddr: u64,
// Number of bytes copied from the image.
pub p_filesz: u64,
pub p_memsz: u64,
pub p_align: u64,
}

View File

@ -1,3 +0,0 @@
// vcpu initialization (entry point `setup_protected_mode`).
pub mod cpu;
// Kernel image loading (entry point `load_pm_kernel`).
pub mod kernel;
// MP table setup (entry point `setup_mptable`).
pub mod mptable;