From 2a442775ddbb5f91d59564a16bd9801a48031d92 Mon Sep 17 00:00:00 2001 From: Bruce Leidl Date: Thu, 26 Sep 2019 22:52:33 -0400 Subject: [PATCH] virtio_net --- ph-init/src/error.rs | 1 + ph-init/src/init.rs | 34 +- ph-init/src/main.rs | 2 + ph-init/src/netlink.rs | 642 +++++++++++++++++++++++++++++++++++ src/devices/mod.rs | 2 + src/devices/virtio_net.rs | 264 ++++++++++++++ src/devices/virtio_wl/mod.rs | 2 +- src/system/mod.rs | 7 +- src/system/netlink.rs | 642 +++++++++++++++++++++++++++++++++++ src/system/tap.rs | 151 ++++++++ src/vm/config.rs | 24 +- src/vm/error.rs | 10 + src/vm/mod.rs | 27 +- 13 files changed, 1799 insertions(+), 9 deletions(-) create mode 100644 ph-init/src/netlink.rs create mode 100644 src/devices/virtio_net.rs create mode 100644 src/system/netlink.rs create mode 100644 src/system/tap.rs diff --git a/ph-init/src/error.rs b/ph-init/src/error.rs index a7a10a8..e4d9c98 100644 --- a/ph-init/src/error.rs +++ b/ph-init/src/error.rs @@ -1,4 +1,5 @@ use std::{result, io, fmt}; +use crate::netlink; pub enum Error { Pid1, diff --git a/ph-init/src/init.rs b/ph-init/src/init.rs index 26b48fd..5809b37 100644 --- a/ph-init/src/init.rs +++ b/ph-init/src/init.rs @@ -1,5 +1,5 @@ -use crate::{Error, Result, Logger, LogLevel}; +use crate::{Error, Result, Logger, LogLevel, netlink}; use crate::cmdline::CmdLine; use crate::sys::{sethostname, setsid, set_controlling_tty, mount_devtmpfs, mount_tmpfs, mkdir, umount, mount_sysfs, mount_procfs, mount_devpts, chown, chmod, create_directories, mount_overlay, move_mount, pivot_root, mount_9p, mount, waitpid, reboot, getpid, mount_tmpdir, mount_cgroup, mkdir_mode, umask, _chown}; use std::path::Path; @@ -97,6 +97,9 @@ impl InitServer { } else { self.setup_writeable_root()?; } + fs::write("/etc/hosts", format!("127.0.0.1 {} localhost\n", self.hostname)) + .map_err(Error::WriteEtcHosts)?; + umount("/opt/ph/tmp")?; umount("/opt/ph/proc")?; umount("/opt/ph/dev")?; @@ -145,8 +148,6 @@ impl InitServer { .map_err(|e| Error::MkDir(String::from("/tmp/sysroot/opt/ph"), e))?; } pivot_root("/tmp/sysroot", "/tmp/sysroot/opt/ph")?; - fs::write("/etc/hosts", format!("127.0.0.1 {} localhost", self.hostname)) - .map_err(Error::WriteEtcHosts)?; Ok(()) } @@ -179,6 +180,7 @@ impl InitServer { Ok(()) } + pub fn run_daemons(&mut self) -> Result<()> { if !Path::new("/dev/wl0").exists() { return Ok(()); @@ -246,6 +248,30 @@ impl InitServer { Ok(()) } + pub fn setup_network(&self) -> Result<()> { + if let Some(val) = self.cmdline.lookup("phinit.ip") { + if let Ok(ip) = Ipv4Addr::from_str(&val) { + self.configure_network(ip) + .map_err(Error::NetworkConfigure)?; + } + } + Ok(()) + } + + fn configure_network(&self, ip: Ipv4Addr) -> netlink::Result<()> { + let mut octets = ip.octets(); + octets[3] = 1; + let gw = Ipv4Addr::from(octets); + let nl = NetlinkSocket::open()?; + if !nl.interface_exists("eth0") { + + } + nl.add_ip_address("eth0", ip, 24)?; + nl.set_interface_up("eth0")?; + nl.add_default_route(gw)?; + Ok(()) + } + fn write_xauth(&self) -> io::Result<()> { let xauth_path = format!("{}/.Xauthority", self.homedir()); @@ -276,11 +302,13 @@ impl InitServer { } pub fn launch_console_shell(&mut self, splash: &'static str) -> Result<()> { + fs::write("/run/bashrc", BASHRC).map_err(Error::WriteBashrc)?; let root = self.cmdline.has_var("phinit.rootshell"); let realm = self.cmdline.lookup("phinit.realm"); let home = if root { "/".to_string() } else { self.homedir().to_string() }; let shell = ServiceLaunch::new_shell(root, &home, realm) + .arg("--rcfile").arg("/run/bashrc") .launch_with_preexec(move || { // set_controlling_tty(0, true)?; env::set_current_dir(&home)?; diff --git a/ph-init/src/main.rs b/ph-init/src/main.rs index 088707d..ff33a1a 100644 --- a/ph-init/src/main.rs +++ b/ph-init/src/main.rs @@ -8,6 +8,7 @@ mod cmdline; mod service; mod init; mod sys; +mod netlink; pub use error::{Error,Result}; pub use log::{Logger,LogLevel}; @@ -17,6 +18,7 @@ fn run_init() -> Result<()> { let mut server = InitServer::create("airwolf")?; server.setup_filesystem()?; server.run_daemons()?; + server.setup_network()?; server.launch_console_shell(SPLASH)?; server.run()?; Ok(()) diff --git a/ph-init/src/netlink.rs b/ph-init/src/netlink.rs new file mode 100644 index 0000000..1fd0cfb --- /dev/null +++ b/ph-init/src/netlink.rs @@ -0,0 +1,642 @@ +use std::cell::Cell; +use std::convert::TryInto; +use std::ffi::CString; +use std::net::Ipv4Addr; +use std::{mem, result, fmt, io}; +use std::os::unix::io::RawFd; +use std::path::Path; + +use libc::{ + PF_NETLINK, SOCK_RAW, SOCK_CLOEXEC, SOCK_NONBLOCK +}; + +const NETLINK_ROUTE: i32 = 0; + +const IFLA_IFNAME: u16 = 3; +const IFLA_MASTER: u16 = 10; +const IFLA_LINKINFO: u16 = 18; +const IFLA_INFO_KIND: u16 = 1; + +const NLA_F_NESTED: u16 = 1 << 15; +const IFA_ADDRESS: u16 = 1; +const IFA_LOCAL: u16 = 2; + +const NLMSG_ERROR: u16 = 2; + +pub const NLM_F_REQUEST: u16 = 1; +pub const NLM_F_ACK: u16 = 4; +pub const NLM_F_EXCL: u16 = 512; +pub const NLM_F_CREATE: u16 = 1024; + +pub const RTM_NEWLINK: u16 = 16; +pub const RTM_SETLINK: u16 = 19; +pub const RTM_NEWADDR: u16 = 20; + +pub const AF_UNSPEC: u8 = 0; +pub const AF_INET: u8 = 2; + +const NL_HDRLEN: usize = 16; +const ATTR_HDRLEN: usize = 4; + +const RTM_NEWROUTE: u16 = 24; + +const RT_TABLE_MAIN: u8 = 254; +const RT_SCOPE_UNIVERSE: u8 = 0; +const RTPROT_BOOT: u8 = 3; +const RTN_UNICAST: u8 = 1; + +const RTA_GATEWAY: u16 = 5; + + +const MESSAGE_ALIGN: usize = 4; + +pub const IFF_UP: u32 = libc::IFF_UP as u32; + +pub type Result = result::Result; + +#[derive(Debug)] +pub enum Error { + Socket(io::Error), + SocketBind(io::Error), + SocketSend(io::Error), + SocketRecv(io::Error), + NameToIndex(io::Error), + ErrorResponse(io::Error), + UnexpectedResponse, + ShortSend, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + match self { + Socket(e) => write!(f, "failed to create netlink socket: {}", e), + SocketBind(e) => write!(f, "failed calling bind() on netlink socket: {}", e), + SocketSend(e) => write!(f, "failed calling sendto() on netlink socket: {}", e), + SocketRecv(e) => write!(f, "failed calling recv() on netlink socket: {}", e), + NameToIndex(e) => write!(f, "failed to convert interface name to index: {}", e), + ErrorResponse(e) => write!(f, "error response to netlink request: {}", e), + UnexpectedResponse => write!(f, "could not parse response message from netlink"), + ShortSend => write!(f, "failed to transmit entire netlink message"), + } + } +} + +pub struct NetlinkSocket { + sock: RawFd, + seq: Cell, +} + + +impl NetlinkSocket { + pub fn open() -> Result { + Self::open_protocol(NETLINK_ROUTE) + } + + #[allow(dead_code)] + pub fn add_default_route(&self, gateway: Ipv4Addr) -> Result<()> { + let msg = self.message_create(RTM_NEWROUTE) + .with_rtmsg(AF_INET, |hdr| { + hdr.table(RT_TABLE_MAIN) + .scope(RT_SCOPE_UNIVERSE) + .protocol(RTPROT_BOOT) + .rtype(RTN_UNICAST); + }) + .append_attr(RTA_GATEWAY, &gateway.octets()) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn add_interface_to_bridge(&self, iface: &str, bridge: &str) -> Result<()> { + let bridge_idx = self.name_to_index(bridge)?; + let msg = self.message(RTM_SETLINK) + .with_ifinfomsg(AF_UNSPEC, |hdr| { + hdr.set_flags(IFF_UP) + .set_change(IFF_UP); + }) + .attr_u32(IFLA_MASTER, bridge_idx) + .attr_str(IFLA_IFNAME, iface) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn create_bridge(&self, name: &str) -> Result<()> { + let msg = self.message_create(RTM_NEWLINK) + .ifinfomsg(AF_UNSPEC) + .attr_str(IFLA_IFNAME, name) + .with_nested(IFLA_LINKINFO, |a| { + a.attr_str(IFLA_INFO_KIND, "bridge") + .align(); + }) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn set_interface_up(&self, iface: &str) -> Result<()> { + let idx = self.name_to_index(iface)?; + let msg = self.message(RTM_NEWLINK) + .with_ifinfomsg(AF_UNSPEC, |hdr| { + hdr.set_flags(IFF_UP) + .set_change(IFF_UP) + .index(idx); + }) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn add_ip_address(&self, iface: &str, ip: Ipv4Addr, netmask_bits: u32) -> Result<()> { + let idx = self.name_to_index(iface)?; + let msg = self.message_create(RTM_NEWADDR) + .with_ifaddrmsg(|hdr| { + hdr.family(AF_INET) + .prefixlen(netmask_bits as u8) + .scope(RT_SCOPE_UNIVERSE) + .index(idx); + }) + .append_attr(IFA_ADDRESS, &ip.octets()) + .append_attr(IFA_LOCAL, &ip.octets()) + .done(); + + self.send_message(msg) + } + + fn open_protocol(protocol: i32) -> Result { + let sock = sys_socket(PF_NETLINK, + SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, + protocol)?; + + let mut sockaddr: libc::sockaddr_nl = unsafe { mem::zeroed() }; + sockaddr.nl_family = PF_NETLINK as u16; + let addrlen = mem::size_of::(); + sys_bind(sock, + &sockaddr as *const libc::sockaddr_nl as *const libc::sockaddr, + addrlen)?; + + Ok(NetlinkSocket{ sock, seq: Cell::new(1) } ) + } + + fn name_to_index(&self, name: &str) -> Result { + let name = CString::new(name).unwrap(); + let ret = unsafe { libc::if_nametoindex(name.as_ptr()) }; + if ret == 0 { + Err(Error::NameToIndex(io::Error::last_os_error())) + } else { + Ok(ret as u32) + } + } + + pub fn interface_exists(&self, name: &str) -> bool { + Path::new("/sys/class/net") + .join(name) + .exists() + } + + fn seq(&self) -> u32 { + let seq = self.seq.get(); + self.seq.set(seq + 1); + seq + } + + fn message(&self, mtype: u16) -> NetlinkMessage { + let flags = NLM_F_REQUEST | NLM_F_ACK; + NetlinkMessage::new(mtype, flags, self.seq()) + } + + fn message_create(&self, mtype: u16) -> NetlinkMessage { + let flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL; + NetlinkMessage::new(mtype, flags, self.seq()) + } + + fn send_message(&self, msg: NetlinkMessage) -> Result<()> { + self.send(msg.as_bytes())?; + let mut recv_buffer = vec![0u8; 4096]; + let n = sys_recv(self.sock, &mut recv_buffer, 0)?; + if n < NL_HDRLEN + 4 { + return Err(Error::UnexpectedResponse); + } + recv_buffer.truncate(n); + self.process_response(InBuffer::new(&recv_buffer)) + } + + fn process_response(&self, mut resp: InBuffer) -> Result<()> { + resp.skip(4); + let mtype = resp.read_u16(); + resp.skip(10); + if mtype == NLMSG_ERROR { + match resp.read_u32() { + 0 => Ok(()), + errno => { + let e = io::Error::from_raw_os_error(errno as i32); + Err(Error::ErrorResponse(e)) + } + } + } else { + Err(Error::UnexpectedResponse) + } + } + + fn send(&self, buf: &[u8]) -> Result<()> { + let mut sockaddr: libc::sockaddr_nl = unsafe { mem::zeroed() }; + sockaddr.nl_family = PF_NETLINK as u16; + let addrlen = mem::size_of::(); + let n = sys_sendto(self.sock, + buf, + &sockaddr as *const libc::sockaddr_nl as *const libc::sockaddr, + addrlen)?; + + if n != buf.len() { + Err(Error::ShortSend) + } else { + Ok(()) + } + } +} + +impl Drop for NetlinkSocket { + fn drop(&mut self) { + let _ = unsafe { libc::close(self.sock) }; + } +} + +pub struct NetlinkMessage(Buffer>); + +impl NetlinkMessage { + + pub fn new(mtype: u16, flags: u16, seq: u32) -> Self { + let mut msg = NetlinkMessage(Buffer::new_empty()); + NetlinkHeader::new(msg.0.next(NL_HDRLEN), mtype) + .flags(flags) + .seq(seq); + msg + } + + fn ifinfomsg(self, family: u8) -> Self { + self.with_ifinfomsg(family, |_| { }) + } + + fn with_ifinfomsg(mut self, family: u8, mut f: F) -> Self + where F: FnMut(IfInfoMsgHdr) + { + const IF_INFOHDRLEN: usize = 16; + + f(IfInfoMsgHdr::new(self.0.next(IF_INFOHDRLEN), family)); + self + } + + fn with_rtmsg(mut self, family: u8, mut f: F) -> Self + where F: FnMut(RtMsg) + { + const RTMSG_HDRLEN: usize = 12; + + f(RtMsg::new(self.0.next(RTMSG_HDRLEN), family)); + self + } + + fn with_ifaddrmsg(mut self, mut f: F) -> Self + where F: FnMut(IfAddrMsg) + { + const IFADDRMSG_LEN: usize = 8; + f(IfAddrMsg::new(self.0.next(IFADDRMSG_LEN))); + self + + } + + fn with_nested(self, atype: u16, mut f: F) -> Self + where F: FnMut(&mut Buffer>) + { + let mut nested = Buffer::new_empty(); + nested.write_u16(0); + nested.write_u16(atype | NLA_F_NESTED); + f(&mut nested); + nested.align(); + nested.write_u16_at(0, nested.len() as u16); + self.write(nested.as_bytes()) + } + + fn attr_u32(mut self, atype: u16, val: u32) -> Self { + self.0.attr_u32(atype, val); + self + } + + fn attr_str(mut self, atype: u16, val: &str) -> Self { + self.0.attr_str(atype, val); + self + } + + fn append_attr(mut self, atype: u16, data: &[u8]) -> Self { + self.0.append_attr(atype, data); + self + } + + fn write(mut self, data: &[u8]) -> Self { + self.0.write(data).align(); + self + } + + fn update_len(&mut self) { + self.0.align(); + self.0.write_u32_at(0, self.0.len() as u32); + } + + fn done(mut self) -> Self { + self.update_len(); + self + } + + fn as_bytes(&self) -> &[u8] { + self.0.as_bytes() + } +} + +struct Buffer+AsRef<[u8]>>(T); + +impl +AsRef<[u8]>> Buffer { + + fn write_u8_at(&mut self, offset: usize, val: u8) -> &mut Self { + self.write_at(offset, &val.to_ne_bytes()) + } + + fn write_u16_at(&mut self, offset: usize, val: u16) -> &mut Self { + self.write_at(offset, &val.to_ne_bytes()) + } + + fn write_u32_at(&mut self, offset: usize, val: u32) -> &mut Self { + self.write_at(offset, &val.to_ne_bytes()) + } + + fn slice_at(&mut self, off: usize, len: usize) -> &mut [u8] { + &mut self.0.as_mut()[off..off+len] + } + + fn write_at(&mut self, offset: usize, bytes: &[u8]) -> &mut Self { + self.slice_at(offset, bytes.len()) + .copy_from_slice(bytes); + self + } + + fn as_bytes(&self) -> &[u8] { + self.0.as_ref() + } +} + +impl <'a> Buffer<&'a mut [u8]> { + fn bytes(bytes: &'a mut [u8]) -> Self { + Buffer(bytes) + } +} + +impl Buffer> { + + fn new_empty() -> Self { + Buffer(Vec::new()) + } + + fn len(&self) -> usize { + self.0.len() + } + + fn next(&mut self, size: usize) -> &mut [u8] { + let off = self.0.len(); + self.0.resize(off + size, 0); + self.slice_at(off, size) + } + + fn align(&mut self) { + let aligned_len = self.0.len() + (MESSAGE_ALIGN - 1) & !(MESSAGE_ALIGN - 1); + self.0.resize(aligned_len, 0); + } + + fn _write_u8(&mut self, val: u8) -> &mut Self { + self.write(&val.to_ne_bytes()) + } + + fn write_u16(&mut self, val: u16) -> &mut Self { + self.write(&val.to_ne_bytes()) + } + + fn _write_u32(&mut self, val: u32) -> &mut Self { + self.write(&val.to_ne_bytes()) + } + + fn write(&mut self, bytes: &[u8]) -> &mut Self { + self.next(bytes.len()) + .copy_from_slice(bytes); + self + } + + fn attr_u32(&mut self, atype: u16, val: u32) -> &mut Self { + self.append_attr(atype, &val.to_ne_bytes()) + } + + fn attr_str(&mut self, atype: u16, val: &str) -> &mut Self { + self.append_attr(atype, val.as_bytes()) + } + + fn append_attr(&mut self, atype: u16, data: &[u8]) -> &mut Self { + let attrlen = data.len() + ATTR_HDRLEN; + assert!(attrlen <= u16::max_value() as usize); + self.write_u16(attrlen as u16) + .write_u16(atype) + .write(data) + .align(); + self + } +} + +struct InBuffer { + bytes: Vec, + offset: usize, +} + +impl InBuffer { + fn new(bytes: &[u8]) -> Self { + let mut v = Vec::new(); + v.extend_from_slice(bytes); + InBuffer { bytes: v, offset: 0} + } + + fn next(&mut self, size: usize) -> &[u8] { + assert!(self.offset + size <= self.bytes.len()); + let off = self.skip(size); + &self.bytes[off..off+size] + } + + fn read_u16(&mut self) -> u16 { + u16::from_ne_bytes(self.next(2).try_into().unwrap()) + } + + fn read_u32(&mut self) -> u32 { + u32::from_ne_bytes(self.next(4).try_into().unwrap()) + } + + fn skip(&mut self, n: usize) -> usize { + let off = self.offset; + self.offset += n; + off + } +} + +pub struct RtMsg<'a>(Buffer<&'a mut [u8]>); + +impl <'a> RtMsg <'a> { + pub fn new(bytes: &'a mut [u8], family: u8) -> Self { + let mut buffer = Buffer::bytes(bytes); + buffer.write_u8_at(0, family); + RtMsg(buffer) + } + + pub fn table(mut self, table: u8) -> Self { + self.0.write_u8_at(4, table); + self + } + + pub fn protocol(mut self, proto: u8) -> Self { + self.0.write_u8_at(5, proto); + self + } + + pub fn scope(mut self, scope: u8) -> Self { + self.0.write_u8_at(6, scope); + self + } + + pub fn rtype(mut self, rtype: u8) -> Self { + self.0.write_u8_at(7, rtype); + self + } +} + +pub struct IfInfoMsgHdr<'a>(Buffer<&'a mut [u8]>); + +impl <'a> IfInfoMsgHdr <'a> { + pub fn new(bytes: &'a mut [u8], family: u8) -> Self { + let mut buffer = Buffer::bytes(bytes); + buffer.write_u8_at(0, family); + IfInfoMsgHdr(buffer) + } + + fn index(mut self, index: u32) -> Self { + self.0.write_u32_at(4, index); + self + } + + pub fn set_flags(mut self, flags: u32) -> Self { + self.0.write_u32_at(8, flags); + self + } + + pub fn set_change(mut self, flags: u32) -> Self { + self.0.write_u32_at(12, flags); + self + } +} + +pub struct IfAddrMsg<'a>(Buffer<&'a mut [u8]>); + +impl <'a> IfAddrMsg <'a> { + fn new(bytes: &'a mut [u8]) -> Self { + IfAddrMsg(Buffer::bytes(bytes)) + } + + pub fn family(mut self, family: u8) -> Self { + self.0.write_u8_at(0, family); + self + } + pub fn prefixlen(mut self, prefixlen: u8) -> Self { + self.0.write_u8_at(1, prefixlen); + self + } + pub fn _flags(mut self, flags: u8) -> Self { + self.0.write_u8_at(2, flags); + self + } + pub fn scope(mut self, scope: u8) -> Self { + self.0.write_u8_at(3, scope); + self + } + pub fn index(mut self, index: u32) -> Self { + self.0.write_u32_at(4, index); + self + } +} + +pub struct NetlinkHeader<'a>(Buffer<&'a mut [u8]>); + +impl <'a> NetlinkHeader <'a> { + pub fn new(bytes: &'a mut [u8], mtype: u16) -> Self { + let mut buffer = Buffer::bytes(bytes); + buffer.write_u16_at(4, mtype); + NetlinkHeader(buffer) + } + + pub fn flags(mut self, flags: u16) -> Self { + self.0.write_u16_at(6, flags); + self + } + + fn seq(mut self, seq: u32) -> Self { + self.0.write_u32_at(8, seq); + self + } + + fn _portid(mut self, portid: u32) -> Self { + self.0.write_u32_at(12, portid); + self + } +} + +fn sys_socket(domain: i32, stype: i32, protocol: i32) -> Result { + unsafe { + let fd = libc::socket(domain, stype, protocol); + if fd < 0 { + Err(Error::Socket(io::Error::last_os_error())) + } else { + Ok(fd) + } + } +} + +fn sys_bind(sockfd: RawFd, addr: *const libc::sockaddr, addrlen: usize) -> Result<()> { + unsafe { + if libc::bind(sockfd, addr, addrlen as u32) < 0 { + Err(Error::SocketBind(io::Error::last_os_error())) + } else { + Ok(()) + } + } +} + +fn sys_sendto(sockfd: RawFd, buf: &[u8], addr: *const libc::sockaddr, addrlen: usize) -> Result { + let len = buf.len(); + let buf = buf.as_ptr() as *const libc::c_void; + let flags = 0; + unsafe { + let size = libc::sendto(sockfd, buf, len, flags, addr, addrlen as u32); + if size < 0 { + Err(Error::SocketSend(io::Error::last_os_error())) + } else { + Ok(size as usize) + } + } +} + +fn sys_recv(sockfd: RawFd, buf: &mut [u8], flags: i32) -> Result { + let len = buf.len(); + let buf = buf.as_mut_ptr() as *mut libc::c_void; + unsafe { + let size = libc::recv(sockfd, buf, len, flags); + if size < 0 { + Err(Error::SocketRecv(io::Error::last_os_error())) + } else { + Ok(size as usize) + } + } +} diff --git a/src/devices/mod.rs b/src/devices/mod.rs index 8f78c1f..fe76f43 100644 --- a/src/devices/mod.rs +++ b/src/devices/mod.rs @@ -5,6 +5,7 @@ mod virtio_serial; mod virtio_rng; mod virtio_wl; mod virtio_block; +mod virtio_net; pub use self::virtio_serial::VirtioSerial; pub use self::virtio_9p::VirtioP9; @@ -12,3 +13,4 @@ pub use self::virtio_9p::SyntheticFS; pub use self::virtio_rng::VirtioRandom; pub use self::virtio_wl::VirtioWayland; pub use self::virtio_block::VirtioBlock; +pub use self::virtio_net::VirtioNet; diff --git a/src/devices/virtio_net.rs b/src/devices/virtio_net.rs new file mode 100644 index 0000000..3ff92c4 --- /dev/null +++ b/src/devices/virtio_net.rs @@ -0,0 +1,264 @@ +use crate::virtio::{VirtioDeviceOps, VirtQueue, VirtioBus}; +use crate::memory::MemoryManager; +use crate::{vm, system}; +use std::sync::{RwLock, Arc}; +use std::{fmt, result, thread, io}; +use crate::system::{EPoll,Event}; +use std::io::{Read, Write}; +use std::os::unix::io::AsRawFd; +use crate::system::Tap; + +const VIRTIO_ID_NET: u16 = 1; +const MAC_ADDR_LEN: usize = 6; + +#[derive(Debug)] +pub enum Error { + ChainWrite(io::Error), + ChainRead(io::Error), + ChainIoEvent(vm::Error), + SetupPoll(system::Error), + TapRead(io::Error), + TapWrite(io::Error), + PollWait(system::Error), +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + match self { + ChainWrite(err) => write!(f, "Error writing to virtqueue chain: {}", err), + ChainRead(err) => write!(f, "Error reading from virtqueue chain: {}", err), + ChainIoEvent(err) => write!(f, "Error reading from virtqueue ioevent: {}", err), + SetupPoll(e) => write!(f, "Failed to set up Poll: {}", e), + TapRead(e) => write!(f, "Error reading from tap device: {}", e), + TapWrite(e) => write!(f, "Error writing to tap device: {}", e), + PollWait(e) => write!(f, "Poll wait returned error: {}", e), + } + } +} +type Result = result::Result; + + +const VIRTIO_NET_F_CSUM: u64 = 1; +const VIRTIO_NET_F_GUEST_CSUM: u64 = 1 << 1; +const VIRTIO_NET_F_GUEST_TSO4: u64 = 1 << 7; +const VIRTIO_NET_F_GUEST_UFO: u64 = 1 << 10; +const VIRTIO_NET_F_HOST_TSO4: u64 = 1 << 11; +const VIRTIO_NET_F_HOST_UFO: u64 = 1 << 14; + +//const VIRTIO_NET_HDR_SIZE: i32 = 12; + +pub struct VirtioNet { + tap: Option, +} + +impl VirtioNet { + fn new(tap: Tap) -> Self { + VirtioNet{ + tap: Some(tap) + } + } + + pub fn create(vbus: &mut VirtioBus, tap: Tap) -> vm::Result<()> { + tap.set_offload(TUN_F_CSUM | TUN_F_UFO | TUN_F_TSO4 | TUN_F_TSO6).unwrap(); + tap.set_vnet_hdr_size(12).unwrap(); + let dev = Arc::new(RwLock::new(VirtioNet::new(tap))); + let feature_bits = + VIRTIO_NET_F_CSUM | + VIRTIO_NET_F_GUEST_CSUM | + VIRTIO_NET_F_GUEST_TSO4 | + VIRTIO_NET_F_GUEST_UFO | + VIRTIO_NET_F_HOST_TSO4 | + VIRTIO_NET_F_HOST_UFO; + + vbus.new_virtio_device(VIRTIO_ID_NET, dev) + .set_queue_sizes(&[256, 256]) + .set_config_size(MAC_ADDR_LEN) + .set_features(feature_bits) + .register() + } +} + +pub const TUN_F_CSUM: u32 = 1; +pub const TUN_F_TSO4: u32 = 2; +pub const TUN_F_TSO6: u32 = 4; +pub const TUN_F_UFO: u32= 16; + +impl VirtioDeviceOps for VirtioNet { + fn start(&mut self, _memory: &MemoryManager, mut queues: Vec) { + let tx = queues.pop().unwrap(); + let rx = queues.pop().unwrap(); + let tap = self.tap.take().unwrap(); + let poll = match EPoll::new() { + Ok(poll) => poll, + Err(e) => { + warn!("Cannot start VirtioNet because unable to create Epoll instance: {}", e); + return; + } + }; + let mut dev = VirtioNetDevice::new(rx, tx, tap, poll); + thread::spawn(move || { + if let Err(err) = dev.run() { + warn!("error running virtio net device: {}", err); + } + }); + } +} + +const MAX_BUFFER_SIZE: usize = 65562; +const RX_VQ_TOKEN:u64 = 1; +const TX_VQ_TOKEN:u64 = 2; +const RX_TAP:u64 = 3; + +struct VirtioNetDevice { + tap: Tap, + poll: EPoll, + tap_event_enabled: bool, + rx: VirtQueue, + tx: VirtQueue, + rx_bytes: usize, + rx_frame: [u8; MAX_BUFFER_SIZE], + tx_frame: [u8; MAX_BUFFER_SIZE], +} + +impl VirtioNetDevice { + fn new(rx: VirtQueue, tx: VirtQueue, tap: Tap, poll: EPoll) -> Self { + VirtioNetDevice { + rx, + tx, + tap, + poll, + tap_event_enabled: false, + rx_bytes: 0, + rx_frame: [0; MAX_BUFFER_SIZE], + tx_frame: [0; MAX_BUFFER_SIZE], + } + } + + fn enable_tap_poll(&mut self) { + if !self.tap_event_enabled { + if let Err(e) = self.poll.add_read(self.tap.as_raw_fd(), RX_TAP) { + warn!("virtio_net: error enabling tap poll event: {}", e); + } else { + self.tap_event_enabled = true; + } + } + } + + fn disable_tap_events(&mut self) { + if self.tap_event_enabled { + if let Err(e) = self.poll.delete(self.tap.as_raw_fd()) { + warn!("virtio_net: error disabling tap poll event: {}", e); + } else { + self.tap_event_enabled = false; + } + } + } + + fn handle_tx_queue(&mut self) -> Result<()> { + self.tx.ioevent() + .read() + .map_err(Error::ChainIoEvent)?; + + while let Some(mut chain) = self.tx.next_chain() { + loop { + let n = chain.read(&mut self.tx_frame) + .map_err(Error::ChainRead)?; + if n == 0 { + break; + } + self.tap.write_all(&self.tx_frame[..n]) + .map_err(Error::TapWrite)?; + } + + chain.skip_readable(); + chain.flush_chain() + } + Ok(()) + } + + fn pending_rx(&self) -> bool { + self.rx_bytes != 0 + } + + fn receive_frame(&mut self) -> Result { + if let Some(mut chain) = self.rx.next_chain() { + chain.write_all(&self.rx_frame[..self.rx_bytes]) + .map_err(Error::ChainWrite)?; + self.rx_bytes = 0; + // XXX defer interrupt + chain.flush_chain(); + Ok(true) + } else { + self.disable_tap_events(); + Ok(false) + } + } + + fn tap_read(&mut self) -> Result { + match self.tap.read(&mut self.rx_frame) { + Ok(n) => { + self.rx_bytes = n; + Ok(true) + }, + Err(e) => if let Some(libc::EAGAIN) = e.raw_os_error() { + // handle deferred interrupts + Ok(false) + } else { + Err(Error::TapRead(e)) + }, + } + } + + fn handle_rx_tap(&mut self) -> Result<()> { + if self.pending_rx() { + if !self.receive_frame()? { + return Ok(()) + } + } + + while self.tap_read()? { + if !self.receive_frame()? { + break; + } + } + Ok(()) + } + + fn handle_rx_queue(&mut self) -> Result<()> { + self.rx.ioevent().read().unwrap(); + if self.pending_rx() { + if self.receive_frame()? { + self.enable_tap_poll(); + } + } + Ok(()) + } + + fn handle_event(&mut self, ev: Event) -> Result<()> { + match ev.id() { + TX_VQ_TOKEN => self.handle_tx_queue(), + RX_VQ_TOKEN => self.handle_rx_queue(), + RX_TAP=> self.handle_rx_tap(), + _ => Ok(()), + } + } + + fn run(&mut self) -> Result<()> { + self.poll.add_read(self.rx.ioevent().as_raw_fd(), RX_VQ_TOKEN) + .map_err(Error::SetupPoll)?; + self.poll.add_read(self.tx.ioevent().as_raw_fd(), TX_VQ_TOKEN) + .map_err(Error::SetupPoll)?; + self.enable_tap_poll(); + + loop { + let events = self.poll.wait().map_err(Error::PollWait)?; + + for ev in events.iter() { + if let Err(err) = self.handle_event(ev) { + warn!("virtio_net: error handling poll event: {}", err); + } + } + } + } +} \ No newline at end of file diff --git a/src/devices/virtio_wl/mod.rs b/src/devices/virtio_wl/mod.rs index e749f22..284d815 100644 --- a/src/devices/virtio_wl/mod.rs +++ b/src/devices/virtio_wl/mod.rs @@ -96,7 +96,7 @@ pub enum Error { TooManySendVfds(usize), FailedPollContextCreate(system::Error), FailedPollAdd(system::Error), - DmaSync(vm::Error), + DmaSync(system::Error), DmaBuf(MemError), DmaBufSize(system::Error), } diff --git a/src/system/mod.rs b/src/system/mod.rs index a451da1..1d58a54 100644 --- a/src/system/mod.rs +++ b/src/system/mod.rs @@ -5,12 +5,15 @@ mod bitvec; mod socket; mod filedesc; mod memfd; +mod tap; +pub mod netlink; pub use bitvec::BitVec; pub use filedesc::{FileDesc, FileFlags}; pub use memfd::MemoryFd; -pub use epoll::EPoll; +pub use epoll::{EPoll,Event}; pub use errno::{Error,Result,errno_result}; pub use socket::ScmSocket; +pub use netlink::NetlinkSocket; +pub use tap::Tap; -use errno::cvt; diff --git a/src/system/netlink.rs b/src/system/netlink.rs new file mode 100644 index 0000000..1fd0cfb --- /dev/null +++ b/src/system/netlink.rs @@ -0,0 +1,642 @@ +use std::cell::Cell; +use std::convert::TryInto; +use std::ffi::CString; +use std::net::Ipv4Addr; +use std::{mem, result, fmt, io}; +use std::os::unix::io::RawFd; +use std::path::Path; + +use libc::{ + PF_NETLINK, SOCK_RAW, SOCK_CLOEXEC, SOCK_NONBLOCK +}; + +const NETLINK_ROUTE: i32 = 0; + +const IFLA_IFNAME: u16 = 3; +const IFLA_MASTER: u16 = 10; +const IFLA_LINKINFO: u16 = 18; +const IFLA_INFO_KIND: u16 = 1; + +const NLA_F_NESTED: u16 = 1 << 15; +const IFA_ADDRESS: u16 = 1; +const IFA_LOCAL: u16 = 2; + +const NLMSG_ERROR: u16 = 2; + +pub const NLM_F_REQUEST: u16 = 1; +pub const NLM_F_ACK: u16 = 4; +pub const NLM_F_EXCL: u16 = 512; +pub const NLM_F_CREATE: u16 = 1024; + +pub const RTM_NEWLINK: u16 = 16; +pub const RTM_SETLINK: u16 = 19; +pub const RTM_NEWADDR: u16 = 20; + +pub const AF_UNSPEC: u8 = 0; +pub const AF_INET: u8 = 2; + +const NL_HDRLEN: usize = 16; +const ATTR_HDRLEN: usize = 4; + +const RTM_NEWROUTE: u16 = 24; + +const RT_TABLE_MAIN: u8 = 254; +const RT_SCOPE_UNIVERSE: u8 = 0; +const RTPROT_BOOT: u8 = 3; +const RTN_UNICAST: u8 = 1; + +const RTA_GATEWAY: u16 = 5; + + +const MESSAGE_ALIGN: usize = 4; + +pub const IFF_UP: u32 = libc::IFF_UP as u32; + +pub type Result = result::Result; + +#[derive(Debug)] +pub enum Error { + Socket(io::Error), + SocketBind(io::Error), + SocketSend(io::Error), + SocketRecv(io::Error), + NameToIndex(io::Error), + ErrorResponse(io::Error), + UnexpectedResponse, + ShortSend, +} + +impl fmt::Display for Error { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + use Error::*; + match self { + Socket(e) => write!(f, "failed to create netlink socket: {}", e), + SocketBind(e) => write!(f, "failed calling bind() on netlink socket: {}", e), + SocketSend(e) => write!(f, "failed calling sendto() on netlink socket: {}", e), + SocketRecv(e) => write!(f, "failed calling recv() on netlink socket: {}", e), + NameToIndex(e) => write!(f, "failed to convert interface name to index: {}", e), + ErrorResponse(e) => write!(f, "error response to netlink request: {}", e), + UnexpectedResponse => write!(f, "could not parse response message from netlink"), + ShortSend => write!(f, "failed to transmit entire netlink message"), + } + } +} + +pub struct NetlinkSocket { + sock: RawFd, + seq: Cell, +} + + +impl NetlinkSocket { + pub fn open() -> Result { + Self::open_protocol(NETLINK_ROUTE) + } + + #[allow(dead_code)] + pub fn add_default_route(&self, gateway: Ipv4Addr) -> Result<()> { + let msg = self.message_create(RTM_NEWROUTE) + .with_rtmsg(AF_INET, |hdr| { + hdr.table(RT_TABLE_MAIN) + .scope(RT_SCOPE_UNIVERSE) + .protocol(RTPROT_BOOT) + .rtype(RTN_UNICAST); + }) + .append_attr(RTA_GATEWAY, &gateway.octets()) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn add_interface_to_bridge(&self, iface: &str, bridge: &str) -> Result<()> { + let bridge_idx = self.name_to_index(bridge)?; + let msg = self.message(RTM_SETLINK) + .with_ifinfomsg(AF_UNSPEC, |hdr| { + hdr.set_flags(IFF_UP) + .set_change(IFF_UP); + }) + .attr_u32(IFLA_MASTER, bridge_idx) + .attr_str(IFLA_IFNAME, iface) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn create_bridge(&self, name: &str) -> Result<()> { + let msg = self.message_create(RTM_NEWLINK) + .ifinfomsg(AF_UNSPEC) + .attr_str(IFLA_IFNAME, name) + .with_nested(IFLA_LINKINFO, |a| { + a.attr_str(IFLA_INFO_KIND, "bridge") + .align(); + }) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn set_interface_up(&self, iface: &str) -> Result<()> { + let idx = self.name_to_index(iface)?; + let msg = self.message(RTM_NEWLINK) + .with_ifinfomsg(AF_UNSPEC, |hdr| { + hdr.set_flags(IFF_UP) + .set_change(IFF_UP) + .index(idx); + }) + .done(); + + self.send_message(msg) + } + + #[allow(dead_code)] + pub fn add_ip_address(&self, iface: &str, ip: Ipv4Addr, netmask_bits: u32) -> Result<()> { + let idx = self.name_to_index(iface)?; + let msg = self.message_create(RTM_NEWADDR) + .with_ifaddrmsg(|hdr| { + hdr.family(AF_INET) + .prefixlen(netmask_bits as u8) + .scope(RT_SCOPE_UNIVERSE) + .index(idx); + }) + .append_attr(IFA_ADDRESS, &ip.octets()) + .append_attr(IFA_LOCAL, &ip.octets()) + .done(); + + self.send_message(msg) + } + + fn open_protocol(protocol: i32) -> Result { + let sock = sys_socket(PF_NETLINK, + SOCK_RAW | SOCK_CLOEXEC | SOCK_NONBLOCK, + protocol)?; + + let mut sockaddr: libc::sockaddr_nl = unsafe { mem::zeroed() }; + sockaddr.nl_family = PF_NETLINK as u16; + let addrlen = mem::size_of::(); + sys_bind(sock, + &sockaddr as *const libc::sockaddr_nl as *const libc::sockaddr, + addrlen)?; + + Ok(NetlinkSocket{ sock, seq: Cell::new(1) } ) + } + + fn name_to_index(&self, name: &str) -> Result { + let name = CString::new(name).unwrap(); + let ret = unsafe { libc::if_nametoindex(name.as_ptr()) }; + if ret == 0 { + Err(Error::NameToIndex(io::Error::last_os_error())) + } else { + Ok(ret as u32) + } + } + + pub fn interface_exists(&self, name: &str) -> bool { + Path::new("/sys/class/net") + .join(name) + .exists() + } + + fn seq(&self) -> u32 { + let seq = self.seq.get(); + self.seq.set(seq + 1); + seq + } + + fn message(&self, mtype: u16) -> NetlinkMessage { + let flags = NLM_F_REQUEST | NLM_F_ACK; + NetlinkMessage::new(mtype, flags, self.seq()) + } + + fn message_create(&self, mtype: u16) -> NetlinkMessage { + let flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE | NLM_F_EXCL; + NetlinkMessage::new(mtype, flags, self.seq()) + } + + fn send_message(&self, msg: NetlinkMessage) -> Result<()> { + self.send(msg.as_bytes())?; + let mut recv_buffer = vec![0u8; 4096]; + let n = sys_recv(self.sock, &mut recv_buffer, 0)?; + if n < NL_HDRLEN + 4 { + return Err(Error::UnexpectedResponse); + } + recv_buffer.truncate(n); + self.process_response(InBuffer::new(&recv_buffer)) + } + + fn process_response(&self, mut resp: InBuffer) -> Result<()> { + resp.skip(4); + let mtype = resp.read_u16(); + resp.skip(10); + if mtype == NLMSG_ERROR { + match resp.read_u32() { + 0 => Ok(()), + errno => { + let e = io::Error::from_raw_os_error(errno as i32); + Err(Error::ErrorResponse(e)) + } + } + } else { + Err(Error::UnexpectedResponse) + } + } + + fn send(&self, buf: &[u8]) -> Result<()> { + let mut sockaddr: libc::sockaddr_nl = unsafe { mem::zeroed() }; + sockaddr.nl_family = PF_NETLINK as u16; + let addrlen = mem::size_of::(); + let n = sys_sendto(self.sock, + buf, + &sockaddr as *const libc::sockaddr_nl as *const libc::sockaddr, + addrlen)?; + + if n != buf.len() { + Err(Error::ShortSend) + } else { + Ok(()) + } + } +} + +impl Drop for NetlinkSocket { + fn drop(&mut self) { + let _ = unsafe { libc::close(self.sock) }; + } +} + +pub struct NetlinkMessage(Buffer>); + +impl NetlinkMessage { + + pub fn new(mtype: u16, flags: u16, seq: u32) -> Self { + let mut msg = NetlinkMessage(Buffer::new_empty()); + NetlinkHeader::new(msg.0.next(NL_HDRLEN), mtype) + .flags(flags) + .seq(seq); + msg + } + + fn ifinfomsg(self, family: u8) -> Self { + self.with_ifinfomsg(family, |_| { }) + } + + fn with_ifinfomsg(mut self, family: u8, mut f: F) -> Self + where F: FnMut(IfInfoMsgHdr) + { + const IF_INFOHDRLEN: usize = 16; + + f(IfInfoMsgHdr::new(self.0.next(IF_INFOHDRLEN), family)); + self + } + + fn with_rtmsg(mut self, family: u8, mut f: F) -> Self + where F: FnMut(RtMsg) + { + const RTMSG_HDRLEN: usize = 12; + + f(RtMsg::new(self.0.next(RTMSG_HDRLEN), family)); + self + } + + fn with_ifaddrmsg(mut self, mut f: F) -> Self + where F: FnMut(IfAddrMsg) + { + const IFADDRMSG_LEN: usize = 8; + f(IfAddrMsg::new(self.0.next(IFADDRMSG_LEN))); + self + + } + + fn with_nested(self, atype: u16, mut f: F) -> Self + where F: FnMut(&mut Buffer>) + { + let mut nested = Buffer::new_empty(); + nested.write_u16(0); + nested.write_u16(atype | NLA_F_NESTED); + f(&mut nested); + nested.align(); + nested.write_u16_at(0, nested.len() as u16); + self.write(nested.as_bytes()) + } + + fn attr_u32(mut self, atype: u16, val: u32) -> Self { + self.0.attr_u32(atype, val); + self + } + + fn attr_str(mut self, atype: u16, val: &str) -> Self { + self.0.attr_str(atype, val); + self + } + + fn append_attr(mut self, atype: u16, data: &[u8]) -> Self { + self.0.append_attr(atype, data); + self + } + + fn write(mut self, data: &[u8]) -> Self { + self.0.write(data).align(); + self + } + + fn update_len(&mut self) { + self.0.align(); + self.0.write_u32_at(0, self.0.len() as u32); + } + + fn done(mut self) -> Self { + self.update_len(); + self + } + + fn as_bytes(&self) -> &[u8] { + self.0.as_bytes() + } +} + +struct Buffer+AsRef<[u8]>>(T); + +impl +AsRef<[u8]>> Buffer { + + fn write_u8_at(&mut self, offset: usize, val: u8) -> &mut Self { + self.write_at(offset, &val.to_ne_bytes()) + } + + fn write_u16_at(&mut self, offset: usize, val: u16) -> &mut Self { + self.write_at(offset, &val.to_ne_bytes()) + } + + fn write_u32_at(&mut self, offset: usize, val: u32) -> &mut Self { + self.write_at(offset, &val.to_ne_bytes()) + } + + fn slice_at(&mut self, off: usize, len: usize) -> &mut [u8] { + &mut self.0.as_mut()[off..off+len] + } + + fn write_at(&mut self, offset: usize, bytes: &[u8]) -> &mut Self { + self.slice_at(offset, bytes.len()) + .copy_from_slice(bytes); + self + } + + fn as_bytes(&self) -> &[u8] { + self.0.as_ref() + } +} + +impl <'a> Buffer<&'a mut [u8]> { + fn bytes(bytes: &'a mut [u8]) -> Self { + Buffer(bytes) + } +} + +impl Buffer> { + + fn new_empty() -> Self { + Buffer(Vec::new()) + } + + fn len(&self) -> usize { + self.0.len() + } + + fn next(&mut self, size: usize) -> &mut [u8] { + let off = self.0.len(); + self.0.resize(off + size, 0); + self.slice_at(off, size) + } + + fn align(&mut self) { + let aligned_len = self.0.len() + (MESSAGE_ALIGN - 1) & !(MESSAGE_ALIGN - 1); + self.0.resize(aligned_len, 0); + } + + fn _write_u8(&mut self, val: u8) -> &mut Self { + self.write(&val.to_ne_bytes()) + } + + fn write_u16(&mut self, val: u16) -> &mut Self { + self.write(&val.to_ne_bytes()) + } + + fn _write_u32(&mut self, val: u32) -> &mut Self { + self.write(&val.to_ne_bytes()) + } + + fn write(&mut self, bytes: &[u8]) -> &mut Self { + self.next(bytes.len()) + .copy_from_slice(bytes); + self + } + + fn attr_u32(&mut self, atype: u16, val: u32) -> &mut Self { + self.append_attr(atype, &val.to_ne_bytes()) + } + + fn attr_str(&mut self, atype: u16, val: &str) -> &mut Self { + self.append_attr(atype, val.as_bytes()) + } + + fn append_attr(&mut self, atype: u16, data: &[u8]) -> &mut Self { + let attrlen = data.len() + ATTR_HDRLEN; + assert!(attrlen <= u16::max_value() as usize); + self.write_u16(attrlen as u16) + .write_u16(atype) + .write(data) + .align(); + self + } +} + +struct InBuffer { + bytes: Vec, + offset: usize, +} + +impl InBuffer { + fn new(bytes: &[u8]) -> Self { + let mut v = Vec::new(); + v.extend_from_slice(bytes); + InBuffer { bytes: v, offset: 0} + } + + fn next(&mut self, size: usize) -> &[u8] { + assert!(self.offset + size <= self.bytes.len()); + let off = self.skip(size); + &self.bytes[off..off+size] + } + + fn read_u16(&mut self) -> u16 { + u16::from_ne_bytes(self.next(2).try_into().unwrap()) + } + + fn read_u32(&mut self) -> u32 { + u32::from_ne_bytes(self.next(4).try_into().unwrap()) + } + + fn skip(&mut self, n: usize) -> usize { + let off = self.offset; + self.offset += n; + off + } +} + +pub struct RtMsg<'a>(Buffer<&'a mut [u8]>); + +impl <'a> RtMsg <'a> { + pub fn new(bytes: &'a mut [u8], family: u8) -> Self { + let mut buffer = Buffer::bytes(bytes); + buffer.write_u8_at(0, family); + RtMsg(buffer) + } + + pub fn table(mut self, table: u8) -> Self { + self.0.write_u8_at(4, table); + self + } + + pub fn protocol(mut self, proto: u8) -> Self { + self.0.write_u8_at(5, proto); + self + } + + pub fn scope(mut self, scope: u8) -> Self { + self.0.write_u8_at(6, scope); + self + } + + pub fn rtype(mut self, rtype: u8) -> Self { + self.0.write_u8_at(7, rtype); + self + } +} + +pub struct IfInfoMsgHdr<'a>(Buffer<&'a mut [u8]>); + +impl <'a> IfInfoMsgHdr <'a> { + pub fn new(bytes: &'a mut [u8], family: u8) -> Self { + let mut buffer = Buffer::bytes(bytes); + buffer.write_u8_at(0, family); + IfInfoMsgHdr(buffer) + } + + fn index(mut self, index: u32) -> Self { + self.0.write_u32_at(4, index); + self + } + + pub fn set_flags(mut self, flags: u32) -> Self { + self.0.write_u32_at(8, flags); + self + } + + pub fn set_change(mut self, flags: u32) -> Self { + self.0.write_u32_at(12, flags); + self + } +} + +pub struct IfAddrMsg<'a>(Buffer<&'a mut [u8]>); + +impl <'a> IfAddrMsg <'a> { + fn new(bytes: &'a mut [u8]) -> Self { + IfAddrMsg(Buffer::bytes(bytes)) + } + + pub fn family(mut self, family: u8) -> Self { + self.0.write_u8_at(0, family); + self + } + pub fn prefixlen(mut self, prefixlen: u8) -> Self { + self.0.write_u8_at(1, prefixlen); + self + } + pub fn _flags(mut self, flags: u8) -> Self { + self.0.write_u8_at(2, flags); + self + } + pub fn scope(mut self, scope: u8) -> Self { + self.0.write_u8_at(3, scope); + self + } + pub fn index(mut self, index: u32) -> Self { + self.0.write_u32_at(4, index); + self + } +} + +pub struct NetlinkHeader<'a>(Buffer<&'a mut [u8]>); + +impl <'a> NetlinkHeader <'a> { + pub fn new(bytes: &'a mut [u8], mtype: u16) -> Self { + let mut buffer = Buffer::bytes(bytes); + buffer.write_u16_at(4, mtype); + NetlinkHeader(buffer) + } + + pub fn flags(mut self, flags: u16) -> Self { + self.0.write_u16_at(6, flags); + self + } + + fn seq(mut self, seq: u32) -> Self { + self.0.write_u32_at(8, seq); + self + } + + fn _portid(mut self, portid: u32) -> Self { + self.0.write_u32_at(12, portid); + self + } +} + +fn sys_socket(domain: i32, stype: i32, protocol: i32) -> Result { + unsafe { + let fd = libc::socket(domain, stype, protocol); + if fd < 0 { + Err(Error::Socket(io::Error::last_os_error())) + } else { + Ok(fd) + } + } +} + +fn sys_bind(sockfd: RawFd, addr: *const libc::sockaddr, addrlen: usize) -> Result<()> { + unsafe { + if libc::bind(sockfd, addr, addrlen as u32) < 0 { + Err(Error::SocketBind(io::Error::last_os_error())) + } else { + Ok(()) + } + } +} + +fn sys_sendto(sockfd: RawFd, buf: &[u8], addr: *const libc::sockaddr, addrlen: usize) -> Result { + let len = buf.len(); + let buf = buf.as_ptr() as *const libc::c_void; + let flags = 0; + unsafe { + let size = libc::sendto(sockfd, buf, len, flags, addr, addrlen as u32); + if size < 0 { + Err(Error::SocketSend(io::Error::last_os_error())) + } else { + Ok(size as usize) + } + } +} + +fn sys_recv(sockfd: RawFd, buf: &mut [u8], flags: i32) -> Result { + let len = buf.len(); + let buf = buf.as_mut_ptr() as *mut libc::c_void; + unsafe { + let size = libc::recv(sockfd, buf, len, flags); + if size < 0 { + Err(Error::SocketRecv(io::Error::last_os_error())) + } else { + Ok(size as usize) + } + } +} diff --git a/src/system/tap.rs b/src/system/tap.rs new file mode 100644 index 0000000..d3ae16d --- /dev/null +++ b/src/system/tap.rs @@ -0,0 +1,151 @@ +use std::fs::{File, OpenOptions}; +use std::io::{self, Read, Write}; +use std::os::unix::fs::OpenOptionsExt; +use std::os::unix::io::{AsRawFd,RawFd}; + +use crate::system; +use crate::system::ioctl::{ + ioctl_with_ref, ioctl_with_val, ioctl_with_mut_ref +}; + +pub struct Tap { + file: File, + name: String, +} + +const IFF_TAP: u16 = 0x0002; +const IFF_NO_PI: u16 = 0x1000; +const IFF_VNET_HDR: u16 = 0x4000; + +const TAPTUN: u64 = 0x54; +const TUNSETIFF: libc::c_ulong = iow!(TAPTUN, 202, 4); +const TUNSETOFFLOAD: libc::c_ulong = iow!(TAPTUN, 208, 4); +const TUNSETVNETHDRSZ: libc::c_ulong = iow!(TAPTUN, 216, 4); + +impl Tap { + pub fn new_default() -> io::Result { + Self::new("vmtap%d") + } + + pub fn new(if_name: &str) -> io::Result { + let file = Self::open_tun()?; + let mut ifreq = IfReq::new(if_name); + + ifreq + .set_flags(IFF_TAP | IFF_NO_PI| IFF_VNET_HDR) + .ioctl_mut(&file, TUNSETIFF)?; + + let name = ifreq.name().to_string(); + let tap = Tap { file, name }; + + Ok(tap) + } + + fn open_tun() -> io::Result { + OpenOptions::new() + .read(true) + .write(true) + .custom_flags(libc::O_NONBLOCK|libc::O_CLOEXEC) + .open("/dev/net/tun") + } + + pub fn name(&self) -> &str { + &self.name + } + + pub fn set_offload(&self, flags: libc::c_uint) -> io::Result<()> { + unsafe { + ioctl_with_val(self.file.as_raw_fd(), TUNSETOFFLOAD, flags.into())?; + } + Ok(()) + } + + pub fn set_vnet_hdr_size(&self, size: libc::c_int) -> io::Result<()> { + unsafe { + ioctl_with_ref(self.file.as_raw_fd(), TUNSETVNETHDRSZ, &size)?; + } + Ok(()) + } +} + +impl Read for Tap { + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.file.read(buf) + } +} + +impl Write for Tap { + fn write(&mut self, buf: &[u8]) -> io::Result { + self.file.write(buf) + } + fn flush(&mut self) -> io::Result<()> { + Ok(()) + } +} + +impl AsRawFd for Tap { + fn as_raw_fd(&self) -> RawFd { + self.file.as_raw_fd() + } +} + +#[repr(C)] +#[derive(Copy,Clone,Default)] +struct IfReq { + pub ireqn: IrReqN, + pub irequ: IfReqU, +} + +impl IfReq { + fn new(ifname: &str) -> Self { + let ifname = ifname.as_bytes(); + assert!(ifname.len() < 16); + let mut ifreq = Self::default(); + ifreq.ireqn.name[..ifname.len()] + .copy_from_slice(ifname); + ifreq + } + + fn name(&self) -> &str { + if let Some(idx) = self.ireqn.name.iter().position(|&b| b == 0) { + ::std::str::from_utf8(&self.ireqn.name[..idx]).unwrap() + } else { + "" + } + } + + fn set_flags(&mut self, flags: u16) -> &mut Self { + self.irequ.flags = flags; + self + } + + fn ioctl_mut(&mut self, fd: &R, request: libc::c_ulong) -> system::Result<()> { + unsafe { + ioctl_with_mut_ref(fd.as_raw_fd(), request, self)?; + } + Ok(()) + } +} + +#[repr(C)] +#[derive(Copy,Clone,Default)] +struct IrReqN { + name: [u8; 16], +} + +#[repr(C)] +#[derive(Copy,Clone)] +union IfReqU { + flags: u16, + addr: libc::sockaddr, + addr_in: libc::sockaddr_in, + ifindex: u32, + _align: [u64; 3], +} + + +impl Default for IfReqU { + fn default() -> Self { + IfReqU { _align: [0u64; 3]} + } +} diff --git a/src/vm/config.rs b/src/vm/config.rs index ecbf2eb..336cb0a 100644 --- a/src/vm/config.rs +++ b/src/vm/config.rs @@ -4,7 +4,7 @@ use std::{env, process}; use crate::devices::SyntheticFS; use crate::disk::{RawDiskImage, RealmFSImage, OpenType}; use libcitadel::Realms; -use libcitadel::terminal::{TerminalPalette, AnsiTerminal}; +use libcitadel::terminal::{TerminalPalette, AnsiTerminal, Base16Scheme}; pub struct VmConfig { ram_size: usize, @@ -13,7 +13,9 @@ pub struct VmConfig { rootshell: bool, wayland: bool, dmabuf: bool, + network: bool, home: String, + bridge_name: String, kernel_path: Option, init_path: Option, init_cmd: Option, @@ -34,6 +36,8 @@ impl VmConfig { rootshell: false, wayland: true, dmabuf: false, + network: true, + bridge_name: "vz-clear".to_string(), home: Self::default_homedir(), kernel_path: None, init_path: None, @@ -136,6 +140,14 @@ impl VmConfig { self.rootshell } + pub fn network(&self) -> bool { + if unsafe { libc::geteuid() } != 0 { + false + } else { + self.network + } + } + pub fn homedir(&self) -> &str { &self.home } @@ -183,6 +195,10 @@ impl VmConfig { self.dmabuf } + pub fn bridge(&self) -> &str { + &self.bridge_name + } + fn add_realmfs_by_name(&mut self, realmfs: &str) { let path = Path::new("/realms/realmfs-images") .join(format!("{}-realmfs.img", realmfs)); @@ -200,7 +216,8 @@ impl VmConfig { let realmfs = config.realmfs(); self.add_realmfs_by_name(realmfs); self.home = realm.base_path().join("home").display().to_string(); - self.realm_name = Some(realm.name().to_string()) + self.realm_name = Some(realm.name().to_string()); + self.bridge_name = config.network_zone().to_string(); } } @@ -219,6 +236,9 @@ impl VmConfig { if args.has_arg("--use-dmabuf") { self.dmabuf = true; } + if args.has_arg("--no-network") { + self.network = false; + } if let Some(home) = args.arg_with_value("--home") { self.home = home.to_string(); } diff --git a/src/vm/error.rs b/src/vm/error.rs index b71bf17..c250d4a 100644 --- a/src/vm/error.rs +++ b/src/vm/error.rs @@ -5,6 +5,7 @@ use std::str; use std::ffi::CStr; use libc; use crate::disk; +use crate::system::netlink; pub type Result = result::Result; @@ -26,6 +27,7 @@ pub enum ErrorKind { TerminalTermios(io::Error), IoError(io::Error), MemoryManagerCreate, + NetworkSetup(netlink::Error), } impl ErrorKind { @@ -47,6 +49,7 @@ impl ErrorKind { ErrorKind::TerminalTermios(_) => "failed termios", ErrorKind::IoError(_) => "i/o error", ErrorKind::MemoryManagerCreate => "memory manager", + ErrorKind::NetworkSetup(_) => "error setting up network", } } } @@ -61,6 +64,7 @@ impl fmt::Display for ErrorKind { ErrorKind::TerminalTermios(ref e) => write!(f, "error reading/restoring terminal state: {}", e), ErrorKind::IoError(ref e) => write!(f, "i/o error: {}", e), ErrorKind::MemoryManagerCreate => write!(f, "error creating memory manager"), + ErrorKind::NetworkSetup(ref e) => write!(f, "error setting up network: {}", e), _ => write!(f, "{}", self.as_str()), } } @@ -78,6 +82,12 @@ impl From for Error { } } +impl From for Error { + fn from(err: netlink::Error) -> Error { + ErrorKind::NetworkSetup(err).into() + } +} + enum Repr { Errno(i32), Simple(ErrorKind), diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 66c1fdb..81913b2 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -31,6 +31,7 @@ use std::sync::atomic::AtomicBool; use termios::Termios; use crate::devices::SyntheticFS; use crate::disk::DiskImage; +use crate::system::{NetlinkSocket, Tap}; pub struct Vm { _config: VmConfig, @@ -122,7 +123,11 @@ impl Vm { cmdline.push_set_val("phinit.rootflags", "trans=virtio"); } - Self::setup_synthetic_bootfs(cmdline, virtio) + Self::setup_synthetic_bootfs(cmdline, virtio)?; + if config.network() { + Self::setup_network(config, cmdline, virtio)?; + } + Ok(()) } fn setup_synthetic_bootfs(cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> { @@ -146,6 +151,26 @@ impl Vm { Ok(()) } + fn setup_network(config: &VmConfig, cmdline: &mut KernelCmdLine, virtio: &mut VirtioBus) -> Result<()> { + let tap = Self::setup_tap(config.bridge())?; + devices::VirtioNet::create(virtio, tap)?; + cmdline.push("phinit.ip=172.17.0.22"); + Ok(()) + } + + fn setup_tap(bridge_name: &str) -> Result { + let tap = Tap::new_default()?; + let nl = NetlinkSocket::open()?; + + if !nl.interface_exists(bridge_name) { + nl.create_bridge(bridge_name)?; + nl.set_interface_up(bridge_name)?; + } + nl.add_interface_to_bridge(tap.name(), bridge_name)?; + nl.set_interface_up(tap.name())?; + Ok(tap) + } + pub fn open(mut config: VmConfig) -> Result { let with_drm = config.is_wayland_enabled() && config.is_dmabuf_enabled();