Initial commit

This commit is contained in:
Bruce Leidl 2017-10-16 02:36:00 +00:00
commit 317426f5bf
68 changed files with 14519 additions and 0 deletions

31
Cargo.lock generated Normal file
View File

@ -0,0 +1,31 @@
[root]
name = "pH"
version = "0.1.0"
dependencies = [
"byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)",
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
"termios 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)",
]
[[package]]
name = "byteorder"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "libc"
version = "0.2.32"
source = "registry+https://github.com/rust-lang/crates.io-index"
[[package]]
name = "termios"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
dependencies = [
"libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)",
]
[metadata]
"checksum byteorder 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ff81738b726f5d099632ceaffe7fb65b90212e8dce59d518729e7e8634032d3d"
"checksum libc 0.2.32 (registry+https://github.com/rust-lang/crates.io-index)" = "56cce3130fd040c28df6f495c8492e5ec5808fb4c9093c310df02b0c8f030148"
"checksum termios 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d5d9cf598a6d7ce700a4e6a9199da127e6819a61e64b68609683cc9a01b5683a"

9
Cargo.toml Normal file
View File

@ -0,0 +1,9 @@
[package]
name = "pH"
version = "0.1.0"
authors = ["Bruce Leidl <bruce@subgraph.com>"]
[dependencies]
byteorder = "1.0.0"
libc = "*"
termios = "0.2.2"

127
kernel/Makefile Normal file
View File

@ -0,0 +1,127 @@
# Downloads, verifies, patches and builds a grsecurity-patched Linux kernel,
# producing the vmlinux image used as the pH guest kernel.

LINUX_VERSION = 4.9.56

# minipli publishes unofficial grsecurity diffs for 4.9.x kernels on GitHub.
MINIPLI_TAG = 20171013093040
MINIPLI_VERSION = v$(LINUX_VERSION)-unofficial_grsec
MINIPLI_PATH = linux-unofficial_grsec/releases/download/$(MINIPLI_VERSION)
GRSEC_DIFF = $(MINIPLI_VERSION)-$(MINIPLI_TAG).diff
GRSEC_DOWNLOAD = https://github.com/minipli/$(MINIPLI_PATH)/$(GRSEC_DIFF)

KERNEL_VERSION = linux-$(LINUX_VERSION)
KERNEL_DOWNLOAD = https://cdn.kernel.org/pub/linux/kernel/v4.x/$(KERNEL_VERSION)

# Tool names, overridable from the environment.
WGET = wget
SHA256 = sha256sum
TAR = tar
PATCH = patch
LN = ln
CP = cp
MV = mv
RM = rm
RMDIR = rmdir
MKDIR = mkdir
TOUCH = touch
GPG = gpg
UNXZ = unxz
PWD = $(shell pwd)

# Local patches applied on top of the grsec diff; one sentinel file per patch.
PATCH_FILES := $(shell find patches/ -name "00*.patch" | sort)
PATCHES = $(patsubst %.patch,build/.%.done,$(PATCH_FILES))

GRSEC_DL_PATH = downloads/$(GRSEC_DIFF)
KERNEL_DL_PATH = downloads/$(KERNEL_VERSION).tar
KERNEL_BUILD_PATH = build/$(KERNEL_VERSION)
KERNEL_UNPACK_PATH = build/unpack/$(KERNEL_VERSION)
KERNEL_BUILD_OUTPUT = $(KERNEL_BUILD_PATH)/vmlinux
PH_LINUX = ph_linux
PH_LINUX_VER = $(PH_LINUX)-$(KERNEL_VERSION)

# GPG runs against a throwaway homedir so the user keyring is never touched.
GPGTMP = verify-tmp/
GPGOPTS = --homedir $(GPGTMP) --trust-model direct --no-autostart
GPG_IMPORT = $(GPG) $(GPGOPTS) --import
GPG_VERIFY = $(GPG) $(GPGOPTS) --verify

# Fixed: was v$(KERNEL_VERSION).sha256, which expanded to
# "vlinux-4.9.56.sha256" and never matched the "v4.9.56.sha256" file
# the gen-sha256 rule actually writes.
SHA256_FILE = v$(LINUX_VERSION).sha256
SHA256_SENTINEL = build/.sha256-verified

.NOTPARALLEL:
.PHONY: all gen-sha256 clean

all: $(KERNEL_BUILD_OUTPUT)

$(GRSEC_DL_PATH):
	cd downloads; $(WGET) $(GRSEC_DOWNLOAD)
	cd downloads; $(WGET) $(GRSEC_DOWNLOAD).sig

$(KERNEL_DL_PATH):
	cd downloads; $(WGET) $(KERNEL_DOWNLOAD).tar.xz
	cd downloads; $(UNXZ) $(KERNEL_VERSION).tar.xz
	cd downloads; $(WGET) $(KERNEL_DOWNLOAD).tar.sign

# Apply the grsec diff to the unpacked tree, install our config, then move
# the tree into place; the empty unpack dir is recreated as a timestamp anchor.
$(KERNEL_BUILD_PATH): $(KERNEL_UNPACK_PATH)
	$(PATCH) -p1 -d $(KERNEL_UNPACK_PATH) < $(GRSEC_DL_PATH)
	$(CP) config $(KERNEL_UNPACK_PATH)/.config
	$(MV) build/unpack/$(KERNEL_VERSION) build/
	$(MKDIR) $(KERNEL_UNPACK_PATH)
	$(MKDIR) build/.patches

# Unpack into a hidden staging dir first so a failed extraction is not
# mistaken for a complete one.
$(KERNEL_UNPACK_PATH): $(KERNEL_DL_PATH) $(SHA256_SENTINEL)
	$(RM) -rf build/.unpack
	$(MKDIR) -p build/.unpack
	$(TAR) -C build/.unpack -xvf downloads/$(KERNEL_VERSION).tar
	$(MV) build/.unpack build/unpack

# Static pattern rule: each patch gets a .done sentinel once applied.
$(PATCHES): build/.%.done : | $(KERNEL_BUILD_PATH)
	$(PATCH) -p1 -d build/$(KERNEL_VERSION) < $*.patch
	$(TOUCH) $@

$(PH_LINUX_VER): $(KERNEL_BUILD_OUTPUT)
	$(RM) $(PH_LINUX)
	$(CP) $(KERNEL_BUILD_OUTPUT) $(PH_LINUX_VER)
	$(LN) $(PWD)/$(PH_LINUX_VER) $(PH_LINUX)

$(KERNEL_BUILD_OUTPUT): $(PATCHES)
	$(MAKE) -C build/$(KERNEL_VERSION) -j 4

clean:
	$(RM) -rf $(KERNEL_BUILD_PATH) build/unpack build/.unpack build/.patches $(SHA256_SENTINEL)

# Verify downloads against the committed checksum file before unpacking.
$(SHA256_SENTINEL):
	$(SHA256) -c v$(LINUX_VERSION).sha256
	$(TOUCH) $@

# Maintainer target: regenerate the checksum file after GPG-verifying both
# downloads (see the .verify rules below).
gen-sha256: $(SHA256_FILE)

$(GRSEC_DL_PATH).verify: $(GRSEC_DL_PATH)
	rm -rf $(GPGTMP)
	$(MKDIR) -m 0700 $(GPGTMP)
	$(GPG_IMPORT) < keys/minipli.key
	$(GPG_VERIFY) downloads/$(GRSEC_DIFF).sig $(GRSEC_DL_PATH) 2> .out
	mv .out $(GRSEC_DL_PATH).verify
	rm -rf $(GPGTMP)

$(KERNEL_DL_PATH).verify: $(KERNEL_DL_PATH)
	rm -rf $(GPGTMP)
	$(MKDIR) -m 0700 $(GPGTMP)
	$(GPG_IMPORT) < keys/torvalds.key
	$(GPG_IMPORT) < keys/gregkh.key
	$(GPG_VERIFY) downloads/$(KERNEL_VERSION).tar.sign $(KERNEL_DL_PATH) 2> .out
	mv .out $(KERNEL_DL_PATH).verify
	rm -rf $(GPGTMP)

$(SHA256_FILE): $(KERNEL_DL_PATH).verify $(GRSEC_DL_PATH).verify
	$(SHA256) $(KERNEL_DL_PATH) $(GRSEC_DL_PATH) > v$(LINUX_VERSION).sha256

2181
kernel/config Normal file

File diff suppressed because it is too large Load Diff

2
kernel/init/Makefile Normal file
View File

@ -0,0 +1,2 @@
# Builds the minimal in-guest init binary from init.c.
CC = gcc

init: init.c
	$(CC) -o init init.c

137
kernel/init/init.c Normal file
View File

@ -0,0 +1,137 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/mount.h>
#include <sys/reboot.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/wait.h>
/*
 * ASCII-art banner printed by run_shell() before the shell starts.
 * The array is NULL-terminated so callers can walk it without a length;
 * the leading and trailing "" entries print blank lines around the art.
 */
char *splash[] = {
"",
" ------------------------------||-------------------------------",
" [##]",
" /~~~~~~\\",
" |~~\ /~~|",
" ==][===|___||___|===][==",
" [::] ( () ) [::]",
" ~/~~~~\\~",
" O' `o", "",NULL
};
/*
 * Print the splash banner, then replace this process with an interactive
 * bash shell.  Only returns (with execve's error code) if the exec fails.
 */
static int run_shell()
{
    static char *shell_argv[] = { "/bin/bash", NULL };
    static char *shell_envp[] = { "TERM=xterm-256color", "HOME=/home/user", NULL };
    int i;

    for (i = 0; splash[i] != NULL; i++) {
        printf("%s\n", splash[i]);
    }

    return execve(shell_argv[0], shell_argv, shell_envp);
}
/* Create a directory with mode 0755; failure is reported but not fatal. */
static void do_mkdir(const char* path) {
    int rc = mkdir(path, 0755);
    if (rc < 0) {
        printf("mkdir %s failed: %s\n", path, strerror(errno));
    }
}
/* Mount a fresh tmpfs (mode 0755) at path; failure is reported but not fatal. */
static void mount_tmpfs(const char *path) {
    int rc = mount("tmpfs", path, "tmpfs", 0, "mode=755");
    if (rc < 0) {
        printf("mount tmpfs to %s failed: %s\n", path, strerror(errno));
    }
}
/*
 * Wrapper for the pivot_root(2) syscall, invoked via syscall() because
 * glibc provides no wrapper.  Failure is reported but not fatal.
 */
static void pivot_root(const char *new_root, const char *put_old) {
    long rc = syscall(SYS_pivot_root, new_root, put_old);
    if (rc < 0) {
        printf("pivot_root failed (%s : %s) : %s\n", new_root, put_old, strerror(errno));
    }
}
/* Atomically relocate an existing mount point (MS_MOVE); failure is logged. */
static void move_mount(const char *source, const char *target) {
    int rc = mount(source, target, "", MS_MOVE, NULL);
    if (rc < 0) {
        printf("move mount of %s to %s failed: %s\n", source, target, strerror(errno));
    }
}
/*
 * Replace the (read-only) boot root with a writable overlayfs:
 * stage a tmpfs at /tmp, pivot into it with the real root parked at /ro,
 * build an overlay of /ro + a tmpfs upper layer, then pivot again so the
 * overlay becomes /.  Individual failures are logged by the helpers but
 * the sequence presses on regardless.
 *
 * Fix: the original mounted tmpfs on /tmp twice in a row
 * (mount_tmpfs("/tmp") followed by an identical direct mount() call),
 * stacking a second, redundant tmpfs instance; the duplicate is removed.
 */
static void setup_overlay(void) {
    mount_tmpfs("/tmp");
    do_mkdir("/tmp/ro");
    do_mkdir("/tmp/rw");
    mount_tmpfs("/tmp/rw");
    do_mkdir("/tmp/rw/upper");
    do_mkdir("/tmp/rw/work");
    do_mkdir("/tmp/overlay");
    pivot_root("/tmp", "/tmp/ro");
    /*
     * After the first pivot:
     *   /ro        real root mounted here
     *   /rw        tmpfs mounted here
     *   /rw/upper  empty directory (overlay upper layer)
     *   /rw/work   empty directory (overlay workdir)
     *   /overlay   empty directory (overlay mount point)
     */
    if(mount("overlay", "/overlay", "overlay", 0, "lowerdir=/ro,upperdir=/rw/upper,workdir=/rw/work") < 0) {
        printf("mount overlay failed: %s\n", strerror(errno));
    }
    /* Keep the backing mounts reachable from inside the new root. */
    do_mkdir("/overlay/ro");
    do_mkdir("/overlay/rw");
    do_mkdir("/overlay/old-root");
    move_mount("/ro", "/overlay/ro");
    move_mount("/rw", "/overlay/rw");
    pivot_root("/overlay", "/overlay/old-root");
    /* Drop the staging tmpfs (now /old-root) and the boot root's /tmp. */
    umount("/old-root");
    umount("/ro/tmp");
}
/*
 * Mount the standard kernel pseudo-filesystems (/sys, /proc, /dev) plus a
 * devpts instance so the shell can allocate pseudo-terminals.
 * NOTE(review): return values are deliberately ignored here, unlike the
 * logging wrappers elsewhere in this file — confirm best-effort is intended.
 */
static void do_mounts(void)
{
mount("sysfs", "/sys", "sysfs", 0, NULL);
mount("proc", "/proc", "proc", 0, NULL);
mount("devtmpfs", "/dev", "devtmpfs", 0, NULL);
/* /dev/pts is created only after devtmpfs is mounted over /dev */
mkdir("/dev/pts", 0755);
mount("devpts", "/dev/pts", "devpts", 0, NULL);
}
/*
 * PID-1 entry point: build the writable overlay root, mount the pseudo
 * filesystems, run an interactive shell on the console, and reboot the
 * guest when the shell exits.
 *
 * Fix: if execve() failed, the child returned from run_shell() and fell
 * through to the parent's reboot() call below, rebooting the guest from
 * the child.  The child now _exit()s instead.
 */
int main(int argc, char *argv[])
{
    pid_t child;
    int status;

    setup_overlay();
    do_mounts();
    sethostname("airwolf", 7);

    /* Become session leader and claim the console as the controlling
     * terminal so job control works in the shell. */
    setsid();
    ioctl(0, TIOCSCTTY, 1);

    child = fork();
    if (child < 0) {
        printf("Fatal: fork() failed with %d\n", child);
        return 0;
    } else if (child == 0) {
        run_shell();
        /* Only reached when execve() fails; the child must not fall
         * through to the parent's reboot() below. */
        printf("Failed to launch shell: %s\n", strerror(errno));
        _exit(1);
    } else {
        pid_t corpse;
        /* Reap children until the shell itself terminates. */
        do {
            corpse = waitpid(-1, &status, 0);
        } while (corpse != child);
    }
    reboot(RB_AUTOBOOT);
    /* reboot() only returns on failure. */
    printf("Init failed: %s\n", strerror(errno));
    return 0;
}

1162
kernel/keys/gregkh.key Normal file

File diff suppressed because it is too large Load Diff

88
kernel/keys/minipli.key Normal file
View File

@ -0,0 +1,88 @@
-----BEGIN PGP PUBLIC KEY BLOCK-----
Version: SKS 1.1.6
Comment: Hostname: pgp.mit.edu
mQINBE7f0lwBEAC8xd/vF0Tkh5hPrUy55U98Pi4bJf93PFCovSis+qiInCrQ+WE3Ni32ZYun
pjOE/QpNZBw/RmiFROZXBw+H/u2LwYNotLzjGcNxZ9jlOr/w/RXTU5pR1WBjYmyvLssy+YZ0
lMXezfG2QjZIpiTJh9eEicl+QPIJFBNm13wZv+b54oG6xTPl4BX3ri01+OAuSTkro40qD2LG
gz8+SqQNKzApfOuk6FEzAsb2ySp+CNdmJVhRMGIQldeqYezkRDJB7u9bD+85bgmC+0f4y+4/
1CShXtbpdF1Z+cX9RQ9bULetheYHAHycyFGKyaN/KLhptnjEsCQzebRFjy9ZAxZS2KnZXqre
1oUGQ3vl+3wr9X//hfuD7zJCUusc8ptLgRyHSHwat7OqTL38tA+InwkNvFVwQNP4UNwLIrqw
FoEQOhRFjY7ahW9+0Skqvt7XtZz3uzI32o+3XdYXfxfiiXJZylUuZv8ZqYXFNri6XuduXcma
A9Dhijaw2NNessuvdEGhE/cDJgQNezLIvZNpJHbWhdoBcvhc2Lybp5+hvx5VP2JMg2fzvyHi
DP9Ql898Z+oDpGBRMZq7jCxRzisRVCCDuZagbNG8ObSvE5SZDpxBvh5AuVBHw849BU1FIGRC
ykNDTB614fLqKQiescp1OtyG7rwu9926q2qpsQ8C+czDV+WpJQARAQABtCRNYXRoaWFzIEty
YXVzZSA8bWluaXBsaUBsZC1saW51eC5zbz6JAjgEEwECACIFAk7f0lwCGwMGCwkIBwMCBhUI
AgkKCwQWAgMBAh4BAheAAAoJEHWFOZmSQ1ukfLEQAIp9I5Ss/dnA2OgrBiY2LBgkJ/gMsA00
jlMOF2Hh30OwS04n8uY6Q6zc4Uc/O1IVb968B0yqdT/HgpfalqYCk9fkTiyCmS39pdEKqVWl
DzN72IUaXTxnJ9HNHcsM/66Yxw34+yWdHSlKz/hysoCN/414lZrMWbHXFD/rdF8qP88BVC82
ZCO0fLyqmnowLyDSliqk7Iq19WEKZeA5/3cDvAhWKmalT2MA7cbXbtCJW5EaW3Mw6aRjYyA7
19FHkhk5DvsaJP4i9y+xFQI5j27g1W665tbiffbU9jpANnFw6C9kA9szg3dymS6AL00m3QY9
FKm7UJs00AUD38vQhHHl2peF7RC8UpL+TTxxVwz7CNb3xMKrYUA62uM8aQYIC1gsr73//RVT
616HQh+T+LaXfy2+NuIeSV/PPKuwTcULeyZPC5o3ycM/VGpjb7dxUiT5NEMU5I+CQ4dwAHg8
Mf7R4LBVP2Fd5sq/0UQ+yrv1npHl7OiVYr3YPPg4vOYjktSlGwcTyLVqzs8DcH3zaSaeb0OD
I2kN1uvt6PFrrhyi61RM0I+94uO2LAKZTNsylipoCf9cRO08dXf+kMaU9ICQSs2eduNTgfFB
Qo5DbC0wcmdBLUmTK7ekmgD30EiAFLMqHuD9O64tH3r0AyBH4mGbdoL5GEaVZoU+KUOvAlko
+4PuiEYEEBECAAYFAk//Fg0ACgkQZS2uZ5iBxS9ZgQCdHkiWHtc/9buaEIHqRFXAXXBR+k4A
oJlD1ggy4ugF4BMUpERiyuhhpGQytCRNYXRoaWFzIEtyYXVzZSA8bWluaXBsaUBtaW5pcGxp
Lm5ldD6JAjgEEwECACIFAk7f1YcCGwMGCwkIBwMCBhUIAgkKCwQWAgMBAh4BAheAAAoJEHWF
OZmSQ1ukzGYQAJgucBJF1Dr0yeLbGW77YXq1ClvZ73U5CxjejVNxORMls8zMbAdXq9HCXmho
99T9PPG0Zs6BcIDzBNcx2VaoAyTSQWRppwBTAvLieAF+1VPYlZqOnAO5pLI+pS0ICsP1qKSp
dnq7SRZTMfLgMP0xWX9+hFm+r8CF/nrpcG07N39fcGnvehWey5CeeqgzndhTlEXbWgtcEDQR
jEyasqzb3Ag7dYDEqLcsHUOC+uyKotbK8WouYpjaf8sHoUTm9Lh8Nq/De7vqM5kDwMzNLIqQ
dconKIhiHyNFU4NoMM7+NSc/l0WeEEBKbvVjA1wq4mkBeZttKSwNndRYjjL85+7vQxMGzbVo
g7SzPJyEhycqrtKTEGMLx+hOOI+Ka4eVmuoIzi5E25R8Tb3hINw+T2IEwDyksR0he6WalcfZ
6Ac9C47ou6Ob7MPJsLOnfmoA/a8QIvGk6kbi8hsQLBS8DOVrez/hFyB4/q5wcJU6tTWm4lk9
tk2Y14C4OyN3d8OZ1vYxnzsG2qaqgrPWkPlA/QE2PRNem0pfbFNRtuIef0Le8zRw0DjMcBzO
qKE+Y3MH7YMa+Ol6+/Zvgm0sFsheU9raoaJzuNny0tc1sAVcpNwl1oURvzy0P2U+dJNplrrQ
ZjP5u+Vc7LzL+PqBL6lHM7Q1BvuOXilT+zCiirsGW5W18w7iiEYEEBECAAYFAk//Fg0ACgkQ
ZS2uZ5iBxS9ZbQCgha9Jcf2Y7kq46I/4PHaCUlsxR7MAn3zspNTFYnN1PREsucJ825N3BQvA
tCZNYXRoaWFzIEtyYXVzZSA8bWluaXBsaUByMDB0d29ybGQubmV0PokCOAQTAQIAIgIbAwYL
CQgHAwIGFQgCCQoLBBYCAwECHgECF4AFAk7f3n4ACgkQdYU5mZJDW6THdQ/9Hk4ErtabYVXU
awsqonSdsWRLhtCJUr3jg7b0S1W6a/MrLdDeIbQrNAwkq3BxHRPElMke2MfJgeQxRuTEQuxx
sYmghf+441BEk45y0Fn0RbSYmXgJfdQY1P9YJKGNBNd2kW4/23krWJgoKqFk4cMifr5YNymR
kUtOvvUS2ZtZF2KofOapVKbsUqk6fGXxRNfXcFPsT0br7/OUZlW+RyldOMma9PjKdFWdQ3mc
oIX8xh4YnkxmBx2Akz0AlK5KBg332thIfirPFJDiTYU/ovaNTMkk7mfKexrl7NaKsCg0QUcH
3H26vThh3hxVY+ovqhHNU9M7qn+ieNav5T49y51nlQXdPsogvomM8K3neENV9fUyKvxIufaN
bgWA5fuF6rP7s46eywLK56sL9qCVZ2FCSp60LxE2tnPy5EZU250UMWriWbal8TUPXoCquwlw
QT4jQSQ4xXSiZVG1iol9cgmkb9khznY4etQgVCy9smxT09ywc0oYENz+LAVd+Y4AeSkK06XG
N24oeIVcoM5U/O8V3W+8jOITtd3wr5YFWLOQVz2nW2nYfn7N1a68KixCvGCpxduVQLYXfYko
4LWlTz+dJU/0TdsPOb5BlWi68fQsDQn9ySzUdrhD3/16mLrUT1ouVCcA5bk4APErNe6z7x2W
8djc7KePdr9nTwkckL/7n6SIRgQQEQIABgUCT/8WDQAKCRBlLa5nmIHFL9dPAKCdOqTO8jJl
BSal2+hQKFMnSIJ+cgCfT7BUhV4YrxfG9rzZZ0apvCujzdC0J01hdGhpYXMgS3JhdXNlIDxt
aW5pcGxpQGdvb2dsZW1haWwuY29tPokCOwQTAQIAJQIbAwYLCQgHAwIGFQgCCQoLBBYCAwEC
HgECF4AFAk7f3oQCGQEACgkQdYU5mZJDW6QkNA//dWfeZ7qqFFrPd20TgPEMQFbxuZxs0GHU
VQbpBJ/M2agmL7sX6RD2Zere8POoU+l2d/GQp0Z2U2ztrWT9jDNYy2zjW9aw4A3eW3t31mjs
Sde4wESYwTp8+9zr+fsNbj6/vFJ/VFX+uB3AWE8zwyYnoWtcY0SAbVoYwZeKbnG67aupJUiz
Xcp9/FYDOZVZUcROrnG3Q+2sJGkAoZwBjueiCeHcFMsbqlKlM9qW/EQQh/pgoYkiAn5jtwEu
mRgF9itjpmX5PBUJBtaHVtXcANK5vCdSBqwNPIuULbGhfKto8XzMzAIZp2P21DasBBeUyu1Z
+LsTSwTNKzbIfvj8HgG9Phcsiq3mWBL6phfbVDFQBdDRTLM22B8JrcA6IsYtZUbwF2eugkUP
udoNSYts7KocOIO8/lrW6jCDnYrv9fhTrHkfA/qnOOvmzOBVAPaYDFKKd/mE8KXwgoP1N7kq
oh9jmuRkWg2bjregnnpi1ruEXmDjcXMYcJnpIjF7Z53Wxp2rpg+JYJz8HaSAQLjBC3x+brpr
mbFQMFbKkllOb57uqJC+WOiKp3WsceMRs5RZgrnS/q+m0EBskjrRipV0SXI/lNhELS2dcNe1
+e0O+gxPhiI+L030O2BwfWpwfd3HRAGzL3YO3A12zIs7HtyW/BhuaJfRDLU5JslfeDpOpxR4
Z0OIRgQQEQIABgUCT/8WDQAKCRBlLa5nmIHFL9HYAKCmz7Qfmu5orKFsR9GSDCYpvs14MgCe
Nm+BMfQCAXQ2sS1c3T5V8Y8H4XG5Ag0ETt/SXAEQANIPSLA0YqtAlHZMGe0IYUU40CAevIiz
j6sW14X2b2NsA8Eh0FkujG1zDEDr2Xzq307vqo+u4rYp8dJZlr2qiEKg1CQRoXvk30jbKr8w
QZ66EYkoRg2wzqBDitpVJbtLW5LeqaV96+aP4EO5IK/O+oUshqvHRG0QNPcYoL01mXX7E9F5
maESiGM1KYwC+fiw7HUes64zT0Z/BJ+bH85ByTt9Z3/0JCYoeD3yy09V1nIj0i5tLWaSJTfj
9l0wJtZNOgQCBQ0wqj1+yzeMY84YkQw5i7HN6m/2iQwPpwt7ck9X7J+EbLSFyrGQCF4m56l7
EIfO7vnz71cyUKuY1hhHCZgjx//8JqDS+2p7HwWsxciZkPz8/tVQAKn9dtP8ahmolW2AHCo3
GmjGQ3HMHNtHRqtdjh6qCc27ScbkkSILSEoqG1zpRyB0cUV5QzwGdIIJHM+LcVC9v7lHjMD+
tZJcv157A4GzZaOG1rP9FXdMwfeVkqMdu7cjI6hxa2h2K9uamDjKK9EqVcX7hYomz832HvTj
mceB2C5TwKJABY6EBbRH8DT4VFF7aYEtlzBJSN/O6QaxGL/aX5vxib2gGXTafZ2HFpaO4NYV
TkHx5mHLdI4eq4VJlOQjKMc+5yrV4idwg0IPgyAcVFifLjZJY7d/Gsciv3NGdpCaeJRxiHaM
j/S5ABEBAAGJAh8EGAECAAkFAk7f0lwCGwwACgkQdYU5mZJDW6T/XBAAgKaAmkC6WUTwlc5W
xIKCaqd1uCv86sGsPhxteOiHIn97ucELaAn3TdyQ3u0dY8TEdYEcww5MWWhQw6M0bYG/iW+k
9qbUusgcAj/PBqA+Iu55QoVpZrrX9PZCumhtaG2F6VWsz4U1ML1pxYeVC+X77iQep00yFlhG
NX9uFAqD8hp87QVpslgh2eWU8Ma9sG73avESJkE1jnw23rsSKY96Fy7sS50NdNfeMFEYBpXv
yO41J1/XVowfKxihD7Hs4Jz9iV2TQo3ASjtYcHDVRy3xRud3x0LgN5vDGO/FN6gbXXPM6HyT
lC4eKhz74oyoT7t6BsbvU2i50kmGYBx6PG8Xy+6Iu21/+imr6QuEwhSAuCRKHUYV2d/CrjvL
syxDpWFHEGwbqfCpOy15DV37Nlgtu3zUvc3T0TywCJjj4Q/fOHTm5Jjr0vvINdmP4qLhmikE
hZgk0QUs4pVYad5zFW4F9KDyMTtxiNk+NRlcNU4qurCPjvX9F7dX/3SPWNy5zQ9xPUtJdvXT
N/IoWduHiZO7ih4aS1bsaLr4YJ/n1NHwbH4vdI9s1heUezFHtoLEr3mBuiTxd/2F+1wtbb7C
mkmVzjYkhYrONRg6Pl3XGZrikOUa2Vy9u9w/M9AOpwbTR8j+raHIPZLNUwgZQDF47vggvj7y
tCiqjLBC3pzX+sNjnwQ=
=bPiP
-----END PGP PUBLIC KEY BLOCK-----

1265
kernel/keys/torvalds.key Normal file

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,34 @@
From e86a8d2060f5551bc3bbb1621cce206954340cc1 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 11 Feb 2015 16:19:26 -0600
Subject: [PATCH 02/22] cpuidle: skip synchronize_rcu() on single CPU systems
synchronize_rcu() is pretty expensive, and on single CPU systems we don't need
it in this specific case, so skip it.
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
drivers/cpuidle/cpuidle.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index c73207ab..224cefc0 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -307,8 +307,11 @@ void cpuidle_uninstall_idle_handler(void)
/*
* Make sure external observers (such as the scheduler)
* are done looking at pointed idle states.
+ * This is only relevant if there is more than one cpu,
+ * if there is only one CPU, that is us... and we're
+ * coherent to ourselves.
*/
- synchronize_rcu();
+
}
/**
--
2.11.0

View File

@ -0,0 +1,38 @@
From 7b5447090b8fbb80a85320c880934f35acbf68a7 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 11 Feb 2015 16:25:16 -0600
Subject: [PATCH 03/22] sysrq: skip synchronize_rcu() if there is no old op
synchronize_rcu() is expensive. Currently it is called as part of the sysrq
registration/unregistration, which happens during boot several times.
Now, the reason for the synchronize_rcu() is to allow an old registered
operation to expire properly... which is pointless if the old operation
is NULL...
So we can save the common case of the old operation being NULL a lot of time
by just checking for non-NULL prior to the synchronize_rcu()
Signed-off-by: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
drivers/tty/sysrq.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c
index 701c085b..c60c7ba5 100644
--- a/drivers/tty/sysrq.c
+++ b/drivers/tty/sysrq.c
@@ -1065,8 +1065,10 @@ static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p,
* A concurrent __handle_sysrq either got the old op or the new op.
* Wait for it to go away before returning, so the code for an old
* op is not freed (eg. on module unload) while it is in use.
+ * This is only relevant if the old op is not NULL of course.
*/
- synchronize_rcu();
+ if (remove_op_p)
+ synchronize_rcu();
return retval;
}
--
2.11.0

View File

@ -0,0 +1,28 @@
From 43e288fed0ccb8bf17cfea69d032425e6d224b96 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 11 Feb 2015 16:47:20 -0600
Subject: [PATCH 05/22] vmstats: wakeups
Author: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
mm/vmstat.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 604f26a4..bfbbcd76 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1549,7 +1549,7 @@ static const struct file_operations proc_vmstat_file_operations = {
#ifdef CONFIG_SMP
static struct workqueue_struct *vmstat_wq;
static DEFINE_PER_CPU(struct delayed_work, vmstat_work);
-int sysctl_stat_interval __read_mostly = HZ;
+int sysctl_stat_interval __read_mostly = 8 * HZ;
#ifdef CONFIG_PROC_FS
static void refresh_vm_stats(struct work_struct *work)
--
2.11.0

View File

@ -0,0 +1,123 @@
From 9c7e56e2621a12943055442a2b4b963ee1604e4a Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 11 Feb 2015 16:53:08 -0600
Subject: [PATCH 06/22] pci: probe
Author: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
drivers/pci/probe.c | 43 ++++++++++++++++++++++++++++++++++++++++---
1 file changed, 40 insertions(+), 3 deletions(-)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index d266d800..73ebc222 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -182,6 +182,10 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
+ res->name = pci_name(dev);
+
+ printk("clr: Starting probe for %s\n", res->name);
+
/* No printks while decoding is disabled! */
if (!dev->mmio_always_on) {
pci_read_config_word(dev, PCI_COMMAND, &orig_cmd);
@@ -191,8 +195,6 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
}
}
- res->name = pci_name(dev);
-
pci_read_config_dword(dev, pos, &l);
pci_write_config_dword(dev, pos, l | mask);
pci_read_config_dword(dev, pos, &sz);
@@ -324,6 +326,8 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
if (dev->non_compliant_bars)
return;
+ printk("clr: pci_read_bases start\n");
+
for (pos = 0; pos < howmany; pos++) {
struct resource *res = &dev->resource[pos];
reg = PCI_BASE_ADDRESS_0 + (pos << 2);
@@ -332,11 +336,13 @@ static void pci_read_bases(struct pci_dev *dev, unsigned int howmany, int rom)
if (rom) {
struct resource *res = &dev->resource[PCI_ROM_RESOURCE];
+ printk("clr: rom path\n");
dev->rom_base_reg = rom;
res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH |
IORESOURCE_READONLY | IORESOURCE_SIZEALIGN;
__pci_read_base(dev, pci_bar_mem32, res, rom);
}
+ printk("clr: pci_read_bases end\n");
}
static void pci_read_bridge_io(struct pci_bus *child)
@@ -1193,6 +1199,28 @@ static void pci_msi_setup_pci_dev(struct pci_dev *dev)
pci_msix_clear_and_set_ctrl(dev, PCI_MSIX_FLAGS_ENABLE, 0);
}
+static int guess_bar_count(int class)
+{
+ if (class == 0x068000)
+ return 0;
+ if (class == 0x020000)
+ return 2;
+ if (class == 0x010000)
+ return 2;
+ if (class == 0x00ff00)
+ return 1;
+ return 6;
+}
+
+static int has_rom(int class, int rom)
+{
+ if (class == 0x020000)
+ return 0;
+ if (class == 0x010000 || class == 0x00ff00)
+ return 0;
+ return rom;
+}
+
/**
* pci_setup_device - fill in class and map information of a device
* @dev: the device structure to fill
@@ -1211,6 +1239,9 @@ int pci_setup_device(struct pci_dev *dev)
int pos = 0;
struct pci_bus_region region;
struct resource *res;
+ int maxbar;
+
+ printk("clr: pci_setup_device start\n");
if (pci_read_config_byte(dev, PCI_HEADER_TYPE, &hdr_type))
return -EIO;
@@ -1265,7 +1296,11 @@ int pci_setup_device(struct pci_dev *dev)
if (class == PCI_CLASS_BRIDGE_PCI)
goto bad;
pci_read_irq(dev);
- pci_read_bases(dev, 6, PCI_ROM_ADDRESS);
+
+ maxbar = guess_bar_count(dev->class);
+
+ if (class != PCI_CLASS_STORAGE_IDE)
+ pci_read_bases(dev, maxbar, has_rom(dev->class, PCI_ROM_ADDRESS));
pci_read_config_word(dev, PCI_SUBSYSTEM_VENDOR_ID, &dev->subsystem_vendor);
pci_read_config_word(dev, PCI_SUBSYSTEM_ID, &dev->subsystem_device);
@@ -1350,6 +1385,8 @@ int pci_setup_device(struct pci_dev *dev)
dev->class = PCI_CLASS_NOT_DEFINED << 8;
}
+ printk("clr: pci_setup_device end\n");
+
/* We found a fine healthy device, go go go... */
return 0;
}
--
2.11.0

View File

@ -0,0 +1,107 @@
From a672fb44791bab2e0b7cd519fbb55751fb2fbe16 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Fri, 28 Aug 2015 11:00:36 -0500
Subject: [PATCH 07/22] cgroup
Author: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Signed-off-by: Jose Carlos Venegas Munoz <jos.c.venegas.munoz@intel.com>
---
include/linux/cgroup-defs.h | 2 +-
kernel/cgroup.c | 24 ++++++++++++++----------
2 files changed, 15 insertions(+), 11 deletions(-)
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index 6fb1c34c..f5ffee9d 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -137,7 +137,7 @@ struct cgroup_subsys_state {
/* percpu_ref killing and RCU release */
struct rcu_head rcu_head;
- struct work_struct destroy_work;
+ struct delayed_work destroy_work;
};
/*
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 4c233437..27a71a9e 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -73,7 +73,7 @@
* Expiring in the middle is a performance problem not a correctness one.
* 1 sec should be enough.
*/
-#define CGROUP_PIDLIST_DESTROY_DELAY HZ
+#define CGROUP_PIDLIST_DESTROY_DELAY round_jiffies_relative(HZ)
#define CGROUP_FILE_NAME_MAX (MAX_CGROUP_TYPE_NAMELEN + \
MAX_CFTYPE_NAME + 2)
@@ -4987,8 +4987,9 @@ static struct cftype cgroup_legacy_base_files[] = {
*/
static void css_free_work_fn(struct work_struct *work)
{
+ struct delayed_work *dwork = to_delayed_work(work);
struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, destroy_work);
+ container_of(dwork, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
@@ -5037,14 +5038,15 @@ static void css_free_rcu_fn(struct rcu_head *rcu_head)
struct cgroup_subsys_state *css =
container_of(rcu_head, struct cgroup_subsys_state, rcu_head);
- INIT_WORK(&css->destroy_work, css_free_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ INIT_DELAYED_WORK(&css->destroy_work, css_free_work_fn);
+ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY);
}
static void css_release_work_fn(struct work_struct *work)
{
+ struct delayed_work *dwork = to_delayed_work(work);
struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, destroy_work);
+ container_of(dwork, struct cgroup_subsys_state, destroy_work);
struct cgroup_subsys *ss = css->ss;
struct cgroup *cgrp = css->cgroup;
@@ -5087,8 +5089,9 @@ static void css_release(struct percpu_ref *ref)
struct cgroup_subsys_state *css =
container_of(ref, struct cgroup_subsys_state, refcnt);
- INIT_WORK(&css->destroy_work, css_release_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ INIT_DELAYED_WORK(&css->destroy_work, css_release_work_fn);
+ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY);
+
}
static void init_and_link_css(struct cgroup_subsys_state *css,
@@ -5367,8 +5370,9 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name,
*/
static void css_killed_work_fn(struct work_struct *work)
{
+ struct delayed_work *dwork = to_delayed_work(work);
struct cgroup_subsys_state *css =
- container_of(work, struct cgroup_subsys_state, destroy_work);
+ container_of(dwork, struct cgroup_subsys_state, destroy_work);
mutex_lock(&cgroup_mutex);
@@ -5389,8 +5393,8 @@ static void css_killed_ref_fn(struct percpu_ref *ref)
container_of(ref, struct cgroup_subsys_state, refcnt);
if (atomic_dec_and_test(&css->online_cnt)) {
- INIT_WORK(&css->destroy_work, css_killed_work_fn);
- queue_work(cgroup_destroy_wq, &css->destroy_work);
+ INIT_DELAYED_WORK(&css->destroy_work, css_killed_work_fn);
+ queue_delayed_work(cgroup_destroy_wq, &css->destroy_work, CGROUP_PIDLIST_DESTROY_DELAY);
}
}
--
2.11.0

View File

@ -0,0 +1,45 @@
From a312877651e18175726e33e4530307880d249aa5 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 11 Feb 2015 17:28:14 -0600
Subject: [PATCH 08/22] smpboot: reuse timer calibration
NO point recalibrating for known-constant tsc... saves 200ms+ of boot time.
Author: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
arch/x86/kernel/smpboot.c | 2 +-
arch/x86/kernel/tsc.c | 3 +++
2 files changed, 4 insertions(+), 1 deletion(-)
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 36171bcd..7f4e9cdf 100644
--- a/arch/x86/kernel/smpboot.c
+++ b/arch/x86/kernel/smpboot.c
@@ -731,7 +731,7 @@ wakeup_secondary_cpu_via_init(int phys_apicid, unsigned long start_eip)
pr_debug("Waiting for send to finish...\n");
send_status = safe_apic_wait_icr_idle();
- udelay(init_udelay);
+ udelay(100);
pr_debug("Deasserting INIT\n");
diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
index 6e57edf3..94ab049f 100644
--- a/arch/x86/kernel/tsc.c
+++ b/arch/x86/kernel/tsc.c
@@ -1390,6 +1390,9 @@ unsigned long calibrate_delay_is_known(void)
if (!mask)
return 0;
+ if (cpu !=0)
+ return cpu_data(0).loops_per_jiffy;
+
sibling = cpumask_any_but(mask, cpu);
if (sibling < nr_cpu_ids)
return cpu_data(sibling).loops_per_jiffy;
--
2.11.0

View File

@ -0,0 +1,28 @@
From f1af807278f9b52c68b32e604aa4c23f1456f5d5 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 4 Nov 2015 15:17:10 -0600
Subject: [PATCH 09/22] perf
Author: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
arch/x86/events/intel/core.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c
index f0f197f4..9d1f4060 100644
--- a/arch/x86/events/intel/core.c
+++ b/arch/x86/events/intel/core.c
@@ -4028,7 +4028,7 @@ __init int intel_pmu_init(void)
*/
if (x86_pmu.extra_regs) {
for (er = x86_pmu.extra_regs; er->msr; er++) {
- er->extra_msr_access = check_msr(er->msr, 0x11UL);
+ er->extra_msr_access = false;
/* Disable LBR select mapping */
if ((er->idx == EXTRA_REG_LBR) && !er->extra_msr_access)
x86_pmu.lbr_sel_map = NULL;
--
2.11.0

View File

@ -0,0 +1,190 @@
From 0f320ebfefc339814bc7efe46a83550cc6ee1453 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Sat, 14 Feb 2015 09:49:41 -0600
Subject: [PATCH 10/22] pci: probe: identify known devices
Author: Arjan van de Ven <arjan@linux.intel.com>
Modify-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
drivers/pci/probe.c | 156 ++++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 156 insertions(+)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 73ebc222..d693b6b8 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -163,6 +163,159 @@ static inline unsigned long decode_bar(struct pci_dev *dev, u32 bar)
#define PCI_COMMAND_DECODE_ENABLE (PCI_COMMAND_MEMORY | PCI_COMMAND_IO)
+/* shortcut version of __pci_read_base where we know the sizes already */
+int __pci_read_base_shortcut(struct pci_dev *dev, enum pci_bar_type type,
+ struct resource *res, unsigned int pos, u32 sz_in, u32 sz2_in)
+{
+ u32 l, sz;
+ u64 l64, sz64, mask64;
+ struct pci_bus_region region, inverted_region;
+
+ res->name = pci_name(dev);
+
+ pci_read_config_dword(dev, pos, &l);
+
+ sz = sz_in;
+
+ /*
+ * All bits set in sz means the device isn't working properly.
+ * If the BAR isn't implemented, all bits must be 0. If it's a
+ * memory BAR or a ROM, bit 0 must be clear; if it's an io BAR, bit
+ * 1 must be clear.
+ * Here we set the size and is not 0xffffffff
+ */
+
+ /*
+ * I don't know how l can have all bits set. Copied from old code.
+ * Maybe it fixes a bug on some ancient platform.
+ */
+ if (l == 0xffffffff)
+ l = 0;
+
+ if (type == pci_bar_unknown) {
+ res->flags = decode_bar(dev, l);
+ res->flags |= IORESOURCE_SIZEALIGN;
+ if (res->flags & IORESOURCE_IO) {
+ l64 = l & PCI_BASE_ADDRESS_IO_MASK;
+ sz64 = sz & PCI_BASE_ADDRESS_IO_MASK;
+ mask64 = PCI_BASE_ADDRESS_IO_MASK & (u32)IO_SPACE_LIMIT;
+ } else {
+ l64 = l & PCI_BASE_ADDRESS_MEM_MASK;
+ sz64 = sz & PCI_BASE_ADDRESS_MEM_MASK;
+ mask64 = (u32)PCI_BASE_ADDRESS_MEM_MASK;
+ }
+ } else {
+ res->flags |= (l & IORESOURCE_ROM_ENABLE);
+ l64 = l & PCI_ROM_ADDRESS_MASK;
+ sz64 = sz & PCI_ROM_ADDRESS_MASK;
+ mask64 = (u32)PCI_ROM_ADDRESS_MASK;
+ }
+
+ if (res->flags & IORESOURCE_MEM_64) {
+ pci_read_config_dword(dev, pos + 4, &l);
+ sz = sz2_in;
+
+ l64 |= ((u64)l << 32);
+ sz64 |= ((u64)sz << 32);
+ mask64 |= ((u64)~0 << 32);
+ }
+
+ if (!sz64)
+ goto fail;
+
+ sz64 = pci_size(l64, sz64, mask64);
+ if (!sz64) {
+ dev_info(&dev->dev, FW_BUG "reg 0x%x: invalid BAR (can't size)\n",
+ pos);
+ goto fail;
+ }
+
+ if (res->flags & IORESOURCE_MEM_64) {
+ if ((sizeof(dma_addr_t) < 8 || sizeof(resource_size_t) < 8) &&
+ sz64 > 0x100000000ULL) {
+ res->flags |= IORESOURCE_UNSET | IORESOURCE_DISABLED;
+ res->start = 0;
+ res->end = 0;
+ dev_err(&dev->dev, "reg 0x%x: can't handle BAR larger than 4GB (size %#010llx)\n",
+ pos, (unsigned long long)sz64);
+ goto out;
+ }
+
+ if ((sizeof(dma_addr_t) < 8) && l) {
+ /* Above 32-bit boundary; try to reallocate */
+ res->flags |= IORESOURCE_UNSET;
+ res->start = 0;
+ res->end = sz64;
+ dev_info(&dev->dev, "reg 0x%x: can't handle BAR above 4GB (bus address %#010llx)\n",
+ pos, (unsigned long long)l64);
+ goto out;
+ }
+ }
+
+ region.start = l64;
+ region.end = l64 + sz64;
+
+ pcibios_bus_to_resource(dev->bus, res, &region);
+ pcibios_resource_to_bus(dev->bus, &inverted_region, res);
+
+ /*
+ * If "A" is a BAR value (a bus address), "bus_to_resource(A)" is
+ * the corresponding resource address (the physical address used by
+ * the CPU. Converting that resource address back to a bus address
+ * should yield the original BAR value:
+ *
+ * resource_to_bus(bus_to_resource(A)) == A
+ *
+ * If it doesn't, CPU accesses to "bus_to_resource(A)" will not
+ * be claimed by the device.
+ */
+ if (inverted_region.start != region.start) {
+ res->flags |= IORESOURCE_UNSET;
+ res->start = 0;
+ res->end = region.end - region.start;
+ dev_info(&dev->dev, "reg 0x%x: initial BAR value %#010llx invalid\n",
+ pos, (unsigned long long)region.start);
+ }
+
+ goto out;
+
+
+fail:
+ res->flags = 0;
+out:
+ if (res->flags)
+ dev_printk(KERN_DEBUG, &dev->dev, "reg 0x%x: %pR\n", pos, res);
+
+ return (res->flags & IORESOURCE_MEM_64) ? 1 : 0;
+}
+
+static int is_known_device(struct pci_dev *dev, int pos, int *sz)
+{
+ /* Red Hat, Inc : Virtio network device */
+ if (dev->vendor == 0x1af4 && dev->device == 0x1000) {
+ if (pos == 0x10) {
+ *sz = 0xffffffe1;
+ return 1;
+ }
+ if (pos == 0x14) {
+ *sz = 0xfffff000;
+ return 1;
+ }
+ }
+ /* Red Hat, Inc : Virtio block device */
+ if (dev->vendor == 0x1af4 && dev->device == 0x1001) {
+ if (pos == 0x10) {
+ *sz = 0xffffffc1;
+ return 1;
+ }
+ if (pos == 0x14) {
+ *sz = 0xfffff000;
+ return 1;
+ }
+ }
+ return 0;
+}
+
/**
* pci_read_base - read a PCI BAR
* @dev: the PCI device
@@ -182,6 +335,9 @@ int __pci_read_base(struct pci_dev *dev, enum pci_bar_type type,
mask = type ? PCI_ROM_ADDRESS_MASK : ~0;
+ if (is_known_device(dev, pos, &sz))
+ return __pci_read_base_shortcut(dev, type, res, pos, sz, 0);
+
res->name = pci_name(dev);
printk("clr: Starting probe for %s\n", res->name);
--
2.11.0

View File

@ -0,0 +1,39 @@
From 11a42057e43d77c04317eb3a3f40989b6f57864d Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 22 Jun 2015 09:33:33 -0500
Subject: [PATCH 11/22] init: no wait for the known devices
No wait for the known devices to complete their probing
Author: Arjan van de Ven <arjan@linux.intel.com>
Signed-off-by: Miguel Bernal Marin <miguel.bernal.marin@linux.intel.com>
---
init/do_mounts.c | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/init/do_mounts.c b/init/do_mounts.c
index dea5de95..da840946 100644
--- a/init/do_mounts.c
+++ b/init/do_mounts.c
@@ -28,6 +28,7 @@
#include <linux/slab.h>
#include <linux/ramfs.h>
#include <linux/shmem_fs.h>
+#include <linux/async.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_fs_sb.h>
@@ -563,7 +564,8 @@ void __init prepare_namespace(void)
* For example, it is not atypical to wait 5 seconds here
* for the touchpad of a laptop to initialize.
*/
- wait_for_device_probe();
+ //wait_for_device_probe();
+ async_synchronize_full();
md_run_setup();
--
2.11.0

View File

@ -0,0 +1,32 @@
From d8056696038fd33187ca41e25832ed3960c3ec7f Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Mon, 14 Mar 2016 11:06:46 -0600
Subject: [PATCH 12/22] ksm-wakeups
reduce wakeups in ksm
---
mm/ksm.c | 8 ++++++--
1 file changed, 6 insertions(+), 2 deletions(-)
diff --git a/mm/ksm.c b/mm/ksm.c
index caa54a55..b043f871 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1724,8 +1724,12 @@ static int ksm_scan_thread(void *nothing)
try_to_freeze();
if (ksmd_should_run()) {
- schedule_timeout_interruptible(
- msecs_to_jiffies(ksm_thread_sleep_millisecs));
+ if (ksm_thread_sleep_millisecs >= 1000)
+ schedule_timeout_interruptible(
+ msecs_to_jiffies(round_jiffies_relative(ksm_thread_sleep_millisecs)));
+ else
+ schedule_timeout_interruptible(
+ msecs_to_jiffies(ksm_thread_sleep_millisecs));
} else {
wait_event_freezable(ksm_thread_wait,
ksmd_should_run() || kthread_should_stop());
--
2.11.0

View File

@ -0,0 +1,56 @@
From c03fac43a4294098b01d6a0eadd824e6c79e70e6 Mon Sep 17 00:00:00 2001
From: Alan Cox <alan@linux.intel.com>
Date: Thu, 10 Mar 2016 15:11:28 +0000
Subject: [PATCH 14/22] xattr: allow setting user.* attributes on symlinks by
owner
Kvmtool and clear containers supports using user attributes to label host
files with the virtual uid/guid of the file in the container. This allows an
end user to manage their files and a complete uid space without all the ugly
namespace stuff.
The one gap in the support is symlinks because an end user can change the
ownership of a symbolic link. We support attributes on these files as you
can already (as root) set security attributes on them.
The current rules seem slightly over-paranoid and as we have a use case this
patch enables updating the attributes on a symbolic link IFF you are the
owner of the symlink (as permissions are not usually meaningful on the link
itself).
Signed-off-by: Alan Cox <alan@linux.intel.com>
---
fs/xattr.c | 14 ++++++++------
1 file changed, 8 insertions(+), 6 deletions(-)
diff --git a/fs/xattr.c b/fs/xattr.c
index ed8c3745..f48d608e 100644
--- a/fs/xattr.c
+++ b/fs/xattr.c
@@ -118,15 +118,17 @@ xattr_permission(struct inode *inode, const char *name, int mask)
}
/*
- * In the user.* namespace, only regular files and directories can have
- * extended attributes. For sticky directories, only the owner and
- * privileged users can write attributes.
+ * In the user.* namespace, only regular files, symbolic links, and
+ * directories can have extended attributes. For symbolic links and
+ * sticky directories, only the owner and privileged users can write
+ * attributes.
*/
if (!strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN)) {
- if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
+ if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode) && !S_ISLNK(inode->i_mode))
return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
- if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
- (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
+ if (((S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX))
+ || S_ISLNK(inode->i_mode)) && (mask & MAY_WRITE)
+ && !inode_owner_or_capable(inode))
return -EPERM;
}
--
2.11.0

View File

@ -0,0 +1,25 @@
From ec4c5378faf4ab331d56706d58e112c14ec45396 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Wed, 7 Dec 2016 17:50:05 -0600
Subject: [PATCH 15/22] crypto: allow testmgr to be skipped
---
crypto/testmgr.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/crypto/testmgr.c b/crypto/testmgr.c
index 62dffa00..197e9c53 100644
--- a/crypto/testmgr.c
+++ b/crypto/testmgr.c
@@ -36,7 +36,7 @@
#include "internal.h"
-static bool notests;
+static bool notests = true;
module_param(notests, bool, 0644);
MODULE_PARM_DESC(notests, "disable crypto self-tests");
--
2.11.0

View File

@ -0,0 +1,25 @@
From 5b5f050eec3a244002b1a729627b7b610c86a4e5 Mon Sep 17 00:00:00 2001
From: Jose Carlos Venegas Munoz <jose.carlos.venegas.munoz@intel.com>
Date: Wed, 7 Dec 2016 17:53:45 -0600
Subject: [PATCH 16/22] silence "Power down" msg
---
kernel/reboot.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/reboot.c b/kernel/reboot.c
index bd30a973..eb6f8f6e 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -261,7 +261,7 @@ void kernel_power_off(void)
pm_power_off_prepare();
migrate_to_reboot_cpu();
syscore_shutdown();
- pr_emerg("Power down\n");
+ pr_info("Power down\n");
kmsg_dump(KMSG_DUMP_POWEROFF);
machine_power_off();
}
--
2.11.0

View File

@ -0,0 +1,131 @@
From 3db64f7f26d077d4c05e413ac21269bc5a897c6a Mon Sep 17 00:00:00 2001
From: Eric Van Hensbergen <ericvh@gmail.com>
Date: Tue, 21 Apr 2015 12:46:29 -0700
Subject: [PATCH 17/22] fs/9p: fix create-unlink-getattr idiom
Fixes several outstanding bug reports of not being able to getattr from an
open file after an unlink. This patch cleans up transient fids on an unlink
and will search open fids on a client if it detects a dentry that appears to
have been unlinked. This search is necessary because fstat does not pass fd
information through the VFS API to the filesystem, only the dentry which for
9p has an imperfect match to fids.
Inherent in this patch is also a fix for the qid handling on create/open
which apparently wasn't being set correctly and was necessary for the search
to succeed.
A possible optimization over this fix is to include accounting of open fids
with the inode in the private data (in a similar fashion to the way we track
transient fids with dentries). This would allow a much quicker search for
a matching open fid.
Signed-off-by: Eric Van Hensbergen <ericvh@gmail.com>
---
fs/9p/fid.c | 30 ++++++++++++++++++++++++++++++
fs/9p/vfs_inode.c | 4 ++++
net/9p/client.c | 5 ++++-
3 files changed, 38 insertions(+), 1 deletion(-)
diff --git a/fs/9p/fid.c b/fs/9p/fid.c
index 60fb4746..e19c9cf7 100644
--- a/fs/9p/fid.c
+++ b/fs/9p/fid.c
@@ -54,6 +54,33 @@ void v9fs_fid_add(struct dentry *dentry, struct p9_fid *fid)
}
/**
+ * v9fs_fid_find_global - search for a fid off of the client list
+ * @inode: return a fid pointing to a specific inode
+ * @uid: return a fid belonging to the specified user
+ *
+ */
+
+static struct p9_fid *v9fs_fid_find_inode(struct inode *inode, kuid_t uid)
+{
+ struct p9_client *clnt = v9fs_inode2v9ses(inode)->clnt;
+ struct p9_fid *fid, *fidptr, *ret = NULL;
+ unsigned long flags;
+
+ p9_debug(P9_DEBUG_VFS, " inode: %p\n", inode);
+
+ spin_lock_irqsave(&clnt->lock, flags);
+ list_for_each_entry_safe(fid, fidptr, &clnt->fidlist, flist) {
+ if (uid_eq(fid->uid, uid) &&
+ (inode->i_ino == v9fs_qid2ino(&fid->qid))) {
+ ret = fid;
+ break;
+ }
+ }
+ spin_unlock_irqrestore(&clnt->lock, flags);
+ return ret;
+}
+
+/**
* v9fs_fid_find - retrieve a fid that belongs to the specified uid
* @dentry: dentry to look for fid in
* @uid: return fid that belongs to the specified user
@@ -80,6 +107,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any)
}
}
spin_unlock(&dentry->d_lock);
+ } else {
+ if (dentry->d_inode)
+ ret = v9fs_fid_find_inode(dentry->d_inode, uid);
}
return ret;
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c
index 30ca770c..c00487ea 100644
--- a/fs/9p/vfs_inode.c
+++ b/fs/9p/vfs_inode.c
@@ -624,6 +624,10 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags)
v9fs_invalidate_inode_attr(inode);
v9fs_invalidate_inode_attr(dir);
+
+ /* invalidate all fids associated with dentry */
+ /* NOTE: This will not include open fids */
+ dentry->d_op->d_release(dentry);
}
return retval;
}
diff --git a/net/9p/client.c b/net/9p/client.c
index cf129fec..8284ad03 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -1208,7 +1208,7 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname,
if (nwname)
memmove(&fid->qid, &wqids[nwqids - 1], sizeof(struct p9_qid));
else
- fid->qid = oldfid->qid;
+ memmove(&fid->qid, &oldfid->qid, sizeof(struct p9_qid));
kfree(wqids);
return fid;
@@ -1261,6 +1261,7 @@ int p9_client_open(struct p9_fid *fid, int mode)
p9_is_proto_dotl(clnt) ? "RLOPEN" : "ROPEN", qid.type,
(unsigned long long)qid.path, qid.version, iounit);
+ memmove(&fid->qid, &qid, sizeof(struct p9_qid));
fid->mode = mode;
fid->iounit = iounit;
@@ -1306,6 +1307,7 @@ int p9_client_create_dotl(struct p9_fid *ofid, char *name, u32 flags, u32 mode,
(unsigned long long)qid->path,
qid->version, iounit);
+ memmove(&ofid->qid, qid, sizeof(struct p9_qid));
ofid->mode = mode;
ofid->iounit = iounit;
@@ -1351,6 +1353,7 @@ int p9_client_fcreate(struct p9_fid *fid, char *name, u32 perm, int mode,
(unsigned long long)qid.path,
qid.version, iounit);
+ memmove(&fid->qid, &qid, sizeof(struct p9_qid));
fid->mode = mode;
fid->iounit = iounit;
--
2.11.0

View File

@ -0,0 +1,24 @@
From 9ac45f2a4f0de76365e7b12414500f3a12bf028e Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Tue, 29 Mar 2016 14:29:24 -0600
Subject: [PATCH 18/22] rdrand
---
arch/x86/kernel/cpu/rdrand.c | 1 +
1 file changed, 1 insertion(+)
diff --git a/arch/x86/kernel/cpu/rdrand.c b/arch/x86/kernel/cpu/rdrand.c
index cfa97ff6..cd7aa166 100644
--- a/arch/x86/kernel/cpu/rdrand.c
+++ b/arch/x86/kernel/cpu/rdrand.c
@@ -48,6 +48,7 @@ void x86_init_rdrand(struct cpuinfo_x86 *c)
if (!cpu_has(c, X86_FEATURE_RDRAND))
return;
+ return;
for (i = 0; i < SANITY_CHECK_LOOPS; i++) {
if (!rdrand_long(&tmp)) {
clear_cpu_cap(c, X86_FEATURE_RDRAND);
--
2.11.0

View File

@ -0,0 +1,34 @@
From 1629faac6cc01351c26608c3ea8c669c8b87459b Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 11 Feb 2016 11:06:26 -0600
Subject: [PATCH 19/22] reboot
---
kernel/reboot.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/reboot.c b/kernel/reboot.c
index eb6f8f6e..b367c1e0 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -309,7 +309,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
* halt when pm_power_off is not set do it the easy way.
*/
if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
- cmd = LINUX_REBOOT_CMD_HALT;
+ cmd = LINUX_REBOOT_CMD_RESTART;
mutex_lock(&reboot_mutex);
switch (cmd) {
@@ -326,7 +326,7 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
break;
case LINUX_REBOOT_CMD_HALT:
- kernel_halt();
+ kernel_restart(NULL);
do_exit(0);
panic("cannot halt");
--
2.11.0

View File

@ -0,0 +1,25 @@
From 9424467a5d1fb1f971076187c07c78a81e9cf661 Mon Sep 17 00:00:00 2001
From: Arjan van de Ven <arjan@linux.intel.com>
Date: Thu, 11 Feb 2016 11:07:54 -0600
Subject: [PATCH 20/22] no early modprobe
---
kernel/kmod.c | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 0277d121..20b5777f 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -557,7 +557,7 @@ int call_usermodehelper_exec(struct subprocess_info *sub_info, int wait)
DECLARE_COMPLETION_ONSTACK(done);
int retval = 0;
- if (!sub_info->path) {
+ if (!sub_info->path || system_state == SYSTEM_BOOTING) {
call_usermodehelper_freeinfo(sub_info);
return -EINVAL;
}
--
2.11.0

View File

@ -0,0 +1,28 @@
From f209324e2d027b432870b64847122e55ba805ee9 Mon Sep 17 00:00:00 2001
From: Dimitri John Ledkov <dimitri.j.ledkov@intel.com>
Date: Thu, 11 Feb 2016 13:14:53 -0600
Subject: [PATCH 22/22] Show restart information using info log
---
kernel/reboot.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/kernel/reboot.c b/kernel/reboot.c
index b367c1e0..cfd60137 100644
--- a/kernel/reboot.c
+++ b/kernel/reboot.c
@@ -217,9 +217,9 @@ void kernel_restart(char *cmd)
migrate_to_reboot_cpu();
syscore_shutdown();
if (!cmd)
- pr_emerg("Restarting system\n");
+ pr_info("Restarting system\n");
else
- pr_emerg("Restarting system with command '%s'\n", cmd);
+ pr_info("Restarting system with command '%s'\n", cmd);
kmsg_dump(KMSG_DUMP_RESTART);
machine_restart(cmd);
}
--
2.11.0

File diff suppressed because it is too large Load Diff

2
kernel/v4.9.56.sha256 Normal file
View File

@ -0,0 +1,2 @@
66df3d3c25d3627da87b612db9f0158b652d949b5e8b27ac38c6c5957fa2bf72 downloads/linux-4.9.56.tar
5b0d7ac640b6c1f20524cf6042ad39bb5d8f2184ca5ef7a9cd309e5b8a5c9b60 downloads/v4.9.56-unofficial_grsec-20171013093040.diff

9
src/devices/mod.rs Normal file
View File

@ -0,0 +1,9 @@
// Device emulation modules for the VM: legacy PC devices (serial UART,
// CMOS RTC) and virtio transports (9p filesystem, serial console, rng).
pub mod serial;
pub mod rtc;
pub mod virtio_9p;
pub mod virtio_serial;
pub mod virtio_rng;
// Re-export the device types constructed directly by VM setup code.
pub use self::virtio_serial::VirtioSerial;
pub use self::virtio_9p::VirtioP9;
pub use self::virtio_rng::VirtioRandom;

116
src/devices/rtc.rs Normal file
View File

@ -0,0 +1,116 @@
use std::sync::{Arc,RwLock};
use std::mem;
use libc;
use vm::io::{IoDispatcher,IoPortOps};
// CMOS/RTC register indices (MC146818-style register map used by PC BIOSes).
const RTC_SECONDS: u8 = 0x00;
const RTC_MINUTES: u8 = 0x02;
const RTC_HOURS: u8 = 0x04;
const RTC_DAY_OF_WEEK: u8 = 0x06;
const RTC_DAY_OF_MONTH: u8 = 0x07;
const RTC_MONTH: u8 = 0x08;
const RTC_YEAR: u8 = 0x09;
// Century byte at 0x32 is a common PC convention, not part of the
// original MC146818 register set.
const RTC_CENTURY: u8 = 0x32;
// Status registers C and D; guest writes to these are ignored.
const RTC_REG_C: u8 = 0x0C;
const RTC_REG_D: u8 = 0x0D;
/// Minimal CMOS real-time-clock emulation on I/O ports 0x70/0x71.
pub struct Rtc {
    idx: u8,        // currently latched register index (written via port 0x70)
    data: [u8; 128] // backing store for registers with no special handling
}
impl IoPortOps for Rtc {
    /// Port 0x71 reads return the latched register; any other port reads 0.
    fn io_in(&mut self, port: u16, _size: usize) -> u32 {
        match port {
            0x0071 => u32::from(self.data_in()),
            _ => 0,
        }
    }

    /// Port 0x70 latches a register index, port 0x71 writes the register.
    fn io_out(&mut self, port: u16, _size: usize, val: u32) {
        match port {
            0x0070 => self.index_out(val as u8),
            0x0071 => self.data_out(val as u8),
            _ => {}
        }
    }
}
impl Rtc {
pub fn register(io: Arc<IoDispatcher>) {
let rtc = Arc::new(RwLock::new(Rtc::new()));
io.register_ioports(0x0070, 2, rtc);
}
fn new() -> Rtc {
Rtc {
idx:0,
data: [0; 128]
}
}
fn index_out(&mut self, data: u8) {
let _nmi_disable = data & 0x80;
self.idx = data & 0x7f;
}
fn data_in(&mut self) -> u8 {
let now = RtcTime::now();
match self.idx {
RTC_SECONDS => now.seconds,
RTC_MINUTES => now.minutes,
RTC_HOURS => now.hours,
RTC_DAY_OF_WEEK => now.wday,
RTC_DAY_OF_MONTH => now.mday,
RTC_MONTH => now.month,
RTC_YEAR => now.year,
RTC_CENTURY => now.century,
_ => { self.data[self.idx as usize]},
}
}
fn data_out(&mut self, data: u8) {
if self.idx == RTC_REG_C || self.idx == RTC_REG_D {
return;
}
self.data[self.idx as usize] = data;
}
}
// Snapshot of the host's current UTC time, with each field already
// BCD-encoded as the CMOS RTC registers expect.
struct RtcTime {
    seconds: u8,
    minutes: u8,
    hours: u8,
    wday: u8,    // day of week, 1-based
    mday: u8,    // day of month
    month: u8,   // month, 1-based
    year: u8,    // two low digits of the year
    century: u8,
}
impl RtcTime {
    /// Capture the host's current UTC time as BCD-encoded RTC register values.
    fn now() -> RtcTime {
        // Encode a value in the range 0..=99 as two packed BCD digits.
        fn bcd(val: i32) -> u8 {
            (((val/10) << 4) + (val % 10)) as u8
        }
        unsafe {
            // SAFETY: libc::tm is plain-old-data, so a zeroed value is valid;
            // time() and gmtime_r() are handed valid, live pointers.
            let mut tm: libc::tm = mem::zeroed();
            let mut time: libc::time_t = 0;
            libc::time(&mut time as *mut _);
            libc::gmtime_r(&time, &mut tm as *mut _);
            // struct tm uses C conventions: tm_mon is 0-based, tm_wday is
            // 0-based starting on Sunday, tm_year counts years since 1900.
            RtcTime {
                seconds: bcd(tm.tm_sec),
                minutes: bcd(tm.tm_min),
                hours: bcd(tm.tm_hour),
                wday: bcd(tm.tm_wday + 1),
                mday: bcd(tm.tm_mday),
                month: bcd(tm.tm_mon + 1),
                year: bcd(tm.tm_year % 100),
                // Fix: tm_year is years since 1900, so the century register
                // must encode (tm_year + 1900) / 100. The previous
                // bcd(tm.tm_year / 100) produced 0x01 for 2017 instead of 0x20.
                century: bcd((tm.tm_year + 1900) / 100),
            }
        }
    }
}

313
src/devices/serial.rs Normal file
View File

@ -0,0 +1,313 @@
use std::sync::{Arc, RwLock};
use std::io::{self, Write};
use vm::io::{IoPortOps,IoDispatcher};
use kvm::Kvm;
// 16550 UART register offsets and flag bits (subset used by this model).

// Offset 0: transmit holding register (write) / receive buffer (read).
const UART_TX: u16 = 0;
const UART_RX: u16 = 0;
// Offset 1: interrupt enable register (divisor-latch MSB when DLAB is set).
const UART_IER: u16 = 1;
const UART_IER_RDI: u8 = 0x01;  // enable "received data available" interrupt
const UART_IER_THRI: u8 = 0x02; // enable "tx holding register empty" interrupt
// Offset 2 (read): interrupt identification register.
const UART_IIR: u16 = 2;
const UART_IIR_NO_INT: u8 = 0x01;
const UART_IIR_THRI: u8 = 0x02;
const UART_IIR_RDI: u8 = 0x04;
const UART_IIR_TYPE_BITS: u8 = 0xc0;
// Offset 2 (write): FIFO control register.
const UART_FCR: u16 = 2;
const UART_FCR_CLEAR_RCVR: u8 = 0x02;
const UART_FCR_CLEAR_XMIT: u8 = 0x04;
// Offset 3: line control register; DLAB switches offsets 0/1 to divisor latch.
const UART_LCR: u16 = 3;
const UART_LCR_DLAB: u8 = 0x80;
// Offset 4: modem control register.
const UART_MCR: u16 = 4;
const UART_MCR_LOOP: u8 = 0x10;
const UART_MCR_OUT2: u8 = 0x08;
// Offset 5: line status register.
const UART_LSR: u16 = 5;
const UART_LSR_TEMT: u8 = 0x40; // transmitter completely empty
const UART_LSR_THRE: u8 = 0x20; // tx holding register empty
const UART_LSR_BI: u8 = 0x10;   // break indication
const UART_LSR_DR: u8 = 0x01;   // data ready
// Offset 6: modem status register.
const UART_MSR: u16 = 6;
const UART_MSR_DCD: u8 = 0x80;
const UART_MSR_DSR: u8 = 0x20;
const UART_MSR_CTS: u8 = 0x10;
// Offset 7: scratch register.
const UART_SCR: u16 = 7;
// Size of the emulated tx/rx FIFOs.
const FIFO_LEN: usize = 64;
/// Small helper trait for flag manipulation on UART register bytes.
trait Bits {
    fn set(&mut self, flag: Self);
    fn clear(&mut self, flag: Self);
    fn is_set(&self, flag: Self) -> bool;
}

impl Bits for u8 {
    /// Turn on every bit present in `flag`.
    fn set(&mut self, flag: u8) {
        *self = *self | flag;
    }

    /// Turn off every bit present in `flag`.
    fn clear(&mut self, flag: u8) {
        *self = *self & !flag;
    }

    /// True only when all bits of `flag` are present in `self`.
    fn is_set(&self, flag: u8) -> bool {
        (*self & flag) == flag
    }
}
/// Emulated 16550-style UART backed by the process's stdout.
pub struct SerialDevice {
    iobase: u16,          // base I/O port (e.g. 0x3f8 for COM1)
    kvm: Kvm,             // handle used to raise/lower the guest IRQ line
    irq: u8,              // IRQ number for this UART
    irq_state: u8,        // last IIR value; non-zero means IRQ line asserted
    txcnt: usize,         // bytes currently buffered in txbuf
    rxcnt: usize,         // bytes currently buffered in rxbuf
    rxdone: usize,        // index of next rx byte to hand to the guest
    txbuf: [u8; FIFO_LEN],
    rxbuf: [u8; FIFO_LEN],
    dll: u8,              // divisor latch LSB (offset 0 with DLAB set)
    dlm: u8,              // divisor latch MSB (offset 1 with DLAB set)
    iir: u8,              // interrupt identification register
    ier: u8,              // interrupt enable register
    fcr: u8,              // FIFO control register
    lcr: u8,              // line control register
    mcr: u8,              // modem control register
    lsr: u8,              // line status register
    msr: u8,              // modem status register
    scr: u8,              // scratch register
}
impl IoPortOps for SerialDevice {
    /// Read one of this UART's registers; `port` is an absolute I/O port.
    fn io_in(&mut self, port: u16, _size: usize) -> u32 {
        let register = port - self.iobase;
        u32::from(self.serial_in(register))
    }

    /// Write one of this UART's registers; `port` is an absolute I/O port.
    fn io_out(&mut self, port: u16, _size: usize, val: u32) {
        let register = port - self.iobase;
        self.serial_out(register, val as u8);
    }
}
impl SerialDevice {
    /// Flush buffered transmit bytes to stdout and mark the transmitter
    /// empty (TEMT|THRE) in the line status register.
    fn flush_tx(&mut self) {
        self.lsr.set(UART_LSR_TEMT | UART_LSR_THRE);
        if self.txcnt > 0 {
            // write_all() so a short write cannot silently drop guest output
            // (the original write() ignored the number of bytes written).
            io::stdout().write_all(&self.txbuf[..self.txcnt]).unwrap();
            self.txcnt = 0;
        }
    }

    /// Recompute the interrupt identification register and raise or lower
    /// the IRQ line through KVM when the pending state changes.
    fn update_irq(&mut self) {
        let mut iir = 0u8;

        // NOTE(review): the FIFO "clear receiver/transmitter" bits are
        // tested and cleared on lcr rather than fcr, mirroring kvmtool's
        // serial model. Verify this is intentional: LCR bit 0x02 is also
        // part of the word-length field.
        if self.lcr.is_set(UART_FCR_CLEAR_RCVR) {
            self.lcr.clear(UART_FCR_CLEAR_RCVR);
            self.rxcnt = 0;
            self.rxdone = 0;
            self.lsr.clear(UART_LSR_DR);
        }

        if self.lcr.is_set(UART_FCR_CLEAR_XMIT) {
            self.lcr.clear(UART_FCR_CLEAR_XMIT);
            self.txcnt = 0;
            self.lsr.set(UART_LSR_TEMT|UART_LSR_THRE);
        }

        // Receive-data-available interrupt pending?
        if self.ier.is_set(UART_IER_RDI) && self.lsr.is_set(UART_LSR_DR) {
            iir |= UART_IIR_RDI;
        }

        // Transmitter-empty interrupt pending?
        if self.ier.is_set(UART_IER_THRI) && self.lsr.is_set(UART_LSR_TEMT) {
            iir |= UART_IIR_THRI;
        }

        if iir == 0 {
            self.iir = UART_IIR_NO_INT;
            if self.irq_state != 0 {
                self.kvm.irq_line(self.irq as u32, 0).unwrap();
            }
        } else {
            self.iir = iir;
            if self.irq_state == 0 {
                self.kvm.irq_line(self.irq as u32, 1).unwrap();
            }
        }
        self.irq_state = iir;

        // If the guest doesn't want tx interrupts, drain the output now.
        if !self.ier.is_set(UART_IER_THRI) {
            self.flush_tx();
        }
    }

    /// Handle a byte written to offset 0: divisor latch LSB when DLAB is
    /// set, loop back into the rx FIFO when MCR loopback is on, otherwise
    /// buffer it in the tx FIFO.
    fn tx(&mut self, data: u8) {
        if self.lcr.is_set(UART_LCR_DLAB) {
            self.dll = data;
            return;
        }

        if self.mcr.is_set(UART_MCR_LOOP) {
            if self.rxcnt < FIFO_LEN {
                self.rxbuf[self.rxcnt] = data;
                self.rxcnt += 1;
                self.lsr.set(UART_LSR_DR);
            }
            return;
        }

        if self.txcnt < FIFO_LEN {
            self.txbuf[self.txcnt] = data;
            self.txcnt += 1;
            self.lsr.clear(UART_LSR_TEMT);
            if self.txcnt == FIFO_LEN / 2 {
                self.lsr.clear(UART_LSR_THRE);
            }
            self.flush_tx();
        } else {
            // FIFO full; transmitter is busy until flushed.
            self.lsr.clear(UART_LSR_TEMT | UART_LSR_THRE);
        }
    }

    /// Dispatch a register write. `port` is the register offset (0..=7).
    fn serial_out(&mut self, port: u16, data: u8) {
        match port {
            UART_TX => {
                self.tx(data);
            },
            UART_IER => {
                // Fix: offset 1 is the divisor-latch MSB when DLAB is set
                // and the interrupt-enable register when DLAB is clear.
                // The original code had these two branches swapped (the
                // read path in serial_in() already had them correct).
                if self.lcr.is_set(UART_LCR_DLAB) {
                    self.dlm = data;
                } else {
                    self.ier = data & 0x0f;
                }
            },
            UART_FCR => {
                self.fcr = data;
            },
            UART_LCR => {
                self.lcr = data;
            },
            UART_MCR => {
                self.mcr = data;
            },
            // LSR and MSR are read-only status registers.
            UART_LSR => {},
            UART_MSR => {},
            UART_SCR => {
                self.scr = data;
            },
            _ => {}
        }
        self.update_irq();
    }

    /// Dispatch a register read. `port` is the register offset (0..=7).
    fn serial_in(&mut self, port: u16) -> u8 {
        let mut data = 0u8;
        match port {
            UART_RX => {
                if self.lcr.is_set(UART_LCR_DLAB) {
                    data = self.dll;
                } else {
                    self.rx(&mut data);
                }
            },
            UART_IER => {
                if self.lcr.is_set(UART_LCR_DLAB) {
                    data = self.dlm;
                } else {
                    data = self.ier;
                }
            },
            UART_IIR => {
                data = self.iir & UART_IIR_TYPE_BITS;
            },
            UART_LCR => {
                data = self.lcr;
            },
            UART_MCR => {
                data = self.mcr;
            },
            UART_LSR => {
                data = self.lsr;
            },
            UART_MSR => {
                data = self.msr;
            },
            UART_SCR => {
                data = self.scr;
            },
            _ => {},
        }
        self.update_irq();
        data
    }

    /// Pop the next byte from the rx FIFO into `data`; clears the
    /// data-ready flag and resets the FIFO when the last byte is consumed.
    fn rx(&mut self, data: &mut u8) {
        if self.rxdone == self.rxcnt {
            return;
        }
        // A pending break indication is reported as a single NUL byte.
        if self.lsr.is_set(UART_LSR_BI) {
            self.lsr.clear(UART_LSR_BI);
            *data = 0;
            return;
        }
        *data = self.rxbuf[self.rxdone];
        self.rxdone += 1;
        if self.rxdone == self.rxcnt {
            self.lsr.clear(UART_LSR_DR);
            self.rxdone = 0;
            self.rxcnt = 0;
        }
    }

    /// Create and register serial port `id` (0..=3 → COM1..COM4) on the
    /// I/O dispatcher; ids without a standard base/IRQ are ignored.
    pub fn register(kvm: Kvm, io: Arc<IoDispatcher>, id: u8) {
        if let Some((base,irq)) = SerialDevice::base_irq_for_id(id) {
            let dev = SerialDevice::new(kvm, base, irq);
            io.register_ioports(base, 8, Arc::new(RwLock::new(dev)));
        }
    }

    /// Standard PC base port and IRQ for COM1..COM4.
    fn base_irq_for_id(id: u8) -> Option<(u16, u8)> {
        match id {
            0 => Some((0x3f8, 4)),
            1 => Some((0x2f8, 3)),
            2 => Some((0x3e8, 4)),
            3 => Some((0x2e8, 3)),
            _ => None,
        }
    }

    fn new(kvm: Kvm, iobase: u16, irq: u8) -> SerialDevice {
        SerialDevice {
            iobase,
            kvm,
            irq,
            irq_state: 0,
            txcnt: 0,
            rxcnt: 0,
            rxdone:0,
            txbuf: [0; FIFO_LEN],
            rxbuf: [0; FIFO_LEN],
            dll: 0,
            dlm: 0,
            iir: UART_IIR_NO_INT,
            ier: 0,
            fcr: 0,
            lcr: 0,
            mcr: UART_MCR_OUT2,
            lsr: UART_LSR_TEMT | UART_LSR_THRE,
            msr: UART_MSR_DCD | UART_MSR_DSR | UART_MSR_CTS,
            scr: 0,
        }
    }
}

View File

@ -0,0 +1,404 @@
use std::path::PathBuf;
use std::io;
use std::path::Path;
use std::fs;
use libc;
use memory::GuestRam;
use super::pdu::{PduParser,P9Attr};
use super::fid::FidCache;
use super::filesystem::{FileSystem,FsTouch,FileSystemOps};
// 9P2000.L message type codes (T-messages; the matching R-message code is
// always T + 1). Commented-out codes are not handled by dispatch() yet.
const P9_TSTATFS: u8 = 8;
const P9_TLOPEN: u8 = 12;
const P9_TLCREATE: u8 = 14;
const P9_TSYMLINK: u8 = 16;
//const P9_TMKNOD: u8 = 18;
//const P9_TRENAME: u8 = 20;
const P9_TREADLINK: u8 = 22;
const P9_TGETATTR: u8 = 24;
const P9_TSETATTR: u8 = 26;
const P9_TXATTRWALK: u8 = 30;
const P9_TXATTRCREATE: u8 = 32;
const P9_TREADDIR: u8 = 40;
const P9_TFSYNC: u8 = 50;
const P9_TLOCK: u8 = 52;
const P9_TGETLOCK: u8 = 54;
//const P9_TLINK: u8 = 70;
//const P9_TMKDIR: u8 = 72;
//const P9_TRENAMEAT: u8 = 74;
//const P9_TUNLINKAT: u8 = 76;
const P9_TVERSION:u8 = 100;
const P9_TATTACH :u8 = 104;
//const P9_TFLUSH: u8 = 108;
const P9_TWALK :u8 = 110;
const P9_TREAD: u8 = 116;
//const P9_TWRITE: u8 = 118;
const P9_TCLUNK: u8 = 120;
//const P9_REMOVE: u8 = 122;
// Status value returned for (always-successful) lock requests.
const P9_LOCK_SUCCESS:u32 = 0;
// POSIX lock type reported by getlock: no conflicting lock held.
const F_UNLCK: u8 = 2;
// Only this protocol version is negotiated; anything else gets "unknown".
const P9_VERSION_DOTL:&str = "9P2000.L";
/// 9P2000.L protocol command handler: parses T-messages from a PduParser,
/// executes them against the host filesystem, and writes R-message replies.
pub struct Commands {
    filesystem: FileSystem,  // host-backed filesystem operations
    fids: FidCache,          // maps client fid numbers to paths/open files
    root_dir: PathBuf,       // host directory exported as the 9p root
    _memory: GuestRam,       // held but not currently used directly
}
impl Commands {
    /// Build a command handler exporting `root_dir` as the 9p root.
    pub fn new(root_dir: PathBuf, init_path: PathBuf, memory: GuestRam) -> Commands {
        let fsys = FileSystem::new(root_dir.clone(), init_path,true);
        Commands {
            filesystem: fsys.clone(),
            fids: FidCache::new(fsys.clone()),
            root_dir, _memory: memory,
        }
    }

    /// Log (but do not propagate) an I/O error from processing command `cmd`.
    fn handle_io_result(&self, cmd: u8, result: io::Result<()>) {
        match result {
            Ok(()) => (),
            Err(e) => println!("io error in 9p command {} processing: {:?}",cmd, e),
        }
    }

    /// Entry point: read the command code from the parser and dispatch it.
    /// Errors are logged, never returned to the caller.
    pub fn handle(&mut self, pp: &mut PduParser) {
        match pp.command() {
            Ok(cmd) => {
                let res = self.dispatch(cmd, pp);
                self.handle_io_result(cmd,res);
            },
            Err(e) => self.handle_io_result(0,Err(e)),
        }
    }

    /// Route a T-message code to its handler. Unknown codes are logged and
    /// otherwise ignored (no R-message is written for them).
    fn dispatch(&mut self, cmd: u8, pp: &mut PduParser) -> io::Result<()> {
        match cmd {
            P9_TSTATFS => self.p9_statfs(pp)?,
            P9_TLOPEN => self.p9_open(pp)?,
            P9_TLCREATE => self.p9_create(pp)?,
            P9_TSYMLINK => self.p9_symlink(pp)?,
            //P9_TMKNOD => self.p9_mknod(pp)?,
            //P9_TRENAME => self.p9_rename(pp)?,
            P9_TREADLINK => self.p9_readlink(pp)?,
            P9_TGETATTR => self.p9_getattr(pp)?,
            P9_TSETATTR => self.p9_setattr(pp)?,
            P9_TXATTRWALK => self.p9_unsupported(pp)?,
            P9_TXATTRCREATE => self.p9_unsupported(pp)?,
            P9_TREADDIR => self.p9_readdir(pp)?,
            P9_TFSYNC => self.p9_fsync(pp)?,
            P9_TLOCK => self.p9_lock(pp)?,
            P9_TGETLOCK => self.p9_getlock(pp)?,
            //P9_TLINK => self.p9_link(pp)?,
            //P9_TMKDIR=> self.p9_mkdir(pp)?,
            //P9_TRENAMEAT => self.p9_renameat(pp)?,
            //P9_UNLINKAT => self.p9_unlinkat(pp)?,
            P9_TVERSION => self.p9_version(pp)?,
            P9_TATTACH => self.p9_attach(pp)?,
            //P9_FLUSH => self.p9_flush(pp)?,
            P9_TWALK => self.p9_walk(pp)?,
            P9_TREAD => self.p9_read(pp)?,
            //P9_WRITE => self.p9_write(pp)?,
            P9_TCLUNK => self.p9_clunk(pp)?,
            //P9_REMOVE => self.p9_remove(pp)?,
            n => println!("unhandled 9p command: {}", n),
        }
        Ok(())
    }

    /// Reply with EOPNOTSUPP for messages we recognize but don't implement.
    fn p9_unsupported(&self, pp: &mut PduParser) -> io::Result<()> {
        pp.read_done()?;
        pp.bail_err(io::Error::from_raw_os_error(libc::EOPNOTSUPP))
    }

    /// Tstatfs: return filesystem statistics for the fid's path.
    fn p9_statfs(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let fid = pp.r32()?;
        pp.read_done()?;
        match self.fids.statfs(fid) {
            Ok(statfs) => {
                pp.write_statfs(statfs)?;
                pp.write_done()
            },
            Err(err) => pp.bail_err(err),
        }
    }

    /// Tversion: echo the client's msize; accept only 9P2000.L, otherwise
    /// reply with version "unknown" per the protocol convention.
    fn p9_version(&self, pp: &mut PduParser) -> io::Result<()> {
        let msize = pp.r32()?;
        let version = pp.read_string()?;
        pp.read_done()?;
        pp.w32(msize)?;
        if version == P9_VERSION_DOTL {
            pp.write_string(&version)?;
        } else {
            pp.write_string("unknown")?;
        }
        pp.write_done()
    }

    /// Tattach: associate a fid with the export root and reply with the
    /// root's qid. afid/uname/aname are read but ignored (no auth).
    fn p9_attach(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let fid_val = pp.r32()?;
        let _afid = pp.r32()?;
        let _uname = pp.read_string()?;
        let _aname = pp.read_string()?;
        let uid = pp.r32()?;
        pp.read_done()?;
        self.fids.with_fid_mut(fid_val, |fid| {
            fid.uid = uid;
            fid.path.push("/");
        });
        match fs::metadata(&self.root_dir) {
            Ok(ref meta) => {
                pp.write_qid(meta)?;
                pp.write_done()
            }
            Err(e) => pp.bail_err(e),
        }
    }

    /// Tlopen: open the file the fid refers to and reply with qid + iounit.
    fn p9_open(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let fid = pp.r32()?;
        let flags = pp.r32()?;
        pp.read_done()?;
        if let Err(err) = self.fids.open(fid, flags) {
            return pp.bail_err(err);
        }
        let meta = match self.fids.metadata(fid) {
            Ok(meta) => meta,
            Err(err) => {
                return pp.bail_err(err);
            }
        };
        pp.write_qid(&meta)?;
        // XXX iounit goes here
        // iounit of 0 tells the client to use its own maximum chunk size.
        pp.w32(0)?;
        pp.write_done()
    }

    /// Tlcreate: create a file under the directory fid, chown it to the
    /// fid's uid and the requested gid, and reply with its attributes.
    // NOTE(review): this replies with a full stat structure; a standard
    // 9P2000.L Rlcreate carries qid + iounit. Verify this matches what the
    // patched guest kernel expects.
    fn p9_create(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let dfid = pp.r32()?;
        let name = pp.read_string()?;
        let flags = pp.r32()?;
        let mode = pp.r32()?;
        let gid = pp.r32()?;
        pp.read_done()?;
        match self.fids.create(dfid, name, flags, mode, gid) {
            Ok(meta) => {
                pp.write_statl(&meta)?;
                pp.write_done()?;
            },
            Err(err) => return pp.bail_err(err),
        }
        Ok(())
    }

    /// Tsymlink: currently a stub — arguments are parsed and discarded and
    /// an empty success reply is written without creating anything.
    fn p9_symlink(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let _fid = pp.r32()?;
        let _name = pp.read_string()?;
        let _old_path = pp.read_string()?;
        let _gid = pp.r32()?;
        pp.read_done()?;
        // XXX
        pp.write_done()
    }

    /// Tread: read up to `cnt` bytes at `off` from the open fid. The byte
    /// count field is reserved first and patched in once the size is known.
    fn p9_read(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let id = pp.r32()?;
        let off = pp.r64()?;
        let cnt = pp.r32()?;
        pp.read_done()?;
        // space for size field
        pp.w32(0)?;
        match self.fids.fid_mut(id).read(off, cnt as usize, pp) {
            Ok(nread) => {
                // write nread in space reserved earlier
                pp.w32_at(0, nread as u32);
                pp.write_done()?;
            }
            Err(err) => {
                println!("oops error on read: {:?}", err);
                return pp.bail_err(err)
            },
        };
        Ok(())
    }

    /// Treaddir: stream directory entries for the fid into the reply.
    fn p9_readdir(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let id = pp.r32()?;
        let off = pp.r64()?;
        let cnt = pp.r32()?;
        pp.read_done()?;
        self.fids.readdir(id,off, cnt as usize, pp)
    }

    /// Tclunk: drop the fid, closing any underlying file descriptor.
    fn p9_clunk(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let id = pp.r32()?;
        pp.read_done()?;
        self.fids.clunk(id);
        pp.write_done()
    }

    /// Treadlink: return the target of the symlink the fid refers to.
    fn p9_readlink(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let id = pp.r32()?;
        pp.read_done()?;
        let link = self.fids.readlink(id)?;
        pp.write_os_string(&link)?;
        pp.write_done()
    }

    /// Tgetattr: reply with the fid's full attributes. The request mask is
    /// read but ignored — all fields are always returned.
    fn p9_getattr(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let id = pp.r32()?;
        let _mask = pp.r64()?;
        pp.read_done()?;
        let meta = match self.fids.metadata(id) {
            Ok(meta) => meta,
            Err(e) => return pp.bail_err(e),
        };
        pp.write_statl(&meta)?;
        pp.write_done()
    }

    /// Apply each attribute present in a Tsetattr request: mode, atime,
    /// mtime (explicit or "now"), ownership, and size (truncate).
    fn do_setattr(&mut self, fid: u32, attr: P9Attr) -> io::Result<()> {
        if attr.has_mode() {
            self.fids.chmod(fid, attr.mode())?
        }
        if attr.has_atime() {
            if attr.has_atime_set() {
                self.fids.touch(fid, FsTouch::Atime,attr.atime())?
            } else {
                self.fids.touch(fid, FsTouch::AtimeNow,(0,0))?
            }
        }
        if attr.has_mtime() {
            if attr.has_mtime_set() {
                self.fids.touch(fid, FsTouch::Mtime,attr.mtime())?
            } else {
                self.fids.touch(fid, FsTouch::MtimeNow,(0,0))?
            }
        }
        if attr.has_chown() {
            let (uid, gid) = attr.chown_ids();
            self.fids.chown(fid, uid, gid)?;
        }
        if attr.has_size() {
            self.fids.truncate(fid, attr.size())?;
        }
        Ok(())
    }

    /// Tsetattr: parse the attribute set and apply it via do_setattr().
    fn p9_setattr(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let fid = pp.r32()?;
        let attr = pp.read_attr()?;
        pp.read_done()?;
        if let Err(err) = self.do_setattr(fid, attr) {
            return pp.bail_err(err)
        }
        pp.write_done()
    }

    // XXX look at walk in qemu
    /// Twalk: clone `fid_id` into `new_fid_id`, then walk each path element,
    /// collecting a qid per element. On a stat failure the reply is an error
    /// and the new fid keeps its pre-walk path.
    // NOTE(review): multi-component elements are only logged ("uh..."), not
    // rejected — confirm clients never send them, or return EINVAL here.
    fn p9_walk(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let fid_id = pp.r32()?;
        let new_fid_id = pp.r32()?;
        let nwname = pp.r16()?;
        self.fids.dup_fid(fid_id, new_fid_id);
        let mut cur = self.fids.fid(new_fid_id).path.clone();
        let mut metalist = Vec::new();
        for _ in 0..nwname {
            let s = pp.read_string()?;
            let p = Path::new(&s);
            if p.components().count() != 1 {
                println!("uh...");
            }
            cur.push(p);
            match self.filesystem.stat(&cur) {
                Ok(m) => metalist.push(m),
                Err(e) => {
                    pp.read_done()?;
                    return pp.bail_err(e)
                },
            }
        }
        self.fids.with_fid_mut(new_fid_id, |fid| {
            fid.path = cur;
        });
        pp.read_done()?;
        pp.w16(metalist.len() as u16)?;
        for meta in metalist {
            pp.write_qid(&meta)?;
        }
        pp.write_done()
    }

    /// Tfsync: flush the fid's data (and metadata unless dsync) to disk.
    fn p9_fsync(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let fid = pp.r32()?;
        let dsync = pp.r32()?;
        pp.read_done()?;
        if let Err(err) = self.fids.fsync(fid, dsync != 0) {
            return pp.bail_err(err);
        }
        pp.write_done()
    }

    /// Tlock: locking is not really implemented — every request is parsed,
    /// discarded, and reported as successful.
    fn p9_lock(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let _ = pp.r32()?;
        let _ = pp.r8()?;
        let _ = pp.r32()?;
        let _ = pp.r64()?;
        let _ = pp.r64()?;
        let _ = pp.r32()?;
        let _ = pp.read_string()?;
        pp.read_done()?;
        pp.w32(P9_LOCK_SUCCESS)?;
        pp.write_done()
    }

    /// Tgetlock: always reports the range as unlocked (F_UNLCK), echoing
    /// back the request's range, proc id and client id.
    fn p9_getlock(&mut self, pp: &mut PduParser) -> io::Result<()> {
        let _fid = pp.r32()?;
        let _type = pp.r8()?;
        let glock_start = pp.r64()?;
        let glock_len = pp.r64()?;
        let glock_proc_id = pp.r32()?;
        let glock_client_id = pp.read_string()?;
        pp.read_done()?;
        pp.w8(F_UNLCK)?;
        pp.w64(glock_start)?;
        pp.w64(glock_len)?;
        pp.w32(glock_proc_id)?;
        pp.write_string(&glock_client_id)?;
        pp.write_done()
    }
}

View File

@ -0,0 +1,230 @@
use std::fs::Metadata;
use std::collections::HashMap;
use std::path::PathBuf;
use std::io::{self, Seek,Write};
use std::os::unix::io::AsRawFd;
use std::ffi::OsString;
use libc;
use super::pdu::PduParser;
use super::readdir::DirEntry;
use super::filesystem::{FileSystem,FileDescriptor,StatFs,FileSystemOps,FsTouch};
/// Cache mapping 9p client fid numbers to their server-side state
/// (path, uid, open file descriptor).
pub struct FidCache {
    filesystem: FileSystem,   // host filesystem operations
    fidmap: HashMap<u32, Fid>, // fid number -> per-fid state
}
impl FidCache {
    pub fn new(filesystem: FileSystem) -> FidCache {
        FidCache {
            filesystem,
            fidmap: HashMap::new(),
        }
    }

    // Lazily create an empty Fid entry so lookups never fail.
    fn add_if_absent(&mut self, id: u32) {
        if !self.fidmap.contains_key(&id) {
            self.fidmap.insert(id, Fid::new());
        }
    }

    /// Borrow the fid, creating a fresh entry first if none exists.
    pub fn fid(&mut self, id: u32) -> &Fid {
        self.add_if_absent(id);
        self.fidmap.get(&id).expect("fidmap does not have element")
    }

    /// Non-creating lookup; panics if the fid was never added.
    pub fn _fid(&self, id: u32) -> &Fid {
        self.fidmap.get(&id).expect("fidmap does not have element")
    }

    /// Mutably borrow the fid, creating a fresh entry first if none exists.
    pub fn fid_mut(&mut self, id: u32) -> &mut Fid {
        self.add_if_absent(id);
        self.fidmap.get_mut(&id).expect("fidmap does not have element")
    }

    /// Run `f` with mutable access to the fid (created if absent).
    pub fn with_fid_mut<F,U>(&mut self, id: u32, f: F) -> U
        where F: FnOnce(&mut Fid) -> U {
        self.add_if_absent(id);
        f(self.fid_mut(id))
    }

    /// Run `f` with shared access to the fid (created if absent).
    #[allow(dead_code)]
    pub fn with_fid<F,U>(&mut self, id: u32, f: F) -> U
        where F: FnOnce(&Fid) -> U {
        self.add_if_absent(id);
        f(self.fid(id))
    }

    /// Copy path and uid from `old_id` into `new_id` (used by Twalk).
    /// Note: the open file descriptor is deliberately not duplicated.
    pub fn dup_fid(&mut self, old_id: u32, new_id: u32) {
        self.fid_mut(new_id).path = self.fid(old_id).path.clone();
        self.fid_mut(new_id).uid = self.fid(old_id).uid;
    }

    /// Remove the fid and close any file it holds; unknown ids are ignored.
    pub fn clunk(&mut self, id: u32) {
        match self.fidmap.remove(&id) {
            Some(ref mut fid) => fid.close(),
            None => (),
        }
    }

    /// Open the fid's path with the given 9p open flags and store the
    /// resulting descriptor on the fid.
    pub fn open(&mut self, id: u32, flags: u32) -> io::Result<()> {
        let path = self.fid(id).path.clone();
        let fd = self.filesystem.open(&path, flags)?;
        self.fid_mut(id).desc = fd;
        Ok(())
    }

    /// Join `name` onto the fid's directory path, rejecting non-directories
    /// (EBADF) and absolute or multi-component names (EINVAL).
    fn fid_dir_join(&mut self, id: u32, name: &str) -> io::Result<PathBuf> {
        let meta = self.metadata(id)?;
        if !meta.is_dir() {
            return Err(io::Error::from_raw_os_error(libc::EBADF));
        }
        let fname = PathBuf::from(name);
        if fname.is_absolute() || fname.components().count() != 1 {
            return Err(io::Error::from_raw_os_error(libc::EINVAL));
        }
        let mut path = self.fid(id).path.clone();
        path.push(fname);
        Ok(path)
    }

    /// Create `name` under directory fid `id`, chown it to the fid's uid
    /// and the requested gid, and return its metadata.
    pub fn create(&mut self, id: u32, name: String, flags: u32, mode: u32, gid: u32) -> io::Result<Metadata> {
        let path = self.fid_dir_join(id,&name)?;
        self.filesystem.create(&path, flags, mode)?;
        let uid = self.fid(id).uid;
        self.filesystem.chown(&path, uid, gid)?;
        self.filesystem.stat(&path)
    }

    /// Return the target of the symlink the fid points to.
    pub fn readlink(&mut self, id: u32) -> io::Result<OsString> {
        let path = self.fid(id).path.clone();
        self.filesystem.readlink(&path)
    }

    /// Stat the fid's path.
    pub fn metadata(&mut self, id: u32) -> io::Result<Metadata> {
        let path = self.fid(id).path.clone();
        self.filesystem.stat(&path)
    }

    /// Stream directory entries into the reply.
    // NOTE(review): a non-zero `off` is currently ignored — the seek call is
    // commented out — so resumed readdir requests restart from the
    // beginning. TODO: implement directory offset handling.
    pub fn readdir(&mut self, id: u32, off: u64, len: usize, pp: &mut PduParser) -> io::Result<()> {
        //let is_dir = self.fid(id).desc.is_dir();
        if off != 0 {
            //self.fid_mut(id).desc.borrow_dir().unwrap().seek(off as i64);
        }
        self.fid_mut(id).readdir(len, pp)
    }

    /// chmod the fid's path.
    pub fn chmod(&mut self, id: u32, mode: u32) -> io::Result<()> {
        let path = self.fid(id).path.clone();
        self.filesystem.chmod(&path, mode)
    }

    /// chown the fid's path.
    pub fn chown(&mut self, id: u32, uid: u32, gid: u32) -> io::Result<()> {
        let path = self.fid(id).path.clone();
        self.filesystem.chown(&path, uid, gid)
    }

    /// Update atime/mtime on the fid's path.
    pub fn touch(&mut self, id: u32, which: FsTouch, tv: (u64,u64)) -> io::Result<()> {
        let path = self.fid(id).path.clone();
        self.filesystem.touch(&path, which, tv)
    }

    /// Truncate is a no-op that reports success.
    // NOTE(review): setattr size changes are silently dropped — TODO
    // implement, or return EOPNOTSUPP so the client knows.
    pub fn truncate(&mut self, _id: u32, _size: u64) -> io::Result<()> {
        Ok(())
    }

    /// Filesystem statistics for the fid's path.
    pub fn statfs(&mut self, fid: u32) -> io::Result<StatFs> {
        let path = self.fid(fid).path.clone();
        self.filesystem.statfs(&path)
    }

    /// fsync (or fdatasync when `datasync`) the fid's open descriptor;
    /// EBADF if the fid has nothing open.
    pub fn fsync(&mut self, fid: u32, datasync: bool) -> io::Result<()> {
        match self.fid(fid).desc {
            FileDescriptor::File(ref file) => {
                let fd = file.as_raw_fd();
                unsafe {
                    // SAFETY: fd comes from a live, owned File handle.
                    let res = if datasync {
                        libc::fdatasync(fd)
                    } else {
                        libc::fsync(fd)
                    };
                    if res < 0 {
                        return Err(io::Error::last_os_error());
                    }
                }
            },
            FileDescriptor::Dir(ref dir) => { return dir.fsync(); },
            FileDescriptor::None => { return Err(io::Error::from_raw_os_error(libc::EBADF))},
        };
        Ok(())
    }
}
/// Per-fid server state: the owner uid reported at attach time, the guest
/// path the fid refers to, and the host descriptor once the fid is opened.
pub struct Fid {
    pub uid: u32,
    pub path: PathBuf,
    // FileDescriptor::None until open()/create() attaches a file or dir.
    desc: FileDescriptor,
}
impl Fid {
fn new() -> Fid {
Fid {
uid: 0, path: PathBuf::new(), desc: FileDescriptor::None,
}
}
pub fn read(&mut self, offset: u64, len: usize, pp: &mut PduParser) -> io::Result<(usize)> {
self.desc.borrow_file()?.seek(io::SeekFrom::Start(offset))?;
pp.chain.copy_from_reader(self.desc.borrow_file()?, len)
}
fn dirent_len(dent: &DirEntry) -> usize {
// qid + offset + type + strlen + str
return 13 + 8 + 1 + 2 + dent.name_bytes().len()
}
fn write_dirent(dent: &DirEntry, pp: &mut PduParser) -> io::Result<()> {
pp.write_qid_path_only(dent.ino())?;
pp.w64(dent.offset())?;
pp.w8(dent.file_type())?;
pp.w16(dent.name_bytes().len() as u16)?;
pp.chain.write(&dent.name_bytes())?;
Ok(())
}
pub fn readdir(&mut self, len: usize, pp: &mut PduParser) -> io::Result<()> {
let mut write_len = 0_usize;
pp.w32(0)?;
while let Some(entry) = self.desc.borrow_dir()?.next() {
match entry {
Ok(ref dent) => {
let dlen = Fid::dirent_len(dent);
if write_len + dlen > len {
self.desc.borrow_dir()?.restore_last_pos();
break;
}
write_len += dlen;
Fid::write_dirent(dent, pp)?;
}
Err(err) => return pp.bail_err(err),
}
}
pp.w32_at(0, write_len as u32);
pp.write_done()
}
pub fn close(&mut self) {
self.desc = FileDescriptor::None;
}
}

View File

@ -0,0 +1,412 @@
use std::mem;
use std::ffi::CString;
use std::ffi::OsString;
use std::os::unix::ffi::OsStrExt;
use std::fs::{self,File,Metadata,OpenOptions};
use std::io;
use std::path::{PathBuf,Path,Component};
use std::os::unix::fs::OpenOptionsExt;
use libc;
use super::readdir::ReadDir;
// Maximum symlink expansions during path resolution before giving up (ELOOP).
const MAX_SYMLINKS: usize = 16;
// Resolution aborts with ENAMETOOLONG past this length.
const PATH_MAX: usize = 1024; // it's actually 4096 on linux
// Access-mode bits of the 9P open flags (match Linux O_* values).
const O_RDONLY: u32 = 0;
const O_WRONLY: u32 = 1;
const O_RDWR: u32 = 2;
const O_ACCMODE: u32 = 0x3;
// Only these guest-supplied open flags are passed through to the host open().
const ALLOWED_FLAGS: u32 = (libc::O_APPEND | libc::O_TRUNC | libc::O_LARGEFILE
                            | libc::O_DIRECTORY | libc::O_DSYNC | libc::O_NOFOLLOW
                            | libc::O_SYNC) as u32;
/// Portable copy of the fields from the host statfs() result that are
/// forwarded to the guest in an Rstatfs reply.
#[derive(Default)]
pub struct StatFs {
    pub f_type: u32,
    pub f_bsize: u32,
    pub f_blocks: u64,
    pub f_bfree: u64,
    pub f_bavail: u64,
    pub f_files: u64,
    pub f_ffree: u64,
    // 64-bit fsid assembled from the two 32-bit halves of f_fsid.
    pub fsid: u64,
    pub f_namelen: u32,
}
impl StatFs {
fn new() -> StatFs {
StatFs { ..Default::default() }
}
}
/// Which timestamp a touch() call updates, and whether to use the supplied
/// time value or the current time (UTIME_NOW).
pub enum FsTouch {
    Atime,
    AtimeNow,
    Mtime,
    MtimeNow,
}
/// Host-filesystem operations used by the 9p server. All paths are
/// guest-relative; implementations are responsible for confining them.
pub trait FileSystemOps {
    fn open(&self, path: &Path, flags: u32) -> io::Result<FileDescriptor>;
    fn open_dir(&self, path: &Path) -> io::Result<FileDescriptor>;
    fn create(&self, path: &Path, flags: u32, mode: u32) -> io::Result<FileDescriptor>;
    fn stat(&self, path: &Path) -> io::Result<Metadata>;
    fn statfs(&self, path: &Path) -> io::Result<StatFs>;
    fn chown(&self, path: &Path, uid: u32, gid: u32) -> io::Result<()>;
    fn chmod(&self, path: &Path, mode: u32) -> io::Result<()>;
    // tv is (seconds, nanoseconds); ignored for the *Now variants.
    fn touch(&self, path: &Path, which: FsTouch, tv: (u64, u64)) -> io::Result<()>;
    fn truncate(&self, path: &Path, size: u64) -> io::Result<()>;
    fn readlink(&self, path: &Path) -> io::Result<OsString>;
    // fn symlink(&self, target: &Path, linkpath: &Path) -> io::Result<()>;
}
/// Host-side view of the shared filesystem: guest paths are resolved by
/// `resolver` (confined under its root); `init_path` backs the magic
/// "/phinit" file; `readonly` rejects any write access.
#[derive(Clone)]
pub struct FileSystem {
    init_path: PathBuf,
    resolver: PathResolver,
    readonly: bool,
}
/// An open host object attached to a fid: nothing yet, an open directory
/// stream, or an open regular file.
pub enum FileDescriptor {
    None,
    Dir(ReadDir),
    File(File),
}
impl FileDescriptor {
    /// True when this descriptor wraps an open regular file.
    #[allow(dead_code)]
    pub fn is_file(&self) -> bool {
        if let FileDescriptor::File(..) = *self { true } else { false }
    }
    /// True when this descriptor wraps an open directory stream.
    #[allow(dead_code)]
    pub fn is_dir(&self) -> bool {
        if let FileDescriptor::Dir(..) = *self { true } else { false }
    }
    /// Mutable access to the open file, or EBADF when this is not a file.
    pub fn borrow_file(&mut self) -> io::Result<&mut File> {
        if let FileDescriptor::File(ref mut f) = *self {
            Ok(f)
        } else {
            Err(os_err(libc::EBADF))
        }
    }
    /// Mutable access to the directory stream, or EBADF when this is not
    /// a directory.
    pub fn borrow_dir(&mut self) -> io::Result<&mut ReadDir> {
        if let FileDescriptor::Dir(ref mut d) = *self {
            Ok(d)
        } else {
            Err(os_err(libc::EBADF))
        }
    }
}
impl FileSystem {
    /// `root` is the host directory guest paths are confined to;
    /// `init_path` is the host file served for the magic "/phinit" path.
    pub fn new(root: PathBuf, init_path: PathBuf, readonly: bool) -> FileSystem {
        FileSystem { resolver: PathResolver::new(root), init_path, readonly }
    }
    /// Map a guest path to the corresponding host path. "/phinit" is
    /// special-cased to the configured init binary.
    fn fullpath(&self, path: &Path) -> io::Result<PathBuf> {
        // Compare as Paths: the original `path.to_str().unwrap()` panicked
        // on any non-UTF-8 path supplied by the guest.
        if path == Path::new("/phinit") {
            return Ok(self.init_path.clone())
        }
        self.resolver.fullpath(path)
    }
    /// Translate 9P open flags into OpenOptions, enforcing read-only mode
    /// (EACCES) and masking the flags down to ALLOWED_FLAGS.
    fn flags_to_open_options(&self, flags: u32) -> io::Result<OpenOptions> {
        let acc = flags & O_ACCMODE;
        let mut oo = OpenOptions::new();
        if self.readonly && acc != O_RDONLY {
            return Err(io::Error::from_raw_os_error(libc::EACCES));
        }
        match acc {
            O_RDONLY => { oo.read(true).write(false); }
            O_WRONLY => { oo.read(false).write(true); }
            O_RDWR => { oo.read(true).write(true); }
            _ => return Err(os_err(libc::EINVAL))
        }
        // There should never be a symlink in path but add O_NOFOLLOW anyways
        let custom = libc::O_NOFOLLOW | (flags & ALLOWED_FLAGS) as i32;
        oo.custom_flags(custom);
        Ok(oo)
    }
}
///
/// Resolves paths into a canonical path which is always no higher
/// than the `root` path.
#[derive(Clone)]
struct PathResolver {
    // Absolute host directory that acts as the jail root for resolution.
    root: PathBuf,
}
impl PathResolver {
    fn new(root: PathBuf) -> PathResolver {
        // root must be absolute path
        PathResolver{ root }
    }
    ///
    /// Canonicalize `path` so that .. segments in both in
    /// the path itself and any symlinks in the path do
    /// not escape. The returned path will not contain any
    /// symlinks and refers to a path which is a subdirectory
    /// of `self.root`
    fn resolve_path(&self, path: &Path) -> io::Result<PathBuf> {
        let mut buf = PathBuf::from(path);
        let mut nlinks = 0_usize;
        // Each _resolve() pass expands at most one symlink; bound the
        // number of passes (ELOOP) and the growing path length
        // (ENAMETOOLONG) so malicious link chains cannot loop forever.
        while self._resolve(&mut buf)? {
            nlinks += 1;
            if nlinks > MAX_SYMLINKS {
                return Err(io::Error::from_raw_os_error(libc::ELOOP))
            }
            if buf.as_os_str().len() > PATH_MAX {
                return Err(io::Error::from_raw_os_error(libc::ENAMETOOLONG))
            }
        }
        Ok(buf)
    }
    /// True if the host path is itself a symlink (errors count as "no").
    fn is_path_symlink(path: &Path) -> bool {
        match path.symlink_metadata() {
            Ok(meta) => meta.file_type().is_symlink(),
            Err(..) => false
        }
    }
    /// Resolve a guest path and rebase it onto the host root.
    fn fullpath(&self, path: &Path) -> io::Result<PathBuf> {
        let resolved = self.resolve_path(path)?;
        Ok(self.realpath(&resolved))
    }
    /// Rebase `path` under `self.root` by stripping a leading RootDir
    /// component and joining onto root.
    fn realpath(&self, path: &Path) -> PathBuf {
        let mut cs = path.components();
        if path.is_absolute() {
            cs.next();
        }
        self.root.join(cs.as_path())
    }
    /// If the host path for `path` is a symlink, replace the last
    /// component with the link target and report true.
    fn resolve_symlink(&self, path: &mut PathBuf) -> io::Result<bool> {
        let realpath = self.realpath(path);
        if PathResolver::is_path_symlink(&realpath) {
            path.pop();
            // The target may itself be relative or contain "..";
            // resolve_path() loops until no links remain.
            path.push(realpath.read_link()?);
            return Ok(true)
        }
        Ok(false)
    }
    /// Fold one component into `pathbuf`. ParentDir pops (so ".." can
    /// never climb above the rebuilt root); returns true when a symlink
    /// was expanded.
    fn resolve_component(&self, c: Component, pathbuf: &mut PathBuf) -> io::Result<bool> {
        match c {
            Component::RootDir => pathbuf.push("/"),
            Component::CurDir | Component::Prefix(..) => (),
            Component::ParentDir => { pathbuf.pop(); },
            Component::Normal(name) => {
                pathbuf.push(name);
                let link = self.resolve_symlink(pathbuf)?;
                return Ok(link)
            }
        };
        Ok(false)
    }
    /// One resolution pass: rebuild `path` in place from "/", component by
    /// component. Returns Ok(true) as soon as a symlink was expanded (the
    /// caller restarts the scan), Ok(false) when none remained.
    fn _resolve(&self, path: &mut PathBuf) -> io::Result<bool> {
        let copy = (*path).clone();
        let mut components = copy.components();
        // Pushing an absolute path replaces the buffer contents entirely.
        path.push("/");
        while let Some(c) = components.next() {
            if self.resolve_component(c, path)? {
                // Splice the not-yet-processed tail back onto the
                // partially rebuilt path.
                let tmp = path.join(components.as_path());
                path.push(tmp);
                return Ok(true)
            }
        }
        Ok(false)
    }
}
/// Convert a filesystem path into a NUL-terminated C string for FFI calls.
/// Fails (InvalidInput) if the path contains an interior NUL byte.
fn cstr(path: &Path) -> io::Result<CString> {
    let bytes = path.as_os_str().as_bytes();
    CString::new(bytes).map_err(io::Error::from)
}
impl FileSystemOps for FileSystem {
    /// Open `path`. Directories become a ReadDir stream (ignoring `flags`);
    /// regular files honor the translated open flags.
    fn open(&self, path: &Path, flags: u32) -> io::Result<FileDescriptor> {
        let fullpath = self.fullpath(path)?;
        let meta = fullpath.metadata()?;
        if meta.is_dir() {
            let read_dir = ReadDir::open(&fullpath)?;
            return Ok(FileDescriptor::Dir(read_dir))
        }
        let options = self.flags_to_open_options(flags)?;
        let file = options.open(&fullpath)?;
        return Ok(FileDescriptor::File(file))
    }
    /// Create a regular file with the given flags and permission bits.
    fn create(&self, path: &Path, flags: u32, mode: u32) -> io::Result<FileDescriptor> {
        let fullpath = self.fullpath(path)?;
        let mut options = self.flags_to_open_options(flags)?;
        options.create(true);
        options.mode(mode & 0o777);
        let file = options.open(&fullpath)?;
        return Ok(FileDescriptor::File(file))
    }
    /// Open `path` as a directory stream.
    fn open_dir(&self, path: &Path) -> io::Result<FileDescriptor> {
        let fullpath = self.fullpath(path)?;
        let read_dir = ReadDir::open(&fullpath)?;
        return Ok(FileDescriptor::Dir(read_dir))
    }
    /// stat() the resolved host path.
    fn stat(&self, path: &Path) -> io::Result<Metadata> {
        let fullpath = self.fullpath(path)?;
        let meta = fullpath.metadata()?;
        Ok(meta)
    }
    /// statfs() via the raw syscall (std has no statfs wrapper); the
    /// result is repacked into our portable StatFs.
    fn statfs(&self, path: &Path) -> io::Result<StatFs> {
        let fullpath = self.fullpath(path)?;
        let path_cstr = cstr(&fullpath)?;
        let mut stat: LibcStatFs;
        unsafe {
            // SAFETY: LibcStatFs is a plain #[repr(C)] struct of integers;
            // all-zero is a valid value and statfs() overwrites it.
            stat = mem::zeroed();
            let ret = statfs(path_cstr.as_ptr(), &mut stat);
            if ret < 0 {
                return Err(io::Error::last_os_error());
            }
        }
        let mut statfs = StatFs::new();
        statfs.f_type = stat.f_type as u32;
        statfs.f_bsize = stat.f_bsize as u32;
        statfs.f_blocks = stat.f_blocks;
        statfs.f_bfree = stat.f_bfree;
        statfs.f_bavail = stat.f_bavail;
        statfs.f_files = stat.f_files;
        statfs.f_ffree = stat.f_ffree;
        statfs.f_namelen = stat.f_namelen as u32;
        // Pack the two 32-bit fsid words into one u64.
        statfs.fsid = stat.f_fsid.val[0] as u64 | ((stat.f_fsid.val[1] as u64) << 32);
        Ok(statfs)
    }
    /// chown() the resolved host path.
    fn chown(&self, path: &Path, uid: u32, gid: u32) -> io::Result<()> {
        let fullpath = self.fullpath(path)?;
        let path_cstr = cstr(&fullpath)?;
        unsafe {
            if libc::chown(path_cstr.as_ptr(), uid, gid) < 0 {
                return Err(io::Error::last_os_error());
            }
            Ok(())
        }
    }
    /// chmod() the resolved host path.
    fn chmod(&self, path: &Path, mode: u32) -> io::Result<()> {
        // XXX see std::os::unix::fs::PermissionsExt for a better way
        let fullpath = self.fullpath(path)?;
        let path_cstr = cstr(&fullpath)?;
        unsafe {
            if libc::chmod(path_cstr.as_ptr(), mode) < 0 {
                return Err(io::Error::last_os_error());
            }
            Ok(())
        }
    }
    /// Update atime or mtime via utimensat(). The untouched timestamp is
    /// passed as UTIME_OMIT; the *Now variants pass UTIME_NOW.
    fn touch(&self, path: &Path, which: FsTouch, tv: (u64, u64)) -> io::Result<()> {
        let fullpath = self.fullpath(path)?;
        let path_cstr = cstr(&fullpath)?;
        let tval = libc::timespec {
            tv_sec: tv.0 as i64,
            tv_nsec: tv.1 as i64,
        };
        let omit = libc::timespec {
            tv_sec: 0,
            tv_nsec: libc::UTIME_OMIT,
        };
        let now = libc::timespec {
            tv_sec: 0,
            tv_nsec: libc::UTIME_NOW,
        };
        // utimensat expects [atime, mtime].
        let times = match which {
            FsTouch::Atime => [tval, omit],
            FsTouch::AtimeNow => [ now, omit ],
            FsTouch::Mtime => [omit, tval ],
            FsTouch::MtimeNow => [omit, now],
        };
        unsafe {
            // Pass the array pointer directly. The original
            // `&times.as_ptr() as *const _ as *const libc::timespec` took
            // the address of the local pointer variable, handing utimensat
            // stack garbage instead of the timespec pair.
            // AT_FDCWD is unused here since fullpath is always absolute.
            if libc::utimensat(libc::AT_FDCWD, path_cstr.as_ptr(), times.as_ptr(), 0) < 0 {
                return Err(io::Error::last_os_error());
            }
        }
        Ok(())
    }
    /// truncate() the resolved host path to `size` bytes.
    fn truncate(&self, path: &Path, size: u64) -> io::Result<()> {
        let fullpath = self.fullpath(path)?;
        let path_cstr = cstr(&fullpath)?;
        unsafe {
            if libc::truncate64(path_cstr.as_ptr(), size as i64) < 0 {
                return Err(io::Error::last_os_error());
            }
        }
        Ok(())
    }
    /// Read the symlink target of the resolved host path.
    /// NOTE(review): the target is returned as-is; it is not re-confined
    /// under the resolver root — confirm callers resolve it again.
    fn readlink(&self, path: &Path) -> io::Result<OsString> {
        let fullpath = self.fullpath(path)?;
        fs::read_link(&fullpath).map(|pbuf| pbuf.into_os_string())
    }
}
/// C-layout mirror of the kernel's struct statfs (64-bit layout) for the
/// raw statfs() call below. Field order must match the C definition.
#[repr(C)]
pub struct LibcStatFs {
    f_type: u64,
    f_bsize: u64,
    f_blocks: u64,
    f_bfree: u64,
    f_bavail: u64,
    f_files: u64,
    f_ffree: u64,
    f_fsid: FsidT,
    f_namelen: u64,
    f_frsize: u64,
    f_spare: [u64; 5],
}
/// Mirror of fsid_t: two 32-bit words identifying the filesystem.
#[repr(C)]
struct FsidT{
    val: [libc::c_int; 2],
}
// Direct binding to statfs(2); declared here because the libc crate's
// statfs type does not match the layout we need.
extern {
    pub fn statfs(path: *const libc::c_char, buf: *mut LibcStatFs) -> libc::c_int;
}
/// Shorthand for building an io::Error from a raw errno value.
fn os_err(errno: i32) -> io::Error {
    io::Error::from_raw_os_error(errno)
}

View File

@ -0,0 +1,90 @@
use std::sync::{Arc,RwLock};
use std::thread;
use std::path::{Path,PathBuf};
use memory::GuestRam;
use virtio::{self,VirtioBus,VirtioDeviceOps, VirtQueue};
use vm::Result;
mod fid;
mod pdu;
mod commands;
mod readdir;
mod filesystem;
use self::pdu::PduParser;
use self::commands::Commands;
// Virtio device id for a 9p transport device.
const VIRTIO_ID_9P: u16 = 9;
// Feature bit: the config space carries the mount tag.
const VIRTIO_9P_MOUNT_TAG: u64 = 0x1;
/// Virtio-9p device: exports `root_dir` (plus the special /phinit file at
/// `init_path`) to the guest over a single virtqueue.
pub struct VirtioP9 {
    root_dir: PathBuf,
    init_path: PathBuf,
    // Feature bits negotiated by the guest.
    feature_bits: u64,
    // Pre-built config space: mount tag length + tag bytes.
    config: Vec<u8>,
}
impl VirtioP9 {
    /// Build the virtio-9p config space: u16 little-endian tag length,
    /// the tag bytes, and a trailing zero byte (total tag_len + 3, which
    /// must match set_config_size below).
    fn create_config(tag_name: &str) -> Vec<u8> {
        let tag_len = tag_name.len() as u16;
        let mut config = Vec::with_capacity(tag_name.len() + 3);
        config.push(tag_len as u8);
        config.push((tag_len >> 8) as u8);
        // extend_from_slice avoids the throwaway Vec that
        // append(&mut tag_name.as_bytes().to_vec()) allocated.
        config.extend_from_slice(tag_name.as_bytes());
        config.push(0);
        config
    }
    /// Construct the device wrapped for registration on the virtio bus.
    fn new(tag_name: &str, root_dir: &str, init_path: &Path) -> Arc<RwLock<VirtioP9>> {
        Arc::new(RwLock::new(VirtioP9 {
            root_dir: PathBuf::from(root_dir),
            init_path: init_path.to_path_buf(),
            feature_bits: 0,
            config: VirtioP9::create_config(tag_name),
        }))
    }
    /// Create and register a virtio-9p device exporting `root_dir` under
    /// mount tag `tag_name`.
    pub fn create(vbus: &mut VirtioBus, tag_name: &str, root_dir: &str, init_path: &Path) -> Result<()> {
        vbus.new_virtio_device(VIRTIO_ID_9P, VirtioP9::new(tag_name, root_dir, init_path))
            .set_num_queues(1)
            .set_features(VIRTIO_9P_MOUNT_TAG)
            .set_config_size(tag_name.len() + 3)
            .register()
    }
}
impl VirtioDeviceOps for VirtioP9 {
    // Device reset requested by the guest; currently only logged.
    fn reset(&mut self) {
        println!("Reset called");
    }
    // Record the guest's accepted feature bits; always succeeds.
    fn enable_features(&mut self, bits: u64) -> bool {
        self.feature_bits = bits;
        true
    }
    // Serve reads from the pre-built config (mount tag) buffer.
    fn read_config(&mut self, offset: usize, size: usize) -> u64 {
        virtio::read_config_buffer(&self.config, offset, size)
    }
    // Spawn the 9p server thread on the device's single virtqueue.
    fn start(&mut self, memory: GuestRam, mut queues: Vec<VirtQueue>) {
        let vq = queues.pop().unwrap();
        let root_dir = self.root_dir.clone();
        let init_path = self.init_path.clone();
        thread::spawn(|| run_device(memory, vq, root_dir, init_path));
    }
}
/// 9p server loop: for each descriptor chain the guest posts, wrap it in a
/// PduParser and dispatch the contained 9P command. Never returns.
fn run_device(memory: GuestRam, vq: VirtQueue, root_dir: PathBuf, init_path: PathBuf) {
    let mut commands = Commands::new(root_dir,init_path,memory.clone());
    vq.on_each_chain(|mut chain| {
        let mut pp = PduParser::new(&mut chain, memory.clone());
        commands.handle(&mut pp);
    });
}

View File

@ -0,0 +1,322 @@
use std::fs::Metadata;
const P9_RLERROR: u8 = 7;
use byteorder::{LittleEndian,ReadBytesExt,WriteBytesExt};
use std::io::{self,Read,Write};
use std::os::linux::fs::MetadataExt;
use std::os::unix::ffi::OsStrExt;
use std::ffi::OsStr;
use memory::GuestRam;
use virtio::Chain;
use super::filesystem::StatFs;
use libc;
// Rgetattr mask: all the basic stat fields are valid in our replies.
const P9_STATS_BASIC: u64 = 0x000007ff;
// Every 9P message starts with size[4] + type[1] + tag[2].
const P9_HEADER_LEN: usize = 7;
// qid.type values (subset of the 9P2000 QTDIR/QTLINK/etc bits).
const P9_QTFILE: u8 = 0x00;
const P9_QTLINK: u8 = 0x01;
const _P9_QTSYMLINK: u8 = 0x02;
const P9_QTDIR: u8 = 0x80;
/// Reader/writer for one 9P protocol data unit carried in a virtqueue
/// descriptor chain. Parses the request header, then builds the reply in
/// place, patching the reply header once the final length is known.
pub struct PduParser<'a> {
    memory: GuestRam,
    pub chain: &'a mut Chain,
    // Request header fields captured by command().
    size: u32,
    cmd: u8,
    tag: u16,
    // Guest-physical address where the reply starts; 0 until read_done().
    reply_start_addr: u64,
}
/// Decoded Tsetattr payload: `valid` is a bitmask (see the P9Attr consts)
/// saying which of the following fields the client wants applied.
#[derive(Default)]
pub struct P9Attr {
    valid: u32,
    mode: u32,
    uid: u32,
    gid: u32,
    size: u64,
    atime_sec: u64,
    atime_nsec: u64,
    mtime_sec: u64,
    mtime_nsec: u64,
}
impl P9Attr {
    // Bit positions of the `valid` mask (mirror Linux 9p P9_ATTR_* bits).
    const MODE: u32 = (1 << 0);
    const UID: u32 = (1 << 1);
    const GID: u32 = (1 << 2);
    const SIZE: u32 = (1 << 3);
    const ATIME: u32 = (1 << 4);
    const MTIME: u32 = (1 << 5);
    const CTIME: u32 = (1 << 6);
    const ATIME_SET: u32 = (1 << 7);
    const MTIME_SET: u32 = (1 << 8);
    // Low 7 bits: MODE through CTIME.
    const MASK: u32 = 127;
    // (uid_t)-1 — "leave unchanged" for chown().
    const NO_UID: u32 = 0xFFFFFFFF;
    fn new() -> P9Attr {
        P9Attr { ..Default::default() }
    }
    // True when any of the bits in `flag` is set in `valid`.
    fn is_valid(&self, flag: u32) -> bool {
        self.valid & flag != 0
    }
    pub fn has_mode(&self) -> bool { self.is_valid(P9Attr::MODE) }
    pub fn has_atime(&self) -> bool { self.is_valid(P9Attr::ATIME) }
    pub fn has_atime_set(&self) -> bool { self.is_valid(P9Attr::ATIME_SET) }
    pub fn has_mtime(&self) -> bool { self.is_valid(P9Attr::MTIME) }
    pub fn has_mtime_set(&self) -> bool { self.is_valid(P9Attr::MTIME_SET) }
    // A chown is needed when UID or GID is requested, or when CTIME is the
    // only basic bit set (a bare ctime update is performed as a no-op
    // chown(-1,-1)) — presumably mirroring qemu's 9p server; confirm.
    pub fn has_chown(&self) -> bool {
        self.valid & P9Attr::MASK == P9Attr::CTIME||
            self.is_valid(P9Attr::UID|P9Attr::GID)
    }
    pub fn has_size(&self) -> bool { self.is_valid(P9Attr::SIZE) }
    pub fn mode(&self) -> u32 {
        self.mode
    }
    pub fn size(&self) -> u64 {
        self.size
    }
    // (uid, gid) to pass to chown(); unset fields become NO_UID (-1),
    // which chown interprets as "leave unchanged".
    pub fn chown_ids(&self) -> (u32, u32) {
        let uid = if self.is_valid(P9Attr::UID)
            { self.uid } else { P9Attr::NO_UID };
        let gid = if self.is_valid(P9Attr::GID)
            { self.gid } else { P9Attr::NO_UID };
        (uid, gid)
    }
    pub fn atime(&self) -> (u64, u64) {
        (self.atime_sec, self.atime_nsec)
    }
    pub fn mtime(&self) -> (u64, u64) {
        (self.mtime_sec, self.mtime_nsec)
    }
    // Decode the wire format: valid[4] mode[4] uid[4] gid[4] size[8]
    // atime_sec[8] atime_nsec[8] mtime_sec[8] mtime_nsec[8].
    fn parse(&mut self, pp: &mut PduParser) -> io::Result<()> {
        self.valid = pp.r32()?;
        self.mode = pp.r32()?;
        self.uid = pp.r32()?;
        self.gid = pp.r32()?;
        self.size = pp.r64()?;
        self.atime_sec = pp.r64()?;
        self.atime_nsec = pp.r64()?;
        self.mtime_sec = pp.r64()?;
        self.mtime_nsec = pp.r64()?;
        Ok(())
    }
}
impl <'a> PduParser<'a> {
    /// Wrap a descriptor chain for parsing one request / writing one reply.
    pub fn new(chain: &'a mut Chain, memory: GuestRam) -> PduParser<'a> {
        PduParser{ memory, chain, size: 0, cmd: 0, tag: 0, reply_start_addr: 0 }
    }
    /// Parse the 7-byte header (size[4] type[1] tag[2]); returns the type.
    pub fn command(&mut self) -> io::Result<u8> {
        self.size = self.r32()?;
        self.cmd = self.r8()?;
        self.tag = self.r16()?;
        Ok(self.cmd)
    }
    /// Call after the request payload is fully parsed: record where the
    /// reply begins in guest memory and reserve 7 bytes for the reply
    /// header, to be patched later by write_done() or bail_err().
    pub fn read_done(&mut self) -> io::Result<()> {
        // XXX unwrap
        self.reply_start_addr = self.chain.current_write_address(8).unwrap();
        // reserve header
        self.w32(0)?;
        self.w8(0)?;
        self.w16(0)?;
        Ok(())
    }
    /// Abort the current reply with an Rlerror carrying the errno from
    /// `error` (0 when there is no OS errno), then flush the chain.
    pub fn bail_err(&mut self, error: io::Error) -> io::Result<()> {
        // Make sure the reply header space has been reserved.
        if self.reply_start_addr == 0 {
            self.read_done()?;
        }
        let err = match error.raw_os_error() {
            Some(errno) => errno as u32,
            None => 0,
        };
        // Rlerror layout: size[4] type[1] tag[2] ecode[4].
        self._w32_at(0,P9_HEADER_LEN as u32 + 4);
        self._w8_at(4, P9_RLERROR);
        self._w16_at(5, self.tag);
        self._w32_at(7, err);
        self.chain.flush_chain();
        Ok(())
    }
    // The w*_at variants patch previously written reply bytes in guest
    // memory. Offsets are relative to the reply payload (header skipped);
    // the _w*_at forms are relative to the reply header itself.
    #[allow(dead_code)]
    pub fn w8_at(&self, offset: usize, val: u8) {
        self._w8_at(offset + P9_HEADER_LEN, val);
    }
    pub fn _w8_at(&self, offset: usize, val: u8) {
        self.memory.write_int::<u8>(self.reply_start_addr + offset as u64, val).unwrap();
    }
    #[allow(dead_code)]
    pub fn w16_at(&self, offset: usize, val: u16) {
        self._w16_at(offset + P9_HEADER_LEN, val);
    }
    pub fn _w16_at(&self, offset: usize, val: u16) {
        self.memory.write_int::<u16>(self.reply_start_addr + offset as u64, val).unwrap();
    }
    pub fn w32_at(&self, offset: usize, val: u32) {
        self._w32_at(offset + P9_HEADER_LEN, val);
    }
    pub fn _w32_at(&self, offset: usize, val: u32) {
        self.memory.write_int::<u32>(self.reply_start_addr + offset as u64, val).unwrap();
    }
    /// Finish a successful reply: patch size, type (request type + 1 per
    /// 9P convention) and tag into the reserved header, then flush.
    pub fn write_done(&mut self) -> io::Result<()> {
        self._w32_at(0, self.chain.get_wlen() as u32);
        let cmd = self.cmd + 1;
        self._w8_at(4, cmd);
        let tag = self.tag;
        self._w16_at(5, tag);
        self.chain.flush_chain();
        Ok(())
    }
    /// Read a 9P string: u16 length followed by UTF-8 bytes.
    pub fn read_string(&mut self) -> io::Result<String> {
        let len = self.r16()?;
        if len == 0 {
            return Ok(String::new());
        }
        let mut buf = vec![0u8; len as usize];
        self.chain.read_exact(&mut buf)?;
        let s = String::from_utf8(buf)
            .map_err(|_| io::Error::new(io::ErrorKind::Other, "bad 9p string"))?;
        Ok(s)
    }
    /// Read a Tsetattr attribute block.
    pub fn read_attr(&mut self) -> io::Result<P9Attr> {
        let mut attr = P9Attr::new();
        attr.parse(self)?;
        Ok(attr)
    }
    /// Write a 9P string: u16 length followed by the raw bytes.
    pub fn write_string(&mut self, str: &str) -> io::Result<()> {
        self.w16(str.len() as u16)?;
        self.chain.write_all(str.as_bytes())
    }
    /// Same as write_string but for OS strings (arbitrary bytes on unix).
    pub fn write_os_string(&mut self, str: &OsStr) -> io::Result<()> {
        self.w16(str.len() as u16)?;
        self.chain.write_all(str.as_bytes())
    }
    // True when the metadata describes a symlink.
    fn is_lnk(meta: &Metadata) -> bool {
        meta.st_mode() & libc::S_IFMT == libc::S_IFLNK
    }
    // Map host file type onto the 9P qid.type byte.
    fn meta_to_qtype(meta: &Metadata) -> u8 {
        if meta.is_dir() {
            P9_QTDIR
        } else if PduParser::is_lnk(meta) {
            P9_QTLINK
        } else {
            P9_QTFILE
        }
    }
    /// Write a qid (type[1] version[4] path[8]) derived from host metadata.
    pub fn write_qid(&mut self, meta: &Metadata) -> io::Result<()> {
        // type
        self.w8(PduParser::meta_to_qtype(meta))?;
        // version: cheap mix of mtime and size so the qid version changes
        // when the file changes.
        self.w32(meta.st_mtime() as u32 ^ (meta.st_size() << 8) as u32)?;
        // path
        self.w64(meta.st_ino())
    }
    /// Write a qid with only the path (inode) filled in; type and version
    /// are zero (used for dirents where no stat was performed).
    pub fn write_qid_path_only(&mut self, ino: u64) -> io::Result<()> {
        self.w8(0)?;
        self.w32(0)?;
        self.w64(ino)
    }
    /// Write an Rgetattr body (P9_STATS_BASIC mask + qid + stat fields;
    /// the trailing four zero u64s are the btime/gen/data_version fields
    /// we do not report).
    pub fn write_statl(&mut self, st: &Metadata) -> io::Result<()> {
        self.w64(P9_STATS_BASIC)?;
        self.write_qid(&st)?;
        self.w32(st.st_mode())?;
        self.w32(st.st_uid())?;
        self.w32(st.st_gid())?;
        self.w64(st.st_nlink())?;
        self.w64(st.st_rdev())?;
        self.w64(st.st_size())?;
        self.w64(st.st_blksize())?;
        self.w64(st.st_blocks())?;
        self.w64(st.st_atime() as u64)?;
        self.w64(st.st_atime_nsec() as u64)?;
        self.w64(st.st_mtime() as u64)?;
        self.w64(st.st_mtime_nsec() as u64)?;
        self.w64(st.st_ctime() as u64)?;
        self.w64(st.st_ctime_nsec() as u64)?;
        self.w64(0)?;
        self.w64(0)?;
        self.w64(0)?;
        self.w64(0)?;
        Ok(())
    }
    /// Write an Rstatfs body from the portable StatFs.
    pub fn write_statfs(&mut self, statfs: StatFs) -> io::Result<()> {
        self.w32(statfs.f_type)?;
        self.w32(statfs.f_bsize)?;
        self.w64(statfs.f_blocks)?;
        self.w64(statfs.f_bfree)?;
        self.w64(statfs.f_bavail)?;
        self.w64(statfs.f_files)?;
        self.w64(statfs.f_ffree)?;
        self.w64(statfs.fsid)?;
        self.w32(statfs.f_namelen)?;
        Ok(())
    }
    // Little-endian primitive readers/writers over the chain.
    pub fn r8(&mut self) -> io::Result<u8> {
        self.chain.read_u8()
    }
    pub fn r16(&mut self) -> io::Result<u16> {
        self.chain.read_u16::<LittleEndian>()
    }
    pub fn r32(&mut self) -> io::Result<u32> {
        self.chain.read_u32::<LittleEndian>()
    }
    pub fn r64(&mut self) -> io::Result<u64> {
        self.chain.read_u64::<LittleEndian>()
    }
    pub fn w8(&mut self, val: u8) -> io::Result<()> {
        self.chain.write_u8(val)
    }
    pub fn w16(&mut self, val: u16) -> io::Result<()> {
        self.chain.write_u16::<LittleEndian>(val)
    }
    pub fn w32(&mut self, val: u32) -> io::Result<()> {
        self.chain.write_u32::<LittleEndian>(val)
    }
    pub fn w64(&mut self, val: u64) -> io::Result<()> {
        self.chain.write_u64::<LittleEndian>(val)
    }
}

View File

@ -0,0 +1,131 @@
use std::path::Path;
use std::mem;
use std::ptr;
use std::io;
use std::ffi::{OsStr,CStr,CString};
use std::os::unix::ffi::OsStrExt;
use libc;
// RAII wrapper around a libc DIR*; closedir() happens in Drop.
struct Dir(*mut libc::DIR);
/// Iterator over a host directory via readdir64_r, remembering the
/// previous stream position so one entry can be "pushed back".
pub struct ReadDir {
    dirp: Dir,
    // telldir() position before the most recent next() call.
    last_pos: i64,
}
/// One directory entry; owns a copy of the raw dirent64.
pub struct DirEntry {
    entry: libc::dirent64,
}
/// Build a NUL-terminated C string from `path` for passing to libc;
/// errors if the path contains an interior NUL byte.
fn cstr(path: &Path) -> io::Result<CString> {
    CString::new(path.as_os_str().as_bytes()).map_err(From::from)
}
impl ReadDir {
    /// opendir() the given path.
    pub fn open(path: &Path) -> io::Result<ReadDir> {
        let p = cstr(path)?;
        unsafe {
            let ptr = libc::opendir(p.as_ptr());
            if ptr.is_null() {
                Err(io::Error::last_os_error())
            } else {
                Ok(ReadDir{ dirp: Dir(ptr), last_pos: 0 })
            }
        }
    }
    /// Current stream position (telldir()).
    pub fn tell(&self) -> io::Result<i64> {
        unsafe {
            let loc = libc::telldir(self.dirp.0);
            if loc == -1 {
                return Err(io::Error::last_os_error());
            }
            Ok(loc)
        }
    }
    /// Reposition the stream to a value previously returned by tell().
    pub fn seek(&self, loc: i64) {
        unsafe { libc::seekdir(self.dirp.0, loc)}
    }
    /// fsync() the directory itself via its dirfd (persists entry
    /// creation/removal within this directory).
    pub fn fsync(&self) -> io::Result<()> {
        unsafe {
            if libc::fsync(libc::dirfd(self.dirp.0)) < 0 {
                return Err(io::Error::last_os_error());
            }
        }
        Ok(())
    }
    // Record the position before reading an entry; tell() failures are
    // ignored and keep the previous saved position.
    fn save_current_pos(&mut self) {
        match self.tell() {
            Ok(loc) => self.last_pos = loc,
            Err(_) => (),
        };
    }
    /// Rewind to just before the most recently returned entry, so it will
    /// be produced again by the next call to next().
    pub fn restore_last_pos(&mut self) {
        self.seek(self.last_pos)
    }
}
impl Iterator for ReadDir {
    type Item = io::Result<DirEntry>;

    /// Produce the next directory entry, skipping "." and "..".
    fn next(&mut self) -> Option<io::Result<DirEntry>> {
        // Remember where we were so restore_last_pos() can push this
        // entry back (see Fid::readdir()).
        self.save_current_pos();
        unsafe {
            let mut ret = DirEntry {
                // SAFETY: dirent64 is plain C data; readdir64_r fills it.
                entry: mem::zeroed(),
            };
            let mut entry_ptr = ptr::null_mut();
            loop {
                // readdir_r-family functions return the error number
                // directly and do not promise to set errno, so build the
                // error from the return value — last_os_error() could
                // report a stale or unrelated errno here.
                let err = libc::readdir64_r(self.dirp.0, &mut ret.entry, &mut entry_ptr);
                if err != 0 {
                    return Some(Err(io::Error::from_raw_os_error(err)))
                }
                // NULL result pointer with a zero return means end of stream.
                if entry_ptr.is_null() {
                    return None
                }
                if ret.name_bytes() != b"." && ret.name_bytes() != b".." {
                    return Some(Ok(ret))
                }
            }
        }
    }
}
impl Drop for Dir {
    // Close the DIR* when the wrapper goes away; the result of closedir()
    // is deliberately ignored (nothing useful can be done in Drop).
    fn drop(&mut self) {
        let _ = unsafe { libc::closedir(self.0) };
    }
}
impl DirEntry {
    /// Entry name as an OsStr (arbitrary bytes on unix).
    #[allow(dead_code)]
    pub fn file_name(&self) -> &OsStr {
        OsStr::from_bytes(self.name_bytes())
    }
    /// Opaque offset of the next entry (dirent d_off), used as the 9P
    /// dirent offset field.
    pub fn offset(&self) -> u64 {
        self.entry.d_off as u64
    }
    /// Raw d_type byte (DT_REG, DT_DIR, ...).
    pub fn file_type(&self) -> u8 {
        self.entry.d_type
    }
    /// Inode number.
    pub fn ino(&self) -> u64 {
        self.entry.d_ino as u64
    }
    /// Entry name bytes, without the trailing NUL.
    pub fn name_bytes(&self) -> &[u8] {
        unsafe {
            // SAFETY: the kernel NUL-terminates d_name within the struct.
            CStr::from_ptr(self.entry.d_name.as_ptr()).to_bytes()
        }
    }
}

45
src/devices/virtio_rng.rs Normal file
View File

@ -0,0 +1,45 @@
use std::sync::{Arc,RwLock};
use std::thread;
use std::fs::File;
use virtio::{VirtioDeviceOps,VirtioBus,VirtQueue};
use memory::GuestRam;
use vm::Result;
// Virtio device id for an entropy (rng) device.
const VIRTIO_ID_RANDOM: u16 = 4;
/// Virtio-rng device: feeds the guest from the host's /dev/urandom.
pub struct VirtioRandom;
impl VirtioRandom {
    fn new() -> VirtioRandom { VirtioRandom }

    /// Create and register the rng device with its single request queue.
    pub fn create(vbus: &mut VirtioBus) -> Result<()> {
        let dev = Arc::new(RwLock::new(VirtioRandom::new()));
        vbus.new_virtio_device(VIRTIO_ID_RANDOM, dev)
            .set_num_queues(1)
            .register()
    }
}
impl VirtioDeviceOps for VirtioRandom {
    // Hand the queue to a dedicated thread that serves entropy forever.
    fn start(&mut self, _memory: GuestRam, mut queues: Vec<VirtQueue>) {
        thread::spawn(move|| {
            run(queues.pop().unwrap())
        });
    }
}
/// Entropy service loop: fill every buffer the guest posts with bytes
/// from /dev/urandom, 256 bytes at a time. Never returns.
/// NOTE(review): the unwraps panic the device thread if /dev/urandom
/// cannot be opened or a chain copy fails — acceptable for now but
/// worth converting to logged errors.
fn run(q: VirtQueue) {
    let random = File::open("/dev/urandom").unwrap();
    loop {
        q.on_each_chain(|mut chain| {
            while !chain.is_end_of_chain() {
                let _ = chain.copy_from_reader(&random, 256).unwrap();
            }
        });
    }
}

View File

@ -0,0 +1,226 @@
use std::sync::{Arc,RwLock};
use std::io::{self,Write,Read};
use std::thread::spawn;
use termios::*;
use virtio::{VirtioDeviceOps,VirtioBus, VirtQueue};
use memory::GuestRam;
use vm::Result;
// Virtio device id for a console device.
const VIRTIO_ID_CONSOLE: u16 = 3;
// Feature bits: config carries console size; multiport control channel.
const VIRTIO_CONSOLE_F_SIZE: u64 = 0x1;
const VIRTIO_CONSOLE_F_MULTIPORT: u64 = 0x2;
// Control-channel event codes (virtio console spec).
const VIRTIO_CONSOLE_DEVICE_READY: u16 = 0;
const VIRTIO_CONSOLE_DEVICE_ADD: u16 = 1;
const _VIRTIO_CONSOLE_DEVICE_REMOVE: u16 = 2;
const VIRTIO_CONSOLE_PORT_READY: u16 = 3;
const VIRTIO_CONSOLE_CONSOLE_PORT: u16 = 4;
const VIRTIO_CONSOLE_RESIZE: u16 = 5;
const VIRTIO_CONSOLE_PORT_OPEN: u16 = 6;
const _VIRTIO_CONSOLE_PORT_NAME: u16 = 7;
/// Virtio console device bridging the guest console to the host terminal.
pub struct VirtioSerial {
    // Feature bits negotiated by the guest.
    feature_bits: u64,
}
impl VirtioSerial {
    fn new() -> VirtioSerial {
        VirtioSerial{feature_bits:0}
    }
    /// Create and register the console: 4 queues (port rx/tx + control
    /// rx/tx), legacy serial device class, 12-byte config space.
    pub fn create(vbus: &mut VirtioBus) -> Result<()> {
        let dev = Arc::new(RwLock::new(VirtioSerial::new()));
        vbus.new_virtio_device(VIRTIO_ID_CONSOLE, dev)
            .set_num_queues(4)
            .set_device_class(0x0700)
            .set_config_size(12)
            .set_features(VIRTIO_CONSOLE_F_MULTIPORT|VIRTIO_CONSOLE_F_SIZE)
            .register()
    }
    // Pump guest console output to the host's stdout on its own thread.
    // NOTE(review): unwraps panic the output thread on a stdout error.
    fn start_console(&self, _memory: GuestRam, q: VirtQueue) {
        spawn(move || {
            loop {
                q.wait_ready().unwrap();
                for mut chain in q.iter() {
                    io::copy(&mut chain, &mut io::stdout()).unwrap();
                    io::stdout().flush().unwrap();
                }
            }
        });
    }
    // True when the guest accepted the multiport feature.
    fn multiport(&self) -> bool {
        self.feature_bits & VIRTIO_CONSOLE_F_MULTIPORT != 0
    }
}
use system::ioctl;
// C-layout mirror of struct winsize for the TIOCGWINSZ ioctl.
#[repr(C)]
#[derive(Default)]
struct WinSz {
    ws_row: u16,
    ws_col: u16,
    ws_xpixel: u16,
    ws_ypixel: u16,
}
// "Get window size" terminal ioctl request number (Linux).
const TIOCGWINSZ: u64 = 0x5413;
impl VirtioDeviceOps for VirtioSerial {
    // Device reset requested by the guest; currently only logged.
    fn reset(&mut self) {
        println!("Reset called");
    }
    // Record the guest's accepted feature bits; always succeeds.
    fn enable_features(&mut self, bits: u64) -> bool {
        self.feature_bits = bits;
        true
    }
    // Queue layout: [0] port0 rx (terminal input), [1] port0 tx (console
    // output), [2] control rx, [3] control tx. Each gets its own thread.
    fn start(&mut self, memory: GuestRam, mut queues: Vec<VirtQueue>) {
        let mut term = Terminal::create(queues.remove(0));
        self.start_console(memory, queues.remove(0));
        spawn( move || {
            term.read_loop();
        });
        if self.multiport() {
            let mut control = Control::new(queues.remove(0), queues.remove(0));
            spawn(move || {
                control.run();
            });
        }
    }
    // Config space: only offset 4 (max_nr_ports) is meaningful; report 1.
    fn read_config(&mut self, offset: usize, _size: usize) -> u64 {
        if offset == 4 {
            return 1;
        }
        0
    }
}
/// Handles the virtio-console multiport control channel: guest messages
/// arrive on `tx_vq`, host replies are posted on `rx_vq`.
struct Control {
    rx_vq: VirtQueue,
    tx_vq: VirtQueue,
}
use byteorder::{LittleEndian,ReadBytesExt,WriteBytesExt};
impl Control {
    fn new(rx: VirtQueue, tx: VirtQueue) -> Control {
        Control { rx_vq: rx, tx_vq: tx }
    }
    /// Control loop: answer the guest's handshake — DEVICE_READY gets a
    /// DEVICE_ADD for port 0; PORT_READY marks port 0 as the console,
    /// opens it and pushes the current terminal size.
    fn run(&mut self) {
        let mut rx = self.rx_vq.clone();
        self.tx_vq.on_each_chain(|mut chain| {
            // Control message wire format: id[4] event[2] value[2].
            let _id = chain.read_u32::<LittleEndian>().unwrap();
            let event = chain.read_u16::<LittleEndian>().unwrap();
            let _value = chain.read_u16::<LittleEndian>().unwrap();
            if event == VIRTIO_CONSOLE_DEVICE_READY {
                Control::send_msg(&mut rx,0, VIRTIO_CONSOLE_DEVICE_ADD, 1).unwrap();
            }
            if event == VIRTIO_CONSOLE_PORT_READY {
                Control::send_msg(&mut rx,0, VIRTIO_CONSOLE_CONSOLE_PORT, 1).unwrap();
                Control::send_msg(&mut rx,0, VIRTIO_CONSOLE_PORT_OPEN, 1).unwrap();
                Control::send_resize(&mut rx, 0).unwrap();
            }
            chain.flush_chain();
        });
    }
    // Post one id/event/value control message to the guest.
    fn send_msg(vq: &mut VirtQueue, id: u32, event: u16, val: u16) -> io::Result<()> {
        let mut chain = vq.wait_next_chain().unwrap();
        chain.write_u32::<LittleEndian>(id)?;
        chain.write_u16::<LittleEndian>(event)?;
        chain.write_u16::<LittleEndian>(val)?;
        chain.flush_chain();
        Ok(())
    }
    // Post a RESIZE event carrying the host terminal's rows and cols.
    fn send_resize(vq: &mut VirtQueue, id: u32) -> io::Result<()> {
        let (cols, rows) = Control::stdin_terminal_size()?;
        let mut chain = vq.wait_next_chain().unwrap();
        chain.write_u32::<LittleEndian>(id)?;
        chain.write_u16::<LittleEndian>(VIRTIO_CONSOLE_RESIZE)?;
        chain.write_u16::<LittleEndian>(0)?;
        chain.write_u16::<LittleEndian>(rows)?;
        chain.write_u16::<LittleEndian>(cols)?;
        chain.flush_chain();
        Ok(())
    }
    // Query the controlling terminal's size via TIOCGWINSZ on stdin (fd 0).
    fn stdin_terminal_size() -> io::Result<(u16, u16)> {
        let mut wsz = WinSz{..Default::default()};
        unsafe {
            if let Err(err) = ioctl::ioctl_with_mut_ref(0, TIOCGWINSZ, &mut wsz) {
                println!("Got error calling TIOCGWINSZ on stdin: {:?}", err);
                return Err(io::Error::last_os_error());
            }
        }
        Ok((wsz.ws_col, wsz.ws_row))
    }
}
/// Forwards host terminal input to the guest's console receive queue.
/// Saves the original termios so raw mode can be undone.
struct Terminal {
    saved: Termios,
    vq: VirtQueue,
}
impl Terminal {
    /// Capture the current stdin termios settings and take ownership of
    /// the guest's console receive queue.
    fn create(vq: VirtQueue) -> Terminal {
        let termios = Termios::from_fd(0).unwrap();
        Terminal {
            saved: termios,
            vq,
        }
    }
    // Put stdin into raw-ish mode: no CR->NL mapping, no signals, no
    // canonical line buffering, no local echo.
    fn setup_term(&self) {
        let mut termios = self.saved.clone();
        termios.c_iflag &= !(ICRNL);
        termios.c_lflag &= !(ISIG|ICANON|ECHO);
        let _ = tcsetattr(0, TCSANOW, &termios);
    }
    /// Forward stdin bytes to the guest forever. Three consecutive
    /// lone Ctrl-C (0x03) reads restore the saved terminal settings
    /// (escape hatch since ISIG is disabled); the loop itself never exits.
    fn read_loop(&mut self) {
        self.setup_term();
        let mut abort_cnt = 0;
        let mut buf = vec![0u8; 32];
        loop {
            let n = io::stdin().read(&mut buf).unwrap();
            if n > 0 {
                // XXX write_all
                let mut chain = self.vq.wait_next_chain().unwrap();
                chain.write_all(&mut buf[..n]).unwrap();
                chain.flush_chain();
                // Count only reads that are exactly one Ctrl-C byte.
                if n > 1 || buf[0] != 3 {
                    abort_cnt = 0;
                } else {
                    abort_cnt += 1;
                }
            }
            if abort_cnt == 3 {
                let _ = tcsetattr(0, TCSANOW, &self.saved);
            }
        }
    }
}
impl Drop for Terminal {
    // Restore the original terminal settings; failure is ignored since
    // nothing useful can be done about it during drop.
    fn drop(&mut self) {
        let _ = tcsetattr(0, TCSANOW, &self.saved);
    }
}

612
src/kvm/ioctl.rs Normal file
View File

@ -0,0 +1,612 @@
use libc::{self, c_char, c_ulong};
use std::os::unix::io::RawFd;
use std::ffi::CString;
use std::fmt;
use system::ioctl::{ioctl_with_val,ioctl_with_ref,ioctl_with_mut_ref};
use vm::{Result,Error,ErrorKind};
// KVM ioctl magic number; request numbers below are built with the
// io!/ior!/iow!/iorw! macros (direction | size | magic | number) and the
// sizes are the byte sizes of the corresponding kvm_* structs.
const KVMIO: u64 = 0xAE;
// System (/dev/kvm) ioctls.
const KVM_GET_API_VERSION: c_ulong = io! (KVMIO, 0x00);
const KVM_CREATE_VM: c_ulong = io! (KVMIO, 0x01);
const KVM_CHECK_EXTENSION: c_ulong = io! (KVMIO, 0x03);
const KVM_GET_SUPPORTED_CPUID: c_ulong = iorw! (KVMIO, 0x05, 8);
// VM ioctls.
const KVM_SET_TSS_ADDR: c_ulong = io! (KVMIO, 0x47);
const KVM_CREATE_IRQCHIP: c_ulong = io! (KVMIO, 0x60);
const KVM_CREATE_PIT2: c_ulong = iow! (KVMIO, 0x77, 64);
const KVM_GET_VCPU_MMAP_SIZE: c_ulong = io! (KVMIO, 0x04);
const KVM_CREATE_VCPU: c_ulong = io! (KVMIO, 0x41);
const KVM_SET_USER_MEMORY_REGION: c_ulong = iow! (KVMIO, 0x46, 32);
const KVM_IRQ_LINE: c_ulong = iow! (KVMIO, 0x61, 8);
const KVM_IRQFD: c_ulong = iow! (KVMIO, 0x76, 32);
const KVM_IOEVENTFD: c_ulong = iow! (KVMIO, 0x79, 64);
// Vcpu ioctls.
const KVM_RUN: c_ulong = io! (KVMIO, 0x80);
const KVM_GET_REGS: c_ulong = ior! (KVMIO, 0x81, 144);
const KVM_SET_REGS: c_ulong = iow! (KVMIO, 0x82, 144);
const KVM_GET_SREGS: c_ulong = ior! (KVMIO, 0x83, 312);
const KVM_SET_SREGS: c_ulong = iow! (KVMIO, 0x84, 312);
const KVM_SET_MSRS: c_ulong = iow! (KVMIO, 0x89, 8);
const KVM_SET_FPU: c_ulong = iow! (KVMIO, 0x8d, 416);
const KVM_GET_LAPIC: c_ulong = ior! (KVMIO, 0x8e, 1024);
const KVM_SET_LAPIC: c_ulong = iow! (KVMIO, 0x8f, 1024);
const KVM_SET_CPUID2: c_ulong = iow! (KVMIO, 0x90, 8);
// RAII holder for a raw file descriptor; close() happens in Drop.
struct InnerFd(RawFd);
impl InnerFd {
    fn raw(&self) -> RawFd { self.0 }
}
impl Drop for InnerFd {
    fn drop(&mut self) {
        // close() result deliberately ignored — nothing to do in Drop.
        let _ = unsafe { libc::close(self.0) };
    }
}
/// Owned handle to the KVM system device (/dev/kvm).
pub struct SysFd(InnerFd);
// Open /dev/kvm read-write; returns the errno-derived error on failure.
fn raw_open_kvm() -> Result<RawFd> {
    let path = CString::new("/dev/kvm").unwrap();
    let fd = unsafe { libc::open(path.as_ptr() as *const c_char, libc::O_RDWR) };
    if fd < 0 {
        return Err(Error::from_last_errno());
    }
    Ok(fd)
}
impl SysFd {
    /// Open /dev/kvm, wrapping any failure as OpenDeviceFailed.
    pub fn open() -> Result<SysFd> {
        match raw_open_kvm() {
            Ok(fd) => Ok(SysFd(InnerFd(fd))),
            Err(e) => Err(Error::new(ErrorKind::OpenDeviceFailed, e)),
        }
    }
    fn raw(&self) -> RawFd { self.0.raw() }
}
pub struct VmFd(InnerFd);
impl VmFd {
fn new(fd: RawFd) -> VmFd {
VmFd( InnerFd(fd) )
}
fn raw(&self) -> RawFd { self.0.raw() }
}
pub struct VcpuFd(InnerFd);
impl VcpuFd {
fn new(fd: RawFd) -> VcpuFd {
VcpuFd( InnerFd(fd) )
}
pub fn raw(&self) -> RawFd { self.0.raw() }
}
/// Queries whether the kernel supports capability `extension`
/// (a `KVM_CAP_*` value); returns 0 if unsupported, non-zero otherwise.
pub fn kvm_check_extension(sysfd: &SysFd, extension: u32) -> Result<u32> {
    unsafe {
        ioctl_with_val(sysfd.raw(), KVM_CHECK_EXTENSION, extension as c_ulong)
            .map_err(|e| ioctl_err("KVM_CHECK_EXTENSION", e))
    }
}

/// Returns the KVM API version; the stable API is version 12.
pub fn kvm_get_api_version(sysfd: &SysFd) -> Result<u32> {
    unsafe {
        ioctl_with_val(sysfd.raw(), KVM_GET_API_VERSION, 0)
            .map_err(|e| ioctl_err("KVM_GET_API_VERSION", e))
    }
}

/// Creates a new virtual machine and returns an owning handle to its fd.
pub fn kvm_create_vm(sysfd: &SysFd) -> Result<VmFd> {
    let fd = unsafe {
        ioctl_with_val(sysfd.raw(), KVM_CREATE_VM, 0)
            .map_err(|e| ioctl_err("KVM_CREATE_VM", e))?
    };
    Ok(VmFd::new(fd as RawFd))
}

/// Returns the size in bytes of the shared `kvm_run` region that must be
/// mmapped from each vcpu fd.
pub fn kvm_get_vcpu_mmap_size(sysfd: &SysFd) -> Result<u32> {
    unsafe {
        ioctl_with_val(sysfd.raw(), KVM_GET_VCPU_MMAP_SIZE, 0)
            .map_err(|e| ioctl_err("KVM_GET_VCPU_MMAP_SIZE", e))
    }
}
/// One CPUID leaf, mirroring the kernel's `struct kvm_cpuid_entry2` layout.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmCpuIdEntry {
    pub function: u32,
    pub index: u32,
    pub flags: u32,
    pub eax: u32,
    pub ebx: u32,
    pub ecx: u32,
    pub edx: u32,
    padding: [u32; 3]
}

const KVM_CPUID_MAX_ENTRIES: usize = 256;

/// Fixed-capacity CPUID table matching the kernel's `struct kvm_cpuid2`
/// header (`nent` plus padding) followed by the entry array.
#[repr(C)]
pub struct KvmCpuId2 {
    nent: u32,
    padding: u32,
    entries: [KvmCpuIdEntry; KVM_CPUID_MAX_ENTRIES]
}

impl KvmCpuId2 {
    /// Creates a table advertising the full capacity, as expected by
    /// `KVM_GET_SUPPORTED_CPUID` which fills it in and rewrites `nent`.
    pub fn new() -> KvmCpuId2 {
        KvmCpuId2 {
            nent: KVM_CPUID_MAX_ENTRIES as u32,
            padding: 0,
            entries: [KvmCpuIdEntry::default(); KVM_CPUID_MAX_ENTRIES],
        }
    }

    /// Builds a table holding exactly the given entries.
    ///
    /// # Panics
    /// Panics when more than `KVM_CPUID_MAX_ENTRIES` entries are supplied.
    pub fn new_from_entries(entries: Vec<KvmCpuIdEntry>) -> KvmCpuId2 {
        assert!(entries.len() <= KVM_CPUID_MAX_ENTRIES, "Too many cpuid entries");
        let mut cpuid = KvmCpuId2::new();
        for (slot, entry) in cpuid.entries.iter_mut().zip(entries.iter()) {
            *slot = *entry;
        }
        cpuid.nent = entries.len() as u32;
        cpuid
    }

    /// Copies out the `nent` populated entries.
    pub fn get_entries(&self) -> Vec<KvmCpuIdEntry> {
        self.entries[..self.nent as usize].to_vec()
    }
}
/// Fills `cpuid` with the CPUID leaves KVM can virtualize; the kernel
/// rewrites `cpuid.nent` to the number of entries it populated.
pub fn kvm_get_supported_cpuid(sysfd: &SysFd, cpuid: &mut KvmCpuId2) -> Result<u32> {
    unsafe {
        ioctl_with_mut_ref(sysfd.raw(), KVM_GET_SUPPORTED_CPUID, cpuid)
            .map_err(|e| ioctl_err("KVM_GET_SUPPORTED_CPUID", e))
    }
}

/// Installs the given CPUID table on a vcpu.
pub fn kvm_set_cpuid2(cpufd: &VcpuFd, cpuid: &KvmCpuId2) -> Result<u32> {
    unsafe {
        ioctl_with_ref(cpufd.raw(), KVM_SET_CPUID2, cpuid)
            .map_err(|e| ioctl_err("KVM_SET_CPUID2", e))
    }
}

/// Mirrors the kernel's `struct kvm_userspace_memory_region`: maps `memory_size`
/// bytes of host memory at `userspace_addr` into guest physical memory at
/// `guest_phys_addr`, identified by `slot`.
#[repr(C)]
pub struct KvmUserspaceMemoryRegion {
    slot: u32,
    flags: u32,
    guest_phys_addr: u64,
    memory_size: u64,
    userspace_addr: u64,
}

impl KvmUserspaceMemoryRegion {
    pub fn new(slot: u32, guest_address: u64, host_address: u64, size: u64) -> KvmUserspaceMemoryRegion {
        KvmUserspaceMemoryRegion {
            slot,
            // No KVM_MEM_* flags (e.g. read-only or dirty logging) are used.
            flags: 0,
            guest_phys_addr: guest_address,
            memory_size: size,
            userspace_addr: host_address,
        }
    }
}

/// Registers (or replaces) a guest memory slot on the VM.
pub fn kvm_set_user_memory_region(vmfd: &VmFd, region: &KvmUserspaceMemoryRegion) -> Result<u32> {
    unsafe {
        ioctl_with_ref(vmfd.raw(), KVM_SET_USER_MEMORY_REGION, region)
            .map_err(|e| ioctl_err("KVM_SET_USER_MEMORY_REGION", e))
    }
}

/// Mirrors the kernel's `struct kvm_pit_config` for `KVM_CREATE_PIT2`.
#[repr(C)]
pub struct KvmPitConfig {
    flags: u32,
    padding: [u32; 15],
}

impl KvmPitConfig {
    pub fn new(flags: u32) -> KvmPitConfig {
        KvmPitConfig { flags, padding: [0; 15] }
    }
}

/// Creates the in-kernel i8254 PIT for the VM.
pub fn kvm_create_pit2(vmfd: &VmFd, config: &KvmPitConfig) -> Result<u32> {
    unsafe {
        ioctl_with_ref(vmfd.raw(), KVM_CREATE_PIT2, config)
            .map_err(|e| ioctl_err("KVM_CREATE_PIT2", e))
    }
}

/// Creates the in-kernel interrupt controller (PIC/IOAPIC/local APICs).
pub fn kvm_create_irqchip(vmfd: &VmFd) -> Result<u32> {
    unsafe {
        ioctl_with_val(vmfd.raw(), KVM_CREATE_IRQCHIP, 0)
            .map_err(|e| ioctl_err("KVM_CREATE_IRQCHIP", e))
    }
}

/// Tells KVM which guest-physical address to use for the 3-page TSS area
/// required by Intel VT-x when running in real mode.
pub fn kvm_set_tss_addr(vmfd: &VmFd, addr: u32) -> Result<u32> {
    unsafe {
        ioctl_with_val(vmfd.raw(), KVM_SET_TSS_ADDR, addr as c_ulong)
            .map_err(|e| ioctl_err("KVM_SET_TSS_ADDR", e))
    }
}

/// Creates vcpu number `cpu_id` on the VM and returns an owning handle.
pub fn kvm_create_vcpu(vmfd: &VmFd, cpu_id: u32) -> Result<VcpuFd> {
    let fd = unsafe {
        ioctl_with_val(vmfd.raw(), KVM_CREATE_VCPU, cpu_id as c_ulong)
            .map_err(|e| ioctl_err("KVM_CREATE_VCPU", e))?
    };
    Ok(VcpuFd::new(fd as RawFd))
}
/// Mirrors the kernel's `struct kvm_irq_level`: assert or deassert a
/// single interrupt line on the in-kernel irqchip.
#[repr(C)]
pub struct KvmIrqLevel {
    irq: u32,
    level: u32,
}

impl KvmIrqLevel {
    pub fn new(irq: u32, level: u32) -> KvmIrqLevel {
        KvmIrqLevel { irq, level }
    }
}

/// Sets the level of a guest interrupt line.
pub fn kvm_irq_line(vmfd: &VmFd, level: &KvmIrqLevel) -> Result<u32> {
    unsafe {
        ioctl_with_ref(vmfd.raw(), KVM_IRQ_LINE, level)
            .map_err(|e| ioctl_err("KVM_IRQ_LINE", e))
    }
}

/// Mirrors the kernel's `struct kvm_irqfd`: attaches an eventfd so that
/// writes to it inject interrupt `gsi` into the guest.
#[repr(C)]
pub struct KvmIrqFd {
    fd: u32,
    gsi: u32,
    flags: u32,
    resample_fd: u32,
    pad1: u64,
    pad2: u64,
}

impl KvmIrqFd {
    pub fn new(fd: u32, gsi: u32) -> KvmIrqFd {
        KvmIrqFd{fd, gsi, flags: 0, resample_fd: 0, pad1: 0, pad2: 0}
    }
}

/// Registers an eventfd-based interrupt source with the VM.
pub fn kvm_irqfd(vmfd: &VmFd, irqfd: &KvmIrqFd) -> Result<u32> {
    unsafe {
        ioctl_with_ref(vmfd.raw(), KVM_IRQFD, irqfd)
            .map_err(|e| ioctl_err("KVM_IRQFD", e))
    }
}

// Flag bits for KvmIoEventFd.flags, matching the kernel's
// KVM_IOEVENTFD_FLAG_* values.
pub const IOEVENTFD_FLAG_DATAMATCH: u32 = 1;
pub const _IOEVENTFD_FLAG_PIO: u32 = 2;
pub const IOEVENTFD_FLAG_DEASSIGN: u32 = 4;

/// Mirrors the kernel's `struct kvm_ioeventfd`: signals an eventfd when
/// the guest writes to the mmio address `addr`.
#[repr(C)]
pub struct KvmIoEventFd {
    datamatch: u64,
    addr: u64,
    len: u32,
    fd: u32,
    flags: u32,
    padding: [u8; 36],
}

impl KvmIoEventFd {
    /// Creates an ioeventfd matching any write (length 0, no datamatch)
    /// to the given guest address.
    pub fn new_with_addr_fd(addr: u64, fd: RawFd) -> KvmIoEventFd {
        KvmIoEventFd::new(0, addr, 0, fd as u32, 0)
    }
    fn new(datamatch: u64, addr: u64, len: u32, fd: u32, flags: u32) -> KvmIoEventFd {
        KvmIoEventFd{datamatch, addr, len, fd, flags, padding: [0;36]}
    }
    /// Restricts the trigger to writes of `len` bytes equal to `datamatch`.
    #[allow(dead_code)]
    pub fn set_datamatch(&mut self, datamatch: u64, len: u32) {
        self.flags |= IOEVENTFD_FLAG_DATAMATCH;
        self.datamatch = datamatch;
        self.len = len;
    }
    /// Marks this request as a de-registration of a previous assignment.
    pub fn set_deassign(&mut self) {
        self.flags |= IOEVENTFD_FLAG_DEASSIGN;
    }
}

/// Registers or (with the deassign flag) removes an ioeventfd on the VM.
pub fn kvm_ioeventfd(vmfd: &VmFd, ioeventfd: &KvmIoEventFd) -> Result<u32> {
    unsafe {
        ioctl_with_ref(vmfd.raw(), KVM_IOEVENTFD, ioeventfd)
            .map_err(|e| ioctl_err("KVM_IOEVENTFD", e))
    }
}

/// Raw 1KB local-APIC register page, mirroring `struct kvm_lapic_state`.
#[repr(C)]
pub struct KvmLapicState {
    pub regs: [u8; 1024]
}

impl KvmLapicState {
    pub fn new() -> KvmLapicState {
        KvmLapicState { regs: [0; 1024] }
    }
}

/// Reads the vcpu's local-APIC state into `lapic_state`.
pub fn kvm_get_lapic(cpufd: &VcpuFd, lapic_state: &mut KvmLapicState) -> Result<u32> {
    unsafe {
        ioctl_with_mut_ref(cpufd.raw(), KVM_GET_LAPIC, lapic_state)
            .map_err(|e| ioctl_err("KVM_GET_LAPIC", e))
    }
}

/// Writes `lapic_state` back to the vcpu's local APIC.
pub fn kvm_set_lapic(cpufd: &VcpuFd, lapic_state: &KvmLapicState) -> Result<u32> {
    unsafe {
        ioctl_with_ref(cpufd.raw(), KVM_SET_LAPIC, lapic_state)
            .map_err(|e| ioctl_err("KVM_SET_LAPIC", e))
    }
}
/// One x86 segment register, mirroring the kernel's `struct kvm_segment`.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmSegment {
    base: u64,
    limit: u32,
    selector: u16,
    stype: u8,
    present: u8,
    dpl: u8,
    db: u8,
    s: u8,
    l: u8,
    g: u8,
    avl: u8,
    unusable: u8,
    padding: u8,
}

impl KvmSegment {
    /// Builds a segment from a packed 16-bit `flags` word laid out like the
    /// upper half of a GDT descriptor: type in bits 0-3, S bit 4, DPL bits
    /// 5-6, P bit 7, AVL bit 12, L bit 13, D/B bit 14, G bit 15.
    pub fn new(base: u64, limit: u32, selector: u16, flags: u16) -> KvmSegment {
        let mut seg = KvmSegment::default();
        seg.setup(base, limit, selector, flags);
        seg
    }

    /// Unpacks `flags` into the individual descriptor fields (see `new`).
    pub fn setup(&mut self, base: u64, limit: u32, selector: u16, flags: u16) {
        // Extract a single flag bit as 0 or 1.
        let bit = |shift: u16| ((flags >> shift) & 0x1) as u8;
        self.base = base;
        self.limit = limit;
        self.selector = selector;
        self.stype = (flags & 0xF) as u8;
        self.present = bit(7);
        self.dpl = ((flags >> 5) & 0x3) as u8;
        self.db = bit(14);
        self.s = bit(4);
        self.l = bit(13);
        self.g = bit(15);
        self.avl = bit(12);
        // KVM treats a non-present segment as unusable.
        self.unusable = if self.present == 0 { 1 } else { 0 };
    }
}

impl fmt::Debug for KvmSegment {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "(base: {:x} limit {:x} selector: {:x} type: {:x} p: {} dpl: {} db: {} s: {} l: {} g: {} avl: {} unuse: {})",
               self.base, self.limit, self.selector, self.stype, self.present, self.dpl, self.db, self.s, self.l, self.g, self.avl, self.unusable)
    }
}
/// Descriptor-table register (GDTR/IDTR), mirroring `struct kvm_dtable`.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmDtable {
    pub base: u64,
    pub limit: u16,
    padding: [u16; 3],
}

impl fmt::Debug for KvmDtable {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "(base: {:x} limit {:x})", self.base, self.limit)
    }
}

/// Special registers of a vcpu, mirroring the kernel's `struct kvm_sregs`.
// NOTE(review): the field name `itd` is almost certainly a typo for `idt`
// (interrupt descriptor table). It is a public field so renaming it would
// break external users; fix in a coordinated change.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmSRegs {
    pub cs: KvmSegment,
    pub ds: KvmSegment,
    pub es: KvmSegment,
    pub fs: KvmSegment,
    pub gs: KvmSegment,
    pub ss: KvmSegment,
    pub tr: KvmSegment,
    pub ldt: KvmSegment,
    pub gdt: KvmDtable,
    pub itd: KvmDtable,
    pub cr0: u64,
    pub cr2: u64,
    pub cr3: u64,
    pub cr4: u64,
    pub cr8: u64,
    pub efer: u64,
    pub apic_base: u64,
    pub interrupt_bitmap: [u64; 4],
}

impl fmt::Debug for KvmSRegs {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "cs: {:?}\nds: {:?}\nes: {:?}\nfs: {:?}\n", self.cs, self.ds, self.es, self.fs)?;
        write!(f, "gs: {:?}\nss: {:?}\ntr: {:?}\nldt: {:?}\n", self.gs, self.ss, self.tr, self.ldt)?;
        write!(f, "gdt: {:?} itd: {:?}\n", self.gdt, self.itd)?;
        write!(f, "cr0: {:x} cr2: {:x} cr3: {:x} cr4: {:x}\n", self.cr0, self.cr2, self.cr3, self.cr4)?;
        write!(f, "efer: {:x} apic_base: {:x}\n", self.efer, self.apic_base)
    }
}

impl KvmSRegs {
    /// Creates a zeroed special-register block.
    pub fn new() -> KvmSRegs {
        KvmSRegs { ..Default::default() }
    }
}

/// Reads the vcpu's special registers into `sregs`.
pub fn kvm_get_sregs(cpufd: &VcpuFd, sregs: &mut KvmSRegs) -> Result<u32> {
    unsafe {
        ioctl_with_mut_ref(cpufd.raw(), KVM_GET_SREGS, sregs)
            .map_err(|e| ioctl_err("KVM_GET_SREGS", e))
    }
}

/// Writes `sregs` into the vcpu's special registers.
pub fn kvm_set_sregs(cpufd: &VcpuFd, sregs: &KvmSRegs) -> Result<u32> {
    unsafe {
        ioctl_with_ref(cpufd.raw(), KVM_SET_SREGS, sregs)
            .map_err(|e| ioctl_err("KVM_SET_SREGS", e))
    }
}

/// General-purpose registers, mirroring the kernel's `struct kvm_regs`.
#[derive(Copy, Clone, Default)]
#[repr(C)]
pub struct KvmRegs {
    pub rax: u64, pub rbx: u64, pub rcx: u64, pub rdx: u64,
    pub rsi: u64, pub rdi: u64, pub rsp: u64, pub rbp: u64,
    pub r8: u64, pub r9: u64, pub r10: u64, pub r11: u64,
    pub r12: u64, pub r13: u64, pub r14: u64, pub r15: u64,
    pub rip: u64, pub rflags: u64,
}

impl fmt::Debug for KvmRegs {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "rax 0x{:x} rbx 0x{:x} rcx 0x{:x} rdx 0x{:x}\n", self.rax, self.rbx, self.rcx, self.rdx)?;
        write!(f, "rsi 0x{:x} rdi 0x{:x} rsp 0x{:x} rbp 0x{:x}\n", self.rsi, self.rdi, self.rsp, self.rbp)?;
        write!(f, "r8 0x{:x} r9 0x{:x} r10 0x{:x} r11 0x{:x}\n", self.r8, self.r9, self.r10, self.r11)?;
        write!(f, "r12 0x{:x} r13 0x{:x} r14 0x{:x} r15 0x{:x}\n", self.r12, self.r13, self.r14, self.r15)?;
        write!(f, "rip 0x{:x} rflags 0x{:x}\n", self.rip, self.rflags)
    }
}

impl KvmRegs {
    /// Creates a zeroed register block.
    pub fn new() -> KvmRegs {
        KvmRegs { ..Default::default() }
    }
}

/// Reads the vcpu's general-purpose registers into `regs`.
pub fn kvm_get_regs(cpufd: &VcpuFd, regs: &mut KvmRegs) -> Result<u32> {
    unsafe {
        ioctl_with_mut_ref(cpufd.raw(), KVM_GET_REGS, regs)
            .map_err(|e| ioctl_err("KVM_GET_REGS", e))
    }
}

/// Writes `regs` into the vcpu's general-purpose registers.
pub fn kvm_set_regs(cpufd: &VcpuFd, regs: &KvmRegs) -> Result<u32> {
    unsafe {
        ioctl_with_ref(cpufd.raw(), KVM_SET_REGS, regs)
            .map_err(|e| ioctl_err("KVM_SET_REGS", e))
    }
}

/// FPU state, mirroring the kernel's `struct kvm_fpu`.
// Clone is written by hand (delegating to Copy) because derived impls for
// arrays longer than 32 elements were not available when this was written.
#[derive(Copy)]
#[repr(C)]
pub struct KvmFpu {
    fpr: [u8; 128],
    pub fcw: u16,
    fsw: u16,
    ftwx: u8,
    pad1: u8,
    last_opcode: u16,
    last_ip: u64,
    last_dp: u64,
    xmm: [u8; 256],
    pub mxcsr: u32,
    pad2: u32,
}

impl Clone for KvmFpu {
    fn clone(&self) -> KvmFpu { *self }
}

impl KvmFpu {
    /// Creates a zeroed FPU state block.
    pub fn new() -> KvmFpu {
        KvmFpu {
            fpr: [0; 128],
            fcw: 0,
            fsw: 0,
            ftwx: 0, pad1: 0,
            last_opcode: 0,
            last_ip: 0,
            last_dp: 0,
            xmm: [0; 256],
            mxcsr: 0,
            pad2: 0
        }
    }
}

/// Writes `fpu` into the vcpu's FPU state.
pub fn kvm_set_fpu(cpufd: &VcpuFd, fpu: &KvmFpu) -> Result<u32> {
    unsafe {
        ioctl_with_ref(cpufd.raw(), KVM_SET_FPU, fpu )
            .map_err(|e| ioctl_err("KVM_SET_FPU", e))
    }
}
/// Maximum number of entries a single `KvmMsrs` buffer can hold.
const KVM_MSR_MAX_ENTRIES: usize = 100;

/// One model-specific-register entry, mirroring the kernel's
/// `struct kvm_msr_entry` layout (index, pad, 64-bit value).
#[derive(Copy, Clone, Default)]
#[repr(C)]
struct KvmMsrEntry {
    index: u32,
    reserved: u32,
    data: u64
}

/// Fixed-capacity MSR list matching the kernel's `struct kvm_msrs` header
/// (`nent`, padding) followed by the entry array.
#[repr(C)]
pub struct KvmMsrs {
    nent: u32,
    padding: u32,
    entries: [KvmMsrEntry; KVM_MSR_MAX_ENTRIES]
}

impl KvmMsrs {
    /// Creates an empty MSR list.
    pub fn new() -> KvmMsrs {
        KvmMsrs{ nent: 0, padding: 0, entries: [Default::default(); KVM_MSR_MAX_ENTRIES]}
    }

    /// Appends one `(MSR index, value)` pair.
    ///
    /// # Panics
    /// Panics with a descriptive message when the fixed capacity is exceeded
    /// (previously this surfaced as a bare array index-out-of-bounds panic).
    pub fn add(&mut self, index: u32, data: u64) {
        let slot = self.nent as usize;
        assert!(slot < KVM_MSR_MAX_ENTRIES,
                "KvmMsrs capacity ({}) exceeded", KVM_MSR_MAX_ENTRIES);
        self.entries[slot].index = index;
        self.entries[slot].data = data;
        self.nent += 1;
    }
}
/// Writes the MSR values in `msrs` into the vcpu.
pub fn kvm_set_msrs(cpufd: &VcpuFd, msrs: &KvmMsrs) -> Result<u32> {
    unsafe {
        ioctl_with_ref(cpufd.raw(), KVM_SET_MSRS, msrs)
            .map_err(|e| ioctl_err("KVM_SET_MSRS", e))
    }
}

/// Enters guest execution on this vcpu; returns when the guest exits back
/// to userspace (exit details live in the vcpu's mmapped kvm_run region).
pub fn kvm_run(cpufd: &VcpuFd) -> Result<u32> {
    unsafe {
        ioctl_with_val(cpufd.raw(), KVM_RUN, 0)
            .map_err(|e| ioctl_err("KVM_RUN", e))
    }
}

/// Wraps an ioctl failure with the ioctl's name, except for EINTR-style
/// interruptions which are passed through so callers can retry.
pub fn ioctl_err(ioctl_name: &'static str, e: Error) -> Error {
    if e.is_interrupted() {
        e
    } else {
        Error::new(ErrorKind::IoctlFailed(ioctl_name), e)
    }
}

223
src/kvm/mod.rs Normal file
View File

@ -0,0 +1,223 @@
use std::os::unix::io::RawFd;
use std::sync::Arc;
mod ioctl;
use vm::{Result,Error,ErrorKind};
pub use self::ioctl::{KvmCpuIdEntry,KvmLapicState, KvmSRegs, KvmRegs, KvmFpu, KvmMsrs, KvmSegment};
// KVM capability numbers usable with KVM_CHECK_EXTENSION; values match the
// KVM_CAP_* constants in the kernel uapi headers.
pub const KVM_CAP_IRQCHIP: u32 = 0;
pub const KVM_CAP_HLT: u32 = 1;
pub const KVM_CAP_USER_MEMORY: u32 = 3;
pub const KVM_CAP_SET_TSS_ADDR: u32 = 4;
pub const KVM_CAP_EXT_CPUID: u32 = 7;
pub const KVM_CAP_IRQ_ROUTING: u32 = 25;
pub const KVM_CAP_IRQ_INJECT_STATUS: u32 = 26;
pub const KVM_CAP_PIT2: u32 = 33;
pub const KVM_CAP_IOEVENTFD: u32 = 36;

/// High-level handle to a KVM virtual machine: the system fd, the VM fd and
/// the vcpus created so far. Cloning is cheap (fds are Arc-shared).
#[derive(Clone)]
pub struct Kvm {
    sysfd: Arc<ioctl::SysFd>,
    vmfd: Arc<ioctl::VmFd>,
    vcpus: Vec<KvmVcpu>,
}

/// Fails with `MissingRequiredExtension` if any of the listed capabilities
/// is not supported by the running kernel.
fn check_extensions(sysfd: &ioctl::SysFd, extensions: &[u32]) -> Result<()> {
    for e in extensions {
        if ioctl::kvm_check_extension(&sysfd, *e)? == 0 {
            return Err(Error::from(ErrorKind::MissingRequiredExtension(*e)));
        }
    }
    Ok(())
}

/// Fails with `BadVersion` unless the kernel reports the stable KVM API
/// version (12).
fn check_version(sysfd: &ioctl::SysFd) -> Result<()> {
    if ioctl::kvm_get_api_version(&sysfd)? != 12 {
        return Err(Error::from(ErrorKind::BadVersion));
    }
    Ok(())
}
impl Kvm {
    /// Opens `/dev/kvm`, validates the API version and the given required
    /// capabilities, and creates a new (empty) virtual machine.
    pub fn open(required_extensions: &[u32]) -> Result<Kvm> {
        let sysfd = ioctl::SysFd::open()?;
        check_version(&sysfd)?;
        check_extensions(&sysfd, &required_extensions)?;
        let vmfd = ioctl::kvm_create_vm(&sysfd)
            .map_err(|_| Error::from(ErrorKind::CreateVmFailed))?;
        Ok(Kvm{
            sysfd: Arc::new(sysfd),
            vmfd: Arc::new(vmfd),
            vcpus: Vec::new(),
        })
    }

    /// Maps `size` bytes of host memory at `host_address` into the guest at
    /// `guest_address`, using memory slot number `slot`.
    pub fn add_memory_region(&self, slot: usize, guest_address: u64, host_address: u64, size: usize) -> Result<()> {
        let region = ioctl::KvmUserspaceMemoryRegion::new(slot as u32, guest_address, host_address, size as u64);
        ioctl::kvm_set_user_memory_region(&self.vmfd, &region)?;
        Ok(())
    }

    /// Creates the in-kernel i8254 PIT with default (zero) flags.
    pub fn create_pit2(&self) -> Result<()> {
        let pit_config = ioctl::KvmPitConfig::new(0);
        ioctl::kvm_create_pit2(&self.vmfd, &pit_config)?;
        Ok(())
    }

    /// Creates the in-kernel interrupt controller.
    pub fn create_irqchip(&self) -> Result<()> {
        ioctl::kvm_create_irqchip(&self.vmfd)?;
        Ok(())
    }

    /// Sets the guest-physical address of the VT-x TSS area.
    pub fn set_tss_addr(&self, addr: u32) -> Result<()> {
        ioctl::kvm_set_tss_addr(&self.vmfd, addr)?;
        Ok(())
    }

    /// Asserts (level=1) or deasserts (level=0) guest interrupt line `irq`.
    pub fn irq_line(&self, irq: u32, level: u32) -> Result<()> {
        let irq_level = ioctl::KvmIrqLevel::new(irq, level);
        ioctl::kvm_irq_line(&self.vmfd, &irq_level)?;
        Ok(())
    }

    /// Attaches eventfd `fd` so that writes to it inject interrupt `gsi`.
    pub fn irqfd(&self, fd: u32, gsi: u32) -> Result<()> {
        let irqfd = ioctl::KvmIrqFd::new(fd, gsi);
        ioctl::kvm_irqfd(&self.vmfd, &irqfd)?;
        Ok(())
    }

    /// Registers an ioeventfd that fires on any guest write to `address`.
    pub fn ioeventfd_add(&self, address: u64, fd: RawFd) -> Result<()> {
        // XXX check for zero length capability
        let ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd);
        ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)?;
        Ok(())
    }

    /// Removes an ioeventfd previously registered with `ioeventfd_add`.
    pub fn ioeventfd_del(&self, address: u64, fd: RawFd) -> Result<()> {
        let mut ioeventfd = ioctl::KvmIoEventFd::new_with_addr_fd(address, fd);
        ioeventfd.set_deassign();
        ioctl::kvm_ioeventfd(&self.vmfd, &ioeventfd)?;
        Ok(())
    }

    /// Creates `ncpus` vcpus (ids 0..ncpus), configuring each local APIC,
    /// and stores them on this handle.
    pub fn create_vcpus(&mut self, ncpus: usize) -> Result<()> {
        for id in 0..ncpus {
            let vcpu = self.new_vcpu(id)?;
            vcpu.setup_lapic()?;
            self.vcpus.push(vcpu);
        }
        Ok(())
    }

    fn new_vcpu(&self, id: usize) -> Result<KvmVcpu> {
        let cpufd = ioctl::kvm_create_vcpu(&self.vmfd, id as u32)?;
        Ok(KvmVcpu::new(id, Arc::new(cpufd), self.sysfd.clone()))
    }

    /// Returns clones of all vcpu handles created so far.
    pub fn get_vcpus(&self) -> Vec<KvmVcpu> {
        self.vcpus.clone()
    }
}
/// Handle to one virtual cpu; cheap to clone (fds are Arc-shared).
#[derive(Clone)]
pub struct KvmVcpu {
    id: usize,
    cpufd: Arc<ioctl::VcpuFd>,
    sysfd: Arc<ioctl::SysFd>,
}

// APIC LVT delivery-mode values (bits 8-10 of an LVT register).
const APIC_MODE_EXTINT: u8 = 0x7;
const APIC_MODE_NMI: u8 = 0x4;
// Byte offsets of the LINT0/LINT1 LVT registers within the 1KB APIC
// register page returned by KVM_GET_LAPIC.
const APIC_LVT_LINT0_OFFSET: usize = 0x350;
const APIC_LVT_LINT1_OFFSET: usize = 0x360;

impl KvmVcpu {
    fn new(id: usize, cpufd: Arc<ioctl::VcpuFd>, sysfd: Arc<ioctl::SysFd>) -> KvmVcpu {
        KvmVcpu { id, cpufd, sysfd }
    }

    /// Raw vcpu fd, needed e.g. to mmap the shared kvm_run region.
    pub fn raw_fd(&self) -> RawFd {
        self.cpufd.raw()
    }

    /// Returns the CPUID leaves KVM can virtualize on this host.
    pub fn get_supported_cpuid(&self) -> Result<Vec<KvmCpuIdEntry>> {
        let mut cpuid = ioctl::KvmCpuId2::new();
        ioctl::kvm_get_supported_cpuid(&self.sysfd, &mut cpuid)?;
        Ok(cpuid.get_entries())
    }

    /// Installs the given CPUID leaves on this vcpu.
    pub fn set_cpuid2(&self, entries: Vec<KvmCpuIdEntry>) -> Result<()> {
        let cpuid = ioctl::KvmCpuId2::new_from_entries(entries);
        ioctl::kvm_set_cpuid2(&self.cpufd, &cpuid)?;
        Ok(())
    }

    /// Reads this vcpu's local-APIC register page.
    pub fn get_lapic(&self) -> Result<KvmLapicState> {
        let mut lapic = KvmLapicState::new();
        ioctl::kvm_get_lapic(&self.cpufd, &mut lapic)?;
        Ok(lapic)
    }

    /// Writes the local-APIC register page back to this vcpu.
    pub fn set_lapic(&self, lapic_state: &KvmLapicState) -> Result<()> {
        ioctl::kvm_set_lapic(&self.cpufd, &lapic_state)?;
        Ok(())
    }

    /// Reads this vcpu's special registers.
    pub fn get_sregs(&self) -> Result<KvmSRegs> {
        let mut sregs = KvmSRegs::new();
        ioctl::kvm_get_sregs(&self.cpufd, &mut sregs)?;
        Ok(sregs)
    }

    /// Writes this vcpu's special registers.
    pub fn set_sregs(&self, sregs: &KvmSRegs) -> Result<()> {
        ioctl::kvm_set_sregs(&self.cpufd, &sregs)?;
        Ok(())
    }

    /// Reads this vcpu's general-purpose registers.
    pub fn get_regs(&self) -> Result<KvmRegs> {
        let mut regs = KvmRegs::new();
        ioctl::kvm_get_regs(&self.cpufd, &mut regs)?;
        Ok(regs)
    }

    /// Writes this vcpu's general-purpose registers.
    pub fn set_regs(&self, regs: &KvmRegs) -> Result<()> {
        ioctl::kvm_set_regs(&self.cpufd, regs)?;
        Ok(())
    }

    /// Runs the guest on this vcpu until the next userspace exit.
    pub fn run(&self) -> Result<()> {
        ioctl::kvm_run(&self.cpufd)?;
        Ok(())
    }

    /// Writes this vcpu's FPU state.
    pub fn set_fpu(&self, fpu: &KvmFpu) -> Result<()> {
        ioctl::kvm_set_fpu(&self.cpufd, &fpu)?;
        Ok(())
    }

    /// Writes the given MSR values into this vcpu.
    pub fn set_msrs(&self, msrs: &KvmMsrs) -> Result<()> {
        ioctl::kvm_set_msrs(&self.cpufd, &msrs)?;
        Ok(())
    }

    /// Size in bytes of the kvm_run region to mmap from the vcpu fd.
    pub fn get_vcpu_mmap_size(&self) -> Result<usize> {
        Ok(ioctl::kvm_get_vcpu_mmap_size(&self.sysfd)? as usize)
    }

    /// Programs LINT0 for ExtINT and LINT1 for NMI delivery so legacy PIC
    /// interrupts and NMIs reach the vcpu.
    pub fn setup_lapic(&self) -> Result<()> {
        let mut lapic = self.get_lapic()?;
        // delivery mode: the delivery-mode field occupies bits 8-10 of each
        // 32-bit LVT register, i.e. the low bits of byte offset + 1.
        lapic.regs[APIC_LVT_LINT0_OFFSET + 1] &= 0xF8;
        lapic.regs[APIC_LVT_LINT0_OFFSET + 1] |= APIC_MODE_EXTINT;
        lapic.regs[APIC_LVT_LINT1_OFFSET + 1] &= 0xF8;
        lapic.regs[APIC_LVT_LINT1_OFFSET + 1] |= APIC_MODE_NMI;
        self.set_lapic(&lapic)?;
        Ok(())
    }
}

63
src/main.rs Normal file
View File

@ -0,0 +1,63 @@
#![allow(non_snake_case)]
extern crate libc;
extern crate byteorder;
extern crate termios;
mod vm;
mod memory;
#[macro_use]
mod system;
mod devices;
mod kvm;
mod virtio;
use std::env;
use std::path::{Path,PathBuf};
/// Entry point: builds a 1GB VM config, locates the bundled kernel and init
/// binary relative to the working directory, then boots the VM.
fn main() {
    let mut config = vm::VmConfig::new();
    config.ram_size_megs(1024);
    match find_kernel() {
        Some(path) => config.kernel_path(&path),
        None => { println!("Could not find kernel"); return; }
    }
    match find_init() {
        Some(path) => config.init_path(&path),
        None => { println!("Could not find init"); return; }
    }
    match vm::Vm::open(config) {
        Ok(vm) => {
            // Propagate a startup failure as a panic; there is no recovery.
            vm.start().unwrap();
        },
        Err(e) => println!("error :( {}", e)
    }
}
/// Path of the init binary under the kernel directory, if that directory
/// can be located from the current working directory.
fn find_init() -> Option<PathBuf> {
    find_kernel_base().map(|buf| buf.join("init/init"))
}

/// Path of the built kernel image under the kernel directory, if that
/// directory can be located from the current working directory.
fn find_kernel() -> Option<PathBuf> {
    find_kernel_base().map(|buf| buf.join("build/linux-4.9.56/vmlinux"))
}

/// Returns `<cwd>/kernel` when a built kernel exists below it, else `None`.
fn find_kernel_base() -> Option<PathBuf> {
    // Panics only if the working directory is inaccessible, which is fatal
    // for this launcher anyway.
    let mut cwd = env::current_dir().unwrap();
    if try_kernel_base(&cwd) {
        cwd.push("kernel");
        return Some(cwd);
    }
    None
}

/// True when `path` contains a built kernel at the expected location.
fn try_kernel_base(path: &Path) -> bool {
    path.join("kernel/build/linux-4.9.56/vmlinux").exists()
}

79
src/memory/address.rs Normal file
View File

@ -0,0 +1,79 @@
use std::fmt;
/// A non-empty, contiguous range of 64-bit addresses: `[start, end)`.
#[derive(Copy,Clone,Debug)]
pub struct AddressRange {
    start: u64, // inclusive
    end: u64,   // exclusive
}

impl fmt::Display for AddressRange {
    fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> {
        write!(f, "AddressRange(0x{:x} - 0x{:x}) [size: {}]", self.start, self.end - 1, self.size())
    }
}

impl AddressRange {
    /// Creates a range of `size` bytes starting at `base`, or `None` when
    /// `size` is zero or `base + size` would overflow `u64`.
    pub fn checked_new(base: u64, size: usize) -> Option<AddressRange> {
        match base.checked_add(size as u64) {
            Some(end) if size > 0 => Some(AddressRange{ start: base, end }),
            _ => None,
        }
    }

    /// Like [`checked_new`], but panics on a zero-sized or overflowing range.
    pub fn new(base: u64, size: usize) -> AddressRange {
        assert!(size > 0, "cannot construct address range with size = 0");
        // unwrap_or_else avoids building the panic message on the happy path.
        AddressRange::checked_new(base, size)
            .unwrap_or_else(|| panic!("Address range overflows base: {:x} size: {}", base, size))
    }

    /// True when `address` lies inside the range.
    pub fn contains_address(&self, address: u64) -> bool {
        address >= self.start && address < self.end
    }

    /// True when the whole span `[address, address + size)` lies inside the
    /// range; overflow of `address + size` counts as "not contained".
    pub fn contains(&self, address: u64, size: usize) -> bool {
        assert!(size > 0, "size cannot be 0, use contains_address() for single address test");
        match address.checked_add(size as u64) {
            Some(end) => self.contains_address(address) && self.contains_address(end - 1),
            None => false,
        }
    }

    /// Byte offset of `address` from the start of the range, or `None` when
    /// the address is outside the range.
    pub fn checked_offset_of(&self, address: u64) -> Option<usize> {
        if self.contains_address(address) {
            Some((address - self.start) as usize)
        } else {
            None
        }
    }

    /// Byte offset of `address` from the start of the range.
    ///
    /// # Panics
    /// Panics when the address is outside the range.
    pub fn offset_of(&self, address: u64) -> usize {
        // Fixed message: it previously referred to the wrong method name
        // ("offset_into()").
        self.checked_offset_of(address).expect("range does not contain address for call to offset_of()")
    }

    /// Address `offset` bytes into the range, or `None` when that address
    /// falls outside the range (or overflows).
    pub fn checked_offset_into(&self, offset: usize) -> Option<u64> {
        match self.start.checked_add(offset as u64) {
            Some(addr) if self.contains_address(addr) => Some(addr),
            _ => None,
        }
    }

    /// A sub-range of `size` bytes starting `offset` bytes in, or `None`
    /// when it does not fit entirely inside this range.
    pub fn subrange(&self, offset: usize, size: usize) -> Option<AddressRange> {
        match self.checked_offset_into(offset) {
            Some(base) if self.contains(base, size) => Some(AddressRange::new(base, size)),
            _ => None,
        }
    }

    /// First address of the range (inclusive).
    pub fn base(&self) -> u64 { self.start }

    /// Length of the range in bytes; always > 0 by construction.
    pub fn size(&self) -> usize { (self.end - self.start) as usize }

    /// True when the size is a power of two (size is never 0, so the
    /// `sz - 1` below cannot underflow).
    pub fn is_base2_sized(&self) -> bool {
        let sz = self.size();
        sz & (sz - 1) == 0
    }

    /// True when the range is power-of-two sized and its base is aligned to
    /// its own size.
    pub fn is_naturally_aligned(&self) -> bool {
        self.is_base2_sized() && (self.base() % (self.size() as u64) == 0)
    }
}

172
src/memory/mmap.rs Normal file
View File

@ -0,0 +1,172 @@
use libc;
use std::ptr;
use std::slice;
use std::mem;
use std::io::Write;
use std::os::unix::io::RawFd;
use vm::{Result,Error,ErrorKind};
/// An owned anonymous or fd-backed `mmap()` region of `size` bytes at `ptr`;
/// unmapped on drop.
pub struct Mapping {
    ptr: *mut u8,
    size: usize,
}

/// Marks types that can be passed to `write_int` and returned from `read_int`
// SAFETY contract for implementors: the type must be plain-old-data — any bit
// pattern is a valid value — since values are read/written with volatile
// pointer casts into the raw mapping.
pub unsafe trait Serializable: Copy+Send+Sync {}
unsafe impl Serializable for u8 {}
unsafe impl Serializable for u16 {}
unsafe impl Serializable for u32 {}
unsafe impl Serializable for u64 {}

// SAFETY: the pointer refers to a private mmap allocation owned by this
// Mapping for its whole lifetime, so sending/sharing the handle across
// threads does not create dangling access.
// NOTE(review): the accessors below take &self yet permit mutation, so
// concurrent use is not data-race free in the usual Rust sense — callers
// must provide their own synchronization. Verify this matches usage.
unsafe impl Send for Mapping {}
unsafe impl Sync for Mapping {}

/// A block of memory returned from the mmap() system call. Provides safe access to the raw
/// memory region.
impl Mapping {
    /// Creates a new anonymous mapping of `size` bytes.
    ///
    /// # Errors
    /// Returns [`Err`] if the `mmap()` system call fails and returns an `Error` representing
    /// the system error which occurred.
    ///
    pub fn new(size: usize) -> Result<Mapping> {
        Mapping::_new(size, libc::MAP_ANONYMOUS | libc::MAP_SHARED | libc::MAP_NORESERVE, -1)
    }

    /// Creates a new mapping of `size` bytes from the object referenced by file descriptor `fd`
    ///
    /// # Errors
    /// Returns [`Err`] if the `mmap()` system call fails and returns an `Error` representing
    /// the system error which occurred.
    ///
    pub fn new_from_fd(fd: RawFd, size: usize) -> Result<Mapping> {
        Mapping::_new(size, libc::MAP_SHARED, fd)
    }

    fn _new(size: usize, flags: libc::c_int, fd: RawFd) -> Result<Mapping> {
        let p = unsafe { mmap_allocate(size, flags, fd)? };
        Ok(Mapping {
            ptr: p,
            size
        })
    }

    /// Ensure that `offset` is not larger than this allocation.
    ///
    /// # Errors
    ///
    /// Returns [`Err`] of kind `InvalidMappingOffset` if passed an
    /// illegal `offset`
    ///
    // NOTE(review): callers pass `offset + len`; that addition is unchecked
    // and could wrap for adversarial offsets near usize::MAX — confirm all
    // call sites use bounded offsets.
    fn check_offset(&self, offset: usize) -> Result<()> {
        if offset > self.size {
            Err(Error::from(ErrorKind::InvalidMappingOffset(offset)))
        } else {
            Ok(())
        }
    }

    /// Return the pointer address of this allocation.
    pub fn address(&self) -> u64 {
        self.ptr as u64
    }

    /// Read and return an integer value in native byte order from `offset` into the memory allocation
    ///
    /// # Errors
    /// Returns [`Err`] of kind `InvalidMappingOffset` if passed an
    /// illegal `offset`
    ///
    pub fn read_int<T: Serializable>(&self, offset: usize) -> Result<T> {
        self.check_offset(offset + mem::size_of::<T>())?;
        // SAFETY: check_offset verified the whole T fits inside the mapping.
        unsafe {
            Ok(ptr::read_volatile(&self.as_slice()[offset..] as *const _ as *const T))
        }
    }

    /// Write the integer `val` in native byte order at `offset` into the memory allocation
    ///
    /// # Errors
    /// Returns [`Err`] of kind `InvalidMappingOffset` if passed an
    /// illegal `offset`
    ///
    pub fn write_int<T: Serializable>(&self, offset: usize, val: T) -> Result<()> {
        self.check_offset(offset + mem::size_of::<T>())?;
        // SAFETY: check_offset verified the whole T fits inside the mapping.
        unsafe { ptr::write_volatile(&mut self.as_mut_slice()[offset..] as *mut _ as *mut T, val); }
        Ok(())
    }

    /// Copies all of `bytes` into the mapping starting at `offset`.
    pub fn write_bytes(&self, offset: usize, bytes: &[u8]) -> Result<()> {
        self.check_offset(offset + bytes.len())?;
        // SAFETY: the destination span was bounds-checked above.
        unsafe {
            let mut slice: &mut [u8] = &mut self.as_mut_slice()[offset..];
            slice.write_all(bytes).map_err(|_| Error::from(ErrorKind::InvalidMappingOffset(offset)))
        }
    }

    /// Fills `bytes` from the mapping starting at `offset`.
    pub fn read_bytes(&self, offset: usize, mut bytes: &mut [u8]) -> Result<()> {
        self.check_offset(offset + bytes.len())?;
        // SAFETY: the source span was bounds-checked above; `write` into a
        // &mut [u8] copies min(lengths) which here is exactly bytes.len().
        unsafe {
            let slice: &[u8] = &self.as_slice()[offset..];
            bytes.write(slice).unwrap();
            Ok(())
        }
    }

    /// Borrow `size` bytes of the mapping at `offset` as an immutable slice.
    pub fn slice(&self, offset: usize, size: usize) -> Result<&[u8]> {
        self.check_offset(offset + size)?;
        // SAFETY: span bounds-checked above.
        unsafe {
            let x = &self.as_slice()[offset..offset+size];
            Ok(x)
        }
    }

    /// Borrow `size` bytes of the mapping at `offset` as a mutable slice.
    // NOTE(review): taking &self but returning &mut [u8] allows aliased
    // mutable views of the same memory; the mapping is treated as shared
    // guest RAM, but this bypasses Rust's aliasing guarantees — audit users.
    pub fn mut_slice(&self, offset: usize, size: usize) -> Result<&mut [u8]> {
        self.check_offset(offset + size)?;
        // SAFETY: span bounds-checked above.
        unsafe {
            let x = &mut self.as_mut_slice()[offset..offset+size];
            Ok(x)
        }
    }

    /// Advises the kernel that identical pages in this region may be merged
    /// (KSM, `MADV_MERGEABLE`).
    #[allow(dead_code)]
    pub fn set_mergeable(&self) -> Result<()> {
        // SAFETY: ptr/size describe a live mapping owned by self.
        unsafe {
            if libc::madvise(self.ptr as *mut libc::c_void, self.size, libc::MADV_MERGEABLE) == -1 {
                return Err(Error::from_last_errno());
            }
        }
        Ok(())
    }

    // SAFETY (both): ptr is valid for size bytes for the lifetime of self.
    unsafe fn as_slice(&self) -> &[u8] {
        slice::from_raw_parts(self.ptr, self.size)
    }

    unsafe fn as_mut_slice(&self) -> &mut [u8] {
        slice::from_raw_parts_mut(self.ptr, self.size)
    }
}

impl Drop for Mapping {
    fn drop(&mut self) {
        // SAFETY: ptr/size came from a successful mmap and are unmapped
        // exactly once here; munmap errors cannot be handled in drop.
        unsafe {
            libc::munmap(self.ptr as *mut libc::c_void, self.size);
        }
    }
}

/// Calls `mmap()` for `size` readable+writable bytes with the given flags,
/// returning the errno-derived error on failure.
// The null check is defensive; failure is normally signalled by MAP_FAILED.
unsafe fn mmap_allocate(size: usize, flags: libc::c_int, fd: libc::c_int) -> Result<*mut u8> {
    let p = libc::mmap(ptr::null_mut(),
                       size, libc::PROT_READ|libc::PROT_WRITE,
                       flags, fd, 0);
    if p.is_null() || p == libc::MAP_FAILED {
        return Err(Error::from_last_errno());
    }
    Ok(p as *mut u8)
}

13
src/memory/mod.rs Normal file
View File

@ -0,0 +1,13 @@
mod ram;
mod mmap;
mod address;
pub use self::address::AddressRange;
pub use self::mmap::Mapping;
pub use self::ram::GuestRam;
pub use self::ram::{PCI_MMIO_RESERVED_BASE,HIMEM_BASE};
// Guest-physical load address for the kernel image (16MB).
pub const KVM_KERNEL_LOAD_ADDRESS: u64 = 0x1000000;
// Guest-physical address where the kernel command line is written.
pub const KERNEL_CMDLINE_ADDRESS: u64 = 0x20000;
// Guest-physical address of the boot-protocol "zero page".
pub const KERNEL_ZERO_PAGE: u64 = 0x7000;

150
src/memory/ram.rs Normal file
View File

@ -0,0 +1,150 @@
use std::sync::Arc;
use std::cmp;
use std::mem;
use memory::Mapping;
use memory::mmap::Serializable;
use memory::AddressRange;
use kvm::Kvm;
use vm::{Result,Error,ErrorKind};
// Guest RAM above 4GB starts here; the 512MB directly below 4GB is reserved
// for PCI MMIO, so RAM is split into a low region and (if large enough) a
// high region.
pub const HIMEM_BASE: u64 = (1 << 32);
pub const PCI_MMIO_RESERVED_SIZE: usize = (512 << 20);
pub const PCI_MMIO_RESERVED_BASE: u64 = HIMEM_BASE - PCI_MMIO_RESERVED_SIZE as u64;

/// All RAM registered with the VM, addressable by guest-physical address.
/// Cloning is cheap: the region list is Arc-shared.
#[derive(Clone)]
pub struct GuestRam {
    ram_size: usize,
    regions: Arc<Vec<MemoryRegion>>,
}

impl GuestRam {
    /// Allocates `ram_size` bytes of guest RAM (split around the PCI MMIO
    /// hole) and registers each region with KVM.
    pub fn new(ram_size: usize, kvm: &Kvm) -> Result<GuestRam> {
        Ok(GuestRam {
            ram_size,
            regions: Arc::new(create_regions(kvm, ram_size)?),
        })
    }

    /// Total RAM size in bytes.
    pub fn ram_size(&self) -> usize {
        self.ram_size
    }

    /// Copies `bytes` into guest memory at `guest_address`; fails with
    /// `InvalidAddress` if the span is not fully inside one region.
    pub fn write_bytes(&self, guest_address: u64, bytes: &[u8]) -> Result<()> {
        let region = self.find_region(guest_address, bytes.len())?;
        region.write_bytes(guest_address, bytes)
    }

    /// Fills `bytes` from guest memory at `guest_address`.
    pub fn read_bytes(&self, guest_address: u64, bytes: &mut [u8]) -> Result<()> {
        let region = self.find_region(guest_address, bytes.len())?;
        region.read_bytes(guest_address, bytes)
    }

    /// Borrows `size` bytes of guest memory as an immutable slice.
    #[allow(dead_code)]
    pub fn slice(&self, guest_address: u64, size: usize) -> Result<&[u8]> {
        let region = self.find_region(guest_address, size)?;
        region.slice(guest_address, size)
    }

    /// Borrows `size` bytes of guest memory as a mutable slice.
    // NOTE(review): &self -> &mut [u8] allows aliased mutable views (see
    // Mapping::mut_slice); callers must coordinate access themselves.
    pub fn mut_slice(&self, guest_address: u64, size: usize) -> Result<&mut[u8]> {
        let region = self.find_region(guest_address, size)?;
        region.mut_slice(guest_address, size)
    }

    /// Writes an integer in native byte order at `guest_address`.
    pub fn write_int<T: Serializable>(&self, guest_address: u64, val: T) -> Result<()> {
        let region = self.find_region(guest_address, mem::size_of::<T>())?;
        region.write_int(guest_address, val)
    }

    /// Reads an integer in native byte order from `guest_address`.
    pub fn read_int<T: Serializable>(&self, guest_address: u64) -> Result<T> {
        let region = self.find_region(guest_address, mem::size_of::<T>())?;
        region.read_int(guest_address)
    }

    /// True when the whole span lies inside registered guest RAM.
    pub fn is_valid_range(&self, guest_address: u64, size: usize) -> bool {
        self.find_region(guest_address, size).is_ok()
    }

    /// Finds the single region containing the whole span; spans may not
    /// cross a region boundary.
    fn find_region(&self, guest_address: u64, size: usize) -> Result<&MemoryRegion> {
        self.regions.iter()
            .find(|r| r.contains(guest_address, size))
            .ok_or_else(|| Error::from(ErrorKind::InvalidAddress(guest_address)))
    }
}

/// Allocates one host mapping of `size` bytes, registers it with KVM at
/// guest address `base` (slot = current region count), and appends it.
fn add_region(regions: &mut Vec<MemoryRegion>, base: u64, size: usize, kvm: &Kvm) -> Result<()> {
    let slot = regions.len();
    let mr = MemoryRegion::new(base, size)?;
    kvm.add_memory_region(slot, base, mr.mapping.address(), size)
        .map_err(|e| Error::new(ErrorKind::RegisterMemoryFailed, e))?;
    regions.push(mr);
    Ok(())
}

/// Splits `ram_size` into a low region below the PCI MMIO hole and, when
/// RAM exceeds that, a second region starting at 4GB.
fn create_regions(kvm: &Kvm, ram_size: usize) -> Result<Vec<MemoryRegion>> {
    let mut regions = Vec::new();
    let lowmem_sz = cmp::min(ram_size, PCI_MMIO_RESERVED_BASE as usize);
    add_region(&mut regions, 0, lowmem_sz, &kvm)?;
    if lowmem_sz < ram_size {
        let himem_sz = ram_size - lowmem_sz;
        add_region(&mut regions, HIMEM_BASE, himem_sz, &kvm)?;
    }
    Ok(regions)
}

/// One contiguous chunk of guest RAM: the guest-physical range it covers
/// plus the host mmap backing it.
struct MemoryRegion {
    guest_range: AddressRange,
    mapping: Mapping,
}

impl MemoryRegion {
    fn new(guest_base: u64, size: usize) -> Result<MemoryRegion> {
        Ok(MemoryRegion{
            guest_range: AddressRange::new(guest_base, size),
            mapping: Mapping::new(size)?,
        })
    }

    /// True when the whole guest span lies inside this region.
    fn contains(&self, guest_addr: u64, size: usize) -> bool { self.guest_range.contains(guest_addr, size) }

    /// Converts a contained guest address to an offset into the mapping,
    /// or fails with `InvalidAddress`.
    fn checked_offset(&self, guest_addr: u64, size: usize) -> Result<usize> {
        if self.contains(guest_addr, size) {
            Ok(self.guest_range.offset_of(guest_addr))
        } else {
            Err(Error::from(ErrorKind::InvalidAddress(guest_addr)))
        }
    }

    pub fn write_bytes(&self, guest_address: u64, bytes: &[u8]) -> Result<()> {
        let offset = self.checked_offset(guest_address, bytes.len())?;
        self.mapping.write_bytes(offset, bytes)
    }

    pub fn read_bytes(&self, guest_address: u64, bytes: &mut [u8]) -> Result<()> {
        let offset = self.checked_offset(guest_address, bytes.len())?;
        self.mapping.read_bytes(offset, bytes)
    }

    pub fn slice(&self, guest_address: u64, size: usize) -> Result<&[u8]> {
        let offset = self.checked_offset(guest_address, size)?;
        self.mapping.slice(offset, size)
    }

    pub fn mut_slice(&self, guest_address: u64, size: usize) -> Result<&mut [u8]> {
        let offset = self.checked_offset(guest_address, size)?;
        self.mapping.mut_slice(offset, size)
    }

    pub fn write_int<T: Serializable>(&self, guest_address: u64, val: T) -> Result<()> {
        let offset = self.checked_offset(guest_address, mem::size_of::<T>())?;
        self.mapping.write_int(offset, val)
    }

    pub fn read_int<T: Serializable>(&self, guest_address: u64) -> Result<T> {
        let offset = self.checked_offset(guest_address, mem::size_of::<T>())?;
        self.mapping.read_int(offset)
    }
}

71
src/system/ioctl.rs Normal file
View File

@ -0,0 +1,71 @@
use libc::{self, c_ulong, c_void};
use std::os::unix::io::RawFd;
use vm::{Error,Result};
// Field widths, shifts and masks of the Linux _IOC ioctl request encoding:
// a request number packs direction, type, number and argument size as
// dir << 30 | size << 16 | type << 8 | nr (see asm-generic/ioctl.h).
pub const IOC_SIZEBITS: u64 = 14;
pub const IOC_DIRBITS: u64 = 2;

pub const IOC_NONE: u64 = 0;
pub const IOC_READ: u64 = 2;
pub const IOC_WRITE: u64 = 1;
pub const IOC_RDWR: u64 = IOC_READ | IOC_WRITE;
pub const IOC_NRBITS: u64 = 8;
pub const IOC_TYPEBITS: u64 = 8;

pub const IOC_NRSHIFT: u64 = 0;
pub const IOC_TYPESHIFT: u64 = IOC_NRSHIFT + IOC_NRBITS;
pub const IOC_SIZESHIFT: u64 = IOC_TYPESHIFT + IOC_TYPEBITS;
pub const IOC_DIRSHIFT: u64 = IOC_SIZESHIFT + IOC_SIZEBITS;

pub const IOC_NRMASK: u64 = (1 << IOC_NRBITS) - 1;
pub const IOC_TYPEMASK: u64 = (1 << IOC_TYPEBITS) - 1;
pub const IOC_SIZEMASK: u64 = (1 << IOC_SIZEBITS) - 1;
pub const IOC_DIRMASK: u64 = (1 << IOC_DIRBITS) - 1;

// Equivalent of the kernel's _IOC() macro: packs (dir, type, nr, size)
// into a single ioctl request code.
macro_rules! ioc {
    ($dir:expr, $ty:expr, $nr:expr, $sz:expr) => (
        ((($dir as u64 & $crate::system::ioctl::IOC_DIRMASK) << $crate::system::ioctl::IOC_DIRSHIFT) |
        (($ty as u64 & $crate::system::ioctl::IOC_TYPEMASK) << $crate::system::ioctl::IOC_TYPESHIFT) |
        (($nr as u64 & $crate::system::ioctl::IOC_NRMASK) << $crate::system::ioctl::IOC_NRSHIFT) |
        (($sz as u64 & $crate::system::ioctl::IOC_SIZEMASK) << $crate::system::ioctl::IOC_SIZESHIFT)) as c_ulong)
}

// _IO: ioctl with no argument transfer.
macro_rules! io {
    ($ty:expr, $nr:expr) => (ioc!($crate::system::ioctl::IOC_NONE, $ty, $nr, 0))
}

// _IOW: userspace writes an argument of $sz bytes to the kernel.
macro_rules! iow {
    ($ty:expr, $nr:expr, $sz:expr) => (ioc!($crate::system::ioctl::IOC_WRITE, $ty, $nr, $sz))
}

// _IOR: userspace reads an argument of $sz bytes from the kernel.
macro_rules! ior {
    ($ty:expr, $nr:expr, $sz:expr) => (ioc!($crate::system::ioctl::IOC_READ, $ty, $nr, $sz))
}

// _IOWR: argument of $sz bytes is transferred in both directions.
macro_rules! iorw {
    ($ty:expr, $nr:expr, $sz:expr) => (ioc!($crate::system::ioctl::IOC_RDWR, $ty, $nr, $sz))
}

/// Issues `ioctl(fd, request, val)` with a plain integer argument.
///
/// # Safety
/// `request` must be valid for `fd` and must not expect a pointer argument.
pub unsafe fn ioctl_with_val(fd: RawFd, request: c_ulong, val: c_ulong) -> Result<u32> {
    let ret = libc::ioctl(fd, request, val);
    if ret < 0 {
        return Err(Error::from_last_errno());
    }
    Ok(ret as u32)
}

/// Issues `ioctl(fd, request, arg)` passing `arg` by const pointer.
///
/// # Safety
/// `request` must be valid for `fd` and expect a read-only argument whose
/// layout matches `T`.
pub unsafe fn ioctl_with_ref<T>(fd: RawFd, request: c_ulong, arg: &T) -> Result<u32> {
    let ret = libc::ioctl(fd, request, arg as *const T as *const c_void);
    if ret < 0 {
        return Err(Error::from_last_errno());
    }
    Ok(ret as u32)
}

/// Issues `ioctl(fd, request, arg)` passing `arg` by mutable pointer so the
/// kernel can write back into it.
///
/// # Safety
/// `request` must be valid for `fd` and write at most `size_of::<T>()` bytes
/// in a layout matching `T`.
pub unsafe fn ioctl_with_mut_ref<T>(fd: RawFd, request: c_ulong, arg: &mut T) -> Result<u32> {
    let ret = libc::ioctl(fd, request, arg as *mut T as *mut c_void);
    if ret < 0 {
        return Err(Error::from_last_errno());
    }
    Ok(ret as u32)
}

2
src/system/mod.rs Normal file
View File

@ -0,0 +1,2 @@
// `#[macro_use]` is required so the ioc!/io!/iow!/ior!/iorw! macro_rules!
// macros defined in `ioctl` are visible to sibling modules.
#[macro_use]
pub mod ioctl;

149
src/virtio/bus.rs Normal file
View File

@ -0,0 +1,149 @@
use std::sync::{Arc,RwLock};
use vm::io::IoDispatcher;
use kvm::Kvm;
use memory::{GuestRam,AddressRange};
use super::{VirtioDevice,VirtioDeviceOps,PciIrq};
use super::consts::*;
use super::pci::PciBus;
use vm::Result;
/// Container for all virtio devices in the VM. Owns the PCI bus they are
/// exposed on and the list of registered `VirtioDevice` transports.
pub struct VirtioBus {
    kvm: Kvm,
    memory: GuestRam,
    io_dispatcher: Arc<IoDispatcher>,
    pci_bus: Arc<RwLock<PciBus>>,
    // Devices registered via VirtioDeviceConfig::register().
    devices: Vec<Arc<RwLock<VirtioDevice>>>,
}
impl VirtioBus {
    /// Create a new `VirtioBus` backed by `memory`, registering a fresh
    /// `PciBus` on `io_dispatcher` to host the virtio PCI functions.
    pub fn new(memory: GuestRam, io_dispatcher: Arc<IoDispatcher>, kvm: Kvm) -> VirtioBus {
        // Build the PCI bus first, then move the Arc into the struct; the
        // previous version took a redundant extra clone of io_dispatcher.
        let pci_bus = PciBus::new(&io_dispatcher);
        VirtioBus {
            kvm,
            memory,
            io_dispatcher,
            pci_bus,
            devices: Vec::new(),
        }
    }

    /// Start configuring a new virtio device of `device_type` driven by
    /// `ops`; the returned builder is finalized with `register()`.
    pub fn new_virtio_device(&mut self, device_type: u16, ops: Arc<RwLock<VirtioDeviceOps>>) -> VirtioDeviceConfig {
        VirtioDeviceConfig::new(self, device_type, ops)
    }

    /// Snapshot of the PCI interrupt routing for every device on the bus.
    pub fn pci_irqs(&self) -> Vec<PciIrq> {
        self.pci_bus.read().unwrap().pci_irqs()
    }
}
/// Builder used while wiring a new virtio device onto the bus: collects
/// queue count, config size, feature bits and PCI class before `register()`
/// creates the actual `VirtioDevice`.
pub struct VirtioDeviceConfig<'a> {
    virtio_bus: &'a mut VirtioBus,
    device_type: u16,
    // PCI interrupt line; 0 until assigned by create_pci_device().
    irq: u8,
    kvm: Kvm,
    ops: Arc<RwLock<VirtioDeviceOps>>,
    // MMIO window allocated from the PCI bus for this device's BAR.
    mmio: AddressRange,
    num_queues: usize,
    config_size: usize,
    device_class: u16,
    features: u64,
}
impl <'a> VirtioDeviceConfig<'a> {
    /// Allocate this device's MMIO window from the PCI bus and seed the
    /// builder with defaults (no queues, no device config area).
    fn new(virtio_bus: &mut VirtioBus, device_type: u16, ops: Arc<RwLock<VirtioDeviceOps>>) -> VirtioDeviceConfig {
        let kvm = virtio_bus.kvm.clone();
        let mmio = virtio_bus.pci_bus.write().unwrap().allocate_mmio_space(VIRTIO_MMIO_AREA_SIZE);
        VirtioDeviceConfig {
            virtio_bus,
            device_type,
            irq: 0, // assigned later in create_pci_device()
            kvm,
            ops,
            mmio,
            num_queues: 0,
            config_size: 0,
            features: 0,
            device_class: 0x0880, // default class; overridable via set_device_class()
        }
    }

    pub fn kvm(&self) -> &Kvm { &self.kvm }

    pub fn ops(&self) -> Arc<RwLock<VirtioDeviceOps>> {
        self.ops.clone()
    }

    /// PCI interrupt line; only meaningful after `register()` has run.
    pub fn irq(&self) -> u8 { self.irq }

    /// Subrange of the MMIO window holding the common configuration struct.
    pub fn common_cfg_mmio(&self) -> AddressRange {
        self.mmio.subrange(VIRTIO_MMIO_OFFSET_COMMON_CFG, VIRTIO_MMIO_COMMON_CFG_SIZE).unwrap()
    }

    /// Subrange of the MMIO window holding the queue-notify area.
    pub fn notify_mmio(&self) -> AddressRange {
        self.mmio.subrange(VIRTIO_MMIO_OFFSET_NOTIFY, VIRTIO_MMIO_NOTIFY_SIZE).unwrap()
    }

    /// Subrange of the MMIO window holding the ISR status register.
    pub fn isr_mmio(&self) -> AddressRange {
        self.mmio.subrange(VIRTIO_MMIO_OFFSET_ISR, VIRTIO_MMIO_ISR_SIZE).unwrap()
    }

    /// Device-specific config subrange, or `None` when no config space was
    /// requested (config_size == 0).
    pub fn device_cfg_mmio(&self) -> Option<AddressRange> {
        if self.config_size > 0 {
            Some(self.mmio.subrange(VIRTIO_MMIO_OFFSET_DEV_CFG, self.config_size).unwrap())
        } else {
            None
        }
    }

    pub fn feature_bits(&self) -> u64 {
        self.features
    }

    pub fn num_queues(&self) -> usize {
        self.num_queues
    }

    #[allow(dead_code)]
    pub fn config_size(&self) -> usize {
        self.config_size
    }

    // Builder-style setters.
    // NOTE(review): these return `&'a mut VirtioDeviceConfig`, tying the
    // result to the struct's lifetime parameter rather than to the `&mut
    // self` borrow -- confirm this is intended and still compiles on newer
    // toolchains.
    pub fn set_num_queues(&mut self, n: usize) -> &'a mut VirtioDeviceConfig {
        self.num_queues = n;
        self
    }

    pub fn set_config_size(&mut self, sz: usize) -> &'a mut VirtioDeviceConfig {
        self.config_size = sz;
        self
    }

    pub fn set_device_class(&mut self, cl: u16) -> &'a mut VirtioDeviceConfig {
        self.device_class = cl;
        self
    }

    pub fn set_features(&mut self, features: u64) -> &'a mut VirtioDeviceConfig {
        self.features = features;
        self
    }

    /// Finalize the device: create its PCI function, force
    /// VIRTIO_F_VERSION_1, build the `VirtioDevice` transport and hook its
    /// MMIO window into the I/O dispatcher.
    pub fn register(&mut self) -> Result<()> {
        self.create_pci_device();
        self.features |= VIRTIO_F_VERSION_1;
        //self.features |= VIRTIO_F_EVENT_IDX;
        let dev = VirtioDevice::new(self.virtio_bus.memory.clone(), &self)?;
        self.virtio_bus.io_dispatcher.register_mmio(self.mmio, dev.clone());
        self.virtio_bus.devices.push(dev);
        Ok(())
    }

    /// Create and store the backing PCI function and record the IRQ the bus
    /// assigned to it.
    fn create_pci_device(&mut self) {
        let mut pci_bus = self.virtio_bus.pci_bus.write().unwrap();
        let mut pci = pci_bus.create_device(PCI_VENDOR_ID_REDHAT, PCI_VIRTIO_DEVICE_ID_BASE + self.device_type, self.device_class);
        pci.add_virtio_caps(self.config_size);
        pci.set_mmio_bar(VIRTIO_MMIO_BAR, self.mmio);
        self.irq = pci.get_irq();
        pci_bus.store_device(pci);
    }
}

269
src/virtio/chain.rs Normal file
View File

@ -0,0 +1,269 @@
use std::io::{self,Read,Write};
use memory::GuestRam;
use super::VirtQueue;
use super::vring::Descriptor;
/// Cursor over a guest-supplied chain of virtqueue descriptors. Readable
/// descriptors can be consumed via `Read`, writeable ones filled via
/// `Write`; on flush (or drop) the chain is returned on the used ring.
pub struct Chain {
    // Guest memory used to access the buffers the descriptors point at.
    memory: GuestRam,
    // Queue the chain came from; used to load descriptors and fill the
    // used ring.
    vq: VirtQueue,
    /// Number of remaining descriptors allowed in this chain.
    ttl: u16,
    /// Current descriptor or `None` if at end of chain
    current: Option<Descriptor>,
    /// Offset for read/write into current descriptor
    offset: usize,
    /// Saved head index to place in used ring. Set to `None`
    /// after writing to used ring.
    head_idx: Option<u16>,
    /// Number of bytes written into writeable descriptors
    /// in this chain. Will be written into used ring later.
    wlen: usize,
}
impl Chain {
    /// Start a chain at descriptor-table index `head`, allowing at most
    /// `ttl` descriptors. `head` is remembered so the chain can later be
    /// returned to the guest on the used ring.
    pub fn new(memory: GuestRam, vq: VirtQueue, head: u16, ttl: u16) -> Chain {
        let first = vq.load_descriptor(head);
        Chain {
            memory,
            vq, ttl, head_idx: Some(head),
            current: first,
            offset: 0, wlen: 0,
        }
    }

    /// Applies a function to the current descriptor (if `Some`) or
    /// returns default parameter `d` (if `None`).
    pub fn with_current_descriptor<U,F>(&self, d: U, f: F) -> U
        where F: FnOnce(&Descriptor) -> U {
        match self.current {
            Some(ref desc) => f(desc),
            None => d,
        }
    }

    /// Load and return next descriptor from chain.
    ///
    /// If `self.current`
    ///
    /// 1) holds a descriptor (`self.current.is_some()`)
    /// 2) that descriptor has a next field (`desc.has_next()`)
    /// 3) time-to-live is not zero (`self.ttl > 0`)
    ///
    /// then load and return the descriptor pointed to by the current
    /// descriptor. Returns `None` otherwise.
    ///
    fn next_desc(&self) -> Option<Descriptor> {
        self.with_current_descriptor(None, |desc| {
            if desc.has_next() && self.ttl > 0 {
                self.vq.load_descriptor(desc.next)
            } else {
                None
            }
        })
    }

    /// Load next descriptor in chain into `self.current`.
    ///
    /// Set `self.current` to the next descriptor in chain or `None` if
    /// at end of chain.
    ///
    pub fn load_next_descriptor(&mut self) {
        self.current = self.next_desc();
        // Only decrement ttl if a new descriptor was loaded
        if self.current.is_some() {
            self.ttl -= 1;
        }
        self.offset = 0;
    }

    ///
    /// Return `true` if current descriptor exists and is readable, otherwise
    /// `false`.
    ///
    pub fn is_current_readable(&self) -> bool {
        self.with_current_descriptor(false, |desc| !desc.is_write())
    }

    ///
    /// If `current` is a writeable descriptor, keep loading new descriptors until
    /// a readable descriptor is found or end of chain is reached. After this
    /// call `current` will either be a readable descriptor or `None` if the
    /// end of chain was reached.
    ///
    pub fn skip_readable(&mut self) {
        while self.is_current_readable() {
            self.load_next_descriptor();
        }
    }

    /// Return `true` if the end of the descriptor chain has been reached.
    ///
    /// When at end of chain `self.current` is `None`.
    pub fn is_end_of_chain(&self) -> bool {
        self.current.is_none()
    }

    ///
    /// Length field of current descriptor is returned or 0 if
    /// at end of chain.
    ///
    fn current_size(&self) -> usize {
        self.with_current_descriptor(0, |desc| desc.len as usize)
    }

    ///
    /// Increment `self.offset` with the number of bytes
    /// read or written from `current` descriptor and
    /// load next descriptor if `current` descriptor
    /// has been fully consumed.
    ///
    fn inc_offset(&mut self, sz: usize) {
        self.offset += sz;
        if self.offset >= self.current_size() {
            self.load_next_descriptor();
        }
    }

    ///
    /// Read from the `current` readable descriptor and return
    /// the number of bytes read.
    ///
    /// If this read exhausts the `current` descriptor then the
    /// next descriptor in chain will be loaded into `current`.
    ///
    /// Assumes that current is a readable descriptor so caller must
    /// call `self.is_current_readable()` before calling this.
    ///
    fn read_current(&mut self, bytes: &mut[u8]) -> usize {
        assert!(self.is_current_readable());
        let nread = self.with_current_descriptor(0, |desc| {
            desc.read_from(&self.memory, self.offset, bytes)
        });
        self.inc_offset(nread);
        nread
    }

    ///
    /// Write into the `current` writeable descriptor if it exists
    /// and return the number of bytes written or 0 if at end of chain.
    ///
    /// If this write exausts the `current` descriptor then the
    /// next descriptor in chain will be loaded into `current`
    ///
    /// Assumes that `current` is a writeable descriptor or `None`
    /// so caller must call `self.skip_readable()` before calling this.
    ///
    fn write_current(&mut self, bytes: &[u8]) -> usize {
        assert!(!self.is_current_readable());
        let sz = self.with_current_descriptor(0, |desc| {
            desc.write_to(&self.memory, self.offset, bytes)
        });
        self.inc_offset(sz);
        sz
    }

    ///
    /// Write this chain head index (`self.head_idx`) and bytes written (`self.wlen`)
    /// into used ring. Consumes `self.head_idx` so that used ring cannot
    /// accidentally be written more than once. Since we have returned this
    /// chain to the guest, it is no longer valid to access any descriptors in
    /// this chain so `self.current` is set to `None`.
    ///
    pub fn flush_chain(&mut self) {
        match self.head_idx {
            Some(idx) => self.vq.put_used(idx, self.wlen as u32),
            None => (),
        }
        self.current = None;
        self.head_idx = None;
    }

    /// Skip to the writeable part of the chain and return the guest address
    /// at which `size` bytes can be written into the current descriptor, or
    /// `None` if the current descriptor has fewer than `size` bytes left
    /// (or the chain is exhausted).
    pub fn current_write_address(&mut self, size: usize) -> Option<u64> {
        self.skip_readable();
        self.with_current_descriptor(None, |desc| {
            if desc.len as usize - self.offset < size {
                None
            } else {
                Some(desc.addr + self.offset as u64)
            }
        })
    }

    /// Total bytes written into this chain so far (reported to the guest on
    /// flush).
    pub fn get_wlen(&self) -> usize {
        self.wlen
    }

    /// Debug helper: print the current offset and descriptor.
    #[allow(dead_code)]
    pub fn debug(&self) {
        self.with_current_descriptor((), |desc| {
            println!("offset: {} desc: {:?}", self.offset, desc);
        });
    }

    /// Copy up to `size` bytes from reader `r` directly into the chain's
    /// current writeable descriptor, advancing the chain and the written
    /// byte count on success.
    pub fn copy_from_reader<R: Read+Sized>(&mut self, r: R, size: usize) -> io::Result<usize> {
        self.skip_readable();
        assert!(!self.is_current_readable());
        let res = self.with_current_descriptor(Ok(0usize), |desc| {
            desc.write_from_reader(&self.memory, self.offset,r, size)
        });
        if let Ok(nread) = res {
            self.inc_offset(nread);
            self.wlen += nread;
        }
        res
    }

    /*
    pub fn copy_to_writer<W: Write+Sized>(&mut self, w: W, size: usize) -> io::Result<usize> {
        unimplemented!()
    }
    */
}
impl Drop for Chain {
    fn drop(&mut self) {
        // Guarantee the chain is returned to the guest on the used ring even
        // if device code never called flush_chain() explicitly. flush_chain()
        // consumes head_idx, so an earlier explicit flush makes this a no-op.
        self.flush_chain();
    }
}
impl Read for Chain {
    /// Read from the chain's readable descriptors into `buf`.
    ///
    /// Never fails, but may return fewer bytes than requested when the
    /// readable portion of the chain is exhausted.
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        let mut total = 0usize;
        loop {
            if total >= buf.len() || !self.is_current_readable() {
                break;
            }
            total += self.read_current(&mut buf[total..]);
        }
        Ok(total)
    }
}
impl Write for Chain {
    /// Write `buf` into the chain's writeable descriptors.
    ///
    /// Never fails, but may write fewer bytes than requested when the
    /// writeable portion of the chain is exhausted. Bytes written are
    /// accumulated into `wlen` for later reporting on the used ring.
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.skip_readable();
        let mut total = 0usize;
        loop {
            if self.is_end_of_chain() || total >= buf.len() {
                break;
            }
            total += self.write_current(&buf[total..]);
        }
        self.wlen += total;
        Ok(total)
    }

    /// Nothing is buffered, so flushing is a no-op.
    fn flush(&mut self) -> io::Result<()> {
        Ok(())
    }
}

132
src/virtio/config.rs Normal file
View File

@ -0,0 +1,132 @@
use memory::GuestRam;
use std::sync::Arc;
use vm::Result;
use super::VirtQueue;
use super::eventfd::IoEventFd;
use super::vring::Vring;
use super::virtqueue::InterruptLine;
use super::bus::VirtioDeviceConfig;
use super::consts::DEFAULT_QUEUE_SIZE;
///
/// Manages a set of virtqueues during device initialization.
///
pub struct VirtQueueConfig {
    num_queues: usize,
    // Index written by the driver to VIRTIO_PCI_COMMON_Q_SELECT.
    selected_queue: u16,
    // Feature bits recorded via enable_features().
    enabled_features: u64,
    // One vring per queue, created up front with the default size.
    vrings: Vec<Vring>,
    interrupt: Arc<InterruptLine>,
    // One KVM ioeventfd per queue for doorbell notifications.
    events: Vec<Arc<IoEventFd>>,
}
impl VirtQueueConfig {
    /// Build queue state for a device: one vring and one ioeventfd per
    /// queue plus a shared interrupt line.
    pub fn new(memory: &GuestRam, dev_config: &VirtioDeviceConfig) -> Result<VirtQueueConfig> {
        Ok(VirtQueueConfig {
            num_queues: dev_config.num_queues(),
            selected_queue: 0,
            enabled_features: 0,
            vrings: create_vrings(memory,dev_config.num_queues()),
            interrupt: InterruptLine::from_config(&dev_config)?,
            events: create_ioeventfds(&dev_config)?,
        })
    }

    /// Read (and thereby clear) the ISR status via the interrupt line.
    pub fn isr_read(&self) -> u64 {
        self.interrupt.isr_read()
    }

    pub fn notify_config(&self) {
        self.interrupt.notify_config();
    }

    pub fn enable_features(&mut self, features: u64) {
        self.enabled_features = features;
    }

    /// Reset queue selection, clear pending ISR state and reset all vrings.
    pub fn reset(&mut self) {
        self.selected_queue = 0;
        let _ = self.interrupt.isr_read();
        for vr in &mut self.vrings {
            vr.reset();
        }
    }

    pub fn num_queues(&self) -> u16 {
        self.num_queues as u16
    }

    pub fn selected_queue(&self) -> u16 {
        self.selected_queue
    }

    pub fn select_queue(&mut self, q: u16) {
        self.selected_queue = q;
    }

    /// Apply `f` to the currently selected vring, or return `d` if the
    /// selected index is out of range.
    pub fn with_vring<U,F>(&self, d: U, f: F) -> U
        where F: FnOnce(&Vring) -> U
    {
        match self.vrings.get(self.selected_queue as usize) {
            Some(vr) => f(vr),
            None => d,
        }
    }

    /// Apply `f` mutably to the currently selected vring -- but only while
    /// the vring has not yet been enabled, so a live ring cannot be altered.
    pub fn with_vring_mut<F>(&mut self, f: F)
        where F: FnOnce(&mut Vring)
    {
        match self.vrings.get_mut(self.selected_queue as usize) {
            Some(vr) => if !vr.is_enabled() { f(vr) },
            None => (),
        }
    }

    pub fn vring_get_size(&self) -> u16 { self.with_vring(0, |vr| vr.size() ) }
    pub fn vring_set_size(&mut self, sz: u16) { self.with_vring_mut(|vr| vr.set_size(sz)) }
    pub fn vring_enable(&mut self) { self.with_vring_mut(|vr| vr.enable() ) }
    pub fn vring_is_enabled(&self) -> bool { self.with_vring(false, |vr| vr.is_enabled() ) }

    /// Kick queue `vq` by writing its ioeventfd.
    /// NOTE(review): the expect() aborts the process if the eventfd write
    /// fails -- confirm that is the desired failure mode here.
    pub fn notify(&self, vq: u16) {
        match self.events.get(vq as usize) {
            Some(ref ev) => ev.write(1).expect("ioeventfd write failed in notify"),
            None => (),
        }
    }

    /// Validate the guest-programmed vring at `idx` and wrap it into a
    /// runnable `VirtQueue`.
    fn create_vq(&self, memory: &GuestRam, idx: usize) -> Result<VirtQueue> {
        let vring = self.vrings[idx].clone();
        vring.validate()?;
        Ok(VirtQueue::new(memory.clone(), vring, self.interrupt.clone(), self.events[idx].clone()))
    }

    /// Validate and wrap every queue; called when the driver sets DRIVER_OK.
    pub fn create_queues(&self, memory: &GuestRam) -> Result<Vec<VirtQueue>> {
        let mut v = Vec::with_capacity(self.num_queues);
        for i in 0..self.num_queues {
            v.push(self.create_vq(memory, i)?);
        }
        Ok(v)
    }
}
/// Create one KVM-registered ioeventfd per queue, spaced 4 bytes apart in
/// the device's notify MMIO area (queue i's doorbell is at notify_base + 4*i,
/// matching the offset/4 decoding in VirtioDevice::notify_write).
fn create_ioeventfds(conf: &VirtioDeviceConfig) -> Result<Vec<Arc<IoEventFd>>> {
    let mut v = Vec::with_capacity(conf.num_queues());
    let notify_base = conf.notify_mmio().base();
    for i in 0..conf.num_queues() {
        let evt = IoEventFd::new(conf.kvm(), notify_base + (4 * i as u64))?;
        v.push(Arc::new(evt));
    }
    Ok(v)
}
/// Create `n` vrings of the default queue size, each backed by `memory`.
fn create_vrings(memory: &GuestRam, n: usize) -> Vec<Vring> {
    (0..n)
        .map(|_| Vring::new(memory.clone(), DEFAULT_QUEUE_SIZE))
        .collect()
}

120
src/virtio/consts.rs Normal file
View File

@ -0,0 +1,120 @@
// Maximum number of logical devices on a PCI bus
pub const PCI_MAX_DEVICES: usize = 32;

// IO Port addresses for PCI configuration access
pub const PCI_CONFIG_ADDRESS: u16 = 0xcf8;
pub const PCI_CONFIG_DATA: u16 = 0xcfc;

// Vendor specific PCI capabilities
pub const PCI_CAP_ID_VENDOR: u8 = 0x09;

pub const PCI_CONFIG_SPACE_SIZE: usize = 256;
pub const PCI_CAP_BASE_OFFSET: usize = 0x40;

// Offsets of standard fields in the PCI configuration-space header.
pub const PCI_VENDOR_ID: usize = 0x00;
pub const PCI_DEVICE_ID: usize = 0x02;
pub const PCI_COMMAND: usize = 0x04;
pub const PCI_COMMAND_IO: u16 = 0x01;
pub const PCI_COMMAND_MEMORY: u16 = 0x02;
pub const PCI_COMMAND_INTX_DISABLE: u16 = 0x400;
pub const PCI_STATUS: usize = 0x06;
pub const PCI_STATUS_CAP_LIST: u16 = 0x10;
pub const PCI_CLASS_REVISION: usize = 0x08;
pub const PCI_CLASS_DEVICE: usize = 0x0a;
pub const PCI_CACHE_LINE_SIZE: usize = 0x0c;
pub const PCI_LATENCY_TIMER: usize = 0x0d;
pub const _PCI_SUBSYSTEM_VENDOR_ID: usize = 0x2c;
pub const PCI_SUBSYSTEM_ID: usize = 0x2e;
pub const PCI_CAPABILITY_LIST: usize = 0x34;
pub const PCI_INTERRUPT_LINE: usize = 0x3C;
pub const PCI_INTERRUPT_PIN: usize = 0x3D;

// Virtio PCI capability types
pub const VIRTIO_PCI_CAP_COMMON_CFG : u8 = 1;
pub const VIRTIO_PCI_CAP_NOTIFY_CFG : u8 = 2;
pub const VIRTIO_PCI_CAP_ISR_CFG : u8 = 3;
pub const VIRTIO_PCI_CAP_DEVICE_CFG : u8 = 4;

// Indicates that no MSIX vector is configured
pub const VIRTIO_NO_MSI_VECTOR: u16 = 0xFFFF;

// Bar number 0 is used for Virtio MMIO area
pub const VIRTIO_MMIO_BAR: usize = 0;

// Virtio MMIO area is one page
pub const VIRTIO_MMIO_AREA_SIZE: usize = 4096;

// Offsets and sizes for each structure in MMIO area
pub const VIRTIO_MMIO_OFFSET_COMMON_CFG : usize = 0;     // Common configuration offset
pub const VIRTIO_MMIO_OFFSET_ISR        : usize = 56;    // ISR register offset
pub const VIRTIO_MMIO_OFFSET_NOTIFY     : usize = 0x400; // Notify area offset
pub const VIRTIO_MMIO_OFFSET_DEV_CFG    : usize = 0x800; // Device specific configuration offset
pub const VIRTIO_MMIO_COMMON_CFG_SIZE: usize = 56;       // Common configuration size
pub const VIRTIO_MMIO_NOTIFY_SIZE    : usize = 0x400;    // Notify area size
pub const VIRTIO_MMIO_ISR_SIZE       : usize = 4;        // ISR register size

// Common configuration header offsets
pub const VIRTIO_PCI_COMMON_DFSELECT      : usize = 0;
pub const VIRTIO_PCI_COMMON_DF            : usize = 4;
pub const VIRTIO_PCI_COMMON_GFSELECT      : usize = 8;
pub const VIRTIO_PCI_COMMON_GF            : usize = 12;
pub const VIRTIO_PCI_COMMON_MSIX          : usize = 16;
pub const VIRTIO_PCI_COMMON_NUMQ          : usize = 18;
pub const VIRTIO_PCI_COMMON_STATUS        : usize = 20;
pub const VIRTIO_PCI_COMMON_CFGGENERATION : usize = 21;
pub const VIRTIO_PCI_COMMON_Q_SELECT      : usize = 22;
pub const VIRTIO_PCI_COMMON_Q_SIZE        : usize = 24;
pub const VIRTIO_PCI_COMMON_Q_MSIX        : usize = 26;
pub const VIRTIO_PCI_COMMON_Q_ENABLE      : usize = 28;
pub const VIRTIO_PCI_COMMON_Q_NOFF        : usize = 30;
pub const VIRTIO_PCI_COMMON_Q_DESCLO      : usize = 32;
pub const VIRTIO_PCI_COMMON_Q_DESCHI      : usize = 36;
pub const VIRTIO_PCI_COMMON_Q_AVAILLO     : usize = 40;
pub const VIRTIO_PCI_COMMON_Q_AVAILHI     : usize = 44;
pub const VIRTIO_PCI_COMMON_Q_USEDLO      : usize = 48;
pub const VIRTIO_PCI_COMMON_Q_USEDHI      : usize = 52;

// Common configuration status bits
pub const _VIRTIO_CONFIG_S_ACKNOWLEDGE : u8 = 1;
pub const _VIRTIO_CONFIG_S_DRIVER      : u8 = 2;
pub const VIRTIO_CONFIG_S_DRIVER_OK    : u8 = 4;
pub const VIRTIO_CONFIG_S_FEATURES_OK  : u8 = 8;
pub const VIRTIO_CONFIG_S_NEEDS_RESET  : u8 = 0x40;
pub const _VIRTIO_CONFIG_S_FAILED      : u8 = 0x80;

// Vring flags (virtio spec 2.4).
pub const _VRING_USED_F_NO_NOTIFY: u16 = 1;
pub const _VRING_AVAIL_F_NO_INTERRUPT: u16 = 1;

// Virtio feature bits negotiated through the common configuration.
pub const _VIRTIO_F_INDIRECT_DESC: u64 = (1 << 28);
pub const VIRTIO_F_EVENT_IDX: u64 = (1 << 29);
pub const VIRTIO_F_VERSION_1: u64 = (1 << 32);

// Descriptor flags (virtio spec 2.4.5).
pub const VRING_DESC_F_NEXT: u16 = 1;
pub const VRING_DESC_F_WRITE: u16 = 2;
pub const VRING_DESC_F_INDIRECT: u16 = 4;

// Queue size limits.
pub const DEFAULT_QUEUE_SIZE: u16 = 128;
pub const MAX_QUEUE_SIZE: u16 = 1024;

// PCI Vendor id for Virtio devices
pub const PCI_VENDOR_ID_REDHAT: u16 = 0x1af4;

// Base PCI device id for Virtio devices
pub const PCI_VIRTIO_DEVICE_ID_BASE: u16 = 0x1040;

pub const PCI_VENDOR_ID_INTEL: u16 = 0x8086;
pub const PCI_CLASS_BRIDGE_HOST: u16 = 0x0600;

228
src/virtio/device.rs Normal file
View File

@ -0,0 +1,228 @@
use std::sync::{Arc,RwLock};
use std::ops::DerefMut;
use memory::{GuestRam,AddressRange};
use super::bus::VirtioDeviceConfig;
use super::VirtQueue;
use super::config::VirtQueueConfig;
use super::consts::*;
use vm::io::MmioOps;
use vm::Result;
/// Device-type-specific behaviour plugged into the generic `VirtioDevice`
/// transport. Implementations are shared behind an `RwLock`, hence the
/// `Send + Sync` bound.
pub trait VirtioDeviceOps: Send+Sync {
    /// Reset device-specific state; default is a no-op.
    fn reset(&mut self) {}
    /// Offered the feature bits accepted by the guest; return `false` to
    /// reject them. Default accepts everything.
    fn enable_features(&mut self, bits: u64) -> bool { let _ = bits; true }
    /// Guest write into the device-specific config region; default ignores it.
    fn write_config(&mut self, offset: usize, size: usize, val: u64) { let (_,_,_) = (offset, size, val); }
    /// Guest read from the device-specific config region; default returns 0.
    fn read_config(&mut self, offset: usize, size: usize) -> u64 { let (_,_) = (offset, size); 0 }
    /// Invoked when the driver sets DRIVER_OK, with validated virtqueues.
    fn start(&mut self, memory: GuestRam, queues: Vec<VirtQueue>);
}
/// Generic virtio-over-PCI transport: decodes MMIO accesses to the
/// common/notify/ISR/device-config regions and drives a `VirtioDeviceOps`.
pub struct VirtioDevice {
    memory: GuestRam,
    vq_config: VirtQueueConfig,
    // MMIO subranges carved out of the device's BAR (see bus.rs).
    common_cfg_mmio: AddressRange,
    isr_mmio: AddressRange,
    notify_mmio: AddressRange,
    // Present only when the device declared a non-zero config size.
    device_cfg_mmio: Option<AddressRange>,
    device_ops: Arc<RwLock<VirtioDeviceOps>>,
    // Feature-word selectors written by the driver (0 = low word, 1 = high).
    dfselect: u32,
    gfselect: u32,
    device_features: u64,
    guest_features: u64,
    // Device status register (VIRTIO_CONFIG_S_* bits).
    status: u8,
}
/// Mask selecting the low 32 bits of a u64.
const MASK_LOW_32: u64 = (1u64 << 32) - 1;
/// Mask selecting the high 32 bits of a u64.
const MASK_HI_32: u64 = MASK_LOW_32 << 32;

/// Replace the low 32 bits of `val` with `low32`, preserving the high bits.
fn set_lo32(val: &mut u64, low32: u32) {
    *val = (*val & MASK_HI_32) | u64::from(low32);
}

/// Replace the high 32 bits of `val` with `hi32`, preserving the low bits.
fn set_hi32(val: &mut u64, hi32: u32) {
    *val = (u64::from(hi32) << 32) | (*val & MASK_LOW_32);
}

/// Low 32 bits of `val`.
fn get_lo32(val: u64) -> u32 {
    (val & MASK_LOW_32) as u32
}

/// High 32 bits of `val`.
fn get_hi32(val: u64) -> u32 {
    (val >> 32) as u32
}
impl VirtioDevice {
    /// Build the shared transport object from a finished device config.
    pub fn new(memory: GuestRam, config: &VirtioDeviceConfig) -> Result<Arc<RwLock<VirtioDevice>>> {
        Ok(Arc::new(RwLock::new(VirtioDevice {
            memory: memory.clone(),
            vq_config: VirtQueueConfig::new(&memory.clone(),&config)?,
            common_cfg_mmio: config.common_cfg_mmio(),
            isr_mmio: config.isr_mmio(),
            notify_mmio: config.notify_mmio(),
            device_cfg_mmio: config.device_cfg_mmio(),
            device_ops: config.ops(),
            dfselect: 0,
            gfselect: 0,
            device_features: config.feature_bits(),
            guest_features: 0,
            status: 0,
        })))
    }

    /// Return the transport to its power-on state (status 0, no negotiated
    /// features, queues reset).
    fn reset(&mut self) {
        self.dfselect = 0;
        self.gfselect = 0;
        self.guest_features = 0;
        self.status = 0;
        self.vq_config.reset();
    }

    /// Handle a driver write to the device-status register.
    fn status_write(&mut self, val: u8) {
        // 4.1.4.3.1 The device MUST reset when 0 is written to device status
        if val == 0 {
            self.reset();
            return;
        }
        // 2.1.1 The driver MUST NOT clear a device status bit
        if self.status & !val != 0 {
            return;
        }
        let new_bits = val & !self.status;
        if new_bits & VIRTIO_CONFIG_S_DRIVER_OK != 0 {
            match self.vq_config.create_queues(&self.memory) {
                Ok(queues) => self.with_ops(|ops| ops.start(self.memory.clone(), queues)),
                Err(e) => {
                    println!("creating virtqueues failed {}", e);
                    // Signal the failure to the driver per spec: set
                    // NEEDS_RESET and raise a configuration interrupt.
                    self.status |= VIRTIO_CONFIG_S_NEEDS_RESET;
                    self.vq_config.notify_config();
                    return;
                }
            }
        }
        if new_bits & VIRTIO_CONFIG_S_FEATURES_OK != 0 {
            // NOTE(review): vq_config.enable_features() is only called when
            // the ops callback *rejects* the features, and FEATURES_OK is
            // then never latched into status. This branch looks inverted --
            // confirm against the intended feature-negotiation flow.
            if !self.with_ops(|ops| ops.enable_features(self.guest_features)) {
                self.vq_config.enable_features(self.guest_features);
                return;
            }
        }
        self.status |= new_bits;
    }

    /// Handle a driver write into the common configuration structure;
    /// `offset` is relative to the start of that structure.
    fn common_config_write(&mut self, offset: usize, _size: usize, val: u32) {
        match offset {
            VIRTIO_PCI_COMMON_DFSELECT => self.dfselect = val,
            VIRTIO_PCI_COMMON_GFSELECT => self.gfselect = val,
            VIRTIO_PCI_COMMON_GF => {
                // gfselect chooses which 32-bit half of the 64-bit guest
                // feature word is being written.
                match self.gfselect {
                    0 => set_lo32(&mut self.guest_features, val),
                    1 => set_hi32(&mut self.guest_features, val),
                    _ => {},
                }
                // 2.2.1
                // The driver MUST NOT accept a feature which the device did
                // not offer.
                self.guest_features &= self.device_features;
            },
            VIRTIO_PCI_COMMON_STATUS => self.status_write(val as u8),
            VIRTIO_PCI_COMMON_Q_SELECT=> self.vq_config.select_queue(val as u16),
            VIRTIO_PCI_COMMON_Q_SIZE  => self.vq_config.vring_set_size(val as u16),
            VIRTIO_PCI_COMMON_Q_ENABLE=> if val == 1 { self.vq_config.vring_enable() } ,
            // Guest-physical addresses of the selected queue's rings, written
            // as low/high 32-bit halves.
            VIRTIO_PCI_COMMON_Q_DESCLO=> self.vq_config.with_vring_mut(|vr| set_lo32(&mut vr.descriptors, val)),
            VIRTIO_PCI_COMMON_Q_DESCHI=> self.vq_config.with_vring_mut(|vr| set_hi32(&mut vr.descriptors, val)),
            VIRTIO_PCI_COMMON_Q_AVAILLO=> self.vq_config.with_vring_mut(|vr| set_lo32(&mut vr.avail_ring, val)),
            VIRTIO_PCI_COMMON_Q_AVAILHI=> self.vq_config.with_vring_mut(|vr| set_hi32(&mut vr.avail_ring, val)),
            VIRTIO_PCI_COMMON_Q_USEDLO=> self.vq_config.with_vring_mut(|vr| set_lo32(&mut vr.used_ring, val)),
            VIRTIO_PCI_COMMON_Q_USEDHI=> self.vq_config.with_vring_mut(|vr| set_hi32(&mut vr.used_ring, val)),
            _ => {},
        }
    }

    /// Handle a driver read from the common configuration structure;
    /// unknown offsets read as 0.
    fn common_config_read(&mut self, offset: usize, _size: usize) -> u32 {
        match offset {
            VIRTIO_PCI_COMMON_DFSELECT => self.dfselect,
            VIRTIO_PCI_COMMON_DF=> match self.dfselect {
                0 => get_lo32(self.device_features),
                1 => get_hi32(self.device_features),
                _ => 0,
            },
            VIRTIO_PCI_COMMON_GFSELECT => { self.gfselect },
            VIRTIO_PCI_COMMON_GF => match self.gfselect {
                0 => get_lo32(self.guest_features),
                1 => get_hi32(self.guest_features),
                _ => 0,
            },
            // MSI-X is not supported: report "no vector" for both fields.
            VIRTIO_PCI_COMMON_MSIX => VIRTIO_NO_MSI_VECTOR as u32,
            VIRTIO_PCI_COMMON_NUMQ => self.vq_config.num_queues() as u32,
            VIRTIO_PCI_COMMON_STATUS => self.status as u32,
            VIRTIO_PCI_COMMON_CFGGENERATION => 0,
            VIRTIO_PCI_COMMON_Q_SELECT => self.vq_config.selected_queue() as u32,
            VIRTIO_PCI_COMMON_Q_SIZE => self.vq_config.vring_get_size() as u32,
            VIRTIO_PCI_COMMON_Q_MSIX => VIRTIO_NO_MSI_VECTOR as u32,
            VIRTIO_PCI_COMMON_Q_ENABLE => if self.vq_config.vring_is_enabled() {1} else {0},
            // queue_notify_off: each queue's notify slot index equals its
            // queue index (slots are 4 bytes apart -- see create_ioeventfds).
            VIRTIO_PCI_COMMON_Q_NOFF => self.vq_config.selected_queue() as u32,
            VIRTIO_PCI_COMMON_Q_DESCLO => self.vq_config.with_vring(0, |vr| get_lo32(vr.descriptors)),
            VIRTIO_PCI_COMMON_Q_DESCHI => self.vq_config.with_vring(0, |vr| get_hi32(vr.descriptors)),
            VIRTIO_PCI_COMMON_Q_AVAILLO => self.vq_config.with_vring(0, |vr| get_lo32(vr.avail_ring)),
            VIRTIO_PCI_COMMON_Q_AVAILHI => self.vq_config.with_vring(0, |vr| get_hi32(vr.avail_ring)),
            VIRTIO_PCI_COMMON_Q_USEDLO => self.vq_config.with_vring(0, |vr| get_lo32(vr.used_ring)),
            VIRTIO_PCI_COMMON_Q_USEDHI => self.vq_config.with_vring(0, |vr| get_hi32(vr.used_ring)),
            _ => 0,
        }
    }

    /// The notify area is write-only; reads return 0.
    fn notify_read(&mut self, _offset: usize, _size: usize) -> u64 {
        0
    }

    /// A write anywhere in queue i's 4-byte notify slot kicks queue i.
    fn notify_write(&mut self, offset: usize, _size: usize, _val: u64) {
        let vq = (offset / 4) as u16;
        self.vq_config.notify(vq);
    }

    /// Read (and clear) the ISR status register.
    fn isr_read(&mut self) -> u64 {
        self.vq_config.isr_read()
    }

    /// Run `f` with exclusive access to the device-specific ops.
    fn with_ops<U,F>(&self, f: F) -> U
        where F: FnOnce(&mut VirtioDeviceOps) -> U {
        let mut ops = self.device_ops.write().unwrap();
        f(ops.deref_mut())
    }
}
impl MmioOps for VirtioDevice {
    /// Route an MMIO read to the matching region of the device's BAR.
    fn mmio_read(&mut self, address: u64, size: usize) -> u64 {
        if self.common_cfg_mmio.contains(address, size) {
            let offset = self.common_cfg_mmio.offset_of(address);
            self.common_config_read(offset,size) as u64
        } else if self.notify_mmio.contains(address, size) {
            let offset = self.notify_mmio.offset_of(address);
            self.notify_read(offset, size) as u64
        } else if self.isr_mmio.contains(address, size) {
            self.isr_read()
        } else if let Some(ref dev_cfg_mmio) = self.device_cfg_mmio {
            // NOTE(review): unlike the branches above, this one does not call
            // dev_cfg_mmio.contains(address, size) before computing the
            // offset -- it relies on the dispatcher only routing addresses
            // inside this device's MMIO window. Confirm that assumption.
            let offset = dev_cfg_mmio.offset_of(address);
            self.with_ops(|ops| ops.read_config(offset, size))
        } else {
            0
        }
    }

    /// Route an MMIO write to the matching region of the device's BAR.
    fn mmio_write(&mut self, address: u64, size: usize, val: u64) {
        if self.common_cfg_mmio.contains(address, size) {
            let offset = self.common_cfg_mmio.offset_of(address);
            self.common_config_write(offset,size, val as u32)
        } else if self.notify_mmio.contains(address, size) {
            let offset = self.notify_mmio.offset_of(address);
            self.notify_write(offset, size, val)
        } else if let Some(ref dev_cfg_mmio) = self.device_cfg_mmio {
            // NOTE(review): same missing contains() check as in mmio_read.
            let offset = dev_cfg_mmio.offset_of(address);
            self.with_ops(|ops| ops.write_config(offset, size, val))
        }
    }
}

87
src/virtio/eventfd.rs Normal file
View File

@ -0,0 +1,87 @@
use std::sync::Arc;
use std::os::unix::io::RawFd;
use libc;
use vm::{Result,Error,ErrorKind};
use kvm::Kvm;
/// Thin RAII wrapper around a Linux eventfd file descriptor; the fd is
/// closed on drop.
pub struct EventFd(RawFd);

// eventfd reads and writes always transfer exactly one 8-byte counter value.
const U64_SZ: usize = 8;
impl EventFd {
pub fn new() -> Result<EventFd> {
let fd = unsafe { libc::eventfd(0, 0) };
if fd < 0 {
return Err(Error::from_last_errno());
}
Ok(EventFd(fd))
}
pub fn raw_fd(&self) -> RawFd {
self.0
}
pub fn write(&self, v: u64) -> Result<()> {
let ret = unsafe { libc::write(self.0, &v as *const _ as *const libc::c_void, U64_SZ) };
if ret as usize != U64_SZ {
if ret < 0 {
return Err(Error::new(ErrorKind::EventFdError, Error::from_last_errno()));
}
return Err(Error::new(ErrorKind::EventFdError, "write failed"));
}
Ok(())
}
pub fn read(&self) -> Result<u64> {
let mut v = 0u64;
let ret = unsafe { libc::read(self.0, &mut v as *mut _ as *mut libc::c_void, U64_SZ) };
if ret as usize != U64_SZ {
if ret < 0 {
return Err(Error::new(ErrorKind::EventFdError, Error::from_last_errno()));
}
return Err(Error::new(ErrorKind::EventFdError, "read failed"));
}
Ok((v))
}
}
impl Drop for EventFd {
    fn drop(&mut self) {
        // Best-effort close: a close error cannot be reported from drop.
        unsafe {
            libc::close(self.0);
        }
    }
}
/// An `EventFd` registered with KVM for a guest MMIO address, so guest
/// writes to that address signal the eventfd instead of exiting to
/// userspace. Unregistered from KVM on drop.
pub struct IoEventFd {
    kvm: Kvm,
    // Guest address the eventfd is bound to; needed again for deregistration.
    addr: u64,
    evt: Arc<EventFd>
}
impl IoEventFd {
    /// Create an eventfd and register it with KVM for guest writes to
    /// `address`.
    pub fn new(kvm: &Kvm, address: u64) -> Result<IoEventFd> {
        let evt = Arc::new(EventFd::new()?);
        kvm.ioeventfd_add(address, evt.raw_fd())?;
        Ok(IoEventFd {
            kvm: kvm.clone(),
            addr: address,
            evt,
        })
    }

    /// Read (and reset) the signal counter.
    pub fn read(&self) -> Result<u64> {
        self.evt.read()
    }

    /// Signal the eventfd directly (e.g. to kick a queue from host code).
    pub fn write(&self, v: u64) -> Result<()> {
        self.evt.write(v)
    }
}
impl Drop for IoEventFd {
    fn drop(&mut self) {
        // Best-effort: unregister the eventfd from KVM; failure is ignored
        // because drop cannot report errors.
        let _ = self.kvm.ioeventfd_del(self.addr, self.evt.raw_fd());
    }
}

30
src/virtio/mod.rs Normal file
View File

@ -0,0 +1,30 @@
// Internal implementation modules of the virtio subsystem.
mod bus;
mod chain;
mod config;
mod consts;
mod device;
mod eventfd;
mod pci;
mod virtqueue;
mod vring;

// Public API re-exported at the `virtio` module root.
pub use self::virtqueue::VirtQueue;
pub use self::pci::PciIrq;
pub use self::bus::VirtioBus;
pub use self::device::{VirtioDevice,VirtioDeviceOps};
pub use self::chain::Chain;

use byteorder::{ByteOrder,LittleEndian};
/// Read a little-endian value of `size` bytes (1, 2, 4 or 8) from `config`
/// at `offset`. Out-of-range accesses and unsupported sizes read as 0.
pub fn read_config_buffer(config: &[u8], offset: usize, size: usize) -> u64 {
    if offset + size > config.len() {
        return 0;
    }
    // Assemble a little-endian integer from `n` bytes starting at `offset`.
    let read_le = |n: usize| -> u64 {
        config[offset..offset + n]
            .iter()
            .rev()
            .fold(0u64, |acc, &b| (acc << 8) | u64::from(b))
    };
    match size {
        1 | 2 | 4 | 8 => read_le(size),
        _ => 0,
    }
}

436
src/virtio/pci.rs Normal file
View File

@ -0,0 +1,436 @@
use std::sync::{Arc,RwLock};
use byteorder::{ByteOrder,LittleEndian};
use vm::io::{IoDispatcher,IoPortOps};
use memory::PCI_MMIO_RESERVED_BASE;
use memory::AddressRange;
use super::consts::*;
/// Last value written to the 0xCF8 PCI configuration-address register,
/// decoded into bus/device/function/offset fields on demand.
struct PciConfigAddress(u32);

impl PciConfigAddress {
    fn new() -> PciConfigAddress {
        PciConfigAddress(0)
    }

    fn set(&mut self, n: u32) {
        self.0 = n;
    }

    fn get(&self) -> u32 {
        self.0
    }

    /// Bus number: bits [23:16].
    fn bus(&self) -> u32 {
        self.bits(16, 8)
    }

    /// Function number: bits [10:8].
    fn function(&self) -> u32 {
        self.bits(8, 3)
    }

    /// Device slot number: bits [15:11].
    fn device(&self) -> usize {
        self.bits(11, 5) as usize
    }

    /// Register offset: bits [7:0], rounded down to a 4-byte boundary.
    fn offset(&self) -> usize {
        (self.bits(0, 8) & !0x3) as usize
    }

    /// Extract `size` bits starting at bit position `offset`.
    fn bits(&self, offset: u32, size: u32) -> u32 {
        (self.0 >> offset) & ((1u32 << size) - 1)
    }
}
/// Interrupt-routing record for one PCI device, used to build the
/// interrupt configuration handed to the guest.
pub struct PciIrq {
    pci_id: u8,
    // Interrupt pin number, 1-based (1 = INTA#).
    int_pin: u8,
    irq: u8,
}
impl PciIrq {
    fn new(pci: &PciDevice) -> PciIrq {
        PciIrq {
            pci_id: pci.id,
            // Every device reports pin 1 (INTA#), matching PCI_INTERRUPT_PIN
            // written in PciDevice::new().
            int_pin: 1,
            irq: pci.irq,
        }
    }

    /// Encode (slot, pin) as a source-bus IRQ: slot in the upper bits, the
    /// zero-based pin in the low two bits. Slot ids stay below
    /// PCI_MAX_DEVICES (32), so the u8 shift cannot overflow.
    pub fn src_bus_irq(&self) -> u8 {
        (self.pci_id << 2) | (self.int_pin - 1)
    }

    /// The IRQ line assigned by the bus.
    pub fn irq_line(&self) -> u8 {
        self.irq
    }
}
/// Emulated PCI bus 0: owns the device slots, allocates MMIO space and
/// IRQ lines, and services the 0xCF8/0xCFC configuration ports.
pub struct PciBus {
    // Indexed by device slot; None for empty slots.
    devices: Vec<Option<PciDevice>>,
    // Next free address in the PCI MMIO allocation region.
    mmio_next_alloc: u32,
    next_irq: u8,
    next_dev: u8,
    // Last value the guest wrote to the config-address port (0xCF8).
    config_address: PciConfigAddress,
}
impl PciBus {
    /// Create the bus, register its config ports (0xcf8..0xcff, 8 ports) on
    /// the I/O dispatcher, and install the host bridge at slot 0.
    pub fn new(io: &IoDispatcher) -> Arc<RwLock<PciBus>> {
        let bus = Arc::new(RwLock::new(PciBus {
            devices: PciBus::create_device_vec(PCI_MAX_DEVICES),
            mmio_next_alloc: PCI_MMIO_RESERVED_BASE as u32,
            // IRQ lines are handed out starting at 5; slot ids start at 1
            // because slot 0 is reserved for the host bridge below.
            next_irq: 5,
            next_dev: 1,
            config_address: PciConfigAddress::new(),
        }));
        io.register_ioports(PCI_CONFIG_ADDRESS, 8, bus.clone());
        let pci = PciDevice::new(0, 0, PCI_VENDOR_ID_INTEL, 0, PCI_CLASS_BRIDGE_HOST);
        bus.write().unwrap().store_device(pci);
        bus
    }

    /// Interrupt-routing records for every populated slot.
    pub fn pci_irqs(&self) -> Vec<PciIrq> {
        let mut v = Vec::new();
        for d in &self.devices {
            match *d {
                Some(ref dev) => v.push(PciIrq::new(dev)),
                None => (),
            }
        }
        v
    }

    fn allocate_irq(&mut self) -> u8 {
        let irq = self.next_irq;
        self.next_irq += 1;
        irq
    }

    fn allocate_id(&mut self) -> u8 {
        let id = self.next_dev;
        self.next_dev += 1;
        id
    }

    /// Create (but do not yet install) a device with a freshly allocated
    /// slot id and IRQ; the caller configures it and calls store_device().
    pub fn create_device(&mut self, vendor: u16, device: u16, class_id: u16) -> PciDevice {
        let irq = self.allocate_irq();
        let id = self.allocate_id();
        let pci = PciDevice::new(id, irq, vendor, device, class_id);
        pci
    }

    /// Install `pci` into the slot matching its id.
    pub fn store_device(&mut self, pci: PciDevice) {
        let id = pci.id as usize;
        self.devices[id] = Some(pci)
    }

    fn create_device_vec(sz: usize) -> Vec<Option<PciDevice>> {
        let mut v = Vec::with_capacity(sz);
        for _ in 0..sz {
            v.push(None)
        }
        v
    }

    /// Carve `sz` bytes out of the PCI MMIO region, aligned to `sz`.
    /// NOTE(review): the alignment arithmetic assumes `sz` is a power of
    /// two; that is not enforced here.
    pub fn allocate_mmio_space(&mut self, sz: usize) -> AddressRange {
        let mask = (sz - 1) as u32;
        let aligned = (self.mmio_next_alloc + mask) & !mask;
        self.mmio_next_alloc = aligned + (sz as u32);
        AddressRange::new(aligned as u64, sz)
    }

    /// True when the access [port, port+len) lies inside the 4-byte register
    /// at `base`. NOTE(review): `port + len` can wrap for ports near
    /// u16::MAX; callers only pass ports near 0xcf8/0xcfc, so this is
    /// benign in practice -- confirm.
    fn is_in_range(base: u16, port: u16, len: usize) -> bool {
        let end = port + len as u16;
        port >= base && end <= (base + 4)
    }

    fn is_config_address(&self, port: u16, len: usize) -> bool {
        PciBus::is_in_range(PCI_CONFIG_ADDRESS, port, len)
    }

    fn is_config_data(&self, port: u16, len: usize) -> bool {
        PciBus::is_in_range(PCI_CONFIG_DATA, port, len)
    }

    // Reads of 0xCF8 return the stored address regardless of access size.
    fn config_address_in(&self, _: usize) -> u32 {
        self.config_address.get()
    }

    /// The device currently addressed by the config-address register, if
    /// any. Only bus 0 / function 0 are implemented.
    fn current_config_device(&mut self) -> Option<&mut PciDevice> {
        let b = self.config_address.bus();
        let d = self.config_address.device();
        let f = self.config_address.function();
        if b != 0 || f != 0 || d >= self.devices.len() {
            return None;
        }
        self.devices[d].as_mut()
    }

    // Only full 32-bit writes update the config-address register.
    fn config_address_out(&mut self, _offset: u16, size: usize, data: u32) {
        if size == 4 {
            self.config_address.set(data);
        }
    }

    #[allow(dead_code)]
    fn valid_config_access(&self, offset: u16, len: usize) -> bool {
        (offset as usize) + len <= 4
    }

    /// Read from the addressed device's config space; absent devices read
    /// as all-ones, per PCI convention.
    fn config_data_in(&mut self, offset: usize, size: usize) -> u32 {
        let off = self.config_address.offset() + offset;
        match self.current_config_device() {
            Some(dev) => { dev.read_config(off, size)},
            None => 0xFFFFFFFF,
        }
    }

    /// Write to the addressed device's config space; ignored when no device
    /// is addressed.
    fn config_data_out(&mut self, offset: u16, size: usize, data: u32) {
        let off = self.config_address.offset() + offset as usize;
        if let Some(dev) = self.current_config_device() {
            dev.write_config(off, size,data)
        }
    }
}
impl IoPortOps for PciBus {
    /// Route a port read to the config-address or config-data handler.
    fn io_in(&mut self, port: u16, size: usize) -> u32 {
        if self.is_config_address(port, size) {
            self.config_address_in(size)
        } else if self.is_config_data(port, size) {
            self.config_data_in((port - PCI_CONFIG_DATA) as usize, size)
        } else {
            0
        }
    }
    /// Route a port write to the config-address or config-data handler.
    fn io_out(&mut self, port: u16, size: usize, val: u32) {
        if self.is_config_address(port, size) {
            self.config_address_out(port - PCI_CONFIG_ADDRESS, size, val)
        } else if self.is_config_data(port, size) {
            self.config_data_out(port - PCI_CONFIG_DATA, size, val)
        }
    }
}
/// An emulated PCI device: a raw config-space image plus the bookkeeping
/// needed to populate BARs and the capability list.
pub struct PciDevice {
    // Config-space offset where the next capability will be written.
    next_cap: usize,
    // Offset of the most recently added capability (0 means none yet).
    last_cap: usize,
    // Device/slot number assigned by the bus.
    id: u8,
    // Interrupt line assigned by the bus.
    irq: u8,
    // Raw PCI configuration space image (PCI_CONFIG_SPACE_SIZE bytes).
    config_buffer: [u8; PCI_CONFIG_SPACE_SIZE],
    // Per-BAR mask of guest-writable bits (0 = BAR unused / read-only).
    bar_write_masks: [u32; 6],
}
impl PciDevice {
    /// Create a device with slot `id` and interrupt line `irq`, initializing
    /// the standard config-space header fields (vendor/device/class ids,
    /// command register, interrupt pin and line, subsystem id).
    pub fn new(id: u8, irq: u8, vendor: u16, device: u16, class_id: u16) -> PciDevice {
        let mut d = PciDevice {
            next_cap: PCI_CAP_BASE_OFFSET,
            last_cap: 0,
            id,
            irq,
            config_buffer: [0; PCI_CONFIG_SPACE_SIZE],
            bar_write_masks: [0; 6],
        };
        d.w16(PCI_VENDOR_ID, vendor);
        d.w16(PCI_DEVICE_ID, device);
        d.w16(PCI_COMMAND, PCI_COMMAND_IO | PCI_COMMAND_MEMORY);
        d.w8(PCI_CLASS_REVISION, 1);
        d.w16(PCI_CLASS_DEVICE, class_id);
        d.w8(PCI_INTERRUPT_PIN, 1);
        d.w8(PCI_INTERRUPT_LINE, irq);
        d.w16(PCI_SUBSYSTEM_ID, 0x40);
        d
    }
    pub fn get_irq(&self) -> u8 {
        self.irq
    }
    /// A config write is valid if it stays inside the config buffer and is
    /// naturally aligned for its size (dwords on 4-byte boundaries, words on
    /// 2-byte boundaries, bytes anywhere).
    fn is_valid_write(&self, offset: usize, size: usize) -> bool {
        if offset + size > PCI_CONFIG_SPACE_SIZE {
            return false;
        }
        // check alignment of write
        let mod4 = offset % 4;
        match size {
            4 if mod4 == 0 => true,
            2 if mod4 == 0 || mod4 == 2 => true,
            1 => true,
            _ => false,
        }
    }
    /// Handle a guest write to a BAR register: store the raw value, then
    /// re-apply the BAR's write mask so read-only address bits stay zero.
    fn write_bar(&mut self, offset: usize, size: usize, data: u32) {
        assert!(is_bar_offset(offset), "not a bar offset in write_bar()");
        let bar = offset_to_bar(offset);
        let write_mask = self.bar_write_masks[bar];
        if write_mask == 0 {
            // no writable bits
            return;
        }
        // `offset` is already the exact config-space byte the guest targeted,
        // so write there directly.  (The previous code added `offset % 4` a
        // second time, pushing sub-dword writes past the intended register.)
        match size {
            4 => self.w32(offset, data),
            2 => self.w16(offset, data as u16),
            1 => self.w8(offset, data as u8),
            _ => (),
        };
        // Apply the write mask to the full, aligned BAR dword rather than an
        // unaligned read at `offset`, which could span two BARs.
        let bar_offset = bar_to_offset(bar);
        let v = self.r32(bar_offset);
        self.w32(bar_offset, v & write_mask);
    }
    /// Dispatch a guest config-space write.  Only the BARs and a small
    /// whitelist of header registers are writable; all else is dropped.
    fn write_config(&mut self, offset: usize, size: usize, data: u32) {
        if !self.is_valid_write(offset, size) {
            return;
        }
        if is_bar_offset(offset) {
            self.write_bar(offset, size, data);
            return;
        }
        match offset {
            PCI_COMMAND if size == 2 => self.w16(PCI_COMMAND, data as u16),
            PCI_STATUS if size == 2 => self.w16(PCI_STATUS, data as u16),
            PCI_CACHE_LINE_SIZE if size == 1 => self.w8(PCI_CACHE_LINE_SIZE, data as u8),
            PCI_LATENCY_TIMER if size == 1 => self.w8(PCI_LATENCY_TIMER, data as u8),
            _ => (),
        }
    }
    // Little-endian accessors into the raw config-space buffer.
    fn w32(&mut self, off: usize, val: u32) { LittleEndian::write_u32(&mut self.config_buffer[off..], val); }
    fn w16(&mut self, off: usize, val: u16) { LittleEndian::write_u16(&mut self.config_buffer[off..], val); }
    fn w8(&mut self, off: usize, val: u8) { self.config_buffer[off] = val; }
    fn r32(&self, off: usize) -> u32 { LittleEndian::read_u32(&self.config_buffer[off..]) }
    fn r16(&self, off: usize) -> u16 { LittleEndian::read_u16(&self.config_buffer[off..]) }
    fn r8(&self, off: usize) -> u8 { self.config_buffer[off] }
    /// Read `size` bytes from config space; out-of-range or odd-sized reads
    /// return all-ones, matching absent-register behavior on a real bus.
    fn read_config(&self, offset: usize, size: usize) -> u32 {
        if offset + size > PCI_CONFIG_SPACE_SIZE {
            return 0xFFFFFFFF;
        }
        match size {
            1 => self.r8(offset) as u32,
            2 => self.r16(offset) as u32,
            4 => self.r32(offset),
            _ => 0xFFFFFFFF
        }
    }
    #[allow(dead_code)]
    pub fn is_irq_disabled(&self) -> bool {
        self.r16(PCI_COMMAND) & PCI_COMMAND_INTX_DISABLE != 0
    }
    /// Point BAR `bar` at `range` and record which address bits are guest
    /// writable (all bits above the size — BAR sizes are powers of two).
    pub fn set_mmio_bar(&mut self, bar: usize, range: AddressRange) {
        assert!(range.is_naturally_aligned(), "cannot set_mmio_bar() because mmio range is not naturally aligned");
        // NOTE(review): six BARs exist but bar == 5 is rejected — confirm intentional.
        assert!(bar < 5, "bar is invalid value in set_mmio_bar()");
        self.bar_write_masks[bar] = !((range.size() as u32) - 1);
        self.w32(bar_to_offset(bar), range.base() as u32);
    }
    /// Add the standard virtio vendor capabilities (common, isr, notify and,
    /// when `config_size` > 0, the device config region).
    pub fn add_virtio_caps(&mut self, config_size: usize) {
        self.new_virtio_cap(VIRTIO_PCI_CAP_COMMON_CFG)
            .set_mmio_range(VIRTIO_MMIO_OFFSET_COMMON_CFG, VIRTIO_MMIO_COMMON_CFG_SIZE).add(self);
        self.new_virtio_cap(VIRTIO_PCI_CAP_ISR_CFG)
            .set_mmio_range(VIRTIO_MMIO_OFFSET_ISR, VIRTIO_MMIO_ISR_SIZE).add(self);
        self.new_virtio_cap(VIRTIO_PCI_CAP_NOTIFY_CFG)
            .set_mmio_range(VIRTIO_MMIO_OFFSET_NOTIFY, VIRTIO_MMIO_NOTIFY_SIZE)
            .set_extra_word(4).add(self);
        if config_size > 0 {
            self.new_virtio_cap(VIRTIO_PCI_CAP_DEVICE_CFG)
                .set_mmio_range(VIRTIO_MMIO_OFFSET_DEV_CFG, config_size).add(self);
        }
    }
    pub fn new_virtio_cap(&mut self, vtype: u8) -> VirtioCap {
        VirtioCap::new(self.next_cap, vtype)
    }
    /// Link the capability just written at `next_cap` into the capability
    /// list and advance `next_cap` by `size` rounded up to a dword.
    fn inc_cap(&mut self, size: usize) {
        let next = self.next_cap as u8;
        let last = self.last_cap;
        if self.last_cap == 0 {
            // First capability: publish the list head and set the status bit.
            self.w8(PCI_CAPABILITY_LIST, next);
            let status = self.r16(PCI_STATUS) | PCI_STATUS_CAP_LIST;
            self.w16(PCI_STATUS, status);
        } else {
            // Chain through the previous capability's cap_next byte (+1).
            self.w8(last + 1, next);
        }
        self.last_cap = self.next_cap;
        let aligned = (size + 3) & !3;
        self.next_cap += aligned;
    }
}
/// Six 32-bit BARs occupy config-space offsets [0x10, 0x28).
fn is_bar_offset(offset: usize) -> bool {
    let relative = offset.wrapping_sub(0x10);
    relative < 0x18
}
/// Config-space offset of BAR register `bar` (each BAR is one dword).
fn bar_to_offset(bar: usize) -> usize {
    0x10 + bar * 4
}
/// Which BAR (0..=5) contains config-space byte `offset`?
/// Panics if `offset` is outside the BAR region.
fn offset_to_bar(offset: usize) -> usize {
    assert!(offset >= 0x10 && offset < 0x28, "not a valid bar offset");
    (offset - 0x10) >> 2
}
/// Builder for a virtio vendor-specific PCI capability that will be
/// serialized into a device's config space via `add()`.
pub struct VirtioCap {
    // Config-space offset where this capability will be written.
    offset: usize,
    // cfg_type field (VIRTIO_PCI_CAP_* constant).
    vtype: u8,
    // Total capability length in bytes (16, +4 with an extra word).
    size: u8,
    // Offset within the virtio mmio BAR this capability describes.
    mmio_offset: u32,
    // Length of the described region (0 = no region written).
    mmio_len: u32,
    // Optional trailing dword (e.g. notify_off_multiplier).
    extra_word: Option<u32>,
}
impl VirtioCap {
    /// Start building a capability of type `vtype` at config-space `offset`.
    fn new(offset: usize, vtype: u8) -> VirtioCap {
        VirtioCap {
            offset,
            vtype,
            size: 16,
            mmio_offset: 0,
            mmio_len: 0,
            extra_word: None,
        }
    }
    /// Record the region of the virtio mmio BAR this capability describes.
    pub fn set_mmio_range(&mut self, offset: usize, len: usize) -> &mut VirtioCap {
        self.mmio_offset = offset as u32;
        self.mmio_len = len as u32;
        self
    }
    /// Append one extra dword after the standard structure and grow the
    /// recorded capability size to match.
    pub fn set_extra_word(&mut self, val: u32) -> &mut VirtioCap {
        self.size += 4;
        self.extra_word = Some(val);
        self
    }
    /// Serialize this capability into `dev`'s config space and link it into
    /// the capability chain.
    ///
    /// Layout written (virtio `struct virtio_pci_cap`):
    ///   +0 cap_vndr, +1 cap_next (filled in by `inc_cap`), +2 cap_len,
    ///   +3 cfg_type, +4 bar, +8 le32 offset, +12 le32 length,
    ///   +16 optional extra dword.
    pub fn add(&mut self, dev: &mut PciDevice) {
        let base = self.offset;
        dev.w8(base, PCI_CAP_ID_VENDOR);
        dev.w8(base + 2, self.size);
        dev.w8(base + 3, self.vtype);
        dev.w8(base + 4, VIRTIO_MMIO_BAR as u8);
        if self.mmio_len > 0 {
            dev.w32(base + 8, self.mmio_offset);
            dev.w32(base + 12, self.mmio_len);
        }
        if let Some(word) = self.extra_word {
            dev.w32(base + 16, word);
        }
        dev.inc_cap(self.size as usize);
    }
}

166
src/virtio/virtqueue.rs Normal file
View File

@ -0,0 +1,166 @@
use std::sync::atomic::{Ordering, AtomicUsize, AtomicBool};
use std::sync::Arc;
use memory::GuestRam;
use kvm::Kvm;
use vm::Result;
use super::eventfd::{EventFd,IoEventFd};
use super::consts::*;
use super::vring::{Vring,Descriptor};
use super::bus::VirtioDeviceConfig;
use super::chain::Chain;
#[derive(Clone)]
pub struct VirtQueue {
    memory: GuestRam,
    vring: Vring,
    // Negotiated feature bits (checked for VIRTIO_F_EVENT_IDX).
    features: u64,
    // Eventfd that fires when the guest kicks this queue.
    ioeventfd: Arc<IoEventFd>,
    // Interrupt used to notify the guest of used entries.
    interrupt: Arc<InterruptLine>,
    // Set by set_closed() so blocked waiters can observe shutdown.
    closed: Arc<AtomicBool>,
}
impl VirtQueue {
    pub fn new(memory: GuestRam, vring: Vring, interrupt: Arc<InterruptLine>, ioeventfd: Arc<IoEventFd>) -> VirtQueue {
        VirtQueue {
            memory,
            vring,
            features: 0,
            ioeventfd,
            interrupt,
            closed: Arc::new(AtomicBool::new(false)),
        }
    }
    /// Mark the queue closed and kick the eventfd so a blocked
    /// `wait_ready()` wakes up and can observe the flag.
    #[allow(dead_code)]
    pub fn set_closed(&self) {
        self.closed.store(true, Ordering::SeqCst);
        self.ioeventfd.write(1).unwrap();
    }
    #[allow(dead_code)]
    pub fn is_closed(&self) -> bool {
        self.closed.load(Ordering::SeqCst)
    }
    /// Has VIRTIO_F_EVENT_IDX been negotiated?
    fn use_event_idx(&self) -> bool {
        self.features & VIRTIO_F_EVENT_IDX != 0
    }
    /// Block on the ioeventfd until the guest kicks the queue; returns
    /// immediately when avail entries are already pending.
    pub fn wait_ready(&self) -> Result<()> {
        if self.vring.is_empty() {
            let _ = self.ioeventfd.read()?;
        }
        Ok(())
    }
    /// Block until an avail entry can be popped and return it as a `Chain`.
    pub fn wait_next_chain(&self) -> Result<Chain> {
        loop {
            self.wait_ready()?;
            if let Some(idx) = self.pop_avail_entry() {
                return Ok(Chain::new(self.memory.clone(), self.clone(), idx, self.vring.size()));
            }
        }
    }
    /// Run `f` on each incoming chain, forever.  Panics if waiting fails.
    pub fn on_each_chain<F>(&self, mut f: F)
        where F: FnMut(Chain) {
        loop {
            self.wait_ready().unwrap();
            for chain in self.iter() {
                f(chain);
            }
        }
    }
    pub fn iter(&self) -> QueueIter {
        QueueIter { vq: self.clone() }
    }
    /// Should placing `used_count` entries starting at index `first_used`
    /// inject an interrupt?
    fn need_interrupt(&self, first_used: u16, used_count: usize) -> bool {
        if used_count == 0 {
            return false;
        }
        if self.use_event_idx() {
            let event = self.vring.read_used_event();
            // Minimum count needed to traverse event idx.  Ring indices are
            // free-running u16 counters, so use wrapping arithmetic: a plain
            // `event - first_used` would panic in debug builds once the
            // indices wrap past u16::MAX.
            let span = event.wrapping_sub(first_used).wrapping_add(1) as usize;
            return used_count >= span;
        }
        !self.vring.read_avail_no_interrupt()
    }
    /// Return a completed chain to the guest and notify it if required.
    pub fn put_used(&self, idx: u16, len: u32) {
        let used = self.vring.next_used();
        self.vring.put_used(idx, len);
        if self.need_interrupt(used, 1) {
            self.interrupt.notify_queue();
        }
    }
    /// Pop the next avail entry; with event-idx negotiated, also publish an
    /// updated avail_event so the guest knows when to kick again.
    fn pop_avail_entry(&self) -> Option<u16> {
        if let Some(idx) = self.vring.pop_avail_entry() {
            if self.use_event_idx() {
                self.vring.write_avail_event(self.vring.next_avail());
            }
            return Some(idx)
        }
        None
    }
    pub fn load_descriptor(&self, idx: u16) -> Option<Descriptor> {
        self.vring.load_descriptor(idx)
    }
}
/// Iterator yielding one `Chain` per pending avail entry; stops (returns
/// `None`) when the queue is currently empty.
pub struct QueueIter {
    vq: VirtQueue,
}

impl Iterator for QueueIter {
    type Item = Chain;

    fn next(&mut self) -> Option<Self::Item> {
        let idx = match self.vq.pop_avail_entry() {
            Some(idx) => idx,
            None => return None,
        };
        Some(Chain::new(self.vq.memory.clone(), self.vq.clone(), idx, self.vq.vring.size()))
    }
}
/// Guest interrupt plumbing for a virtio device: an eventfd registered with
/// KVM as an irqfd, plus the ISR status bits the guest reads to learn why
/// the interrupt fired.
pub struct InterruptLine {
    irqfd: EventFd,
    // Bit 0x1: queue interrupt pending; bit 0x2: config-change pending.
    isr: AtomicUsize,
}
impl InterruptLine {
    /// Build an `InterruptLine` from the irq recorded in a device config.
    pub fn from_config(conf: &VirtioDeviceConfig) -> Result<Arc<InterruptLine>> {
        InterruptLine::new(conf.kvm(), conf.irq())
    }
    /// Create an eventfd and register it with KVM as the irqfd for `irq`.
    fn new(kvm: &Kvm, irq: u8) -> Result<Arc<InterruptLine>> {
        let irqfd = EventFd::new()?;
        kvm.irqfd(irqfd.raw_fd() as u32, irq as u32)?;
        let line = InterruptLine {
            irqfd,
            isr: AtomicUsize::new(0)
        };
        Ok(Arc::new(line))
    }
    /// Atomically read and clear the ISR status (guest read side-effect).
    pub fn isr_read(&self) -> u64 {
        self.isr.swap(0, Ordering::SeqCst) as u64
    }
    /// Set `isr_bit` in the status register and fire the irqfd.
    fn notify(&self, isr_bit: usize) {
        self.isr.fetch_or(isr_bit, Ordering::SeqCst);
        self.irqfd.write(1).unwrap();
    }
    /// Signal that a queue has new used entries.
    pub fn notify_queue(&self) {
        self.notify(0x1);
    }
    /// Signal a device configuration change.
    pub fn notify_config(&self) {
        self.notify(0x2);
    }
}

388
src/virtio/vring.rs Normal file
View File

@ -0,0 +1,388 @@
use std::sync::atomic::{self,AtomicUsize,Ordering};
use std::sync::Arc;
use std::fmt;
use std::cmp;
use std::io::{self, Read};
use memory::GuestRam;
use super::consts::*;
use vm::{Result,Error,ErrorKind};
///
/// A convenience wrapper around `AtomicUsize`
///
/// Stores a free-running virtqueue ring index that can be shared between
/// clones of a `Vring`; all accesses use SeqCst ordering.
#[derive(Clone)]
struct SharedIndex(Arc<AtomicUsize>);
impl SharedIndex {
    // New index starting at zero.
    fn new() -> SharedIndex {
        SharedIndex(Arc::new(AtomicUsize::new(0)))
    }
    // Current value truncated to the 16-bit ring-index space.
    fn get(&self) -> u16 {
        self.0.load(Ordering::SeqCst) as u16
    }
    // Increment by one; the u16 view wraps naturally.
    fn inc(&self) {
        self.0.fetch_add(1, Ordering::SeqCst);
    }
    // Overwrite the stored value with `v`.
    fn set(&self, v: u16) {
        self.0.store(v as usize, Ordering::SeqCst);
    }
}
///
/// Access to the low-level memory structure of a Virtqueue.
///
#[derive(Clone)]
pub struct Vring {
    memory: GuestRam,
    /// Default queue_size for this virtqueue
    default_size: u16,
    /// Number of elements in the virtqueue ring
    /// (a power of two no larger than MAX_QUEUE_SIZE -- see `set_size()`)
    queue_size: u16,
    /// Guest address for base of descriptor table
    pub descriptors: u64,
    /// Guest address for base of available ring
    pub avail_ring: u64,
    /// Guest address for base of used ring
    pub used_ring: u64,
    /// Has this virtqueue been enabled?
    enabled: bool,
    /// The index in the used ring where the next used entry will be placed
    next_used_idx: SharedIndex,
    /// last seen avail_idx loaded from guest memory
    cached_avail_idx: SharedIndex,
    /// The index in the avail ring where the next available entry will be read
    next_avail: SharedIndex,
}
impl Vring {
    /// Create a disabled `Vring` with all guest addresses zeroed and the
    /// queue size set to `default_size`.
    pub fn new(memory: GuestRam, default_size: u16) -> Vring {
        Vring {
            memory,
            default_size,
            queue_size: default_size,
            descriptors:0,
            avail_ring: 0,
            used_ring: 0,
            enabled: false,
            next_used_idx: SharedIndex::new(),
            cached_avail_idx: SharedIndex::new(),
            next_avail: SharedIndex::new(),
        }
    }
    ///
    /// Set `Vring` into the enabled state.
    ///
    pub fn enable(&mut self) {
        self.enabled = true;
    }
    ///
    /// Return `true` if this `Vring` has been enabled.
    ///
    pub fn is_enabled(&self) -> bool {
        self.enabled
    }
    ///
    /// Queue size of this `Vring`
    ///
    pub fn size(&self) -> u16 {
        self.queue_size
    }
    ///
    /// Set the queue size of this `Vring`. If `sz` is an invalid value
    /// ignore the request. It is illegal to change the queue size after
    /// a virtqueue has been enabled, so ignore requests if enabled.
    ///
    /// Valid sizes are less than or equal to `MAX_QUEUE_SIZE` and must
    /// be a power of 2.
    ///
    pub fn set_size(&mut self, sz: u16) {
        // `sz & (sz - 1) != 0` rejects non-powers-of-two (and sz == 0 panics
        // in debug via the subtraction only when sz == 0 -- callers pass
        // guest-supplied sizes, which the power-of-two test filters).
        if self.enabled || sz > MAX_QUEUE_SIZE || (sz & (sz - 1) != 0) {
            return;
        }
        self.queue_size = sz;
    }
    ///
    /// Reset `Vring` to the initial state. `queue_size` is set to the `default_size`
    /// and all other fields are cleared. `enabled` is set to false.
    ///
    pub fn reset(&mut self) {
        self.queue_size = self.default_size;
        self.descriptors = 0;
        self.avail_ring = 0;
        self.used_ring = 0;
        self.enabled = false;
        self.next_used_idx.set(0);
        self.cached_avail_idx.set(0);
        self.next_avail.set(0);
    }
    ///
    /// Does `Vring` currently have available entries?
    ///
    /// Queue is empty if `next_avail` is same value as
    /// `avail_ring.idx` value in guest memory If `cached_avail_idx`
    /// currently matches `next_avail` it is reloaded from
    /// memory in case guest has updated field since last
    /// time it was loaded.
    ///
    pub fn is_empty(&self) -> bool {
        let next_avail = self.next_avail.get();
        if self.cached_avail_idx.get() != next_avail {
            return false;
        }
        next_avail == self.load_avail_idx()
    }
    ///
    /// Write an entry into the Used ring.
    ///
    /// The entry is written into the ring structure at offset
    /// `next_used_idx % queue_size`. The value of `next_used_idx`
    /// is then incremented and the new value is written into
    /// guest memory into the `used_ring.idx` field.
    ///
    pub fn put_used(&self, idx: u16, len: u32) {
        if idx >= self.queue_size {
            return;
        }
        let used_idx = (self.next_used_idx.get() % self.queue_size) as u64;
        // Used ring layout: u16 flags, u16 idx, then 8-byte (id, len) elems.
        let elem_addr = self.used_ring + (4 + used_idx * 8);
        // write descriptor index to 'next used' slot in used ring
        self.memory.write_int(elem_addr, idx as u32).unwrap();
        // write length to 'next used' slot in ring
        self.memory.write_int(elem_addr + 4, len as u32).unwrap();
        self.next_used_idx.inc();
        // Make the element writes visible before publishing the new index.
        atomic::fence(Ordering::Release);
        // write updated next_used
        self.memory.write_int(self.used_ring + 2, self.next_used_idx.get()).unwrap();
    }
    ///
    /// Load `avail_ring.idx` from guest memory and store it in `cached_avail_idx`.
    ///
    pub fn load_avail_idx(&self) -> u16 {
        let avail_idx = self.memory.read_int::<u16>(self.avail_ring + 2).unwrap();
        self.cached_avail_idx.set(avail_idx);
        avail_idx
    }
    ///
    /// Read from guest memory and return the Avail ring entry at
    /// index `ring_idx % queue_size`.
    ///
    fn load_avail_entry(&self, ring_idx: u16) -> u16 {
        // Avail ring layout: u16 flags, u16 idx, then u16 ring entries.
        let offset = (4 + (ring_idx % self.queue_size) * 2) as u64;
        self.memory.read_int(self.avail_ring + offset).unwrap()
    }
    ///
    /// If queue is not empty, read and return the next Avail ring entry
    /// and increment `next_avail`. If queue is empty return `None`
    ///
    pub fn pop_avail_entry(&self) -> Option<u16> {
        if self.is_empty() {
            return None
        }
        let next_avail = self.next_avail.get();
        let avail_entry = self.load_avail_entry(next_avail);
        self.next_avail.inc();
        Some(avail_entry)
    }
    /// Next avail index reduced into the ring (`next_avail % queue_size`).
    pub fn next_avail(&self) -> u16 {
        self.next_avail.get() % self.queue_size
    }
    ///
    /// Read and return the `used_event` field from the Avail ring.
    ///
    pub fn read_used_event(&self) -> u16 {
        // used_event lives immediately after the avail ring entries.
        let addr = self.avail_ring + 4 + (self.queue_size as u64 * 2);
        self.memory.read_int::<u16>(addr).unwrap()
    }
    ///
    /// Read the `flags` field from the Avail ring and return `true` if
    /// `NO_INTERRUPT` bit is set.
    ///
    pub fn read_avail_no_interrupt(&self) -> bool {
        let flags = self.memory.read_int::<u16>(self.avail_ring).unwrap();
        flags & 0x01 != 0
    }
    ///
    /// Write `val` to the `avail_event` field of Used ring.
    ///
    /// If `val` is not a valid index for this virtqueue this
    /// function does nothing.
    ///
    pub fn write_avail_event(&self, val: u16) {
        // NOTE(review): the bound uses `>` rather than `>=`, so
        // val == queue_size is accepted -- confirm intentional.
        if val > self.queue_size {
            return;
        }
        // avail_event lives immediately after the used ring elements.
        let addr = self.used_ring + 4 + (self.queue_size as u64 * 8);
        self.memory.write_int::<u16>(addr, val).unwrap();
        atomic::fence(Ordering::Release);
    }
    ///
    /// Set or clear the `NO_NOTIFY` bit in flags field of Used ring
    ///
    #[allow(dead_code)]
    pub fn write_used_no_notify(&self, val: bool) {
        let flag = if val { 0x1 } else { 0x0 };
        self.memory.write_int::<u16>(self.used_ring, flag).unwrap();
    }
    ///
    /// Load the descriptor table entry at `idx` from guest memory and return it.
    ///
    /// Returns `None` when the descriptor's buffer range or `next` link is
    /// invalid; panics when `idx` itself is out of range.
    pub fn load_descriptor(&self, idx: u16) -> Option<Descriptor> {
        if idx >= self.queue_size {
            panic!("load_descriptor called with index larger than queue size");
        }
        // Descriptor table entries are 16 bytes: u64 addr, u32 len,
        // u16 flags, u16 next.
        let head = self.descriptors + (idx as u64 * 16);
        let addr = self.memory.read_int::<u64>(head).unwrap();
        let len= self.memory.read_int::<u32>(head + 8).unwrap();
        let flags = self.memory.read_int::<u16>(head + 12).unwrap();
        let next = self.memory.read_int::<u16>(head + 14).unwrap();
        if self.memory.is_valid_range(addr, len as usize) && next < self.queue_size {
            return Some(Descriptor::new(idx, addr, len, flags, next));
        }
        None
    }
    /// Current free-running next-used index.
    pub fn next_used(&self) -> u16 {
        self.next_used_idx.get()
    }
    /// Verify that this vring is enabled and that the descriptor table,
    /// avail ring and used ring all fit inside valid guest memory.
    pub fn validate(&self) -> Result<()> {
        fn vring_err<T: ToString>(msg: T) -> Result<()> {
            Err(Error::new(ErrorKind::InvalidVring, msg.to_string()))
        }
        if !self.enabled {
            return vring_err("vring is not enabled");
        }
        let qsz = self.queue_size as usize;
        let desc_table_sz = 16 * qsz;
        let avail_ring_sz = 6 + 2 * qsz;
        let used_ring_sz = 6 + 8 * qsz;
        if !self.memory.is_valid_range(self.descriptors, desc_table_sz) {
            return vring_err(format!("descriptor table range is invalid 0x{:x}", self.descriptors));
        }
        if !self.memory.is_valid_range(self.avail_ring, avail_ring_sz) {
            return vring_err(format!("avail ring range is invalid 0x{:x}", self.avail_ring));
        }
        if !self.memory.is_valid_range(self.used_ring, used_ring_sz) {
            return vring_err(format!("used ring range is invalid 0x{:x}", self.used_ring));
        }
        Ok(())
    }
}
///
/// An entry read from the descriptor table
///
/// Describes one guest buffer (`addr`, `len`) plus the flags and `next`
/// link used to walk a descriptor chain.
#[derive(Copy,Clone)]
pub struct Descriptor {
    // Index of this descriptor in the table.
    pub idx: u16,
    // Guest-physical address of the buffer.
    pub addr: u64,
    // Buffer length in bytes.
    pub len: u32,
    // VRING_DESC_F_* flag bits.
    pub flags: u16,
    // Table index of the next descriptor in the chain (when F_NEXT is set).
    pub next: u16,
}
impl Descriptor {
    fn new(idx: u16, addr: u64, len: u32, flags: u16, next:u16) -> Descriptor {
        Descriptor{ idx, addr, len, flags, next }
    }
    ///
    /// Test if `flag` is set in `self.flags`
    ///
    fn has_flag(&self, flag: u16) -> bool {
        self.flags & flag == flag
    }
    ///
    /// Is VRING_DESC_F_NEXT set in `self.flags`?
    ///
    pub fn has_next(&self) -> bool {
        self.has_flag(VRING_DESC_F_NEXT)
    }
    ///
    /// Is VRING_DESC_F_WRITE set in `self.flags`?
    ///
    pub fn is_write(&self) -> bool {
        self.has_flag(VRING_DESC_F_WRITE)
    }
    ///
    /// Is VRING_DESC_F_INDIRECT set in `self.flags`?
    ///
    #[allow(dead_code)]
    pub fn is_indirect(&self) -> bool {
        self.has_flag(VRING_DESC_F_INDIRECT)
    }
    /// Bytes left in this buffer past `offset` (zero when past the end).
    fn remaining(&self, offset: usize) -> usize {
        (self.len as usize).saturating_sub(offset)
    }
    /// Copy up to `buf.len()` bytes out of the buffer starting at `offset`;
    /// returns the number of bytes actually copied.
    pub fn read_from(&self, memory: &GuestRam, offset: usize, buf: &mut[u8]) -> usize {
        let sz = cmp::min(buf.len(), self.remaining(offset));
        if sz == 0 {
            return 0;
        }
        memory.read_bytes(self.addr + offset as u64, buf).unwrap();
        sz
    }
    /// Copy up to `buf.len()` bytes into the buffer starting at `offset`;
    /// returns the number of bytes actually copied.
    pub fn write_to(&self, memory: &GuestRam, offset: usize, buf: &[u8]) -> usize {
        let sz = cmp::min(buf.len(), self.remaining(offset));
        if sz == 0 {
            return 0;
        }
        memory.write_bytes(self.addr + offset as u64, buf).unwrap();
        sz
    }
    /// Fill the buffer (from `offset`, at most `size` bytes) directly from
    /// reader `r`; returns the reader's byte count.
    pub fn write_from_reader<R: Read+Sized>(&self, memory: &GuestRam, offset: usize, mut r: R, size: usize) -> io::Result<usize> {
        let sz = cmp::min(size, self.remaining(offset));
        if sz == 0 {
            return Ok(0);
        }
        let slice = memory.mut_slice(self.addr + offset as u64, sz).unwrap();
        r.read(slice)
    }
}
// Hand-written Debug so the address and flags render in hex.
impl fmt::Debug for Descriptor {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "Descriptor{{ idx: {} addr: {:x} len: {} flags: {:x} next: {} }}", self.idx, self.addr, self.len, self.flags, self.next)
    }
}

170
src/vm/error.rs Normal file
View File

@ -0,0 +1,170 @@
use std::result;
use std::error;
use std::fmt;
use std::str;
use std::ffi::CStr;
use libc;
// All fallible vm-layer operations use this Result alias.
pub type Result<T> = result::Result<T, Error>;
#[derive(Debug)]
pub enum ErrorKind {
    // Carries the offending guest address.
    InvalidAddress(u64),
    // Carries the offending mapping offset.
    InvalidMappingOffset(usize),
    RegisterMemoryFailed,
    ReadKernelFailed,
    Interrupted,
    InvalidVring,
    // Carries the name of the failing ioctl.
    IoctlFailed(&'static str),
    // Carries the missing KVM capability number.
    MissingRequiredExtension(u32),
    OpenDeviceFailed,
    CreateVmFailed,
    BadVersion,
    EventFdError,
}
impl ErrorKind {
    /// Static human-readable description of this error kind.
    /// (Fixed the "requred" typo in the missing-extension message.)
    fn as_str(&self) -> &'static str {
        match *self {
            ErrorKind::InvalidAddress(..) => "Invalid guest memory address",
            ErrorKind::InvalidMappingOffset(..) => "Invalid memory mapping offset",
            ErrorKind::RegisterMemoryFailed => "Failed to register memory region",
            ErrorKind::ReadKernelFailed => "Failed to load kernel from disk",
            ErrorKind::Interrupted => "System call interrupted",
            ErrorKind::InvalidVring => "Invalid Vring",
            ErrorKind::IoctlFailed(..) => "Ioctl failed",
            ErrorKind::MissingRequiredExtension(..) => "kernel does not support required kvm extension",
            ErrorKind::OpenDeviceFailed => "could not open /dev/kvm",
            ErrorKind::CreateVmFailed => "call to create vm failed",
            ErrorKind::BadVersion => "unexpected kvm api version",
            ErrorKind::EventFdError => "eventfd error",
        }
    }
}
impl fmt::Display for ErrorKind {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Variants that carry a payload append it to the base message;
        // everything else prints the static string unchanged.
        match *self {
            ErrorKind::InvalidAddress(addr) => write!(f, "{}: 0x{:x}", self.as_str(), addr),
            ErrorKind::InvalidMappingOffset(offset) => write!(f, "{}: 0x{:x}", self.as_str(), offset),
            ErrorKind::IoctlFailed(name) => write!(f, "Ioctl {} failed", name),
            _ => write!(f, "{}", self.as_str()),
        }
    }
}
impl From<ErrorKind> for Error {
fn from(kind: ErrorKind) -> Error {
Error { repr: Repr::Simple(kind) }
}
}
// Internal representation: a raw OS errno, a bare error kind, or a kind
// wrapping an arbitrary underlying error.
enum Repr {
    Errno(i32),
    Simple(ErrorKind),
    General(Box<General>),
}
// An error kind paired with the boxed error that caused it.
#[derive(Debug)]
struct General {
    kind: ErrorKind,
    error: Box<error::Error+Send+Sync>,
}
// Public error type for the vm layer; see `Repr` for the cases it models.
#[derive(Debug)]
pub struct Error {
    repr: Repr,
}
impl Error {
pub fn new<E>(kind: ErrorKind, error: E) -> Error
where E: Into<Box<error::Error+Send+Sync>> {
Self::_new(kind, error.into())
}
fn _new(kind: ErrorKind, error: Box<error::Error+Send+Sync>) -> Error {
Error {
repr: Repr::General(Box::new(General{
kind, error
}))
}
}
pub fn from_last_errno() -> Error {
let errno = unsafe { *libc::__errno_location() };
Error::from_errno(errno)
}
pub fn from_errno(errno: i32) -> Error {
if errno == libc::EINTR {
Error { repr: Repr::Simple(ErrorKind::Interrupted) }
} else {
Error { repr: Repr::Errno(errno) }
}
}
pub fn is_interrupted(&self) -> bool {
match self.repr {
Repr::Simple(ErrorKind::Interrupted) => true,
_ => false,
}
}
}
/// Render `errno` into a human-readable message using strerror_r.
fn error_string(errno: i32) -> String {
    let mut buf = [0 as libc::c_char; 256];
    let p = buf.as_mut_ptr();
    unsafe {
        // NOTE(review): the XSI strerror_r typically reports failure with a
        // positive error value, which `< 0` would not catch -- confirm this
        // check matches the libc binding in use.
        if libc::strerror_r(errno as libc::c_int, p, buf.len()) < 0 {
            panic!("strerror_r failed in error_string");
        }
        // Re-borrow as const for CStr; the buffer is NUL-terminated by
        // strerror_r (and was zero-initialized).
        let p = p as *const _;
        str::from_utf8(CStr::from_ptr(p).to_bytes()).unwrap().to_owned()
    }
}
// Hand-written Debug so Errno values also show their decoded message.
impl fmt::Debug for Repr {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match *self {
            Repr::Simple(ref kind) => f.debug_tuple("Kind").field(kind).finish(),
            Repr::General(ref c) => f.debug_tuple("General").field(c).finish(),
            Repr::Errno(ref errno) => {
                let msg = error_string(*errno);
                f.debug_struct("Errno")
                    .field("errno", errno)
                    .field("message", &msg)
                    .finish()
            }
        }
    }
}
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self.repr {
            // Raw errno: decoded message plus the numeric value.
            Repr::Errno(errno) => {
                let detail = error_string(errno);
                write!(f, "{} (errno: {})", detail, errno)
            }
            // Wrapped error: kind description, then the underlying cause.
            Repr::General(ref c) => {
                write!(f, "{}: {}", c.kind, c.error)
            },
            Repr::Simple(ref kind) => kind.fmt(f),
        }
    }
}
impl error::Error for Error {
    fn description(&self) -> &str {
        match self.repr {
            Repr::General(ref c) => c.error.description(),
            Repr::Simple(ref kind) => kind.as_str(),
            Repr::Errno(..) => "Errno Error",
        }
    }
    fn cause(&self) -> Option<&error::Error> {
        // Only wrapped errors carry an underlying cause.
        if let Repr::General(ref c) = self.repr {
            c.error.cause()
        } else {
            None
        }
    }
}

254
src/vm/io.rs Normal file
View File

@ -0,0 +1,254 @@
use std::sync::{Arc,RwLock,RwLockWriteGuard};
use memory::AddressRange;
/// Devices that respond to x86 port I/O.  The defaults ignore writes and
/// read as zero, so an implementor overrides only the directions it needs.
pub trait IoPortOps: Send+Sync {
    fn io_in(&mut self, _port: u16, _size: usize) -> u32 {
        0
    }
    fn io_out(&mut self, _port: u16, _size: usize, _val: u32) {
    }
}
/// Devices that respond to memory-mapped I/O.  The defaults ignore writes
/// and read as zero, so an implementor overrides only what it needs.
pub trait MmioOps: Send+Sync {
    fn mmio_read(&mut self, _address: u64, _size: usize) -> u64 {
        0
    }
    fn mmio_write(&mut self, _address: u64, _size: usize, _val: u64) {
    }
}
// Placeholder device: reads as zero, ignores writes (trait defaults).
struct IoPortDummy;
impl IoPortOps for IoPortDummy {}
// PS/2 system control port A stub; always reads 0x02.
struct IoPortPS2Control;
impl IoPortOps for IoPortPS2Control {
    fn io_in(&mut self, _port: u16, _size: usize) -> u32 { 0x02 }
}
/// Minimal fake i8042 controller.  The bool records whether a reset
/// request (0xfe written to port 0x64) has already been observed.
struct IoPortFakeI8042(bool);

impl IoPortOps for IoPortFakeI8042 {
    fn io_in(&mut self, port: u16, _size: usize) -> u32 {
        match port {
            0x64 => 0x1,
            0x61 => 0x20,
            _ => 0,
        }
    }
    fn io_out(&mut self, port: u16, _size: usize, val: u32) {
        let reset_requested = port == 0x64 && val == 0xfe;
        if reset_requested && !self.0 {
            self.0 = true;
            println!("Reset signal!");
        }
    }
}
/// Binds a contiguous range of I/O ports to a device.
struct IoPortEntry {
    // First port of the range.
    port: u16,
    // Number of consecutive ports covered.
    count: usize,
    device: Arc<RwLock<IoPortOps>>,
}

impl IoPortEntry {
    fn new(port: u16, count: usize, device: Arc<RwLock<IoPortOps>>) -> IoPortEntry {
        // Field-init shorthand, consistent with MmioEntry::new below.
        IoPortEntry { port, count, device }
    }
    /// Is `port` within [self.port, self.port + count)?  Computed in u32 so
    /// a range ending at the top of the u16 port space cannot overflow.
    fn contains(&self, port: u16) -> bool {
        let start = self.port as u32;
        let end = start + self.count as u32;
        (port as u32) >= start && (port as u32) < end
    }
    fn io_in(&mut self, port: u16, size: usize) -> u32 {
        let mut d = self.device.write().unwrap();
        d.io_in(port, size)
    }
    fn io_out(&mut self, port: u16, size: usize, val: u32) {
        let mut d = self.device.write().unwrap();
        d.io_out(port, size, val)
    }
}
/// Binds a guest-physical address range to an MMIO device.
struct MmioEntry {
    range: AddressRange,
    device: Arc<RwLock<MmioOps>>,
}

impl MmioEntry {
    fn new(range: AddressRange, device: Arc<RwLock<MmioOps>>) -> MmioEntry {
        MmioEntry { range, device }
    }
    /// Is [address, address + length) fully inside this entry's range?
    fn contains_range(&self, address: u64, length: usize) -> bool {
        self.range.contains(address, length)
    }
    fn read(&mut self, address: u64, size: usize) -> u64 {
        let mut dev = self.device.write().unwrap();
        dev.mmio_read(address, size)
    }
    fn write(&mut self, address: u64, size: usize, val: u64) {
        let mut dev = self.device.write().unwrap();
        dev.mmio_write(address, size, val)
    }
}
/// Thread-safe front door for port-I/O and MMIO emulation.  Vcpu threads
/// call the `emulate_*` methods, which delegate into the locked state.
pub struct IoDispatcher {
    state: RwLock<IoDispatcherState>,
}

impl IoDispatcher {
    pub fn new() -> Arc<IoDispatcher> {
        let state = RwLock::new(IoDispatcherState::new());
        Arc::new(IoDispatcher { state })
    }
    fn state_mut(&self) -> RwLockWriteGuard<IoDispatcherState> {
        self.state.write().unwrap()
    }
    pub fn register_ioports(&self, port: u16, count: usize, dev: Arc<RwLock<IoPortOps>>) {
        self.state_mut().register_ioports(port, count, dev)
    }
    pub fn register_mmio(&self, range: AddressRange, device: Arc<RwLock<MmioOps>>) {
        self.state_mut().register_mmio(range, device);
    }
    pub fn emulate_io_in(&self, port: u16, size: usize) -> u32 {
        let mut state = self.state_mut();
        state.emulate_io_in(port, size)
    }
    pub fn emulate_io_out(&self, port: u16, size: usize, val: u32) {
        let mut state = self.state_mut();
        state.emulate_io_out(port, size, val)
    }
    pub fn emulate_mmio_read(&self, address: u64, size: usize) -> u64 {
        let mut state = self.state_mut();
        state.emulate_mmio_read(address, size)
    }
    pub fn emulate_mmio_write(&self, address: u64, size: usize, val: u64) {
        let mut state = self.state_mut();
        state.emulate_mmio_write(address, size, val)
    }
}
struct IoDispatcherState {
    // Last port an unhandled access was logged for, to limit log spam.
    last_unhandled_port: u16,
    ioport_entries: Vec<IoPortEntry>,
    mmio_entries: Vec<MmioEntry>,
}

impl IoDispatcherState {
    pub fn new() -> IoDispatcherState {
        let mut st = IoDispatcherState {
            last_unhandled_port: 0,
            ioport_entries: Vec::new(),
            mmio_entries: Vec::new(),
        };
        st.setup_ioports();
        st
    }
    /// Register `dev` for `count` consecutive ports starting at `port`.
    fn register_ioports(&mut self, port: u16, count: usize, dev: Arc<RwLock<IoPortOps>>) {
        self.ioport_entries.push(IoPortEntry::new(port, count, dev));
    }
    fn register_mmio(&mut self, range: AddressRange, device: Arc<RwLock<MmioOps>>) {
        self.mmio_entries.push(MmioEntry::new(range, device));
    }
    /// First registered mmio entry containing [address, address + size).
    fn mmio_for(&mut self, address: u64, size: usize) -> Option<&mut MmioEntry> {
        self.mmio_entries.iter_mut().find(|e| e.contains_range(address, size))
    }
    /// First registered ioport entry covering `port`.
    fn ioports_for(&mut self, port: u16) -> Option<&mut IoPortEntry> {
        self.ioport_entries.iter_mut().find(|e| e.contains(port))
    }
    fn emulate_io_in(&mut self, port: u16, size: usize) -> u32 {
        if let Some(entry) = self.ioports_for(port) {
            return entry.io_in(port, size);
        }
        self.debug_port(port, true);
        0
    }
    fn emulate_io_out(&mut self, port: u16, size: usize, val: u32) {
        if let Some(entry) = self.ioports_for(port) {
            // (dropped a redundant `size as usize` cast; already usize)
            entry.io_out(port, size, val);
            return;
        }
        self.debug_port(port, false);
    }
    /// Log an unhandled access, only once per distinct port in a row.
    /// (Fixed the stray trailing space in the "OUT" label.)
    fn debug_port(&mut self, port: u16, is_in: bool) {
        if self.last_unhandled_port != port {
            self.last_unhandled_port = port;
            let s = if is_in { "IN" } else { "OUT" };
            println!("unhandled io {} on port {:x}", s, port);
        }
    }
    pub fn emulate_mmio_write(&mut self, address: u64, size: usize, val: u64) {
        match self.mmio_for(address, size) {
            Some(d) => { d.write(address, size, val) },
            None => { println!("unhandled mmio write on address {:x}", address) }
        }
    }
    fn emulate_mmio_read(&mut self, address: u64, size: usize) -> u64 {
        match self.mmio_for(address, size) {
            Some(d) => { d.read(address, size) },
            None => { println!("unhandled mmio read on address {:x}", address); 0 }
        }
    }
    fn register_dummy(&mut self, port: u16, count: usize) {
        self.register_ioports(port, count, Arc::new(RwLock::new(IoPortDummy)));
    }
    /// Install the legacy PC platform ports every guest expects to probe.
    fn setup_ioports(&mut self) {
        /* 0000 - 001F - DMA1 controller */
        self.register_dummy(0x0000, 32);
        /* 0020 - 003F - 8259A PIC 1 */
        // NOTE(review): only 2 ports registered despite the 0020-003F label -- confirm.
        self.register_dummy(0x0020, 2);
        /* 0060 - 0068 - i8042 */
        self.register_ioports(0x0060, 8, Arc::new(RwLock::new(IoPortFakeI8042(false))));
        /* 0040 - 005F - PIT (8253,8254) */
        self.register_dummy(0x0040, 4);
        /* 0092 - PS/2 system control port A */
        self.register_ioports(0x0092, 1, Arc::new(RwLock::new(IoPortPS2Control)));
        /* 00A0 - 00AF - 8259A PIC 1 */
        self.register_dummy(0x00A0, 2);
        /* 00C0 - 00CF - DMA1 controller */
        // NOTE(review): 32 ports registered covers 00C0-00DF, not 00CF -- confirm.
        self.register_dummy(0x00C0, 32);
        /* 00F0 - 00FF - Math co-processor */
        self.register_dummy(0x00F0, 2);
        /* 0278 - 027A - Parallel printer port */
        self.register_dummy(0x0278, 3);
        /* 0378 - 037A - Parallel printer port */
        self.register_dummy(0x0378, 3);
        /* 03D4 - 03D5 - CRT Control registers */
        self.register_dummy(0x03D4, 2);
    }
}

101
src/vm/kernel_cmdline.rs Normal file
View File

@ -0,0 +1,101 @@
use std::ffi::OsString;
use std::os::unix::ffi::OsStrExt;
use memory::{GuestRam,KERNEL_CMDLINE_ADDRESS};
use super::Result;
/// Populate `cmdline` with the default boot options: a quiet, fast-booting
/// guest using a virtio-9p root filesystem and an hvc0 console.
/// `rdonly_root` selects ro/rw root mount; `verbose` enables early serial
/// output instead of quiet boot.
fn add_defaults(cmdline: &mut KernelCmdLine, rdonly_root: bool, verbose: bool) {
    let root_mount_type = if rdonly_root { "ro" } else { "rw" };
    let output = if verbose {"earlyprintk=serial"} else {"quiet"};
    cmdline
        .push("noapic")
        .push("noacpi")
        // keyboard reboot
        .push("reboot=k")
        .push_set_true("panic")
        .push_set_val("tsc", "reliable")
        .push("no_timer_check")
        // faster rcu updates
        .push_set_true("rcuupdate.rcu_expedited")
        // then restore to normal after booting
        .push_set_true("rcuupdate.rcu_normal_after_boot")
        .push_set_val("console", "hvc0")
        .push(root_mount_type)
        .push_set_val("rootfstype", "9p")
        .push_set_val("rootflags", "trans=virtio,version=9p2000.L,cache=loose")
        .push_set_true("i8042.direct")
        .push_set_true("i8042.dumbkbd")
        .push_set_true("i8042.nopnp")
        .push_set_true("i8042.noaux")
        .push("noreplace-smp")
        //.push("initcall_debug")
        .push_set_val("iommu", "off")
        .push("cryptomgr.notests")
        .push(output)
        // disable legacy 8250 uarts; console is virtio hvc0
        .push_set_val("8250.nr_uarts", "0")
        //.push_set_val("init", "/home/user/virt/init");
        .push_set_val("init", "/phinit");
}
/// Kernel boot command line assembled as a space-separated option string
/// and written into guest memory before boot.
pub struct KernelCmdLine {
    // Guest address where the command line will be written.
    address: u64,
    // Accumulated option string.
    buffer: OsString,
}
impl KernelCmdLine {
    pub fn new() -> KernelCmdLine {
        KernelCmdLine { address: KERNEL_CMDLINE_ADDRESS, buffer: OsString::new() }
    }
    /// Command line preloaded with the default boot options (read-only root).
    pub fn new_default(verbose: bool) -> KernelCmdLine {
        let mut cmdline = KernelCmdLine::new();
        add_defaults(&mut cmdline, true, verbose);
        cmdline
    }
    /// Append a bare option, space-separating it from anything before it.
    pub fn push(&mut self, option: &str) -> &mut Self {
        if !self.buffer.is_empty() {
            self.buffer.push(" ");
        }
        self.buffer.push(option);
        self
    }
    /// Append `flag_option=1`.
    pub fn push_set_true(&mut self, flag_option: &str) -> &mut Self {
        self.push(&format!("{}=1", flag_option))
    }
    /// Append `var=val`.
    pub fn push_set_val(&mut self, var: &str, val: &str) -> &mut Self {
        self.push(&format!("{}={}", var, val))
    }
    pub fn address(&self) -> u64 {
        self.address
    }
    /// Size in bytes including the terminating NUL.
    pub fn size(&self) -> usize {
        self.buffer.as_bytes().len() + 1
    }
    /// Copy the command line plus a NUL terminator into guest memory at
    /// `self.address`.  (Fix: previously wrote to the global constant
    /// directly, ignoring the address stored in this instance.)
    pub fn write_to_memory(&self, memory: &GuestRam) -> Result<()> {
        let bs = self.buffer.as_bytes();
        let len = bs.len();
        memory.write_bytes(self.address, bs)?;
        memory.write_int(self.address + len as u64, 0u8)?;
        Ok(())
    }
}

142
src/vm/mod.rs Normal file
View File

@ -0,0 +1,142 @@
use std::sync::Arc;
use std::thread;
use std::path::{PathBuf,Path};
use std::env;
use self::io::IoDispatcher;
use virtio::VirtioBus;
use devices;
use memory::{GuestRam,KVM_KERNEL_LOAD_ADDRESS};
use kvm::*;
mod run;
pub mod io;
mod setup;
mod error;
mod kernel_cmdline;
pub use self::error::{Result,Error,ErrorKind};
use self::run::KvmRunArea;
use self::kernel_cmdline::KernelCmdLine;
/// Build-time configuration for a VM: guest RAM size, vcpu count, and the
/// paths to the kernel image and the init binary exported over 9p.
pub struct VmConfig {
    ram_size: usize,
    ncpus: usize,
    kernel_path: PathBuf,
    init_path: PathBuf,
}

#[allow(dead_code)]
impl VmConfig {
    /// Returns the default configuration: 256 MiB of RAM, a single vcpu,
    /// and empty paths (callers are expected to set them).
    pub fn new() -> VmConfig {
        VmConfig {
            ncpus: 1,
            ram_size: 256 * 1024 * 1024,
            init_path: PathBuf::new(),
            kernel_path: PathBuf::new(),
        }
    }

    /// Sets the guest RAM size, given in megabytes.
    pub fn ram_size_megs(&mut self, megs: usize) {
        self.ram_size = megs * 1024 * 1024;
    }

    /// Sets the number of virtual cpus.
    pub fn num_cpus(&mut self, ncpus: usize) {
        self.ncpus = ncpus;
    }

    /// Sets the path of the kernel ELF image to load.
    pub fn kernel_path(&mut self, path: &Path) {
        self.kernel_path = PathBuf::from(path);
    }

    /// Sets the path of the init binary run as /phinit inside the guest.
    pub fn init_path(&mut self, path: &Path) {
        self.init_path = PathBuf::from(path);
    }
}
/// A fully constructed virtual machine: the KVM handle, guest RAM, the
/// port/MMIO dispatcher shared by the vcpu run loops, and the virtio bus.
pub struct Vm {
    kvm: Kvm,
    memory: GuestRam,
    io_dispatcher: Arc<IoDispatcher>,
    // Held only to keep the virtio devices alive for the VM's lifetime;
    // not accessed after setup.
    _virtio: VirtioBus,
}
// KVM capabilities this VMM depends on. Passed to Kvm::open(), which is
// expected to verify each one so missing host support fails early.
static REQUIRED_EXTENSIONS: &[u32] = &[
    KVM_CAP_IRQCHIP,
    KVM_CAP_HLT,
    KVM_CAP_USER_MEMORY,
    KVM_CAP_SET_TSS_ADDR,
    KVM_CAP_EXT_CPUID,
    KVM_CAP_IRQ_ROUTING,
    KVM_CAP_IRQ_INJECT_STATUS,
    KVM_CAP_PIT2,
    KVM_CAP_IOEVENTFD,
];
impl Vm {
    /// Builds a complete VM from `config`.
    ///
    /// The sequence matters: the TSS address, PIT and irqchip are created on
    /// the KVM handle and guest RAM is mapped before the command line and
    /// kernel image are written into memory; vcpus and devices come next,
    /// and the MP table is written last, once PCI irq routing is known.
    pub fn open(config: VmConfig) -> Result<Vm> {
        let mut kvm = Kvm::open(&REQUIRED_EXTENSIONS)?;

        kvm.set_tss_addr(0xFFFbd000)?;
        kvm.create_pit2()?;

        let memory = GuestRam::new(config.ram_size, &kvm)?;

        kvm.create_irqchip()?;

        // "-v" anywhere on the command line enables a verbose guest boot and
        // also registers the legacy serial device below.
        let verbose = env::args().any(|arg| arg == "-v");
        let cmdline = KernelCmdLine::new_default(verbose);

        cmdline.write_to_memory(&memory)?;
        let path = PathBuf::from(&config.kernel_path);
        setup::kernel::load_pm_kernel(&memory, &path, cmdline.address(), cmdline.size())?;

        let io_dispatch = IoDispatcher::new();

        kvm.create_vcpus(config.ncpus)?;

        devices::rtc::Rtc::register(io_dispatch.clone());

        if verbose {
            devices::serial::SerialDevice::register(kvm.clone(),io_dispatch.clone(), 0);
        }

        // Virtio devices: console, rng, and the 9p root filesystem (the
        // init binary is exported into the guest via this share).
        let mut virtio = VirtioBus::new(memory.clone(), io_dispatch.clone(), kvm.clone());
        devices::VirtioSerial::create(&mut virtio)?;
        devices::VirtioRandom::create(&mut virtio)?;
        devices::VirtioP9::create(&mut virtio, "/dev/root", "/", &config.init_path)?;

        setup::mptable::setup_mptable(&memory, config.ncpus, virtio.pci_irqs())?;

        Ok(Vm {
            kvm,
            memory,
            io_dispatcher: io_dispatch,
            _virtio: virtio,
        })
    }

    /// Spawns one run-loop thread per vcpu and blocks until they all exit.
    ///
    /// Each vcpu is started at KVM_KERNEL_LOAD_ADDRESS + 0x200 — presumably
    /// the 64-bit entry point of the loaded kernel image; TODO confirm this
    /// matches the kernel's actual ELF entry.
    pub fn start(&self) -> Result<()> {
        let mut handles = Vec::new();
        for vcpu in self.kvm.get_vcpus() {
            setup::cpu::setup_protected_mode(&vcpu, KVM_KERNEL_LOAD_ADDRESS + 0x200, &self.memory)?;
            let mut run_area = KvmRunArea::new(vcpu, self.io_dispatcher.clone())?;
            let h = thread::spawn(move || run_area.run());
            handles.push(h);
        }
        for h in handles {
            h.join().expect("...");
        }
        Ok(())
    }
}

203
src/vm/run.rs Normal file
View File

@ -0,0 +1,203 @@
use std::sync::Arc;
use kvm::KvmVcpu;
use memory::Mapping;
use super::Result;
use super::io::IoDispatcher;
// Subset of the KVM_EXIT_* exit-reason codes (from linux/kvm.h) that the
// run loop below knows how to handle.
const KVM_EXIT_UNKNOWN:u32 = 0;
const KVM_EXIT_IO:u32 = 2;
const KVM_EXIT_MMIO:u32 = 6;
const KVM_EXIT_INTR:u32 = 10;
const KVM_EXIT_SHUTDOWN:u32 = 8;
const KVM_EXIT_INTERNAL_ERROR: u32 = 17;
const KVM_EXIT_SYSTEM_EVENT:u32 = 24;
/// Per-vcpu execution loop state: the vcpu handle, the shared IO
/// dispatcher, and the mmap'd kvm_run communication area.
pub struct KvmRunArea {
    vcpu: KvmVcpu,
    io: Arc<IoDispatcher>,
    // Memory mapping of this vcpu's kvm_run structure (from the vcpu fd).
    mapping: Mapping,
}

/// Decoded parameters of a KVM_EXIT_IO (port I/O) exit.
pub struct IoExitData {
    dir_out: bool,  // true for OUT (guest -> device), false for IN
    size: usize,    // access width in bytes (1, 2 or 4)
    port: u16,      // I/O port number
    count: usize,   // repeat count (string/rep instructions)
    offset: usize,  // offset of the data area inside the kvm_run mapping
}

/// Decoded parameters of a KVM_EXIT_MMIO exit.
pub struct MmioExitData {
    phys: u64,      // guest-physical address of the access
    size: usize,    // access width in bytes (1..=8)
    write: bool,    // true for a guest write, false for a guest read
}
impl KvmRunArea {
    /// Maps the vcpu's kvm_run region and pairs it with the IO dispatcher.
    pub fn new(vcpu: KvmVcpu, io_dispatcher: Arc<IoDispatcher>) -> Result<KvmRunArea> {
        let size = vcpu.get_vcpu_mmap_size()?;
        let mapping = Mapping::new_from_fd(vcpu.raw_fd(), size)?;
        Ok(KvmRunArea{
            vcpu,
            io: io_dispatcher,
            mapping,
        })
    }

    // Raw little-endian accessors into the mmap'd kvm_run structure. The
    // byte offsets used throughout this impl assume the x86_64 layout of
    // struct kvm_run from linux/kvm.h (exit_reason at byte 8, the exit
    // union starting at byte 32) — verify against the header if KVM ABI
    // assumptions change. The unwraps treat an out-of-range offset as a
    // programming error, since all offsets here are small constants.
    fn r8(&self, offset: usize) -> u8 { self.mapping.read_int(offset).unwrap() }
    fn r16(&self, offset: usize) -> u16 { self.mapping.read_int(offset).unwrap() }
    fn r32(&self, offset: usize) -> u32 { self.mapping.read_int(offset).unwrap() }
    fn r64(&self, offset: usize) -> u64 { self.mapping.read_int(offset).unwrap() }
    fn w8(&self, offset: usize, val: u8) { self.mapping.write_int(offset, val).unwrap() }
    fn w16(&self, offset: usize, val: u16) { self.mapping.write_int(offset, val).unwrap() }
    fn w32(&self, offset: usize, val: u32) { self.mapping.write_int(offset, val).unwrap() }
    fn w64(&self, offset: usize, val: u64) { self.mapping.write_int(offset, val).unwrap() }

    // kvm_run.exit_reason.
    fn exit_reason(&self) -> u32 {
        self.r32(8)
    }

    // Suberror field of the internal-error exit union member.
    fn suberror(&self) -> u32 {
        self.r32(32)
    }

    /// Decodes the io member of the exit union: direction, size, port,
    /// count, and the offset of the guest data area within the mapping.
    fn get_io_exit(&self) -> IoExitData {
        let d = self.r8(32) != 0;
        let size = self.r8(33) as usize;
        let port = self.r16(34);
        let count = self.r32(36) as usize;
        let offset = self.r64(40) as usize;
        IoExitData{
            dir_out: d,
            size,
            port,
            count,
            offset,
        }
    }

    /// Decodes the mmio member of the exit union: physical address, access
    /// length, and direction. The data bytes themselves live at offset 40.
    fn get_mmio_exit(&self) -> MmioExitData {
        let phys = self.r64(32);
        let size = self.r32(48) as usize;
        assert!(size <= 8);
        let write = self.r8(52) != 0;
        MmioExitData {
            phys, size, write
        }
    }

    /// Main vcpu loop: re-enters the guest forever, dispatching each exit.
    /// Interrupted KVM_RUN calls (EINTR) are retried; any other error ends
    /// the loop and the thread.
    pub fn run(&mut self) {
        loop {
            if let Err(err) = self.vcpu.run() {
                if !err.is_interrupted() {
                    println!("KVM_RUN returned error, bailing: {:?}", err);
                    return;
                }
            } else {
                self.handle_exit();
            }
        }
    }

    /// Dispatches one guest exit by reason; unhandled reasons are logged.
    fn handle_exit(&mut self) {
        match self.exit_reason() {
            KVM_EXIT_UNKNOWN => {println!("unknown")},
            KVM_EXIT_IO => { self.handle_exit_io() },
            KVM_EXIT_MMIO => { self.handle_exit_mmio() },
            KVM_EXIT_INTR => { println!("intr")},
            KVM_EXIT_SHUTDOWN => { println!("shut");
                self.handle_problem();
            },
            KVM_EXIT_SYSTEM_EVENT => { println!("event")},
            KVM_EXIT_INTERNAL_ERROR => {
                let sub = self.suberror();
                println!("internal error: {}", sub);
                println!("{:?}", self.vcpu.get_regs().unwrap());
                println!("{:?}", self.vcpu.get_sregs().unwrap());
            }
            n => { println!("unhandled exit: {}", n);},
        }
    }

    /// Dumps the full register state and aborts the vcpu thread; used for
    /// unexpected guest shutdown (e.g. triple fault).
    fn handle_problem(&mut self) {
        let regs = self.vcpu.get_regs().unwrap();
        let sregs = self.vcpu.get_sregs().unwrap();
        println!("REGS:\n{:?}", regs);
        println!("SREGS:\n{:?}", sregs);
        panic!(":(");
    }

    /// Routes a port I/O exit to the IN or OUT emulation path.
    fn handle_exit_io(&mut self) {
        let exit = self.get_io_exit();
        if exit.dir_out {
            self.handle_exit_io_out(&exit);
        } else {
            self.handle_exit_io_in(&exit);
        }
    }

    /// Emulates IN: asks the dispatcher for each value and stores it into
    /// the kvm_run data area, advancing by the access size per repetition.
    fn handle_exit_io_in(&mut self, exit: &IoExitData) {
        for i in 0..exit.count {
            let v = self.io.emulate_io_in(exit.port, exit.size);
            match exit.size {
                1 => self.w8(exit.offset + i, v as u8),
                2 => self.w16(exit.offset + i * 2, v as u16),
                4 => self.w32(exit.offset + i * 4, v as u32),
                _ => {},
            }
        }
    }

    /// Emulates OUT: reads each value from the kvm_run data area and hands
    /// it to the dispatcher. Unexpected sizes are passed through as 0.
    fn handle_exit_io_out(&self, exit: &IoExitData) {
        for i in 0..exit.count {
            let v = match exit.size {
                1 => self.r8(exit.offset + i) as u32,
                2 => self.r16(exit.offset + i * 2) as u32,
                4 => self.r32(exit.offset + i * 4) as u32,
                _ => 0,
            };
            self.io.emulate_io_out(exit.port, exit.size, v);
        }
    }

    /// Routes an MMIO exit to the read or write emulation path.
    fn handle_exit_mmio(&mut self) {
        let exit = self.get_mmio_exit();
        if exit.write {
            self.handle_mmio_write(exit.phys, exit.size)
        } else {
            self.handle_mmio_read(exit.phys, exit.size)
        }
    }

    /// Guest write: forward the data bytes (at mapping offset 40) to the
    /// dispatcher. Sizes other than 1/2/4/8 are silently ignored.
    fn handle_mmio_write(&self, address: u64, size: usize) {
        if let Some(val) = self.data_to_val64(size) {
            self.io.emulate_mmio_write(address, size, val)
        }
    }

    /// Guest read: obtain the value from the dispatcher and place it in the
    /// data area at offset 40 for KVM to complete the instruction.
    fn handle_mmio_read(&self, address: u64, size: usize) {
        if size == 1 || size == 2 || size == 4 || size == 8 {
            let val = self.io.emulate_mmio_read(address, size);
            match size {
                1 => self.w8(40, val as u8),
                2 => self.w16(40, val as u16),
                4 => self.w32(40, val as u32),
                8 => self.w64(40, val),
                _ => (),
            }
        }
    }

    /// Zero-extends the MMIO data bytes (offset 40) to a u64, or None for
    /// an unsupported access size.
    fn data_to_val64(&self, size: usize) -> Option<u64> {
        match size {
            1 => { Some(self.r8(40) as u64)}
            2 => { Some(self.r16(40) as u64)}
            4 => { Some(self.r32(40) as u64)}
            8 => { Some(self.r64(40))}
            _ => { None }
        }
    }
}

198
src/vm/setup/cpu.rs Normal file
View File

@ -0,0 +1,198 @@
use vm::Result;
use kvm::{KvmVcpu,KvmRegs,KvmFpu, KvmMsrs, KvmSegment};
use memory::{GuestRam,KERNEL_ZERO_PAGE};
// MSR indices initialized by setup_msrs() (values match the x86
// msr-index definitions; verify against arch/x86 headers when adding more).
const MSR_IA32_SYSENTER_CS: u32 = 0x00000174;
const MSR_IA32_SYSENTER_ESP: u32 = 0x00000175;
const MSR_IA32_SYSENTER_EIP: u32 = 0x00000176;
const MSR_STAR: u32 = 0xc0000081;
const MSR_LSTAR: u32 = 0xc0000082;
const MSR_CSTAR: u32 = 0xc0000083;
const MSR_SYSCALL_MASK: u32 = 0xc0000084;
const MSR_KERNEL_GS_BASE: u32 = 0xc0000102;
const MSR_IA32_TSC: u32 = 0x00000010;
const MSR_IA32_MISC_ENABLE: u32 = 0x000001a0;
const MSR_IA32_MISC_ENABLE_FAST_STRING: u64 = 0x01;
// CPUID leaf-1 register bit positions used by setup_cpuid(); the
// underscore-prefixed ones are currently unused.
const EBX_CLFLUSH_CACHELINE: u32 = 8; // Flush a cache line size.
const EBX_CLFLUSH_SIZE_SHIFT: u32 = 8; // Bytes flushed when executing CLFLUSH.
const _EBX_CPU_COUNT_SHIFT: u32 = 16; // Index of this CPU.
const EBX_CPUID_SHIFT: u32 = 24; // Index of this CPU.
const _ECX_EPB_SHIFT: u32 = 3; // "Energy Performance Bias" bit.
const _ECX_HYPERVISOR_SHIFT: u32 = 31; // Flag to be set when the cpu is running on a hypervisor.
const _EDX_HTT_SHIFT: u32 = 28; // Hyper Threading Enabled.
/// Filters the host-supported CPUID entries and installs them on `vcpu`.
///
/// Leaf 0: overrides the vendor-id bytes. Leaf 1: sets the
/// hypervisor-present bit and fills in APIC id / CLFLUSH size. Leaf 6:
/// clears the energy-performance-bias bit. Leaf 10: disables the PMU leaf
/// unless it reports version 2 with at least one counter.
fn setup_cpuid(vcpu: &KvmVcpu) -> Result<()> {
    let mut cpuid = vcpu.get_supported_cpuid()?;
    let cpu_id = 0u32; // first vcpu
    for entry in &mut cpuid {
        match entry.function {
            0 => {
                // Vendor id string bytes (ebx/edx/ecx order).
                entry.ebx = 0x67627553;
                entry.ecx = 0x20487020;
                entry.edx = 0x68706172;
            }
            1 => {
                // Advertise that we run under a hypervisor.
                if entry.index == 0 {
                    entry.ecx |= 1 << 31;
                }
                entry.ebx = (cpu_id << EBX_CPUID_SHIFT) as u32
                    | (EBX_CLFLUSH_CACHELINE << EBX_CLFLUSH_SIZE_SHIFT);
            }
            6 => {
                // Clear the "Energy Performance Bias" capability bit.
                entry.ecx &= !(1 << 3);
            }
            10 => {
                // Architectural PMU leaf: zero it out unless version 2
                // with a non-zero counter count is reported.
                if entry.eax > 0 {
                    let version = entry.eax & 0xFF;
                    let ncounters = (entry.eax >> 8) & 0xFF;
                    if version != 2 || ncounters == 0 {
                        entry.eax = 0;
                    }
                }
            }
            _ => {}
        }
    }
    vcpu.set_cpuid2(cpuid)?;
    Ok(())
}
/// Initializes the vcpu FPU state: MXCSR 0x1f80 and x87 control word
/// 0x37f (the conventional power-on defaults; verify against the SDM if
/// these values are ever changed).
fn setup_fpu(vcpu: &KvmVcpu) -> Result<()> {
    let mut fpu_state = KvmFpu::new();
    fpu_state.mxcsr = 0x1f80;
    fpu_state.fcw = 0x37f;
    vcpu.set_fpu(&fpu_state)?;
    Ok(())
}
/// Resets a fixed set of MSRs to zero and enables fast-string operations
/// via IA32_MISC_ENABLE.
fn setup_msrs(vcpu: &KvmVcpu) -> Result<()> {
    // MSRs cleared to zero, in the same order as before.
    let zeroed = [
        MSR_IA32_SYSENTER_CS,
        MSR_IA32_SYSENTER_ESP,
        MSR_IA32_SYSENTER_EIP,
        MSR_STAR,
        MSR_CSTAR,
        MSR_KERNEL_GS_BASE,
        MSR_SYSCALL_MASK,
        MSR_LSTAR,
        MSR_IA32_TSC,
    ];
    let mut msrs = KvmMsrs::new();
    for &msr in &zeroed {
        msrs.add(msr, 0);
    }
    msrs.add(MSR_IA32_MISC_ENABLE, MSR_IA32_MISC_ENABLE_FAST_STRING);
    vcpu.set_msrs(&msrs)?;
    Ok(())
}
/// Packs an x86 GDT descriptor into its 64-bit in-memory form from access
/// and granularity `flags`, a 32-bit `base`, and a 20-bit `limit`.
///
/// Layout: base[31:24] -> bits 63:56, flags -> bits 55:52 and 47:40,
/// limit[19:16] -> bits 51:48, base[23:0] -> bits 39:16,
/// limit[15:0] -> bits 15:0.
pub fn gdt_entry(flags: u16, base: u32, limit: u32) -> u64 {
    let flags = flags as u64;
    let base = base as u64;
    let limit = limit as u64;

    let base_hi = (base & 0xff00_0000) << (56 - 24);
    let flag_bits = (flags & 0x0000_f0ff) << 40;
    let limit_hi = (limit & 0x000f_0000) << (48 - 16);
    let base_lo = (base & 0x00ff_ffff) << 16;
    let limit_lo = limit & 0x0000_ffff;

    base_hi | flag_bits | limit_hi | base_lo | limit_lo
}
// Guest-physical layout of the boot scaffolding: GDT and IDT in low
// memory, the initial stack at 0x8000, and the identity-map page tables
// at 0x9000-0xBFFF.
const BOOT_GDT_OFFSET: usize = 0x500;
const BOOT_IDT_OFFSET: usize = 0x520;
const BOOT_STACK: u64 = 0x8000;
const BOOT_PML4: u64 = 0x9000;
const BOOT_PDPTE: u64 = 0xA000;
const BOOT_PDE: u64 = 0xB000;
// Control-register and EFER bits used to enter 64-bit paged mode.
const X86_CR0_PE: u64 = 0x1;
const X86_CR0_PG: u64 = 0x80000000;
const X86_CR4_PAE: u64 = 0x20;
const EFER_LME: u64 = 0x100;
/// Writes identity-mapped boot page tables into guest memory: one PML4
/// entry pointing at one PDPTE, which points at 512 2MiB PDE entries
/// covering the first 1GiB of guest-physical address space.
fn setup_boot_pagetables(memory: &GuestRam) -> Result<()> {
    // 0x3 = present | writable; 0x83 additionally sets the 2MiB page bit.
    memory.write_int::<u64>(BOOT_PML4, BOOT_PDPTE | 0x3)?;
    memory.write_int::<u64>(BOOT_PDPTE, BOOT_PDE | 0x3)?;
    for idx in 0..512_u64 {
        memory.write_int::<u64>(BOOT_PDE + idx * 8, (idx << 21) | 0x83)?;
    }
    Ok(())
}
/// Copies `table` into guest memory at BOOT_GDT_OFFSET, one 8-byte
/// descriptor per slot. (Rewritten from an index loop to an enumerated
/// iterator — same writes, no bounds-checked indexing.)
fn write_gdt_table(table: &[u64], memory: &GuestRam) -> Result<()> {
    for (i, &entry) in table.iter().enumerate() {
        memory.write_int((BOOT_GDT_OFFSET + i * 8) as u64, entry)?;
    }
    Ok(())
}
/// Loads segment registers, the GDT/IDT, and control registers so the vcpu
/// starts directly in 64-bit paged ("long") mode.
pub fn setup_pm_sregs(vcpu: &KvmVcpu, memory: &GuestRam) -> Result<()> {
    // Null, 64-bit code, data, and TSS descriptors; the flag words encode
    // type/DPL/present/granularity (see gdt_entry for the bit layout).
    let table = [
        gdt_entry(0,0,0),
        gdt_entry(0xa09b,0,0xfffff),
        gdt_entry(0xc093,0,0xfffff),
        gdt_entry(0x808b,0,0xfffff),
    ];
    write_gdt_table(&table, memory)?;

    // Empty IDT: a single zeroed descriptor.
    memory.write_int::<u64>(BOOT_IDT_OFFSET as u64, 0u64)?;

    // Segment registers matching the GDT slots written above
    // (selector = index * 8).
    let code = KvmSegment::new(0, 0xfffff, 1 * 8, 0xa09b);
    let data = KvmSegment::new(0, 0xfffff, 2 * 8, 0xc093);
    let tss = KvmSegment::new(0, 0xfffff, 3 * 8, 0x808b);

    let mut regs = vcpu.get_sregs()?;
    regs.gdt.base = BOOT_GDT_OFFSET as u64;
    regs.gdt.limit = 32 - 1; // 4 descriptors * 8 bytes - 1
    // NOTE(review): the wrapper's field is spelled `itd`; presumably this
    // is the IDT register — confirm against the kvm module.
    regs.itd.base = BOOT_IDT_OFFSET as u64;
    regs.itd.limit = 8 - 1;

    regs.cs = code;
    regs.ds = data;
    regs.es = data;
    regs.fs = data;
    regs.gs = data;
    regs.ss = data;
    regs.tr = tss;

    // protected mode
    regs.cr0 |= X86_CR0_PE;
    // Long-mode enable; paging itself is turned on below.
    regs.efer |= EFER_LME;

    // Identity-map page tables, then enable PAE paging through them.
    setup_boot_pagetables(&memory)?;
    regs.cr3 = BOOT_PML4;
    regs.cr4 |= X86_CR4_PAE;
    regs.cr0 |= X86_CR0_PG;

    vcpu.set_sregs(&regs)?;
    Ok(())
}
/// Initializes the general-purpose registers for kernel entry: rip at the
/// entry point, stack and frame pointers at BOOT_STACK, and rsi pointing
/// at the zero page (where the boot parameters were written).
pub fn setup_pm_regs(vcpu: &KvmVcpu, kernel_entry: u64) -> Result<()> {
    let mut regs = KvmRegs::new();
    // Bit 1 of RFLAGS is reserved and must read as 1.
    regs.rflags = 0x0000000000000002;
    regs.rip = kernel_entry;
    regs.rbp = BOOT_STACK;
    regs.rsp = BOOT_STACK;
    regs.rsi = KERNEL_ZERO_PAGE;
    vcpu.set_regs(&regs)?;
    Ok(())
}
/// Performs all per-vcpu initialization needed to enter the kernel:
/// CPUID filtering, segment/control registers plus boot page tables,
/// general-purpose registers, FPU state, and MSRs.
///
/// (`vcpu` is already a reference; the original passed `&vcpu`, a
/// `&&KvmVcpu`, relying on auto-deref — the needless borrows are removed.)
pub fn setup_protected_mode(vcpu: &KvmVcpu, kernel_entry: u64, memory: &GuestRam) -> Result<()> {
    setup_cpuid(vcpu)?;
    setup_pm_sregs(vcpu, memory)?;
    setup_pm_regs(vcpu, kernel_entry)?;
    setup_fpu(vcpu)?;
    setup_msrs(vcpu)?;
    Ok(())
}

118
src/vm/setup/kernel.rs Normal file
View File

@ -0,0 +1,118 @@
use std::path::Path;
use std::fs::{File};
use std::io::{self, Read,SeekFrom,Seek};
use byteorder::{LittleEndian,ReadBytesExt};
use memory::{self,GuestRam,KERNEL_ZERO_PAGE};
use vm::{Result,Error,ErrorKind};
// Documentation/x86/boot.txt
// Offsets of setup-header fields within the zero page, per
// Documentation/x86/boot.txt (comment notes the field width).
const HDR_BOOT_FLAG: u64 = 0x1fe; // u16
const HDR_HEADER: u64 = 0x202; // u32
const HDR_TYPE_LOADER: u64 = 0x210; // u8
const HDR_CMDLINE_PTR: u64 = 0x228; // u32
const HDR_CMDLINE_SIZE: u64 = 0x238; // u32
const HDR_KERNEL_ALIGNMENT: u64 = 0x230; // u32
// Offsets of the e820 entry count and map within boot_params, per
// Documentation/x86/zero-page.txt.
const BOOT_PARAM_E820_ENTRIES: u64 = 0x1e8;
const BOOT_PARAM_E820_MAP: u64 = 0x2d0;
// Magic values the kernel expects to find in the setup header.
const KERNEL_BOOT_FLAG_MAGIC: u16 = 0xaa55;
const EBDA_START: u64 = 0x0009fc00;
const KERNEL_HDR_MAGIC: u32 = 0x53726448;
const KERNEL_LOADER_OTHER: u8 = 0xff;
const KERNEL_MIN_ALIGNMENT_BYTES: u32 = 0x1000000;
// e820 range type: usable RAM.
const E820_RAM: u32 = 1;
/// Writes the e820 memory map into the zero page at `base`.
///
/// Reports conventional memory below the EBDA as RAM, then RAM from the
/// kernel load address upward — split around the PCI MMIO hole when the
/// guest has more RAM than fits below PCI_MMIO_RESERVED_BASE.
/// (Index loop replaced with an enumerated iterator; behavior unchanged.)
fn setup_e820(memory: &GuestRam, base: u64) -> Result<()> {
    let ram_size = memory.ram_size() as u64;

    let mut e820_ranges = vec![(0u64, EBDA_START)];
    if ram_size < memory::PCI_MMIO_RESERVED_BASE {
        e820_ranges.push((memory::KVM_KERNEL_LOAD_ADDRESS,
                          ram_size - memory::KVM_KERNEL_LOAD_ADDRESS));
    } else {
        e820_ranges.push((memory::KVM_KERNEL_LOAD_ADDRESS,
                          memory::PCI_MMIO_RESERVED_BASE - memory::KVM_KERNEL_LOAD_ADDRESS));
        e820_ranges.push((memory::HIMEM_BASE, ram_size - memory::HIMEM_BASE));
    }

    memory.write_int::<u8>(base + BOOT_PARAM_E820_ENTRIES, e820_ranges.len() as u8)?;
    // Each e820 entry is 20 bytes: u64 address, u64 size, u32 type.
    for (i, &(addr, size)) in e820_ranges.iter().enumerate() {
        let entry_base = base + BOOT_PARAM_E820_MAP + (i as u64 * 20);
        memory.write_int::<u64>(entry_base, addr)?;
        memory.write_int::<u64>(entry_base + 8, size)?;
        memory.write_int::<u32>(entry_base + 16, E820_RAM)?;
    }
    Ok(())
}
/// Populates the minimal set of zero-page (boot_params) fields the kernel
/// needs: boot flag and header magic, loader type, command line pointer
/// and size, minimum kernel alignment, and finally the e820 memory map.
fn setup_zero_page(memory: &GuestRam, cmdline_addr: u64, cmdline_size: usize) -> Result<()> {
    let base = KERNEL_ZERO_PAGE;
    memory.write_int::<u16>(base + HDR_BOOT_FLAG, KERNEL_BOOT_FLAG_MAGIC)?;
    memory.write_int::<u32>(base + HDR_HEADER, KERNEL_HDR_MAGIC)?;
    memory.write_int::<u8>(base + HDR_TYPE_LOADER, KERNEL_LOADER_OTHER)?;
    // The cmdline pointer field is 32 bits; the address is assumed to fit
    // below 4GiB.
    memory.write_int::<u32>(base + HDR_CMDLINE_PTR, cmdline_addr as u32)?;
    memory.write_int::<u32>(base + HDR_CMDLINE_SIZE, cmdline_size as u32)?;
    memory.write_int::<u32>(base + HDR_KERNEL_ALIGNMENT, KERNEL_MIN_ALIGNMENT_BYTES)?;
    setup_e820(memory, base)
}
pub fn load_pm_kernel(memory: &GuestRam, path: &Path, cmdline_addr: u64, cmdline_size: usize) -> Result<()> {
load_elf_kernel(memory, path).map_err(|_| Error::from(ErrorKind::ReadKernelFailed))?;
setup_zero_page(memory, cmdline_addr, cmdline_size)
}
/// Loads the PT_LOAD segments of a 64-bit little-endian ELF image into
/// guest RAM at KVM_KERNEL_LOAD_ADDRESS + p_paddr.
///
/// Only a minimal subset of the header is parsed: e_phoff (file offset 32)
/// and e_phnum (offset 56). Program headers are assumed to be standard
/// contiguous 56-byte ELF64 entries starting at e_phoff.
///
/// Added robustness: the ELF identification bytes are validated before any
/// header offsets are trusted, so a non-ELF (or 32-bit / big-endian) file
/// fails with InvalidData instead of being garbage-parsed.
pub fn load_elf_kernel(memory: &GuestRam, path: &Path) -> io::Result<()> {
    let mut f = File::open(&path)?;

    // e_ident: magic \x7fELF, EI_CLASS == 2 (ELFCLASS64),
    // EI_DATA == 1 (little-endian).
    let mut ident = [0u8; 6];
    f.read_exact(&mut ident)?;
    if &ident[0..4] != b"\x7fELF" || ident[4] != 2 || ident[5] != 1 {
        return Err(io::Error::new(io::ErrorKind::InvalidData,
                                  "not a 64-bit little-endian ELF image"));
    }

    f.seek(SeekFrom::Start(32))?;
    let phoff = f.read_u64::<LittleEndian>()?;
    // Skip from offset 40 to e_phnum at offset 56.
    f.seek(SeekFrom::Current(16))?;
    let phnum = f.read_u16::<LittleEndian>()?;

    // Collect the loadable (PT_LOAD == 1) program headers.
    f.seek(SeekFrom::Start(phoff))?;
    let mut v = Vec::new();
    for _ in 0..phnum {
        let hdr = load_phdr(&f)?;
        if hdr.p_type == 1 {
            v.push(hdr);
        }
    }

    // Copy each segment's file bytes directly into guest RAM.
    for h in v {
        f.seek(SeekFrom::Start(h.p_offset))?;
        let slice = memory.mut_slice(memory::KVM_KERNEL_LOAD_ADDRESS + h.p_paddr, h.p_filesz as usize).unwrap();
        f.read_exact(slice)?;
    }
    Ok(())
}
fn load_phdr<R: Read+Sized>(mut r: R) -> io::Result<ElfPhdr> {
let mut phdr: ElfPhdr = Default::default();
phdr.p_type = r.read_u32::<LittleEndian>()?;
phdr.p_flags = r.read_u32::<LittleEndian>()?;
phdr.p_offset = r.read_u64::<LittleEndian>()?;
phdr.p_vaddr = r.read_u64::<LittleEndian>()?;
phdr.p_paddr = r.read_u64::<LittleEndian>()?;
phdr.p_filesz = r.read_u64::<LittleEndian>()?;
phdr.p_memsz = r.read_u64::<LittleEndian>()?;
phdr.p_align = r.read_u64::<LittleEndian>()?;
Ok(phdr)
}
/// ELF64 program header, field-for-field as in the file format (Elf64_Phdr).
#[derive(Default,Debug)]
struct ElfPhdr {
    pub p_type: u32,   // segment type; 1 = PT_LOAD
    pub p_flags: u32,
    pub p_offset: u64, // file offset of the segment data
    pub p_vaddr: u64,
    pub p_paddr: u64,  // physical address — used as the load offset here
    pub p_filesz: u64, // bytes to copy from the file
    pub p_memsz: u64,
    pub p_align: u64,
}

3
src/vm/setup/mod.rs Normal file
View File

@ -0,0 +1,3 @@
pub mod cpu;
pub mod kernel;
pub mod mptable;

214
src/vm/setup/mptable.rs Normal file
View File

@ -0,0 +1,214 @@
use byteorder::{LittleEndian, WriteBytesExt};
use std::io::Write;
use std::iter;
use memory::GuestRam;
use virtio::PciIrq;
use vm::Result;
// Standard x86 local APIC / IO-APIC physical base addresses.
const APIC_DEFAULT_PHYS_BASE: u32 = 0xfee00000;
const IO_APIC_DEFAULT_PHYS_BASE: u32 = 0xfec00000;
// MPC entry type codes (MultiProcessor spec base table entries).
const MP_PROCESSOR: u8 = 0;
const MP_BUS: u8 = 1;
const MP_IOAPIC: u8 = 2;
const MP_INTSRC: u8 = 3;
const MP_LINTSRC: u8 = 4;
// Interrupt source types and flags.
const MP_IRQ_SRC_INT: u8 = 0;
const MP_IRQ_SRC_NMI: u8 = 1;
const MP_IRQ_DEFAULT: u16 = 0;
const MPC_APIC_USABLE: u8 = 0x01;
const KVM_APIC_VER: u8 = 0x14;
// mpc_cpu cpuflag bits.
const CPU_ENABLED: u8 = 1;
const CPU_BOOTPROCESSOR: u8 = 2;
const CPU_STEPPING: u32 = 0x600;
const CPU_FEATURE_APIC: u32 = 0x200;
const CPU_FEATURE_FPU: u32 = 0x001;
// Bus ids and 6-byte bus type strings. NOTE(review): write_mpc_bus asserts
// a length of exactly 6, so these literals must be space-padded to 6
// bytes — confirm the trailing spaces were not lost in transit.
const PCI_BUSID: u8 = 0;
const PCI_BUSTYPE: &[u8] = b"PCI ";
const ISA_BUSID: u8 = 1;
const ISA_BUSTYPE: &[u8] = b"ISA ";
/// Growable byte buffer for assembling MP-table structures, tracking how
/// many MPC entries have been appended so far.
struct Buffer {
    vec: Vec<u8>,
    // Count of MPC entries (cpu/bus/ioapic/intsrc/lintsrc) written.
    count: usize,
}
impl Buffer {
    /// Empty buffer with a zero entry count.
    fn new() -> Buffer {
        Buffer {
            vec: Vec::new(),
            count: 0,
        }
    }

    /// Appends one MP_PROCESSOR entry per vcpu, ids 0..ncpus.
    fn write_all_mpc_cpu(&mut self, ncpus: usize) -> &mut Self {
        for i in 0..ncpus {
            self.write_mpc_cpu(i as u8);
        }
        self
    }

    /// Appends one MP_PROCESSOR entry; cpu 0 is flagged as the boot cpu.
    fn write_mpc_cpu(&mut self, cpuid: u8) -> &mut Self {
        self.count += 1;
        let flag = CPU_ENABLED | if cpuid == 0 { CPU_BOOTPROCESSOR } else { 0 };
        let featureflag = CPU_FEATURE_APIC | CPU_FEATURE_FPU;
        self.w8(MP_PROCESSOR) // type
            .w8(cpuid) // Local APIC number
            .w8(KVM_APIC_VER) // APIC version
            .w8(flag) // cpuflag
            .w32(CPU_STEPPING) // cpufeature
            .w32(featureflag) // CPUID feature value
            .w32(0).w32(0) // reserved[2]
    }

    /// Appends the MP_IOAPIC entry describing the (single) IO-APIC.
    fn write_mpc_ioapic(&mut self, ioapicid: u8) -> &mut Self {
        self.count += 1;
        self.w8(MP_IOAPIC) // type
            .w8(ioapicid) // Local APIC number
            .w8(KVM_APIC_VER) // APIC version
            .w8(MPC_APIC_USABLE) // flags
            .w32(IO_APIC_DEFAULT_PHYS_BASE) // apic addr
    }

    /// Appends an MP_BUS entry; `bustype` must be exactly 6 bytes.
    fn write_mpc_bus(&mut self, busid: u8, bustype: &[u8]) -> &mut Self {
        assert!(bustype.len() == 6);
        self.count += 1;
        self.w8(MP_BUS)
            .w8(busid)
            .bytes(bustype)
    }

    /// Appends one MP_INTSRC entry routing a PCI bus irq to an IO-APIC pin.
    fn write_mpc_intsrc(&mut self, ioapicid: u8, srcbusirq: u8, dstirq: u8) -> &mut Self {
        self.count += 1;
        self.w8(MP_INTSRC)
            .w8(MP_IRQ_SRC_INT) // irq type
            .w16(MP_IRQ_DEFAULT) // irq flag
            .w8(PCI_BUSID) // src bus id
            .w8(srcbusirq) // src bus irq
            .w8(ioapicid) // dest apic id
            .w8(dstirq) // dest irq
    }

    /// Appends one MP_INTSRC entry per PCI irq routing.
    fn write_all_mpc_intsrc(&mut self, ioapicid: u8, pci_irqs: &Vec<PciIrq>) -> &mut Self {
        for irq in pci_irqs {
            self.write_mpc_intsrc(ioapicid, irq.src_bus_irq(), irq.irq_line());
        }
        self
    }

    /// Appends one MP_LINTSRC (local interrupt) entry on the ISA bus.
    fn write_mpc_lintsrc(&mut self, irqtype: u8, dstirq: u8) -> &mut Self {
        self.count += 1;
        self.w8(MP_LINTSRC)
            .w8(irqtype) // irq type
            .w16(MP_IRQ_DEFAULT) // irq flag
            .w8(ISA_BUSID) // src bus id
            .w8(0) // src bus irq
            .w8(0) // dest apic id
            .w8(dstirq) // dest apid lint
    }

    /// Appends the 16-byte "_MP_" floating pointer structure, 16-aligned.
    ///
    /// BUG FIX: `start` was previously captured *before* align(16), so
    /// whenever padding was inserted the checksummed 16-byte window (and
    /// the checksum byte at offset 10) were shifted off the structure.
    /// Aligning first ensures the window begins exactly at the signature.
    fn write_mpf_intel(&mut self, address: u32) -> &mut Self {
        self.align(16);
        let start = self.vec.len();
        self.bytes(b"_MP_") // Signature
            .w32(address) // Configuration table address
            .w8(1) // Our length (paragraphs)
            .w8(4) // Specification version
            .w8(0) // checksum (offset 10)
            .pad(5) // feature1 - feature5
            .checksum(start, 16, 10)
    }

    /// Appends the 44-byte MPC table header followed by `body`, then fixes
    /// up the checksum over the whole table.
    ///
    /// NOTE(review): per the MP spec the u16 at offset 32 is the OEM table
    /// size and the u16 at offset 34 is the entry count — the values
    /// written here (entry count at 32, ncpus at 34) look swapped/wrong;
    /// confirm against the MultiProcessor Specification before changing.
    fn write_mpctable(&mut self, ncpus: u16, body: &Buffer) -> &mut Self {
        let len = 44 + body.vec.len();
        self.bytes(b"PCMP") // 0 Signature
            .w16(len as u16) // 4 length
            .w8(4) // 6 Specification version
            .w8(0) // 7 checksum
            .bytes(b"SUBGRAPH") // 8 oem[8]
            .bytes(b"0.1 ") // 16 productid[12] -- NOTE(review): must be 12 bytes; padding may have been lost in transit
            .w32(0) // 28 oem ptr (0 if not present)
            .w16(body.count as u16) // 32 oem size
            .w16(ncpus) // 34 oem count
            .w32(APIC_DEFAULT_PHYS_BASE) // 36 APIC address
            .w32(0) // 40 reserved
            .bytes(&body.vec)
            .checksum(0, len, 7)
    }

    // Little-endian primitive writers; each returns self for chaining.
    fn w8(&mut self, val: u8) -> &mut Self {
        self.vec.push(val);
        self
    }
    fn w16(&mut self, data: u16) -> &mut Self {
        self.vec.write_u16::<LittleEndian>(data).unwrap();
        self
    }
    fn w32(&mut self, data: u32) -> &mut Self {
        self.vec.write_u32::<LittleEndian>(data).unwrap();
        self
    }

    /// Appends raw bytes. write_all rather than write: a Vec sink never
    /// short-writes, but write_all makes the intent explicit.
    fn bytes(&mut self, data: &[u8]) -> &mut Self {
        self.vec.write_all(data).unwrap();
        self
    }

    /// Appends `count` zero bytes.
    fn pad(&mut self, count: usize) -> &mut Self {
        if count > 0 {
            self.vec.extend(iter::repeat(0).take(count));
        }
        self
    }

    /// Zero-pads the buffer up to the next multiple of `n`.
    fn align(&mut self, n: usize) -> &mut Self {
        let aligned = align(self.vec.len(), n);
        let padlen = aligned - self.vec.len();
        self.pad(padlen)
    }

    /// Computes the two's-complement checksum of vec[start..start+len] and
    /// stores it at start+csum_off, so the region's byte sum is 0 mod 256.
    fn checksum(&mut self, start: usize, len: usize, csum_off: usize) -> &mut Self {
        {
            let slice = &mut self.vec[start..start + len];
            let csum = slice.iter().fold(0i32, |acc, &x| acc.wrapping_add(x as i32));
            let b = (-csum & 0xFF) as u8;
            slice[csum_off] = b;
        }
        self
    }
}
/// Rounds `sz` up to the next multiple of `n`; `n` must be a power of two.
fn align(sz: usize, n: usize) -> usize {
    let mask = n - 1;
    (sz + mask) & !mask
}
/// Builds the MP configuration table (cpus, PCI and ISA buses, the
/// IO-APIC, PCI interrupt routing, and local interrupt sources) plus the
/// floating pointer structure, and writes the result into guest memory at
/// address 0.
pub fn setup_mptable(memory: &GuestRam, ncpus: usize, pci_irqs: Vec<PciIrq>) -> Result<()> {
    // IO-APIC id chosen above all cpu APIC ids (0..ncpus).
    let ioapicid = (ncpus + 1) as u8;
    let address = 0;

    let mut body = Buffer::new();
    body.write_all_mpc_cpu(ncpus)
        .write_mpc_bus(PCI_BUSID, PCI_BUSTYPE)
        .write_mpc_bus(ISA_BUSID, ISA_BUSTYPE)
        .write_mpc_ioapic(ioapicid)
        .write_all_mpc_intsrc(ioapicid, &pci_irqs)
        .write_mpc_lintsrc(MP_IRQ_SRC_INT, 0)
        .write_mpc_lintsrc(MP_IRQ_SRC_NMI, 1)
        .write_mpf_intel(address);

    let mut table = Buffer::new();
    table.write_mpctable(ncpus as u16, &body);
    memory.write_bytes(address as u64, &table.vec)
}