Skip to content

Commit 61fdd9d

Browse files
committed
feat: enable write-protection on guest memory
UFFD provides an API to enable write-protection for memory ranges tracked by a userfault file descriptor. Detailed information can be found here: https://docs.kernel.org/admin-guide/mm/userfaultfd.html. To use the feature, users need to register the memory region with UFFDIO_REGISTER_MODE_WP. Then, users need to explicitly enable write-protection for sub-ranges of the registered region. Writes to pages within write-protected memory ranges can be handled in one of two ways. In synchronous mode, a write to a protected page causes the kernel to send a write-protection event over the userfaultfd. In asynchronous mode, the kernel automatically handles writes to protected pages by clearing the write-protection bit. Userspace can later observe the write-protection bit by looking into the corresponding entry of /proc/<pid>/pagemap. This commit unconditionally enables write protection for guest memory using the asynchronous mode. !NOTE!: asynchronous write protection requires (host) kernel version 6.7 or later. Signed-off-by: Babis Chalios <babis.chalios@e2b.dev>
1 parent 29c025c commit 61fdd9d

4 files changed

Lines changed: 55 additions & 9 deletions

File tree

Cargo.lock

Lines changed: 26 additions & 3 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/vmm/Cargo.toml

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,11 @@ serde_json = "1.0.145"
4747
slab = "0.4.11"
4848
thiserror = "2.0.17"
4949
timerfd = "1.5.0"
50-
userfaultfd = "0.9.0"
50+
userfaultfd = { git = "https://github.com/e2b-dev/userfaultfd-rs", branch = "feat_write_protection", features = [
51+
"linux5_7",
52+
"linux5_13",
53+
"linux6_7"
54+
] }
5155
utils = { path = "../utils" }
5256
uuid = "1.18.1"
5357
vhost = { version = "0.15.0", features = ["vhost-user-frontend"] }

src/vmm/src/lib.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,8 @@ impl Vmm {
795795
// Use mincore_bitmap to get resident pages at guest page size granularity
796796
let resident_bitmap = vstate::vm::mincore_bitmap(base_addr as *mut u8, len, page_size)?;
797797

798-
// TODO: if we don't support UFFD/async WP, we can completely skip this bit. For the
798+
// TODO: if we don't support UFFD/async WP, we can completely skip this bit, as the
799+
// UFFD handler already tracks dirty pages through the WriteProtected events. For the
799800
// time being, we always do.
800801
//
801802
// Build dirty bitmap: check pagemap only for pages that mincore reports resident.

src/vmm/src/persist.rs

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@ use std::sync::{Arc, Mutex};
1414

1515
use semver::Version;
1616
use serde::{Deserialize, Serialize};
17-
use userfaultfd::{FeatureFlags, Uffd, UffdBuilder};
17+
use userfaultfd::{FeatureFlags, RegisterMode, Uffd, UffdBuilder};
1818
use vmm_sys_util::sock_ctrl_msg::ScmSocket;
1919

2020
#[cfg(target_arch = "aarch64")]
@@ -483,6 +483,8 @@ pub enum GuestMemoryFromUffdError {
483483
Create(userfaultfd::Error),
484484
/// Failed to register memory address range with the userfaultfd object: {0}
485485
Register(userfaultfd::Error),
486+
/// Failed to enable write protection on memory address range with the userfaultfd object: {0}
487+
WriteProtect(userfaultfd::Error),
486488
/// Failed to connect to UDS Unix stream: {0}
487489
Connect(#[from] std::io::Error),
488490
/// Failed to sends file descriptor: {0}
@@ -504,7 +506,9 @@ fn guest_memory_from_uffd(
504506
// because the only place the kernel checks this is in a hook from madvise, e.g. it doesn't
505507
// actively change the behavior of UFFD, only passively. Without balloon devices
506508
// we never call madvise anyway, so no need to put this into a conditional.
507-
uffd_builder.require_features(FeatureFlags::EVENT_REMOVE);
509+
uffd_builder.require_features(
510+
FeatureFlags::EVENT_REMOVE | FeatureFlags::MISSING_HUGETLBFS | FeatureFlags::WP_ASYNC,
511+
);
508512

509513
let uffd = uffd_builder
510514
.close_on_exec(true)
@@ -514,8 +518,22 @@ fn guest_memory_from_uffd(
514518
.map_err(GuestMemoryFromUffdError::Create)?;
515519

516520
for mem_region in guest_memory.iter() {
517-
uffd.register(mem_region.as_ptr().cast(), mem_region.size() as _)
518-
.map_err(GuestMemoryFromUffdError::Register)?;
521+
uffd.register_with_mode(
522+
mem_region.as_ptr().cast(),
523+
mem_region.size() as _,
524+
RegisterMode::MISSING | RegisterMode::WRITE_PROTECT,
525+
)
526+
.map_err(GuestMemoryFromUffdError::Register)?;
527+
528+
// If memory is backed by huge pages, we can immediately write protect it.
529+
// Otherwise (memory is backed by anonymous memory), write protecting here
530+
// won't have any effect, as the write-protection bit for a page will be
531+
// wiped when the first page fault occurs. These cases need to be handled
532+
// directly from the UFFD handler.
533+
if huge_pages.is_hugetlbfs() {
534+
uffd.write_protect(mem_region.as_ptr().cast(), mem_region.size() as _)
535+
.map_err(GuestMemoryFromUffdError::WriteProtect)?;
536+
}
519537
}
520538

521539
send_uffd_handshake(mem_uds_path, &backend_mappings, &uffd)?;

0 commit comments

Comments
 (0)