Skip to content

Commit 29c025c

Browse files
committed
api: implement API for dirty memory
Implement API /memory/dirty which returns a bitmap tracking dirty guest memory. The bitmap is structured as a vector of u64, so its length is: total_number_of_pages.div_ceil(64). Pages are ordered in the order of pages as reported by /memory/mappings. Signed-off-by: Babis Chalios <babis.chalios@e2b.dev>
1 parent 5696244 commit 29c025c

8 files changed

Lines changed: 205 additions & 2 deletions

File tree

resources/seccomp/x86_64-unknown-linux-musl.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@
3131
{
3232
"syscall": "mincore"
3333
},
34+
{
35+
"syscall": "pread64"
36+
},
3437
{
3538
"syscall": "writev",
3639
"comment": "Used by the VirtIO net device to write to tap"

src/firecracker/src/api_server/parsed_request.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -200,6 +200,7 @@ impl ParsedRequest {
200200
VmmData::FullVmConfig(config) => Self::success_response_with_data(config),
201201
VmmData::MemoryMappings(mappings) => Self::success_response_with_data(mappings),
202202
VmmData::Memory(meminfo) => Self::success_response_with_data(meminfo),
203+
VmmData::MemoryDirty(dirty) => Self::success_response_with_data(dirty),
203204
},
204205
Err(vmm_action_error) => {
205206
let mut response = match vmm_action_error {
@@ -620,6 +621,9 @@ pub mod tests {
620621
VmmData::Memory(meminfo) => {
621622
http_response(&serde_json::to_string(meminfo).unwrap(), 200)
622623
}
624+
VmmData::MemoryDirty(dirty) => {
625+
http_response(&serde_json::to_string(dirty).unwrap(), 200)
626+
}
623627
};
624628
let response = ParsedRequest::convert_to_response(&data);
625629
response.write_all(&mut buf).unwrap();

src/firecracker/src/api_server/request/memory_info.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@ where
99
{
1010
match path_tokens.next() {
1111
Some("mappings") => Ok(ParsedRequest::new_sync(VmmAction::GetMemoryMappings)),
12+
Some("dirty") => Ok(ParsedRequest::new_sync(VmmAction::GetMemoryDirty)),
1213
Some(unknown_path) => Err(RequestError::InvalidPathMethod(
1314
format!("/memory/{}", unknown_path),
1415
Method::Get,

src/vmm/src/lib.rs

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -256,6 +256,8 @@ pub enum VmmError {
256256
Block(#[from] BlockError),
257257
/// Balloon: {0}
258258
Balloon(#[from] BalloonError),
259+
/// Pagemap error: {0}
260+
Pagemap(#[from] utils::pagemap::PagemapError),
259261
/// Failed to create memory hotplug device: {0}
260262
VirtioMem(#[from] VirtioMemError),
261263
}
@@ -774,6 +776,49 @@ impl Vmm {
774776

775777
Ok((resident, empty))
776778
}
779+
780+
/// Get dirty pages bitmap for guest memory
781+
pub fn get_dirty_memory(&self, page_size: usize) -> Result<Vec<u64>, VmmError> {
782+
let pagemap = utils::pagemap::PagemapReader::new(page_size)?;
783+
let mut dirty_bitmap = vec![];
784+
785+
for mem_slot in self
786+
.vm
787+
.guest_memory()
788+
.iter()
789+
.flat_map(|region| region.plugged_slots())
790+
{
791+
let base_addr = mem_slot.slice.ptr_guard_mut().as_ptr() as usize;
792+
let len = mem_slot.slice.len();
793+
let nr_pages = len / page_size;
794+
795+
// Use mincore_bitmap to get resident pages at guest page size granularity
796+
let resident_bitmap = vstate::vm::mincore_bitmap(base_addr as *mut u8, len, page_size)?;
797+
798+
// TODO: if we don't support UFFD/async WP, we can completely skip this bit. For the
799+
// time being, we always do.
800+
//
801+
// Build dirty bitmap: check pagemap only for pages that mincore reports resident.
802+
// This way we reduce the amount of times we read out of /proc/<pid>/pagemap.
803+
let mut slot_bitmap = vec![0u64; nr_pages.div_ceil(64)];
804+
for page_idx in 0..nr_pages {
805+
// Check if page is resident in the bitmap.
806+
// TODO: These operations (add to bitmap, check for presence, etc.) merit their own
807+
// implementation, somewhere within a bitmap type).
808+
let is_resident = (resident_bitmap[page_idx / 64] & (1u64 << (page_idx % 64))) != 0;
809+
if is_resident {
810+
let virt_addr = base_addr + (page_idx * page_size);
811+
if pagemap.is_page_dirty(virt_addr)? {
812+
slot_bitmap[page_idx / 64] |= 1u64 << (page_idx % 64);
813+
}
814+
}
815+
}
816+
817+
dirty_bitmap.extend_from_slice(&slot_bitmap);
818+
}
819+
820+
Ok(dirty_bitmap)
821+
}
777822
}
778823

779824
/// Process the content of the MPIDR_EL1 register in order to be able to pass it to KVM

src/vmm/src/rpc_interface.rs

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, Drive
3030
use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError};
3131
use crate::vmm_config::instance_info::{InstanceInfo, VmState};
3232
use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate};
33-
use crate::vmm_config::meminfo::{MemoryMapingsResponse, MemoryResponse};
33+
use crate::vmm_config::meminfo::{MemoryDirty, MemoryMapingsResponse, MemoryResponse};
3434
use crate::vmm_config::memory_hotplug::{
3535
MemoryHotplugConfig, MemoryHotplugConfigError, MemoryHotplugSizeUpdate,
3636
};
@@ -151,6 +151,8 @@ pub enum VmmAction {
151151
GetMemoryMappings,
152152
/// Get guest memory resident and empty pages information
153153
GetMemory,
154+
/// Get guest memory dirty pages information
155+
GetMemoryDirty,
154156
}
155157

156158
/// Wrapper for all errors associated with VMM actions.
@@ -239,6 +241,8 @@ pub enum VmmData {
239241
MemoryMappings(MemoryMapingsResponse),
240242
/// The guest memory resident and empty pages information
241243
Memory(MemoryResponse),
244+
/// The guest memory dirty pages information
245+
MemoryDirty(MemoryDirty),
242246
}
243247

244248
/// Trait used for deduplicating the MMDS request handling across the two ApiControllers.
@@ -508,7 +512,8 @@ impl<'a> PrebootApiController<'a> {
508512
| GetFreePageHintingStatus
509513
| StopFreePageHinting
510514
| GetMemoryMappings
511-
| GetMemory => Err(VmmActionError::OperationNotSupportedPreBoot),
515+
| GetMemory
516+
| GetMemoryDirty => Err(VmmActionError::OperationNotSupportedPreBoot),
512517
#[cfg(target_arch = "x86_64")]
513518
SendCtrlAltDel => Err(VmmActionError::OperationNotSupportedPreBoot),
514519
}
@@ -786,6 +791,7 @@ impl RuntimeApiController {
786791
.map_err(VmmActionError::MemoryHotplugUpdate),
787792
GetMemoryMappings => self.get_guest_memory_mappings(),
788793
GetMemory => self.get_guest_memory_info(),
794+
GetMemoryDirty => self.get_dirty_memory_info(),
789795
// Operations not allowed post-boot.
790796
ConfigureBootSource(_)
791797
| ConfigureLogger(_)
@@ -984,6 +990,25 @@ impl RuntimeApiController {
984990

985991
Ok(VmmData::Memory(MemoryResponse { resident, empty }))
986992
}
993+
994+
/// Get dirty pages information for guest memory
995+
fn get_dirty_memory_info(&self) -> Result<VmmData, VmmActionError> {
996+
let start_us = get_time_us(ClockType::Monotonic);
997+
let vmm = self.vmm.lock().expect("Poisoned lock");
998+
999+
// Check if VM is paused
1000+
if vmm.instance_info.state != VmState::Paused {
1001+
return Err(VmmActionError::OperationNotSupportedWhileRunning);
1002+
}
1003+
1004+
let page_size = self.vm_resources.machine_config.huge_pages.page_size();
1005+
let bitmap = vmm.get_dirty_memory(page_size)?;
1006+
1007+
let elapsed_time_us = get_time_us(ClockType::Monotonic) - start_us;
1008+
info!("'get dirty memory' VMM action took {elapsed_time_us} us.");
1009+
1010+
Ok(VmmData::MemoryDirty(MemoryDirty { bitmap }))
1011+
}
9871012
}
9881013

9891014
#[cfg(test)]

src/vmm/src/utils/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ pub mod net;
99
pub mod signal;
1010
/// Module with state machine
1111
pub mod sm;
12+
/// Module with pagemap utilities
13+
pub mod pagemap;
1214

1315
use std::fs::{File, OpenOptions};
1416
use std::num::Wrapping;

src/vmm/src/utils/pagemap.rs

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
//! Utilities for reading /proc/self/pagemap to track dirty pages.
2+
3+
#![allow(clippy::cast_possible_wrap)]
4+
5+
use std::fs::File;
6+
use std::os::unix::io::AsRawFd;
7+
8+
use crate::arch::host_page_size;
9+
10+
const PAGEMAP_ENTRY_SIZE: usize = 8;
11+
12+
/// Errors related to pagemap operations
// NOTE: `displaydoc::Display` turns the `///` line on each variant into that variant's
// `Display` message, with `{0}` replaced by the wrapped `std::io::Error`. Treat those
// lines as user-visible strings when editing them.
13+
#[derive(Debug, thiserror::Error, displaydoc::Display)]
14+
pub enum PagemapError {
15+
/// Failed to open /proc/self/pagemap: {0}
16+
OpenPagemap(#[source] std::io::Error),
17+
/// Failed to read pagemap entry: {0}
18+
ReadEntry(#[source] std::io::Error),
19+
// NOTE(review): the two variants below are not constructed anywhere in the visible code;
// presumably reserved for clearing soft-dirty bits via /proc/self/clear_refs - confirm.
/// Failed to open /proc/self/clear_refs: {0}
20+
OpenClearRefs(#[source] std::io::Error),
21+
/// Failed to clear soft-dirty bits: {0}
22+
ClearSoftDirty(#[source] std::io::Error),
23+
}
24+
25+
/// Represents a single entry in /proc/pid/pagemap.
///
/// Each virtual page has an 8-byte entry with the following layout:
/// - Bits 0-54: Page frame number (PFN) if present
/// - Bit 55: Page is soft-dirty (written to since last clear)
/// - Bit 56: Page is exclusively mapped
/// - Bit 57: Page is write-protected via userfaultfd
/// - Bit 58: Unused
/// - Bit 59-60: Unused
/// - Bit 61: Page is file-page or shared-anon
/// - Bit 62: Page is swapped
/// - Bit 63: Page is present in RAM
#[derive(Debug, Clone, Copy)]
pub struct PagemapEntry {
    raw: u64,
}

impl PagemapEntry {
    /// Position of the "write-protected via userfaultfd" flag.
    const UFFD_WP_BIT: u32 = 57;
    /// Position of the "present in RAM" flag.
    const PRESENT_BIT: u32 = 63;

    /// Create a PagemapEntry from the 8 bytes of one pagemap record.
    ///
    /// The bytes are decoded in the host's native byte order (`u64::from_ne_bytes`), not
    /// unconditionally as little-endian, so they must be passed exactly as read from the
    /// pagemap file.
    pub fn from_bytes(bytes: [u8; 8]) -> Self {
        Self {
            raw: u64::from_ne_bytes(bytes),
        }
    }

    /// Check if page is write-protected via userfaultfd (bit 57)
    pub fn is_write_protected(&self) -> bool {
        self.bit(Self::UFFD_WP_BIT)
    }

    /// Check if page is present in RAM (bit 63)
    pub fn is_present(&self) -> bool {
        self.bit(Self::PRESENT_BIT)
    }

    /// Return whether bit `index` (0 = least significant) of the raw entry is set.
    fn bit(&self, index: u32) -> bool {
        (self.raw >> index) & 1 == 1
    }
}
60+
61+
/// Reader for /proc/self/pagemap
62+
#[derive(Debug)]
63+
pub struct PagemapReader {
64+
pagemap_fd: File,
65+
}
66+
67+
impl PagemapReader {
68+
/// Create a new PagemapReader
69+
pub fn new(_page_size: usize) -> Result<Self, PagemapError> {
70+
let pagemap_fd = File::open("/proc/self/pagemap").map_err(PagemapError::OpenPagemap)?;
71+
72+
Ok(Self { pagemap_fd })
73+
}
74+
75+
/// Check if a single page is dirty (write-protected bit cleared).
76+
///
77+
/// Checks the first host page (4K) of the guest page at the given address.
78+
/// For huge pages, all host pages within the huge page typically have the same
79+
/// dirty status, so sampling the first is sufficient.
80+
///
81+
/// # Arguments
82+
/// * `virt_addr` - Virtual address of the page to check
83+
///
84+
/// # Returns
85+
/// True if the page is present and write-protected bit is cleared (dirty).
86+
pub fn is_page_dirty(&self, virt_addr: usize) -> Result<bool, PagemapError> {
87+
// Pagemap always uses host (4K) page size
88+
let host_page_size = host_page_size();
89+
90+
// Calculate offset for this virtual page (using host page size)
91+
let host_vpn = virt_addr / host_page_size;
92+
let offset = (host_vpn * PAGEMAP_ENTRY_SIZE) as i64;
93+
94+
let mut entry_bytes = [0u8; 8];
95+
96+
// SAFETY: pread is safe as long as the fd is valid and the buffer is properly sized
97+
let ret = unsafe {
98+
libc::pread(
99+
self.pagemap_fd.as_raw_fd(),
100+
entry_bytes.as_mut_ptr().cast(),
101+
PAGEMAP_ENTRY_SIZE,
102+
offset,
103+
)
104+
};
105+
106+
if ret != PAGEMAP_ENTRY_SIZE as isize {
107+
return Err(PagemapError::ReadEntry(std::io::Error::last_os_error()));
108+
}
109+
110+
let entry = PagemapEntry::from_bytes(entry_bytes);
111+
112+
// Page must be present and the write_protected bit cleared (indicating it was written to)
113+
Ok(entry.is_present() && !entry.is_write_protected())
114+
}
115+
}

src/vmm/src/vmm_config/meminfo.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,3 +19,11 @@ pub struct MemoryResponse {
1919
/// Each bit represents whether a page is empty (all 0s).
2020
pub empty: Vec<u64>,
2121
}
22+
23+
/// Information about dirty guest memory pages
24+
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
25+
pub struct MemoryDirty {
26+
/// Bitmap for dirty pages. The bitmap is encoded as a vector of u64 values.
27+
/// Each bit represents whether a page has been written since the last snapshot.
/// Within a memory slot, page `i` is stored in bit `i % 64` (least significant bit
/// first) of word `i / 64`, as produced by `Vmm::get_dirty_memory`.
28+
pub bitmap: Vec<u64>,
29+
}

0 commit comments

Comments
 (0)