Skip to content

Commit 5696244

Browse files
committed
api: implement API for resident and zero memory
Implement API /memory which returns two bitmaps: resident and empty. `resident` tracks whether a guest page is in the resident set and `empty` tracks whether it's actually all 0s. Both bitmaps are structured as vectors of u64, so their length is: total_number_of_pages.div_ceil(64). Pages appear in the same order as reported by /memory/mappings. Signed-off-by: Babis Chalios <[email protected]>
1 parent 717fbe9 commit 5696244

8 files changed

Lines changed: 134 additions & 20 deletions

File tree

src/firecracker/src/api_server/parsed_request.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -199,6 +199,7 @@ impl ParsedRequest {
199199
),
200200
VmmData::FullVmConfig(config) => Self::success_response_with_data(config),
201201
VmmData::MemoryMappings(mappings) => Self::success_response_with_data(mappings),
202+
VmmData::Memory(meminfo) => Self::success_response_with_data(meminfo),
202203
},
203204
Err(vmm_action_error) => {
204205
let mut response = match vmm_action_error {
@@ -616,6 +617,9 @@ pub mod tests {
616617
VmmData::MemoryMappings(mappings) => {
617618
http_response(&serde_json::to_string(mappings).unwrap(), 200)
618619
}
620+
VmmData::Memory(meminfo) => {
621+
http_response(&serde_json::to_string(meminfo).unwrap(), 200)
622+
}
619623
};
620624
let response = ParsedRequest::convert_to_response(&data);
621625
response.write_all(&mut buf).unwrap();

src/firecracker/src/api_server/request/memory_info.rs

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use micro_http::{Method, StatusCode};
1+
use micro_http::Method;
22
use vmm::rpc_interface::VmmAction;
33

44
use crate::api_server::parsed_request::{ParsedRequest, RequestError};
@@ -13,9 +13,6 @@ where
1313
format!("/memory/{}", unknown_path),
1414
Method::Get,
1515
)),
16-
None => Err(RequestError::Generic(
17-
StatusCode::BadRequest,
18-
"Missing memory info type.".to_string(),
19-
)),
16+
None => Ok(ParsedRequest::new_sync(VmmAction::GetMemory)),
2017
}
2118
}

src/vmm/src/builder.rs

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -318,6 +318,7 @@ pub fn build_microvm_for_boot(
318318
vcpus_handles: Vec::new(),
319319
vcpus_exit_evt,
320320
device_manager,
321+
page_size: vm_resources.machine_config.huge_pages.page_size(),
321322
};
322323
let vmm = Arc::new(Mutex::new(vmm));
323324

@@ -518,6 +519,7 @@ pub fn build_microvm_from_snapshot(
518519
vcpus_handles: Vec::new(),
519520
vcpus_exit_evt,
520521
device_manager,
522+
page_size: vm_resources.machine_config.huge_pages.page_size(),
521523
};
522524

523525
// Move vcpus to their own threads and start their state machine in the 'Paused' state.
@@ -751,6 +753,7 @@ pub(crate) mod tests {
751753
use vmm_sys_util::tempfile::TempFile;
752754

753755
use super::*;
756+
use crate::arch::host_page_size;
754757
use crate::device_manager::tests::default_device_manager;
755758
use crate::devices::virtio::block::CacheType;
756759
use crate::devices::virtio::generated::virtio_ids;
@@ -836,6 +839,7 @@ pub(crate) mod tests {
836839
vcpus_handles: Vec::new(),
837840
vcpus_exit_evt,
838841
device_manager: default_device_manager(),
842+
page_size: host_page_size(),
839843
}
840844
}
841845

src/vmm/src/lib.rs

Lines changed: 66 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,7 @@ use crate::vstate::memory::{GuestMemory, GuestMemoryMmap, GuestMemoryRegion};
152152
use crate::vstate::vcpu::VcpuState;
153153
pub use crate::vstate::vcpu::{Vcpu, VcpuConfig, VcpuEvent, VcpuHandle, VcpuResponse};
154154
pub use crate::vstate::vm::Vm;
155+
use crate::vstate::vm::mincore_bitmap;
155156

156157
/// Shorthand type for the EventManager flavour used by Firecracker.
157158
pub type EventManager = BaseEventManager<Arc<Mutex<dyn MutEventSubscriber>>>;
@@ -314,6 +315,8 @@ pub struct Vmm {
314315
vcpus_exit_evt: EventFd,
315316
// Device manager
316317
device_manager: DeviceManager,
318+
/// Page size used for backing guest memory
319+
pub page_size: usize,
317320
}
318321

319322
impl Vmm {
@@ -697,21 +700,80 @@ impl Vmm {
697700
let mut mappings = vec![];
698701
let mut offset = 0;
699702

700-
for region in self.vm.guest_memory().iter() {
703+
for region in self
704+
.vm
705+
.guest_memory()
706+
.iter()
707+
.flat_map(|region| region.plugged_slots())
708+
{
709+
let size = region.slice.len();
701710
#[allow(deprecated)]
702711
mappings.push(GuestRegionUffdMapping {
703-
base_host_virt_addr: region.as_ptr() as u64,
704-
size: region.size(),
712+
base_host_virt_addr: region.slice.ptr_guard_mut().as_ptr() as u64,
713+
size,
705714
offset,
706715
page_size,
707716
page_size_kib: page_size,
708717
});
709718

710-
offset += usize_to_u64(region.size());
719+
offset += usize_to_u64(size);
711720
}
712721

713722
mappings
714723
}
724+
725+
/// Get info regarding resident and empty pages for guest memory
726+
pub fn guest_memory_info(&self, page_size: usize) -> Result<(Vec<u64>, Vec<u64>), VmmError> {
727+
let mut resident = vec![];
728+
let mut empty = vec![];
729+
let zero_page = vec![0u8; page_size];
730+
731+
for mem_slot in self
732+
.vm
733+
.guest_memory()
734+
.iter()
735+
.flat_map(|region| region.plugged_slots())
736+
{
737+
debug_assert!(mem_slot.slice.len().is_multiple_of(page_size));
738+
debug_assert!(
739+
(mem_slot.slice.ptr_guard_mut().as_ptr() as usize).is_multiple_of(page_size)
740+
);
741+
742+
let len = mem_slot.slice.len();
743+
let nr_pages = len / page_size;
744+
let addr = mem_slot.slice.ptr_guard_mut().as_ptr();
745+
let mut curr_empty = vec![0u64; nr_pages.div_ceil(64)];
746+
let curr_resident = mincore_bitmap(addr, mem_slot.slice.len(), page_size)?;
747+
748+
for page_idx in 0..nr_pages {
749+
if (curr_resident[page_idx / 64] & (1u64 << (page_idx % 64))) == 0 {
750+
continue;
751+
}
752+
753+
// SAFETY: `addr` points to a memory region that is `nr_pages * page_size` long.
754+
let curr_addr = unsafe { addr.add(page_idx * page_size) };
755+
756+
// SAFETY: both addresses are valid and they point to a memory region
757+
// that is (at least) `page_size` long
758+
let ret = unsafe {
759+
libc::memcmp(
760+
curr_addr.cast::<libc::c_void>(),
761+
zero_page.as_ptr().cast::<libc::c_void>(),
762+
page_size,
763+
)
764+
};
765+
766+
if ret == 0 {
767+
curr_empty[page_idx / 64] |= 1u64 << (page_idx % 64);
768+
}
769+
}
770+
771+
resident.extend_from_slice(&curr_resident);
772+
empty.extend_from_slice(&curr_empty);
773+
}
774+
775+
Ok((resident, empty))
776+
}
715777
}
716778

717779
/// Process the content of the MPIDR_EL1 register in order to be able to pass it to KVM

src/vmm/src/persist.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -162,8 +162,8 @@ pub fn create_snapshot(
162162
snapshot_state_to_file(&microvm_state, &params.snapshot_path)?;
163163

164164
if let Some(mem_file_path) = params.mem_file_path.as_ref() {
165-
vmm.vm
166-
.snapshot_memory_to_file(mem_file_path, params.snapshot_type)?;
165+
vmm.vm
166+
.snapshot_memory_to_file(mem_file_path, params.snapshot_type, vmm.page_size)?;
167167
}
168168

169169
// We need to mark queues as dirty again for all activated devices. The reason we

src/vmm/src/rpc_interface.rs

Lines changed: 30 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,9 +28,9 @@ use crate::vmm_config::balloon::{
2828
use crate::vmm_config::boot_source::{BootSourceConfig, BootSourceConfigError};
2929
use crate::vmm_config::drive::{BlockDeviceConfig, BlockDeviceUpdateConfig, DriveError};
3030
use crate::vmm_config::entropy::{EntropyDeviceConfig, EntropyDeviceError};
31-
use crate::vmm_config::instance_info::InstanceInfo;
31+
use crate::vmm_config::instance_info::{InstanceInfo, VmState};
3232
use crate::vmm_config::machine_config::{MachineConfig, MachineConfigError, MachineConfigUpdate};
33-
use crate::vmm_config::meminfo::MemoryMapingsResponse;
33+
use crate::vmm_config::meminfo::{MemoryMapingsResponse, MemoryResponse};
3434
use crate::vmm_config::memory_hotplug::{
3535
MemoryHotplugConfig, MemoryHotplugConfigError, MemoryHotplugSizeUpdate,
3636
};
@@ -149,6 +149,8 @@ pub enum VmmAction {
149149
UpdateMachineConfiguration(MachineConfigUpdate),
150150
/// Get the guest memory mappings to host memory
151151
GetMemoryMappings,
152+
/// Get guest memory resident and empty pages information
153+
GetMemory,
152154
}
153155

154156
/// Wrapper for all errors associated with VMM actions.
@@ -200,6 +202,8 @@ pub enum VmmActionError {
200202
OperationNotSupportedPostBoot,
201203
/// The requested operation is not supported before starting the microVM.
202204
OperationNotSupportedPreBoot,
205+
/// The requested operation is not supported while the microVM is running.
206+
OperationNotSupportedWhileRunning,
203207
/// Start microvm error: {0}
204208
StartMicrovm(#[from] StartMicrovmError),
205209
/// Vsock config error: {0}
@@ -233,6 +237,8 @@ pub enum VmmData {
233237
HintingStatus(HintingStatus),
234238
/// The guest memory mapping information.
235239
MemoryMappings(MemoryMapingsResponse),
240+
/// The guest memory resident and empty pages information
241+
Memory(MemoryResponse),
236242
}
237243

238244
/// Trait used for deduplicating the MMDS request handling across the two ApiControllers.
@@ -501,7 +507,8 @@ impl<'a> PrebootApiController<'a> {
501507
| StartFreePageHinting(_)
502508
| GetFreePageHintingStatus
503509
| StopFreePageHinting
504-
| GetMemoryMappings => Err(VmmActionError::OperationNotSupportedPreBoot),
510+
| GetMemoryMappings
511+
| GetMemory => Err(VmmActionError::OperationNotSupportedPreBoot),
505512
#[cfg(target_arch = "x86_64")]
506513
SendCtrlAltDel => Err(VmmActionError::OperationNotSupportedPreBoot),
507514
}
@@ -778,6 +785,7 @@ impl RuntimeApiController {
778785
.map(|_| VmmData::Empty)
779786
.map_err(VmmActionError::MemoryHotplugUpdate),
780787
GetMemoryMappings => self.get_guest_memory_mappings(),
788+
GetMemory => self.get_guest_memory_info(),
781789
// Operations not allowed post-boot.
782790
ConfigureBootSource(_)
783791
| ConfigureLogger(_)
@@ -957,6 +965,25 @@ impl RuntimeApiController {
957965
info!("'get memory mappings' VMM action took {elapsed_time_us} us.");
958966
Ok(VmmData::MemoryMappings(MemoryMapingsResponse { mappings }))
959967
}
968+
969+
/// Get resident and empty pages information for guest memory
970+
fn get_guest_memory_info(&self) -> Result<VmmData, VmmActionError> {
971+
let start_us = get_time_us(ClockType::Monotonic);
972+
let vmm = self.vmm.lock().expect("Poisoned lock");
973+
974+
// Check if VM is paused
975+
if vmm.instance_info.state != VmState::Paused {
976+
return Err(VmmActionError::OperationNotSupportedWhileRunning);
977+
}
978+
979+
let page_size = self.vm_resources.machine_config.huge_pages.page_size();
980+
let (resident, empty) = vmm.guest_memory_info(page_size)?;
981+
982+
let elapsed_time_us = get_time_us(ClockType::Monotonic) - start_us;
983+
info!("'get memory info' VMM action took {elapsed_time_us} us.");
984+
985+
Ok(VmmData::Memory(MemoryResponse { resident, empty }))
986+
}
960987
}
961988

962989
#[cfg(test)]

src/vmm/src/vmm_config/meminfo.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,3 +9,13 @@ pub struct MemoryMapingsResponse {
99
pub mappings: Vec<GuestRegionUffdMapping>,
1010
}
1111

12+
/// Information about guest memory resident pages and pages that are all-0s
13+
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize)]
14+
pub struct MemoryResponse {
15+
/// Bitmap for resident pages. The bitmap is encoded as a vector of u64 values.
16+
/// Each bit represents whether a page is present in the resident memory set
17+
pub resident: Vec<u64>,
18+
/// Bitmap for empty pages. The bitmap is encoded as a vector of u64 values.
19+
/// Each bit represents whether a page is empty (all 0s).
20+
pub empty: Vec<u64>,
21+
}

src/vmm/src/vstate/vm.rs

Lines changed: 16 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -305,7 +305,7 @@ impl Vm {
305305
}
306306

307307
/// Retrieves the KVM dirty bitmap for each of the guest's memory regions.
308-
pub fn get_dirty_bitmap(&self) -> Result<DirtyBitmap, VmError> {
308+
pub fn get_dirty_bitmap(&self, page_size: usize) -> Result<DirtyBitmap, VmError> {
309309
self.guest_memory()
310310
.iter()
311311
.flat_map(|region| region.plugged_slots())
@@ -318,6 +318,7 @@ impl Vm {
318318
None => mincore_bitmap(
319319
mem_slot.slice.ptr_guard_mut().as_ptr(),
320320
mem_slot.slice.len(),
321+
page_size,
321322
)?,
322323
};
323324
Ok((mem_slot.slot, bitmap))
@@ -335,6 +336,7 @@ impl Vm {
335336
&self,
336337
mem_file_path: &Path,
337338
snapshot_type: SnapshotType,
339+
page_size: usize,
338340
) -> Result<(), CreateSnapshotError> {
339341
use self::CreateSnapshotError::*;
340342

@@ -377,7 +379,7 @@ impl Vm {
377379

378380
match snapshot_type {
379381
SnapshotType::Diff => {
380-
let dirty_bitmap = self.get_dirty_bitmap()?;
382+
let dirty_bitmap = self.get_dirty_bitmap(page_size)?;
381383
self.guest_memory().dump_dirty(&mut file, &dirty_bitmap)?;
382384
}
383385
SnapshotType::Full => {
@@ -503,7 +505,11 @@ impl Vm {
503505

504506
/// Use `mincore(2)` to overapproximate the dirty bitmap for the given memslot. To be used
505507
/// if a diff snapshot is requested, but dirty page tracking wasn't enabled.
506-
fn mincore_bitmap(addr: *mut u8, len: usize) -> Result<Vec<u64>, VmError> {
508+
pub(crate) fn mincore_bitmap(
509+
addr: *mut u8,
510+
len: usize,
511+
page_size: usize,
512+
) -> Result<Vec<u64>, VmError> {
507513
// TODO: Once Host 5.10 goes out of support, we can make this more robust and work on
508514
// swap-enabled systems, by doing mlock2(MLOCK_ONFAULT)/munlock() in this function (to
509515
// force swapped-out pages to get paged in, so that mincore will consider them incore).
@@ -513,8 +519,11 @@ fn mincore_bitmap(addr: *mut u8, len: usize) -> Result<Vec<u64>, VmError> {
513519
// Mincore always works at PAGE_SIZE granularity, even if the VMA we are dealing with
514520
// is a hugetlbfs VMA (e.g. to report a single hugepage as "present", mincore will
515521
// give us 512 4k markers with the lowest bit set).
516-
let page_size = host_page_size();
517-
let mut mincore_bitmap = vec![0u8; len / page_size];
522+
let host_page_size = host_page_size();
523+
let mut mincore_bitmap = vec![0u8; len / host_page_size];
524+
// The bitmap we return, though, tracks pages in terms of the actually used page size. In
525+
// the case of a hugetlbfs VMA, we just need to check if the first of the reported pages
526+
// is present.
518527
let mut bitmap = vec![0u64; (len / page_size).div_ceil(64)];
519528

520529
// SAFETY: The safety invariants of GuestRegionMmap ensure that region.as_ptr() is a valid
@@ -529,7 +538,8 @@ fn mincore_bitmap(addr: *mut u8, len: usize) -> Result<Vec<u64>, VmError> {
529538
return Err(VmError::Mincore(vmm_sys_util::errno::Error::last()));
530539
}
531540

532-
for (page_idx, b) in mincore_bitmap.iter().enumerate() {
541+
let step = page_size / host_page_size;
542+
for (page_idx, b) in mincore_bitmap.iter().step_by(step).enumerate() {
533543
bitmap[page_idx / 64] |= (*b as u64 & 0x1) << (page_idx as u64 % 64);
534544
}
535545

0 commit comments

Comments
 (0)