diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index 26a9ee08067..fb6fe4fd39b 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -43,6 +43,16 @@ ET_INLINE off_t get_mmap_offset(size_t offset) { return static_cast<off_t>(offset); } +/** + * Hint the kernel to prefetch pages eagerly and to optimize for sequential + * reads. Intended to reduce page-fault stutter during model initialization + * when the caller does not want to mlock the pages into RAM. + */ +ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) { + ::madvise(addr, len, MADV_WILLNEED); + ::madvise(addr, len, MADV_SEQUENTIAL); +} + #else #define NOMINMAX @@ -80,4 +90,13 @@ ET_INLINE uint64_t get_mmap_offset(size_t offset) { return static_cast<uint64_t>(offset); } +/** + * No-op on Windows: there is no direct equivalent to the MADV_WILLNEED and + * MADV_SEQUENTIAL madvise() hints, and the existing mman_windows shim has none. + */ +ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) { + (void)addr; + (void)len; +} + #endif diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 5d77b67cc59..b07c8dd7d62 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -249,6 +249,10 @@ Result MmapDataLoader::load( // No need to keep track of this. munmap() will unlock as a side effect. } + if (mlock_config_ == MlockConfig::UseMadvise) { + madvise_pages_willneed_sequential(pages, map_size); + } + // The requested data is at an offset into the mapped pages. 
const void* data = static_cast(pages) + offset - range.start; diff --git a/extension/data_loader/mmap_data_loader.h b/extension/data_loader/mmap_data_loader.h index c0496a39d4b..2bbdd96013b 100644 --- a/extension/data_loader/mmap_data_loader.h +++ b/extension/data_loader/mmap_data_loader.h @@ -38,6 +38,10 @@ class MmapDataLoader final : public executorch::runtime::DataLoader { UseMlock, /// Call `mlock()` on loaded pages, ignoring errors if it fails. UseMlockIgnoreErrors, + /// Use madvise() with MADV_WILLNEED and MADV_SEQUENTIAL instead of mlock. + /// Tells the kernel to prefetch pages eagerly and optimize for + /// sequential reads, without pinning them in RAM. + UseMadvise, }; /** diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index df071fd7474..e08001af245 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -244,6 +244,12 @@ TEST_F(MmapDataLoaderTest, InBoundsLoadsSucceedUseMlockIgnoreErrors) { MmapDataLoader::MlockConfig::UseMlockIgnoreErrors); } +TEST_F(MmapDataLoaderTest, InBoundsLoadsSucceedUseMadvise) { + // There's no portable way to verify madvise() is called, but exercise the + // path to make sure the code still behaves correctly. + test_in_bounds_loads_succeed(MmapDataLoader::MlockConfig::UseMadvise); +} + TEST_F(MmapDataLoaderTest, FinalPageOfUnevenFileSucceeds) { // Create a file whose length is not an even multiple of a page. // Each 4-byte word in the file has a different value. 
diff --git a/extension/module/module.cpp b/extension/module/module.cpp index ec7236276f5..0b95a86ac1e 100644 --- a/extension/module/module.cpp +++ b/extension/module/module.cpp @@ -70,6 +70,17 @@ runtime::Result> make_data_loader( std::move(*res_mlock_ignore)); break; } + case Module::LoadMode::MmapUseMadvise: { + auto res_madvise = MmapDataLoader::from( + file_path.c_str(), MmapDataLoader::MlockConfig::UseMadvise); + if (!res_madvise.ok()) { + return res_madvise.error(); + } + data_loader = + std::make_unique>( + std::move(*res_madvise)); + break; + } } return data_loader; } diff --git a/extension/module/module.h b/extension/module/module.h index 08a68b2676b..5f2c30bbfbe 100644 --- a/extension/module/module.h +++ b/extension/module/module.h @@ -51,6 +51,8 @@ class Module { MmapUseMlock, /// Use memory locking and ignore errors. MmapUseMlockIgnoreErrors, + /// Use mmap with madvise() MADV_WILLNEED and MADV_SEQUENTIAL hints. + MmapUseMadvise, }; /** diff --git a/extension/module/test/module_test.cpp b/extension/module/test/module_test.cpp index 7e1d657094c..1e010504789 100644 --- a/extension/module/test/module_test.cpp +++ b/extension/module/test/module_test.cpp @@ -50,6 +50,23 @@ TEST_F(ModuleTest, TestLoad) { EXPECT_TRUE(module.is_loaded()); } +TEST_F(ModuleTest, TestLoadMmapUseMadvise) { + Module module(model_path_, Module::LoadMode::MmapUseMadvise); + + EXPECT_FALSE(module.is_loaded()); + const auto error = module.load(); + EXPECT_EQ(error, Error::Ok); + EXPECT_TRUE(module.is_loaded()); + + auto tensor = make_tensor_ptr({2, 2}, {1.f, 2.f, 3.f, 4.f}); + + const auto result = module.execute("forward", {tensor, tensor, 1.0}); + EXPECT_EQ(result.error(), Error::Ok); + + const auto expected = make_tensor_ptr({2, 2}, {2.f, 4.f, 6.f, 8.f}); + EXPECT_TENSOR_CLOSE(result->at(0).toTensor(), *expected.get()); +} + TEST_F(ModuleTest, TestLoadNonExistent) { Module module("/path/to/nonexistent/file.pte"); const auto error = module.load();