From e3b9ea340e9cacfe353bf17e169cbfce4011f909 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Wed, 13 May 2026 16:55:28 -0700 Subject: [PATCH 1/2] [Executorch] Enable madvise based mmap Pull Request resolved: https://github.com/pytorch/executorch/pull/19553 madvise-based mmap allows us to indicate to the OS that we aim to read from this memory soon, so it can start prefetching and taking page faults eagerly. mlock ends up being significantly heavier on iOS, where it tries to lock the entire region; according to Claude, the overhead comes not necessarily from page faults, but more from system call overhead, as opposed to file reads. ghstack-source-id: 381777643 @exported-using-ghexport Differential Revision: [D104318324](https://our.internmc.facebook.com/intern/diff/D104318324/) --- extension/data_loader/mman.h | 19 +++++++++++++++++++ extension/data_loader/mmap_data_loader.cpp | 4 ++++ extension/data_loader/mmap_data_loader.h | 4 ++++ .../test/mmap_data_loader_test.cpp | 6 ++++++ extension/module/module.cpp | 11 +++++++++++ extension/module/module.h | 2 ++ extension/module/test/module_test.cpp | 17 +++++++++++++++++ 7 files changed, 63 insertions(+) diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index 26a9ee08067..fb6fe4fd39b 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -43,6 +43,16 @@ ET_INLINE off_t get_mmap_offset(size_t offset) { return static_cast(offset); } +/** + * Hint the kernel to prefetch pages eagerly and to optimize for sequential + * reads. Intended to reduce page-fault stutter during model initialization + * when the caller does not want to mlock the pages into RAM. 
+ */ +ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) { + ::madvise(addr, len, MADV_WILLNEED); + ::madvise(addr, len, MADV_SEQUENTIAL); +} + #else #define NOMINMAX @@ -80,4 +90,13 @@ ET_INLINE uint64_t get_mmap_offset(size_t offset) { return static_cast(offset); } +/** + * No-op on Windows: there is no direct equivalent to madvise(MADV_WILLNEED | + * MADV_SEQUENTIAL) and the existing mman_windows shim does not implement one. + */ +ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) { + (void)addr; + (void)len; +} + #endif diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index 5d77b67cc59..b07c8dd7d62 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -249,6 +249,10 @@ Result MmapDataLoader::load( // No need to keep track of this. munmap() will unlock as a side effect. } + if (mlock_config_ == MlockConfig::UseMadvise) { + madvise_pages_willneed_sequential(pages, map_size); + } + // The requested data is at an offset into the mapped pages. const void* data = static_cast(pages) + offset - range.start; diff --git a/extension/data_loader/mmap_data_loader.h b/extension/data_loader/mmap_data_loader.h index c0496a39d4b..2bbdd96013b 100644 --- a/extension/data_loader/mmap_data_loader.h +++ b/extension/data_loader/mmap_data_loader.h @@ -38,6 +38,10 @@ class MmapDataLoader final : public executorch::runtime::DataLoader { UseMlock, /// Call `mlock()` on loaded pages, ignoring errors if it fails. UseMlockIgnoreErrors, + /// Use madvise(MADV_WILLNEED | MADV_SEQUENTIAL) instead of mlock. + /// Tells the kernel to prefetch pages eagerly and optimize for + /// sequential reads, without pinning them in RAM. 
+ UseMadvise, }; /** diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp index df071fd7474..e08001af245 100644 --- a/extension/data_loader/test/mmap_data_loader_test.cpp +++ b/extension/data_loader/test/mmap_data_loader_test.cpp @@ -244,6 +244,12 @@ TEST_F(MmapDataLoaderTest, InBoundsLoadsSucceedUseMlockIgnoreErrors) { MmapDataLoader::MlockConfig::UseMlockIgnoreErrors); } +TEST_F(MmapDataLoaderTest, InBoundsLoadsSucceedUseMadvise) { + // There's no portable way to verify madvise() is called, but exercise the + // path to make sure the code still behaves correctly. + test_in_bounds_loads_succeed(MmapDataLoader::MlockConfig::UseMadvise); +} + TEST_F(MmapDataLoaderTest, FinalPageOfUnevenFileSucceeds) { // Create a file whose length is not an even multiple of a page. // Each 4-byte word in the file has a different value. diff --git a/extension/module/module.cpp b/extension/module/module.cpp index ec7236276f5..0b95a86ac1e 100644 --- a/extension/module/module.cpp +++ b/extension/module/module.cpp @@ -70,6 +70,17 @@ runtime::Result> make_data_loader( std::move(*res_mlock_ignore)); break; } + case Module::LoadMode::MmapUseMadvise: { + auto res_madvise = MmapDataLoader::from( + file_path.c_str(), MmapDataLoader::MlockConfig::UseMadvise); + if (!res_madvise.ok()) { + return res_madvise.error(); + } + data_loader = + std::make_unique>( + std::move(*res_madvise)); + break; + } } return data_loader; } diff --git a/extension/module/module.h b/extension/module/module.h index 08a68b2676b..5f2c30bbfbe 100644 --- a/extension/module/module.h +++ b/extension/module/module.h @@ -51,6 +51,8 @@ class Module { MmapUseMlock, /// Use memory locking and ignore errors. MmapUseMlockIgnoreErrors, + /// Use mmap with madvise(MADV_WILLNEED | MADV_SEQUENTIAL) hints. 
+ MmapUseMadvise, }; /** diff --git a/extension/module/test/module_test.cpp b/extension/module/test/module_test.cpp index 7e1d657094c..1e010504789 100644 --- a/extension/module/test/module_test.cpp +++ b/extension/module/test/module_test.cpp @@ -50,6 +50,23 @@ TEST_F(ModuleTest, TestLoad) { EXPECT_TRUE(module.is_loaded()); } +TEST_F(ModuleTest, TestLoadMmapUseMadvise) { + Module module(model_path_, Module::LoadMode::MmapUseMadvise); + + EXPECT_FALSE(module.is_loaded()); + const auto error = module.load(); + EXPECT_EQ(error, Error::Ok); + EXPECT_TRUE(module.is_loaded()); + + auto tensor = make_tensor_ptr({2, 2}, {1.f, 2.f, 3.f, 4.f}); + + const auto result = module.execute("forward", {tensor, tensor, 1.0}); + EXPECT_EQ(result.error(), Error::Ok); + + const auto expected = make_tensor_ptr({2, 2}, {2.f, 4.f, 6.f, 8.f}); + EXPECT_TENSOR_CLOSE(result->at(0).toTensor(), *expected.get()); +} + TEST_F(ModuleTest, TestLoadNonExistent) { Module module("/path/to/nonexistent/file.pte"); const auto error = module.load(); From 79942a89fb8d872d888fa57c7aa461962d001502 Mon Sep 17 00:00:00 2001 From: Kimish Patel Date: Wed, 13 May 2026 16:57:21 -0700 Subject: [PATCH 2/2] [ExecuTorch][MmapDataLoader] Issue F_RDADVISE on Apple platforms in UseMadvise path Pull Request resolved: https://github.com/pytorch/executorch/pull/19554 In the MmapDataLoader UseMadvise codepath, after the existing madvise(MADV_WILLNEED | MADV_SEQUENTIAL) calls, also issue fcntl(F_RDADVISE) on Apple platforms (iOS/macOS). F_RDADVISE is more aggressive than madvise for cold starts: it schedules read-ahead on the file descriptor itself, bringing pages into the unified buffer cache so first-touch faults during inference are serviced from RAM instead of storage. F_RDADVISE closes part of the gap between lazy mmap and eager file read, while still allowing pages to be evicted under memory pressure (unlike mlock, which pins pages and counts against RLIMIT_MEMLOCK). 
Differential Revision: [D104318326](https://our.internmc.facebook.com/intern/diff/D104318326/) ghstack-source-id: 381778987 --- extension/data_loader/mman.h | 27 ++++++++++++++++++++++ extension/data_loader/mmap_data_loader.cpp | 1 + 2 files changed, 28 insertions(+) diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h index fb6fe4fd39b..a7a335961c8 100644 --- a/extension/data_loader/mman.h +++ b/extension/data_loader/mman.h @@ -17,6 +17,7 @@ #ifndef _WIN32 +#include #include #include @@ -53,6 +54,24 @@ ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) { ::madvise(addr, len, MADV_SEQUENTIAL); } +/** + * On Apple platforms, schedule kernel read-ahead on the file descriptor itself + * via fcntl(F_RDADVISE). This is more aggressive than madvise for cold starts: + * it brings pages into the unified buffer cache so first-touch faults are + * serviced from RAM instead of storage. No-op on non-Apple POSIX platforms. + */ +ET_INLINE void fcntl_rdadvise_apple(int fd, size_t file_size) { +#if defined(__APPLE__) + struct radvisory advice; + advice.ra_offset = 0; + advice.ra_count = static_cast(file_size); + ::fcntl(fd, F_RDADVISE, &advice); +#else + (void)fd; + (void)file_size; +#endif +} + #else #define NOMINMAX @@ -99,4 +118,12 @@ ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) { (void)len; } +/** + * No-op on Windows: F_RDADVISE is an Apple-specific fcntl command. 
+ */ +ET_INLINE void fcntl_rdadvise_apple(int fd, size_t file_size) { + (void)fd; + (void)file_size; +} + #endif diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp index b07c8dd7d62..dc9e1a615bf 100644 --- a/extension/data_loader/mmap_data_loader.cpp +++ b/extension/data_loader/mmap_data_loader.cpp @@ -251,6 +251,7 @@ Result MmapDataLoader::load( if (mlock_config_ == MlockConfig::UseMadvise) { madvise_pages_willneed_sequential(pages, map_size); + fcntl_rdadvise_apple(fd_, file_size_); } // The requested data is at an offset into the mapped pages.