From e3b9ea340e9cacfe353bf17e169cbfce4011f909 Mon Sep 17 00:00:00 2001
From: Kimish Patel <kimishpatel@fb.com>
Date: Wed, 13 May 2026 16:55:28 -0700
Subject: [PATCH 1/2] [Executorch] Enable madvise based mmap

Pull Request resolved: https://github.com/pytorch/executorch/pull/19553

madvise-based mmap lets us indicate to the OS that we intend to read from this memory soon, so it can start prefetching and taking page faults eagerly.
mlock ends up being significantly heavier on iOS, where it tries to lock the entire region; according to Claude, the overhead comes not necessarily from page faults
but rather from system-call overhead, as opposed to a file read.
ghstack-source-id: 381777643
@exported-using-ghexport

Differential Revision: [D104318324](https://our.internmc.facebook.com/intern/diff/D104318324/)
---
 extension/data_loader/mman.h                  | 19 +++++++++++++++++++
 extension/data_loader/mmap_data_loader.cpp    |  4 ++++
 extension/data_loader/mmap_data_loader.h      |  4 ++++
 .../test/mmap_data_loader_test.cpp            |  6 ++++++
 extension/module/module.cpp                   | 11 +++++++++++
 extension/module/module.h                     |  2 ++
 extension/module/test/module_test.cpp         | 17 +++++++++++++++++
 7 files changed, 63 insertions(+)
diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h
index 26a9ee08067..fb6fe4fd39b 100644
--- a/extension/data_loader/mman.h
+++ b/extension/data_loader/mman.h
@@ -43,6 +43,16 @@ ET_INLINE off_t get_mmap_offset(size_t offset) {
   return static_cast<off_t>(offset);
 }
 
+/**
+ * Hint the kernel to prefetch [addr, addr + len) eagerly (MADV_WILLNEED) and
+ * to expect sequential reads (MADV_SEQUENTIAL), reducing page-fault stutter at
+ * model init without mlock. Best effort: madvise() errors are ignored.
+ */
+ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) {
+  ::madvise(addr, len, MADV_WILLNEED);
+  ::madvise(addr, len, MADV_SEQUENTIAL);
+}
+
 #else
 
 #define NOMINMAX
@@ -80,4 +90,13 @@ ET_INLINE uint64_t get_mmap_offset(size_t offset) {
   return static_cast<uint64_t>(offset);
 }
 
+/**
+ * No-op on Windows: there is no direct equivalent to the MADV_WILLNEED and
+ * MADV_SEQUENTIAL madvise() hints, and the mman_windows shim implements none.
+ */
+ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) {
+  (void)addr;
+  (void)len;
+}
+
 #endif
diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp
index 5d77b67cc59..b07c8dd7d62 100644
--- a/extension/data_loader/mmap_data_loader.cpp
+++ b/extension/data_loader/mmap_data_loader.cpp
@@ -249,6 +249,10 @@ Result<FreeableBuffer> MmapDataLoader::load(
     // No need to keep track of this. munmap() will unlock as a side effect.
   }
 
+  if (mlock_config_ == MlockConfig::UseMadvise) {
+    madvise_pages_willneed_sequential(pages, map_size);
+  }
+
   // The requested data is at an offset into the mapped pages.
   const void* data = static_cast<const uint8_t*>(pages) + offset - range.start;
 
diff --git a/extension/data_loader/mmap_data_loader.h b/extension/data_loader/mmap_data_loader.h
index c0496a39d4b..2bbdd96013b 100644
--- a/extension/data_loader/mmap_data_loader.h
+++ b/extension/data_loader/mmap_data_loader.h
@@ -38,6 +38,10 @@ class MmapDataLoader final : public executorch::runtime::DataLoader {
     UseMlock,
     /// Call `mlock()` on loaded pages, ignoring errors if it fails.
     UseMlockIgnoreErrors,
+    /// Use madvise(MADV_WILLNEED | MADV_SEQUENTIAL) instead of mlock.
+    /// Tells the kernel to prefetch pages eagerly and optimize for
+    /// sequential reads, without pinning them in RAM.
+    UseMadvise,
   };
 
   /**
diff --git a/extension/data_loader/test/mmap_data_loader_test.cpp b/extension/data_loader/test/mmap_data_loader_test.cpp
index df071fd7474..e08001af245 100644
--- a/extension/data_loader/test/mmap_data_loader_test.cpp
+++ b/extension/data_loader/test/mmap_data_loader_test.cpp
@@ -244,6 +244,12 @@ TEST_F(MmapDataLoaderTest, InBoundsLoadsSucceedUseMlockIgnoreErrors) {
       MmapDataLoader::MlockConfig::UseMlockIgnoreErrors);
 }
 
+TEST_F(MmapDataLoaderTest, InBoundsLoadsSucceedUseMadvise) {
+  // Runs the shared in-bounds checks under UseMadvise. There's no portable way
+  // to verify madvise() is called, so this only exercises the code path.
+  test_in_bounds_loads_succeed(MmapDataLoader::MlockConfig::UseMadvise);
+}
+
 TEST_F(MmapDataLoaderTest, FinalPageOfUnevenFileSucceeds) {
   // Create a file whose length is not an even multiple of a page.
   // Each 4-byte word in the file has a different value.
diff --git a/extension/module/module.cpp b/extension/module/module.cpp
index ec7236276f5..0b95a86ac1e 100644
--- a/extension/module/module.cpp
+++ b/extension/module/module.cpp
@@ -70,6 +70,17 @@ runtime::Result<std::unique_ptr<runtime::DataLoader>> make_data_loader(
           std::move(*res_mlock_ignore));
       break;
     }
+    case Module::LoadMode::MmapUseMadvise: {
+      auto res_madvise = MmapDataLoader::from(
+          file_path.c_str(), MmapDataLoader::MlockConfig::UseMadvise);
+      if (!res_madvise.ok()) {
+        return res_madvise.error();
+      }
+      data_loader =
+          std::make_unique<std::remove_reference_t<decltype(*res_madvise)>>(
+              std::move(*res_madvise));
+      break;
+    }
   }
   return data_loader;
 }
diff --git a/extension/module/module.h b/extension/module/module.h
index 08a68b2676b..5f2c30bbfbe 100644
--- a/extension/module/module.h
+++ b/extension/module/module.h
@@ -51,6 +51,8 @@ class Module {
     MmapUseMlock,
     /// Use memory locking and ignore errors.
     MmapUseMlockIgnoreErrors,
+    /// Use mmap with madvise(MADV_WILLNEED | MADV_SEQUENTIAL) hints.
+    MmapUseMadvise,
   };
 
   /**
diff --git a/extension/module/test/module_test.cpp b/extension/module/test/module_test.cpp
index 7e1d657094c..1e010504789 100644
--- a/extension/module/test/module_test.cpp
+++ b/extension/module/test/module_test.cpp
@@ -50,6 +50,23 @@ TEST_F(ModuleTest, TestLoad) {
   EXPECT_TRUE(module.is_loaded());
 }
 
+TEST_F(ModuleTest, TestLoadMmapUseMadvise) {
+  // Load through the MmapUseMadvise mode, then check a forward() result.
+  Module module(model_path_, Module::LoadMode::MmapUseMadvise);
+  EXPECT_FALSE(module.is_loaded());
+  const auto error = module.load();
+  EXPECT_EQ(error, Error::Ok);
+  EXPECT_TRUE(module.is_loaded());
+
+  auto tensor = make_tensor_ptr({2, 2}, {1.f, 2.f, 3.f, 4.f});
+
+  const auto result = module.execute("forward", {tensor, tensor, 1.0});
+  // Fatal assert: `result->at(0)` below must not run on a failed Result.
+  ASSERT_EQ(result.error(), Error::Ok);
+  const auto expected = make_tensor_ptr({2, 2}, {2.f, 4.f, 6.f, 8.f});
+  EXPECT_TENSOR_CLOSE(result->at(0).toTensor(), *expected.get());
+}
+
 TEST_F(ModuleTest, TestLoadNonExistent) {
   Module module("/path/to/nonexistent/file.pte");
   const auto error = module.load();

From 79942a89fb8d872d888fa57c7aa461962d001502 Mon Sep 17 00:00:00 2001
From: Kimish Patel <kimishpatel@fb.com>
Date: Wed, 13 May 2026 16:57:21 -0700
Subject: [PATCH 2/2] [ExecuTorch][MmapDataLoader] Issue F_RDADVISE on Apple
 platforms in UseMadvise path

Pull Request resolved: https://github.com/pytorch/executorch/pull/19554

In the MmapDataLoader UseMadvise codepath, after the existing
madvise(MADV_WILLNEED | MADV_SEQUENTIAL) calls, also issue fcntl(F_RDADVISE)
on Apple platforms (iOS/macOS). F_RDADVISE is more aggressive than madvise
for cold starts: it schedules read-ahead on the file descriptor itself,
bringing pages into the unified buffer cache so first-touch faults during
inference are serviced from RAM instead of storage.

F_RDADVISE closes part of the gap between lazy mmap and eager file read,
while still allowing pages to be evicted under memory pressure (unlike
mlock, which pins pages and counts against RLIMIT_MEMLOCK).

Differential Revision: [D104318326](https://our.internmc.facebook.com/intern/diff/D104318326/)
ghstack-source-id: 381778987
---
 extension/data_loader/mman.h               | 27 ++++++++++++++++++++++
 extension/data_loader/mmap_data_loader.cpp |  1 +
 2 files changed, 28 insertions(+)

diff --git a/extension/data_loader/mman.h b/extension/data_loader/mman.h
index fb6fe4fd39b..a7a335961c8 100644
--- a/extension/data_loader/mman.h
+++ b/extension/data_loader/mman.h
@@ -17,6 +17,7 @@
 
 #ifndef _WIN32
 
+#include <fcntl.h>
 #include <sys/mman.h>
 #include <unistd.h>
 
@@ -53,6 +54,24 @@ ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) {
   ::madvise(addr, len, MADV_SEQUENTIAL);
 }
 
+/**
+ * On Apple platforms, schedule kernel read-ahead on the file descriptor itself
+ * via fcntl(F_RDADVISE): more aggressive than madvise for cold starts, as it
+ * pulls pages into the unified buffer cache. radvisory::ra_count is an int, so
+ * the request is clamped to INT_MAX bytes. No-op on non-Apple POSIX platforms.
+ */
+ET_INLINE void fcntl_rdadvise_apple(int fd, size_t file_size) {
+#if defined(__APPLE__)
+  const size_t count = file_size < 0x7fffffffu ? file_size : 0x7fffffffu;
+  struct radvisory advice = {}; // Zero-init keeps ra_offset at 0.
+  advice.ra_count = static_cast<int>(count); // count <= INT_MAX, no wrap.
+  ::fcntl(fd, F_RDADVISE, &advice); // Best effort: result is ignored.
+#else
+  (void)fd;
+  (void)file_size;
+#endif
+}
+
 #else
 
 #define NOMINMAX
@@ -99,4 +118,12 @@ ET_INLINE void madvise_pages_willneed_sequential(void* addr, size_t len) {
   (void)len;
 }
 
+/**
+ * No-op on Windows (F_RDADVISE is an Apple-specific fcntl); args are unused.
+ */
+ET_INLINE void fcntl_rdadvise_apple(int fd, size_t file_size) {
+  (void)fd;
+  (void)file_size;
+}
+
 #endif
diff --git a/extension/data_loader/mmap_data_loader.cpp b/extension/data_loader/mmap_data_loader.cpp
index b07c8dd7d62..dc9e1a615bf 100644
--- a/extension/data_loader/mmap_data_loader.cpp
+++ b/extension/data_loader/mmap_data_loader.cpp
@@ -251,6 +251,7 @@ Result<FreeableBuffer> MmapDataLoader::load(
 
   if (mlock_config_ == MlockConfig::UseMadvise) {
     madvise_pages_willneed_sequential(pages, map_size);
+    fcntl_rdadvise_apple(fd_, file_size_);
   }
 
   // The requested data is at an offset into the mapped pages.