Skip to content

Commit 696d251

Browse files
yeyu-nvidia and claude
authored and committed
Address remaining PR review feedback
- Wrap reference forward pass in try/finally so LoRA adapters are always re-enabled even if the forward throws (prevents silent training with permanently disabled LoRA on subsequent calls)
- Replace assert with raise ValueError for the eagle_offline compatibility check so it cannot be silently optimized away with python -O; update test to expect ValueError instead of AssertionError

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
Signed-off-by: Ye Yu <yeyu@nvidia.com>
1 parent 56f459f commit 696d251

2 files changed

Lines changed: 9 additions & 6 deletions

File tree

modelopt/torch/speculative/plugins/transformers.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -648,7 +648,8 @@ def modify(
648648

649649
# Inject HF PEFT LoRA adapters into the base model for co-training
650650
if self.eagle_base_lora:
651-
assert not self.eagle_offline, "eagle_base_lora is incompatible with eagle_offline=True"
651+
if self.eagle_offline:
652+
raise ValueError("eagle_base_lora is incompatible with eagle_offline=True")
652653
self._inject_base_lora()
653654

654655
# delete base model layers for offline training
@@ -818,10 +819,12 @@ def _run_forward(no_grad):
818819
ref_logits = None
819820
if self.eagle_base_lora:
820821
self._set_base_lora_enabled(False)
821-
ref_logits = _run_forward(no_grad=True).logits
822-
if hasattr(self, "_aux_hidden_states"):
823-
self._aux_hidden_states.clear()
824-
self._set_base_lora_enabled(True)
822+
try:
823+
ref_logits = _run_forward(no_grad=True).logits
824+
finally:
825+
if hasattr(self, "_aux_hidden_states"):
826+
self._aux_hidden_states.clear()
827+
self._set_base_lora_enabled(True)
825828

826829
# Main forward — LoRA params receive gradients when eagle_base_lora is True.
827830
outputs = _run_forward(no_grad=freeze_base_model and not self.eagle_base_lora)

tests/unit/torch/speculative/plugins/test_hf_speculative_lora.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -86,7 +86,7 @@ def test_eagle_offline_incompatible():
8686
model = get_tiny_llama(num_hidden_layers=4)
8787
config = deepcopy(EAGLE_LORA_CONFIG)
8888
config["eagle_offline"] = True
89-
with pytest.raises(AssertionError, match="eagle_base_lora is incompatible with eagle_offline"):
89+
with pytest.raises(ValueError, match="eagle_base_lora is incompatible with eagle_offline"):
9090
mtsp.convert(model, mode=[("eagle", config)])
9191

9292

0 commit comments

Comments (0)