Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions rehline/_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,9 @@ class plqERM_ElasticNet(_BaseReHLine, BaseEstimator):
The ElasticNet mixing parameter, with 0 <= l1_ratio < 1. For l1_ratio = 0 the penalty
is an L2 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

omega : array of shape (n_features, ), default=np.empty(shape=(0, 0))
Weight coefficients for adaptive lasso.
omega : array of shape (n_features, ), default=np.empty(shape=0)
Non-negative weight coefficients for adaptive lasso. If not provided, all coefficients receive the
same L1 penalty controlled by ``l1_ratio``.

verbose : int, default=0
Enable verbose output. Note that this setting takes advantage of a
Expand Down Expand Up @@ -606,8 +607,7 @@ def __init__(
self.constraint = constraint if constraint is not None else []
self.C = C
self.l1_ratio = l1_ratio
self.C_eff = C / (1 - l1_ratio)
self.omega = omega if omega is not None else np.empty(shape=(0, 0))
self.omega = omega if omega is not None else np.empty(shape=(0))
self._U = U if U is not None else np.empty(shape=(0, 0))
self._V = V if V is not None else np.empty(shape=(0, 0))
self._S = S if S is not None else np.empty(shape=(0, 0))
Expand All @@ -627,7 +627,7 @@ def __init__(
self._Lambda = np.empty(shape=(0, 0))
self._Gamma = np.empty(shape=(0, 0))
self._xi = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0))
self.coef_ = None

def fit(self, X, y, sample_weight=None):
Expand Down Expand Up @@ -664,14 +664,14 @@ def fit(self, X, y, sample_weight=None):
self.auto_shape()

sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

C_eff = self.C / (1 - self.l1_ratio)
U_weight, V_weight, Tau_weight, S_weight, T_weight = _cast_sample_weight(
self._U,
self._V,
self._Tau,
self._S,
self._T,
C=self.C_eff,
C=C_eff,
sample_weight=sample_weight,
)

Expand All @@ -680,7 +680,7 @@ def fit(self, X, y, sample_weight=None):
self._Lambda = np.empty(shape=(0, 0))
self._Gamma = np.empty(shape=(0, 0))
self._xi = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0))

if self.l1_ratio == 0:
self.rho = None
Expand All @@ -695,9 +695,9 @@ def fit(self, X, y, sample_weight=None):
raise ValueError(
f"Omega length {self.omega.size} must be 0 or {d} (n_features)"
)
if not np.all(self.omega > 0):
if not np.all(self.omega >= 0):
raise ValueError(
"All elements in omega must be strictly positive."
"All elements in omega must be strictly non-negative."
)
self.rho = np.full(d, self.l1_ratio / (1 - self.l1_ratio)) * (self.omega if self.omega.size == d else 1.0)

Expand Down
25 changes: 24 additions & 1 deletion rehline/_sklearn_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,11 @@ class plq_ElasticNet_Classifier(plqERM_ElasticNet, ClassifierMixin):
- 0 < l1_ratio < 1 → combined L1 + L2 penalty
Must be strictly less than 1.0 to avoid division by zero in rho/C_eff.

omega : array of shape (n_features, ), default=None
Non-negative weight coefficients for adaptive lasso. If not provided, all non-intercept coefficients
receive the same L1 penalty controlled by ``l1_ratio``. The penalty for the intercept
can be scaled via ``intercept_scaling``.

fit_intercept : bool, default=True
Whether to fit an intercept term via an augmented constant feature column.

Expand Down Expand Up @@ -754,6 +759,7 @@ def __init__(
constraint=None,
C=1.0,
l1_ratio=0.5,
omega=None,
U=None,
V=None,
Tau=None,
Expand All @@ -780,6 +786,7 @@ def __init__(
)

constraint = [] if constraint is None else constraint
omega = np.empty((0,)) if omega is None else omega
U = np.empty((0, 0)) if U is None else U
V = np.empty((0, 0)) if V is None else V
Tau = np.empty((0, 0)) if Tau is None else Tau
Expand All @@ -794,6 +801,7 @@ def __init__(
constraint=constraint,
C=C,
l1_ratio=l1_ratio,
omega=omega,
U=U,
V=V,
Tau=Tau,
Expand Down Expand Up @@ -850,6 +858,7 @@ def _fit_subproblem(estimator, X_aug, y_pm, sample_weight, fit_intercept):
constraint=estimator.constraint,
C=estimator.C,
l1_ratio=estimator.l1_ratio,
omega=estimator.omega,
max_iter=estimator.max_iter,
tol=estimator.tol,
shrink=estimator.shrink,
Expand Down Expand Up @@ -908,17 +917,19 @@ def fit(self, X, y, sample_weight=None):

# Intercept augmentation
X_aug = X
omega_copy = self.omega.copy()
if self.fit_intercept:
col = np.full((X.shape[0], 1), self.intercept_scaling, dtype=X.dtype)
X_aug = np.hstack([X, col])
self.omega = np.append(self.omega, 1) if self.omega.size > 0 else self.omega

if self.classes_.size == 2:
y01 = le.transform(y)
y_pm = 2 * y01 - 1

# super() resolves to plqERM_ElasticNet.fit()
super().fit(X_aug, y_pm, sample_weight=sample_weight)

self.omega = omega_copy
if self.fit_intercept:
self.intercept_ = float(self.coef_[-1])
self.coef_ = self.coef_[:-1].copy()
Expand All @@ -931,6 +942,7 @@ def fit(self, X, y, sample_weight=None):
f"multi_class must be 'ovr' or 'ovo' for multiclass problems, got '{self.multi_class}'."
)
self._fit_multiclass(X_aug, y, sample_weight)
self.omega = omega_copy

return self

Expand Down Expand Up @@ -1067,6 +1079,11 @@ class plq_ElasticNet_Regressor(plqERM_ElasticNet, RegressorMixin):
- l1_ratio = 0 → pure Ridge (equivalent to plq_Ridge_Regressor)
- 0 < l1_ratio < 1 → combined L1 + L2 penalty
Must be strictly less than 1.0 to avoid division by zero in rho/C_eff.

omega : array of shape (n_features, ), default=None
Non-negative weight coefficients for adaptive lasso. If not provided, all non-intercept coefficients
receive the same L1 penalty controlled by ``l1_ratio``. The penalty for the intercept
can be scaled via ``intercept_scaling``.

fit_intercept : bool, default=True
If True, append a constant column (value = ``intercept_scaling``) to
Expand Down Expand Up @@ -1101,6 +1118,7 @@ def __init__(
constraint=None,
C=1.0,
l1_ratio=0.5,
omega=None,
U=None,
V=None,
Tau=None,
Expand All @@ -1125,6 +1143,7 @@ def __init__(

loss = {"name": "QR", "qt": 0.5} if loss is None else loss
constraint = [] if constraint is None else constraint
omega = np.empty((0,)) if omega is None else omega
U = np.empty((0, 0)) if U is None else U
V = np.empty((0, 0)) if V is None else V
Tau = np.empty((0, 0)) if Tau is None else Tau
Expand All @@ -1138,6 +1157,7 @@ def __init__(
constraint=constraint,
C=C,
l1_ratio=l1_ratio,
omega=omega,
U=U,
V=V,
Tau=Tau,
Expand Down Expand Up @@ -1183,12 +1203,15 @@ def fit(self, X, y, sample_weight=None):
self.n_features_in_ = X.shape[1]

X_aug = X
omega_copy = self.omega.copy()
if self.fit_intercept:
col = np.full((X.shape[0], 1), self.intercept_scaling, dtype=X.dtype)
X_aug = np.hstack([X, col])
self.omega = np.append(self.omega, 1) if self.omega.size > 0 else self.omega

# MRO resolves super() to plqERM_ElasticNet.fit()
super().fit(X_aug, y, sample_weight=sample_weight)
self.omega = omega_copy

if self.fit_intercept:
self.intercept_ = float(self.coef_[-1])
Expand Down
40 changes: 38 additions & 2 deletions tests/test_elastic_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ def test_omega_validation():
tol=1e-4,
)
clf.fit(X_scaled, y)
# Test invalid omega value (all elements must be strictly positive)
with pytest.raises(ValueError, match="All elements in omega must be strictly positive"):
# Test invalid omega value (all elements must be strictly non-negative)
with pytest.raises(ValueError, match="All elements in omega must be strictly non-negative"):
omega = np.ones(n_features)
omega[0] = -1
clf = plqERM_ElasticNet(
Expand All @@ -341,3 +341,39 @@ def test_omega_validation():
tol=1e-4,
)
clf.fit(X_scaled, y)


def test_zero_omega_vs_ridge():
    """ElasticNet with omega = (0, 0, ..., 0) should exactly match Ridge within 1e-4.

    With all adaptive-lasso weights set to zero the L1 part of the penalty
    vanishes, so the ElasticNet fit at C must equal a pure Ridge fit at the
    effective constant C / (1 - l1_ratio).
    """
    # Problem size and penalty configuration for both estimators.
    n, n_features, C, l1_ratio = 2000, 10, 0.01, 0.5

    X, y = make_regression(
        n_samples=n,
        n_features=n_features,
        noise=0.1,
        random_state=42,  # fixed seed keeps the test deterministic
        n_informative=6,
    )
    # Standardize features so both solvers see identically scaled inputs.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # ElasticNet with omega=0 everywhere: the L1 term is weighted away entirely.
    clf_EN = plqERM_ElasticNet(
        loss={"name": "mse"},
        C=C,
        l1_ratio=l1_ratio,
        omega=np.zeros(n_features),
        max_iter=5000,
        tol=1e-4,
    )
    clf_EN.fit(X_scaled, y)

    # Reference Ridge at the matching effective regularization C / (1 - l1_ratio).
    clf_RG = plqERM_Ridge(
        loss={"name": "mse"},
        C=C/(1-l1_ratio),
        max_iter=5000,
        tol=1e-4,
    )
    clf_RG.fit(X_scaled, y)

    # Compare coefficient vectors element-wise; flatten guards against shape mismatch.
    max_diff = np.max(np.abs(clf_EN.coef_.flatten() - clf_RG.coef_.flatten()))
    assert max_diff < 1e-4, f"ElasticNet(omega=(0, 0, ..., 0)) should match Ridge within 1e-4, max_diff={max_diff:.6e}"
77 changes: 77 additions & 0 deletions tests/test_sklearn_elasticnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,19 @@ def test_elasticnet_clf_l1_ratio_invalid_raises():
with pytest.raises(ValueError, match="l1_ratio"):
plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=1.0)

def test_elasticnet_clf_binary_omega_effect():
    """Larger omega weights must not yield larger coefficients (binary case).

    Fits two binary classifiers that differ only in the scale of the
    adaptive-lasso weights ``omega``; the model penalized 5x harder should
    end up with an L1 norm of coefficients no larger than the lighter one.
    """
    X, y = _binary_dataset()
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=0.5, omega=omega_small)
    clf1.fit(X, y)
    clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=0.5, omega=omega_large)
    clf2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_))


# ===========================================================================
# plq_ElasticNet_Classifier — multiclass OvR
Expand Down Expand Up @@ -142,6 +155,32 @@ def test_elasticnet_clf_ovr_pipeline():
assert pipe.predict(X).shape == (len(y),)


def test_elasticnet_clf_ovr_omega_effect():
    """Larger omega weights must not yield larger coefficients (OvR multiclass).

    Same comparison as the binary omega test, but through the one-vs-rest
    multiclass path with an intercept column appended.
    """
    X, y = _multiclass_dataset(n_classes=3)
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=True,
                                     omega=omega_small,
                                     multi_class="ovr"
                                     )
    clf1.fit(X, y)
    clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=True,
                                     omega=omega_large,
                                     multi_class="ovr"
                                     )
    clf2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_))


# ===========================================================================
# plq_ElasticNet_Classifier — multiclass OvO
# ===========================================================================
Expand Down Expand Up @@ -179,6 +218,31 @@ def test_elasticnet_clf_multiclass_invalid_strategy_raises():
clf.fit(X, y)


def test_elasticnet_clf_ovo_omega_effect():
    """Larger omega weights must not yield larger coefficients (OvO multiclass).

    Same comparison as the binary omega test, but through the one-vs-one
    multiclass path without an intercept.
    """
    X, y = _multiclass_dataset(n_classes=3)
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=False,
                                     omega=omega_small,
                                     multi_class="ovo"
                                     )
    clf1.fit(X, y)
    clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=False,
                                     omega=omega_large,
                                     multi_class="ovo"
                                     )
    clf2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_))

# ===========================================================================
# plq_ElasticNet_Regressor
# ===========================================================================
Expand Down Expand Up @@ -256,3 +320,16 @@ def test_elasticnet_reg_predict_equals_decision_function():
reg = plq_ElasticNet_Regressor(loss={"name": "QR", "qt": 0.5}, C=1.0, l1_ratio=0.5)
reg.fit(X_tr, y_tr)
np.testing.assert_array_equal(reg.predict(X_te), reg.decision_function(X_te))

def test_elasticnet_reg_omega_effect():
    """Larger omega weights must not yield larger coefficients (regressor).

    Fits two MAE regressors that differ only in the scale of the
    adaptive-lasso weights ``omega``; the model penalized 5x harder should
    end up with an L1 norm of coefficients no larger than the lighter one.
    """
    X, y = _reg_dataset()
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    reg1 = plq_ElasticNet_Regressor(loss={"name": "mae"}, C=1.0, l1_ratio=0.5, omega=omega_small)
    reg1.fit(X, y)
    reg2 = plq_ElasticNet_Regressor(loss={"name": "mae"}, C=1.0, l1_ratio=0.5, omega=omega_large)
    reg2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(reg2.coef_)) <= np.sum(np.abs(reg1.coef_))
37 changes: 36 additions & 1 deletion tests/test_warmstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from rehline import ReHLine, plqERM_Ridge
from rehline import ReHLine, plqERM_Ridge, plqERM_ElasticNet
from rehline._base import ReHLine_solver


Expand Down Expand Up @@ -140,3 +140,38 @@ def test_plqERM_Ridge_warmstart_coef_consistent():
atol=1e-3,
err_msg="plqERM_Ridge: warm-start and cold-start should agree at the same C",
)


# ---------------------------------------------------------------------------
# plqERM_ElasticNet
# ---------------------------------------------------------------------------


def test_plqERM_ElasticNet_warmstart_coef_consistent():
    """Warm-started plqERM_ElasticNet should match the cold-start solution at the same C.

    Protocol: fit at C, flip on ``warm_start``, refit at 2*C, and compare the
    resulting coefficients against a fresh cold-start fit at 2*C. The two
    solutions must agree to within the solver tolerance.
    """
    X, y = _make_classification_data()
    C = 0.5
    l1_ratio = 0.2

    # Fit at C, then warm-start at 2*C.
    # (The original test also fitted an unused cold-start model at C; that
    # dead work has been removed.)
    clf_warm = plqERM_ElasticNet(loss={"name": "svm"}, C=C, l1_ratio=l1_ratio, verbose=0)
    clf_warm.fit(X=X, y=y)
    clf_warm.C = 2 * C
    clf_warm.warm_start = 1
    clf_warm.fit(X=X, y=y)
    coef_warm_2C = clf_warm.coef_.copy()

    # Reference: cold-start directly at 2*C.
    clf_ref = plqERM_ElasticNet(loss={"name": "svm"}, C=2 * C, l1_ratio=l1_ratio, verbose=0)
    clf_ref.fit(X=X, y=y)
    coef_ref_2C = clf_ref.coef_.copy()

    np.testing.assert_allclose(
        coef_warm_2C,
        coef_ref_2C,
        atol=1e-3,
        err_msg="plqERM_ElasticNet: warm-start and cold-start should agree at the same C",
    )
Loading