diff --git a/rehline/_class.py b/rehline/_class.py index d61cf1a..b0b4352 100644 --- a/rehline/_class.py +++ b/rehline/_class.py @@ -527,8 +527,9 @@ class plqERM_ElasticNet(_BaseReHLine, BaseEstimator): The ElasticNet mixing parameter, with 0 <= l1_ratio < 1. For l1_ratio = 0 the penalty is an L2 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2. - omega : array of shape (n_features, ), default=np.empty(shape=(0, 0)) - Weight coefficients for adaptive lasso. + omega : array of shape (n_features, ), default=np.empty(shape=0) + Non-negative weight coefficients for adaptive lasso. If not provided, all coefficients receive the + same L1 penalty controlled by ``l1_ratio``. verbose : int, default=0 Enable verbose output. Note that this setting takes advantage of a @@ -606,8 +607,7 @@ def __init__( self.constraint = constraint if constraint is not None else [] self.C = C self.l1_ratio = l1_ratio - self.C_eff = C / (1 - l1_ratio) - self.omega = omega if omega is not None else np.empty(shape=(0, 0)) + self.omega = omega if omega is not None else np.empty(shape=(0)) self._U = U if U is not None else np.empty(shape=(0, 0)) self._V = V if V is not None else np.empty(shape=(0, 0)) self._S = S if S is not None else np.empty(shape=(0, 0)) @@ -627,7 +627,7 @@ def __init__( self._Lambda = np.empty(shape=(0, 0)) self._Gamma = np.empty(shape=(0, 0)) self._xi = np.empty(shape=(0, 0)) - self._mu = np.empty(shape=(0, 0)) + self._mu = np.empty(shape=(0)) self.coef_ = None def fit(self, X, y, sample_weight=None): @@ -664,14 +664,14 @@ def fit(self, X, y, sample_weight=None): self.auto_shape() sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype) - + C_eff = self.C / (1 - self.l1_ratio) U_weight, V_weight, Tau_weight, S_weight, T_weight = _cast_sample_weight( self._U, self._V, self._Tau, self._S, self._T, - C=self.C_eff, + C=C_eff, sample_weight=sample_weight, ) @@ -680,7 +680,7 @@ def fit(self, X, y, sample_weight=None): self._Lambda = 
np.empty(shape=(0, 0)) self._Gamma = np.empty(shape=(0, 0)) self._xi = np.empty(shape=(0, 0)) - self._mu = np.empty(shape=(0, 0)) + self._mu = np.empty(shape=(0)) if self.l1_ratio == 0: self.rho = None @@ -695,9 +695,9 @@ def fit(self, X, y, sample_weight=None): raise ValueError( f"Omega length {self.omega.size} must be 0 or {d} (n_features)" ) - if not np.all(self.omega > 0): + if not np.all(self.omega >= 0): raise ValueError( - "All elements in omega must be strictly positive." + "All elements in omega must be strictly non-negative." ) self.rho = np.full(d, self.l1_ratio / (1 - self.l1_ratio)) * (self.omega if self.omega.size == d else 1.0) diff --git a/rehline/_sklearn_mixin.py b/rehline/_sklearn_mixin.py index 5449b61..85c2185 100644 --- a/rehline/_sklearn_mixin.py +++ b/rehline/_sklearn_mixin.py @@ -714,6 +714,11 @@ class plq_ElasticNet_Classifier(plqERM_ElasticNet, ClassifierMixin): - 0 < l1_ratio < 1 → combined L1 + L2 penalty Must be strictly less than 1.0 to avoid division by zero in rho/C_eff. + omega : array of shape (n_features, ), default=None + Non-negative weight coefficients for adaptive lasso. If not provided, all non-intercept coefficients + receive the same L1 penalty controlled by ``l1_ratio``. The penalty for the intercept + can be scaled via ``intercept_scaling``. + fit_intercept : bool, default=True Whether to fit an intercept term via an augmented constant feature column. 
@@ -754,6 +759,7 @@ def __init__( constraint=None, C=1.0, l1_ratio=0.5, + omega=None, U=None, V=None, Tau=None, @@ -780,6 +786,7 @@ def __init__( ) constraint = [] if constraint is None else constraint + omega = np.empty((0,)) if omega is None else omega U = np.empty((0, 0)) if U is None else U V = np.empty((0, 0)) if V is None else V Tau = np.empty((0, 0)) if Tau is None else Tau @@ -794,6 +801,7 @@ def __init__( constraint=constraint, C=C, l1_ratio=l1_ratio, + omega=omega, U=U, V=V, Tau=Tau, @@ -850,6 +858,7 @@ def _fit_subproblem(estimator, X_aug, y_pm, sample_weight, fit_intercept): constraint=estimator.constraint, C=estimator.C, l1_ratio=estimator.l1_ratio, + omega=estimator.omega, max_iter=estimator.max_iter, tol=estimator.tol, shrink=estimator.shrink, @@ -908,9 +917,11 @@ def fit(self, X, y, sample_weight=None): # Intercept augmentation X_aug = X + omega_copy = self.omega.copy() if self.fit_intercept: col = np.full((X.shape[0], 1), self.intercept_scaling, dtype=X.dtype) X_aug = np.hstack([X, col]) + self.omega = np.append(self.omega, 1) if self.omega.size > 0 else self.omega if self.classes_.size == 2: y01 = le.transform(y) @@ -918,7 +929,7 @@ def fit(self, X, y, sample_weight=None): # super() resolves to plqERM_ElasticNet.fit() super().fit(X_aug, y_pm, sample_weight=sample_weight) - + self.omega = omega_copy if self.fit_intercept: self.intercept_ = float(self.coef_[-1]) self.coef_ = self.coef_[:-1].copy() @@ -931,6 +942,7 @@ def fit(self, X, y, sample_weight=None): f"multi_class must be 'ovr' or 'ovo' for multiclass problems, got '{self.multi_class}'." ) self._fit_multiclass(X_aug, y, sample_weight) + self.omega = omega_copy return self @@ -1067,6 +1079,11 @@ class plq_ElasticNet_Regressor(plqERM_ElasticNet, RegressorMixin): - l1_ratio = 0 → pure Ridge (equivalent to plq_Ridge_Regressor) - 0 < l1_ratio < 1 → combined L1 + L2 penalty Must be strictly less than 1.0 to avoid division by zero in rho/C_eff. 
+ + omega : array of shape (n_features, ), default=None + Non-negative weight coefficients for adaptive lasso. If not provided, all non-intercept coefficients + receive the same L1 penalty controlled by ``l1_ratio``. The penalty for the intercept + can be scaled via ``intercept_scaling``. fit_intercept : bool, default=True If True, append a constant column (value = ``intercept_scaling``) to @@ -1101,6 +1118,7 @@ def __init__( constraint=None, C=1.0, l1_ratio=0.5, + omega=None, U=None, V=None, Tau=None, @@ -1125,6 +1143,7 @@ def __init__( loss = {"name": "QR", "qt": 0.5} if loss is None else loss constraint = [] if constraint is None else constraint + omega = np.empty((0,)) if omega is None else omega U = np.empty((0, 0)) if U is None else U V = np.empty((0, 0)) if V is None else V Tau = np.empty((0, 0)) if Tau is None else Tau @@ -1138,6 +1157,7 @@ def __init__( constraint=constraint, C=C, l1_ratio=l1_ratio, + omega=omega, U=U, V=V, Tau=Tau, @@ -1183,12 +1203,15 @@ def fit(self, X, y, sample_weight=None): self.n_features_in_ = X.shape[1] X_aug = X + omega_copy = self.omega.copy() if self.fit_intercept: col = np.full((X.shape[0], 1), self.intercept_scaling, dtype=X.dtype) X_aug = np.hstack([X, col]) + self.omega = np.append(self.omega, 1) if self.omega.size > 0 else self.omega # MRO resolves super() to plqERM_ElasticNet.fit() super().fit(X_aug, y, sample_weight=sample_weight) + self.omega = omega_copy if self.fit_intercept: self.intercept_ = float(self.coef_[-1]) diff --git a/tests/test_elastic_net.py b/tests/test_elastic_net.py index bd25377..438866b 100644 --- a/tests/test_elastic_net.py +++ b/tests/test_elastic_net.py @@ -317,8 +317,8 @@ def test_omega_validation(): tol=1e-4, ) clf.fit(X_scaled, y) - # Test invalid omega value (all elements must be strictly positive) - with pytest.raises(ValueError, match="All elements in omega must be strictly positive"): + # Test invalid omega value (all elements must be strictly non-negative) + with 
pytest.raises(ValueError, match="All elements in omega must be strictly non-negative"): omega = np.ones(n_features) omega[0] = -1 clf = plqERM_ElasticNet( @@ -341,3 +341,39 @@ def test_omega_validation(): clf.fit(X_scaled, y) + + +def test_zero_omega_vs_ridge(): + """ElasticNet with omega=(0, 0, ..., 0) should exactly match Ridge within 1e-4.""" + n, n_features, C, l1_ratio = 2000, 10, 0.01, 0.5 + + X, y = make_regression( + n_samples=n, + n_features=n_features, + noise=0.1, + random_state=42, + n_informative=6, + ) + scaler = StandardScaler() + X_scaled = scaler.fit_transform(X) + + clf_EN = plqERM_ElasticNet( + loss={"name": "mse"}, + C=C, + l1_ratio=l1_ratio, + omega=np.zeros(n_features), + max_iter=5000, + tol=1e-4, + ) + clf_EN.fit(X_scaled, y) + + clf_RG = plqERM_Ridge( + loss={"name": "mse"}, + C=C/(1-l1_ratio), + max_iter=5000, + tol=1e-4, + ) + clf_RG.fit(X_scaled, y) + + max_diff = np.max(np.abs(clf_EN.coef_.flatten() - clf_RG.coef_.flatten())) + assert max_diff < 1e-4, f"ElasticNet(omega=(0, 0, ..., 0)) should match Ridge within 1e-4, max_diff={max_diff:.6e}" \ No newline at end of file diff --git a/tests/test_sklearn_elasticnet.py b/tests/test_sklearn_elasticnet.py index a84a1cc..857d036 100644 --- a/tests/test_sklearn_elasticnet.py +++ b/tests/test_sklearn_elasticnet.py @@ -101,6 +101,19 @@ def test_elasticnet_clf_l1_ratio_invalid_raises(): with pytest.raises(ValueError, match="l1_ratio"): plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=1.0) +def test_elasticnet_clf_binary_omega_effect(): + """Model coefficient with higher omega weights should be smaller.""" + X, y = _binary_dataset() + omega_small = np.random.rand(10) + omega_large = omega_small * 5 + + clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=0.5, omega=omega_small) + clf1.fit(X, y) + clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=0.5, omega=omega_large) + clf2.fit(X, y) + + assert np.sum(np.abs(clf2.coef_)) <= 
np.sum(np.abs(clf1.coef_)) + # =========================================================================== # plq_ElasticNet_Classifier — multiclass OvR @@ -142,6 +155,32 @@ def test_elasticnet_clf_ovr_pipeline(): assert pipe.predict(X).shape == (len(y),) +def test_elasticnet_clf_ovr_omega_effect(): + """Model coefficient with higher omega weights should be smaller.""" + X, y = _multiclass_dataset(n_classes=3) + omega_small = np.random.rand(10) + omega_large = omega_small * 5 + + clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"}, + C=1.0, + l1_ratio=0.5, + fit_intercept=True, + omega=omega_small, + multi_class="ovr" + ) + clf1.fit(X, y) + clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"}, + C=1.0, + l1_ratio=0.5, + fit_intercept=True, + omega=omega_large, + multi_class="ovr" + ) + clf2.fit(X, y) + + assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_)) + + # =========================================================================== # plq_ElasticNet_Classifier — multiclass OvO # =========================================================================== @@ -179,6 +218,31 @@ def test_elasticnet_clf_multiclass_invalid_strategy_raises(): clf.fit(X, y) +def test_elasticnet_clf_ovo_omega_effect(): + """Model coefficient with higher omega weights should be smaller.""" + X, y = _multiclass_dataset(n_classes=3) + omega_small = np.random.rand(10) + omega_large = omega_small * 5 + + clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"}, + C=1.0, + l1_ratio=0.5, + fit_intercept=False, + omega=omega_small, + multi_class="ovo" + ) + clf1.fit(X, y) + clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"}, + C=1.0, + l1_ratio=0.5, + fit_intercept=False, + omega=omega_large, + multi_class="ovo" + ) + clf2.fit(X, y) + + assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_)) + # =========================================================================== # plq_ElasticNet_Regressor # =========================================================================== 
@@ -256,3 +320,16 @@ def test_elasticnet_reg_predict_equals_decision_function(): reg = plq_ElasticNet_Regressor(loss={"name": "QR", "qt": 0.5}, C=1.0, l1_ratio=0.5) reg.fit(X_tr, y_tr) np.testing.assert_array_equal(reg.predict(X_te), reg.decision_function(X_te)) + +def test_elasticnet_reg_omega_effect(): + """Model coefficient with higher omega weights should be smaller.""" + X, y = _reg_dataset() + omega_small = np.random.rand(10) + omega_large = omega_small * 5 + + reg1 = plq_ElasticNet_Regressor(loss={"name": "mae"}, C=1.0, l1_ratio=0.5, omega=omega_small) + reg1.fit(X, y) + reg2 = plq_ElasticNet_Regressor(loss={"name": "mae"}, C=1.0, l1_ratio=0.5, omega=omega_large) + reg2.fit(X, y) + + assert np.sum(np.abs(reg2.coef_)) <= np.sum(np.abs(reg1.coef_)) diff --git a/tests/test_warmstart.py b/tests/test_warmstart.py index bf7b992..f6e78cf 100644 --- a/tests/test_warmstart.py +++ b/tests/test_warmstart.py @@ -8,7 +8,7 @@ import numpy as np -from rehline import ReHLine, plqERM_Ridge +from rehline import ReHLine, plqERM_Ridge, plqERM_ElasticNet from rehline._base import ReHLine_solver @@ -140,3 +140,38 @@ def test_plqERM_Ridge_warmstart_coef_consistent(): atol=1e-3, err_msg="plqERM_Ridge: warm-start and cold-start should agree at the same C", ) + + +# --------------------------------------------------------------------------- +# plqERM_ElasticNet +# --------------------------------------------------------------------------- + + +def test_plqERM_ElasticNet_warmstart_coef_consistent(): + """Warm-started plqERM_ElasticNet should match cold-start solution for the same C.""" + X, y = _make_classification_data() + C = 0.5 + l1_ratio = 0.2 + + clf_cold = plqERM_ElasticNet(loss={"name": "svm"}, C=C, l1_ratio=l1_ratio, verbose=0) + clf_cold.fit(X=X, y=y) + + # Fit at C, then warm-start at 2*C + clf_warm = plqERM_ElasticNet(loss={"name": "svm"}, C=C, l1_ratio=l1_ratio, verbose=0) + clf_warm.fit(X=X, y=y) + clf_warm.C = 2 * C + clf_warm.warm_start = 1 + clf_warm.fit(X=X, y=y) + 
coef_warm_2C = clf_warm.coef_.copy() + + # Reference: cold-start at 2*C + clf_ref = plqERM_ElasticNet(loss={"name": "svm"}, C=2 * C, l1_ratio=l1_ratio, verbose=0) + clf_ref.fit(X=X, y=y) + coef_ref_2C = clf_ref.coef_.copy() + + np.testing.assert_allclose( + coef_warm_2C, + coef_ref_2C, + atol=1e-3, + err_msg="plqERM_ElasticNet: warm-start and cold-start should agree at the same C", + )