Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 10 additions & 10 deletions rehline/_class.py
Original file line number Diff line number Diff line change
Expand Up @@ -527,8 +527,9 @@ class plqERM_ElasticNet(_BaseReHLine, BaseEstimator):
The ElasticNet mixing parameter, with 0 <= l1_ratio < 1. For l1_ratio = 0 the penalty
is an L2 penalty. For 0 < l1_ratio < 1, the penalty is a combination of L1 and L2.

omega : array of shape (n_features, ), default=np.empty(shape=(0, 0))
Weight coefficients for adaptive lasso.
omega : array of shape (n_features, ), default=np.empty(shape=0)
Non-negative weight coefficients for adaptive lasso. If not provided, all coefficients receive the
same L1 penalty controlled by ``l1_ratio``.

verbose : int, default=0
Enable verbose output. Note that this setting takes advantage of a
Expand Down Expand Up @@ -606,8 +607,7 @@ def __init__(
self.constraint = constraint if constraint is not None else []
self.C = C
self.l1_ratio = l1_ratio
self.C_eff = C / (1 - l1_ratio)
self.omega = omega if omega is not None else np.empty(shape=(0, 0))
self.omega = omega if omega is not None else np.empty(shape=(0))
self._U = U if U is not None else np.empty(shape=(0, 0))
self._V = V if V is not None else np.empty(shape=(0, 0))
self._S = S if S is not None else np.empty(shape=(0, 0))
Expand All @@ -627,7 +627,7 @@ def __init__(
self._Lambda = np.empty(shape=(0, 0))
self._Gamma = np.empty(shape=(0, 0))
self._xi = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0))
self.coef_ = None

def fit(self, X, y, sample_weight=None):
Expand Down Expand Up @@ -664,14 +664,14 @@ def fit(self, X, y, sample_weight=None):
self.auto_shape()

sample_weight = _check_sample_weight(sample_weight, X, dtype=X.dtype)

C_eff = self.C / (1 - self.l1_ratio)
U_weight, V_weight, Tau_weight, S_weight, T_weight = _cast_sample_weight(
self._U,
self._V,
self._Tau,
self._S,
self._T,
C=self.C_eff,
C=C_eff,
sample_weight=sample_weight,
)

Expand All @@ -680,7 +680,7 @@ def fit(self, X, y, sample_weight=None):
self._Lambda = np.empty(shape=(0, 0))
self._Gamma = np.empty(shape=(0, 0))
self._xi = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0, 0))
self._mu = np.empty(shape=(0))

if self.l1_ratio == 0:
self.rho = None
Expand All @@ -695,9 +695,9 @@ def fit(self, X, y, sample_weight=None):
raise ValueError(
f"Omega length {self.omega.size} must be 0 or {d} (n_features)"
)
if not np.all(self.omega > 0):
if not np.all(self.omega >= 0):
raise ValueError(
"All elements in omega must be strictly positive."
"All elements in omega must be strictly non-negative."
)
self.rho = np.full(d, self.l1_ratio / (1 - self.l1_ratio)) * (self.omega if self.omega.size == d else 1.0)

Expand Down
25 changes: 24 additions & 1 deletion rehline/_sklearn_mixin.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,6 +714,11 @@ class plq_ElasticNet_Classifier(plqERM_ElasticNet, ClassifierMixin):
- 0 < l1_ratio < 1 → combined L1 + L2 penalty
Must be strictly less than 1.0 to avoid division by zero in rho/C_eff.

omega : array of shape (n_features, ), default=None
Non-negative weight coefficients for adaptive lasso. If not provided, all non-intercept coefficients
receive the same L1 penalty controlled by ``l1_ratio``. The penalty for the intercept
can be scaled via ``intercept_scaling``.

fit_intercept : bool, default=True
Whether to fit an intercept term via an augmented constant feature column.

Expand Down Expand Up @@ -754,6 +759,7 @@ def __init__(
constraint=None,
C=1.0,
l1_ratio=0.5,
omega=None,
U=None,
V=None,
Tau=None,
Expand All @@ -780,6 +786,7 @@ def __init__(
)

constraint = [] if constraint is None else constraint
omega = np.empty((0,)) if omega is None else omega
U = np.empty((0, 0)) if U is None else U
V = np.empty((0, 0)) if V is None else V
Tau = np.empty((0, 0)) if Tau is None else Tau
Expand All @@ -794,6 +801,7 @@ def __init__(
constraint=constraint,
C=C,
l1_ratio=l1_ratio,
omega=omega,
U=U,
V=V,
Tau=Tau,
Expand Down Expand Up @@ -850,6 +858,7 @@ def _fit_subproblem(estimator, X_aug, y_pm, sample_weight, fit_intercept):
constraint=estimator.constraint,
C=estimator.C,
l1_ratio=estimator.l1_ratio,
omega=estimator.omega,
max_iter=estimator.max_iter,
tol=estimator.tol,
shrink=estimator.shrink,
Expand Down Expand Up @@ -908,17 +917,19 @@ def fit(self, X, y, sample_weight=None):

# Intercept augmentation
X_aug = X
omega_copy = self.omega.copy()
if self.fit_intercept:
col = np.full((X.shape[0], 1), self.intercept_scaling, dtype=X.dtype)
X_aug = np.hstack([X, col])
self.omega = np.append(self.omega, 1) if self.omega.size > 0 else self.omega

if self.classes_.size == 2:
y01 = le.transform(y)
y_pm = 2 * y01 - 1

# super() resolves to plqERM_ElasticNet.fit()
super().fit(X_aug, y_pm, sample_weight=sample_weight)

self.omega = omega_copy
if self.fit_intercept:
self.intercept_ = float(self.coef_[-1])
self.coef_ = self.coef_[:-1].copy()
Expand All @@ -931,6 +942,7 @@ def fit(self, X, y, sample_weight=None):
f"multi_class must be 'ovr' or 'ovo' for multiclass problems, got '{self.multi_class}'."
)
self._fit_multiclass(X_aug, y, sample_weight)
self.omega = omega_copy

return self

Expand Down Expand Up @@ -1067,6 +1079,11 @@ class plq_ElasticNet_Regressor(plqERM_ElasticNet, RegressorMixin):
- l1_ratio = 0 → pure Ridge (equivalent to plq_Ridge_Regressor)
- 0 < l1_ratio < 1 → combined L1 + L2 penalty
Must be strictly less than 1.0 to avoid division by zero in rho/C_eff.

omega : array of shape (n_features, ), default=None
Non-negative weight coefficients for adaptive lasso. If not provided, all non-intercept coefficients
receive the same L1 penalty controlled by ``l1_ratio``. The penalty for the intercept
can be scaled via ``intercept_scaling``.

fit_intercept : bool, default=True
If True, append a constant column (value = ``intercept_scaling``) to
Expand Down Expand Up @@ -1101,6 +1118,7 @@ def __init__(
constraint=None,
C=1.0,
l1_ratio=0.5,
omega=None,
U=None,
V=None,
Tau=None,
Expand All @@ -1125,6 +1143,7 @@ def __init__(

loss = {"name": "QR", "qt": 0.5} if loss is None else loss
constraint = [] if constraint is None else constraint
omega = np.empty((0,)) if omega is None else omega
U = np.empty((0, 0)) if U is None else U
V = np.empty((0, 0)) if V is None else V
Tau = np.empty((0, 0)) if Tau is None else Tau
Expand All @@ -1138,6 +1157,7 @@ def __init__(
constraint=constraint,
C=C,
l1_ratio=l1_ratio,
omega=omega,
U=U,
V=V,
Tau=Tau,
Expand Down Expand Up @@ -1183,12 +1203,15 @@ def fit(self, X, y, sample_weight=None):
self.n_features_in_ = X.shape[1]

X_aug = X
omega_copy = self.omega.copy()
if self.fit_intercept:
col = np.full((X.shape[0], 1), self.intercept_scaling, dtype=X.dtype)
X_aug = np.hstack([X, col])
self.omega = np.append(self.omega, 1) if self.omega.size > 0 else self.omega

# MRO resolves super() to plqERM_ElasticNet.fit()
super().fit(X_aug, y, sample_weight=sample_weight)
self.omega = omega_copy

if self.fit_intercept:
self.intercept_ = float(self.coef_[-1])
Expand Down
40 changes: 38 additions & 2 deletions tests/test_elastic_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -317,8 +317,8 @@ def test_omega_validation():
tol=1e-4,
)
clf.fit(X_scaled, y)
# Test invalid omega value (all elements must be strictly positive)
with pytest.raises(ValueError, match="All elements in omega must be strictly positive"):
# Test invalid omega value (all elements must be strictly non-negative)
with pytest.raises(ValueError, match="All elements in omega must be strictly non-negative"):
omega = np.ones(n_features)
omega[0] = -1
clf = plqERM_ElasticNet(
Expand All @@ -341,3 +341,39 @@ def test_omega_validation():
tol=1e-4,
)
clf.fit(X_scaled, y)


def test_zero_omega_vs_ridge():
    """ElasticNet with omega = (0, 0, ..., 0) should exactly match Ridge within 1e-4.

    With all adaptive-lasso weights set to zero the L1 part of the penalty
    vanishes, so the ElasticNet fit at C must equal a pure Ridge fit at the
    effective constant C / (1 - l1_ratio).
    """
    # Problem size and penalty configuration for both estimators.
    n, n_features, C, l1_ratio = 2000, 10, 0.01, 0.5

    X, y = make_regression(
        n_samples=n,
        n_features=n_features,
        noise=0.1,
        random_state=42,  # fixed seed keeps the test deterministic
        n_informative=6,
    )
    # Standardize features so both solvers see identically scaled inputs.
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)

    # ElasticNet with omega=0 everywhere: the L1 term is weighted away entirely.
    clf_EN = plqERM_ElasticNet(
        loss={"name": "mse"},
        C=C,
        l1_ratio=l1_ratio,
        omega=np.zeros(n_features),
        max_iter=5000,
        tol=1e-4,
    )
    clf_EN.fit(X_scaled, y)

    # Reference Ridge at the matching effective regularization C / (1 - l1_ratio).
    clf_RG = plqERM_Ridge(
        loss={"name": "mse"},
        C=C/(1-l1_ratio),
        max_iter=5000,
        tol=1e-4,
    )
    clf_RG.fit(X_scaled, y)

    # Compare coefficient vectors element-wise; flatten guards against shape mismatch.
    max_diff = np.max(np.abs(clf_EN.coef_.flatten() - clf_RG.coef_.flatten()))
    assert max_diff < 1e-4, f"ElasticNet(omega=(0, 0, ..., 0)) should match Ridge within 1e-4, max_diff={max_diff:.6e}"
77 changes: 77 additions & 0 deletions tests/test_sklearn_elasticnet.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,19 @@ def test_elasticnet_clf_l1_ratio_invalid_raises():
with pytest.raises(ValueError, match="l1_ratio"):
plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=1.0)

def test_elasticnet_clf_binary_omega_effect():
    """Larger omega weights must not yield larger coefficients (binary case).

    Fits two binary classifiers that differ only in the scale of the
    adaptive-lasso weights ``omega``; the model penalized 5x harder should
    end up with an L1 norm of coefficients no larger than the lighter one.
    """
    X, y = _binary_dataset()
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=0.5, omega=omega_small)
    clf1.fit(X, y)
    clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"}, C=1.0, l1_ratio=0.5, omega=omega_large)
    clf2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_))


# ===========================================================================
# plq_ElasticNet_Classifier — multiclass OvR
Expand Down Expand Up @@ -142,6 +155,32 @@ def test_elasticnet_clf_ovr_pipeline():
assert pipe.predict(X).shape == (len(y),)


def test_elasticnet_clf_ovr_omega_effect():
    """Larger omega weights must not yield larger coefficients (OvR multiclass).

    Same comparison as the binary omega test, but through the one-vs-rest
    multiclass path with an intercept column appended.
    """
    X, y = _multiclass_dataset(n_classes=3)
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=True,
                                     omega=omega_small,
                                     multi_class="ovr"
                                     )
    clf1.fit(X, y)
    clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=True,
                                     omega=omega_large,
                                     multi_class="ovr"
                                     )
    clf2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_))


# ===========================================================================
# plq_ElasticNet_Classifier — multiclass OvO
# ===========================================================================
Expand Down Expand Up @@ -179,6 +218,31 @@ def test_elasticnet_clf_multiclass_invalid_strategy_raises():
clf.fit(X, y)


def test_elasticnet_clf_ovo_omega_effect():
    """Larger omega weights must not yield larger coefficients (OvO multiclass).

    Same comparison as the binary omega test, but through the one-vs-one
    multiclass path without an intercept.
    """
    X, y = _multiclass_dataset(n_classes=3)
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    clf1 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=False,
                                     omega=omega_small,
                                     multi_class="ovo"
                                     )
    clf1.fit(X, y)
    clf2 = plq_ElasticNet_Classifier(loss={"name": "svm"},
                                     C=1.0,
                                     l1_ratio=0.5,
                                     fit_intercept=False,
                                     omega=omega_large,
                                     multi_class="ovo"
                                     )
    clf2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(clf2.coef_)) <= np.sum(np.abs(clf1.coef_))

# ===========================================================================
# plq_ElasticNet_Regressor
# ===========================================================================
Expand Down Expand Up @@ -256,3 +320,16 @@ def test_elasticnet_reg_predict_equals_decision_function():
reg = plq_ElasticNet_Regressor(loss={"name": "QR", "qt": 0.5}, C=1.0, l1_ratio=0.5)
reg.fit(X_tr, y_tr)
np.testing.assert_array_equal(reg.predict(X_te), reg.decision_function(X_te))

def test_elasticnet_reg_omega_effect():
    """Larger omega weights must not yield larger coefficients (regressor).

    Fits two MAE regressors that differ only in the scale of the
    adaptive-lasso weights ``omega``; the model penalized 5x harder should
    end up with an L1 norm of coefficients no larger than the lighter one.
    """
    X, y = _reg_dataset()
    # Seed the generator: the original unseeded np.random.rand made this
    # test non-deterministic across runs.
    rng = np.random.default_rng(42)
    omega_small = rng.random(10)
    omega_large = omega_small * 5

    reg1 = plq_ElasticNet_Regressor(loss={"name": "mae"}, C=1.0, l1_ratio=0.5, omega=omega_small)
    reg1.fit(X, y)
    reg2 = plq_ElasticNet_Regressor(loss={"name": "mae"}, C=1.0, l1_ratio=0.5, omega=omega_large)
    reg2.fit(X, y)

    # Heavier per-feature L1 penalty => at most the same total shrinkage.
    assert np.sum(np.abs(reg2.coef_)) <= np.sum(np.abs(reg1.coef_))
37 changes: 36 additions & 1 deletion tests/test_warmstart.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import numpy as np

from rehline import ReHLine, plqERM_Ridge
from rehline import ReHLine, plqERM_Ridge, plqERM_ElasticNet
from rehline._base import ReHLine_solver


Expand Down Expand Up @@ -140,3 +140,38 @@ def test_plqERM_Ridge_warmstart_coef_consistent():
atol=1e-3,
err_msg="plqERM_Ridge: warm-start and cold-start should agree at the same C",
)


# ---------------------------------------------------------------------------
# plqERM_ElasticNet
# ---------------------------------------------------------------------------


def test_plqERM_ElasticNet_warmstart_coef_consistent():
    """Warm-started plqERM_ElasticNet should match the cold-start solution at the same C.

    Protocol: fit at C, flip on ``warm_start``, refit at 2*C, and compare the
    resulting coefficients against a fresh cold-start fit at 2*C. The two
    solutions must agree to within the solver tolerance.
    """
    X, y = _make_classification_data()
    C = 0.5
    l1_ratio = 0.2

    # Fit at C, then warm-start at 2*C.
    # (The original test also fitted an unused cold-start model at C; that
    # dead work has been removed.)
    clf_warm = plqERM_ElasticNet(loss={"name": "svm"}, C=C, l1_ratio=l1_ratio, verbose=0)
    clf_warm.fit(X=X, y=y)
    clf_warm.C = 2 * C
    clf_warm.warm_start = 1
    clf_warm.fit(X=X, y=y)
    coef_warm_2C = clf_warm.coef_.copy()

    # Reference: cold-start directly at 2*C.
    clf_ref = plqERM_ElasticNet(loss={"name": "svm"}, C=2 * C, l1_ratio=l1_ratio, verbose=0)
    clf_ref.fit(X=X, y=y)
    coef_ref_2C = clf_ref.coef_.copy()

    np.testing.assert_allclose(
        coef_warm_2C,
        coef_ref_2C,
        atol=1e-3,
        err_msg="plqERM_ElasticNet: warm-start and cold-start should agree at the same C",
    )
Loading