import numpy as np
from numba import jit
from ...utils import math
[docs]@jit(nopython=True, nogil=True, cache=False) # pragma: no cover
def ordinary_least_squares_(beta, r, X, tol=1e-8, max_iter=1000):
r"""
ordinary_least_squares_(beta, r, X, tol=1e-8, max_iter=1000)
Ordinary least squares regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N}||\vec{y}-X\vec{\beta}||_2^2
Args:
beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,D) data matrix.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
rho = np.ones(D, dtype=np.float64) + tol
iter_num = 0
converged = False
while not converged and iter_num < max_iter:
iter_num += 1
for j in range(D):
rho[j] = np.dot(X[:, j], r) / N
r -= rho[j] * X[:, j]
beta[j] += rho[j]
converged = np.max(np.abs(rho)) < tol
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def ridge_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000):
r"""
ridge_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000)
Ridge regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \tfrac{1}{2} ||\vec{\beta}||_2^2
Args:
beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,D) data matrix.
lambda_total (float): must be non-negative. total regularization penalty strength.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Returns:
converged (tuple): tuple ``(converged, iter_num)`` containing convergence information. ``converged`` (bool) is whether or not the algorithm converged in the alloted number of iterations) and ``iter_num`` (int) is how many iterations the algorithm ran for.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
beta_old = beta.copy()
delta_beta = np.ones(D, dtype=np.float64) + tol
rho = np.ones(D, dtype=np.float64) + tol
iter_num = 0
converged = False
while not converged and iter_num < max_iter:
iter_num += 1
for j in range(D):
rho[j] = np.dot(X[:, j], r) / N
beta[j] = (beta_old[j] + rho[j]) / (1 + lambda_total)
delta_beta[j] = beta[j] - beta_old[j]
r -= X[:, j] * delta_beta[j]
beta_old[j] = beta[j]
converged = np.max(np.abs(delta_beta)) < tol
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def lasso_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000):
r"""
lasso_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000)
Lasso regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda ||\vec{\beta}||_1
Args:
beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,D) data matrix.
lambda_total (float): must be non-negative. total regularization penalty strength.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
beta_old = beta.copy()
delta_beta = np.ones(D, dtype=np.float64) + tol
rho = np.ones(D, dtype=np.float64) + tol
iter_num = 0
converged = False
while not converged and iter_num < max_iter:
iter_num += 1
for j in range(D):
rho[j] = np.dot(X[:, j], r) / N
beta[j] = math.soft_thresh(lambda_total, beta_old[j] + rho[j])
delta_beta[j] = beta[j] - beta_old[j]
r -= X[:, j] * delta_beta[j]
beta_old[j] = beta[j]
converged = np.max(np.abs(delta_beta)) < tol
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def elastic_net_(beta, r, X, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000):
r"""
elastic_net_(beta, r, X, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000)
Elastic net regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}||_2^2 \bigr)
Args:
beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,D) data matrix.
lambda_total (float): must be non-negative. total regularization penalty strength.
alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
lambda1 = alpha * lambda_total
lambda2 = (1.0 - alpha) * lambda_total
N, D = X.shape
beta_old = beta.copy()
delta_beta = np.ones(D, dtype=np.float64) + tol
rho = np.ones(D, dtype=np.float64) + tol
iter_num = 0
converged = False
while not converged and iter_num < max_iter:
iter_num += 1
for j in range(D):
rho[j] = np.dot(X[:, j], r) / N
beta[j] = math.soft_thresh(lambda1, beta_old[j] + rho[j]) / (1 + lambda2)
delta_beta[j] = beta[j] - beta_old[j]
r -= X[:, j] * delta_beta[j]
beta_old[j] = beta[j]
converged = np.max(np.abs(delta_beta)) < tol
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def general_plastic_net_(
beta, r, X, xi, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000
):
r"""
general_plastic_net_(beta, r, X, xi, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000)
General plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}-\vec{\xi}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}-\vec{\zeta}||_2^2 \bigr)
Args:
beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,P) data matrix.
xi (numpy.ndarray): shape (P,) target for L1 penalty.
zeta (numpy.ndarray): shape (P,) target for L2 penalty.
lambda_total (float): must be non-negative. total regularization penalty strength.
alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
lambda1 = alpha * lambda_total
lambda2 = (1.0 - alpha) * lambda_total
N, D = X.shape
beta_old = beta.copy()
delta_beta = np.ones(D, dtype=np.float64) + tol
rho = np.ones(D, dtype=np.float64) + tol
iter_num = 0
converged = False
while not converged and iter_num < max_iter:
iter_num += 1
for j in range(D):
rho[j] = np.dot(X[:, j], r) / N
beta[j] = (
math.soft_thresh(
lambda1,
beta_old[j] + rho[j] + lambda2 * zeta[j] - (1 + lambda2) * xi[j],
)
/ (1 + lambda2)
+ xi[j]
)
delta_beta[j] = beta[j] - beta_old[j]
r -= X[:, j] * delta_beta[j]
beta_old[j] = beta[j]
converged = np.max(np.abs(delta_beta)) < tol
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def plastic_ridge_(beta, r, X, zeta, lambda_total=1.0, tol=1e-8, max_iter=1000):
r"""
plastic_ridge_(beta, r, X, zeta, lambda_total=1.0, tol=1e-8, max_iter=1000)
Plastic ridge regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \tfrac{1}{2} ||\vec{\beta}-\vec{\zeta}||_2^2
Args:
beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,P) data matrix.
zeta (numpy.ndarray): shape (P,) target for L2 penalty.
lambda_total (float): must be non-negative. total regularization penalty strength.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
converged, iter_num = general_plastic_net_(
beta,
r,
X,
np.zeros(D, dtype=np.float64),
zeta,
lambda_total=lambda_total,
alpha=0.0,
tol=tol,
max_iter=max_iter,
)
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def plastic_lasso_(beta, r, X, xi, lambda_total=1.0, tol=1e-8, max_iter=1000):
r"""
plastic_lasso_(beta, r, X, xi, lambda_total=1.0, tol=1e-8, max_iter=1000)
Plastic lasso regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda ||\vec{\beta}-\vec{\xi}||_1
Args:
beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,P) data matrix.
xi (numpy.ndarray): shape (P,) target for L1 penalty.
lambda_total (float): must be non-negative. total regularization penalty strength.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
converged, iter_num = general_plastic_net_(
beta,
r,
X,
xi,
np.zeros(D, dtype=np.float64),
lambda_total=lambda_total,
alpha=1.0,
tol=tol,
max_iter=max_iter,
)
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def hard_plastic_net_(
beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000
):
r"""
hard_plastic_net_(beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000)
Hard plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}-\vec{\xi}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}||_2^2 \bigr)
Args:
beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,P) data matrix.
xi (numpy.ndarray): shape (P,) target for L1 penalty.
lambda_total (float): must be non-negative. total regularization penalty strength.
alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
converged, iter_num = general_plastic_net_(
beta,
r,
X,
xi,
np.zeros(D, dtype=np.float64),
lambda_total=lambda_total,
alpha=alpha,
tol=tol,
max_iter=max_iter,
)
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def soft_plastic_net_(
beta, r, X, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000
):
r"""
soft_plastic_net_(beta, r, X, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000)
Soft plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}-\vec{\zeta}||_2^2 \bigr)
Args:
beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,P) data matrix.
zeta (numpy.ndarray): shape (P,) target for L2 penalty.
lambda_total (float): must be non-negative. total regularization penalty strength.
alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
converged, iter_num = general_plastic_net_(
beta,
r,
X,
np.zeros(D, dtype=np.float64),
zeta,
lambda_total=lambda_total,
alpha=alpha,
tol=tol,
max_iter=max_iter,
)
return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover
def unified_plastic_net_(
beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000
):
r"""
unified_plastic_net_(beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000)
Unified plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes
.. math::
\tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}-\vec{\xi}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}-\vec{\xi}||_2^2 \bigr)
Args:
beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place.
r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place.
X (numpy.ndarray): shape (N,P) data matrix.
xi (numpy.ndarray): shape (P,) target for both L1 and L2 penalties.
lambda_total (float): must be non-negative. total regularization penalty strength.
alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty.
tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**.
max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met.
Note
**beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`.
"""
N, D = X.shape
converged, iter_num = general_plastic_net_(
beta,
r,
X,
xi,
xi,
lambda_total=lambda_total,
alpha=alpha,
tol=tol,
max_iter=max_iter,
)
return (converged, iter_num)