Source code for plasticnet.solvers.in_place.in_place

import numpy as np
from numba import jit

from ...utils import math


[docs]@jit(nopython=True, nogil=True, cache=False) # pragma: no cover def ordinary_least_squares_(beta, r, X, tol=1e-8, max_iter=1000): r""" ordinary_least_squares_(beta, r, X, tol=1e-8, max_iter=1000) Ordinary least squares regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N}||\vec{y}-X\vec{\beta}||_2^2 Args: beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,D) data matrix. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape rho = np.ones(D, dtype=np.float64) + tol iter_num = 0 converged = False while not converged and iter_num < max_iter: iter_num += 1 for j in range(D): rho[j] = np.dot(X[:, j], r) / N r -= rho[j] * X[:, j] beta[j] += rho[j] converged = np.max(np.abs(rho)) < tol return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def ridge_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000): r""" ridge_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000) Ridge regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \tfrac{1}{2} ||\vec{\beta}||_2^2 Args: beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,D) data matrix. lambda_total (float): must be non-negative. total regularization penalty strength. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Returns: converged (tuple): tuple ``(converged, iter_num)`` containing convergence information. ``converged`` (bool) is whether or not the algorithm converged in the alloted number of iterations) and ``iter_num`` (int) is how many iterations the algorithm ran for. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape beta_old = beta.copy() delta_beta = np.ones(D, dtype=np.float64) + tol rho = np.ones(D, dtype=np.float64) + tol iter_num = 0 converged = False while not converged and iter_num < max_iter: iter_num += 1 for j in range(D): rho[j] = np.dot(X[:, j], r) / N beta[j] = (beta_old[j] + rho[j]) / (1 + lambda_total) delta_beta[j] = beta[j] - beta_old[j] r -= X[:, j] * delta_beta[j] beta_old[j] = beta[j] converged = np.max(np.abs(delta_beta)) < tol return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def lasso_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000): r""" lasso_(beta, r, X, lambda_total=1.0, tol=1e-8, max_iter=1000) Lasso regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda ||\vec{\beta}||_1 Args: beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,D) data matrix. lambda_total (float): must be non-negative. total regularization penalty strength. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape beta_old = beta.copy() delta_beta = np.ones(D, dtype=np.float64) + tol rho = np.ones(D, dtype=np.float64) + tol iter_num = 0 converged = False while not converged and iter_num < max_iter: iter_num += 1 for j in range(D): rho[j] = np.dot(X[:, j], r) / N beta[j] = math.soft_thresh(lambda_total, beta_old[j] + rho[j]) delta_beta[j] = beta[j] - beta_old[j] r -= X[:, j] * delta_beta[j] beta_old[j] = beta[j] converged = np.max(np.abs(delta_beta)) < tol return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def elastic_net_(beta, r, X, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000): r""" elastic_net_(beta, r, X, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000) Elastic net regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}||_2^2 \bigr) Args: beta (numpy.ndarray): shape (D,) coefficient vector. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,D) data matrix. lambda_total (float): must be non-negative. total regularization penalty strength. alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ lambda1 = alpha * lambda_total lambda2 = (1.0 - alpha) * lambda_total N, D = X.shape beta_old = beta.copy() delta_beta = np.ones(D, dtype=np.float64) + tol rho = np.ones(D, dtype=np.float64) + tol iter_num = 0 converged = False while not converged and iter_num < max_iter: iter_num += 1 for j in range(D): rho[j] = np.dot(X[:, j], r) / N beta[j] = math.soft_thresh(lambda1, beta_old[j] + rho[j]) / (1 + lambda2) delta_beta[j] = beta[j] - beta_old[j] r -= X[:, j] * delta_beta[j] beta_old[j] = beta[j] converged = np.max(np.abs(delta_beta)) < tol return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def general_plastic_net_( beta, r, X, xi, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000 ): r""" general_plastic_net_(beta, r, X, xi, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000) General plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}-\vec{\xi}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}-\vec{\zeta}||_2^2 \bigr) Args: beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,P) data matrix. xi (numpy.ndarray): shape (P,) target for L1 penalty. zeta (numpy.ndarray): shape (P,) target for L2 penalty. lambda_total (float): must be non-negative. total regularization penalty strength. alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ lambda1 = alpha * lambda_total lambda2 = (1.0 - alpha) * lambda_total N, D = X.shape beta_old = beta.copy() delta_beta = np.ones(D, dtype=np.float64) + tol rho = np.ones(D, dtype=np.float64) + tol iter_num = 0 converged = False while not converged and iter_num < max_iter: iter_num += 1 for j in range(D): rho[j] = np.dot(X[:, j], r) / N beta[j] = ( math.soft_thresh( lambda1, beta_old[j] + rho[j] + lambda2 * zeta[j] - (1 + lambda2) * xi[j], ) / (1 + lambda2) + xi[j] ) delta_beta[j] = beta[j] - beta_old[j] r -= X[:, j] * delta_beta[j] beta_old[j] = beta[j] converged = np.max(np.abs(delta_beta)) < tol return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def plastic_ridge_(beta, r, X, zeta, lambda_total=1.0, tol=1e-8, max_iter=1000): r""" plastic_ridge_(beta, r, X, zeta, lambda_total=1.0, tol=1e-8, max_iter=1000) Plastic ridge regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \tfrac{1}{2} ||\vec{\beta}-\vec{\zeta}||_2^2 Args: beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,P) data matrix. zeta (numpy.ndarray): shape (P,) target for L2 penalty. lambda_total (float): must be non-negative. total regularization penalty strength. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape converged, iter_num = general_plastic_net_( beta, r, X, np.zeros(D, dtype=np.float64), zeta, lambda_total=lambda_total, alpha=0.0, tol=tol, max_iter=max_iter, ) return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def plastic_lasso_(beta, r, X, xi, lambda_total=1.0, tol=1e-8, max_iter=1000): r""" plastic_lasso_(beta, r, X, xi, lambda_total=1.0, tol=1e-8, max_iter=1000) Plastic lasso regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda ||\vec{\beta}-\vec{\xi}||_1 Args: beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,P) data matrix. xi (numpy.ndarray): shape (P,) target for L1 penalty. lambda_total (float): must be non-negative. total regularization penalty strength. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape converged, iter_num = general_plastic_net_( beta, r, X, xi, np.zeros(D, dtype=np.float64), lambda_total=lambda_total, alpha=1.0, tol=tol, max_iter=max_iter, ) return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def hard_plastic_net_( beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000 ): r""" hard_plastic_net_(beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000) Hard plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}-\vec{\xi}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}||_2^2 \bigr) Args: beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,P) data matrix. xi (numpy.ndarray): shape (P,) target for L1 penalty. lambda_total (float): must be non-negative. total regularization penalty strength. alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape converged, iter_num = general_plastic_net_( beta, r, X, xi, np.zeros(D, dtype=np.float64), lambda_total=lambda_total, alpha=alpha, tol=tol, max_iter=max_iter, ) return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def soft_plastic_net_( beta, r, X, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000 ): r""" soft_plastic_net_(beta, r, X, zeta, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000) Soft plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}-\vec{\zeta}||_2^2 \bigr) Args: beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,P) data matrix. zeta (numpy.ndarray): shape (P,) target for L2 penalty. lambda_total (float): must be non-negative. total regularization penalty strength. alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape converged, iter_num = general_plastic_net_( beta, r, X, np.zeros(D, dtype=np.float64), zeta, lambda_total=lambda_total, alpha=alpha, tol=tol, max_iter=max_iter, ) return (converged, iter_num)
[docs]@jit(nopython=True, nogil=True, cache=True) # pragma: no cover def unified_plastic_net_( beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000 ): r""" unified_plastic_net_(beta, r, X, xi, lambda_total=1.0, alpha=0.75, tol=1e-8, max_iter=1000) Unified plastic net regression. This function finds the :math:`\vec{\beta}` that minimizes .. math:: \tfrac{1}{2N} ||\vec{y}-X\vec{\beta}||_2^2 + \lambda \bigl( \alpha||\vec{\beta}-\vec{\xi}||_1 + (1-\alpha) \tfrac{1}{2} ||\vec{\beta}-\vec{\xi}||_2^2 \bigr) Args: beta (numpy.ndarray): shape (P,) initial guess for the solution to the regression. modified in-place. r (numpy.ndarray): shape (N,) residual, i.e :math:`\vec{r} = \vec{y} - X\vec{\beta}`. modified in-place. X (numpy.ndarray): shape (N,P) data matrix. xi (numpy.ndarray): shape (P,) target for both L1 and L2 penalties. lambda_total (float): must be non-negative. total regularization penalty strength. alpha (float): mixing parameter between L1 and L1 penalties. must be between zero and one. :math:`\alpha=0` is pure L2 penalty, :math:`\alpha=1` is pure L1 penalty. tol (float): convergence criterion for coordinate descent. coordinate descent runs until the maximum element-wise change in **beta** is less than **tol**. max_iter (int): maximum number of update passes through all P elements of **beta**, in case **tol** is never met. Note **beta** and **r** are modified in-place. As inputs, if :math:`\vec{\beta} = 0`, then it *must* be the case that :math:`\vec{r} = \vec{y}`, or the function will not converge to the correct answer. In general, the inputs **beta** and **r** must be coordinated such that :math:`\vec{r} = \vec{y} - X\vec{\beta}`. """ N, D = X.shape converged, iter_num = general_plastic_net_( beta, r, X, xi, xi, lambda_total=lambda_total, alpha=alpha, tol=tol, max_iter=max_iter, ) return (converged, iter_num)