Source code for proxtoolbox.algorithms.samsara.hessian

# -*- coding: utf8 -*-
# Development on SAMSARA began in 2007 with funding from the National
# Science Foundation of the USA, DMS-0712796.
#
# Contributors include:
#
# Russell Luke (main author)
# Institute for Numerical and Applied  Mathematics, University of Göttingen
#
# Student helpers:
# Rachael Bailine (Matlab and Fortran version),  University of Delaware
# Patrick Rowe (Fortran version), University of Delaware
# Brian Rife (Fortran version), University of Delaware
# Marco Bedolla (Fortran version), University of Delaware
# Benedikt Rascher-Friesenhausen (Python version), University of Göttingen
#
# Special thanks to Laurence Marks at Northwestern University and
# Peter Blaha at the Technical University of Vienna who provided much
# of the inspiration for SAMSARA.
"""
Contains all implementations of hessian products.
"""
# The following lines are no longer part of the block comment above
# because this creates duplicates with Sphinx automatic API generation
#
#     The following lines 
#     Hessian functions
#     =================

#     Some introductory text.

#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.BFGS1_product
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.Broyden1_product
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.Broyden2_product
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.invBFGS1_product
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.invBroyden1_product
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.invBroyden2_product
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.hessian_wrapper

#     Scaler functions
#     ================

#     These functions are used to determine the `scale` variable for the
#     hessian functions.

#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.delta_invBFGS
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.delta_MSB2
#     .. autofunction:: proxtoolbox.algorithms.samsara.hessian.scaler_wrapper

from numpy import abs, c_, dot, diag, eye, r_, sqrt, tril, triu, zeros
from numpy.linalg import cholesky, norm, solve
from numpy.random import random, seed


# PYTHON PORT ANNOTATION
# Removed parameters dim and mem, as the value can be obtained from
# the shape of S_mat/Y_mat.
[docs]def delta_MSB2(S_mat, Y_mat, alpha):
    """Scaling for the MSB2 matrix.

    Parameters
    ----------
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradients.
    alpha : float
        A regularisation parameter.

    Returns
    -------
    float
        TODO

    See Also
    --------
    delta_invBFGS : Alternative function.
    samsara.samsara.Normsqresid, samsara.step_finder.Step_generator
    """
    dim = Y_mat.shape[1]
    # normalize the columns of Y_mat for stability
    # PYTHON PORT ANNOTATION
    # Replaced the (in Python) slow for-loop.
    ny = 1./norm(Y_mat, axis=0)

    # PYTHON PORT ANNOTATION
    # In Python we can't modify the input parameters as they are
    # just references to the originals in the calling function.
    Yc_mat = dot(Y_mat, diag(ny))
    Sc_mat = dot(S_mat, diag(ny))

    # Look into doing a Cholesky factorization of Y'Y for stability
    M_mat = solve(dot(Yc_mat.T, Yc_mat) + alpha*eye(dim, dim), Yc_mat.T)
    seed(5489)   # sets the same seed for the same 'random' vector
    v0_vec = random(S_mat.shape[0])   # used to compute Rayleigh quotient

    delta0 = 0
    delta = 10

    RQ_iter = 0
    v_vec = dot(Sc_mat, dot(M_mat, v0_vec))
    while abs((delta - delta0)/delta) > 1e-5 and RQ_iter < 10:
        RQ_iter += 1
        v0_vec = v_vec/sqrt(dot(v_vec, v_vec))
        delta0 = delta
        v_vec = dot(Sc_mat, dot(M_mat, v0_vec))
        delta = dot(v0_vec, v_vec)

    return abs(delta)


# PYTHON PORT ANNOTATION
# Removed parameter dim, as the value can be obtained from
# the shape of S_mat or Y_mat.
[docs]def delta_invBFGS(mem, S_mat, Y_mat):
    """Scaling for the inverse BFGS matrix

    Parameters
    ----------
    mem : int
        The step to use.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradients.

    Returns
    -------
    float
        TODO

    See Also
    --------
    BFGS1_product : TODO
    delta_MSB2 : Alternative function.
    """
    return abs(dot(S_mat[:, mem-1], Y_mat[:, mem-1]))\
        / dot(Y_mat[:, mem-1], Y_mat[:, mem-1])


[docs]def scaler_wrapper(Scaler, mem, S_mat, Y_mat, alpha):
    """A wrapper for all scaler functions.

    Used to have unified function calls.

    Parameters
    ----------
    Scaler : function
        The scaler function to use.
    mem : int
        The step to use.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradients.

    Returns
    -------
    float
        TODO

    See Also
    --------
    delta_MSB2, delta_invBFGS
    """
    if Scaler is delta_invBFGS:
        return delta_invBFGS(mem, S_mat, Y_mat)

    if Scaler is delta_MSB2:
        return delta_MSB2(S_mat, Y_mat, alpha)


[docs]def BFGS1_product(u_vec, S_mat, Y_mat, scale):
    """An implementation of the BFGS algorithm.

    Parameters
    ----------
    u_vec : array_like
        The vector to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradient differences.
    scale : float
        The initial scaling.

    Returns
    -------
    array_like
        TODO

    See Also
    --------
    delta_invBFGS : TODO
    Broyden1_product, Broyden2_product
    """
    STY_mat = dot(S_mat.T, Y_mat)
    D_mat = diag(diag(STY_mat))
    L_mat = tril(STY_mat, -1)

    Di_mat = diag(diag(STY_mat)**(-1))
    R_mat = 1./scale*dot(S_mat.T, S_mat) + dot(L_mat, dot(Di_mat, L_mat.T))
    # PYTHON PORT ANNOTATION
    # `cholesky` returns a lower triangular matrix in Python, not an
    # upper one as in MATLAB.
    R_mat = cholesky(R_mat).T

    Dsqrt_mat = diag(diag(STY_mat)**.5)
    Disqrt_mat = diag(diag(STY_mat)**(-.5))
    Phi_mat = c_[Y_mat, (1./scale)*S_mat]
    Gam1_mat = r_[c_[-Dsqrt_mat, dot(Disqrt_mat, L_mat.T)],
                  c_[zeros(D_mat.shape), R_mat]]
    Gam2_mat = r_[c_[Dsqrt_mat, zeros(D_mat.shape)],
                  c_[-dot(L_mat, Disqrt_mat), R_mat.T]]

    v_vec = dot(Phi_mat.T, u_vec)
    v_vec = solve(Gam2_mat, v_vec)
    v_vec = solve(Gam1_mat, v_vec)
    v_vec = (1./scale)*u_vec - dot(Phi_mat, v_vec)

    return v_vec


[docs]def invBFGS1_product(u_vec, S_mat, Y_mat, scale):
    """Stable inverse-BFGS matrix product.

    Parameters
    ----------
    u_vec : array_like
        The vector to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradient differences.
    scale : float
        The initial scaling.

    Returns
    -------
    array_like
        TODO

    See Also
    --------
    invBroyden1_product, invBroyden2_product
    """
    STY_mat = dot(S_mat.T, Y_mat)
    D_mat = diag(diag(STY_mat))
    R_mat = triu(STY_mat + 1e-11*eye(STY_mat.shape[0], STY_mat.shape[1]), 0)
    Rt_mat = R_mat.T

    v1_vec = dot(S_mat.T, u_vec)
    v2_vec = scale * dot(Y_mat.T, u_vec)
    v3_vec = solve(R_mat, v1_vec)
    v3_vec = dot(D_mat + scale*dot(Y_mat.T, Y_mat), v3_vec) - v2_vec
    v3_vec = solve(Rt_mat, v3_vec)
    v2_vec = -solve(R_mat, v1_vec)
    v_vec = scale*u_vec + dot(S_mat, v3_vec) + scale*dot(Y_mat, v2_vec)

    return v_vec


[docs]def Broyden1_product(mem, u_vec, S_mat, Y_mat, scale):
    """An implementation of Broyden's method.

    Parameters
    ----------
    mem : int
        TODO
    u_vec : array_like
        The vector to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradient differences.
    scale : float
        The initial scaling.

    Returns
    -------
    array_like
        TODO

    See Also
    --------
    Broyden2_product, BFGS1_product
    """
    tmpzw = dot(S_mat[:, mem-1], u_vec) / dot(S_mat[:, mem-1], S_mat[:, mem-1])
    Wtmp_vec = u_vec.copy()
    Ztmp_vec = tmpzw*Y_mat[:, mem-1]

    # PYTHON PORT ANNOTATION
    # Changed the order of the loop for improved readability.
    for j in range(mem-1, 0, -1):
        Wtmp_vec = Wtmp_vec - tmpzw*S_mat[:, j]
        tmpzw = dot(S_mat[:, j-1], Wtmp_vec) / dot(S_mat[:, j-1],
                                                   S_mat[:, j-1])
        Ztmp_vec = Ztmp_vec + tmpzw*Y_mat[:, j-1]

    Wtmp_vec = Wtmp_vec - tmpzw*S_mat[:, 0]
    v_vec = scale*Wtmp_vec + Ztmp_vec

    return v_vec


[docs]def invBroyden1_product(u_vec, S_mat, Y_mat, scale, alpha):
    """An implementation of the inverse Broyden's method.

    Parameters
    ----------
    u_vec : array_like
        The vector to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradient differences.
    scale : float
        The initial scaling.
    alpha : float
        A regularization parameter.

    Returns
    -------
    array_like
        TODO

    See Also
    --------
    invBFGS1_product, invBroyden2_product
    """
    dim = Y_mat.shape[1]
    M_mat = -tril(dot(S_mat.T, S_mat), -1)
    v_vec = solve(M_mat + scale*dot(S_mat.T, Y_mat) + alpha*eye(dim, dim),
                  scale*dot(S_mat.T, u_vec))
    v_vec = scale*u_vec - dot(scale*Y_mat - S_mat, v_vec)

    return v_vec


[docs]def Broyden2_product(mem, u_vec, S_mat, Y_mat, scale):
    """An implementation of Broyden's method.

    Parameters
    ----------
    mem : int
        How many old steps to consider.
    u_vec : array_like
        The vector to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradient differences.
    scale : float
        The initial scaling.

    Returns
    -------
    array_like
        TODO

    See Also
    --------
    Broyden1_product, BFGS1_product
    """
    tmpzw = dot(Y_mat[:, mem-1], u_vec) / dot(Y_mat[:, mem-1], Y_mat[:, mem-1])
    Wtmp_vec = u_vec.copy()
    Ztmp_vec = tmpzw*S_mat[:, mem-1]

    # PYTHON PORT ANNOTATION
    # Changed the order of the loop for improved readability.
    for j in range(mem-1, 0, -1):
        Wtmp_vec = Wtmp_vec - tmpzw*Y_mat[:, j]
        tmpzw = dot(Y_mat[:, j-1], Wtmp_vec) / dot(Y_mat[:, j-1],
                                                   Y_mat[:, j-1])
        Ztmp_vec = Ztmp_vec + tmpzw*S_mat[:, j-1]

    Wtmp_vec = Wtmp_vec - tmpzw*Y_mat[:, 0]
    v_vec = scale*Wtmp_vec + Ztmp_vec

    return v_vec


[docs]def invBroyden2_product(u_vec, S_mat, Y_mat, scale, alpha):
    """An implementation of the inverse Broyden's method.

    Parameters
    ----------
    u_vec : array_like
        The vector to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradient differences.
    scale : float
        The initial scaling.
    alpha : float
        A regularization parameter.

    Returns
    -------
    array_like
        TODO

    See Also
    --------
    invBFGS1_product, invBroyden1_product
    """
    dim = Y_mat.shape[1]
    M_mat = -tril(dot(Y_mat.T, Y_mat), -1)
    v_vec = solve(M_mat + 1./scale*dot(Y_mat.T, S_mat) + alpha*eye(dim, dim),
                  1./scale*dot(Y_mat.T, u_vec))
    v_vec = 1./scale*u_vec - dot(1./scale*S_mat - Y_mat, v_vec)

    return v_vec


[docs]def hessian_wrapper(Hessian_product, mem, u_vec, S_mat, Y_mat,
                    scale, alpha):
    """Wrapper for all hessian functions in this module.

    Used to have unified parameters across all function calls.

    Parameters
    ----------
    Hessian_product : function
        The hessian function to use.
    mem : int
        The step to use.
    u_vec : array_like
        The vector to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradients.
    scale : float
        The initial scaling.
    alpha : float
        A regularisation parameter.

    Returns
    -------
    array_like
        TODO

    See Also
    --------
    BFGS1_product, invBFGS1_product, Broyden1_product, invBroyden1_product,
    Broyden2_product, invBroyden2_product
    """
    if Hessian_product is BFGS1_product:
        return BFGS1_product(u_vec, S_mat, Y_mat, scale)

    if Hessian_product is invBFGS1_product:
        return invBFGS1_product(u_vec, S_mat, Y_mat, scale)

    if Hessian_product is Broyden1_product:
        return Broyden1_product(mem, u_vec, S_mat, Y_mat, scale)

    if Hessian_product is invBroyden1_product:
        return invBroyden1_product(u_vec, S_mat, Y_mat, scale, alpha)

    if Hessian_product is Broyden2_product:
        return Broyden2_product(mem, u_vec, S_mat, Y_mat, scale)

    if Hessian_product is invBroyden2_product:
        return invBroyden2_product(u_vec, S_mat, Y_mat, scale, alpha)