# -*- coding: utf8 -*-
# Development on SAMSARA began in 2007 with funding from the National
# Science Foundation of the USA, DMS-0712796.
#
# Contributors include:
#
# Russell Luke (main author)
# Institute for Numerical and Applied Mathematics, University of Göttingen
#
# Student helpers:
# Rachael Bailine (Matlab and Fortran version), University of Delaware
# Patrick Rowe (Fortran version), University of Delaware
# Brian Rife (Fortran version), University of Delaware
# Marco Bedolla (Fortran version), University of Delaware
# Benedikt Rascher-Friesenhausen (Python version), University of Göttingen
#
# Special thanks to Laurence Marks at Northwestern University and
# Peter Blaha at the Technical University of Vienna who provided much
# of the inspiration for SAMSARA.
"""
Contains all implementations of hessian products.
"""
# The following lines are no longer part of the block comment above
# because this creates duplicates with Sphinx automatic API generation
#
# The following lines
# Hessian functions
# =================
# Some introductory text.
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.BFGS1_product
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.Broyden1_product
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.Broyden2_product
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.invBFGS1_product
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.invBroyden1_product
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.invBroyden2_product
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.hessian_wrapper
# Scaler functions
# ================
# These functions are used to determine the `scale` variable for the
# hessian functions.
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.delta_invBFGS
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.delta_MSB2
# .. autofunction:: proxtoolbox.algorithms.samsara.hessian.scaler_wrapper
from numpy import abs, c_, dot, diag, eye, r_, sqrt, tril, triu, zeros
from numpy.linalg import cholesky, norm, solve
from numpy.random import random, seed
# PYTHON PORT ANNOTATION
# Removed parameters dim and mem, as the value can be obtained from
# the shape of S_mat/Y_mat.
[docs]def delta_MSB2(S_mat, Y_mat, alpha):
"""Scaling for the MSB2 matrix.
Parameters
----------
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradients.
alpha : float
A regularisation parameter.
Returns
-------
float
TODO
See Also
--------
delta_invBFGS : Alternative function.
samsara.samsara.Normsqresid, samsara.step_finder.Step_generator
"""
dim = Y_mat.shape[1]
# normalize the columns of Y_mat for stability
# PYTHON PORT ANNOTATION
# Replaced the (in Python) slow for-loop.
ny = 1./norm(Y_mat, axis=0)
# PYTHON PORT ANNOTATION
# In Python we can't modify the input parameters as they are
# just references to the originals in the calling function.
Yc_mat = dot(Y_mat, diag(ny))
Sc_mat = dot(S_mat, diag(ny))
# Look into doing a Cholesky factorization of Y'Y for stability
M_mat = solve(dot(Yc_mat.T, Yc_mat) + alpha*eye(dim, dim), Yc_mat.T)
seed(5489) # sets the same seed for the same 'random' vector
v0_vec = random(S_mat.shape[0]) # used to compute Rayleigh quotient
delta0 = 0
delta = 10
RQ_iter = 0
v_vec = dot(Sc_mat, dot(M_mat, v0_vec))
while abs((delta - delta0)/delta) > 1e-5 and RQ_iter < 10:
RQ_iter += 1
v0_vec = v_vec/sqrt(dot(v_vec, v_vec))
delta0 = delta
v_vec = dot(Sc_mat, dot(M_mat, v0_vec))
delta = dot(v0_vec, v_vec)
return abs(delta)
# PYTHON PORT ANNOTATION
# Removed parameter dim, as the value can be obtained from
# the shape of S_mat or Y_mat.
[docs]def delta_invBFGS(mem, S_mat, Y_mat):
"""Scaling for the inverse BFGS matrix
Parameters
----------
mem : int
The step to use.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradients.
Returns
-------
float
TODO
See Also
--------
BFGS1_product : TODO
delta_MSB2 : Alternative function.
"""
return abs(dot(S_mat[:, mem-1], Y_mat[:, mem-1]))\
/ dot(Y_mat[:, mem-1], Y_mat[:, mem-1])
[docs]def scaler_wrapper(Scaler, mem, S_mat, Y_mat, alpha):
"""A wrapper for all scaler functions.
Used to have unified function calls.
Parameters
----------
Scaler : function
The scaler function to use.
mem : int
The step to use.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradients.
Returns
-------
float
TODO
See Also
--------
delta_MSB2, delta_invBFGS
"""
if Scaler is delta_invBFGS:
return delta_invBFGS(mem, S_mat, Y_mat)
if Scaler is delta_MSB2:
return delta_MSB2(S_mat, Y_mat, alpha)
[docs]def BFGS1_product(u_vec, S_mat, Y_mat, scale):
"""An implementation of the BFGS algorithm.
Parameters
----------
u_vec : array_like
The vector to be multiplied.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradient differences.
scale : float
The initial scaling.
Returns
-------
array_like
TODO
See Also
--------
delta_invBFGS : TODO
Broyden1_product, Broyden2_product
"""
STY_mat = dot(S_mat.T, Y_mat)
D_mat = diag(diag(STY_mat))
L_mat = tril(STY_mat, -1)
Di_mat = diag(diag(STY_mat)**(-1))
R_mat = 1./scale*dot(S_mat.T, S_mat) + dot(L_mat, dot(Di_mat, L_mat.T))
# PYTHON PORT ANNOTATION
# `cholesky` returns a lower triangular matrix in Python, not an
# upper one as in MATLAB.
R_mat = cholesky(R_mat).T
Dsqrt_mat = diag(diag(STY_mat)**.5)
Disqrt_mat = diag(diag(STY_mat)**(-.5))
Phi_mat = c_[Y_mat, (1./scale)*S_mat]
Gam1_mat = r_[c_[-Dsqrt_mat, dot(Disqrt_mat, L_mat.T)],
c_[zeros(D_mat.shape), R_mat]]
Gam2_mat = r_[c_[Dsqrt_mat, zeros(D_mat.shape)],
c_[-dot(L_mat, Disqrt_mat), R_mat.T]]
v_vec = dot(Phi_mat.T, u_vec)
v_vec = solve(Gam2_mat, v_vec)
v_vec = solve(Gam1_mat, v_vec)
v_vec = (1./scale)*u_vec - dot(Phi_mat, v_vec)
return v_vec
[docs]def invBFGS1_product(u_vec, S_mat, Y_mat, scale):
"""Stable inverse-BFGS matrix product.
Parameters
----------
u_vec : array_like
The vector to be multiplied.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradient differences.
scale : float
The initial scaling.
Returns
-------
array_like
TODO
See Also
--------
invBroyden1_product, invBroyden2_product
"""
STY_mat = dot(S_mat.T, Y_mat)
D_mat = diag(diag(STY_mat))
R_mat = triu(STY_mat + 1e-11*eye(STY_mat.shape[0], STY_mat.shape[1]), 0)
Rt_mat = R_mat.T
v1_vec = dot(S_mat.T, u_vec)
v2_vec = scale * dot(Y_mat.T, u_vec)
v3_vec = solve(R_mat, v1_vec)
v3_vec = dot(D_mat + scale*dot(Y_mat.T, Y_mat), v3_vec) - v2_vec
v3_vec = solve(Rt_mat, v3_vec)
v2_vec = -solve(R_mat, v1_vec)
v_vec = scale*u_vec + dot(S_mat, v3_vec) + scale*dot(Y_mat, v2_vec)
return v_vec
[docs]def Broyden1_product(mem, u_vec, S_mat, Y_mat, scale):
"""An implementation of Broyden's method.
Parameters
----------
mem : int
TODO
u_vec : array_like
The vector to be multiplied.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradient differences.
scale : float
The initial scaling.
Returns
-------
array_like
TODO
See Also
--------
Broyden2_product, BFGS1_product
"""
tmpzw = dot(S_mat[:, mem-1], u_vec) / dot(S_mat[:, mem-1], S_mat[:, mem-1])
Wtmp_vec = u_vec.copy()
Ztmp_vec = tmpzw*Y_mat[:, mem-1]
# PYTHON PORT ANNOTATION
# Changed the order of the loop for improved readability.
for j in range(mem-1, 0, -1):
Wtmp_vec = Wtmp_vec - tmpzw*S_mat[:, j]
tmpzw = dot(S_mat[:, j-1], Wtmp_vec) / dot(S_mat[:, j-1],
S_mat[:, j-1])
Ztmp_vec = Ztmp_vec + tmpzw*Y_mat[:, j-1]
Wtmp_vec = Wtmp_vec - tmpzw*S_mat[:, 0]
v_vec = scale*Wtmp_vec + Ztmp_vec
return v_vec
[docs]def invBroyden1_product(u_vec, S_mat, Y_mat, scale, alpha):
"""An implementation of the inverse Broyden's method.
Parameters
----------
u_vec : array_like
The vector to be multiplied.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradient differences.
scale : float
The initial scaling.
alpha : float
A regularization parameter.
Returns
-------
array_like
TODO
See Also
--------
invBFGS1_product, invBroyden2_product
"""
dim = Y_mat.shape[1]
M_mat = -tril(dot(S_mat.T, S_mat), -1)
v_vec = solve(M_mat + scale*dot(S_mat.T, Y_mat) + alpha*eye(dim, dim),
scale*dot(S_mat.T, u_vec))
v_vec = scale*u_vec - dot(scale*Y_mat - S_mat, v_vec)
return v_vec
[docs]def Broyden2_product(mem, u_vec, S_mat, Y_mat, scale):
"""An implementation of Broyden's method.
Parameters
----------
mem : int
How many old steps to consider.
u_vec : array_like
The vector to be multiplied.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradient differences.
scale : float
The initial scaling.
Returns
-------
array_like
TODO
See Also
--------
Broyden1_product, BFGS1_product
"""
tmpzw = dot(Y_mat[:, mem-1], u_vec) / dot(Y_mat[:, mem-1], Y_mat[:, mem-1])
Wtmp_vec = u_vec.copy()
Ztmp_vec = tmpzw*S_mat[:, mem-1]
# PYTHON PORT ANNOTATION
# Changed the order of the loop for improved readability.
for j in range(mem-1, 0, -1):
Wtmp_vec = Wtmp_vec - tmpzw*Y_mat[:, j]
tmpzw = dot(Y_mat[:, j-1], Wtmp_vec) / dot(Y_mat[:, j-1],
Y_mat[:, j-1])
Ztmp_vec = Ztmp_vec + tmpzw*S_mat[:, j-1]
Wtmp_vec = Wtmp_vec - tmpzw*Y_mat[:, 0]
v_vec = scale*Wtmp_vec + Ztmp_vec
return v_vec
[docs]def invBroyden2_product(u_vec, S_mat, Y_mat, scale, alpha):
"""An implementation of the inverse Broyden's method.
Parameters
----------
u_vec : array_like
The vector to be multiplied.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradient differences.
scale : float
The initial scaling.
alpha : float
A regularization parameter.
Returns
-------
array_like
TODO
See Also
--------
invBFGS1_product, invBroyden1_product
"""
dim = Y_mat.shape[1]
M_mat = -tril(dot(Y_mat.T, Y_mat), -1)
v_vec = solve(M_mat + 1./scale*dot(Y_mat.T, S_mat) + alpha*eye(dim, dim),
1./scale*dot(Y_mat.T, u_vec))
v_vec = 1./scale*u_vec - dot(1./scale*S_mat - Y_mat, v_vec)
return v_vec
[docs]def hessian_wrapper(Hessian_product, mem, u_vec, S_mat, Y_mat,
scale, alpha):
"""Wrapper for all hessian functions in this module.
Used to have unified parameters across all function calls.
Parameters
----------
Hessian_product : function
The hessian function to use.
mem : int
The step to use.
u_vec : array_like
The vector to be multiplied.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradients.
scale : float
The initial scaling.
alpha : float
A regularisation parameter.
Returns
-------
array_like
TODO
See Also
--------
BFGS1_product, invBFGS1_product, Broyden1_product, invBroyden1_product,
Broyden2_product, invBroyden2_product
"""
if Hessian_product is BFGS1_product:
return BFGS1_product(u_vec, S_mat, Y_mat, scale)
if Hessian_product is invBFGS1_product:
return invBFGS1_product(u_vec, S_mat, Y_mat, scale)
if Hessian_product is Broyden1_product:
return Broyden1_product(mem, u_vec, S_mat, Y_mat, scale)
if Hessian_product is invBroyden1_product:
return invBroyden1_product(u_vec, S_mat, Y_mat, scale, alpha)
if Hessian_product is Broyden2_product:
return Broyden2_product(mem, u_vec, S_mat, Y_mat, scale)
if Hessian_product is invBroyden2_product:
return invBroyden2_product(u_vec, S_mat, Y_mat, scale, alpha)