# -*- coding: utf8 -*-
# Development on SAMSARA began in 2007 with funding from the National
# Science Foundation of the USA, DMS-0712796.
#
# Contributors include:
#
# Russell Luke (main author)
# Institute for Numerical and Applied Mathematics, University of Göttingen
#
# Student helpers:
# Rachael Bailine (Matlab and Fortran version), University of Delaware
# Patrick Rowe (Fortran version), University of Delaware
# Brian Rife (Fortran version), University of Delaware
# Marco Bedolla (Fortran version), University of Delaware
# Benedikt Rascher-Friesenhausen (Python version), University of Göttingen
#
# Special thanks to Laurence Marks at Northwestern University and
# Peter Blaha at the Technical University of Vienna who provided much
# of the inspiration for SAMSARA.
"""
Contains all functions that are used to calculate the next step.
"""
# The following lines are no longer part of the block comment above
# because this creates duplicates with Sphinx automatic API generation
# Step finder
# ===========
# These functions are used in :class:`samsara.samsara.Samsara` to
# calculate the next step.
# .. autofunction:: proxtoolbox.algorithms.samsara.step_finder.Dogleg_QN
# .. autofunction:: proxtoolbox.algorithms.samsara.step_finder.Explicit_TR
# Step finder helper
# ==================
# These functions are used in the step finder functions.
# .. autofunction:: proxtoolbox.algorithms.samsara.step_finder.TRBFGS1
# .. autofunction:: proxtoolbox.algorithms.samsara.step_finder.inv_tGammPhiTPhi_product2
# Samsara helper
# ==============
# These functions are directly called in
# :class:`samsara.samsara.Samsara` to return the next step.
# .. autofunction:: proxtoolbox.algorithms.samsara.step_finder.Step_generator
# .. autofunction:: proxtoolbox.algorithms.samsara.step_finder.Step_length_update
from numpy import abs, amax, c_, diag, dot, r_, sum, sqrt, tril, triu, zeros
from numpy.linalg import cholesky, LinAlgError, norm, solve
from .hessian import hessian_wrapper, scaler_wrapper
from .history import history_wrapper
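# Call structure within this module: Step_generator obtains a trial step via
# step_wrapper, which dispatches to Dogleg_QN or Explicit_TR; Explicit_TR in
# turn falls back to TRBFGS1 (and hence inv_tGammPhiTPhi_product2) whenever
# the unconstrained quasi-Newton step leaves the trust region, and
# Step_length_update afterwards adjusts the trust region and step length.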
# PYTHON PORT ANNOTATION
# Renamed delinv to scaleinv, for consistency with renaming del (a
# Python keyword) to scale.
# Removed parameter dim, as it was not used.
def inv_tGammPhiTPhi_product2(mem, u_vec, S_mat, Y_mat, scaleinv, omega):
    """Apply the inverse of the 2*mem-by-2*mem block matrix built from
    tau*Gamma and Phi.T.dot(Phi), with Gamma and Phi as assembled in
    TRBFGS1 and tau = scaleinv + omega, to the vector u_vec.

    Parameters
    ----------
    mem : int
        How many steps to look back.
    u_vec : array_like
        The vector of length 2*mem to be multiplied.
    S_mat : array_like
        The matrix of steps.
    Y_mat : array_like
        The matrix of gradients.
    scaleinv : float
        The inverse of the initial scaling.
    omega : float
        The current estimate of the trust region multiplier (mu0 in
        TRBFGS1).

    Returns
    -------
    array_like
        The result of applying the inverse to u_vec.

    See Also
    --------
    TRBFGS1
    """
# Build the pieces:
tau = scaleinv + omega
# PYTHON PORT ANNOTATION
# Removed all variables for transposed matrices, that were only used once.
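    # D_mat, Rbar_mat and L_mat split S'Y into its diagonal, upper triangular
    # and strictly lower triangular parts, as in the compact representation of
    # limited-memory quasi-Newton matrices; K_mat and B_mat are the (Cholesky
    # factored) blocks used below to apply the inverse of the resulting
    # 2x2 block system.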
YtY_mat = dot(Y_mat.T, Y_mat)
St_mat = S_mat.T
StS_mat = dot(St_mat, S_mat)
StY_mat = dot(St_mat, Y_mat)
D_mat = diag(diag(StY_mat))
Rbar_mat = triu(StY_mat, 0)
L_mat = tril(StY_mat, -1)
K_mat = YtY_mat + tau*D_mat
try:
K_mat = cholesky(K_mat).T
except LinAlgError:
print('Not positive definite')
Kt_mat = K_mat.T
E_mat = scaleinv*Rbar_mat - omega*L_mat
Et_mat = E_mat.T
B_mat = omega*scaleinv*StS_mat\
+ dot(E_mat, solve(K_mat, solve(Kt_mat, Et_mat)))
try:
B_mat = cholesky(B_mat).T
except LinAlgError:
print('Not positive definite')
Bt_mat = B_mat.T
# Execute inverse multiplication:
u01_vec = u_vec[0:mem]
u02_vec = u_vec[mem:2*mem]
u11_vec = solve(Bt_mat, u01_vec)
u11_vec = solve(B_mat, u11_vec)
u21_vec = dot(Et_mat, u11_vec)
u21_vec = solve(Kt_mat, u21_vec)
u21_vec = solve(K_mat, u21_vec)
u12_vec = solve(Kt_mat, u02_vec)
u12_vec = solve(K_mat, u12_vec)
u22_vec = dot(E_mat, u12_vec)
u22_vec = solve(Bt_mat, u22_vec)
u22_vec = solve(B_mat, u22_vec)
u32_vec = dot(Et_mat, u22_vec)
u32_vec = solve(Kt_mat, u32_vec)
u32_vec = solve(K_mat, u32_vec)
v_vec = u11_vec - u22_vec
v_vec = r_[v_vec, u32_vec - u21_vec - u12_vec]
return v_vec
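

# Hypothetical self-check (not part of the original SAMSARA code): under the
# reading of the code above, inv_tGammPhiTPhi_product2 applies the inverse of
# the dense block matrix tau*Gamma - Phi'Phi, with Gamma and Phi assembled as
# in TRBFGS1, so on a tiny convex quadratic its output can be compared with a
# dense solve. Everything below (names, sizes, seeds) is illustrative only.
def _selfcheck_inv_tGammPhiTPhi_product2():
    from numpy import eye
    from numpy.random import RandomState
    rng = RandomState(0)
    n, mem = 8, 3
    A = rng.standard_normal((n, n))
    H = dot(A, A.T) + n*eye(n)          # SPD model Hessian => s'y > 0 holds
    S = rng.standard_normal((n, mem))   # steps
    Y = dot(H, S)                       # gradient differences of a quadratic
    scaleinv, omega = 2.0, 0.5
    tau = scaleinv + omega
    u = rng.standard_normal(2*mem)
    # assemble the dense counterpart tau*Gamma - Phi'Phi explicitly
    StS, StY, YtY = dot(S.T, S), dot(S.T, Y), dot(Y.T, Y)
    D_mat, L_low = diag(diag(StY)), tril(StY, -1)
    Gamma = r_[c_[scaleinv*StS, L_low], c_[L_low.T, -D_mat]]
    Phi = c_[scaleinv*S, Y]
    M = tau*Gamma - dot(Phi.T, Phi)
    v_fast = inv_tGammPhiTPhi_product2(mem, u, S, Y, scaleinv, omega)
    v_dense = solve(M, u)
    print('inv_tGammPhiTPhi_product2 self-check residual:',
          norm(v_fast - v_dense))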
# PYTHON PORT ANNOTATION
# Renamed del to scale, as del is a keyword in Python.
# Removed parameter dim, as it was not needed.
def TRBFGS1(mem, u_vec, ngrad, S_mat, Y_mat, tr, scale):
    """Compute a trust region step from the compact limited-memory
    quasi-Newton (BFGS) representation.

Parameters
----------
mem : int
How many steps to look back.
u_vec : array_like
The negative gradient vector.
ngrad : float
The norm of the current gradient.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradients.
tr : float
The radius of the trust region.
scale : float
The initial scaling.
Returns
-------
d_vec : array_like
The next step.
See Also
--------
Explicit_TR : Calling function.
inv_tGammPhiTPhi_product2 : Used in this function.
"""
# PYTHON PORT ANNOTATION
    # Removed temp_mat where it was only used once before being reassigned.
scaleinv = 1./scale
Phi_mat = c_[scaleinv*S_mat, Y_mat]
v0_vec = dot(Phi_mat.T, u_vec)
# OPT Remove temp_mat
temp_mat = S_mat.T
StS_mat = dot(temp_mat, S_mat)
StY_mat = dot(temp_mat, Y_mat)
# PYTHON PORT ANNOTATION
# Replaced spdiags with diag, as the sparsity of the matrix was
# never used.
D_mat = diag(diag(StY_mat))
YtY_mat = dot(Y_mat.T, Y_mat)
temp_mat = scaleinv*StY_mat
Psi_mat = r_[c_[scaleinv**2*StS_mat, temp_mat], c_[temp_mat.T, YtY_mat]]
temp_mat = tril(StY_mat, -1)
Gamma_mat = r_[c_[scaleinv*StS_mat, temp_mat], c_[temp_mat.T, -D_mat]]
Maxit_TRsub = 12
Tol_TRsub = 1e-10
Iter_TRsub = 0
dmu = 1.
mu_max = ngrad/tr
mu0 = mu_max/2.
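    # Iterate on the multiplier mu0 of the trust region constraint:
    # mu0 = 0 would give the unconstrained quasi-Newton step, mu_max is an
    # upper bound derived from ||grad||/tr, and the Newton-like update dmu
    # below drives the length of the resulting step towards the radius tr.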
while Iter_TRsub <= Maxit_TRsub and abs(dmu) > Tol_TRsub:
Iter_TRsub += 1
tau = scaleinv + mu0
v1_vec = inv_tGammPhiTPhi_product2(mem, v0_vec, S_mat, Y_mat,
scaleinv, mu0)
v2_vec = dot(Gamma_mat, v1_vec)
v2_vec = inv_tGammPhiTPhi_product2(mem, v2_vec, S_mat, Y_mat,
scaleinv, mu0)
temp = dot(v0_vec, v1_vec)
sigma = dot(v1_vec, dot(Psi_mat, v1_vec)) + 2*temp + ngrad**2
if sigma < 0 or Iter_TRsub == Maxit_TRsub:
sigma = 0.
# PYTHON PORT ANNOTATION
# Removed sqrtsigma, as it was only used once.
temp = sqrt(sigma) - tau*tr
Delta = sigma + tau*(dot(v1_vec, dot(Psi_mat, v2_vec))
+ dot(v0_vec, v2_vec))
dmu = (sigma/Delta)*(temp/tr)
mu1 = mu0 + dmu
if mu1 >= 0:
if mu1 >= mu_max:
if mu1 == mu_max:
print('Upper bound on mu is wrong.')
else:
mu0 = mu_max
else:
mu0 = mu1
else:
mu0 = .2*mu0
temp = dot(Phi_mat, v1_vec)
d_vec = (u_vec + temp)/tau
return d_vec
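

# Hypothetical usage sketch (not part of the original SAMSARA code): build a
# few secant pairs from a convex quadratic, pick a trust region radius smaller
# than the unconstrained quasi-Newton step and inspect the returned step
# length, which is expected to end up close to the radius. The scaling
# s'y/y'y is one common choice; all names and numbers here are illustrative.
def _demo_TRBFGS1():
    from numpy import eye
    from numpy.random import RandomState
    rng = RandomState(1)
    n, mem = 10, 4
    A = rng.standard_normal((n, n))
    H = dot(A, A.T) + n*eye(n)          # SPD model Hessian
    S = rng.standard_normal((n, mem))
    Y = dot(H, S)                       # secant pairs: Y = H*S
    grad = rng.standard_normal(n)
    ngrad = norm(grad)
    scale = dot(S[:, -1], Y[:, -1])/dot(Y[:, -1], Y[:, -1])
    tr = 1e-2                           # small radius to activate the constraint
    d_vec = TRBFGS1(mem, -grad, ngrad, S, Y, tr, scale)
    print('||d_vec|| =', norm(d_vec), ' tr =', tr)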
# PYTHON PORT ANNOTATION
# Renamed iter to it, as iter is a built-in function in Python.
def Step_generator(Step_finder, Scaler, History, Hessian_product,
invHessian_product, it, mem, c, beta, gamma, x_mat,
gradf_mat, ngradf_vec, stepsize_vec, tr, trstep,
alpha_0, verbose):
"""Calculates the direction of the next step and it's inital length.
Parameters
----------
Step_finder : function
TODO
Scaler : function
TODO
History : function
TODO
Hessian_product : function
TODO
invHessian_product : function
TODO
it : int
The current iteration.
mem : int
TODO
c : float
TODO
beta : float
TODO
gamma : float
TODO
x_mat : array_like
TODO
gradf_mat : array_like
TODO
ngradf_vec : array_like
TODO
stepsize_vec : array_like, modified
TODO. Gets modified.
tr : float
TODO
trstep : int
TODO
alpha_0 : float
TODO
verbose : bool
TODO
Returns
-------
d_vec : array_like
The next step.
"""
dim = x_mat.shape[0]
######################################################################
# choose search direction and length:
######################################################################
if mem > 0:
S_mat = zeros((dim, mem))
Y_mat = zeros((dim, mem))
S_mat, Y_mat = history_wrapper(History, mem+1,
x_mat[:, -mem-2:-1],
gradf_mat[:, -mem-2:-1])
delta = scaler_wrapper(Scaler, mem, S_mat, Y_mat, alpha_0)
alpha = alpha_0
d_vec = step_wrapper(Step_finder, Hessian_product,
invHessian_product, mem,
gradf_mat[:, -2],
ngradf_vec[it], S_mat, Y_mat, tr,
delta, alpha, verbose)
stepsize_vec[it] = norm(d_vec)
else:
# steepest descent direction
delta = 0
alpha = alpha_0
        # if we get to this option, the step MUST have been accepted
        # and the iteration counter it updated
d_vec = -stepsize_vec[it]*gradf_mat[:, -2]/ngradf_vec[it]
if abs(stepsize_vec[it] - tr) < 1e-11:
trstep += 1
DDf = dot(gradf_mat[:, -2], d_vec)
while DDf > 0:
if verbose:
print('The Step generator yields a direction of nondescent. ')
# shrink memory and try again
if mem > 2:
S_mat = S_mat[:, 1:mem]
Y_mat = Y_mat[:, 1:mem]
delta = scaler_wrapper(Scaler, mem-1, S_mat, Y_mat, alpha_0)
alpha = alpha_0
            d_vec = step_wrapper(Step_finder, Hessian_product,
                                 invHessian_product, mem-1,
                                 gradf_mat[:, -2],
                                 ngradf_vec[it], S_mat, Y_mat,
                                 tr, delta, alpha, verbose)
else:
# steepest descent
d_vec = -gamma*stepsize_vec[it]*gradf_mat[:, -2]\
/ ngradf_vec[it]
mem -= 1
DDf = dot(gradf_mat[:, -2], d_vec)
##################################################
# end choose search direction and length:
##################################################
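    # directional derivative thresholds reused by Step_length_update: cDDf
    # enters the (nonmonotone) sufficient decrease test and bDDf the curvature
    # test on the new directional derivative DDfnew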
cDDf = c*DDf
bDDf = beta*DDf
return delta, alpha, d_vec, stepsize_vec, trstep, DDf, cDDf, bDDf, mem
# PYTHON PORT ANNOTATION
# Renamed iter to it as iter is a built-in function in Python.
def Step_length_update(History, Hessian_product, update_conditions,
mem, it, x_mat, f_vec,
gradf_mat, d_vec, mu_vec, stepsize_vec, tr,
delta, alpha, eta1, eta2, eta3, cDDf, DDfnew,
bDDf, orthog_TOL, lower_step_bound, gamma):
"""Trust region adjustment routine.
Parameters
----------
History : function
TODO
Hessian_product : function
TODO
update_conditions : str
TODO
mem : int
TODO
it : int
The current iteration.
x_mat : array_like
TODO
f_vec : array_like
TODO
gradf_mat : array_like
TODO
d_vec : array_like, modified
TODO. Gets modified.
mu_vec : array_like, modified
TODO. Gets modified.
stepsize_vec : array_like, modified
TODO. Gets modified.
tr : float
TODO
delta : float
TODO
alpha : float
TODO
eta1 : float
TODO
eta2 : float
TODO
eta3 : float
TODO
cDDf : float
TODO
DDfnew : float
TODO
bDDf : float
TODO
orthog_TOL : float
TODO
lower_step_bound : float
TODO
gamma : float
TODO
Returns
-------
tr : float
TODO
mu_vec : array_like
TODO
cDDf : float
TODO
lower_step_bound : float
TODO
d_vec : array_like
TODO
stepsize_vec : array_like
TODO
accept : int
TODO
"""
if update_conditions == 'Exact':
##################################################
# exact line search via bisection
##################################################
if mu_vec[it] == 0:
tr = 1e+15
lower_step_bound = 0
mu_vec[it] = 1
min_d = lower_step_bound
max_d = tr
nd = stepsize_vec[it]
if abs(DDfnew)/stepsize_vec[it] <= orthog_TOL:
accept = 1
else:
accept = -2
if f_vec[it+1] <= f_vec[it] and DDfnew/nd < -orthog_TOL:
# step too short:
if abs(nd-max_d)/(max_d-min_d) < 1e-2:
max_d = 1e+7*max_d
tr = max_d
d_vec = 2*d_vec
else:
# increase step length by one half the interval to
# the outer bound
d_vec = min(nd+max_d/2., 10.*nd)*d_vec/nd
min_d = nd
elif f_vec[it+1] <= f_vec[it] and DDfnew/nd > orthog_TOL:
# step too long, but not by too much:
# decrease step length by one half the interval to the
# inner bound
d_vec = (nd+min_d)/2.*(d_vec/nd)
max_d = nd
tr = max_d
elif f_vec[it+1] > f_vec[it]:
# step too long, by a lot:
# decrease step length by one half the current steplength
max_d = nd
tr = max_d
d_vec = .5*(min_d+nd)*(d_vec/nd)
lower_step_bound = min_d
stepsize_vec[it] = norm(d_vec)
# do backtracking linesearch/ adjust the trust region
else:
if mem > 0:
S_mat, Y_mat = history_wrapper(History, mem+1,
x_mat[:, -mem-2:-1],
gradf_mat[:, -mem-2:-1])
##################################################
# Adjust trust region:
##################################################
# actual change/predicted change
# look-back - this is a type of nonmonotone trust region adjustment
p = max(mem-1, 1)
pind = max(it-p, 0)
# PYTHON PORT ANNOTATION
# Added initialisation of pred here, so that it always exists.
pred = 0.
if mem > 0:
pred = dot(gradf_mat[:, -2], d_vec) \
+ .5*dot(d_vec, hessian_wrapper(Hessian_product, mem,
d_vec, S_mat, Y_mat, delta,
alpha))
actual = f_vec[it+1] - f_vec[it]
mu_vec[it] = actual/pred
# the following warning was commented out in the Matlab version
#if abs(pred-actual)/abs(pred) > 1e+5:
# print('Warning: step length may be too small -- samsara results might be at numerical accuracy.')
# print('Try increasing your step length tolerance.')
else:
mu_vec[it] = (f_vec[it+1]-f_vec[it])\
/ dot(gradf_mat[:, -2], d_vec)
if mem > 0:
if mu_vec[it] <= 0 or pred > 0:
tr = min(.1*stepsize_vec[it-1], .1*tr)
accept = -1
elif sum(mu_vec[pind+1:it+1])/(it-pind) >= eta1:
tr = min(5.*tr, 1e+15)
accept = 1
elif sum(mu_vec[pind+1:it+1])/(it-pind) >= eta2:
tr = min(2.*tr, 1e+15)
accept = 1
elif sum(mu_vec[pind+1:it+1])/(it-pind) > eta3:
# Nocedal & Wright would suggest leaving tr the same
tr = 1.2*tr
accept = 1
else:
tr = min(stepsize_vec[it], .25*tr)
if f_vec[it+1] <= amax(f_vec[max(0, it-p):it+1])\
+ cDDf and DDfnew >= bDDf:
accept = 1
elif f_vec[it+1] <= amax(f_vec[max(0, it-p):it+1])\
and stepsize_vec[it] < 2.*stepsize_vec[it-1]:
# still got decrease, so will keep the
# information, but not advance the point
accept = 0
else:
# no decrease and stepsize might be too small,
# shrink memory and don't keep info
accept = -1
else:
max_d = tr
nd = stepsize_vec[it]
if lower_step_bound >= max_d:
min_d = 0
else:
min_d = lower_step_bound
            # Weakness of this method: in general we do not know what max_d
            # should be, and if it is underestimated the routine will stagnate.
if abs(nd-max_d)/(max_d-min_d) < 1e-2:
max_d *= 1e+7
tr = max_d
##################################################
# nonmonotone bisection line search
##################################################
p = max(mem, 1)
if f_vec[it+1] <= amax(f_vec[max(0, it-p):it+1])+cDDf\
and DDfnew >= bDDf:
# Wolfe conditions satisfied, do not change step.
accept = 1
else:
accept = -2
if f_vec[it+1] <= amax(f_vec[max(0, it-p):it+1])+cDDf\
and DDfnew < bDDf:
# probably underestimating the upper bound
if (max_d-min_d)/max_d <= 1e-2:
max_d *= 10.
tr = max_d
# step too short:
# increase step length to the outer bound
d_vec = min(.1*nd + .9*max_d, 100.*nd)*d_vec/nd
min_d = nd
accept = 1
elif f_vec[it+1] <= amax(f_vec[max(0, it-p):it+1])\
and DDfnew >= bDDf:
# step too long, but not by too much:
# decrease step length by one half the
# interval to the inner bound
d_vec = (nd+min_d)/2.*(d_vec/nd)
max_d = nd
tr = max_d
elif f_vec[it+1] >= f_vec[it]:
# step too long, by a lot:
# decrease step length by one half the current steplength
max_d = nd
d_vec = .5*(min_d+nd)*(d_vec/nd)
tr = max_d
cDDf *= gamma
lower_step_bound = min_d
stepsize_vec[it] = norm(d_vec)
##################################################
# end trust region adjust
##################################################
return tr, mu_vec, cDDf, lower_step_bound, d_vec, stepsize_vec, accept
# PYTHON PORT ANNOTATION
# Renamed del to scale, as del is a keyword in Python.
def Dogleg_QN(Hessian_product, invHessian_product, mem, grad_vec,
ngrad, S_mat, Y_mat, tr, scale, alpha):
"""Stable Quasi-Newton Dogleg.
Dogleg subroutine for line search methods.
Parameters
----------
Hessian_product : function
The dogleg method.
invHessian_product : function
        The quasi-Newton method.
mem : int
How far to look back.
grad_vec : array_like
The current gradient.
ngrad : float
The norm of the current gradient.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
        The matrix of gradients.
tr : float
The radius of the trust region.
scale : float
The initial scaling.
alpha : float
A regularisation parameter.
Returns
-------
d_vec : array_like
The next step. Equals the step calculated with the quasi
newton method, if that step already is within the trust
region.
    Notes
    -----
    Approximately solves the trust region subproblem

        min  grad_vec'*d + .5*d'*H*d   subject to  ||d|| <= tr,

    where H is the quasi-Newton model Hessian, by moving along the dogleg
    path between the steepest descent (Cauchy) point and the quasi-Newton
    point.

See Also
--------
step_wrapper : Wrapper for this function.
"""
dn = hessian_wrapper(invHessian_product, mem, -grad_vec,
S_mat, Y_mat, scale, alpha)
if norm(dn) <= tr: # to turn on set <= 1e+15
ddl = dn
else:
# initialization
u = -grad_vec/ngrad
v = hessian_wrapper(Hessian_product, mem, u, S_mat,
Y_mat, scale, alpha)
w = dot(u, v)
# predicted change at constrained solution
b = -tr*ngrad + .5*(tr**2)*w
# t_star solves min{-t ||grad_f||
# + 1/2*t^2* grad_f'*H*grad_f/||grad_f||^2}
# => t_star = ||grad_f||^3/grad_f'*H*grad_f
# from first order necessary conditions for optimality
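        # with w = u'*H*u = grad_f'*H*grad_f/||grad_f||^2 this reduces to
        # t_star = ||grad_f||/w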
t_star = ngrad/w
if t_star < 0 or t_star > tr:
if b > 0:
t = 0
else:
t = tr
else:
# predicted change at unconstrained solution
c = -.5*(ngrad**2)/w
# only want the t that solves
# min{-t ||grad_f|| + 1/2*t^2* grad_f'*H*grad_f/||grad_f||^2}
# on [0, tr]
if c < 0 or b < 0:
if c < b:
t = t_star
else:
t = tr
else:
t = 0
dsd = t*u
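        # dogleg interpolation: pick lamb so that the point
        # lamb*dn + (1-lamb)*dsd on the segment between the steepest descent
        # point dsd and the quasi-Newton point dn has length tr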
# PYTHON PORT ANNOTATION
# Replaced norm(dsd-dn)**2 with dot(dsd-dn, dsd-dn).
tmp = dot(dsd-dn, dsd-dn)
disc = dot(dsd-dn, dsd)**2 + tmp*(tr**2 - dot(dsd, dsd))
lamb = max((sqrt(disc) - dot(dn-dsd, dsd))/tmp, 0)
ddl = lamb*dn + (1-lamb)*dsd
return ddl
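

# Hypothetical standalone illustration (not part of the original SAMSARA code)
# of the same dogleg construction with a small dense SPD Hessian instead of
# the hessian_wrapper machinery: take the Newton point dn = solve(H, -g) and
# the (clipped) Cauchy point along -g, and return the point of length tr on
# the segment between them whenever dn leaves the trust region. All names and
# numbers here are illustrative only.
def _demo_dense_dogleg():
    from numpy import eye
    from numpy.random import RandomState
    rng = RandomState(2)
    n = 6
    A = rng.standard_normal((n, n))
    H = dot(A, A.T) + n*eye(n)          # SPD model Hessian
    g = rng.standard_normal(n)
    tr = 0.05
    dn = solve(H, -g)                   # (quasi-)Newton point
    if norm(dn) <= tr:
        ddl = dn
    else:
        u = -g/norm(g)
        # Cauchy step length ||g||^3/(g'Hg), clipped to the radius
        t = min(norm(g)**3/dot(g, dot(H, g)), tr)
        dsd = t*u
        tmp = dot(dsd - dn, dsd - dn)
        disc = dot(dsd - dn, dsd)**2 + tmp*(tr**2 - dot(dsd, dsd))
        lamb = max((sqrt(disc) - dot(dn - dsd, dsd))/tmp, 0)
        ddl = lamb*dn + (1 - lamb)*dsd
    print('||ddl|| =', norm(ddl), ' tr =', tr)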
# PYTHON PORT ANNOTATION
# Renamed del to scale, as del is a keyword in Python.
def Explicit_TR(invHessian_product, mem, grad_vec, ngrad,
S_mat, Y_mat, tr, scale, alpha, verbose):
"""Stable Explicit Trust Region.
Stable Explicit Trust Region for Symmetric PSD matrix secant methods.
Parameters
----------
invHessian_product : function
        The quasi-Newton method.
mem : int
How many steps to look back.
grad_vec : array_like
The current gradient.
ngrad : float
The norm of the current gradient.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradients.
tr : float
The radius of the trust region.
scale : float
The initial scaling.
alpha : float
A regularisation parameter.
verbose : bool
If additional output should be printed.
Returns
-------
d_vec : array_like
The next step.
See Also
--------
step_wrapper : Wrapper for this function.
TRBFGS1, Dogleg_QN
"""
TRsub = TRBFGS1
dn_vec = hessian_wrapper(invHessian_product, mem, -grad_vec,
S_mat, Y_mat, scale, alpha)
if norm(dn_vec) <= tr: # to turn on set <= 1e+15
d_vec = dn_vec
else:
if verbose:
print('in trust region subproblem')
d_vec = TRsub(mem, -grad_vec, ngrad, S_mat, Y_mat, tr, scale)
return d_vec
# PYTHON PORT ANNOTATION
# Renamed del to scale, as del is a keyword in Python.
def step_wrapper(step_finder, Hessian_product, invHessian_product, mem,
grad_vec, ngrad, S_mat, Y_mat, tr, scale, alpha,
verbose):
"""A wrapper for all step finding functions.
The wrapper provides a unified function call for all step finders.
Parameters
----------
step_finder : function
The step finder to use (Dogleg_QN, Explicit_TR).
Hessian_product : function
The dogleg method for Dogleg_QN.
invHessian_product : function
        The quasi-Newton method.
mem : int
How many steps to look back.
grad_vec : array_like
The current gradient.
ngrad : float
The norm of the current gradient.
S_mat : array_like
The matrix of steps.
Y_mat : array_like
The matrix of gradients.
tr : float
The radius of the trust region.
scale : float
The initial scaling.
alpha : float
A regularisation parameter.
verbose : bool
If additional output should be printed.
Returns
-------
    d_vec : array_like
The next step as calculated by the `step_finder`.
See Also
--------
Dogleg_QN, Explicit_TR
"""
if step_finder is Explicit_TR:
return Explicit_TR(invHessian_product, mem, grad_vec, ngrad,
S_mat, Y_mat, tr, scale, alpha, verbose)
    if step_finder is Dogleg_QN:
        return Dogleg_QN(Hessian_product, invHessian_product, mem,
                         grad_vec, ngrad, S_mat, Y_mat, tr, scale, alpha)
    raise ValueError('Unknown step finder: {}'.format(step_finder))