| """ |
| Functions |
| --------- |
| .. autosummary:: |
| :toctree: generated/ |
| |
| line_search_armijo |
| line_search_wolfe1 |
| line_search_wolfe2 |
| scalar_search_wolfe1 |
| scalar_search_wolfe2 |
| |
| """ |
| from warnings import warn |
|
|
| from ._dcsrch import DCSRCH |
| import numpy as np |
|
|
| __all__ = ['LineSearchWarning', 'line_search_wolfe1', 'line_search_wolfe2', |
| 'scalar_search_wolfe1', 'scalar_search_wolfe2', |
| 'line_search_armijo'] |
|
|
class LineSearchWarning(RuntimeWarning):
    """Warning emitted (via ``warnings.warn``) when a line search in this
    module fails to converge or cannot satisfy its step-size constraints."""
    pass
|
|
|
|
| def _check_c1_c2(c1, c2): |
| if not (0 < c1 < c2 < 1): |
| raise ValueError("'c1' and 'c2' do not satisfy" |
| "'0 < c1 < c2 < 1'.") |
|
|
|
|
| |
| |
| |
|
|
def line_search_wolfe1(f, fprime, xk, pk, gfk=None,
                       old_fval=None, old_old_fval=None,
                       args=(), c1=1e-4, c2=0.9, amax=50, amin=1e-8,
                       xtol=1e-14):
    """
    As `scalar_search_wolfe1` but do a line search to direction `pk`

    Parameters
    ----------
    f : callable
        Function `f(x)`
    fprime : callable
        Gradient of `f`
    xk : array_like
        Current point
    pk : array_like
        Search direction
    gfk : array_like, optional
        Gradient of `f` at point `xk`
    old_fval : float, optional
        Value of `f` at point `xk`
    old_old_fval : float, optional
        Value of `f` at point preceding `xk`

    The rest of the parameters are the same as for `scalar_search_wolfe1`.

    Returns
    -------
    stp, f_count, g_count, fval, old_fval
        As in `scalar_search_wolfe1`
    gval : array
        Gradient of `f` at the final point

    Notes
    -----
    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1``.

    """
    if gfk is None:
        gfk = fprime(xk, *args)

    # Single-element lists act as mutable cells so the closures below can
    # update the evaluation counters and the most recent gradient.
    fcount = [0]
    gcount = [0]
    last_grad = [gfk]

    def phi(alpha):
        # Objective restricted to the search ray xk + alpha*pk.
        fcount[0] += 1
        return f(xk + alpha * pk, *args)

    def derphi(alpha):
        # Directional derivative along pk; also caches the full gradient.
        last_grad[0] = fprime(xk + alpha * pk, *args)
        gcount[0] += 1
        return np.dot(last_grad[0], pk)

    stp, fval, old_fval = scalar_search_wolfe1(
        phi, derphi, old_fval, old_old_fval, np.dot(gfk, pk),
        c1=c1, c2=c2, amax=amax, amin=amin, xtol=xtol)

    return stp, fcount[0], gcount[0], fval, old_fval, last_grad[0]
|
|
|
|
def scalar_search_wolfe1(phi, derphi, phi0=None, old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9,
                         amax=50, amin=1e-8, xtol=1e-14):
    """
    Scalar function search for alpha that satisfies strong Wolfe conditions

    alpha > 0 is assumed to be a descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Function at point `alpha`
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0
    old_phi0 : float, optional
        Value of phi at previous point
    derphi0 : float, optional
        Value derphi at 0
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax, amin : float, optional
        Maximum and minimum step size
    xtol : float, optional
        Relative tolerance for an acceptable step.

    Returns
    -------
    alpha : float
        Step size, or None if no suitable step was found
    phi : float
        Value of `phi` at the new point `alpha`
    phi0 : float
        Value of `phi` at `alpha=0`

    Notes
    -----
    Uses routine DCSRCH from MINPACK.

    Parameters `c1` and `c2` must satisfy ``0 < c1 < c2 < 1`` as described in [1]_.

    References
    ----------

    .. [1] Nocedal, J., & Wright, S. J. (2006). Numerical optimization.
       In Springer Series in Operations Research and Financial Engineering.
       (Springer Series in Operations Research and Financial Engineering).
       Springer Nature.

    """
    _check_c1_c2(c1, c2)

    # Fill in any missing values at alpha = 0.
    if phi0 is None:
        phi0 = phi(0.)
    if derphi0 is None:
        derphi0 = derphi(0.)

    # Initial trial step: extrapolate from the previous decrease of the
    # objective, capped at 1; fall back to a unit step if the guess is
    # negative or no history is available.
    if old_phi0 is not None and derphi0 != 0:
        trial = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
        alpha1 = 1.0 if trial < 0 else trial
    else:
        alpha1 = 1.0

    maxiter = 100

    # Delegate the actual search to the MINPACK DCSRCH routine.
    searcher = DCSRCH(phi, derphi, c1, c2, xtol, amin, amax)
    stp, phi1, phi0, task = searcher(
        alpha1, phi0=phi0, derphi0=derphi0, maxiter=maxiter
    )

    return stp, phi1, phi0
|
|
|
|
# Public alias: `line_search` points at the MINPACK-based Wolfe line search.
line_search = line_search_wolfe1
|
|
|
|
| |
| |
| |
|
|
| |
|
|
def line_search_wolfe2(f, myfprime, xk, pk, gfk=None, old_fval=None,
                       old_old_fval=None, args=(), c1=1e-4, c2=0.9, amax=None,
                       extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    Parameters
    ----------
    f : callable f(x,*args)
        Objective function.
    myfprime : callable f'(x,*args)
        Objective function gradient.
    xk : ndarray
        Starting point.
    pk : ndarray
        Search direction. The search direction must be a descent direction
        for the algorithm to converge.
    gfk : ndarray, optional
        Gradient value for x=xk (xk being the current parameter
        estimate). Will be recomputed if omitted.
    old_fval : float, optional
        Function value for x=xk. Will be recomputed if omitted.
    old_old_fval : float, optional
        Function value for the point preceding x=xk.
    args : tuple, optional
        Additional arguments passed to objective function.
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, x, f, g)``
        returning a boolean. Arguments are the proposed step ``alpha``
        and the corresponding ``x``, ``f`` and ``g`` values. The line search
        accepts the value of ``alpha`` only if this
        callable returns ``True``. If the callable returns ``False``
        for the step length, the algorithm will continue with
        new iterates. The callable is only called for iterates
        satisfying the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha : float or None
        Alpha for which ``x_new = x0 + alpha * pk``,
        or None if the line search algorithm did not converge.
    fc : int
        Number of function evaluations made.
    gc : int
        Number of gradient evaluations made.
    new_fval : float or None
        New function value ``f(x_new)=f(x0+alpha*pk)``,
        or None if the line search algorithm did not converge.
    old_fval : float
        Old function value ``f(x0)``.
    new_slope : float or None
        The local slope along the search direction at the
        new value ``<myfprime(x_new), pk>``,
        or None if the line search algorithm did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    The search direction `pk` must be a descent direction (e.g.
    ``-myfprime(xk)``) to find a step length that satisfies the strong Wolfe
    conditions. If the search direction is not a descent direction (e.g.
    ``myfprime(xk)``), then `alpha`, `new_fval`, and `new_slope` will be None.

    Examples
    --------
    >>> import numpy as np
    >>> from scipy.optimize import line_search

    A objective function and its gradient are defined.

    >>> def obj_func(x):
    ...     return (x[0])**2+(x[1])**2
    >>> def obj_grad(x):
    ...     return [2*x[0], 2*x[1]]

    We can find alpha that satisfies strong Wolfe conditions.

    >>> start_point = np.array([1.8, 1.7])
    >>> search_gradient = np.array([-1.0, -1.0])
    >>> line_search(obj_func, obj_grad, start_point, search_gradient)
    (1.0, 2, 1, 1.1300000000000001, 6.13, [1.6, 1.4])

    """
    # Mutable cells shared with the closures below: evaluation counters and
    # the most recently computed gradient (with the alpha it was computed at).
    num_fev = [0]
    num_gev = [0]
    latest_grad = [None]
    latest_grad_alpha = [None]

    def phi(alpha):
        # Objective restricted to the search ray.
        num_fev[0] += 1
        return f(xk + alpha * pk, *args)

    def derphi(alpha):
        # Directional derivative along pk; caches the full gradient so the
        # caller can recover it without another evaluation.
        num_gev[0] += 1
        latest_grad[0] = myfprime(xk + alpha * pk, *args)
        latest_grad_alpha[0] = alpha
        return np.dot(latest_grad[0], pk)

    if gfk is None:
        gfk = myfprime(xk, *args)
    derphi0 = np.dot(gfk, pk)

    if extra_condition is None:
        extra_condition2 = None
    else:
        def extra_condition2(alpha, phi_val):
            # Ensure latest_grad holds the gradient at `alpha` before the
            # user callback sees it.
            if latest_grad_alpha[0] != alpha:
                derphi(alpha)
            return extra_condition(alpha, xk + alpha * pk, phi_val,
                                   latest_grad[0])

    alpha_star, phi_star, old_fval, derphi_star = scalar_search_wolfe2(
        phi, derphi, old_fval, old_old_fval, derphi0, c1, c2, amax,
        extra_condition2, maxiter=maxiter)

    if derphi_star is None:
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)
    else:
        # Replace the scalar directional derivative with the full gradient
        # at the accepted point; derphi() stored it on its last evaluation.
        derphi_star = latest_grad[0]

    return alpha_star, num_fev[0], num_gev[0], phi_star, old_fval, derphi_star
|
|
|
|
def scalar_search_wolfe2(phi, derphi, phi0=None,
                         old_phi0=None, derphi0=None,
                         c1=1e-4, c2=0.9, amax=None,
                         extra_condition=None, maxiter=10):
    """Find alpha that satisfies strong Wolfe conditions.

    alpha > 0 is assumed to be a descent direction.

    Parameters
    ----------
    phi : callable phi(alpha)
        Objective scalar function.
    derphi : callable phi'(alpha)
        Objective function derivative. Returns a scalar.
    phi0 : float, optional
        Value of phi at 0.
    old_phi0 : float, optional
        Value of phi at previous point.
    derphi0 : float, optional
        Value of derphi at 0
    c1 : float, optional
        Parameter for Armijo condition rule.
    c2 : float, optional
        Parameter for curvature condition rule.
    amax : float, optional
        Maximum step size.
    extra_condition : callable, optional
        A callable of the form ``extra_condition(alpha, phi_value)``
        returning a boolean. The line search accepts the value
        of ``alpha`` only if this callable returns ``True``.
        If the callable returns ``False`` for the step length,
        the algorithm will continue with new iterates.
        The callable is only called for iterates satisfying
        the strong Wolfe conditions.
    maxiter : int, optional
        Maximum number of iterations to perform.

    Returns
    -------
    alpha_star : float or None
        Best alpha, or None if the line search algorithm did not converge.
    phi_star : float
        phi at alpha_star.
    phi0 : float
        phi at 0.
    derphi_star : float or None
        derphi at alpha_star, or None if the line search algorithm
        did not converge.

    Notes
    -----
    Uses the line search algorithm to enforce strong Wolfe
    conditions. See Wright and Nocedal, 'Numerical Optimization',
    1999, pp. 59-61.

    """
    _check_c1_c2(c1, c2)

    if phi0 is None:
        phi0 = phi(0.)

    if derphi0 is None:
        derphi0 = derphi(0.)

    alpha0 = 0
    # Initial trial step: extrapolate from the previous decrease of the
    # objective, capped at 1; a unit step is used when no history exists.
    if old_phi0 is not None and derphi0 != 0:
        alpha1 = min(1.0, 1.01*2*(phi0 - old_phi0)/derphi0)
    else:
        alpha1 = 1.0

    if alpha1 < 0:
        alpha1 = 1.0

    if amax is not None:
        alpha1 = min(alpha1, amax)

    phi_a1 = phi(alpha1)

    phi_a0 = phi0
    derphi_a0 = derphi0

    # Default extra condition accepts every Wolfe-satisfying step.
    if extra_condition is None:
        def extra_condition(alpha, phi):
            return True

    # Bracketing phase (Algorithm 3.5 in Wright & Nocedal): grow the step
    # until the minimizer is bracketed, then hand over to _zoom.
    for i in range(maxiter):
        if alpha1 == 0 or (amax is not None and alpha0 > amax):
            # Step collapsed to zero (rounding) or the bracket ran past
            # amax: report failure.  Note phi0 is rebound to old_phi0 so the
            # caller receives the pre-step function value.
            alpha_star = None
            phi_star = phi0
            phi0 = old_phi0
            derphi_star = None

            if alpha1 == 0:
                msg = 'Rounding errors prevent the line search from converging'
            else:
                msg = "The line search algorithm could not find a solution " + \
                      f"less than or equal to amax: {amax}"

            warn(msg, LineSearchWarning, stacklevel=2)
            break

        not_first_iteration = i > 0
        if (phi_a1 > phi0 + c1 * alpha1 * derphi0) or \
           ((phi_a1 >= phi_a0) and not_first_iteration):
            # Sufficient decrease failed (or phi increased between trials):
            # the minimizer lies in [alpha0, alpha1]; refine with zoom.
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha0, alpha1, phi_a0,
                              phi_a1, derphi_a0, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        derphi_a1 = derphi(alpha1)
        if (abs(derphi_a1) <= -c2*derphi0):
            # Curvature condition also holds: strong Wolfe satisfied.
            if extra_condition(alpha1, phi_a1):
                alpha_star = alpha1
                phi_star = phi_a1
                derphi_star = derphi_a1
                break

        if (derphi_a1 >= 0):
            # Slope turned non-negative: the minimizer is bracketed in
            # [alpha1, alpha0] (note the reversed endpoint order).
            alpha_star, phi_star, derphi_star = \
                        _zoom(alpha1, alpha0, phi_a1,
                              phi_a0, derphi_a1, phi, derphi,
                              phi0, derphi0, c1, c2, extra_condition)
            break

        # No condition triggered: double the step (capped at amax) and loop.
        alpha2 = 2 * alpha1
        if amax is not None:
            alpha2 = min(alpha2, amax)
        alpha0 = alpha1
        alpha1 = alpha2
        phi_a0 = phi_a1
        phi_a1 = phi(alpha1)
        derphi_a0 = derphi_a1

    else:
        # maxiter exhausted: return the last trial point, but signal
        # non-convergence via derphi_star = None.
        alpha_star = alpha1
        phi_star = phi_a1
        derphi_star = None
        warn('The line search algorithm did not converge',
             LineSearchWarning, stacklevel=2)

    return alpha_star, phi_star, phi0, derphi_star
|
|
|
|
| def _cubicmin(a, fa, fpa, b, fb, c, fc): |
| """ |
| Finds the minimizer for a cubic polynomial that goes through the |
| points (a,fa), (b,fb), and (c,fc) with derivative at a of fpa. |
| |
| If no minimizer can be found, return None. |
| |
| """ |
| |
|
|
| with np.errstate(divide='raise', over='raise', invalid='raise'): |
| try: |
| C = fpa |
| db = b - a |
| dc = c - a |
| denom = (db * dc) ** 2 * (db - dc) |
| d1 = np.empty((2, 2)) |
| d1[0, 0] = dc ** 2 |
| d1[0, 1] = -db ** 2 |
| d1[1, 0] = -dc ** 3 |
| d1[1, 1] = db ** 3 |
| [A, B] = np.dot(d1, np.asarray([fb - fa - C * db, |
| fc - fa - C * dc]).flatten()) |
| A /= denom |
| B /= denom |
| radical = B * B - 3 * A * C |
| xmin = a + (-B + np.sqrt(radical)) / (3 * A) |
| except ArithmeticError: |
| return None |
| if not np.isfinite(xmin): |
| return None |
| return xmin |
|
|
|
|
| def _quadmin(a, fa, fpa, b, fb): |
| """ |
| Finds the minimizer for a quadratic polynomial that goes through |
| the points (a,fa), (b,fb) with derivative at a of fpa. |
| |
| """ |
| |
| with np.errstate(divide='raise', over='raise', invalid='raise'): |
| try: |
| D = fa |
| C = fpa |
| db = b - a * 1.0 |
| B = (fb - D - C * db) / (db * db) |
| xmin = a - C / (2.0 * B) |
| except ArithmeticError: |
| return None |
| if not np.isfinite(xmin): |
| return None |
| return xmin |
|
|
|
|
def _zoom(a_lo, a_hi, phi_lo, phi_hi, derphi_lo,
          phi, derphi, phi0, derphi0, c1, c2, extra_condition):
    """Zoom stage of approximate linesearch satisfying strong Wolfe conditions.

    Part of the optimization algorithm in `scalar_search_wolfe2`.

    Notes
    -----
    Implements Algorithm 3.6 (zoom) in Wright and Nocedal,
    'Numerical Optimization', 1999, pp. 61.

    """
    maxiter = 10
    i = 0
    delta1 = 0.2  # safeguard margin for the cubic interpolant
    delta2 = 0.1  # safeguard margin for the quadratic interpolant
    # (a_rec, phi_rec) remember the previously discarded endpoint; the cubic
    # interpolation below uses it as its third data point.
    phi_rec = phi0
    a_rec = 0
    while True:
        # Choose a trial step a_j inside the bracket [a_lo, a_hi].
        # The bracket endpoints may be in either order, so (a, b) below is
        # the sorted bracket.
        dalpha = a_hi - a_lo
        if dalpha < 0:
            a, b = a_hi, a_lo
        else:
            a, b = a_lo, a_hi

        # Interpolation cascade: try a cubic fit first (needs the recorded
        # third point, so it is skipped on the first iteration), then a
        # quadratic fit, finally plain bisection.  An interpolant landing
        # within cchk/qchk of a bracket edge is rejected as untrustworthy.
        if (i > 0):
            cchk = delta1 * dalpha
            a_j = _cubicmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi,
                            a_rec, phi_rec)
        # NOTE: when i == 0 the leading `or` short-circuits, so the unbound
        # a_j/cchk from the skipped branch above are never evaluated.
        if (i == 0) or (a_j is None) or (a_j > b - cchk) or (a_j < a + cchk):
            qchk = delta2 * dalpha
            a_j = _quadmin(a_lo, phi_lo, derphi_lo, a_hi, phi_hi)
            if (a_j is None) or (a_j > b-qchk) or (a_j < a+qchk):
                a_j = a_lo + 0.5*dalpha

        # Evaluate the trial point and update the bracket.
        phi_aj = phi(a_j)
        if (phi_aj > phi0 + c1*a_j*derphi0) or (phi_aj >= phi_lo):
            # Sufficient decrease fails at a_j: shrink from the high end.
            phi_rec = phi_hi
            a_rec = a_hi
            a_hi = a_j
            phi_hi = phi_aj
        else:
            derphi_aj = derphi(a_j)
            if abs(derphi_aj) <= -c2*derphi0 and extra_condition(a_j, phi_aj):
                # Strong Wolfe conditions (and the extra user condition)
                # hold at a_j: accept it.
                a_star = a_j
                val_star = phi_aj
                valprime_star = derphi_aj
                break
            if derphi_aj*(a_hi - a_lo) >= 0:
                # Slope points towards a_hi: a_lo becomes the new high end.
                phi_rec = phi_hi
                a_rec = a_hi
                a_hi = a_lo
                phi_hi = phi_lo
            else:
                phi_rec = phi_lo
                a_rec = a_lo
            # a_j becomes the new low end in either case.
            a_lo = a_j
            phi_lo = phi_aj
            derphi_lo = derphi_aj
        i += 1
        if (i > maxiter):
            # Failed to find a conforming step size within maxiter tries.
            a_star = None
            val_star = None
            valprime_star = None
            break
    return a_star, val_star, valprime_star
|
|
|
|
| |
| |
| |
|
|
def line_search_armijo(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """Minimize over alpha, the function ``f(xk+alpha pk)``.

    Parameters
    ----------
    f : callable
        Function to be minimized.
    xk : array_like
        Current point.
    pk : array_like
        Search direction.
    gfk : array_like
        Gradient of `f` at point `xk`.
    old_fval : float
        Value of `f` at point `xk`.
    args : tuple, optional
        Optional arguments.
    c1 : float, optional
        Value to control stopping criterion.
    alpha0 : scalar, optional
        Value of `alpha` at start of the optimization.

    Returns
    -------
    alpha
    f_count
    f_val_at_alpha

    Notes
    -----
    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57

    """
    xk = np.atleast_1d(xk)
    # Mutable cell so the closure can count function evaluations.
    n_fev = [0]

    def phi(alpha1):
        n_fev[0] += 1
        return f(xk + alpha1*pk, *args)

    # Reuse the known value of f(xk) when available to save an evaluation.
    phi0 = phi(0.) if old_fval is None else old_fval

    derphi0 = np.dot(gfk, pk)
    alpha, phi1 = scalar_search_armijo(phi, phi0, derphi0, c1=c1,
                                       alpha0=alpha0)
    return alpha, n_fev[0], phi1
|
|
|
|
def line_search_BFGS(f, xk, pk, gfk, old_fval, args=(), c1=1e-4, alpha0=1):
    """
    Compatibility wrapper for `line_search_armijo`
    """
    alpha, fcount, fval = line_search_armijo(f, xk, pk, gfk, old_fval,
                                             args=args, c1=c1, alpha0=alpha0)
    # The historical signature also reported a gradient-evaluation count;
    # Armijo backtracking never evaluates the gradient, so it is 0.
    return alpha, fcount, 0, fval
|
|
|
|
def scalar_search_armijo(phi, phi0, derphi0, c1=1e-4, alpha0=1, amin=0):
    """Minimize over alpha, the function ``phi(alpha)``.

    Uses the interpolation algorithm (Armijo backtracking) as suggested by
    Wright and Nocedal in 'Numerical Optimization', 1999, pp. 56-57

    alpha > 0 is assumed to be a descent direction.

    Returns
    -------
    alpha
    phi1

    """
    phi_a0 = phi(alpha0)
    # Accept the initial step if it already yields sufficient decrease.
    if phi_a0 <= phi0 + c1*alpha0*derphi0:
        return alpha0, phi_a0

    # Second trial: minimizer of the quadratic interpolant through phi(0),
    # phi'(0) and phi(alpha0).
    alpha1 = -(derphi0) * alpha0**2 / 2.0 / (phi_a0 - phi0 - derphi0 * alpha0)
    phi_a1 = phi(alpha1)

    if (phi_a1 <= phi0 + c1*alpha1*derphi0):
        return alpha1, phi_a1

    # Subsequent trials: minimizer of the cubic interpolant through phi(0),
    # phi'(0) and the two most recent trial values, safeguarded so the step
    # keeps shrinking.  Loop until acceptance or the step drops below amin.
    while alpha1 > amin:
        denom = alpha0**2 * alpha1**2 * (alpha1 - alpha0)
        r1 = phi_a1 - phi0 - derphi0*alpha1
        r0 = phi_a0 - phi0 - derphi0*alpha0
        ca = (alpha0**2 * r1 - alpha1**2 * r0) / denom
        cb = (-alpha0**3 * r1 + alpha1**3 * r0) / denom

        alpha2 = (-cb + np.sqrt(abs(cb**2 - 3 * ca * derphi0))) / (3.0*ca)
        phi_a2 = phi(alpha2)

        if (phi_a2 <= phi0 + c1*alpha2*derphi0):
            return alpha2, phi_a2

        # Safeguard: force a meaningful reduction of the step.
        if (alpha1 - alpha2) > alpha1 / 2.0 or (1 - alpha2/alpha1) < 0.96:
            alpha2 = alpha1 / 2.0

        alpha0, alpha1 = alpha1, alpha2
        phi_a0, phi_a1 = phi_a1, phi_a2

    # Step shrank below amin without satisfying the Armijo condition.
    return None, phi_a1
|
|
|
|
| |
| |
| |
|
|
| def _nonmonotone_line_search_cruz(f, x_k, d, prev_fs, eta, |
| gamma=1e-4, tau_min=0.1, tau_max=0.5): |
| """ |
| Nonmonotone backtracking line search as described in [1]_ |
| |
| Parameters |
| ---------- |
| f : callable |
| Function returning a tuple ``(f, F)`` where ``f`` is the value |
| of a merit function and ``F`` the residual. |
| x_k : ndarray |
| Initial position. |
| d : ndarray |
| Search direction. |
| prev_fs : float |
| List of previous merit function values. Should have ``len(prev_fs) <= M`` |
| where ``M`` is the nonmonotonicity window parameter. |
| eta : float |
| Allowed merit function increase, see [1]_ |
| gamma, tau_min, tau_max : float, optional |
| Search parameters, see [1]_ |
| |
| Returns |
| ------- |
| alpha : float |
| Step length |
| xp : ndarray |
| Next position |
| fp : float |
| Merit function value at next position |
| Fp : ndarray |
| Residual at next position |
| |
| References |
| ---------- |
| [1] "Spectral residual method without gradient information for solving |
| large-scale nonlinear systems of equations." W. La Cruz, |
| J.M. Martinez, M. Raydan. Math. Comp. **75**, 1429 (2006). |
| |
| """ |
| f_k = prev_fs[-1] |
| f_bar = max(prev_fs) |
|
|
| alpha_p = 1 |
| alpha_m = 1 |
| alpha = 1 |
|
|
| while True: |
| xp = x_k + alpha_p * d |
| fp, Fp = f(xp) |
|
|
| if fp <= f_bar + eta - gamma * alpha_p**2 * f_k: |
| alpha = alpha_p |
| break |
|
|
| alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k) |
|
|
| xp = x_k - alpha_m * d |
| fp, Fp = f(xp) |
|
|
| if fp <= f_bar + eta - gamma * alpha_m**2 * f_k: |
| alpha = -alpha_m |
| break |
|
|
| alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k) |
|
|
| alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p) |
| alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m) |
|
|
| return alpha, xp, fp, Fp |
|
|
|
|
def _nonmonotone_line_search_cheng(f, x_k, d, f_k, C, Q, eta,
                                   gamma=1e-4, tau_min=0.1, tau_max=0.5,
                                   nu=0.85):
    """
    Nonmonotone line search from [1]

    Parameters
    ----------
    f : callable
        Function returning a tuple ``(f, F)`` where ``f`` is the value
        of a merit function and ``F`` the residual.
    x_k : ndarray
        Initial position.
    d : ndarray
        Search direction.
    f_k : float
        Initial merit function value.
    C, Q : float
        Control parameters. On the first iteration, give values
        Q=1.0, C=f_k
    eta : float
        Allowed merit function increase, see [1]_
    nu, gamma, tau_min, tau_max : float, optional
        Search parameters, see [1]_

    Returns
    -------
    alpha : float
        Step length
    xp : ndarray
        Next position
    fp : float
        Merit function value at next position
    Fp : ndarray
        Residual at next position
    C : float
        New value for the control parameter C
    Q : float
        New value for the control parameter Q

    References
    ----------
    .. [1] W. Cheng & D.-H. Li, ''A derivative-free nonmonotone line
           search and its application to the spectral residual
           method'', IMA J. Numer. Anal. 29, 814 (2009).

    """
    alpha_p = 1
    alpha_m = 1
    alpha = 1

    while True:
        # Forward trial step +alpha_p * d.
        xp = x_k + alpha_p * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_p**2 * f_k:
            alpha = alpha_p
            break

        # Candidate reduced forward step from a quadratic model.
        alpha_tp = alpha_p**2 * f_k / (fp + (2*alpha_p - 1)*f_k)

        # Backward trial step -alpha_m * d.
        xp = x_k - alpha_m * d
        fp, Fp = f(xp)

        if fp <= C + eta - gamma * alpha_m**2 * f_k:
            alpha = -alpha_m
            break

        alpha_tm = alpha_m**2 * f_k / (fp + (2*alpha_m - 1)*f_k)

        # Safeguarded shrink: keep each new step within
        # [tau_min, tau_max] times its previous value.
        alpha_p = np.clip(alpha_tp, tau_min * alpha_p, tau_max * alpha_p)
        alpha_m = np.clip(alpha_tm, tau_min * alpha_m, tau_max * alpha_m)

    # Update the control parameters: Q is a decaying iteration weight and C
    # the corresponding weighted running average of merit values (see [1]).
    Q_next = nu * Q + 1
    C = (nu * Q * (C + eta) + fp) / Q_next
    Q = Q_next

    return alpha, xp, fp, Fp, C, Q
|
|