| """ |
| Functions |
| --------- |
.. autosummary::
   :toctree: generated/

   fmin_l_bfgs_b
| |
| """ |
|
|
| |
| |
|
|
| |
|
|
| |
| |
| |
| |
| |
| |
|
|
| |
| |
|
|
| |
| |
| |
| |
| |
| |
| |
|
|
| |
|
|
| import numpy as np |
| from numpy import array, asarray, float64, zeros |
| from . import _lbfgsb |
| from ._optimize import (MemoizeJac, OptimizeResult, _call_callback_maybe_halt, |
| _wrap_callback, _check_unknown_options, |
| _prepare_scalar_function) |
from ._constraints import old_bound_to_new

from scipy.sparse.linalg import LinearOperator

__all__ = ['fmin_l_bfgs_b', 'LbfgsInvHessProduct']


# Status and task codes reported by the underlying L-BFGS-B routine,
# mapped to human-readable messages.
status_messages = {
| 0 : "START", |
| 1 : "NEW_X", |
| 2 : "RESTART", |
| 3 : "FG", |
| 4 : "CONVERGENCE", |
| 5 : "STOP", |
| 6 : "WARNING", |
| 7 : "ERROR", |
| 8 : "ABNORMAL" |
| } |
|
|
|
|
| task_messages = { |
| 0 : "", |
| 301 : "", |
| 302 : "", |
| 401 : "NORM OF PROJECTED GRADIENT <= PGTOL", |
| 402 : "RELATIVE REDUCTION OF F <= FACTR*EPSMCH", |
| 501 : "CPU EXCEEDING THE TIME LIMIT", |
| 502 : "TOTAL NO. OF F,G EVALUATIONS EXCEEDS LIMIT", |
| 503 : "PROJECTED GRADIENT IS SUFFICIENTLY SMALL", |
| 504 : "TOTAL NO. OF ITERATIONS REACHED LIMIT", |
| 505 : "CALLBACK REQUESTED HALT", |
| 601 : "ROUNDING ERRORS PREVENT PROGRESS", |
| 602 : "STP = STPMAX", |
| 603 : "STP = STPMIN", |
| 604 : "XTOL TEST SATISFIED", |
| 701 : "NO FEASIBLE SOLUTION", |
| 702 : "FACTR < 0", |
| 703 : "FTOL < 0", |
| 704 : "GTOL < 0", |
| 705 : "XTOL < 0", |
| 706 : "STP < STPMIN", |
| 707 : "STP > STPMAX", |
| 708 : "STPMIN < 0", |
| 709 : "STPMAX < STPMIN", |
| 710 : "INITIAL G >= 0", |
| 711 : "M <= 0", |
| 712 : "N <= 0", |
| 713 : "INVALID NBD", |
| } |
|
|
| def fmin_l_bfgs_b(func, x0, fprime=None, args=(), |
| approx_grad=0, |
| bounds=None, m=10, factr=1e7, pgtol=1e-5, |
| epsilon=1e-8, |
| iprint=-1, maxfun=15000, maxiter=15000, disp=None, |
| callback=None, maxls=20): |
| """ |
| Minimize a function func using the L-BFGS-B algorithm. |
| |
| Parameters |
| ---------- |
| func : callable f(x,*args) |
| Function to minimize. |
| x0 : ndarray |
| Initial guess. |
| fprime : callable fprime(x,*args), optional |
| The gradient of `func`. If None, then `func` returns the function |
| value and the gradient (``f, g = func(x, *args)``), unless |
| `approx_grad` is True in which case `func` returns only ``f``. |
| args : sequence, optional |
| Arguments to pass to `func` and `fprime`. |
| approx_grad : bool, optional |
| Whether to approximate the gradient numerically (in which case |
| `func` returns only the function value). |
| bounds : list, optional |
| ``(min, max)`` pairs for each element in ``x``, defining |
| the bounds on that parameter. Use None or +-inf for one of ``min`` or |
| ``max`` when there is no bound in that direction. |
| m : int, optional |
| The maximum number of variable metric corrections |
| used to define the limited memory matrix. (The limited memory BFGS |
| method does not store the full hessian but uses this many terms in an |
| approximation to it.) |
| factr : float, optional |
| The iteration stops when |
| ``(f^k - f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= factr * eps``, |
| where ``eps`` is the machine precision, which is automatically |
| generated by the code. Typical values for `factr` are: 1e12 for |
| low accuracy; 1e7 for moderate accuracy; 10.0 for extremely |
| high accuracy. See Notes for relationship to `ftol`, which is exposed |
| (instead of `factr`) by the `scipy.optimize.minimize` interface to |
| L-BFGS-B. |
| pgtol : float, optional |
| The iteration will stop when |
| ``max{|proj g_i | i = 1, ..., n} <= pgtol`` |
| where ``proj g_i`` is the i-th component of the projected gradient. |
    epsilon : float, optional
        Step size used when `approx_grad` is True, for numerically
        calculating the gradient.
| iprint : int, optional |
| Deprecated option that previously controlled the text printed on the |
| screen during the problem solution. Now the code does not emit any |
| output and this keyword has no function. |
| |
| .. deprecated:: 1.15.0 |
| This keyword is deprecated and will be removed from SciPy 1.17.0. |
| |
| disp : int, optional |
| Deprecated option that previously controlled the text printed on the |
| screen during the problem solution. Now the code does not emit any |
| output and this keyword has no function. |
| |
| .. deprecated:: 1.15.0 |
| This keyword is deprecated and will be removed from SciPy 1.17.0. |
| |
| maxfun : int, optional |
| Maximum number of function evaluations. Note that this function |
| may violate the limit because of evaluating gradients by numerical |
| differentiation. |
| maxiter : int, optional |
| Maximum number of iterations. |
| callback : callable, optional |
| Called after each iteration, as ``callback(xk)``, where ``xk`` is the |
| current parameter vector. |
| maxls : int, optional |
| Maximum number of line search steps (per iteration). Default is 20. |
| |
| Returns |
| ------- |
| x : array_like |
| Estimated position of the minimum. |
| f : float |
| Value of `func` at the minimum. |
| d : dict |
| Information dictionary. |
| |
| * d['warnflag'] is |
| |
| - 0 if converged, |
| - 1 if too many function evaluations or too many iterations, |
| - 2 if stopped for another reason, given in d['task'] |
| |
| * d['grad'] is the gradient at the minimum (should be 0 ish) |
| * d['funcalls'] is the number of function calls made. |
| * d['nit'] is the number of iterations. |
| |
| See also |
| -------- |
| minimize: Interface to minimization algorithms for multivariate |
| functions. See the 'L-BFGS-B' `method` in particular. Note that the |
| `ftol` option is made available via that interface, while `factr` is |
| provided via this interface, where `factr` is the factor multiplying |
| the default machine floating-point precision to arrive at `ftol`: |
| ``ftol = factr * numpy.finfo(float).eps``. |
| |
| Notes |
| ----- |
    SciPy uses a C-translated and modified version of the Fortran code
    L-BFGS-B v3.0 (released April 25, 2011, BSD-3 licensed). The original
    Fortran version was written by Ciyou Zhu, Richard Byrd, Jorge Nocedal,
    and Jose Luis Morales.
| |
| References |
| ---------- |
| * R. H. Byrd, P. Lu and J. Nocedal. A Limited Memory Algorithm for Bound |
| Constrained Optimization, (1995), SIAM Journal on Scientific and |
| Statistical Computing, 16, 5, pp. 1190-1208. |
| * C. Zhu, R. H. Byrd and J. Nocedal. L-BFGS-B: Algorithm 778: L-BFGS-B, |
| FORTRAN routines for large scale bound constrained optimization (1997), |
| ACM Transactions on Mathematical Software, 23, 4, pp. 550 - 560. |
| * J.L. Morales and J. Nocedal. L-BFGS-B: Remark on Algorithm 778: L-BFGS-B, |
| FORTRAN routines for large scale bound constrained optimization (2011), |
| ACM Transactions on Mathematical Software, 38, 1. |
| |
| Examples |
| -------- |
    Solve a linear regression problem via `fmin_l_bfgs_b`. To do this, first we
    define an objective function ``f(m, b) = sum((y - y_model)**2)``, where `y`
    describes the observations and `y_model` the prediction of the linear model
    as ``y_model = m*x + b``. The bounds for the parameters, ``m`` and ``b``,
    are arbitrarily chosen as ``(0, 5)`` and ``(5, 10)`` for this example.
| |
| >>> import numpy as np |
| >>> from scipy.optimize import fmin_l_bfgs_b |
| >>> X = np.arange(0, 10, 1) |
| >>> M = 2 |
| >>> B = 3 |
| >>> Y = M * X + B |
| >>> def func(parameters, *args): |
| ... x = args[0] |
| ... y = args[1] |
| ... m, b = parameters |
| ... y_model = m*x + b |
| ... error = sum(np.power((y - y_model), 2)) |
| ... return error |
| |
| >>> initial_values = np.array([0.0, 1.0]) |
| |
| >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y), |
| ... approx_grad=True) |
    >>> x_opt, f_opt
    (array([1.99999999, 3.00000006]), 1.7746231151323805e-14)  # may vary
| |
| The optimized parameters in ``x_opt`` agree with the ground truth parameters |
| ``m`` and ``b``. Next, let us perform a bound constrained optimization using |
| the `bounds` parameter. |
| |
| >>> bounds = [(0, 5), (5, 10)] |
    >>> x_opt, f_opt, info = fmin_l_bfgs_b(func, x0=initial_values, args=(X, Y),
    ...                                    approx_grad=True, bounds=bounds)
    >>> x_opt, f_opt
    (array([1.65990508, 5.31649385]), 15.721334516453945)  # may vary
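
    Alternatively, the gradient can be supplied analytically instead of being
    approximated. As a sketch (the gradient function below is our own addition,
    not part of the original example): when `fprime` is None and `approx_grad`
    is False, `func` must return both the value and the gradient:

    >>> def func_and_grad(parameters, *args):
    ...     x, y = args
    ...     m, b = parameters
    ...     r = y - (m*x + b)
    ...     grad = np.array([-2*np.sum(r*x), -2*np.sum(r)])
    ...     return np.sum(r**2), grad
    >>> x_opt, f_opt, info = fmin_l_bfgs_b(func_and_grad, x0=initial_values,
    ...                                    args=(X, Y))
    >>> np.allclose(x_opt, [M, B])
    True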
| """ |
| |
    # handle the fprime/approx_grad combinations
    if approx_grad:
| fun = func |
| jac = None |
| elif fprime is None: |
| fun = MemoizeJac(func) |
| jac = fun.derivative |
| else: |
| fun = func |
        jac = fprime

    # build options
    callback = _wrap_callback(callback)
| opts = {'maxcor': m, |
| 'ftol': factr * np.finfo(float).eps, |
| 'gtol': pgtol, |
| 'eps': epsilon, |
| 'maxfun': maxfun, |
| 'maxiter': maxiter, |
| 'callback': callback, |
            'maxls': maxls}

    res = _minimize_lbfgsb(fun, x0, args=args, jac=jac, bounds=bounds,
| **opts) |
| d = {'grad': res['jac'], |
| 'task': res['message'], |
| 'funcalls': res['nfev'], |
| 'nit': res['nit'], |
| 'warnflag': res['status']} |
| f = res['fun'] |
    x = res['x']

    return x, f, d


def _minimize_lbfgsb(fun, x0, args=(), jac=None, bounds=None,
| disp=None, maxcor=10, ftol=2.2204460492503131e-09, |
| gtol=1e-5, eps=1e-8, maxfun=15000, maxiter=15000, |
| iprint=-1, callback=None, maxls=20, |
| finite_diff_rel_step=None, **unknown_options): |
| """ |
| Minimize a scalar function of one or more variables using the L-BFGS-B |
| algorithm. |
| |
| Options |
| ------- |
| disp : None or int |
| Deprecated option that previously controlled the text printed on the |
| screen during the problem solution. Now the code does not emit any |
| output and this keyword has no function. |
| |
| .. deprecated:: 1.15.0 |
| This keyword is deprecated and will be removed from SciPy 1.17.0. |
| |
| maxcor : int |
| The maximum number of variable metric corrections used to |
| define the limited memory matrix. (The limited memory BFGS |
| method does not store the full hessian but uses this many terms |
| in an approximation to it.) |
| ftol : float |
| The iteration stops when ``(f^k - |
| f^{k+1})/max{|f^k|,|f^{k+1}|,1} <= ftol``. |
| gtol : float |
| The iteration will stop when ``max{|proj g_i | i = 1, ..., n} |
| <= gtol`` where ``proj g_i`` is the i-th component of the |
| projected gradient. |
    eps : float or ndarray
        If ``jac is None``, the absolute step size used for numerical
        approximation of the Jacobian via forward differences.
| maxfun : int |
| Maximum number of function evaluations. Note that this function |
| may violate the limit because of evaluating gradients by numerical |
| differentiation. |
| maxiter : int |
| Maximum number of iterations. |
| iprint : int, optional |
| Deprecated option that previously controlled the text printed on the |
| screen during the problem solution. Now the code does not emit any |
| output and this keyword has no function. |
| |
| .. deprecated:: 1.15.0 |
| This keyword is deprecated and will be removed from SciPy 1.17.0. |
| |
| maxls : int, optional |
| Maximum number of line search steps (per iteration). Default is 20. |
| finite_diff_rel_step : None or array_like, optional |
| If ``jac in ['2-point', '3-point', 'cs']`` the relative step size to |
| use for numerical approximation of the jacobian. The absolute step |
| size is computed as ``h = rel_step * sign(x) * max(1, abs(x))``, |
| possibly adjusted to fit into the bounds. For ``method='3-point'`` |
| the sign of `h` is ignored. If None (default) then step is selected |
| automatically. |
| |
| Notes |
| ----- |
| The option `ftol` is exposed via the `scipy.optimize.minimize` interface, |
| but calling `scipy.optimize.fmin_l_bfgs_b` directly exposes `factr`. The |
| relationship between the two is ``ftol = factr * numpy.finfo(float).eps``. |
| I.e., `factr` multiplies the default machine floating-point precision to |
| arrive at `ftol`. |
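
    Examples
    --------
    A minimal sketch (our own toy objective, not part of the original docs)
    showing the relationship: passing ``factr=1e7`` to `fmin_l_bfgs_b`
    corresponds to the following `ftol` passed to `scipy.optimize.minimize`:

    >>> import numpy as np
    >>> from scipy.optimize import minimize
    >>> ftol = 1e7 * np.finfo(float).eps  # equivalent to factr=1e7
    >>> res = minimize(lambda z: np.sum((z - 1)**2), x0=[0.0, 0.0],
    ...                method='L-BFGS-B', options={'ftol': ftol})
    >>> bool(res.success)
    True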
| |
| """ |
| _check_unknown_options(unknown_options) |
| m = maxcor |
| pgtol = gtol |
    factr = ftol / np.finfo(float).eps

    x0 = asarray(x0).ravel()
    n, = x0.shape

    # historically old-style bounds were/are expected by lbfgsb.
    # That's still the case, but we'll deal with new-style from here on;
    # it's easier.
    if bounds is None:
        pass
    elif len(bounds) != n:
        raise ValueError('length of x0 != length of bounds')
    else:
        bounds = np.array(old_bound_to_new(bounds))

        # check bounds
        if (bounds[0] > bounds[1]).any():
            raise ValueError(
                "LBFGSB - one of the lower bounds is greater than an upper bound."
            )

        # initial vector must lie within the bounds, otherwise ScalarFunction
        # and approx_derivative will cause problems
        x0 = np.clip(x0, bounds[0], bounds[1])

    sf = _prepare_scalar_function(fun, x0, jac=jac, args=args, epsilon=eps,
| bounds=bounds, |
                                  finite_diff_rel_step=finite_diff_rel_step)

    func_and_grad = sf.fun_and_grad

    nbd = zeros(n, np.int32)
| low_bnd = zeros(n, float64) |
| upper_bnd = zeros(n, float64) |
    # nbd codes used by the underlying routine: 0 -> unbounded,
    # 1 -> lower bound only, 2 -> both bounds, 3 -> upper bound only.
    # Finite bounds are marked with the sentinel 1 before the lookup below.
    bounds_map = {(-np.inf, np.inf): 0,
                  (1, np.inf): 1,
                  (1, 1): 2,
                  (-np.inf, 1): 3}

    if bounds is not None:
| for i in range(0, n): |
| L, U = bounds[0, i], bounds[1, i] |
| if not np.isinf(L): |
| low_bnd[i] = L |
| L = 1 |
| if not np.isinf(U): |
| upper_bnd[i] = U |
| U = 1 |
            nbd[i] = bounds_map[L, U]

    if not maxls > 0:
        raise ValueError('maxls must be positive.')

    x = array(x0, dtype=np.float64)
    # f and g must be double precision so the compiled routine can update
    # them in place
    f = array(0.0, dtype=np.float64)
    g = zeros((n,), dtype=np.float64)
    # workspace and state arrays required by the underlying L-BFGS-B routine
    wa = zeros(2*m*n + 5*n + 11*m*m + 8*m, float64)
    iwa = zeros(3*n, dtype=np.int32)
    task = zeros(2, dtype=np.int32)
    ln_task = zeros(2, dtype=np.int32)
    lsave = zeros(4, dtype=np.int32)
    isave = zeros(44, dtype=np.int32)
    dsave = zeros(29, dtype=float64)

    n_iterations = 0

    while True:
        # g may become float32 if a user provides a function that calculates
        # the gradient in float32 (see gh-18730). The underlying routine
        # expects float64, so upcast it.
        g = g.astype(np.float64)

        _lbfgsb.setulb(m, x, low_bnd, upper_bnd, nbd, f, g, factr, pgtol, wa,
                       iwa, task, lsave, isave, dsave, maxls, ln_task)

        if task[0] == 3:
            # The minimization routine wants f and g at the current x.
            # Note that interruptions due to maxfun are postponed
            # until the completion of the current minimization iteration.
            # Overwrite f and g:
            f, g = func_and_grad(x)
        elif task[0] == 1:
            # new iteration
            n_iterations += 1

            intermediate_result = OptimizeResult(x=x, fun=f)
| if _call_callback_maybe_halt(callback, intermediate_result): |
| task[0] = 5 |
| task[1] = 505 |
| if n_iterations >= maxiter: |
| task[0] = 5 |
| task[1] = 504 |
| elif sf.nfev > maxfun: |
| task[0] = 5 |
| task[1] = 502 |
| else: |
            break

    if task[0] == 4:
        # converged
        warnflag = 0
| elif sf.nfev > maxfun or n_iterations >= maxiter: |
| warnflag = 1 |
| else: |
        warnflag = 2

    # these two portions of the workspace hold the correction pairs
    # (s_i, y_i) maintained by the limited-memory update
    s = wa[0: m*n].reshape(m, n)
    y = wa[m*n: 2*m*n].reshape(m, n)

    # isave[30] is the total number of BFGS updates performed so far
    n_bfgs_updates = isave[30]

    n_corrs = min(n_bfgs_updates, maxcor)
    hess_inv = LbfgsInvHessProduct(s[:n_corrs], y[:n_corrs])

    msg = status_messages[task[0]] + ": " + task_messages[task[1]]

    return OptimizeResult(fun=f, jac=g, nfev=sf.nfev,
| njev=sf.ngev, |
| nit=n_iterations, status=warnflag, message=msg, |
                          x=x, success=(warnflag == 0), hess_inv=hess_inv)


class LbfgsInvHessProduct(LinearOperator):
| """Linear operator for the L-BFGS approximate inverse Hessian. |
| |
| This operator computes the product of a vector with the approximate inverse |
| of the Hessian of the objective function, using the L-BFGS limited |
| memory approximation to the inverse Hessian, accumulated during the |
| optimization. |
| |
| Objects of this class implement the ``scipy.sparse.linalg.LinearOperator`` |
| interface. |
| |
| Parameters |
| ---------- |
| sk : array_like, shape=(n_corr, n) |
| Array of `n_corr` most recent updates to the solution vector. |
| (See [1]). |
| yk : array_like, shape=(n_corr, n) |
| Array of `n_corr` most recent updates to the gradient. (See [1]). |
| |
| References |
| ---------- |
| .. [1] Nocedal, Jorge. "Updating quasi-Newton matrices with limited |
| storage." Mathematics of computation 35.151 (1980): 773-782. |
| |
| """ |
|
|
| def __init__(self, sk, yk): |
| """Construct the operator.""" |
| if sk.shape != yk.shape or sk.ndim != 2: |
| raise ValueError('sk and yk must have matching shape, (n_corrs, n)') |
        n_corrs, n = sk.shape

        super().__init__(dtype=np.float64, shape=(n, n))

        self.sk = sk
| self.yk = yk |
| self.n_corrs = n_corrs |
        # rho_i = 1 / (y_i^T s_i), the curvature scalars used by the
        # recursion in [1]
        self.rho = 1 / np.einsum('ij,ij->i', sk, yk)

    def _matvec(self, x):
| """Efficient matrix-vector multiply with the BFGS matrices. |
| |
| This calculation is described in Section (4) of [1]. |
| |
| Parameters |
| ---------- |
| x : ndarray |
| An array with shape (n,) or (n,1). |
| |
| Returns |
| ------- |
| y : ndarray |
| The matrix-vector product |
| |
| """ |
| s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho |
| q = np.array(x, dtype=self.dtype, copy=True) |
| if q.ndim == 2 and q.shape[1] == 1: |
            q = q.reshape(-1)

        alpha = np.empty(n_corrs)

        # first loop of the two-loop recursion described in [1]
        for i in range(n_corrs-1, -1, -1):
            alpha[i] = rho[i] * np.dot(s[i], q)
            q = q - alpha[i]*y[i]

        # second loop of the recursion
        r = q
        for i in range(n_corrs):
            beta = rho[i] * np.dot(y[i], r)
            r = r + s[i] * (alpha[i] - beta)

        return r

    def todense(self):
| """Return a dense array representation of this operator. |
| |
| Returns |
| ------- |
| arr : ndarray, shape=(n, n) |
| An array with the same shape and containing |
| the same data represented by this `LinearOperator`. |
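
        Examples
        --------
        A small consistency sketch (our own, using random data) checking the
        dense form against the matrix-vector product:

        >>> import numpy as np
        >>> from scipy.optimize import LbfgsInvHessProduct
        >>> rng = np.random.default_rng(1234)
        >>> sk = rng.random((3, 5))
        >>> yk = sk + rng.random((3, 5))  # ensures s_i^T y_i > 0
        >>> op = LbfgsInvHessProduct(sk, yk)
        >>> v = rng.random(5)
        >>> np.allclose(op.todense() @ v, op.matvec(v))
        True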
| |
| """ |
| s, y, n_corrs, rho = self.sk, self.yk, self.n_corrs, self.rho |
| I_arr = np.eye(*self.shape, dtype=self.dtype) |
        Hk = I_arr

        # repeatedly apply the inverse-Hessian update from [1]:
        # H <- (I - rho*s*y^T) H (I - rho*y*s^T) + rho*s*s^T
        for i in range(n_corrs):
            A1 = I_arr - s[i][:, np.newaxis] * y[i][np.newaxis, :] * rho[i]
            A2 = I_arr - y[i][:, np.newaxis] * s[i][np.newaxis, :] * rho[i]
            Hk = np.dot(A1, np.dot(Hk, A2)) + (rho[i] * s[i][:, np.newaxis] *
                                               s[i][np.newaxis, :])

        return Hk
|
|