|
|
""" |
|
|
This module include the following optimizer: |
|
|
1. differential_evolution: |
|
|
The differential evolution global optimization algorithm |
|
|
https://github.com/scipy/scipy/blob/70e61dee181de23fdd8d893eaa9491100e2218d7/scipy/optimize/_differentialevolution.py |
|
|
|
|
|
modified by: |
|
|
https://github.com/DebangLi/one-pixel-attack-pytorch/blob/master/differential_evolution.py |
|
|
|
|
|
2. Basic Adam Optimizer |
|
|
|
|
|
""" |
|
|
|
|
|
from __future__ import division, print_function, absolute_import |
|
|
import numpy as np |
|
|
from scipy.optimize import OptimizeResult, minimize |
|
|
from scipy.optimize.optimize import _status_message |
|
|
from scipy._lib._util import check_random_state |
|
|
import warnings |
|
|
|
|
|
|
|
|
__all__ = ['differential_evolution', 'AdamOptimizer'] |
|
|
|
|
|
_MACHEPS = np.finfo(np.float64).eps |
|
|
|
|
|
|
|
|
def differential_evolution(func, bounds, args=(), strategy='best1bin', |
|
|
maxiter=1000, popsize=15, tol=0.01, |
|
|
mutation=(0.5, 1), recombination=0.7, seed=None, |
|
|
callback=None, disp=False, polish=True, |
|
|
init='latinhypercube', atol=0): |
|
|
"""Finds the global minimum of a multivariate function. |
|
|
Differential Evolution is stochastic in nature (does not use gradient |
|
|
methods) to find the minimium, and can search large areas of candidate |
|
|
space, but often requires larger numbers of function evaluations than |
|
|
conventional gradient based techniques. |
|
|
The algorithm is due to Storn and Price [1]_. |
|
|
Parameters |
|
|
---------- |
|
|
func : callable |
|
|
The objective function to be minimized. Must be in the form |
|
|
``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array |
|
|
and ``args`` is a tuple of any additional fixed parameters needed to |
|
|
completely specify the function. |
|
|
bounds : sequence |
|
|
Bounds for variables. ``(min, max)`` pairs for each element in ``x``, |
|
|
defining the lower and upper bounds for the optimizing argument of |
|
|
`func`. It is required to have ``len(bounds) == len(x)``. |
|
|
``len(bounds)`` is used to determine the number of parameters in ``x``. |
|
|
args : tuple, optional |
|
|
Any additional fixed parameters needed to |
|
|
completely specify the objective function. |
|
|
strategy : str, optional |
|
|
The differential evolution strategy to use. Should be one of: |
|
|
- 'best1bin' |
|
|
- 'best1exp' |
|
|
- 'rand1exp' |
|
|
- 'randtobest1exp' |
|
|
- 'currenttobest1exp' |
|
|
- 'best2exp' |
|
|
- 'rand2exp' |
|
|
- 'randtobest1bin' |
|
|
- 'currenttobest1bin' |
|
|
- 'best2bin' |
|
|
- 'rand2bin' |
|
|
- 'rand1bin' |
|
|
The default is 'best1bin'. |
|
|
maxiter : int, optional |
|
|
The maximum number of generations over which the entire population is |
|
|
evolved. The maximum number of function evaluations (with no polishing) |
|
|
is: ``(maxiter + 1) * popsize * len(x)`` |
|
|
popsize : int, optional |
|
|
A multiplier for setting the total population size. The population has |
|
|
``popsize * len(x)`` individuals (unless the initial population is |
|
|
supplied via the `init` keyword). |
|
|
tol : float, optional |
|
|
Relative tolerance for convergence, the solving stops when |
|
|
``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``, |
|
|
where and `atol` and `tol` are the absolute and relative tolerance |
|
|
respectively. |
|
|
mutation : float or tuple(float, float), optional |
|
|
The mutation constant. In the literature this is also known as |
|
|
differential weight, being denoted by F. |
|
|
If specified as a float it should be in the range [0, 2]. |
|
|
If specified as a tuple ``(min, max)`` dithering is employed. Dithering |
|
|
randomly changes the mutation constant on a generation by generation |
|
|
basis. The mutation constant for that generation is taken from |
|
|
``U[min, max)``. Dithering can help speed convergence significantly. |
|
|
Increasing the mutation constant increases the search radius, but will |
|
|
slow down convergence. |
|
|
recombination : float, optional |
|
|
The recombination constant, should be in the range [0, 1]. In the |
|
|
literature this is also known as the crossover probability, being |
|
|
denoted by CR. Increasing this value allows a larger number of mutants |
|
|
to progress into the next generation, but at the risk of population |
|
|
stability. |
|
|
seed : int or `np.random.RandomState`, optional |
|
|
If `seed` is not specified the `np.RandomState` singleton is used. |
|
|
If `seed` is an int, a new `np.random.RandomState` instance is used, |
|
|
seeded with seed. |
|
|
If `seed` is already a `np.random.RandomState instance`, then that |
|
|
`np.random.RandomState` instance is used. |
|
|
Specify `seed` for repeatable minimizations. |
|
|
disp : bool, optional |
|
|
Display status messages |
|
|
callback : callable, `callback(xk, convergence=val)`, optional |
|
|
A function to follow the progress of the minimization. ``xk`` is |
|
|
the current value of ``x0``. ``val`` represents the fractional |
|
|
value of the population convergence. When ``val`` is greater than one |
|
|
the function halts. If callback returns `True`, then the minimization |
|
|
is halted (any polishing is still carried out). |
|
|
polish : bool, optional |
|
|
If True (default), then `scipy.optimize.minimize` with the `L-BFGS-B` |
|
|
method is used to polish the best population member at the end, which |
|
|
can improve the minimization slightly. |
|
|
init : str or array-like, optional |
|
|
Specify which type of population initialization is performed. Should be |
|
|
one of: |
|
|
- 'latinhypercube' |
|
|
- 'random' |
|
|
- array specifying the initial population. The array should have |
|
|
shape ``(M, len(x))``, where len(x) is the number of parameters. |
|
|
`init` is clipped to `bounds` before use. |
|
|
The default is 'latinhypercube'. Latin Hypercube sampling tries to |
|
|
maximize coverage of the available parameter space. 'random' |
|
|
initializes the population randomly - this has the drawback that |
|
|
clustering can occur, preventing the whole of parameter space being |
|
|
covered. Use of an array to specify a population subset could be used, |
|
|
for example, to create a tight bunch of initial guesses in an location |
|
|
where the solution is known to exist, thereby reducing time for |
|
|
convergence. |
|
|
atol : float, optional |
|
|
Absolute tolerance for convergence, the solving stops when |
|
|
``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``, |
|
|
where and `atol` and `tol` are the absolute and relative tolerance |
|
|
respectively. |
|
|
Returns |
|
|
------- |
|
|
res : OptimizeResult |
|
|
The optimization result represented as a `OptimizeResult` object. |
|
|
Important attributes are: ``x`` the solution array, ``success`` a |
|
|
Boolean flag indicating if the optimizer exited successfully and |
|
|
``message`` which describes the cause of the termination. See |
|
|
`OptimizeResult` for a description of other attributes. If `polish` |
|
|
was employed, and a lower minimum was obtained by the polishing, then |
|
|
OptimizeResult also contains the ``jac`` attribute. |
|
|
Notes |
|
|
----- |
|
|
Differential evolution is a stochastic population based method that is |
|
|
useful for global optimization problems. At each pass through the population |
|
|
the algorithm mutates each candidate solution by mixing with other candidate |
|
|
solutions to create a trial candidate. There are several strategies [2]_ for |
|
|
creating trial candidates, which suit some problems more than others. The |
|
|
'best1bin' strategy is a good starting point for many systems. In this |
|
|
strategy two members of the population are randomly chosen. Their difference |
|
|
is used to mutate the best member (the `best` in `best1bin`), :math:`b_0`, |
|
|
so far: |
|
|
.. math:: |
|
|
b' = b_0 + mutation * (population[rand0] - population[rand1]) |
|
|
A trial vector is then constructed. Starting with a randomly chosen 'i'th |
|
|
parameter the trial is sequentially filled (in modulo) with parameters from |
|
|
`b'` or the original candidate. The choice of whether to use `b'` or the |
|
|
original candidate is made with a binomial distribution (the 'bin' in |
|
|
'best1bin') - a random number in [0, 1) is generated. If this number is |
|
|
less than the `recombination` constant then the parameter is loaded from |
|
|
`b'`, otherwise it is loaded from the original candidate. The final |
|
|
parameter is always loaded from `b'`. Once the trial candidate is built |
|
|
its fitness is assessed. If the trial is better than the original candidate |
|
|
then it takes its place. If it is also better than the best overall |
|
|
candidate it also replaces that. |
|
|
To improve your chances of finding a global minimum use higher `popsize` |
|
|
values, with higher `mutation` and (dithering), but lower `recombination` |
|
|
values. This has the effect of widening the search radius, but slowing |
|
|
convergence. |
|
|
.. versionadded:: 0.15.0 |
|
|
|
|
|
References |
|
|
---------- |
|
|
.. [1] Storn, R and Price, K, Differential Evolution - a Simple and |
|
|
Efficient Heuristic for Global Optimization over Continuous Spaces, |
|
|
Journal of Global Optimization, 1997, 11, 341 - 359. |
|
|
.. [2] http://www1.icsi.berkeley.edu/~storn/code.html |
|
|
.. [3] http://en.wikipedia.org/wiki/Differential_evolution |
|
|
""" |
|
|
|
|
|
solver = DifferentialEvolutionSolver(func, bounds, args=args, |
|
|
strategy=strategy, maxiter=maxiter, |
|
|
popsize=popsize, tol=tol, |
|
|
mutation=mutation, |
|
|
recombination=recombination, |
|
|
seed=seed, polish=polish, |
|
|
callback=callback, |
|
|
disp=disp, init=init, atol=atol) |
|
|
return solver.solve() |
|
|
|
|
|
|
|
|
class DifferentialEvolutionSolver(object): |
|
|
|
|
|
"""This class implements the differential evolution solver |
|
|
Parameters |
|
|
---------- |
|
|
func : callable |
|
|
The objective function to be minimized. Must be in the form |
|
|
``f(x, *args)``, where ``x`` is the argument in the form of a 1-D array |
|
|
and ``args`` is a tuple of any additional fixed parameters needed to |
|
|
completely specify the function. |
|
|
bounds : sequence |
|
|
Bounds for variables. ``(min, max)`` pairs for each element in ``x``, |
|
|
defining the lower and upper bounds for the optimizing argument of |
|
|
`func`. It is required to have ``len(bounds) == len(x)``. |
|
|
``len(bounds)`` is used to determine the number of parameters in ``x``. |
|
|
args : tuple, optional |
|
|
Any additional fixed parameters needed to |
|
|
completely specify the objective function. |
|
|
strategy : str, optional |
|
|
The differential evolution strategy to use. Should be one of: |
|
|
- 'best1bin' |
|
|
- 'best1exp' |
|
|
- 'rand1exp' |
|
|
- 'randtobest1exp' |
|
|
- 'currenttobest1exp' |
|
|
- 'best2exp' |
|
|
- 'rand2exp' |
|
|
- 'randtobest1bin' |
|
|
- 'currenttobest1bin' |
|
|
- 'best2bin' |
|
|
- 'rand2bin' |
|
|
- 'rand1bin' |
|
|
The default is 'best1bin' |
|
|
maxiter : int, optional |
|
|
The maximum number of generations over which the entire population is |
|
|
evolved. The maximum number of function evaluations (with no polishing) |
|
|
is: ``(maxiter + 1) * popsize * len(x)`` |
|
|
popsize : int, optional |
|
|
A multiplier for setting the total population size. The population has |
|
|
``popsize * len(x)`` individuals (unless the initial population is |
|
|
supplied via the `init` keyword). |
|
|
tol : float, optional |
|
|
Relative tolerance for convergence, the solving stops when |
|
|
``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``, |
|
|
where and `atol` and `tol` are the absolute and relative tolerance |
|
|
respectively. |
|
|
mutation : float or tuple(float, float), optional |
|
|
The mutation constant. In the literature this is also known as |
|
|
differential weight, being denoted by F. |
|
|
If specified as a float it should be in the range [0, 2]. |
|
|
If specified as a tuple ``(min, max)`` dithering is employed. Dithering |
|
|
randomly changes the mutation constant on a generation by generation |
|
|
basis. The mutation constant for that generation is taken from |
|
|
U[min, max). Dithering can help speed convergence significantly. |
|
|
Increasing the mutation constant increases the search radius, but will |
|
|
slow down convergence. |
|
|
recombination : float, optional |
|
|
The recombination constant, should be in the range [0, 1]. In the |
|
|
literature this is also known as the crossover probability, being |
|
|
denoted by CR. Increasing this value allows a larger number of mutants |
|
|
to progress into the next generation, but at the risk of population |
|
|
stability. |
|
|
seed : int or `np.random.RandomState`, optional |
|
|
If `seed` is not specified the `np.random.RandomState` singleton is |
|
|
used. |
|
|
If `seed` is an int, a new `np.random.RandomState` instance is used, |
|
|
seeded with `seed`. |
|
|
If `seed` is already a `np.random.RandomState` instance, then that |
|
|
`np.random.RandomState` instance is used. |
|
|
Specify `seed` for repeatable minimizations. |
|
|
disp : bool, optional |
|
|
Display status messages |
|
|
callback : callable, `callback(xk, convergence=val)`, optional |
|
|
A function to follow the progress of the minimization. ``xk`` is |
|
|
the current value of ``x0``. ``val`` represents the fractional |
|
|
value of the population convergence. When ``val`` is greater than one |
|
|
the function halts. If callback returns `True`, then the minimization |
|
|
is halted (any polishing is still carried out). |
|
|
polish : bool, optional |
|
|
If True, then `scipy.optimize.minimize` with the `L-BFGS-B` method |
|
|
is used to polish the best population member at the end. This requires |
|
|
a few more function evaluations. |
|
|
maxfun : int, optional |
|
|
Set the maximum number of function evaluations. However, it probably |
|
|
makes more sense to set `maxiter` instead. |
|
|
init : str or array-like, optional |
|
|
Specify which type of population initialization is performed. Should be |
|
|
one of: |
|
|
- 'latinhypercube' |
|
|
- 'random' |
|
|
- array specifying the initial population. The array should have |
|
|
shape ``(M, len(x))``, where len(x) is the number of parameters. |
|
|
`init` is clipped to `bounds` before use. |
|
|
The default is 'latinhypercube'. Latin Hypercube sampling tries to |
|
|
maximize coverage of the available parameter space. 'random' |
|
|
initializes the population randomly - this has the drawback that |
|
|
clustering can occur, preventing the whole of parameter space being |
|
|
covered. Use of an array to specify a population could be used, for |
|
|
example, to create a tight bunch of initial guesses in an location |
|
|
where the solution is known to exist, thereby reducing time for |
|
|
convergence. |
|
|
atol : float, optional |
|
|
Absolute tolerance for convergence, the solving stops when |
|
|
``np.std(pop) <= atol + tol * np.abs(np.mean(population_energies))``, |
|
|
where and `atol` and `tol` are the absolute and relative tolerance |
|
|
respectively. |
|
|
""" |
|
|
|
|
|
|
|
|
_binomial = {'best1bin': '_best1', |
|
|
'randtobest1bin': '_randtobest1', |
|
|
'currenttobest1bin': '_currenttobest1', |
|
|
'best2bin': '_best2', |
|
|
'rand2bin': '_rand2', |
|
|
'rand1bin': '_rand1'} |
|
|
_exponential = {'best1exp': '_best1', |
|
|
'rand1exp': '_rand1', |
|
|
'randtobest1exp': '_randtobest1', |
|
|
'currenttobest1exp': '_currenttobest1', |
|
|
'best2exp': '_best2', |
|
|
'rand2exp': '_rand2'} |
|
|
|
|
|
__init_error_msg = ("The population initialization method must be one of " |
|
|
"'latinhypercube' or 'random', or an array of shape " |
|
|
"(M, N) where N is the number of parameters and M>5") |
|
|
|
|
|
def __init__(self, func, bounds, args=(), |
|
|
strategy='best1bin', maxiter=1000, popsize=15, |
|
|
tol=0.01, mutation=(0.5, 1), recombination=0.7, seed=None, |
|
|
maxfun=np.inf, callback=None, disp=False, polish=True, |
|
|
init='latinhypercube', atol=0): |
|
|
|
|
|
if strategy in self._binomial: |
|
|
self.mutation_func = getattr(self, self._binomial[strategy]) |
|
|
elif strategy in self._exponential: |
|
|
self.mutation_func = getattr(self, self._exponential[strategy]) |
|
|
else: |
|
|
raise ValueError("Please select a valid mutation strategy") |
|
|
self.strategy = strategy |
|
|
|
|
|
self.callback = callback |
|
|
self.polish = polish |
|
|
|
|
|
|
|
|
self.tol, self.atol = tol, atol |
|
|
|
|
|
|
|
|
|
|
|
self.scale = mutation |
|
|
if (not np.all(np.isfinite(mutation)) or |
|
|
np.any(np.array(mutation) >= 2) or |
|
|
np.any(np.array(mutation) < 0)): |
|
|
raise ValueError('The mutation constant must be a float in ' |
|
|
'U[0, 2), or specified as a tuple(min, max)' |
|
|
' where min < max and min, max are in U[0, 2).') |
|
|
|
|
|
self.dither = None |
|
|
if hasattr(mutation, '__iter__') and len(mutation) > 1: |
|
|
self.dither = [mutation[0], mutation[1]] |
|
|
self.dither.sort() |
|
|
|
|
|
self.cross_over_probability = recombination |
|
|
|
|
|
self.func = func |
|
|
self.args = args |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.limits = np.array(bounds, dtype='float').T |
|
|
if (np.size(self.limits, 0) != 2 or not |
|
|
np.all(np.isfinite(self.limits))): |
|
|
raise ValueError('bounds should be a sequence containing ' |
|
|
'real valued (min, max) pairs for each value' |
|
|
' in x') |
|
|
|
|
|
if maxiter is None: |
|
|
maxiter = 1000 |
|
|
self.maxiter = maxiter |
|
|
if maxfun is None: |
|
|
maxfun = np.inf |
|
|
self.maxfun = maxfun |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.__scale_arg1 = 0.5 * (self.limits[0] + self.limits[1]) |
|
|
self.__scale_arg2 = np.fabs(self.limits[0] - self.limits[1]) |
|
|
|
|
|
self.parameter_count = np.size(self.limits, 1) |
|
|
|
|
|
self.random_number_generator = check_random_state(seed) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
self.num_population_members = max(5, popsize * self.parameter_count) |
|
|
|
|
|
self.population_shape = (self.num_population_members, |
|
|
self.parameter_count) |
|
|
|
|
|
self._nfev = 0 |
|
|
if isinstance(init, str): |
|
|
if init == 'latinhypercube': |
|
|
self.init_population_lhs() |
|
|
elif init == 'random': |
|
|
self.init_population_random() |
|
|
else: |
|
|
raise ValueError(self.__init_error_msg) |
|
|
else: |
|
|
self.init_population_array(init) |
|
|
|
|
|
self.disp = disp |
|
|
|
|
|
def init_population_lhs(self): |
|
|
""" |
|
|
Initializes the population with Latin Hypercube Sampling. |
|
|
Latin Hypercube Sampling ensures that each parameter is uniformly |
|
|
sampled over its range. |
|
|
""" |
|
|
rng = self.random_number_generator |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
segsize = 1.0 / self.num_population_members |
|
|
|
|
|
|
|
|
|
|
|
samples = (segsize * rng.random_sample(self.population_shape) |
|
|
|
|
|
|
|
|
+ np.linspace(0., 1., self.num_population_members, |
|
|
endpoint=False)[:, np.newaxis]) |
|
|
|
|
|
|
|
|
self.population = np.zeros_like(samples) |
|
|
|
|
|
|
|
|
|
|
|
for j in range(self.parameter_count): |
|
|
order = rng.permutation(range(self.num_population_members)) |
|
|
self.population[:, j] = samples[order, j] |
|
|
|
|
|
|
|
|
self.population_energies = (np.ones(self.num_population_members) * |
|
|
np.inf) |
|
|
|
|
|
|
|
|
self._nfev = 0 |
|
|
|
|
|
def init_population_random(self): |
|
|
""" |
|
|
Initialises the population at random. This type of initialization |
|
|
can possess clustering, Latin Hypercube sampling is generally better. |
|
|
""" |
|
|
rng = self.random_number_generator |
|
|
self.population = rng.random_sample(self.population_shape) |
|
|
|
|
|
|
|
|
self.population_energies = (np.ones(self.num_population_members) * |
|
|
np.inf) |
|
|
|
|
|
|
|
|
self._nfev = 0 |
|
|
|
|
|
def init_population_array(self, init): |
|
|
""" |
|
|
Initialises the population with a user specified population. |
|
|
Parameters |
|
|
---------- |
|
|
init : np.ndarray |
|
|
Array specifying subset of the initial population. The array should |
|
|
have shape (M, len(x)), where len(x) is the number of parameters. |
|
|
The population is clipped to the lower and upper `bounds`. |
|
|
""" |
|
|
|
|
|
popn = np.asfarray(init) |
|
|
|
|
|
if (np.size(popn, 0) < 5 or |
|
|
popn.shape[1] != self.parameter_count or |
|
|
len(popn.shape) != 2): |
|
|
raise ValueError("The population supplied needs to have shape" |
|
|
" (M, len(x)), where M > 4.") |
|
|
|
|
|
|
|
|
self.population = np.clip(self._unscale_parameters(popn), 0, 1) |
|
|
|
|
|
self.num_population_members = np.size(self.population, 0) |
|
|
|
|
|
self.population_shape = (self.num_population_members, |
|
|
self.parameter_count) |
|
|
|
|
|
|
|
|
self.population_energies = (np.ones(self.num_population_members) * |
|
|
np.inf) |
|
|
|
|
|
|
|
|
self._nfev = 0 |
|
|
|
|
|
@property |
|
|
def x(self): |
|
|
""" |
|
|
The best solution from the solver |
|
|
Returns |
|
|
------- |
|
|
x : ndarray |
|
|
The best solution from the solver. |
|
|
""" |
|
|
return self._scale_parameters(self.population[0]) |
|
|
|
|
|
@property |
|
|
def convergence(self): |
|
|
""" |
|
|
The standard deviation of the population energies divided by their |
|
|
mean. |
|
|
""" |
|
|
return (np.std(self.population_energies) / |
|
|
np.abs(np.mean(self.population_energies) + _MACHEPS)) |
|
|
|
|
|
def solve(self): |
|
|
""" |
|
|
Runs the DifferentialEvolutionSolver. |
|
|
Returns |
|
|
------- |
|
|
res : OptimizeResult |
|
|
The optimization result represented as a ``OptimizeResult`` object. |
|
|
Important attributes are: ``x`` the solution array, ``success`` a |
|
|
Boolean flag indicating if the optimizer exited successfully and |
|
|
``message`` which describes the cause of the termination. See |
|
|
`OptimizeResult` for a description of other attributes. If `polish` |
|
|
was employed, and a lower minimum was obtained by the polishing, |
|
|
then OptimizeResult also contains the ``jac`` attribute. |
|
|
""" |
|
|
nit, warning_flag = 0, False |
|
|
status_message = _status_message['success'] |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if np.all(np.isinf(self.population_energies)): |
|
|
self._calculate_population_energies() |
|
|
|
|
|
|
|
|
for nit in range(1, self.maxiter + 1): |
|
|
|
|
|
try: |
|
|
next(self) |
|
|
except StopIteration: |
|
|
warning_flag = True |
|
|
status_message = _status_message['maxfev'] |
|
|
break |
|
|
|
|
|
if self.disp: |
|
|
print("differential_evolution step %d: f(x)= %g" |
|
|
% (nit, |
|
|
self.population_energies[0])) |
|
|
|
|
|
|
|
|
convergence = self.convergence |
|
|
|
|
|
if (self.callback and |
|
|
self.callback(self._scale_parameters(self.population[0]), |
|
|
convergence=self.tol / convergence) is True): |
|
|
|
|
|
warning_flag = True |
|
|
status_message = ('callback function requested stop early ' |
|
|
'by returning True') |
|
|
break |
|
|
|
|
|
intol = (np.std(self.population_energies) <= |
|
|
self.atol + |
|
|
self.tol * np.abs(np.mean(self.population_energies))) |
|
|
if warning_flag or intol: |
|
|
break |
|
|
|
|
|
else: |
|
|
status_message = _status_message['maxiter'] |
|
|
warning_flag = True |
|
|
|
|
|
DE_result = OptimizeResult( |
|
|
x=self.x, |
|
|
fun=self.population_energies[0], |
|
|
nfev=self._nfev, |
|
|
nit=nit, |
|
|
message=status_message, |
|
|
success=(warning_flag is not True)) |
|
|
|
|
|
if self.polish: |
|
|
result = minimize(self.func, |
|
|
np.copy(DE_result.x), |
|
|
method='L-BFGS-B', |
|
|
bounds=self.limits.T, |
|
|
args=self.args) |
|
|
|
|
|
self._nfev += result.nfev |
|
|
DE_result.nfev = self._nfev |
|
|
|
|
|
if result.fun < DE_result.fun: |
|
|
DE_result.fun = result.fun |
|
|
DE_result.x = result.x |
|
|
DE_result.jac = result.jac |
|
|
|
|
|
self.population_energies[0] = result.fun |
|
|
self.population[0] = self._unscale_parameters(result.x) |
|
|
|
|
|
return DE_result |
|
|
|
|
|
def _calculate_population_energies(self): |
|
|
""" |
|
|
Calculate the energies of all the population members at the same time. |
|
|
Puts the best member in first place. Useful if the population has just |
|
|
been initialised. |
|
|
""" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
itersize = max(0, min(len(self.population), self.maxfun - self._nfev + 1)) |
|
|
candidates = self.population[:itersize] |
|
|
parameters = np.array([self._scale_parameters(c) for c in candidates]) |
|
|
energies = self.func(parameters, *self.args) |
|
|
self.population_energies = energies |
|
|
self._nfev += itersize |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
minval = np.argmin(self.population_energies) |
|
|
|
|
|
|
|
|
lowest_energy = self.population_energies[minval] |
|
|
self.population_energies[minval] = self.population_energies[0] |
|
|
self.population_energies[0] = lowest_energy |
|
|
|
|
|
self.population[[0, minval], :] = self.population[[minval, 0], :] |
|
|
|
|
|
def __iter__(self): |
|
|
return self |
|
|
|
|
|
def __next__(self): |
|
|
""" |
|
|
Evolve the population by a single generation |
|
|
Returns |
|
|
------- |
|
|
x : ndarray |
|
|
The best solution from the solver. |
|
|
fun : float |
|
|
Value of objective function obtained from the best solution. |
|
|
""" |
|
|
|
|
|
|
|
|
if np.all(np.isinf(self.population_energies)): |
|
|
self._calculate_population_energies() |
|
|
|
|
|
if self.dither is not None: |
|
|
self.scale = (self.random_number_generator.rand() |
|
|
* (self.dither[1] - self.dither[0]) + self.dither[0]) |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
itersize = max(0, min(self.num_population_members, self.maxfun - self._nfev + 1)) |
|
|
trials = np.array([self._mutate(c) for c in range(itersize)]) |
|
|
for trial in trials: self._ensure_constraint(trial) |
|
|
parameters = np.array([self._scale_parameters(trial) for trial in trials]) |
|
|
energies = self.func(parameters, *self.args) |
|
|
self._nfev += itersize |
|
|
|
|
|
for candidate,(energy,trial) in enumerate(zip(energies, trials)): |
|
|
|
|
|
|
|
|
if energy < self.population_energies[candidate]: |
|
|
self.population[candidate] = trial |
|
|
self.population_energies[candidate] = energy |
|
|
|
|
|
|
|
|
|
|
|
if energy < self.population_energies[0]: |
|
|
self.population_energies[0] = energy |
|
|
self.population[0] = trial |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
return self.x, self.population_energies[0] |
|
|
|
|
|
def next(self): |
|
|
""" |
|
|
Evolve the population by a single generation |
|
|
Returns |
|
|
------- |
|
|
x : ndarray |
|
|
The best solution from the solver. |
|
|
fun : float |
|
|
Value of objective function obtained from the best solution. |
|
|
""" |
|
|
|
|
|
return self.__next__() |
|
|
|
|
|
def _scale_parameters(self, trial): |
|
|
""" |
|
|
scale from a number between 0 and 1 to parameters. |
|
|
""" |
|
|
return self.__scale_arg1 + (trial - 0.5) * self.__scale_arg2 |
|
|
|
|
|
def _unscale_parameters(self, parameters): |
|
|
""" |
|
|
scale from parameters to a number between 0 and 1. |
|
|
""" |
|
|
return (parameters - self.__scale_arg1) / self.__scale_arg2 + 0.5 |
|
|
|
|
|
def _ensure_constraint(self, trial): |
|
|
""" |
|
|
make sure the parameters lie between the limits |
|
|
""" |
|
|
for index in np.where((trial < 0) | (trial > 1))[0]: |
|
|
trial[index] = self.random_number_generator.rand() |
|
|
|
|
|
def _mutate(self, candidate): |
|
|
""" |
|
|
create a trial vector based on a mutation strategy |
|
|
""" |
|
|
trial = np.copy(self.population[candidate]) |
|
|
|
|
|
rng = self.random_number_generator |
|
|
|
|
|
fill_point = rng.randint(0, self.parameter_count) |
|
|
|
|
|
if self.strategy in ['currenttobest1exp', 'currenttobest1bin']: |
|
|
bprime = self.mutation_func(candidate, |
|
|
self._select_samples(candidate, 5)) |
|
|
else: |
|
|
bprime = self.mutation_func(self._select_samples(candidate, 5)) |
|
|
|
|
|
if self.strategy in self._binomial: |
|
|
crossovers = rng.rand(self.parameter_count) |
|
|
crossovers = crossovers < self.cross_over_probability |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
crossovers[fill_point] = True |
|
|
trial = np.where(crossovers, bprime, trial) |
|
|
return trial |
|
|
|
|
|
elif self.strategy in self._exponential: |
|
|
i = 0 |
|
|
while (i < self.parameter_count and |
|
|
rng.rand() < self.cross_over_probability): |
|
|
|
|
|
trial[fill_point] = bprime[fill_point] |
|
|
fill_point = (fill_point + 1) % self.parameter_count |
|
|
i += 1 |
|
|
|
|
|
return trial |
|
|
|
|
|
def _best1(self, samples): |
|
|
""" |
|
|
best1bin, best1exp |
|
|
""" |
|
|
r0, r1 = samples[:2] |
|
|
return (self.population[0] + self.scale * |
|
|
(self.population[r0] - self.population[r1])) |
|
|
|
|
|
def _rand1(self, samples): |
|
|
""" |
|
|
rand1bin, rand1exp |
|
|
""" |
|
|
r0, r1, r2 = samples[:3] |
|
|
return (self.population[r0] + self.scale * |
|
|
(self.population[r1] - self.population[r2])) |
|
|
|
|
|
def _randtobest1(self, samples): |
|
|
""" |
|
|
randtobest1bin, randtobest1exp |
|
|
""" |
|
|
r0, r1, r2 = samples[:3] |
|
|
bprime = np.copy(self.population[r0]) |
|
|
bprime += self.scale * (self.population[0] - bprime) |
|
|
bprime += self.scale * (self.population[r1] - |
|
|
self.population[r2]) |
|
|
return bprime |
|
|
|
|
|
def _currenttobest1(self, candidate, samples): |
|
|
""" |
|
|
currenttobest1bin, currenttobest1exp |
|
|
""" |
|
|
r0, r1 = samples[:2] |
|
|
bprime = (self.population[candidate] + self.scale * |
|
|
(self.population[0] - self.population[candidate] + |
|
|
self.population[r0] - self.population[r1])) |
|
|
return bprime |
|
|
|
|
|
def _best2(self, samples): |
|
|
""" |
|
|
best2bin, best2exp |
|
|
""" |
|
|
r0, r1, r2, r3 = samples[:4] |
|
|
bprime = (self.population[0] + self.scale * |
|
|
(self.population[r0] + self.population[r1] - |
|
|
self.population[r2] - self.population[r3])) |
|
|
|
|
|
return bprime |
|
|
|
|
|
def _rand2(self, samples): |
|
|
""" |
|
|
rand2bin, rand2exp |
|
|
""" |
|
|
r0, r1, r2, r3, r4 = samples |
|
|
bprime = (self.population[r0] + self.scale * |
|
|
(self.population[r1] + self.population[r2] - |
|
|
self.population[r3] - self.population[r4])) |
|
|
|
|
|
return bprime |
|
|
|
|
|
def _select_samples(self, candidate, number_samples): |
|
|
""" |
|
|
obtain random integers from range(self.num_population_members), |
|
|
without replacement. You can't have the original candidate either. |
|
|
""" |
|
|
idxs = list(range(self.num_population_members)) |
|
|
idxs.remove(candidate) |
|
|
self.random_number_generator.shuffle(idxs) |
|
|
idxs = idxs[:number_samples] |
|
|
return idxs |
|
|
|
|
|
class AdamOptimizer: |
|
|
"""Basic Adam optimizer implementation that can minimize w.r.t. |
|
|
a single variable. |
|
|
Parameters |
|
|
---------- |
|
|
shape : tuple |
|
|
shape of the variable w.r.t. which the loss should be minimized |
|
|
""" |
|
|
|
|
|
def __init__(self, shape): |
|
|
self.m = np.zeros(shape) |
|
|
self.v = np.zeros(shape) |
|
|
self.t = 0 |
|
|
|
|
|
def __call__(self, gradient, learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-8): |
|
|
"""Updates internal parameters of the optimizer and returns |
|
|
the change that should be applied to the variable. |
|
|
Parameters |
|
|
---------- |
|
|
gradient : `np.ndarray` |
|
|
the gradient of the loss w.r.t. to the variable |
|
|
learning_rate: float |
|
|
the learning rate in the current iteration |
|
|
beta1: float |
|
|
decay rate for calculating the exponentially |
|
|
decaying average of past gradients |
|
|
beta2: float |
|
|
decay rate for calculating the exponentially |
|
|
decaying average of past squared gradients |
|
|
epsilon: float |
|
|
small value to avoid division by zero |
|
|
""" |
|
|
|
|
|
self.t += 1 |
|
|
|
|
|
self.m = beta1 * self.m + (1 - beta1) * gradient |
|
|
self.v = beta2 * self.v + (1 - beta2) * gradient ** 2 |
|
|
|
|
|
bias_correction_1 = 1 - beta1 ** self.t |
|
|
bias_correction_2 = 1 - beta2 ** self.t |
|
|
|
|
|
m_hat = self.m / bias_correction_1 |
|
|
v_hat = self.v / bias_correction_2 |
|
|
|
|
|
return -learning_rate * m_hat / (np.sqrt(v_hat) + epsilon) |
|
|
|